{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.98631074606434, "eval_steps": 500, "global_step": 7300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0027378507871321013, "grad_norm": 5.0040283203125, "learning_rate": 9.998630136986302e-07, "log_odds_chosen": -0.6810488700866699, "log_odds_ratio": -1.260475516319275, "logits/chosen": -0.001064881682395935, "logits/rejected": -0.033191047608852386, "logps/chosen": -3.466616153717041, "logps/rejected": -2.7802464962005615, "loss": 2.8386, "nll_loss": 2.7125558853149414, "rewards/accuracies": 0.5, "rewards/chosen": -0.34666162729263306, "rewards/margins": -0.06863699108362198, "rewards/rejected": -0.2780246436595917, "step": 1 }, { "epoch": 0.0054757015742642025, "grad_norm": 5.070333003997803, "learning_rate": 9.997260273972602e-07, "log_odds_chosen": -0.35554832220077515, "log_odds_ratio": -1.0722215175628662, "logits/chosen": -0.063265860080719, "logits/rejected": -0.09344077855348587, "logps/chosen": -3.3857219219207764, "logps/rejected": -3.0373075008392334, "loss": 2.8734, "nll_loss": 2.766209602355957, "rewards/accuracies": 0.5, "rewards/chosen": -0.3385722041130066, "rewards/margins": -0.03484143689274788, "rewards/rejected": -0.3037307858467102, "step": 2 }, { "epoch": 0.008213552361396304, "grad_norm": 6.013066291809082, "learning_rate": 9.995890410958904e-07, "log_odds_chosen": -0.7668721675872803, "log_odds_ratio": -1.3143620491027832, "logits/chosen": 0.05035081505775452, "logits/rejected": 0.11144508421421051, "logps/chosen": -4.296844482421875, "logps/rejected": -3.541597366333008, "loss": 3.0368, "nll_loss": 2.905344009399414, "rewards/accuracies": 0.25, "rewards/chosen": -0.42968446016311646, "rewards/margins": -0.0755247101187706, "rewards/rejected": -0.35415977239608765, "step": 3 }, { "epoch": 0.010951403148528405, "grad_norm": 5.7046003341674805, "learning_rate": 9.994520547945206e-07, "log_odds_chosen": -1.4119532108306885, "log_odds_ratio": -1.7889153957366943, "logits/chosen": 0.17451678216457367, "logits/rejected": 0.27507954835891724, "logps/chosen": -4.276388645172119, "logps/rejected": -2.8799326419830322, "loss": 3.0323, "nll_loss": 2.853397846221924, "rewards/accuracies": 0.25, "rewards/chosen": -0.42763882875442505, "rewards/margins": -0.13964557647705078, "rewards/rejected": -0.28799331188201904, "step": 4 }, { "epoch": 0.013689253935660506, "grad_norm": 5.39284086227417, "learning_rate": 9.993150684931506e-07, "log_odds_chosen": -0.6499373316764832, "log_odds_ratio": -1.3020315170288086, "logits/chosen": -0.027141541242599487, "logits/rejected": -0.04773163050413132, "logps/chosen": -4.347743988037109, "logps/rejected": -3.6917502880096436, "loss": 2.9132, "nll_loss": 2.782986640930176, "rewards/accuracies": 0.5, "rewards/chosen": -0.4347744584083557, "rewards/margins": -0.06559940427541733, "rewards/rejected": -0.3691750168800354, "step": 5 }, { "epoch": 0.01642710472279261, "grad_norm": 5.323212146759033, "learning_rate": 9.991780821917808e-07, "log_odds_chosen": -0.27191150188446045, "log_odds_ratio": -1.0104033946990967, "logits/chosen": -0.1392054259777069, "logits/rejected": -0.09127771854400635, "logps/chosen": -3.208475112915039, "logps/rejected": -2.9302151203155518, "loss": 2.875, "nll_loss": 2.77400541305542, "rewards/accuracies": 0.5, "rewards/chosen": -0.3208475708961487, "rewards/margins": -0.02782602608203888, "rewards/rejected": -0.2930215001106262, "step": 6 }, { "epoch": 0.019164955509924708, "grad_norm": 5.14642858505249, "learning_rate": 9.99041095890411e-07, "log_odds_chosen": -0.6687689423561096, "log_odds_ratio": -1.216036319732666, "logits/chosen": 0.03207097575068474, "logits/rejected": 0.013430304825305939, "logps/chosen": -3.5791008472442627, "logps/rejected": -2.90120792388916, "loss": 2.8485, "nll_loss": 2.7269389629364014, "rewards/accuracies": 0.375, "rewards/chosen": -0.3579100966453552, "rewards/margins": -0.06778930872678757, "rewards/rejected": -0.29012078046798706, "step": 7 }, { "epoch": 0.02190280629705681, "grad_norm": 4.275904178619385, "learning_rate": 9.98904109589041e-07, "log_odds_chosen": 0.10254860669374466, "log_odds_ratio": -0.7067513465881348, "logits/chosen": 0.0728575810790062, "logits/rejected": 0.009077345952391624, "logps/chosen": -2.3619914054870605, "logps/rejected": -2.4356000423431396, "loss": 2.725, "nll_loss": 2.65429949760437, "rewards/accuracies": 0.625, "rewards/chosen": -0.23619914054870605, "rewards/margins": 0.00736084021627903, "rewards/rejected": -0.24355998635292053, "step": 8 }, { "epoch": 0.024640657084188913, "grad_norm": 5.558749675750732, "learning_rate": 9.987671232876712e-07, "log_odds_chosen": -0.7328005433082581, "log_odds_ratio": -1.2284011840820312, "logits/chosen": -0.05651663616299629, "logits/rejected": 0.03782377764582634, "logps/chosen": -4.686506748199463, "logps/rejected": -3.9722394943237305, "loss": 2.9863, "nll_loss": 2.863485813140869, "rewards/accuracies": 0.125, "rewards/chosen": -0.4686507284641266, "rewards/margins": -0.07142674177885056, "rewards/rejected": -0.39722394943237305, "step": 9 }, { "epoch": 0.02737850787132101, "grad_norm": 4.820083141326904, "learning_rate": 9.986301369863014e-07, "log_odds_chosen": -0.5045827627182007, "log_odds_ratio": -1.0346410274505615, "logits/chosen": 0.06435639411211014, "logits/rejected": 0.025332950055599213, "logps/chosen": -3.048752784729004, "logps/rejected": -2.5829286575317383, "loss": 2.8112, "nll_loss": 2.7077817916870117, "rewards/accuracies": 0.375, "rewards/chosen": -0.30487528443336487, "rewards/margins": -0.04658240079879761, "rewards/rejected": -0.25829288363456726, "step": 10 }, { "epoch": 0.030116358658453114, "grad_norm": 4.941049575805664, "learning_rate": 9.984931506849314e-07, "log_odds_chosen": -0.07449714839458466, "log_odds_ratio": -0.8135035037994385, "logits/chosen": 0.054927676916122437, "logits/rejected": 0.06424494087696075, "logps/chosen": -2.726111888885498, "logps/rejected": -2.580936908721924, "loss": 2.7277, "nll_loss": 2.6463804244995117, "rewards/accuracies": 0.375, "rewards/chosen": -0.27261117100715637, "rewards/margins": -0.014517467468976974, "rewards/rejected": -0.2580937147140503, "step": 11 }, { "epoch": 0.03285420944558522, "grad_norm": 5.909090995788574, "learning_rate": 9.983561643835616e-07, "log_odds_chosen": -1.066605567932129, "log_odds_ratio": -1.6916459798812866, "logits/chosen": 0.04953520745038986, "logits/rejected": 0.19729897379875183, "logps/chosen": -4.09175968170166, "logps/rejected": -3.0427966117858887, "loss": 3.0479, "nll_loss": 2.8787355422973633, "rewards/accuracies": 0.25, "rewards/chosen": -0.40917593240737915, "rewards/margins": -0.10489631444215775, "rewards/rejected": -0.3042796552181244, "step": 12 }, { "epoch": 0.03559206023271732, "grad_norm": 4.681210994720459, "learning_rate": 9.982191780821918e-07, "log_odds_chosen": -0.010377582162618637, "log_odds_ratio": -0.7253429889678955, "logits/chosen": -0.02414539083838463, "logits/rejected": -0.05176248401403427, "logps/chosen": -2.883589744567871, "logps/rejected": -2.877103328704834, "loss": 2.696, "nll_loss": 2.623417854309082, "rewards/accuracies": 0.375, "rewards/chosen": -0.28835898637771606, "rewards/margins": -0.0006486363708972931, "rewards/rejected": -0.2877103388309479, "step": 13 }, { "epoch": 0.038329911019849415, "grad_norm": 5.61288595199585, "learning_rate": 9.980821917808218e-07, "log_odds_chosen": -0.464860200881958, "log_odds_ratio": -1.088516116142273, "logits/chosen": 0.025740012526512146, "logits/rejected": 0.045695818960666656, "logps/chosen": -3.5254321098327637, "logps/rejected": -3.0469329357147217, "loss": 2.9044, "nll_loss": 2.795560359954834, "rewards/accuracies": 0.375, "rewards/chosen": -0.3525432050228119, "rewards/margins": -0.04784989729523659, "rewards/rejected": -0.304693341255188, "step": 14 }, { "epoch": 0.04106776180698152, "grad_norm": 5.830635070800781, "learning_rate": 9.97945205479452e-07, "log_odds_chosen": -0.630207896232605, "log_odds_ratio": -1.456902027130127, "logits/chosen": -0.0035403892397880554, "logits/rejected": 0.06429289281368256, "logps/chosen": -3.752187728881836, "logps/rejected": -3.0315604209899902, "loss": 2.9576, "nll_loss": 2.811903238296509, "rewards/accuracies": 0.5, "rewards/chosen": -0.37521880865097046, "rewards/margins": -0.07206273078918457, "rewards/rejected": -0.3031560480594635, "step": 15 }, { "epoch": 0.04380561259411362, "grad_norm": 4.766214847564697, "learning_rate": 9.978082191780822e-07, "log_odds_chosen": -0.4864382743835449, "log_odds_ratio": -1.1577229499816895, "logits/chosen": -0.0255502387881279, "logits/rejected": -0.05697725713253021, "logps/chosen": -3.470975160598755, "logps/rejected": -2.952425718307495, "loss": 2.849, "nll_loss": 2.7332510948181152, "rewards/accuracies": 0.5, "rewards/chosen": -0.3470975160598755, "rewards/margins": -0.051854923367500305, "rewards/rejected": -0.295242577791214, "step": 16 }, { "epoch": 0.04654346338124572, "grad_norm": 6.055099010467529, "learning_rate": 9.976712328767124e-07, "log_odds_chosen": -0.5097405314445496, "log_odds_ratio": -1.1363489627838135, "logits/chosen": 0.04118158668279648, "logits/rejected": 0.08707933872938156, "logps/chosen": -3.9034948348999023, "logps/rejected": -3.4018471240997314, "loss": 2.9811, "nll_loss": 2.8674421310424805, "rewards/accuracies": 0.5, "rewards/chosen": -0.39034947752952576, "rewards/margins": -0.05016474425792694, "rewards/rejected": -0.34018474817276, "step": 17 }, { "epoch": 0.049281314168377825, "grad_norm": 4.915632724761963, "learning_rate": 9.975342465753424e-07, "log_odds_chosen": -0.18554408848285675, "log_odds_ratio": -0.9184417128562927, "logits/chosen": 0.08274684101343155, "logits/rejected": 0.051673732697963715, "logps/chosen": -3.014207363128662, "logps/rejected": -2.7851743698120117, "loss": 2.8175, "nll_loss": 2.725693941116333, "rewards/accuracies": 0.5, "rewards/chosen": -0.3014207184314728, "rewards/margins": -0.022903291508555412, "rewards/rejected": -0.2785174250602722, "step": 18 }, { "epoch": 0.05201916495550993, "grad_norm": 4.601833343505859, "learning_rate": 9.973972602739726e-07, "log_odds_chosen": -0.5313608646392822, "log_odds_ratio": -1.1391754150390625, "logits/chosen": -0.15415409207344055, "logits/rejected": -0.26682722568511963, "logps/chosen": -3.729454278945923, "logps/rejected": -3.178182601928711, "loss": 2.7783, "nll_loss": 2.6643545627593994, "rewards/accuracies": 0.375, "rewards/chosen": -0.3729454278945923, "rewards/margins": -0.055127181112766266, "rewards/rejected": -0.3178182542324066, "step": 19 }, { "epoch": 0.05475701574264202, "grad_norm": 4.649076461791992, "learning_rate": 9.972602739726028e-07, "log_odds_chosen": 0.007364869117736816, "log_odds_ratio": -0.839417040348053, "logits/chosen": 0.15006844699382782, "logits/rejected": 0.11718703806400299, "logps/chosen": -2.7175867557525635, "logps/rejected": -2.6965858936309814, "loss": 2.6591, "nll_loss": 2.5751426219940186, "rewards/accuracies": 0.5, "rewards/chosen": -0.27175867557525635, "rewards/margins": -0.0021000951528549194, "rewards/rejected": -0.2696585953235626, "step": 20 }, { "epoch": 0.057494866529774126, "grad_norm": 4.819366931915283, "learning_rate": 9.971232876712328e-07, "log_odds_chosen": 0.6280566453933716, "log_odds_ratio": -0.5128471851348877, "logits/chosen": 0.10264478623867035, "logits/rejected": 0.07943382114171982, "logps/chosen": -2.427276372909546, "logps/rejected": -2.994718074798584, "loss": 2.7719, "nll_loss": 2.7206082344055176, "rewards/accuracies": 0.75, "rewards/chosen": -0.2427276372909546, "rewards/margins": 0.05674419552087784, "rewards/rejected": -0.2994718551635742, "step": 21 }, { "epoch": 0.06023271731690623, "grad_norm": 5.027676105499268, "learning_rate": 9.96986301369863e-07, "log_odds_chosen": 0.40974360704421997, "log_odds_ratio": -0.7643404006958008, "logits/chosen": -0.24290071427822113, "logits/rejected": -0.17920729517936707, "logps/chosen": -3.0672435760498047, "logps/rejected": -3.4380717277526855, "loss": 2.8122, "nll_loss": 2.735720157623291, "rewards/accuracies": 0.5, "rewards/chosen": -0.3067243695259094, "rewards/margins": 0.03708280995488167, "rewards/rejected": -0.3438071608543396, "step": 22 }, { "epoch": 0.06297056810403832, "grad_norm": 5.347170352935791, "learning_rate": 9.968493150684932e-07, "log_odds_chosen": -0.1355494260787964, "log_odds_ratio": -0.9636363983154297, "logits/chosen": -0.04326516017317772, "logits/rejected": -0.05769798159599304, "logps/chosen": -3.513963222503662, "logps/rejected": -3.3431615829467773, "loss": 2.7994, "nll_loss": 2.703035354614258, "rewards/accuracies": 0.625, "rewards/chosen": -0.3513962924480438, "rewards/margins": -0.017080139368772507, "rewards/rejected": -0.3343161344528198, "step": 23 }, { "epoch": 0.06570841889117043, "grad_norm": 4.899316787719727, "learning_rate": 9.967123287671232e-07, "log_odds_chosen": -0.20790570974349976, "log_odds_ratio": -0.9205588102340698, "logits/chosen": 0.010844096541404724, "logits/rejected": 0.004730232059955597, "logps/chosen": -3.249471664428711, "logps/rejected": -3.037924289703369, "loss": 2.8015, "nll_loss": 2.709411144256592, "rewards/accuracies": 0.375, "rewards/chosen": -0.32494714856147766, "rewards/margins": -0.02115471474826336, "rewards/rejected": -0.30379244685173035, "step": 24 }, { "epoch": 0.06844626967830253, "grad_norm": 5.141045093536377, "learning_rate": 9.965753424657534e-07, "log_odds_chosen": -0.34573274850845337, "log_odds_ratio": -1.1100118160247803, "logits/chosen": -0.04250911995768547, "logits/rejected": 0.021847927942872047, "logps/chosen": -3.0320470333099365, "logps/rejected": -2.6443116664886475, "loss": 2.9611, "nll_loss": 2.850067377090454, "rewards/accuracies": 0.625, "rewards/chosen": -0.3032046854496002, "rewards/margins": -0.038773540407419205, "rewards/rejected": -0.2644311785697937, "step": 25 }, { "epoch": 0.07118412046543464, "grad_norm": 4.638236999511719, "learning_rate": 9.964383561643836e-07, "log_odds_chosen": 0.2737106382846832, "log_odds_ratio": -0.6138269305229187, "logits/chosen": 0.053380757570266724, "logits/rejected": -0.029358072206377983, "logps/chosen": -3.0447943210601807, "logps/rejected": -3.263441562652588, "loss": 2.7661, "nll_loss": 2.704679250717163, "rewards/accuracies": 0.75, "rewards/chosen": -0.3044794499874115, "rewards/margins": 0.02186471037566662, "rewards/rejected": -0.3263441324234009, "step": 26 }, { "epoch": 0.07392197125256673, "grad_norm": 4.616994380950928, "learning_rate": 9.963013698630136e-07, "log_odds_chosen": -0.27089840173721313, "log_odds_ratio": -1.0014469623565674, "logits/chosen": -0.030504144728183746, "logits/rejected": -0.08286699652671814, "logps/chosen": -3.483492851257324, "logps/rejected": -3.184018611907959, "loss": 2.7414, "nll_loss": 2.641270160675049, "rewards/accuracies": 0.75, "rewards/chosen": -0.34834927320480347, "rewards/margins": -0.029947401955723763, "rewards/rejected": -0.31840184330940247, "step": 27 }, { "epoch": 0.07665982203969883, "grad_norm": 5.18505859375, "learning_rate": 9.961643835616438e-07, "log_odds_chosen": 0.009053587913513184, "log_odds_ratio": -0.8918295502662659, "logits/chosen": 0.12135280668735504, "logits/rejected": 0.15652324259281158, "logps/chosen": -3.3017497062683105, "logps/rejected": -3.28719425201416, "loss": 2.8426, "nll_loss": 2.753439426422119, "rewards/accuracies": 0.625, "rewards/chosen": -0.33017498254776, "rewards/margins": -0.0014555584639310837, "rewards/rejected": -0.32871943712234497, "step": 28 }, { "epoch": 0.07939767282683094, "grad_norm": 5.625412464141846, "learning_rate": 9.96027397260274e-07, "log_odds_chosen": -1.116824746131897, "log_odds_ratio": -1.553316354751587, "logits/chosen": 0.20381973683834076, "logits/rejected": 0.2962232232093811, "logps/chosen": -4.057847499847412, "logps/rejected": -2.955991268157959, "loss": 3.0801, "nll_loss": 2.924777030944824, "rewards/accuracies": 0.375, "rewards/chosen": -0.4057847559452057, "rewards/margins": -0.11018560826778412, "rewards/rejected": -0.29559916257858276, "step": 29 }, { "epoch": 0.08213552361396304, "grad_norm": 5.555380344390869, "learning_rate": 9.95890410958904e-07, "log_odds_chosen": -1.3266509771347046, "log_odds_ratio": -1.8842099905014038, "logits/chosen": 0.12565293908119202, "logits/rejected": 0.19540923833847046, "logps/chosen": -4.436695098876953, "logps/rejected": -3.100005626678467, "loss": 3.0048, "nll_loss": 2.81638765335083, "rewards/accuracies": 0.375, "rewards/chosen": -0.44366952776908875, "rewards/margins": -0.1336689442396164, "rewards/rejected": -0.31000056862831116, "step": 30 }, { "epoch": 0.08487337440109514, "grad_norm": 6.084508895874023, "learning_rate": 9.957534246575342e-07, "log_odds_chosen": -1.2154487371444702, "log_odds_ratio": -1.6096341609954834, "logits/chosen": 0.08194025605916977, "logits/rejected": 0.1756715476512909, "logps/chosen": -4.1222968101501465, "logps/rejected": -2.921145439147949, "loss": 2.965, "nll_loss": 2.8040614128112793, "rewards/accuracies": 0.25, "rewards/chosen": -0.4122296869754791, "rewards/margins": -0.12011513113975525, "rewards/rejected": -0.2921145558357239, "step": 31 }, { "epoch": 0.08761122518822724, "grad_norm": 5.244553565979004, "learning_rate": 9.956164383561644e-07, "log_odds_chosen": -0.22286808490753174, "log_odds_ratio": -1.5409315824508667, "logits/chosen": -0.007757712155580521, "logits/rejected": -0.04161946475505829, "logps/chosen": -3.93947434425354, "logps/rejected": -3.686946392059326, "loss": 2.9127, "nll_loss": 2.758647918701172, "rewards/accuracies": 0.25, "rewards/chosen": -0.39394742250442505, "rewards/margins": -0.025252796709537506, "rewards/rejected": -0.3686946630477905, "step": 32 }, { "epoch": 0.09034907597535935, "grad_norm": 4.770648956298828, "learning_rate": 9.954794520547944e-07, "log_odds_chosen": -0.15856331586837769, "log_odds_ratio": -0.8036017417907715, "logits/chosen": 0.0652264803647995, "logits/rejected": 0.05111066997051239, "logps/chosen": -2.926429271697998, "logps/rejected": -2.767559766769409, "loss": 2.7456, "nll_loss": 2.665252208709717, "rewards/accuracies": 0.5, "rewards/chosen": -0.2926429510116577, "rewards/margins": -0.015886951237916946, "rewards/rejected": -0.2767559885978699, "step": 33 }, { "epoch": 0.09308692676249145, "grad_norm": 4.830965518951416, "learning_rate": 9.953424657534246e-07, "log_odds_chosen": -0.16527973115444183, "log_odds_ratio": -0.9055407047271729, "logits/chosen": 0.19274450838565826, "logits/rejected": 0.11465256661176682, "logps/chosen": -3.424217700958252, "logps/rejected": -3.240734100341797, "loss": 2.7827, "nll_loss": 2.69211483001709, "rewards/accuracies": 0.75, "rewards/chosen": -0.3424217700958252, "rewards/margins": -0.01834835112094879, "rewards/rejected": -0.3240734040737152, "step": 34 }, { "epoch": 0.09582477754962354, "grad_norm": 4.708650588989258, "learning_rate": 9.952054794520548e-07, "log_odds_chosen": -0.5544571876525879, "log_odds_ratio": -1.3393467664718628, "logits/chosen": -0.1677793562412262, "logits/rejected": -0.1901005357503891, "logps/chosen": -3.5399529933929443, "logps/rejected": -2.9572417736053467, "loss": 2.7149, "nll_loss": 2.5810110569000244, "rewards/accuracies": 0.5, "rewards/chosen": -0.35399529337882996, "rewards/margins": -0.05827111750841141, "rewards/rejected": -0.29572415351867676, "step": 35 }, { "epoch": 0.09856262833675565, "grad_norm": 5.376469135284424, "learning_rate": 9.950684931506848e-07, "log_odds_chosen": -0.5475904941558838, "log_odds_ratio": -1.088226318359375, "logits/chosen": -0.0014420785009860992, "logits/rejected": 0.08472368866205215, "logps/chosen": -3.179635763168335, "logps/rejected": -2.636232852935791, "loss": 2.9045, "nll_loss": 2.7957191467285156, "rewards/accuracies": 0.375, "rewards/chosen": -0.3179636001586914, "rewards/margins": -0.054340340197086334, "rewards/rejected": -0.26362326741218567, "step": 36 }, { "epoch": 0.10130047912388775, "grad_norm": 4.777587413787842, "learning_rate": 9.94931506849315e-07, "log_odds_chosen": 0.4012875556945801, "log_odds_ratio": -0.573728621006012, "logits/chosen": -0.03620409220457077, "logits/rejected": -0.0462312288582325, "logps/chosen": -2.542311906814575, "logps/rejected": -2.865460157394409, "loss": 2.6426, "nll_loss": 2.5851995944976807, "rewards/accuracies": 0.75, "rewards/chosen": -0.25423121452331543, "rewards/margins": 0.03231482207775116, "rewards/rejected": -0.286545991897583, "step": 37 }, { "epoch": 0.10403832991101986, "grad_norm": 5.004133224487305, "learning_rate": 9.947945205479452e-07, "log_odds_chosen": -0.300178587436676, "log_odds_ratio": -1.024843692779541, "logits/chosen": 0.026658132672309875, "logits/rejected": 0.07710682600736618, "logps/chosen": -3.4007883071899414, "logps/rejected": -3.0612425804138184, "loss": 2.7823, "nll_loss": 2.679805040359497, "rewards/accuracies": 0.625, "rewards/chosen": -0.34007883071899414, "rewards/margins": -0.03395456820726395, "rewards/rejected": -0.3061242699623108, "step": 38 }, { "epoch": 0.10677618069815195, "grad_norm": 5.136014461517334, "learning_rate": 9.946575342465752e-07, "log_odds_chosen": -0.01612687110900879, "log_odds_ratio": -0.7921706438064575, "logits/chosen": -0.04480292648077011, "logits/rejected": -0.01441686600446701, "logps/chosen": -3.368060827255249, "logps/rejected": -3.3625738620758057, "loss": 2.9134, "nll_loss": 2.8341362476348877, "rewards/accuracies": 0.25, "rewards/chosen": -0.33680611848831177, "rewards/margins": -0.0005487147718667984, "rewards/rejected": -0.3362573981285095, "step": 39 }, { "epoch": 0.10951403148528405, "grad_norm": 5.379917621612549, "learning_rate": 9.945205479452054e-07, "log_odds_chosen": 0.05474239960312843, "log_odds_ratio": -0.7903103828430176, "logits/chosen": -0.04024696350097656, "logits/rejected": -0.030547045171260834, "logps/chosen": -3.2675392627716064, "logps/rejected": -3.2731070518493652, "loss": 2.8434, "nll_loss": 2.76436710357666, "rewards/accuracies": 0.5, "rewards/chosen": -0.3267539143562317, "rewards/margins": 0.0005567930638790131, "rewards/rejected": -0.3273107409477234, "step": 40 }, { "epoch": 0.11225188227241616, "grad_norm": 4.967020511627197, "learning_rate": 9.943835616438356e-07, "log_odds_chosen": -0.9217313528060913, "log_odds_ratio": -1.3594685792922974, "logits/chosen": -0.00897288229316473, "logits/rejected": -0.009242486208677292, "logps/chosen": -3.474071979522705, "logps/rejected": -2.5782723426818848, "loss": 2.807, "nll_loss": 2.671017646789551, "rewards/accuracies": 0.25, "rewards/chosen": -0.34740719199180603, "rewards/margins": -0.08957996964454651, "rewards/rejected": -0.2578272223472595, "step": 41 }, { "epoch": 0.11498973305954825, "grad_norm": 4.70627498626709, "learning_rate": 9.942465753424656e-07, "log_odds_chosen": 0.766663670539856, "log_odds_ratio": -0.47851717472076416, "logits/chosen": 0.048907842487096786, "logits/rejected": -0.04257715493440628, "logps/chosen": -2.892897605895996, "logps/rejected": -3.6046361923217773, "loss": 2.7379, "nll_loss": 2.6900839805603027, "rewards/accuracies": 0.75, "rewards/chosen": -0.28928980231285095, "rewards/margins": 0.07117386907339096, "rewards/rejected": -0.3604636490345001, "step": 42 }, { "epoch": 0.11772758384668036, "grad_norm": 5.1037821769714355, "learning_rate": 9.941095890410958e-07, "log_odds_chosen": -1.3064481019973755, "log_odds_ratio": -1.7622811794281006, "logits/chosen": -0.17422360181808472, "logits/rejected": -0.15908803045749664, "logps/chosen": -3.9083077907562256, "logps/rejected": -2.631472587585449, "loss": 2.8856, "nll_loss": 2.7094149589538574, "rewards/accuracies": 0.375, "rewards/chosen": -0.3908308148384094, "rewards/margins": -0.12768353521823883, "rewards/rejected": -0.2631472945213318, "step": 43 }, { "epoch": 0.12046543463381246, "grad_norm": 6.03995418548584, "learning_rate": 9.93972602739726e-07, "log_odds_chosen": -0.830864429473877, "log_odds_ratio": -1.669744849205017, "logits/chosen": 0.08528456091880798, "logits/rejected": 0.15812747180461884, "logps/chosen": -4.31163215637207, "logps/rejected": -3.4684176445007324, "loss": 2.923, "nll_loss": 2.7560529708862305, "rewards/accuracies": 0.375, "rewards/chosen": -0.4311631917953491, "rewards/margins": -0.08432147651910782, "rewards/rejected": -0.3468417525291443, "step": 44 }, { "epoch": 0.12320328542094455, "grad_norm": 5.839385032653809, "learning_rate": 9.938356164383562e-07, "log_odds_chosen": -0.6422004699707031, "log_odds_ratio": -1.1133497953414917, "logits/chosen": 0.027497777715325356, "logits/rejected": 0.10773377120494843, "logps/chosen": -3.43453311920166, "logps/rejected": -2.828002691268921, "loss": 2.8699, "nll_loss": 2.758568048477173, "rewards/accuracies": 0.25, "rewards/chosen": -0.3434533476829529, "rewards/margins": -0.06065305694937706, "rewards/rejected": -0.2828002870082855, "step": 45 }, { "epoch": 0.12594113620807665, "grad_norm": 4.990361213684082, "learning_rate": 9.936986301369862e-07, "log_odds_chosen": -0.8402199149131775, "log_odds_ratio": -1.3696062564849854, "logits/chosen": -0.02361050620675087, "logits/rejected": -0.03596203774213791, "logps/chosen": -3.735870122909546, "logps/rejected": -2.9160242080688477, "loss": 2.9015, "nll_loss": 2.7645320892333984, "rewards/accuracies": 0.125, "rewards/chosen": -0.37358707189559937, "rewards/margins": -0.08198460936546326, "rewards/rejected": -0.2916024327278137, "step": 46 }, { "epoch": 0.12867898699520877, "grad_norm": 5.279430866241455, "learning_rate": 9.935616438356164e-07, "log_odds_chosen": -0.5845677852630615, "log_odds_ratio": -1.1561214923858643, "logits/chosen": -0.009628333151340485, "logits/rejected": 0.058011867105960846, "logps/chosen": -2.930483341217041, "logps/rejected": -2.397038221359253, "loss": 2.8774, "nll_loss": 2.761819839477539, "rewards/accuracies": 0.5, "rewards/chosen": -0.29304835200309753, "rewards/margins": -0.053344517946243286, "rewards/rejected": -0.23970383405685425, "step": 47 }, { "epoch": 0.13141683778234087, "grad_norm": 4.762871265411377, "learning_rate": 9.934246575342466e-07, "log_odds_chosen": -0.1966322660446167, "log_odds_ratio": -0.9987320303916931, "logits/chosen": -0.06456788629293442, "logits/rejected": 0.0014803819358348846, "logps/chosen": -2.9770150184631348, "logps/rejected": -2.7751426696777344, "loss": 2.743, "nll_loss": 2.6431081295013428, "rewards/accuracies": 0.375, "rewards/chosen": -0.29770153760910034, "rewards/margins": -0.020187247544527054, "rewards/rejected": -0.2775142788887024, "step": 48 }, { "epoch": 0.13415468856947296, "grad_norm": 5.249630451202393, "learning_rate": 9.932876712328766e-07, "log_odds_chosen": 0.4115442633628845, "log_odds_ratio": -0.6845101714134216, "logits/chosen": -0.049117811024188995, "logits/rejected": -0.05753291770815849, "logps/chosen": -3.5740346908569336, "logps/rejected": -3.952871322631836, "loss": 2.8633, "nll_loss": 2.7948498725891113, "rewards/accuracies": 0.625, "rewards/chosen": -0.3574034869670868, "rewards/margins": 0.037883639335632324, "rewards/rejected": -0.3952871561050415, "step": 49 }, { "epoch": 0.13689253935660506, "grad_norm": 5.6214470863342285, "learning_rate": 9.931506849315068e-07, "log_odds_chosen": -0.4850051701068878, "log_odds_ratio": -1.181840419769287, "logits/chosen": 0.002927936613559723, "logits/rejected": 0.06007879972457886, "logps/chosen": -3.3396949768066406, "logps/rejected": -2.8460466861724854, "loss": 2.82, "nll_loss": 2.701768159866333, "rewards/accuracies": 0.625, "rewards/chosen": -0.33396950364112854, "rewards/margins": -0.049364835023880005, "rewards/rejected": -0.28460466861724854, "step": 50 }, { "epoch": 0.13963039014373715, "grad_norm": 4.872282028198242, "learning_rate": 9.93013698630137e-07, "log_odds_chosen": -0.2607291340827942, "log_odds_ratio": -0.937413215637207, "logits/chosen": 0.06424236297607422, "logits/rejected": 0.0776367336511612, "logps/chosen": -3.2114195823669434, "logps/rejected": -2.949260711669922, "loss": 2.8481, "nll_loss": 2.754389762878418, "rewards/accuracies": 0.5, "rewards/chosen": -0.32114195823669434, "rewards/margins": -0.02621588297188282, "rewards/rejected": -0.29492607712745667, "step": 51 }, { "epoch": 0.14236824093086928, "grad_norm": 5.802570343017578, "learning_rate": 9.928767123287672e-07, "log_odds_chosen": -1.4159520864486694, "log_odds_ratio": -2.0649375915527344, "logits/chosen": 0.09997137635946274, "logits/rejected": 0.06807305663824081, "logps/chosen": -4.404201507568359, "logps/rejected": -3.020540237426758, "loss": 2.9574, "nll_loss": 2.7508785724639893, "rewards/accuracies": 0.375, "rewards/chosen": -0.44042015075683594, "rewards/margins": -0.13836613297462463, "rewards/rejected": -0.3020540177822113, "step": 52 }, { "epoch": 0.14510609171800137, "grad_norm": 5.020723342895508, "learning_rate": 9.927397260273972e-07, "log_odds_chosen": 0.9747331738471985, "log_odds_ratio": -0.794732391834259, "logits/chosen": -0.09723836183547974, "logits/rejected": -0.09579548239707947, "logps/chosen": -3.019228458404541, "logps/rejected": -3.984806776046753, "loss": 2.8668, "nll_loss": 2.787306785583496, "rewards/accuracies": 0.25, "rewards/chosen": -0.30192288756370544, "rewards/margins": 0.09655781090259552, "rewards/rejected": -0.39848068356513977, "step": 53 }, { "epoch": 0.14784394250513347, "grad_norm": 5.808315753936768, "learning_rate": 9.926027397260274e-07, "log_odds_chosen": -1.0459648370742798, "log_odds_ratio": -1.62717604637146, "logits/chosen": 0.06377433985471725, "logits/rejected": 0.11266200244426727, "logps/chosen": -4.603038311004639, "logps/rejected": -3.5664730072021484, "loss": 3.0305, "nll_loss": 2.8677637577056885, "rewards/accuracies": 0.25, "rewards/chosen": -0.46030378341674805, "rewards/margins": -0.10365648567676544, "rewards/rejected": -0.3566473126411438, "step": 54 }, { "epoch": 0.15058179329226556, "grad_norm": 4.634006977081299, "learning_rate": 9.924657534246574e-07, "log_odds_chosen": 1.1705703735351562, "log_odds_ratio": -0.5544939041137695, "logits/chosen": -0.16598471999168396, "logits/rejected": -0.3104209005832672, "logps/chosen": -2.528529644012451, "logps/rejected": -3.622429609298706, "loss": 2.6652, "nll_loss": 2.609768867492676, "rewards/accuracies": 0.75, "rewards/chosen": -0.2528529763221741, "rewards/margins": 0.10938997566699982, "rewards/rejected": -0.3622429370880127, "step": 55 }, { "epoch": 0.15331964407939766, "grad_norm": 4.759185314178467, "learning_rate": 9.923287671232876e-07, "log_odds_chosen": -0.8514690399169922, "log_odds_ratio": -1.3893282413482666, "logits/chosen": -0.03901505842804909, "logits/rejected": -0.16699865460395813, "logps/chosen": -3.3034095764160156, "logps/rejected": -2.478149890899658, "loss": 2.7597, "nll_loss": 2.6207921504974365, "rewards/accuracies": 0.25, "rewards/chosen": -0.3303409814834595, "rewards/margins": -0.08252597600221634, "rewards/rejected": -0.24781496822834015, "step": 56 }, { "epoch": 0.15605749486652978, "grad_norm": 5.299474716186523, "learning_rate": 9.921917808219178e-07, "log_odds_chosen": -0.41471993923187256, "log_odds_ratio": -0.99680095911026, "logits/chosen": -0.007700156420469284, "logits/rejected": 0.05373016744852066, "logps/chosen": -3.3678712844848633, "logps/rejected": -2.964707374572754, "loss": 2.8343, "nll_loss": 2.734644889831543, "rewards/accuracies": 0.375, "rewards/chosen": -0.3367871046066284, "rewards/margins": -0.0403163880109787, "rewards/rejected": -0.2964707612991333, "step": 57 }, { "epoch": 0.15879534565366188, "grad_norm": 4.740145206451416, "learning_rate": 9.920547945205478e-07, "log_odds_chosen": 0.945258617401123, "log_odds_ratio": -0.5944814085960388, "logits/chosen": 0.03067988157272339, "logits/rejected": -0.027779471129179, "logps/chosen": -2.491730213165283, "logps/rejected": -3.3526663780212402, "loss": 2.6551, "nll_loss": 2.5956883430480957, "rewards/accuracies": 0.625, "rewards/chosen": -0.24917304515838623, "rewards/margins": 0.08609359711408615, "rewards/rejected": -0.335266649723053, "step": 58 }, { "epoch": 0.16153319644079397, "grad_norm": 5.079645156860352, "learning_rate": 9.91917808219178e-07, "log_odds_chosen": -1.1518454551696777, "log_odds_ratio": -1.6328136920928955, "logits/chosen": -0.06617017835378647, "logits/rejected": 0.004633462056517601, "logps/chosen": -3.817099094390869, "logps/rejected": -2.6831612586975098, "loss": 2.9273, "nll_loss": 2.76405668258667, "rewards/accuracies": 0.375, "rewards/chosen": -0.3817099332809448, "rewards/margins": -0.11339379101991653, "rewards/rejected": -0.2683161199092865, "step": 59 }, { "epoch": 0.16427104722792607, "grad_norm": 4.6303791999816895, "learning_rate": 9.917808219178082e-07, "log_odds_chosen": 0.7495785355567932, "log_odds_ratio": -0.5613210201263428, "logits/chosen": 0.08961384743452072, "logits/rejected": 0.043095022439956665, "logps/chosen": -2.934703826904297, "logps/rejected": -3.6256089210510254, "loss": 2.7203, "nll_loss": 2.66414213180542, "rewards/accuracies": 0.75, "rewards/chosen": -0.2934703826904297, "rewards/margins": 0.06909048557281494, "rewards/rejected": -0.36256086826324463, "step": 60 }, { "epoch": 0.16700889801505817, "grad_norm": 5.269148826599121, "learning_rate": 9.916438356164382e-07, "log_odds_chosen": -0.5651938915252686, "log_odds_ratio": -1.1093158721923828, "logits/chosen": -0.046633630990982056, "logits/rejected": -0.026720810681581497, "logps/chosen": -3.6148734092712402, "logps/rejected": -3.0776171684265137, "loss": 2.882, "nll_loss": 2.771111488342285, "rewards/accuracies": 0.375, "rewards/chosen": -0.36148732900619507, "rewards/margins": -0.05372562259435654, "rewards/rejected": -0.3077617287635803, "step": 61 }, { "epoch": 0.1697467488021903, "grad_norm": 4.633469104766846, "learning_rate": 9.915068493150684e-07, "log_odds_chosen": -0.3493087887763977, "log_odds_ratio": -1.0405735969543457, "logits/chosen": -0.17285121977329254, "logits/rejected": -0.1728096604347229, "logps/chosen": -2.7876622676849365, "logps/rejected": -2.43934965133667, "loss": 2.6827, "nll_loss": 2.578643321990967, "rewards/accuracies": 0.5, "rewards/chosen": -0.2787662148475647, "rewards/margins": -0.03483123704791069, "rewards/rejected": -0.2439349889755249, "step": 62 }, { "epoch": 0.17248459958932238, "grad_norm": 5.375319004058838, "learning_rate": 9.913698630136986e-07, "log_odds_chosen": -0.49574708938598633, "log_odds_ratio": -1.1880484819412231, "logits/chosen": -0.038310080766677856, "logits/rejected": -0.015774652361869812, "logps/chosen": -3.622703790664673, "logps/rejected": -3.085822582244873, "loss": 2.9163, "nll_loss": 2.7975032329559326, "rewards/accuracies": 0.375, "rewards/chosen": -0.36227038502693176, "rewards/margins": -0.053688131272792816, "rewards/rejected": -0.30858224630355835, "step": 63 }, { "epoch": 0.17522245037645448, "grad_norm": 4.979943752288818, "learning_rate": 9.912328767123286e-07, "log_odds_chosen": -0.5324112772941589, "log_odds_ratio": -1.1791377067565918, "logits/chosen": 0.005478162318468094, "logits/rejected": 0.01471768319606781, "logps/chosen": -3.391286611557007, "logps/rejected": -2.8274176120758057, "loss": 2.822, "nll_loss": 2.704129219055176, "rewards/accuracies": 0.375, "rewards/chosen": -0.33912867307662964, "rewards/margins": -0.05638689547777176, "rewards/rejected": -0.2827417552471161, "step": 64 }, { "epoch": 0.17796030116358658, "grad_norm": 4.801113128662109, "learning_rate": 9.910958904109588e-07, "log_odds_chosen": -0.29410529136657715, "log_odds_ratio": -1.0024876594543457, "logits/chosen": -0.03683172166347504, "logits/rejected": -0.08776921778917313, "logps/chosen": -2.7624337673187256, "logps/rejected": -2.4511752128601074, "loss": 2.6926, "nll_loss": 2.5923218727111816, "rewards/accuracies": 0.5, "rewards/chosen": -0.2762433886528015, "rewards/margins": -0.031125875189900398, "rewards/rejected": -0.24511751532554626, "step": 65 }, { "epoch": 0.1806981519507187, "grad_norm": 5.115155220031738, "learning_rate": 9.90958904109589e-07, "log_odds_chosen": 0.19909468293190002, "log_odds_ratio": -0.8237099647521973, "logits/chosen": 0.08649007976055145, "logits/rejected": 0.10353623330593109, "logps/chosen": -2.7367866039276123, "logps/rejected": -2.9189810752868652, "loss": 2.789, "nll_loss": 2.70664644241333, "rewards/accuracies": 0.375, "rewards/chosen": -0.27367866039276123, "rewards/margins": 0.0182194784283638, "rewards/rejected": -0.29189813137054443, "step": 66 }, { "epoch": 0.1834360027378508, "grad_norm": 5.098727226257324, "learning_rate": 9.90821917808219e-07, "log_odds_chosen": -0.5601481199264526, "log_odds_ratio": -1.2390750646591187, "logits/chosen": -0.0374976247549057, "logits/rejected": 0.009967952966690063, "logps/chosen": -3.327460289001465, "logps/rejected": -2.740292549133301, "loss": 2.9191, "nll_loss": 2.795185089111328, "rewards/accuracies": 0.5, "rewards/chosen": -0.3327460289001465, "rewards/margins": -0.05871676281094551, "rewards/rejected": -0.27402928471565247, "step": 67 }, { "epoch": 0.1861738535249829, "grad_norm": 4.784974575042725, "learning_rate": 9.906849315068492e-07, "log_odds_chosen": -0.0967981368303299, "log_odds_ratio": -0.8636622428894043, "logits/chosen": -0.0007414855062961578, "logits/rejected": 0.03983653709292412, "logps/chosen": -2.7849650382995605, "logps/rejected": -2.647350311279297, "loss": 2.6987, "nll_loss": 2.6123809814453125, "rewards/accuracies": 0.5, "rewards/chosen": -0.27849650382995605, "rewards/margins": -0.013761457055807114, "rewards/rejected": -0.26473504304885864, "step": 68 }, { "epoch": 0.188911704312115, "grad_norm": 5.103812217712402, "learning_rate": 9.905479452054794e-07, "log_odds_chosen": -0.18067681789398193, "log_odds_ratio": -0.8480179309844971, "logits/chosen": 7.328763604164124e-05, "logits/rejected": -0.05978984385728836, "logps/chosen": -3.0597193241119385, "logps/rejected": -2.8920857906341553, "loss": 2.7842, "nll_loss": 2.69943904876709, "rewards/accuracies": 0.375, "rewards/chosen": -0.3059719502925873, "rewards/margins": -0.016763346269726753, "rewards/rejected": -0.2892085909843445, "step": 69 }, { "epoch": 0.19164955509924708, "grad_norm": 4.620169162750244, "learning_rate": 9.904109589041094e-07, "log_odds_chosen": 0.2543816864490509, "log_odds_ratio": -0.8889391422271729, "logits/chosen": 0.14395776391029358, "logits/rejected": -0.02969476580619812, "logps/chosen": -2.478407859802246, "logps/rejected": -2.6635775566101074, "loss": 2.6435, "nll_loss": 2.554600715637207, "rewards/accuracies": 0.625, "rewards/chosen": -0.24784080684185028, "rewards/margins": 0.018516942858695984, "rewards/rejected": -0.26635774970054626, "step": 70 }, { "epoch": 0.1943874058863792, "grad_norm": 4.287913799285889, "learning_rate": 9.902739726027396e-07, "log_odds_chosen": 0.4051719605922699, "log_odds_ratio": -0.6227778792381287, "logits/chosen": 0.07164830714464188, "logits/rejected": -0.014817915856838226, "logps/chosen": -2.1777026653289795, "logps/rejected": -2.5471317768096924, "loss": 2.6384, "nll_loss": 2.576085090637207, "rewards/accuracies": 0.75, "rewards/chosen": -0.2177702784538269, "rewards/margins": 0.03694292902946472, "rewards/rejected": -0.25471317768096924, "step": 71 }, { "epoch": 0.1971252566735113, "grad_norm": 5.193297863006592, "learning_rate": 9.901369863013698e-07, "log_odds_chosen": 0.02839311957359314, "log_odds_ratio": -1.0304545164108276, "logits/chosen": -0.11193174123764038, "logits/rejected": -0.04097743332386017, "logps/chosen": -3.190248966217041, "logps/rejected": -3.2084596157073975, "loss": 2.7795, "nll_loss": 2.676492214202881, "rewards/accuracies": 0.625, "rewards/chosen": -0.3190248906612396, "rewards/margins": 0.0018210597336292267, "rewards/rejected": -0.32084596157073975, "step": 72 }, { "epoch": 0.1998631074606434, "grad_norm": 4.932231426239014, "learning_rate": 9.9e-07, "log_odds_chosen": -0.2566635012626648, "log_odds_ratio": -0.9549156427383423, "logits/chosen": 0.013712354004383087, "logits/rejected": -0.018330127000808716, "logps/chosen": -3.356940746307373, "logps/rejected": -3.0623703002929688, "loss": 2.7408, "nll_loss": 2.6452670097351074, "rewards/accuracies": 0.5, "rewards/chosen": -0.3356940746307373, "rewards/margins": -0.029457030817866325, "rewards/rejected": -0.30623701214790344, "step": 73 }, { "epoch": 0.2026009582477755, "grad_norm": 4.39111328125, "learning_rate": 9.8986301369863e-07, "log_odds_chosen": -0.03362557291984558, "log_odds_ratio": -0.7703865170478821, "logits/chosen": -0.11751857399940491, "logits/rejected": -0.14538010954856873, "logps/chosen": -2.632535457611084, "logps/rejected": -2.593116044998169, "loss": 2.6399, "nll_loss": 2.562824010848999, "rewards/accuracies": 0.625, "rewards/chosen": -0.2632535696029663, "rewards/margins": -0.003941958770155907, "rewards/rejected": -0.25931161642074585, "step": 74 }, { "epoch": 0.2053388090349076, "grad_norm": 5.217278957366943, "learning_rate": 9.897260273972602e-07, "log_odds_chosen": -0.5287898182868958, "log_odds_ratio": -1.1839042901992798, "logits/chosen": 0.1059274673461914, "logits/rejected": 0.1623789519071579, "logps/chosen": -3.6449809074401855, "logps/rejected": -3.11883282661438, "loss": 2.9421, "nll_loss": 2.823728322982788, "rewards/accuracies": 0.375, "rewards/chosen": -0.3644981384277344, "rewards/margins": -0.05261482298374176, "rewards/rejected": -0.3118833005428314, "step": 75 }, { "epoch": 0.2080766598220397, "grad_norm": 4.917469024658203, "learning_rate": 9.895890410958905e-07, "log_odds_chosen": -0.47445833683013916, "log_odds_ratio": -1.1590181589126587, "logits/chosen": 0.033186182379722595, "logits/rejected": 0.018955573439598083, "logps/chosen": -3.5408554077148438, "logps/rejected": -3.055980682373047, "loss": 2.7875, "nll_loss": 2.6716060638427734, "rewards/accuracies": 0.375, "rewards/chosen": -0.3540855646133423, "rewards/margins": -0.04848747327923775, "rewards/rejected": -0.30559808015823364, "step": 76 }, { "epoch": 0.2108145106091718, "grad_norm": 5.297628879547119, "learning_rate": 9.894520547945204e-07, "log_odds_chosen": -0.25846046209335327, "log_odds_ratio": -0.9694849252700806, "logits/chosen": 0.06071566045284271, "logits/rejected": 0.10773549973964691, "logps/chosen": -3.450127363204956, "logps/rejected": -3.179384708404541, "loss": 2.8491, "nll_loss": 2.752110481262207, "rewards/accuracies": 0.5, "rewards/chosen": -0.3450127840042114, "rewards/margins": -0.027074294164776802, "rewards/rejected": -0.3179384469985962, "step": 77 }, { "epoch": 0.2135523613963039, "grad_norm": 4.880711078643799, "learning_rate": 9.893150684931507e-07, "log_odds_chosen": 0.12524116039276123, "log_odds_ratio": -0.770504355430603, "logits/chosen": 0.12420349568128586, "logits/rejected": 0.06585845351219177, "logps/chosen": -2.640308380126953, "logps/rejected": -2.758509635925293, "loss": 2.7117, "nll_loss": 2.634650945663452, "rewards/accuracies": 0.5, "rewards/chosen": -0.2640308141708374, "rewards/margins": 0.011820141226053238, "rewards/rejected": -0.27585095167160034, "step": 78 }, { "epoch": 0.216290212183436, "grad_norm": 5.261929512023926, "learning_rate": 9.891780821917809e-07, "log_odds_chosen": -0.3042720854282379, "log_odds_ratio": -0.9870287179946899, "logits/chosen": -0.08786577731370926, "logits/rejected": 0.0005651041865348816, "logps/chosen": -3.7235324382781982, "logps/rejected": -3.414074420928955, "loss": 2.8373, "nll_loss": 2.738576889038086, "rewards/accuracies": 0.375, "rewards/chosen": -0.3723532557487488, "rewards/margins": -0.030945800244808197, "rewards/rejected": -0.3414074778556824, "step": 79 }, { "epoch": 0.2190280629705681, "grad_norm": 5.4090189933776855, "learning_rate": 9.89041095890411e-07, "log_odds_chosen": -0.9020189642906189, "log_odds_ratio": -1.5104128122329712, "logits/chosen": -0.056990668177604675, "logits/rejected": 0.009287230670452118, "logps/chosen": -3.726689100265503, "logps/rejected": -2.829519748687744, "loss": 2.8678, "nll_loss": 2.7167563438415527, "rewards/accuracies": 0.25, "rewards/chosen": -0.37266892194747925, "rewards/margins": -0.08971692621707916, "rewards/rejected": -0.2829520106315613, "step": 80 }, { "epoch": 0.22176591375770022, "grad_norm": 5.040421485900879, "learning_rate": 9.88904109589041e-07, "log_odds_chosen": 0.17196570336818695, "log_odds_ratio": -0.8140729069709778, "logits/chosen": -0.1289421021938324, "logits/rejected": -0.023923255503177643, "logps/chosen": -2.91805362701416, "logps/rejected": -3.09287428855896, "loss": 2.8344, "nll_loss": 2.752990961074829, "rewards/accuracies": 0.375, "rewards/chosen": -0.29180535674095154, "rewards/margins": 0.017482059076428413, "rewards/rejected": -0.309287428855896, "step": 81 }, { "epoch": 0.2245037645448323, "grad_norm": 5.287463665008545, "learning_rate": 9.887671232876713e-07, "log_odds_chosen": -1.0638575553894043, "log_odds_ratio": -2.001777410507202, "logits/chosen": 0.03115115314722061, "logits/rejected": -0.018674377351999283, "logps/chosen": -4.465646266937256, "logps/rejected": -3.3901572227478027, "loss": 3.0029, "nll_loss": 2.8027565479278564, "rewards/accuracies": 0.625, "rewards/chosen": -0.44656461477279663, "rewards/margins": -0.10754888504743576, "rewards/rejected": -0.3390157222747803, "step": 82 }, { "epoch": 0.2272416153319644, "grad_norm": 5.129518032073975, "learning_rate": 9.886301369863015e-07, "log_odds_chosen": -0.6979171633720398, "log_odds_ratio": -1.3708760738372803, "logits/chosen": 0.055020906031131744, "logits/rejected": 0.08702057600021362, "logps/chosen": -3.4260592460632324, "logps/rejected": -2.7058603763580322, "loss": 2.8338, "nll_loss": 2.6967077255249023, "rewards/accuracies": 0.5, "rewards/chosen": -0.34260594844818115, "rewards/margins": -0.07201991975307465, "rewards/rejected": -0.2705860435962677, "step": 83 }, { "epoch": 0.2299794661190965, "grad_norm": 5.024446964263916, "learning_rate": 9.884931506849315e-07, "log_odds_chosen": -0.0332673117518425, "log_odds_ratio": -0.7592968940734863, "logits/chosen": 0.1057756319642067, "logits/rejected": 0.16298477351665497, "logps/chosen": -2.788316011428833, "logps/rejected": -2.76326584815979, "loss": 2.7548, "nll_loss": 2.6788582801818848, "rewards/accuracies": 0.5, "rewards/chosen": -0.2788316011428833, "rewards/margins": -0.0025050099939107895, "rewards/rejected": -0.27632659673690796, "step": 84 }, { "epoch": 0.2327173169062286, "grad_norm": 4.511392116546631, "learning_rate": 9.883561643835617e-07, "log_odds_chosen": 0.0654553472995758, "log_odds_ratio": -0.7500054240226746, "logits/chosen": -0.06730129569768906, "logits/rejected": 0.003128785640001297, "logps/chosen": -2.4824235439300537, "logps/rejected": -2.531212568283081, "loss": 2.6809, "nll_loss": 2.605944871902466, "rewards/accuracies": 0.625, "rewards/chosen": -0.2482423484325409, "rewards/margins": 0.004878915846347809, "rewards/rejected": -0.2531212866306305, "step": 85 }, { "epoch": 0.23545516769336072, "grad_norm": 5.205360412597656, "learning_rate": 9.882191780821917e-07, "log_odds_chosen": -0.7202206254005432, "log_odds_ratio": -1.56636643409729, "logits/chosen": 0.06956939399242401, "logits/rejected": 0.10934953391551971, "logps/chosen": -3.512958288192749, "logps/rejected": -2.7372517585754395, "loss": 2.7981, "nll_loss": 2.641427516937256, "rewards/accuracies": 0.625, "rewards/chosen": -0.3512958288192749, "rewards/margins": -0.07757064700126648, "rewards/rejected": -0.2737252116203308, "step": 86 }, { "epoch": 0.23819301848049282, "grad_norm": 5.132618427276611, "learning_rate": 9.880821917808219e-07, "log_odds_chosen": -0.3672151565551758, "log_odds_ratio": -0.9749840497970581, "logits/chosen": 0.11470680683851242, "logits/rejected": 0.14767548441886902, "logps/chosen": -2.923811674118042, "logps/rejected": -2.565427780151367, "loss": 2.7437, "nll_loss": 2.646186590194702, "rewards/accuracies": 0.375, "rewards/chosen": -0.2923811674118042, "rewards/margins": -0.03583836928009987, "rewards/rejected": -0.25654280185699463, "step": 87 }, { "epoch": 0.24093086926762491, "grad_norm": 5.583461284637451, "learning_rate": 9.87945205479452e-07, "log_odds_chosen": 0.10623973608016968, "log_odds_ratio": -1.0295180082321167, "logits/chosen": 0.001868486637249589, "logits/rejected": 0.08350097388029099, "logps/chosen": -3.3299827575683594, "logps/rejected": -3.424865484237671, "loss": 2.8447, "nll_loss": 2.7417185306549072, "rewards/accuracies": 0.625, "rewards/chosen": -0.33299827575683594, "rewards/margins": 0.009488269686698914, "rewards/rejected": -0.34248653054237366, "step": 88 }, { "epoch": 0.243668720054757, "grad_norm": 5.216099262237549, "learning_rate": 9.87808219178082e-07, "log_odds_chosen": 0.956062912940979, "log_odds_ratio": -0.8573809862136841, "logits/chosen": 0.062470801174640656, "logits/rejected": -0.0008219331502914429, "logps/chosen": -2.9861812591552734, "logps/rejected": -3.89156174659729, "loss": 2.6854, "nll_loss": 2.599694013595581, "rewards/accuracies": 0.75, "rewards/chosen": -0.2986181378364563, "rewards/margins": 0.09053808450698853, "rewards/rejected": -0.3891562223434448, "step": 89 }, { "epoch": 0.2464065708418891, "grad_norm": 5.420902729034424, "learning_rate": 9.876712328767123e-07, "log_odds_chosen": -0.49113789200782776, "log_odds_ratio": -1.067264199256897, "logits/chosen": 0.04301543906331062, "logits/rejected": 0.057302989065647125, "logps/chosen": -3.426982879638672, "logps/rejected": -2.9275200366973877, "loss": 2.8039, "nll_loss": 2.6971592903137207, "rewards/accuracies": 0.25, "rewards/chosen": -0.3426983058452606, "rewards/margins": -0.049946270883083344, "rewards/rejected": -0.2927520275115967, "step": 90 }, { "epoch": 0.24914442162902123, "grad_norm": 4.983608245849609, "learning_rate": 9.875342465753425e-07, "log_odds_chosen": -0.03193660080432892, "log_odds_ratio": -0.8275931477546692, "logits/chosen": -0.030316002666950226, "logits/rejected": -0.062139466404914856, "logps/chosen": -2.686675786972046, "logps/rejected": -2.6589536666870117, "loss": 2.7378, "nll_loss": 2.655036211013794, "rewards/accuracies": 0.375, "rewards/chosen": -0.268667608499527, "rewards/margins": -0.002772213891148567, "rewards/rejected": -0.26589539647102356, "step": 91 }, { "epoch": 0.2518822724161533, "grad_norm": 5.045980930328369, "learning_rate": 9.873972602739725e-07, "log_odds_chosen": 0.11022639274597168, "log_odds_ratio": -0.8162941336631775, "logits/chosen": 0.22268152236938477, "logits/rejected": 0.24221082031726837, "logps/chosen": -2.994019031524658, "logps/rejected": -3.1171507835388184, "loss": 2.7107, "nll_loss": 2.629117012023926, "rewards/accuracies": 0.5, "rewards/chosen": -0.2994019389152527, "rewards/margins": 0.012313159182667732, "rewards/rejected": -0.31171509623527527, "step": 92 }, { "epoch": 0.2546201232032854, "grad_norm": 5.231735706329346, "learning_rate": 9.872602739726027e-07, "log_odds_chosen": -0.5580106973648071, "log_odds_ratio": -1.0977122783660889, "logits/chosen": -0.1160627156496048, "logits/rejected": 0.0913223922252655, "logps/chosen": -2.725052833557129, "logps/rejected": -2.1884145736694336, "loss": 2.8625, "nll_loss": 2.7526869773864746, "rewards/accuracies": 0.375, "rewards/chosen": -0.2725052833557129, "rewards/margins": -0.05366380512714386, "rewards/rejected": -0.21884146332740784, "step": 93 }, { "epoch": 0.25735797399041754, "grad_norm": 4.752773284912109, "learning_rate": 9.871232876712329e-07, "log_odds_chosen": 0.10502710193395615, "log_odds_ratio": -0.7220371961593628, "logits/chosen": 0.027543261647224426, "logits/rejected": 0.029822111129760742, "logps/chosen": -3.005152463912964, "logps/rejected": -3.0924625396728516, "loss": 2.7312, "nll_loss": 2.6590442657470703, "rewards/accuracies": 0.625, "rewards/chosen": -0.3005152642726898, "rewards/margins": 0.00873100571334362, "rewards/rejected": -0.3092462420463562, "step": 94 }, { "epoch": 0.2600958247775496, "grad_norm": 5.6471686363220215, "learning_rate": 9.869863013698629e-07, "log_odds_chosen": -0.769503116607666, "log_odds_ratio": -1.321831464767456, "logits/chosen": 0.01791486144065857, "logits/rejected": 0.06301550567150116, "logps/chosen": -4.087765693664551, "logps/rejected": -3.3439652919769287, "loss": 2.8972, "nll_loss": 2.7650389671325684, "rewards/accuracies": 0.25, "rewards/chosen": -0.40877655148506165, "rewards/margins": -0.07438001781702042, "rewards/rejected": -0.3343965411186218, "step": 95 }, { "epoch": 0.26283367556468173, "grad_norm": 4.5243635177612305, "learning_rate": 9.86849315068493e-07, "log_odds_chosen": 0.21530264616012573, "log_odds_ratio": -0.6282068490982056, "logits/chosen": 0.059582002460956573, "logits/rejected": 0.0008949711918830872, "logps/chosen": -2.4007225036621094, "logps/rejected": -2.5665109157562256, "loss": 2.6463, "nll_loss": 2.5835113525390625, "rewards/accuracies": 0.625, "rewards/chosen": -0.24007226526737213, "rewards/margins": 0.01657884381711483, "rewards/rejected": -0.2566511034965515, "step": 96 }, { "epoch": 0.2655715263518138, "grad_norm": 4.62525749206543, "learning_rate": 9.867123287671233e-07, "log_odds_chosen": -0.01519484631717205, "log_odds_ratio": -0.7411022186279297, "logits/chosen": -0.17577514052391052, "logits/rejected": -0.1749776154756546, "logps/chosen": -3.4747636318206787, "logps/rejected": -3.4520061016082764, "loss": 2.5813, "nll_loss": 2.5071566104888916, "rewards/accuracies": 0.625, "rewards/chosen": -0.34747636318206787, "rewards/margins": -0.002275770530104637, "rewards/rejected": -0.3452005982398987, "step": 97 }, { "epoch": 0.2683093771389459, "grad_norm": 4.982630729675293, "learning_rate": 9.865753424657533e-07, "log_odds_chosen": -0.8919167518615723, "log_odds_ratio": -1.3801298141479492, "logits/chosen": -0.06764490902423859, "logits/rejected": -0.05804187431931496, "logps/chosen": -3.5652167797088623, "logps/rejected": -2.689534902572632, "loss": 2.7677, "nll_loss": 2.62972092628479, "rewards/accuracies": 0.25, "rewards/chosen": -0.35652169585227966, "rewards/margins": -0.08756820857524872, "rewards/rejected": -0.26895347237586975, "step": 98 }, { "epoch": 0.27104722792607805, "grad_norm": 4.980642795562744, "learning_rate": 9.864383561643835e-07, "log_odds_chosen": 0.11508647352457047, "log_odds_ratio": -0.8868616819381714, "logits/chosen": -0.06694584339857101, "logits/rejected": -0.06551503390073776, "logps/chosen": -3.2617666721343994, "logps/rejected": -3.3355705738067627, "loss": 2.7203, "nll_loss": 2.6315722465515137, "rewards/accuracies": 0.75, "rewards/chosen": -0.32617664337158203, "rewards/margins": 0.007380392402410507, "rewards/rejected": -0.33355703949928284, "step": 99 }, { "epoch": 0.2737850787132101, "grad_norm": 5.253653526306152, "learning_rate": 9.863013698630137e-07, "log_odds_chosen": -0.4490603804588318, "log_odds_ratio": -1.0722631216049194, "logits/chosen": -0.061398696154356, "logits/rejected": -0.07815108448266983, "logps/chosen": -3.224452018737793, "logps/rejected": -2.7689168453216553, "loss": 2.7139, "nll_loss": 2.6066906452178955, "rewards/accuracies": 0.375, "rewards/chosen": -0.32244521379470825, "rewards/margins": -0.045553527772426605, "rewards/rejected": -0.27689167857170105, "step": 100 }, { "epoch": 0.27652292950034224, "grad_norm": 4.483826637268066, "learning_rate": 9.861643835616437e-07, "log_odds_chosen": 0.7477086782455444, "log_odds_ratio": -0.7263435125350952, "logits/chosen": 0.042984604835510254, "logits/rejected": -0.07989447563886642, "logps/chosen": -2.4423258304595947, "logps/rejected": -3.155019998550415, "loss": 2.6873, "nll_loss": 2.6146914958953857, "rewards/accuracies": 0.625, "rewards/chosen": -0.24423259496688843, "rewards/margins": 0.07126940786838531, "rewards/rejected": -0.31550198793411255, "step": 101 }, { "epoch": 0.2792607802874743, "grad_norm": 4.933300495147705, "learning_rate": 9.860273972602739e-07, "log_odds_chosen": -0.1567578911781311, "log_odds_ratio": -1.1467907428741455, "logits/chosen": -0.09643837809562683, "logits/rejected": -0.08139074593782425, "logps/chosen": -3.6488397121429443, "logps/rejected": -3.4997000694274902, "loss": 2.74, "nll_loss": 2.6252858638763428, "rewards/accuracies": 0.5, "rewards/chosen": -0.3648839592933655, "rewards/margins": -0.014913981780409813, "rewards/rejected": -0.3499699831008911, "step": 102 }, { "epoch": 0.28199863107460643, "grad_norm": 4.452278137207031, "learning_rate": 9.85890410958904e-07, "log_odds_chosen": 0.14342878758907318, "log_odds_ratio": -0.7878068685531616, "logits/chosen": -0.10493747889995575, "logits/rejected": -0.1662173569202423, "logps/chosen": -2.5139431953430176, "logps/rejected": -2.651623249053955, "loss": 2.6467, "nll_loss": 2.5679428577423096, "rewards/accuracies": 0.5, "rewards/chosen": -0.2513943314552307, "rewards/margins": 0.013768007978796959, "rewards/rejected": -0.26516231894493103, "step": 103 }, { "epoch": 0.28473648186173856, "grad_norm": 4.8789381980896, "learning_rate": 9.857534246575343e-07, "log_odds_chosen": -0.5268964767456055, "log_odds_ratio": -1.2786792516708374, "logits/chosen": 0.07942096143960953, "logits/rejected": 0.030651427805423737, "logps/chosen": -3.6572721004486084, "logps/rejected": -3.091378688812256, "loss": 2.6929, "nll_loss": 2.565030813217163, "rewards/accuracies": 0.5, "rewards/chosen": -0.3657272458076477, "rewards/margins": -0.05658937990665436, "rewards/rejected": -0.30913788080215454, "step": 104 }, { "epoch": 0.2874743326488706, "grad_norm": 4.420632362365723, "learning_rate": 9.856164383561643e-07, "log_odds_chosen": -0.1668822318315506, "log_odds_ratio": -0.9598227739334106, "logits/chosen": 0.17732149362564087, "logits/rejected": 0.05356922745704651, "logps/chosen": -2.7542171478271484, "logps/rejected": -2.562817335128784, "loss": 2.5811, "nll_loss": 2.4851369857788086, "rewards/accuracies": 0.5, "rewards/chosen": -0.27542170882225037, "rewards/margins": -0.01913994736969471, "rewards/rejected": -0.2562817633152008, "step": 105 }, { "epoch": 0.29021218343600275, "grad_norm": 5.501986980438232, "learning_rate": 9.854794520547945e-07, "log_odds_chosen": -0.5716249346733093, "log_odds_ratio": -1.1267179250717163, "logits/chosen": 0.09042792022228241, "logits/rejected": 0.11897275596857071, "logps/chosen": -3.712156057357788, "logps/rejected": -3.140970230102539, "loss": 2.8137, "nll_loss": 2.70100736618042, "rewards/accuracies": 0.375, "rewards/chosen": -0.3712155818939209, "rewards/margins": -0.05711854249238968, "rewards/rejected": -0.3140970468521118, "step": 106 }, { "epoch": 0.2929500342231348, "grad_norm": 4.338537693023682, "learning_rate": 9.853424657534247e-07, "log_odds_chosen": -0.38486310839653015, "log_odds_ratio": -1.0052354335784912, "logits/chosen": -0.10509517788887024, "logits/rejected": -0.11840923875570297, "logps/chosen": -2.519534111022949, "logps/rejected": -2.1192216873168945, "loss": 2.5284, "nll_loss": 2.427903413772583, "rewards/accuracies": 0.625, "rewards/chosen": -0.2519534230232239, "rewards/margins": -0.04003123566508293, "rewards/rejected": -0.21192218363285065, "step": 107 }, { "epoch": 0.29568788501026694, "grad_norm": 5.557394027709961, "learning_rate": 9.852054794520547e-07, "log_odds_chosen": -0.4685130715370178, "log_odds_ratio": -1.0829358100891113, "logits/chosen": 0.10997374355792999, "logits/rejected": 0.17667467892169952, "logps/chosen": -3.775928020477295, "logps/rejected": -3.307669162750244, "loss": 2.7709, "nll_loss": 2.6626434326171875, "rewards/accuracies": 0.5, "rewards/chosen": -0.3775928020477295, "rewards/margins": -0.04682587832212448, "rewards/rejected": -0.3307669460773468, "step": 108 }, { "epoch": 0.29842573579739906, "grad_norm": 4.504358291625977, "learning_rate": 9.850684931506849e-07, "log_odds_chosen": 0.03743825852870941, "log_odds_ratio": -0.8008598685264587, "logits/chosen": -0.04750660061836243, "logits/rejected": -0.03715944662690163, "logps/chosen": -2.444415807723999, "logps/rejected": -2.459193229675293, "loss": 2.6587, "nll_loss": 2.578657627105713, "rewards/accuracies": 0.625, "rewards/chosen": -0.24444158375263214, "rewards/margins": 0.0014777369797229767, "rewards/rejected": -0.24591931700706482, "step": 109 }, { "epoch": 0.30116358658453113, "grad_norm": 5.34977388381958, "learning_rate": 9.84931506849315e-07, "log_odds_chosen": -0.06839463114738464, "log_odds_ratio": -0.8368319869041443, "logits/chosen": -0.05776354670524597, "logits/rejected": 0.03446725383400917, "logps/chosen": -3.524332046508789, "logps/rejected": -3.4574594497680664, "loss": 2.7195, "nll_loss": 2.635850429534912, "rewards/accuracies": 0.375, "rewards/chosen": -0.3524332046508789, "rewards/margins": -0.006687266752123833, "rewards/rejected": -0.3457459509372711, "step": 110 }, { "epoch": 0.30390143737166325, "grad_norm": 4.321720600128174, "learning_rate": 9.847945205479453e-07, "log_odds_chosen": 0.11856560409069061, "log_odds_ratio": -0.6615714430809021, "logits/chosen": -0.0659106969833374, "logits/rejected": -0.15277716517448425, "logps/chosen": -2.587048053741455, "logps/rejected": -2.684950590133667, "loss": 2.5726, "nll_loss": 2.506462812423706, "rewards/accuracies": 0.5, "rewards/chosen": -0.25870481133461, "rewards/margins": 0.00979025661945343, "rewards/rejected": -0.2684950530529022, "step": 111 }, { "epoch": 0.3066392881587953, "grad_norm": 5.395603179931641, "learning_rate": 9.846575342465753e-07, "log_odds_chosen": -1.1915748119354248, "log_odds_ratio": -1.7184953689575195, "logits/chosen": 0.05429256707429886, "logits/rejected": 0.11709930002689362, "logps/chosen": -4.21763801574707, "logps/rejected": -3.039224863052368, "loss": 2.8646, "nll_loss": 2.6927082538604736, "rewards/accuracies": 0.5, "rewards/chosen": -0.4217638075351715, "rewards/margins": -0.11784131824970245, "rewards/rejected": -0.30392247438430786, "step": 112 }, { "epoch": 0.30937713894592744, "grad_norm": 4.265131950378418, "learning_rate": 9.845205479452055e-07, "log_odds_chosen": 0.8515306711196899, "log_odds_ratio": -0.6545284986495972, "logits/chosen": -0.10622307658195496, "logits/rejected": -0.229547381401062, "logps/chosen": -2.3494975566864014, "logps/rejected": -3.1760082244873047, "loss": 2.5491, "nll_loss": 2.483647108078003, "rewards/accuracies": 0.5, "rewards/chosen": -0.2349497526884079, "rewards/margins": 0.08265107125043869, "rewards/rejected": -0.317600816488266, "step": 113 }, { "epoch": 0.31211498973305957, "grad_norm": 5.450100421905518, "learning_rate": 9.843835616438357e-07, "log_odds_chosen": -0.5729635953903198, "log_odds_ratio": -1.1194804906845093, "logits/chosen": 0.04684761166572571, "logits/rejected": 0.050321437418460846, "logps/chosen": -3.6830523014068604, "logps/rejected": -3.1057400703430176, "loss": 2.7732, "nll_loss": 2.661208152770996, "rewards/accuracies": 0.375, "rewards/chosen": -0.3683052659034729, "rewards/margins": -0.05773124098777771, "rewards/rejected": -0.3105739951133728, "step": 114 }, { "epoch": 0.31485284052019163, "grad_norm": 5.810391426086426, "learning_rate": 9.842465753424657e-07, "log_odds_chosen": -1.3955930471420288, "log_odds_ratio": -1.828195571899414, "logits/chosen": 0.06888475269079208, "logits/rejected": 0.2026725858449936, "logps/chosen": -4.463937759399414, "logps/rejected": -3.106631278991699, "loss": 2.9419, "nll_loss": 2.759053945541382, "rewards/accuracies": 0.125, "rewards/chosen": -0.44639381766319275, "rewards/margins": -0.13573068380355835, "rewards/rejected": -0.310663104057312, "step": 115 }, { "epoch": 0.31759069130732376, "grad_norm": 5.16304874420166, "learning_rate": 9.84109589041096e-07, "log_odds_chosen": 0.05272068828344345, "log_odds_ratio": -0.7985025644302368, "logits/chosen": 0.09004530310630798, "logits/rejected": 0.08997360616922379, "logps/chosen": -2.6813013553619385, "logps/rejected": -2.7313356399536133, "loss": 2.7084, "nll_loss": 2.6285135746002197, "rewards/accuracies": 0.375, "rewards/chosen": -0.2681301534175873, "rewards/margins": 0.005003403872251511, "rewards/rejected": -0.2731335461139679, "step": 116 }, { "epoch": 0.3203285420944558, "grad_norm": 4.60917329788208, "learning_rate": 9.83972602739726e-07, "log_odds_chosen": 0.022993117570877075, "log_odds_ratio": -0.7625960111618042, "logits/chosen": 0.06414984166622162, "logits/rejected": 0.0010253600776195526, "logps/chosen": -2.910116672515869, "logps/rejected": -2.9248976707458496, "loss": 2.6938, "nll_loss": 2.6175668239593506, "rewards/accuracies": 0.5, "rewards/chosen": -0.29101166129112244, "rewards/margins": 0.001478102058172226, "rewards/rejected": -0.29248976707458496, "step": 117 }, { "epoch": 0.32306639288158795, "grad_norm": 5.327839374542236, "learning_rate": 9.83835616438356e-07, "log_odds_chosen": -0.39497947692871094, "log_odds_ratio": -1.0258395671844482, "logits/chosen": 0.015714827924966812, "logits/rejected": 0.09488728642463684, "logps/chosen": -3.920980453491211, "logps/rejected": -3.5239651203155518, "loss": 2.7688, "nll_loss": 2.6662402153015137, "rewards/accuracies": 0.5, "rewards/chosen": -0.392098069190979, "rewards/margins": -0.03970152139663696, "rewards/rejected": -0.35239651799201965, "step": 118 }, { "epoch": 0.3258042436687201, "grad_norm": 5.367196559906006, "learning_rate": 9.836986301369863e-07, "log_odds_chosen": -0.49583807587623596, "log_odds_ratio": -1.272647738456726, "logits/chosen": 0.08628401160240173, "logits/rejected": 0.07546932250261307, "logps/chosen": -3.7321391105651855, "logps/rejected": -3.230827808380127, "loss": 2.7918, "nll_loss": 2.6645350456237793, "rewards/accuracies": 0.5, "rewards/chosen": -0.37321388721466064, "rewards/margins": -0.050131119787693024, "rewards/rejected": -0.3230827748775482, "step": 119 }, { "epoch": 0.32854209445585214, "grad_norm": 5.0620646476745605, "learning_rate": 9.835616438356163e-07, "log_odds_chosen": -1.1428886651992798, "log_odds_ratio": -1.5980234146118164, "logits/chosen": -0.1458549201488495, "logits/rejected": -0.12412634491920471, "logps/chosen": -3.9028985500335693, "logps/rejected": -2.7736520767211914, "loss": 2.8085, "nll_loss": 2.6487181186676025, "rewards/accuracies": 0.375, "rewards/chosen": -0.39028987288475037, "rewards/margins": -0.11292465031147003, "rewards/rejected": -0.27736523747444153, "step": 120 }, { "epoch": 0.33127994524298426, "grad_norm": 4.472869873046875, "learning_rate": 9.834246575342465e-07, "log_odds_chosen": -0.12884455919265747, "log_odds_ratio": -0.8620838522911072, "logits/chosen": -0.10252644121646881, "logits/rejected": -0.08950278162956238, "logps/chosen": -2.9740681648254395, "logps/rejected": -2.837595224380493, "loss": 2.6283, "nll_loss": 2.542119026184082, "rewards/accuracies": 0.625, "rewards/chosen": -0.29740679264068604, "rewards/margins": -0.013647301122546196, "rewards/rejected": -0.2837595045566559, "step": 121 }, { "epoch": 0.33401779603011633, "grad_norm": 5.613264083862305, "learning_rate": 9.832876712328767e-07, "log_odds_chosen": -0.2079426646232605, "log_odds_ratio": -1.1849322319030762, "logits/chosen": 0.05869648605585098, "logits/rejected": 0.11657460033893585, "logps/chosen": -3.6833012104034424, "logps/rejected": -3.5119223594665527, "loss": 2.8154, "nll_loss": 2.6968820095062256, "rewards/accuracies": 0.25, "rewards/chosen": -0.36833012104034424, "rewards/margins": -0.017137866467237473, "rewards/rejected": -0.3511922359466553, "step": 122 }, { "epoch": 0.33675564681724846, "grad_norm": 4.780332565307617, "learning_rate": 9.831506849315067e-07, "log_odds_chosen": -0.500229001045227, "log_odds_ratio": -1.2077635526657104, "logits/chosen": 0.12986694276332855, "logits/rejected": 0.16910040378570557, "logps/chosen": -3.6760635375976562, "logps/rejected": -3.1454033851623535, "loss": 2.7243, "nll_loss": 2.603529930114746, "rewards/accuracies": 0.5, "rewards/chosen": -0.36760640144348145, "rewards/margins": -0.05306604504585266, "rewards/rejected": -0.31454038619995117, "step": 123 }, { "epoch": 0.3394934976043806, "grad_norm": 4.4304070472717285, "learning_rate": 9.83013698630137e-07, "log_odds_chosen": -0.14299097657203674, "log_odds_ratio": -0.9653422832489014, "logits/chosen": 0.05262959375977516, "logits/rejected": -0.009872042573988438, "logps/chosen": -2.9768030643463135, "logps/rejected": -2.823819398880005, "loss": 2.7213, "nll_loss": 2.6248128414154053, "rewards/accuracies": 0.5, "rewards/chosen": -0.2976803183555603, "rewards/margins": -0.015298360027372837, "rewards/rejected": -0.28238195180892944, "step": 124 }, { "epoch": 0.34223134839151265, "grad_norm": 6.540836334228516, "learning_rate": 9.828767123287671e-07, "log_odds_chosen": -1.310016393661499, "log_odds_ratio": -1.7602818012237549, "logits/chosen": 0.11626613140106201, "logits/rejected": 0.2587585747241974, "logps/chosen": -4.775489807128906, "logps/rejected": -3.4860682487487793, "loss": 3.0076, "nll_loss": 2.8316009044647217, "rewards/accuracies": 0.25, "rewards/chosen": -0.4775489866733551, "rewards/margins": -0.12894216179847717, "rewards/rejected": -0.34860682487487793, "step": 125 }, { "epoch": 0.34496919917864477, "grad_norm": 5.6862473487854, "learning_rate": 9.82739726027397e-07, "log_odds_chosen": -0.7429652214050293, "log_odds_ratio": -1.465396761894226, "logits/chosen": 0.17115375399589539, "logits/rejected": 0.1933058202266693, "logps/chosen": -4.4842634201049805, "logps/rejected": -3.7596628665924072, "loss": 2.8423, "nll_loss": 2.6957178115844727, "rewards/accuracies": 0.25, "rewards/chosen": -0.44842636585235596, "rewards/margins": -0.07246006280183792, "rewards/rejected": -0.37596631050109863, "step": 126 }, { "epoch": 0.34770704996577684, "grad_norm": 5.57318639755249, "learning_rate": 9.826027397260273e-07, "log_odds_chosen": -0.7552962303161621, "log_odds_ratio": -1.3990161418914795, "logits/chosen": 0.03627917915582657, "logits/rejected": 0.11640186607837677, "logps/chosen": -4.2913079261779785, "logps/rejected": -3.530776023864746, "loss": 2.8512, "nll_loss": 2.7112741470336914, "rewards/accuracies": 0.25, "rewards/chosen": -0.42913079261779785, "rewards/margins": -0.0760531947016716, "rewards/rejected": -0.35307759046554565, "step": 127 }, { "epoch": 0.35044490075290896, "grad_norm": 5.245677471160889, "learning_rate": 9.824657534246575e-07, "log_odds_chosen": -0.9445676207542419, "log_odds_ratio": -1.3302998542785645, "logits/chosen": -0.08404693007469177, "logits/rejected": 0.007854819297790527, "logps/chosen": -3.495981216430664, "logps/rejected": -2.612776279449463, "loss": 2.7705, "nll_loss": 2.6374547481536865, "rewards/accuracies": 0.125, "rewards/chosen": -0.34959813952445984, "rewards/margins": -0.0883205235004425, "rewards/rejected": -0.26127761602401733, "step": 128 }, { "epoch": 0.3531827515400411, "grad_norm": 5.036715030670166, "learning_rate": 9.823287671232875e-07, "log_odds_chosen": -0.49799811840057373, "log_odds_ratio": -1.0795090198516846, "logits/chosen": -0.08384644985198975, "logits/rejected": -0.07624780386686325, "logps/chosen": -3.077749729156494, "logps/rejected": -2.590684175491333, "loss": 2.7098, "nll_loss": 2.6018495559692383, "rewards/accuracies": 0.375, "rewards/chosen": -0.30777496099472046, "rewards/margins": -0.0487065315246582, "rewards/rejected": -0.25906842947006226, "step": 129 }, { "epoch": 0.35592060232717315, "grad_norm": 4.762495040893555, "learning_rate": 9.821917808219177e-07, "log_odds_chosen": -0.05032166838645935, "log_odds_ratio": -1.0437005758285522, "logits/chosen": -0.13137535750865936, "logits/rejected": -0.11968773603439331, "logps/chosen": -3.2003164291381836, "logps/rejected": -3.1068601608276367, "loss": 2.6442, "nll_loss": 2.5398247241973877, "rewards/accuracies": 0.75, "rewards/chosen": -0.32003164291381836, "rewards/margins": -0.009345613420009613, "rewards/rejected": -0.31068605184555054, "step": 130 }, { "epoch": 0.3586584531143053, "grad_norm": 5.238681793212891, "learning_rate": 9.82054794520548e-07, "log_odds_chosen": -0.7818592190742493, "log_odds_ratio": -1.2508831024169922, "logits/chosen": -0.0259847491979599, "logits/rejected": 0.061177290976047516, "logps/chosen": -3.4924869537353516, "logps/rejected": -2.737785577774048, "loss": 2.8459, "nll_loss": 2.7208023071289062, "rewards/accuracies": 0.25, "rewards/chosen": -0.3492487370967865, "rewards/margins": -0.07547014951705933, "rewards/rejected": -0.2737785577774048, "step": 131 }, { "epoch": 0.3613963039014374, "grad_norm": 4.091406345367432, "learning_rate": 9.819178082191781e-07, "log_odds_chosen": 0.6130402684211731, "log_odds_ratio": -0.8139925003051758, "logits/chosen": 0.16210246086120605, "logits/rejected": 0.06425817310810089, "logps/chosen": -2.449502944946289, "logps/rejected": -3.009329319000244, "loss": 2.4867, "nll_loss": 2.4053211212158203, "rewards/accuracies": 0.5, "rewards/chosen": -0.2449502944946289, "rewards/margins": 0.05598266422748566, "rewards/rejected": -0.30093294382095337, "step": 132 }, { "epoch": 0.36413415468856947, "grad_norm": 4.783697128295898, "learning_rate": 9.817808219178081e-07, "log_odds_chosen": 0.12192030251026154, "log_odds_ratio": -0.9777776598930359, "logits/chosen": 0.12096741795539856, "logits/rejected": 0.038923200219869614, "logps/chosen": -2.9197468757629395, "logps/rejected": -3.0735747814178467, "loss": 2.7501, "nll_loss": 2.6523361206054688, "rewards/accuracies": 0.375, "rewards/chosen": -0.2919747233390808, "rewards/margins": 0.015382766723632812, "rewards/rejected": -0.3073574900627136, "step": 133 }, { "epoch": 0.3668720054757016, "grad_norm": 6.29307746887207, "learning_rate": 9.816438356164383e-07, "log_odds_chosen": -1.4457447528839111, "log_odds_ratio": -1.9816482067108154, "logits/chosen": 0.17450137436389923, "logits/rejected": 0.22933128476142883, "logps/chosen": -4.5547356605529785, "logps/rejected": -3.1495490074157715, "loss": 3.0216, "nll_loss": 2.82346773147583, "rewards/accuracies": 0.25, "rewards/chosen": -0.45547354221343994, "rewards/margins": -0.1405186504125595, "rewards/rejected": -0.3149549067020416, "step": 134 }, { "epoch": 0.36960985626283366, "grad_norm": 4.418778419494629, "learning_rate": 9.815068493150685e-07, "log_odds_chosen": 0.35038456320762634, "log_odds_ratio": -0.5477620959281921, "logits/chosen": 0.10843002051115036, "logits/rejected": 0.013400271534919739, "logps/chosen": -2.374087333679199, "logps/rejected": -2.7007951736450195, "loss": 2.6137, "nll_loss": 2.5589685440063477, "rewards/accuracies": 0.875, "rewards/chosen": -0.23740872740745544, "rewards/margins": 0.03267079219222069, "rewards/rejected": -0.27007952332496643, "step": 135 }, { "epoch": 0.3723477070499658, "grad_norm": 4.81036901473999, "learning_rate": 9.813698630136985e-07, "log_odds_chosen": 0.27907630801200867, "log_odds_ratio": -0.6394643187522888, "logits/chosen": 0.01059974730014801, "logits/rejected": -0.018216460943222046, "logps/chosen": -3.078880786895752, "logps/rejected": -3.317622423171997, "loss": 2.573, "nll_loss": 2.5090644359588623, "rewards/accuracies": 0.625, "rewards/chosen": -0.30788806080818176, "rewards/margins": 0.023874185979366302, "rewards/rejected": -0.33176225423812866, "step": 136 }, { "epoch": 0.3750855578370979, "grad_norm": 5.075159549713135, "learning_rate": 9.812328767123287e-07, "log_odds_chosen": -0.2414587140083313, "log_odds_ratio": -0.9348693490028381, "logits/chosen": 0.05894971638917923, "logits/rejected": -0.0012363195419311523, "logps/chosen": -2.5213112831115723, "logps/rejected": -2.283729076385498, "loss": 2.6209, "nll_loss": 2.527392864227295, "rewards/accuracies": 0.5, "rewards/chosen": -0.2521311342716217, "rewards/margins": -0.02375822141766548, "rewards/rejected": -0.22837291657924652, "step": 137 }, { "epoch": 0.37782340862423, "grad_norm": 5.197196006774902, "learning_rate": 9.81095890410959e-07, "log_odds_chosen": 0.29652726650238037, "log_odds_ratio": -0.6685201525688171, "logits/chosen": 0.022185906767845154, "logits/rejected": 0.10214302688837051, "logps/chosen": -3.001833915710449, "logps/rejected": -3.2599830627441406, "loss": 2.6946, "nll_loss": 2.627721071243286, "rewards/accuracies": 0.625, "rewards/chosen": -0.30018341541290283, "rewards/margins": 0.025814931839704514, "rewards/rejected": -0.32599833607673645, "step": 138 }, { "epoch": 0.3805612594113621, "grad_norm": 3.9433462619781494, "learning_rate": 9.809589041095891e-07, "log_odds_chosen": 0.2905261516571045, "log_odds_ratio": -0.6597840785980225, "logits/chosen": 0.1818375289440155, "logits/rejected": 0.08489257097244263, "logps/chosen": -1.951277256011963, "logps/rejected": -2.1517491340637207, "loss": 2.4329, "nll_loss": 2.3669066429138184, "rewards/accuracies": 0.75, "rewards/chosen": -0.1951277256011963, "rewards/margins": 0.020047198981046677, "rewards/rejected": -0.21517494320869446, "step": 139 }, { "epoch": 0.38329911019849416, "grad_norm": 4.373739242553711, "learning_rate": 9.808219178082191e-07, "log_odds_chosen": -0.04655057191848755, "log_odds_ratio": -0.8463748693466187, "logits/chosen": 0.021948209032416344, "logits/rejected": 0.005399022251367569, "logps/chosen": -2.9217381477355957, "logps/rejected": -2.8288049697875977, "loss": 2.5499, "nll_loss": 2.465214729309082, "rewards/accuracies": 0.625, "rewards/chosen": -0.292173832654953, "rewards/margins": -0.009293325245380402, "rewards/rejected": -0.2828804850578308, "step": 140 }, { "epoch": 0.3860369609856263, "grad_norm": 4.82395601272583, "learning_rate": 9.806849315068493e-07, "log_odds_chosen": 0.041703835129737854, "log_odds_ratio": -0.7814750671386719, "logits/chosen": 0.06910443305969238, "logits/rejected": 0.11996160447597504, "logps/chosen": -2.652634620666504, "logps/rejected": -2.7000484466552734, "loss": 2.6584, "nll_loss": 2.5802340507507324, "rewards/accuracies": 0.75, "rewards/chosen": -0.26526349782943726, "rewards/margins": 0.004741359502077103, "rewards/rejected": -0.27000486850738525, "step": 141 }, { "epoch": 0.3887748117727584, "grad_norm": 5.757586479187012, "learning_rate": 9.805479452054795e-07, "log_odds_chosen": -0.22100570797920227, "log_odds_ratio": -1.117691993713379, "logits/chosen": 0.02055218443274498, "logits/rejected": -0.02593444101512432, "logps/chosen": -4.032637596130371, "logps/rejected": -3.7976198196411133, "loss": 2.7063, "nll_loss": 2.5945138931274414, "rewards/accuracies": 0.25, "rewards/chosen": -0.40326377749443054, "rewards/margins": -0.023501820862293243, "rewards/rejected": -0.3797619640827179, "step": 142 }, { "epoch": 0.3915126625598905, "grad_norm": 4.307644844055176, "learning_rate": 9.804109589041095e-07, "log_odds_chosen": 0.14949358999729156, "log_odds_ratio": -0.6638201475143433, "logits/chosen": -0.10764379799365997, "logits/rejected": -0.11273613572120667, "logps/chosen": -2.758852958679199, "logps/rejected": -2.8930885791778564, "loss": 2.4999, "nll_loss": 2.433486223220825, "rewards/accuracies": 0.5, "rewards/chosen": -0.27588531374931335, "rewards/margins": 0.013423538766801357, "rewards/rejected": -0.2893088459968567, "step": 143 }, { "epoch": 0.3942505133470226, "grad_norm": 5.078801155090332, "learning_rate": 9.802739726027397e-07, "log_odds_chosen": -0.08081367611885071, "log_odds_ratio": -0.8781024217605591, "logits/chosen": 0.003029586747288704, "logits/rejected": 0.0957208126783371, "logps/chosen": -2.798001289367676, "logps/rejected": -2.696838855743408, "loss": 2.724, "nll_loss": 2.6362287998199463, "rewards/accuracies": 0.375, "rewards/chosen": -0.279800146818161, "rewards/margins": -0.010116258636116982, "rewards/rejected": -0.2696838974952698, "step": 144 }, { "epoch": 0.39698836413415467, "grad_norm": 4.469725131988525, "learning_rate": 9.8013698630137e-07, "log_odds_chosen": 0.9438123106956482, "log_odds_ratio": -0.377549409866333, "logits/chosen": 0.11666781455278397, "logits/rejected": -0.03109257109463215, "logps/chosen": -2.2619147300720215, "logps/rejected": -3.1011931896209717, "loss": 2.554, "nll_loss": 2.516221761703491, "rewards/accuracies": 0.875, "rewards/chosen": -0.2261914610862732, "rewards/margins": 0.08392784744501114, "rewards/rejected": -0.3101193308830261, "step": 145 }, { "epoch": 0.3997262149212868, "grad_norm": 5.000705718994141, "learning_rate": 9.8e-07, "log_odds_chosen": -0.5623296499252319, "log_odds_ratio": -1.0825344324111938, "logits/chosen": -0.15381811559200287, "logits/rejected": -0.09277191013097763, "logps/chosen": -3.146327495574951, "logps/rejected": -2.576805830001831, "loss": 2.6746, "nll_loss": 2.5663509368896484, "rewards/accuracies": 0.25, "rewards/chosen": -0.31463274359703064, "rewards/margins": -0.056952156126499176, "rewards/rejected": -0.25768059492111206, "step": 146 }, { "epoch": 0.4024640657084189, "grad_norm": 4.8116655349731445, "learning_rate": 9.798630136986301e-07, "log_odds_chosen": -0.28668469190597534, "log_odds_ratio": -1.1072885990142822, "logits/chosen": 0.04749320074915886, "logits/rejected": 0.031387656927108765, "logps/chosen": -3.5471038818359375, "logps/rejected": -3.233865737915039, "loss": 2.6736, "nll_loss": 2.5628626346588135, "rewards/accuracies": 0.375, "rewards/chosen": -0.3547104001045227, "rewards/margins": -0.031323812901973724, "rewards/rejected": -0.3233865797519684, "step": 147 }, { "epoch": 0.405201916495551, "grad_norm": 5.321728229522705, "learning_rate": 9.797260273972601e-07, "log_odds_chosen": -0.4587215781211853, "log_odds_ratio": -1.279991626739502, "logits/chosen": 0.22259165346622467, "logits/rejected": 0.2518879473209381, "logps/chosen": -3.833956241607666, "logps/rejected": -3.3662476539611816, "loss": 2.8151, "nll_loss": 2.6870806217193604, "rewards/accuracies": 0.375, "rewards/chosen": -0.38339561223983765, "rewards/margins": -0.04677087068557739, "rewards/rejected": -0.33662474155426025, "step": 148 }, { "epoch": 0.4079397672826831, "grad_norm": 4.543006896972656, "learning_rate": 9.795890410958903e-07, "log_odds_chosen": -0.2966458797454834, "log_odds_ratio": -1.0575881004333496, "logits/chosen": 0.07245608419179916, "logits/rejected": 0.08018312603235245, "logps/chosen": -3.2882797718048096, "logps/rejected": -2.937077522277832, "loss": 2.67, "nll_loss": 2.5642073154449463, "rewards/accuracies": 0.5, "rewards/chosen": -0.32882797718048096, "rewards/margins": -0.03512023389339447, "rewards/rejected": -0.2937077581882477, "step": 149 }, { "epoch": 0.4106776180698152, "grad_norm": 4.540946960449219, "learning_rate": 9.794520547945205e-07, "log_odds_chosen": 0.47162064909935, "log_odds_ratio": -0.579625129699707, "logits/chosen": -0.09184790402650833, "logits/rejected": -0.12036585807800293, "logps/chosen": -2.6203675270080566, "logps/rejected": -3.055135488510132, "loss": 2.5008, "nll_loss": 2.4428086280822754, "rewards/accuracies": 0.625, "rewards/chosen": -0.2620367705821991, "rewards/margins": 0.04347679764032364, "rewards/rejected": -0.30551356077194214, "step": 150 }, { "epoch": 0.4134154688569473, "grad_norm": 5.13477897644043, "learning_rate": 9.793150684931505e-07, "log_odds_chosen": 0.1541951298713684, "log_odds_ratio": -0.718442976474762, "logits/chosen": 0.22980281710624695, "logits/rejected": 0.2593691051006317, "logps/chosen": -2.979722023010254, "logps/rejected": -3.117194652557373, "loss": 2.6697, "nll_loss": 2.5978338718414307, "rewards/accuracies": 0.625, "rewards/chosen": -0.2979722023010254, "rewards/margins": 0.013747235760092735, "rewards/rejected": -0.31171947717666626, "step": 151 }, { "epoch": 0.4161533196440794, "grad_norm": 5.08950138092041, "learning_rate": 9.791780821917807e-07, "log_odds_chosen": -0.19550126791000366, "log_odds_ratio": -0.9687903523445129, "logits/chosen": -0.10826744884252548, "logits/rejected": -0.12372550368309021, "logps/chosen": -3.6214449405670166, "logps/rejected": -3.4187769889831543, "loss": 2.6964, "nll_loss": 2.5994796752929688, "rewards/accuracies": 0.5, "rewards/chosen": -0.36214447021484375, "rewards/margins": -0.020266786217689514, "rewards/rejected": -0.34187769889831543, "step": 152 }, { "epoch": 0.4188911704312115, "grad_norm": 5.025350093841553, "learning_rate": 9.79041095890411e-07, "log_odds_chosen": -0.9928625822067261, "log_odds_ratio": -1.5570893287658691, "logits/chosen": -0.08855299651622772, "logits/rejected": -0.03278077393770218, "logps/chosen": -3.7309229373931885, "logps/rejected": -2.734337329864502, "loss": 2.6979, "nll_loss": 2.5421791076660156, "rewards/accuracies": 0.25, "rewards/chosen": -0.37309232354164124, "rewards/margins": -0.0996585562825203, "rewards/rejected": -0.27343374490737915, "step": 153 }, { "epoch": 0.4216290212183436, "grad_norm": 4.995957851409912, "learning_rate": 9.78904109589041e-07, "log_odds_chosen": 0.010709241032600403, "log_odds_ratio": -0.9553448557853699, "logits/chosen": 0.008557528257369995, "logits/rejected": -0.14139282703399658, "logps/chosen": -3.148691415786743, "logps/rejected": -3.11324405670166, "loss": 2.5223, "nll_loss": 2.4267611503601074, "rewards/accuracies": 0.375, "rewards/chosen": -0.31486910581588745, "rewards/margins": -0.0035447031259536743, "rewards/rejected": -0.31132444739341736, "step": 154 }, { "epoch": 0.4243668720054757, "grad_norm": 4.787952899932861, "learning_rate": 9.787671232876711e-07, "log_odds_chosen": -0.061803270131349564, "log_odds_ratio": -0.7863101959228516, "logits/chosen": 0.023721497505903244, "logits/rejected": 0.062320057302713394, "logps/chosen": -2.669550895690918, "logps/rejected": -2.577152729034424, "loss": 2.5632, "nll_loss": 2.4845926761627197, "rewards/accuracies": 0.5, "rewards/chosen": -0.26695507764816284, "rewards/margins": -0.0092398039996624, "rewards/rejected": -0.25771525502204895, "step": 155 }, { "epoch": 0.4271047227926078, "grad_norm": 5.068737030029297, "learning_rate": 9.786301369863013e-07, "log_odds_chosen": -0.09330730140209198, "log_odds_ratio": -0.8956432938575745, "logits/chosen": 0.14529535174369812, "logits/rejected": 0.1075684204697609, "logps/chosen": -3.0280861854553223, "logps/rejected": -2.911499500274658, "loss": 2.605, "nll_loss": 2.515413522720337, "rewards/accuracies": 0.5, "rewards/chosen": -0.30280861258506775, "rewards/margins": -0.01165865734219551, "rewards/rejected": -0.29114994406700134, "step": 156 }, { "epoch": 0.42984257357973993, "grad_norm": 5.1779680252075195, "learning_rate": 9.784931506849313e-07, "log_odds_chosen": -1.339076280593872, "log_odds_ratio": -1.7939949035644531, "logits/chosen": -0.21923494338989258, "logits/rejected": -0.11161982268095016, "logps/chosen": -4.226345539093018, "logps/rejected": -2.95051908493042, "loss": 2.8389, "nll_loss": 2.6595425605773926, "rewards/accuracies": 0.25, "rewards/chosen": -0.4226345419883728, "rewards/margins": -0.12758265435695648, "rewards/rejected": -0.2950519025325775, "step": 157 }, { "epoch": 0.432580424366872, "grad_norm": 4.298943996429443, "learning_rate": 9.783561643835615e-07, "log_odds_chosen": -0.16038528084754944, "log_odds_ratio": -0.9894347190856934, "logits/chosen": -0.038959890604019165, "logits/rejected": -0.08928197622299194, "logps/chosen": -2.623842239379883, "logps/rejected": -2.473848819732666, "loss": 2.568, "nll_loss": 2.469017505645752, "rewards/accuracies": 0.375, "rewards/chosen": -0.26238423585891724, "rewards/margins": -0.01499936357140541, "rewards/rejected": -0.24738487601280212, "step": 158 }, { "epoch": 0.4353182751540041, "grad_norm": 4.671489238739014, "learning_rate": 9.782191780821918e-07, "log_odds_chosen": -0.21234183013439178, "log_odds_ratio": -0.8411045074462891, "logits/chosen": -0.15272042155265808, "logits/rejected": -0.14467331767082214, "logps/chosen": -2.6959643363952637, "logps/rejected": -2.475571632385254, "loss": 2.6189, "nll_loss": 2.5348222255706787, "rewards/accuracies": 0.5, "rewards/chosen": -0.2695964574813843, "rewards/margins": -0.022039275616407394, "rewards/rejected": -0.24755717813968658, "step": 159 }, { "epoch": 0.4380561259411362, "grad_norm": 5.1743364334106445, "learning_rate": 9.78082191780822e-07, "log_odds_chosen": -0.10678741335868835, "log_odds_ratio": -1.0306340456008911, "logits/chosen": 0.05746356025338173, "logits/rejected": 0.005924534052610397, "logps/chosen": -3.4296762943267822, "logps/rejected": -3.3119218349456787, "loss": 2.7007, "nll_loss": 2.5976004600524902, "rewards/accuracies": 0.375, "rewards/chosen": -0.3429676294326782, "rewards/margins": -0.011775441467761993, "rewards/rejected": -0.3311921954154968, "step": 160 }, { "epoch": 0.4407939767282683, "grad_norm": 4.751126766204834, "learning_rate": 9.77945205479452e-07, "log_odds_chosen": 0.06571212410926819, "log_odds_ratio": -0.8735952377319336, "logits/chosen": -0.21912507712841034, "logits/rejected": -0.2106812745332718, "logps/chosen": -2.864098072052002, "logps/rejected": -2.9273343086242676, "loss": 2.6308, "nll_loss": 2.543436050415039, "rewards/accuracies": 0.625, "rewards/chosen": -0.28640982508659363, "rewards/margins": 0.006323622539639473, "rewards/rejected": -0.29273343086242676, "step": 161 }, { "epoch": 0.44353182751540043, "grad_norm": 4.779975891113281, "learning_rate": 9.778082191780822e-07, "log_odds_chosen": -0.3976118266582489, "log_odds_ratio": -1.1070137023925781, "logits/chosen": 0.04846369847655296, "logits/rejected": 0.07221006602048874, "logps/chosen": -3.1382803916931152, "logps/rejected": -2.7205333709716797, "loss": 2.6288, "nll_loss": 2.5181283950805664, "rewards/accuracies": 0.5, "rewards/chosen": -0.3138280212879181, "rewards/margins": -0.041774727404117584, "rewards/rejected": -0.2720533311367035, "step": 162 }, { "epoch": 0.4462696783025325, "grad_norm": 5.131536483764648, "learning_rate": 9.776712328767124e-07, "log_odds_chosen": -1.1094319820404053, "log_odds_ratio": -1.531858205795288, "logits/chosen": -0.029782038182020187, "logits/rejected": 0.02192012220621109, "logps/chosen": -3.5679666996002197, "logps/rejected": -2.490895986557007, "loss": 2.7741, "nll_loss": 2.6209535598754883, "rewards/accuracies": 0.25, "rewards/chosen": -0.3567966818809509, "rewards/margins": -0.10770706832408905, "rewards/rejected": -0.24908961355686188, "step": 163 }, { "epoch": 0.4490075290896646, "grad_norm": 5.0943450927734375, "learning_rate": 9.775342465753424e-07, "log_odds_chosen": -0.1956142783164978, "log_odds_ratio": -0.8869513869285583, "logits/chosen": -0.06847389042377472, "logits/rejected": -0.05583008751273155, "logps/chosen": -3.348503351211548, "logps/rejected": -3.157170057296753, "loss": 2.655, "nll_loss": 2.5663468837738037, "rewards/accuracies": 0.375, "rewards/chosen": -0.33485037088394165, "rewards/margins": -0.019133346155285835, "rewards/rejected": -0.31571701169013977, "step": 164 }, { "epoch": 0.4517453798767967, "grad_norm": 5.193617820739746, "learning_rate": 9.773972602739726e-07, "log_odds_chosen": -0.26275181770324707, "log_odds_ratio": -0.9912934899330139, "logits/chosen": -0.06437616050243378, "logits/rejected": 0.0024869581684470177, "logps/chosen": -3.6182332038879395, "logps/rejected": -3.3604736328125, "loss": 2.682, "nll_loss": 2.5828568935394287, "rewards/accuracies": 0.375, "rewards/chosen": -0.36182332038879395, "rewards/margins": -0.025775954127311707, "rewards/rejected": -0.33604735136032104, "step": 165 }, { "epoch": 0.4544832306639288, "grad_norm": 4.537679195404053, "learning_rate": 9.772602739726028e-07, "log_odds_chosen": -0.45121073722839355, "log_odds_ratio": -1.1054843664169312, "logits/chosen": 0.10746030509471893, "logits/rejected": 0.08966897428035736, "logps/chosen": -3.2126054763793945, "logps/rejected": -2.739683151245117, "loss": 2.608, "nll_loss": 2.497483730316162, "rewards/accuracies": 0.5, "rewards/chosen": -0.32126057147979736, "rewards/margins": -0.047292232513427734, "rewards/rejected": -0.27396833896636963, "step": 166 }, { "epoch": 0.45722108145106094, "grad_norm": 5.187558174133301, "learning_rate": 9.77123287671233e-07, "log_odds_chosen": -0.5543379187583923, "log_odds_ratio": -1.156867504119873, "logits/chosen": 0.1353156864643097, "logits/rejected": 0.19099614024162292, "logps/chosen": -3.519794225692749, "logps/rejected": -2.977598190307617, "loss": 2.6319, "nll_loss": 2.516244649887085, "rewards/accuracies": 0.375, "rewards/chosen": -0.35197943449020386, "rewards/margins": -0.054219599813222885, "rewards/rejected": -0.2977598309516907, "step": 167 }, { "epoch": 0.459958932238193, "grad_norm": 4.747788429260254, "learning_rate": 9.76986301369863e-07, "log_odds_chosen": 0.04713277518749237, "log_odds_ratio": -0.7286400198936462, "logits/chosen": -0.16408012807369232, "logits/rejected": -0.09872398525476456, "logps/chosen": -2.875518321990967, "logps/rejected": -2.933595657348633, "loss": 2.5961, "nll_loss": 2.523228168487549, "rewards/accuracies": 0.75, "rewards/chosen": -0.2875518500804901, "rewards/margins": 0.005807720124721527, "rewards/rejected": -0.29335954785346985, "step": 168 }, { "epoch": 0.46269678302532513, "grad_norm": 4.646708965301514, "learning_rate": 9.768493150684932e-07, "log_odds_chosen": 0.12557387351989746, "log_odds_ratio": -0.7853918075561523, "logits/chosen": 0.05398353934288025, "logits/rejected": -0.055403564125299454, "logps/chosen": -2.788079261779785, "logps/rejected": -2.883633852005005, "loss": 2.5145, "nll_loss": 2.4359443187713623, "rewards/accuracies": 0.375, "rewards/chosen": -0.2788079082965851, "rewards/margins": 0.009555475786328316, "rewards/rejected": -0.28836339712142944, "step": 169 }, { "epoch": 0.4654346338124572, "grad_norm": 4.632308483123779, "learning_rate": 9.767123287671234e-07, "log_odds_chosen": 0.19877088069915771, "log_odds_ratio": -0.7461720705032349, "logits/chosen": 0.11483567953109741, "logits/rejected": 0.17591732740402222, "logps/chosen": -2.657789468765259, "logps/rejected": -2.7607760429382324, "loss": 2.587, "nll_loss": 2.5124034881591797, "rewards/accuracies": 0.75, "rewards/chosen": -0.26577895879745483, "rewards/margins": 0.010298673063516617, "rewards/rejected": -0.27607762813568115, "step": 170 }, { "epoch": 0.4681724845995893, "grad_norm": 4.804256439208984, "learning_rate": 9.765753424657534e-07, "log_odds_chosen": -1.000686764717102, "log_odds_ratio": -1.5044052600860596, "logits/chosen": -0.09416041523218155, "logits/rejected": -0.1337248980998993, "logps/chosen": -3.7908949851989746, "logps/rejected": -2.7970995903015137, "loss": 2.6338, "nll_loss": 2.483330011367798, "rewards/accuracies": 0.375, "rewards/chosen": -0.3790895342826843, "rewards/margins": -0.0993795245885849, "rewards/rejected": -0.27970996499061584, "step": 171 }, { "epoch": 0.47091033538672145, "grad_norm": 4.648379802703857, "learning_rate": 9.764383561643836e-07, "log_odds_chosen": -0.4557608366012573, "log_odds_ratio": -1.012894868850708, "logits/chosen": -0.06593253463506699, "logits/rejected": 0.0030886344611644745, "logps/chosen": -2.9142026901245117, "logps/rejected": -2.513319253921509, "loss": 2.5272, "nll_loss": 2.425872564315796, "rewards/accuracies": 0.25, "rewards/chosen": -0.29142025113105774, "rewards/margins": -0.04008834436535835, "rewards/rejected": -0.2513319253921509, "step": 172 }, { "epoch": 0.4736481861738535, "grad_norm": 5.048556327819824, "learning_rate": 9.763013698630138e-07, "log_odds_chosen": 0.24035769701004028, "log_odds_ratio": -0.7579019665718079, "logits/chosen": 0.06424508988857269, "logits/rejected": 0.03331582620739937, "logps/chosen": -3.3595051765441895, "logps/rejected": -3.5809898376464844, "loss": 2.6121, "nll_loss": 2.536283493041992, "rewards/accuracies": 0.625, "rewards/chosen": -0.3359505534172058, "rewards/margins": 0.02214844711124897, "rewards/rejected": -0.35809898376464844, "step": 173 }, { "epoch": 0.47638603696098564, "grad_norm": 5.632077693939209, "learning_rate": 9.761643835616438e-07, "log_odds_chosen": -1.194157600402832, "log_odds_ratio": -1.7078309059143066, "logits/chosen": 0.0011504478752613068, "logits/rejected": 0.08681042492389679, "logps/chosen": -4.157700061798096, "logps/rejected": -2.9911019802093506, "loss": 2.7336, "nll_loss": 2.562822103500366, "rewards/accuracies": 0.25, "rewards/chosen": -0.4157700538635254, "rewards/margins": -0.11665983498096466, "rewards/rejected": -0.29911020398139954, "step": 174 }, { "epoch": 0.4791238877481177, "grad_norm": 4.7381591796875, "learning_rate": 9.76027397260274e-07, "log_odds_chosen": -0.37062782049179077, "log_odds_ratio": -0.9964894652366638, "logits/chosen": -0.035233646631240845, "logits/rejected": -0.0026968717575073242, "logps/chosen": -2.97426438331604, "logps/rejected": -2.6012589931488037, "loss": 2.5788, "nll_loss": 2.47916841506958, "rewards/accuracies": 0.375, "rewards/chosen": -0.2974264621734619, "rewards/margins": -0.037300530821084976, "rewards/rejected": -0.26012590527534485, "step": 175 }, { "epoch": 0.48186173853524983, "grad_norm": 4.8316755294799805, "learning_rate": 9.758904109589042e-07, "log_odds_chosen": 0.12296667695045471, "log_odds_ratio": -0.768628716468811, "logits/chosen": -0.12481598556041718, "logits/rejected": -0.013429924845695496, "logps/chosen": -3.0030479431152344, "logps/rejected": -3.100154399871826, "loss": 2.6018, "nll_loss": 2.5249249935150146, "rewards/accuracies": 0.375, "rewards/chosen": -0.3003048300743103, "rewards/margins": 0.009710615500807762, "rewards/rejected": -0.3100154399871826, "step": 176 }, { "epoch": 0.48459958932238195, "grad_norm": 5.6362128257751465, "learning_rate": 9.757534246575342e-07, "log_odds_chosen": -0.059983327984809875, "log_odds_ratio": -0.8329845666885376, "logits/chosen": 0.10580731928348541, "logits/rejected": 0.1335211843252182, "logps/chosen": -3.5663390159606934, "logps/rejected": -3.498800277709961, "loss": 2.7403, "nll_loss": 2.6570301055908203, "rewards/accuracies": 0.25, "rewards/chosen": -0.35663390159606934, "rewards/margins": -0.0067538488656282425, "rewards/rejected": -0.34988003969192505, "step": 177 }, { "epoch": 0.487337440109514, "grad_norm": 5.719078063964844, "learning_rate": 9.756164383561644e-07, "log_odds_chosen": -1.1858198642730713, "log_odds_ratio": -1.6140249967575073, "logits/chosen": -0.006408404558897018, "logits/rejected": 0.029750246554613113, "logps/chosen": -4.8120832443237305, "logps/rejected": -3.6597840785980225, "loss": 2.7612, "nll_loss": 2.599839925765991, "rewards/accuracies": 0.25, "rewards/chosen": -0.48120832443237305, "rewards/margins": -0.11522989720106125, "rewards/rejected": -0.3659784495830536, "step": 178 }, { "epoch": 0.49007529089664614, "grad_norm": 4.317660331726074, "learning_rate": 9.754794520547944e-07, "log_odds_chosen": 0.003009498119354248, "log_odds_ratio": -0.8360417485237122, "logits/chosen": -0.06573931872844696, "logits/rejected": -0.08107304573059082, "logps/chosen": -3.0959603786468506, "logps/rejected": -3.0659265518188477, "loss": 2.4819, "nll_loss": 2.398317337036133, "rewards/accuracies": 0.75, "rewards/chosen": -0.30959606170654297, "rewards/margins": -0.003003399819135666, "rewards/rejected": -0.3065926432609558, "step": 179 }, { "epoch": 0.4928131416837782, "grad_norm": 6.0648579597473145, "learning_rate": 9.753424657534246e-07, "log_odds_chosen": -1.136418104171753, "log_odds_ratio": -1.478827714920044, "logits/chosen": 0.22184476256370544, "logits/rejected": 0.38049083948135376, "logps/chosen": -4.247244358062744, "logps/rejected": -3.1341538429260254, "loss": 2.8311, "nll_loss": 2.6831958293914795, "rewards/accuracies": 0.125, "rewards/chosen": -0.4247244596481323, "rewards/margins": -0.11130904406309128, "rewards/rejected": -0.31341540813446045, "step": 180 }, { "epoch": 0.49555099247091033, "grad_norm": 5.293877124786377, "learning_rate": 9.752054794520548e-07, "log_odds_chosen": -0.7108347415924072, "log_odds_ratio": -1.2135283946990967, "logits/chosen": -0.114501953125, "logits/rejected": 0.0016976520419120789, "logps/chosen": -3.5424702167510986, "logps/rejected": -2.8453009128570557, "loss": 2.6966, "nll_loss": 2.575291633605957, "rewards/accuracies": 0.25, "rewards/chosen": -0.3542470335960388, "rewards/margins": -0.06971696764230728, "rewards/rejected": -0.28453004360198975, "step": 181 }, { "epoch": 0.49828884325804246, "grad_norm": 4.4454145431518555, "learning_rate": 9.750684931506848e-07, "log_odds_chosen": 0.8313325047492981, "log_odds_ratio": -0.6045193076133728, "logits/chosen": -0.09668722003698349, "logits/rejected": -0.18417684733867645, "logps/chosen": -2.7889745235443115, "logps/rejected": -3.592949390411377, "loss": 2.5378, "nll_loss": 2.477372169494629, "rewards/accuracies": 0.75, "rewards/chosen": -0.2788974642753601, "rewards/margins": 0.08039748668670654, "rewards/rejected": -0.35929492115974426, "step": 182 }, { "epoch": 0.5010266940451745, "grad_norm": 4.936166763305664, "learning_rate": 9.74931506849315e-07, "log_odds_chosen": 0.07937360554933548, "log_odds_ratio": -0.7206393480300903, "logits/chosen": 0.03173375502228737, "logits/rejected": 0.04514855146408081, "logps/chosen": -3.1268112659454346, "logps/rejected": -3.1723976135253906, "loss": 2.5823, "nll_loss": 2.5102548599243164, "rewards/accuracies": 0.5, "rewards/chosen": -0.3126811385154724, "rewards/margins": 0.00455867312848568, "rewards/rejected": -0.31723979115486145, "step": 183 }, { "epoch": 0.5037645448323066, "grad_norm": 4.727694988250732, "learning_rate": 9.747945205479452e-07, "log_odds_chosen": -0.32824602723121643, "log_odds_ratio": -1.0813744068145752, "logits/chosen": 0.18486584722995758, "logits/rejected": 0.18327003717422485, "logps/chosen": -2.9881904125213623, "logps/rejected": -2.642911911010742, "loss": 2.5958, "nll_loss": 2.487666606903076, "rewards/accuracies": 0.375, "rewards/chosen": -0.29881903529167175, "rewards/margins": -0.03452785313129425, "rewards/rejected": -0.2642911970615387, "step": 184 }, { "epoch": 0.5065023956194388, "grad_norm": 4.675774097442627, "learning_rate": 9.746575342465752e-07, "log_odds_chosen": -0.01838904619216919, "log_odds_ratio": -0.8196979761123657, "logits/chosen": -0.07967939972877502, "logits/rejected": -0.09593412280082703, "logps/chosen": -2.3724403381347656, "logps/rejected": -2.3229966163635254, "loss": 2.498, "nll_loss": 2.416050434112549, "rewards/accuracies": 0.5, "rewards/chosen": -0.23724402487277985, "rewards/margins": -0.0049443673342466354, "rewards/rejected": -0.23229967057704926, "step": 185 }, { "epoch": 0.5092402464065708, "grad_norm": 4.861566543579102, "learning_rate": 9.745205479452054e-07, "log_odds_chosen": -0.21670600771903992, "log_odds_ratio": -0.851565957069397, "logits/chosen": 0.11710162460803986, "logits/rejected": 0.15376603603363037, "logps/chosen": -2.9813201427459717, "logps/rejected": -2.770460844039917, "loss": 2.5895, "nll_loss": 2.504347085952759, "rewards/accuracies": 0.5, "rewards/chosen": -0.2981320023536682, "rewards/margins": -0.02108592540025711, "rewards/rejected": -0.2770460844039917, "step": 186 }, { "epoch": 0.5119780971937029, "grad_norm": 4.790656566619873, "learning_rate": 9.743835616438356e-07, "log_odds_chosen": -0.18796110153198242, "log_odds_ratio": -0.8918939828872681, "logits/chosen": -0.09481202065944672, "logits/rejected": -0.06545832753181458, "logps/chosen": -2.9704036712646484, "logps/rejected": -2.8001868724823, "loss": 2.548, "nll_loss": 2.4587764739990234, "rewards/accuracies": 0.5, "rewards/chosen": -0.2970403730869293, "rewards/margins": -0.01702168956398964, "rewards/rejected": -0.28001868724823, "step": 187 }, { "epoch": 0.5147159479808351, "grad_norm": 4.91495418548584, "learning_rate": 9.742465753424656e-07, "log_odds_chosen": -0.29833802580833435, "log_odds_ratio": -1.0990138053894043, "logits/chosen": 0.0389089360833168, "logits/rejected": -0.057697977870702744, "logps/chosen": -3.38804030418396, "logps/rejected": -3.0775563716888428, "loss": 2.5838, "nll_loss": 2.4739136695861816, "rewards/accuracies": 0.25, "rewards/chosen": -0.3388040065765381, "rewards/margins": -0.031048400327563286, "rewards/rejected": -0.30775561928749084, "step": 188 }, { "epoch": 0.5174537987679672, "grad_norm": 4.433568000793457, "learning_rate": 9.741095890410958e-07, "log_odds_chosen": 0.6134500503540039, "log_odds_ratio": -0.461143434047699, "logits/chosen": 0.11126819252967834, "logits/rejected": 0.0729735866189003, "logps/chosen": -2.2976536750793457, "logps/rejected": -2.818859100341797, "loss": 2.4047, "nll_loss": 2.3585546016693115, "rewards/accuracies": 0.875, "rewards/chosen": -0.229765385389328, "rewards/margins": 0.052120521664619446, "rewards/rejected": -0.28188592195510864, "step": 189 }, { "epoch": 0.5201916495550992, "grad_norm": 4.967480182647705, "learning_rate": 9.73972602739726e-07, "log_odds_chosen": 0.08084918558597565, "log_odds_ratio": -1.1406877040863037, "logits/chosen": -0.03508254513144493, "logits/rejected": -0.025370176881551743, "logps/chosen": -3.1605374813079834, "logps/rejected": -3.2680437564849854, "loss": 2.6272, "nll_loss": 2.513132333755493, "rewards/accuracies": 0.375, "rewards/chosen": -0.3160537779331207, "rewards/margins": 0.01075058989226818, "rewards/rejected": -0.32680433988571167, "step": 190 }, { "epoch": 0.5229295003422314, "grad_norm": 4.932470798492432, "learning_rate": 9.738356164383562e-07, "log_odds_chosen": -0.5107154846191406, "log_odds_ratio": -1.1743547916412354, "logits/chosen": -0.016485095024108887, "logits/rejected": 0.12529578804969788, "logps/chosen": -3.248772621154785, "logps/rejected": -2.752161979675293, "loss": 2.7103, "nll_loss": 2.592893123626709, "rewards/accuracies": 0.375, "rewards/chosen": -0.3248772621154785, "rewards/margins": -0.049661051481962204, "rewards/rejected": -0.2752162218093872, "step": 191 }, { "epoch": 0.5256673511293635, "grad_norm": 5.5184807777404785, "learning_rate": 9.736986301369862e-07, "log_odds_chosen": -0.6176950931549072, "log_odds_ratio": -1.0882575511932373, "logits/chosen": 0.07275325059890747, "logits/rejected": 0.14534083008766174, "logps/chosen": -3.6760101318359375, "logps/rejected": -3.0744881629943848, "loss": 2.6841, "nll_loss": 2.5753185749053955, "rewards/accuracies": 0.25, "rewards/chosen": -0.36760103702545166, "rewards/margins": -0.060152214020490646, "rewards/rejected": -0.3074488043785095, "step": 192 }, { "epoch": 0.5284052019164955, "grad_norm": 5.375781059265137, "learning_rate": 9.735616438356164e-07, "log_odds_chosen": -0.8608238101005554, "log_odds_ratio": -1.4444029331207275, "logits/chosen": -0.08444515615701675, "logits/rejected": 0.05374491959810257, "logps/chosen": -3.761265754699707, "logps/rejected": -2.909226179122925, "loss": 2.6653, "nll_loss": 2.520897626876831, "rewards/accuracies": 0.375, "rewards/chosen": -0.37612658739089966, "rewards/margins": -0.08520396053791046, "rewards/rejected": -0.2909226417541504, "step": 193 }, { "epoch": 0.5311430527036276, "grad_norm": 5.682226181030273, "learning_rate": 9.734246575342466e-07, "log_odds_chosen": 0.18341220915317535, "log_odds_ratio": -1.3826940059661865, "logits/chosen": -0.0031421780586242676, "logits/rejected": -0.005838900804519653, "logps/chosen": -4.351650238037109, "logps/rejected": -4.480443000793457, "loss": 2.6517, "nll_loss": 2.5134785175323486, "rewards/accuracies": 0.5, "rewards/chosen": -0.43516504764556885, "rewards/margins": 0.012879226356744766, "rewards/rejected": -0.4480442702770233, "step": 194 }, { "epoch": 0.5338809034907598, "grad_norm": 4.768420219421387, "learning_rate": 9.732876712328766e-07, "log_odds_chosen": 0.03428906202316284, "log_odds_ratio": -0.9172612428665161, "logits/chosen": 0.06221834942698479, "logits/rejected": 0.022795680910348892, "logps/chosen": -3.0811524391174316, "logps/rejected": -3.1290321350097656, "loss": 2.5125, "nll_loss": 2.420729398727417, "rewards/accuracies": 0.5, "rewards/chosen": -0.30811524391174316, "rewards/margins": 0.004787974059581757, "rewards/rejected": -0.3129032254219055, "step": 195 }, { "epoch": 0.5366187542778919, "grad_norm": 5.055652141571045, "learning_rate": 9.731506849315068e-07, "log_odds_chosen": -0.34654322266578674, "log_odds_ratio": -1.4086599349975586, "logits/chosen": 0.00029753148555755615, "logits/rejected": 0.0144745372235775, "logps/chosen": -3.3312559127807617, "logps/rejected": -3.0288567543029785, "loss": 2.5921, "nll_loss": 2.451249599456787, "rewards/accuracies": 0.375, "rewards/chosen": -0.33312559127807617, "rewards/margins": -0.030239909887313843, "rewards/rejected": -0.30288568139076233, "step": 196 }, { "epoch": 0.5393566050650239, "grad_norm": 4.56836462020874, "learning_rate": 9.73013698630137e-07, "log_odds_chosen": -0.15029749274253845, "log_odds_ratio": -0.8775341510772705, "logits/chosen": -0.18987469375133514, "logits/rejected": -0.24570146203041077, "logps/chosen": -3.0914196968078613, "logps/rejected": -2.9028217792510986, "loss": 2.502, "nll_loss": 2.4142916202545166, "rewards/accuracies": 0.5, "rewards/chosen": -0.30914199352264404, "rewards/margins": -0.01885979250073433, "rewards/rejected": -0.2902821898460388, "step": 197 }, { "epoch": 0.5420944558521561, "grad_norm": 5.046235084533691, "learning_rate": 9.728767123287672e-07, "log_odds_chosen": 0.5753980278968811, "log_odds_ratio": -0.6464288234710693, "logits/chosen": 0.0419413261115551, "logits/rejected": 0.03623756021261215, "logps/chosen": -3.099954605102539, "logps/rejected": -3.66304612159729, "loss": 2.5766, "nll_loss": 2.5119638442993164, "rewards/accuracies": 0.625, "rewards/chosen": -0.30999547243118286, "rewards/margins": 0.05630914866924286, "rewards/rejected": -0.3663046061992645, "step": 198 }, { "epoch": 0.5448323066392882, "grad_norm": 4.5280938148498535, "learning_rate": 9.727397260273972e-07, "log_odds_chosen": 0.2929825782775879, "log_odds_ratio": -0.7984487414360046, "logits/chosen": -0.07890239357948303, "logits/rejected": -0.04174983873963356, "logps/chosen": -2.7341768741607666, "logps/rejected": -3.0045793056488037, "loss": 2.4856, "nll_loss": 2.405801296234131, "rewards/accuracies": 0.5, "rewards/chosen": -0.27341771125793457, "rewards/margins": 0.027040280401706696, "rewards/rejected": -0.30045798420906067, "step": 199 }, { "epoch": 0.5475701574264202, "grad_norm": 4.80644416809082, "learning_rate": 9.726027397260274e-07, "log_odds_chosen": -0.17267674207687378, "log_odds_ratio": -0.8732116222381592, "logits/chosen": 0.07231611013412476, "logits/rejected": 0.08704949915409088, "logps/chosen": -2.900470733642578, "logps/rejected": -2.7212300300598145, "loss": 2.5077, "nll_loss": 2.420384168624878, "rewards/accuracies": 0.5, "rewards/chosen": -0.2900471091270447, "rewards/margins": -0.01792406477034092, "rewards/rejected": -0.2721230387687683, "step": 200 }, { "epoch": 0.5503080082135524, "grad_norm": 5.472315311431885, "learning_rate": 9.724657534246576e-07, "log_odds_chosen": -0.7079372406005859, "log_odds_ratio": -1.2219041585922241, "logits/chosen": 0.010657936334609985, "logits/rejected": 0.07106409966945648, "logps/chosen": -3.6374552249908447, "logps/rejected": -2.955141067504883, "loss": 2.6687, "nll_loss": 2.5464725494384766, "rewards/accuracies": 0.25, "rewards/chosen": -0.3637455105781555, "rewards/margins": -0.06823141127824783, "rewards/rejected": -0.2955141067504883, "step": 201 }, { "epoch": 0.5530458590006845, "grad_norm": 4.688785552978516, "learning_rate": 9.723287671232876e-07, "log_odds_chosen": -0.17866221070289612, "log_odds_ratio": -1.06145441532135, "logits/chosen": 0.028373710811138153, "logits/rejected": -0.019224761053919792, "logps/chosen": -3.1008009910583496, "logps/rejected": -2.861412286758423, "loss": 2.4938, "nll_loss": 2.3876852989196777, "rewards/accuracies": 0.625, "rewards/chosen": -0.3100801110267639, "rewards/margins": -0.02393888682126999, "rewards/rejected": -0.28614121675491333, "step": 202 }, { "epoch": 0.5557837097878165, "grad_norm": 6.392763614654541, "learning_rate": 9.721917808219178e-07, "log_odds_chosen": -0.9123173952102661, "log_odds_ratio": -1.3406713008880615, "logits/chosen": -0.08564937114715576, "logits/rejected": 0.034728072583675385, "logps/chosen": -4.105902671813965, "logps/rejected": -3.217963695526123, "loss": 2.7527, "nll_loss": 2.6186366081237793, "rewards/accuracies": 0.25, "rewards/chosen": -0.4105902910232544, "rewards/margins": -0.08879388868808746, "rewards/rejected": -0.32179635763168335, "step": 203 }, { "epoch": 0.5585215605749486, "grad_norm": 6.148142337799072, "learning_rate": 9.72054794520548e-07, "log_odds_chosen": -1.6313835382461548, "log_odds_ratio": -1.9823875427246094, "logits/chosen": 0.06978648155927658, "logits/rejected": 0.1360906958580017, "logps/chosen": -4.903953552246094, "logps/rejected": -3.2903316020965576, "loss": 2.743, "nll_loss": 2.544734477996826, "rewards/accuracies": 0.25, "rewards/chosen": -0.49039530754089355, "rewards/margins": -0.1613621711730957, "rewards/rejected": -0.32903316617012024, "step": 204 }, { "epoch": 0.5612594113620808, "grad_norm": 4.425902366638184, "learning_rate": 9.71917808219178e-07, "log_odds_chosen": -0.05341004952788353, "log_odds_ratio": -0.7818436622619629, "logits/chosen": -0.08052800595760345, "logits/rejected": -0.15091833472251892, "logps/chosen": -2.6066737174987793, "logps/rejected": -2.5295445919036865, "loss": 2.4548, "nll_loss": 2.376581907272339, "rewards/accuracies": 0.625, "rewards/chosen": -0.2606673836708069, "rewards/margins": -0.007712924852967262, "rewards/rejected": -0.25295448303222656, "step": 205 }, { "epoch": 0.5639972621492129, "grad_norm": 5.450557708740234, "learning_rate": 9.717808219178082e-07, "log_odds_chosen": -1.786769151687622, "log_odds_ratio": -2.1269068717956543, "logits/chosen": -0.05265820771455765, "logits/rejected": -0.02471664734184742, "logps/chosen": -4.476875305175781, "logps/rejected": -2.75663423538208, "loss": 2.7999, "nll_loss": 2.5872044563293457, "rewards/accuracies": 0.125, "rewards/chosen": -0.4476875066757202, "rewards/margins": -0.17202407121658325, "rewards/rejected": -0.27566343545913696, "step": 206 }, { "epoch": 0.5667351129363449, "grad_norm": 4.888574123382568, "learning_rate": 9.716438356164384e-07, "log_odds_chosen": 0.25527557730674744, "log_odds_ratio": -0.7764156460762024, "logits/chosen": 0.25296345353126526, "logits/rejected": 0.1746755838394165, "logps/chosen": -2.747941732406616, "logps/rejected": -2.9955716133117676, "loss": 2.5071, "nll_loss": 2.429499387741089, "rewards/accuracies": 0.5, "rewards/chosen": -0.27479416131973267, "rewards/margins": 0.024762991815805435, "rewards/rejected": -0.2995571494102478, "step": 207 }, { "epoch": 0.5694729637234771, "grad_norm": 5.833363056182861, "learning_rate": 9.715068493150684e-07, "log_odds_chosen": 0.20316988229751587, "log_odds_ratio": -0.6556535363197327, "logits/chosen": 0.12788522243499756, "logits/rejected": 0.23254179954528809, "logps/chosen": -3.090296745300293, "logps/rejected": -3.2459876537323, "loss": 2.6052, "nll_loss": 2.539595127105713, "rewards/accuracies": 0.625, "rewards/chosen": -0.3090296983718872, "rewards/margins": 0.015569083392620087, "rewards/rejected": -0.3245987594127655, "step": 208 }, { "epoch": 0.5722108145106092, "grad_norm": 4.53002405166626, "learning_rate": 9.713698630136986e-07, "log_odds_chosen": 0.4518182873725891, "log_odds_ratio": -0.5520776510238647, "logits/chosen": 0.11482799798250198, "logits/rejected": -0.04107166826725006, "logps/chosen": -2.239680528640747, "logps/rejected": -2.6128909587860107, "loss": 2.358, "nll_loss": 2.3027453422546387, "rewards/accuracies": 0.75, "rewards/chosen": -0.223968043923378, "rewards/margins": 0.03732103854417801, "rewards/rejected": -0.2612890899181366, "step": 209 }, { "epoch": 0.5749486652977412, "grad_norm": 5.400753498077393, "learning_rate": 9.712328767123286e-07, "log_odds_chosen": -0.7007875442504883, "log_odds_ratio": -1.3340814113616943, "logits/chosen": -0.036059051752090454, "logits/rejected": 0.012500200420618057, "logps/chosen": -4.0171895027160645, "logps/rejected": -3.3155698776245117, "loss": 2.6202, "nll_loss": 2.4868276119232178, "rewards/accuracies": 0.25, "rewards/chosen": -0.40171897411346436, "rewards/margins": -0.07016197592020035, "rewards/rejected": -0.3315570056438446, "step": 210 }, { "epoch": 0.5776865160848734, "grad_norm": 5.221093654632568, "learning_rate": 9.710958904109588e-07, "log_odds_chosen": -0.9361579418182373, "log_odds_ratio": -1.478963851928711, "logits/chosen": -0.06763207167387009, "logits/rejected": -0.005244709551334381, "logps/chosen": -4.4167985916137695, "logps/rejected": -3.473646640777588, "loss": 2.655, "nll_loss": 2.507072687149048, "rewards/accuracies": 0.375, "rewards/chosen": -0.44167983531951904, "rewards/margins": -0.09431520104408264, "rewards/rejected": -0.3473646640777588, "step": 211 }, { "epoch": 0.5804243668720055, "grad_norm": 5.48545503616333, "learning_rate": 9.70958904109589e-07, "log_odds_chosen": -0.966913640499115, "log_odds_ratio": -1.5073566436767578, "logits/chosen": 0.04347855970263481, "logits/rejected": 0.13040167093276978, "logps/chosen": -4.577866077423096, "logps/rejected": -3.617612361907959, "loss": 2.7372, "nll_loss": 2.5865025520324707, "rewards/accuracies": 0.375, "rewards/chosen": -0.4577866196632385, "rewards/margins": -0.09602540731430054, "rewards/rejected": -0.361761212348938, "step": 212 }, { "epoch": 0.5831622176591376, "grad_norm": 4.936248302459717, "learning_rate": 9.70821917808219e-07, "log_odds_chosen": 0.33418527245521545, "log_odds_ratio": -0.6762725710868835, "logits/chosen": -0.011262152343988419, "logits/rejected": 0.017528725787997246, "logps/chosen": -3.1597886085510254, "logps/rejected": -3.431100845336914, "loss": 2.4764, "nll_loss": 2.4088175296783447, "rewards/accuracies": 0.625, "rewards/chosen": -0.31597888469696045, "rewards/margins": 0.027131209149956703, "rewards/rejected": -0.3431100845336914, "step": 213 }, { "epoch": 0.5859000684462696, "grad_norm": 5.074418067932129, "learning_rate": 9.706849315068492e-07, "log_odds_chosen": -0.5191650390625, "log_odds_ratio": -1.1832327842712402, "logits/chosen": -0.029420629143714905, "logits/rejected": 0.019552044570446014, "logps/chosen": -3.216453790664673, "logps/rejected": -2.7347376346588135, "loss": 2.5731, "nll_loss": 2.454737901687622, "rewards/accuracies": 0.375, "rewards/chosen": -0.3216454088687897, "rewards/margins": -0.048171643167734146, "rewards/rejected": -0.2734737694263458, "step": 214 }, { "epoch": 0.5886379192334018, "grad_norm": 5.034954071044922, "learning_rate": 9.705479452054794e-07, "log_odds_chosen": 0.3564872145652771, "log_odds_ratio": -0.7534281015396118, "logits/chosen": 0.21612955629825592, "logits/rejected": 0.15976157784461975, "logps/chosen": -3.3073720932006836, "logps/rejected": -3.652630090713501, "loss": 2.4629, "nll_loss": 2.3875198364257812, "rewards/accuracies": 0.625, "rewards/chosen": -0.33073723316192627, "rewards/margins": 0.0345258004963398, "rewards/rejected": -0.3652629852294922, "step": 215 }, { "epoch": 0.5913757700205339, "grad_norm": 4.872989654541016, "learning_rate": 9.704109589041094e-07, "log_odds_chosen": -0.900260865688324, "log_odds_ratio": -1.4228256940841675, "logits/chosen": -0.00891464576125145, "logits/rejected": -0.048518888652324677, "logps/chosen": -3.491506338119507, "logps/rejected": -2.61334490776062, "loss": 2.5944, "nll_loss": 2.452086925506592, "rewards/accuracies": 0.375, "rewards/chosen": -0.3491506576538086, "rewards/margins": -0.08781614899635315, "rewards/rejected": -0.26133447885513306, "step": 216 }, { "epoch": 0.5941136208076659, "grad_norm": 4.733345031738281, "learning_rate": 9.702739726027396e-07, "log_odds_chosen": -0.07901132106781006, "log_odds_ratio": -0.8713518381118774, "logits/chosen": -0.08362513780593872, "logits/rejected": -0.07922884076833725, "logps/chosen": -2.910022497177124, "logps/rejected": -2.8289499282836914, "loss": 2.5114, "nll_loss": 2.4242162704467773, "rewards/accuracies": 0.375, "rewards/chosen": -0.2910022735595703, "rewards/margins": -0.008107263594865799, "rewards/rejected": -0.28289496898651123, "step": 217 }, { "epoch": 0.5968514715947981, "grad_norm": 4.535912990570068, "learning_rate": 9.701369863013698e-07, "log_odds_chosen": -0.27715134620666504, "log_odds_ratio": -0.9853969812393188, "logits/chosen": 0.162321075797081, "logits/rejected": 0.16681139171123505, "logps/chosen": -2.669154644012451, "logps/rejected": -2.369086980819702, "loss": 2.4156, "nll_loss": 2.3170249462127686, "rewards/accuracies": 0.375, "rewards/chosen": -0.2669154703617096, "rewards/margins": -0.030006783083081245, "rewards/rejected": -0.2369087040424347, "step": 218 }, { "epoch": 0.5995893223819302, "grad_norm": 4.711472511291504, "learning_rate": 9.7e-07, "log_odds_chosen": -0.9781354069709778, "log_odds_ratio": -1.3936045169830322, "logits/chosen": 0.02769971825182438, "logits/rejected": 0.09565185010433197, "logps/chosen": -3.416177988052368, "logps/rejected": -2.468125820159912, "loss": 2.6245, "nll_loss": 2.485170364379883, "rewards/accuracies": 0.25, "rewards/chosen": -0.34161779284477234, "rewards/margins": -0.09480519592761993, "rewards/rejected": -0.2468125820159912, "step": 219 }, { "epoch": 0.6023271731690623, "grad_norm": 4.276717185974121, "learning_rate": 9.6986301369863e-07, "log_odds_chosen": -0.105948805809021, "log_odds_ratio": -0.854441225528717, "logits/chosen": 0.034444428980350494, "logits/rejected": -0.059279777109622955, "logps/chosen": -2.5543370246887207, "logps/rejected": -2.438819169998169, "loss": 2.5448, "nll_loss": 2.45932936668396, "rewards/accuracies": 0.375, "rewards/chosen": -0.25543370842933655, "rewards/margins": -0.011551782488822937, "rewards/rejected": -0.2438819408416748, "step": 220 }, { "epoch": 0.6050650239561944, "grad_norm": 4.842446804046631, "learning_rate": 9.697260273972602e-07, "log_odds_chosen": -0.7539954781532288, "log_odds_ratio": -1.2605398893356323, "logits/chosen": -0.13917329907417297, "logits/rejected": -0.1056232824921608, "logps/chosen": -2.8687727451324463, "logps/rejected": -2.17104434967041, "loss": 2.4722, "nll_loss": 2.3461344242095947, "rewards/accuracies": 0.375, "rewards/chosen": -0.28687727451324463, "rewards/margins": -0.06977284699678421, "rewards/rejected": -0.21710443496704102, "step": 221 }, { "epoch": 0.6078028747433265, "grad_norm": 4.6206841468811035, "learning_rate": 9.695890410958904e-07, "log_odds_chosen": -0.4367074966430664, "log_odds_ratio": -1.3252662420272827, "logits/chosen": -0.02302567847073078, "logits/rejected": -0.1338597685098648, "logps/chosen": -3.2961766719818115, "logps/rejected": -2.8465757369995117, "loss": 2.5593, "nll_loss": 2.4268033504486084, "rewards/accuracies": 0.5, "rewards/chosen": -0.3296176791191101, "rewards/margins": -0.04496008902788162, "rewards/rejected": -0.2846575975418091, "step": 222 }, { "epoch": 0.6105407255304586, "grad_norm": 5.621045112609863, "learning_rate": 9.694520547945204e-07, "log_odds_chosen": -0.9954736232757568, "log_odds_ratio": -1.5142240524291992, "logits/chosen": 0.02060522884130478, "logits/rejected": 0.08605127036571503, "logps/chosen": -3.872037887573242, "logps/rejected": -2.8832530975341797, "loss": 2.6634, "nll_loss": 2.511997699737549, "rewards/accuracies": 0.375, "rewards/chosen": -0.38720381259918213, "rewards/margins": -0.09887848049402237, "rewards/rejected": -0.28832530975341797, "step": 223 }, { "epoch": 0.6132785763175906, "grad_norm": 5.0545196533203125, "learning_rate": 9.693150684931506e-07, "log_odds_chosen": 0.04171772301197052, "log_odds_ratio": -0.7913470268249512, "logits/chosen": 0.1658046543598175, "logits/rejected": 0.12728136777877808, "logps/chosen": -3.1288793087005615, "logps/rejected": -3.122335433959961, "loss": 2.5564, "nll_loss": 2.477306842803955, "rewards/accuracies": 0.625, "rewards/chosen": -0.31288793683052063, "rewards/margins": -0.0006543863564729691, "rewards/rejected": -0.3122335374355316, "step": 224 }, { "epoch": 0.6160164271047228, "grad_norm": 4.26010274887085, "learning_rate": 9.691780821917808e-07, "log_odds_chosen": 0.38992834091186523, "log_odds_ratio": -0.6090903878211975, "logits/chosen": -0.06174871325492859, "logits/rejected": -0.12811921536922455, "logps/chosen": -2.1293153762817383, "logps/rejected": -2.4739010334014893, "loss": 2.3339, "nll_loss": 2.2729551792144775, "rewards/accuracies": 0.5, "rewards/chosen": -0.2129315435886383, "rewards/margins": 0.03445854038000107, "rewards/rejected": -0.24739010632038116, "step": 225 }, { "epoch": 0.6187542778918549, "grad_norm": 4.366180896759033, "learning_rate": 9.69041095890411e-07, "log_odds_chosen": -0.3921465277671814, "log_odds_ratio": -1.0299992561340332, "logits/chosen": -0.24370838701725006, "logits/rejected": -0.22976034879684448, "logps/chosen": -2.9898781776428223, "logps/rejected": -2.595611333847046, "loss": 2.4201, "nll_loss": 2.3171226978302, "rewards/accuracies": 0.375, "rewards/chosen": -0.29898780584335327, "rewards/margins": -0.03942666947841644, "rewards/rejected": -0.259561151266098, "step": 226 }, { "epoch": 0.621492128678987, "grad_norm": 5.095418453216553, "learning_rate": 9.68904109589041e-07, "log_odds_chosen": -0.034110844135284424, "log_odds_ratio": -0.8604307174682617, "logits/chosen": 0.08088222146034241, "logits/rejected": 0.1361086368560791, "logps/chosen": -2.8984408378601074, "logps/rejected": -2.8804736137390137, "loss": 2.4553, "nll_loss": 2.369230270385742, "rewards/accuracies": 0.5, "rewards/chosen": -0.2898440957069397, "rewards/margins": -0.0017967205494642258, "rewards/rejected": -0.2880473732948303, "step": 227 }, { "epoch": 0.6242299794661191, "grad_norm": 4.630975246429443, "learning_rate": 9.687671232876712e-07, "log_odds_chosen": 0.004923015832901001, "log_odds_ratio": -0.924522340297699, "logits/chosen": -0.12713439762592316, "logits/rejected": -0.10261677205562592, "logps/chosen": -2.7804510593414307, "logps/rejected": -2.7388477325439453, "loss": 2.4858, "nll_loss": 2.393364667892456, "rewards/accuracies": 0.625, "rewards/chosen": -0.278045117855072, "rewards/margins": -0.004160307347774506, "rewards/rejected": -0.2738848030567169, "step": 228 }, { "epoch": 0.6269678302532512, "grad_norm": 5.291557312011719, "learning_rate": 9.686301369863014e-07, "log_odds_chosen": -0.7399674654006958, "log_odds_ratio": -1.2710957527160645, "logits/chosen": 0.15899938344955444, "logits/rejected": 0.19629207253456116, "logps/chosen": -3.835099458694458, "logps/rejected": -3.129396915435791, "loss": 2.6275, "nll_loss": 2.5003976821899414, "rewards/accuracies": 0.375, "rewards/chosen": -0.38350993394851685, "rewards/margins": -0.07057024538516998, "rewards/rejected": -0.31293970346450806, "step": 229 }, { "epoch": 0.6297056810403833, "grad_norm": 5.270972728729248, "learning_rate": 9.684931506849314e-07, "log_odds_chosen": -0.6079094409942627, "log_odds_ratio": -1.286229133605957, "logits/chosen": 0.04328816384077072, "logits/rejected": 0.09669431298971176, "logps/chosen": -3.65733003616333, "logps/rejected": -3.0225064754486084, "loss": 2.5377, "nll_loss": 2.409119129180908, "rewards/accuracies": 0.5, "rewards/chosen": -0.36573299765586853, "rewards/margins": -0.06348233669996262, "rewards/rejected": -0.3022506833076477, "step": 230 }, { "epoch": 0.6324435318275154, "grad_norm": 5.010683059692383, "learning_rate": 9.683561643835616e-07, "log_odds_chosen": 0.8338879346847534, "log_odds_ratio": -0.8722488880157471, "logits/chosen": 0.14212310314178467, "logits/rejected": 0.07852759957313538, "logps/chosen": -3.008382797241211, "logps/rejected": -3.8228399753570557, "loss": 2.4921, "nll_loss": 2.404921531677246, "rewards/accuracies": 0.625, "rewards/chosen": -0.30083829164505005, "rewards/margins": 0.08144570887088776, "rewards/rejected": -0.3822839856147766, "step": 231 }, { "epoch": 0.6351813826146475, "grad_norm": 5.826498508453369, "learning_rate": 9.682191780821918e-07, "log_odds_chosen": -0.9627295732498169, "log_odds_ratio": -1.5075100660324097, "logits/chosen": -0.001488424837589264, "logits/rejected": 0.07295212894678116, "logps/chosen": -4.287321090698242, "logps/rejected": -3.2813563346862793, "loss": 2.6072, "nll_loss": 2.4564852714538574, "rewards/accuracies": 0.25, "rewards/chosen": -0.42873209714889526, "rewards/margins": -0.10059644281864166, "rewards/rejected": -0.3281356394290924, "step": 232 }, { "epoch": 0.6379192334017796, "grad_norm": 5.16738748550415, "learning_rate": 9.680821917808218e-07, "log_odds_chosen": -0.6411076188087463, "log_odds_ratio": -1.10576331615448, "logits/chosen": 0.10441717505455017, "logits/rejected": 0.1831427812576294, "logps/chosen": -3.1961231231689453, "logps/rejected": -2.5909812450408936, "loss": 2.5594, "nll_loss": 2.4488320350646973, "rewards/accuracies": 0.125, "rewards/chosen": -0.3196123242378235, "rewards/margins": -0.060514166951179504, "rewards/rejected": -0.2590981423854828, "step": 233 }, { "epoch": 0.6406570841889117, "grad_norm": 4.931056022644043, "learning_rate": 9.67945205479452e-07, "log_odds_chosen": -0.47604817152023315, "log_odds_ratio": -1.0486384630203247, "logits/chosen": 0.03187204897403717, "logits/rejected": 0.05573812127113342, "logps/chosen": -2.8156633377075195, "logps/rejected": -2.365269660949707, "loss": 2.5492, "nll_loss": 2.44431209564209, "rewards/accuracies": 0.5, "rewards/chosen": -0.2815663516521454, "rewards/margins": -0.04503939300775528, "rewards/rejected": -0.2365269511938095, "step": 234 }, { "epoch": 0.6433949349760438, "grad_norm": 5.466026782989502, "learning_rate": 9.678082191780823e-07, "log_odds_chosen": -0.9064662456512451, "log_odds_ratio": -1.4801669120788574, "logits/chosen": 0.10097973793745041, "logits/rejected": 0.1672208458185196, "logps/chosen": -3.680523157119751, "logps/rejected": -2.784043073654175, "loss": 2.6674, "nll_loss": 2.519345760345459, "rewards/accuracies": 0.25, "rewards/chosen": -0.36805233359336853, "rewards/margins": -0.08964803069829941, "rewards/rejected": -0.2784042954444885, "step": 235 }, { "epoch": 0.6461327857631759, "grad_norm": 5.453719139099121, "learning_rate": 9.676712328767122e-07, "log_odds_chosen": -0.7619211673736572, "log_odds_ratio": -1.4170602560043335, "logits/chosen": 0.09880535304546356, "logits/rejected": 0.14753390848636627, "logps/chosen": -3.4801886081695557, "logps/rejected": -2.6824021339416504, "loss": 2.5592, "nll_loss": 2.417497158050537, "rewards/accuracies": 0.375, "rewards/chosen": -0.34801891446113586, "rewards/margins": -0.07977867126464844, "rewards/rejected": -0.26824021339416504, "step": 236 }, { "epoch": 0.648870636550308, "grad_norm": 5.088179588317871, "learning_rate": 9.675342465753424e-07, "log_odds_chosen": -0.06595081090927124, "log_odds_ratio": -0.9074616432189941, "logits/chosen": -0.11095093190670013, "logits/rejected": -0.11336223036050797, "logps/chosen": -3.1971261501312256, "logps/rejected": -3.1065306663513184, "loss": 2.5077, "nll_loss": 2.4169678688049316, "rewards/accuracies": 0.625, "rewards/chosen": -0.31971263885498047, "rewards/margins": -0.009059546515345573, "rewards/rejected": -0.31065306067466736, "step": 237 }, { "epoch": 0.6516084873374401, "grad_norm": 5.246406078338623, "learning_rate": 9.673972602739724e-07, "log_odds_chosen": 0.12204255163669586, "log_odds_ratio": -0.8208588361740112, "logits/chosen": 0.11362063884735107, "logits/rejected": 0.20009857416152954, "logps/chosen": -3.343703031539917, "logps/rejected": -3.456242561340332, "loss": 2.5617, "nll_loss": 2.4796485900878906, "rewards/accuracies": 0.375, "rewards/chosen": -0.33437031507492065, "rewards/margins": 0.011253952980041504, "rewards/rejected": -0.34562426805496216, "step": 238 }, { "epoch": 0.6543463381245722, "grad_norm": 4.972338676452637, "learning_rate": 9.672602739726026e-07, "log_odds_chosen": -0.28728535771369934, "log_odds_ratio": -0.9575276970863342, "logits/chosen": -0.06647904217243195, "logits/rejected": -0.10147391259670258, "logps/chosen": -3.1723718643188477, "logps/rejected": -2.8632960319519043, "loss": 2.4909, "nll_loss": 2.395124673843384, "rewards/accuracies": 0.5, "rewards/chosen": -0.3172371983528137, "rewards/margins": -0.03090756945312023, "rewards/rejected": -0.28632962703704834, "step": 239 }, { "epoch": 0.6570841889117043, "grad_norm": 4.240230083465576, "learning_rate": 9.671232876712329e-07, "log_odds_chosen": -0.06749477982521057, "log_odds_ratio": -0.7866199016571045, "logits/chosen": 0.12174589931964874, "logits/rejected": 0.05685564503073692, "logps/chosen": -2.3688979148864746, "logps/rejected": -2.294412851333618, "loss": 2.4225, "nll_loss": 2.3438515663146973, "rewards/accuracies": 0.625, "rewards/chosen": -0.2368898093700409, "rewards/margins": -0.007448505610227585, "rewards/rejected": -0.22944128513336182, "step": 240 }, { "epoch": 0.6598220396988365, "grad_norm": 4.895077228546143, "learning_rate": 9.669863013698628e-07, "log_odds_chosen": -1.2907705307006836, "log_odds_ratio": -1.7391321659088135, "logits/chosen": 0.007742535322904587, "logits/rejected": 0.05379335582256317, "logps/chosen": -3.5282106399536133, "logps/rejected": -2.2735164165496826, "loss": 2.6012, "nll_loss": 2.4272427558898926, "rewards/accuracies": 0.375, "rewards/chosen": -0.3528210520744324, "rewards/margins": -0.1254694163799286, "rewards/rejected": -0.22735163569450378, "step": 241 }, { "epoch": 0.6625598904859685, "grad_norm": 4.956612586975098, "learning_rate": 9.66849315068493e-07, "log_odds_chosen": -0.13756275177001953, "log_odds_ratio": -1.1570454835891724, "logits/chosen": -0.09499455988407135, "logits/rejected": -0.04838539659976959, "logps/chosen": -3.602815866470337, "logps/rejected": -3.4627082347869873, "loss": 2.5347, "nll_loss": 2.4190053939819336, "rewards/accuracies": 0.375, "rewards/chosen": -0.3602815866470337, "rewards/margins": -0.014010753482580185, "rewards/rejected": -0.3462708294391632, "step": 242 }, { "epoch": 0.6652977412731006, "grad_norm": 5.6988091468811035, "learning_rate": 9.667123287671233e-07, "log_odds_chosen": -0.6762851476669312, "log_odds_ratio": -1.232023000717163, "logits/chosen": -0.042998772114515305, "logits/rejected": -0.0382816381752491, "logps/chosen": -4.0183000564575195, "logps/rejected": -3.3311338424682617, "loss": 2.5877, "nll_loss": 2.464539051055908, "rewards/accuracies": 0.375, "rewards/chosen": -0.4018300175666809, "rewards/margins": -0.06871665269136429, "rewards/rejected": -0.3331134021282196, "step": 243 }, { "epoch": 0.6680355920602327, "grad_norm": 5.222919940948486, "learning_rate": 9.665753424657532e-07, "log_odds_chosen": -0.9087775945663452, "log_odds_ratio": -1.3776352405548096, "logits/chosen": 0.021230269223451614, "logits/rejected": 0.02046058513224125, "logps/chosen": -3.6322600841522217, "logps/rejected": -2.729060173034668, "loss": 2.5199, "nll_loss": 2.3821358680725098, "rewards/accuracies": 0.25, "rewards/chosen": -0.3632259964942932, "rewards/margins": -0.09031999111175537, "rewards/rejected": -0.27290600538253784, "step": 244 }, { "epoch": 0.6707734428473648, "grad_norm": 4.512066841125488, "learning_rate": 9.664383561643835e-07, "log_odds_chosen": -0.4119202494621277, "log_odds_ratio": -1.0302737951278687, "logits/chosen": 0.06104406341910362, "logits/rejected": 0.13229013979434967, "logps/chosen": -2.6698880195617676, "logps/rejected": -2.2515783309936523, "loss": 2.4907, "nll_loss": 2.3876378536224365, "rewards/accuracies": 0.5, "rewards/chosen": -0.2669888138771057, "rewards/margins": -0.041830964386463165, "rewards/rejected": -0.22515784204006195, "step": 245 }, { "epoch": 0.6735112936344969, "grad_norm": 4.622467041015625, "learning_rate": 9.663013698630137e-07, "log_odds_chosen": -0.1537056267261505, "log_odds_ratio": -0.9588910341262817, "logits/chosen": -0.14877519011497498, "logits/rejected": -0.16132807731628418, "logps/chosen": -2.9246201515197754, "logps/rejected": -2.746577262878418, "loss": 2.4164, "nll_loss": 2.3204853534698486, "rewards/accuracies": 0.375, "rewards/chosen": -0.29246199131011963, "rewards/margins": -0.017804289236664772, "rewards/rejected": -0.2746577262878418, "step": 246 }, { "epoch": 0.676249144421629, "grad_norm": 5.264472961425781, "learning_rate": 9.661643835616437e-07, "log_odds_chosen": -1.7058181762695312, "log_odds_ratio": -2.0175223350524902, "logits/chosen": 0.019923333078622818, "logits/rejected": 0.009107954800128937, "logps/chosen": -4.102169513702393, "logps/rejected": -2.4531898498535156, "loss": 2.573, "nll_loss": 2.3712046146392822, "rewards/accuracies": 0.125, "rewards/chosen": -0.41021695733070374, "rewards/margins": -0.16489797830581665, "rewards/rejected": -0.24531899392604828, "step": 247 }, { "epoch": 0.6789869952087612, "grad_norm": 4.891294956207275, "learning_rate": 9.660273972602739e-07, "log_odds_chosen": -0.6725013852119446, "log_odds_ratio": -1.2555632591247559, "logits/chosen": -0.0045211464166641235, "logits/rejected": -0.005360327661037445, "logps/chosen": -2.6541028022766113, "logps/rejected": -2.0300211906433105, "loss": 2.5249, "nll_loss": 2.399343729019165, "rewards/accuracies": 0.5, "rewards/chosen": -0.26541027426719666, "rewards/margins": -0.06240813434123993, "rewards/rejected": -0.20300212502479553, "step": 248 }, { "epoch": 0.6817248459958932, "grad_norm": 4.720438003540039, "learning_rate": 9.65890410958904e-07, "log_odds_chosen": -0.14600712060928345, "log_odds_ratio": -0.7853116989135742, "logits/chosen": 0.046549998223781586, "logits/rejected": 0.02017974853515625, "logps/chosen": -3.0175790786743164, "logps/rejected": -2.8607659339904785, "loss": 2.4103, "nll_loss": 2.331815242767334, "rewards/accuracies": 0.25, "rewards/chosen": -0.30175793170928955, "rewards/margins": -0.015681320801377296, "rewards/rejected": -0.2860765755176544, "step": 249 }, { "epoch": 0.6844626967830253, "grad_norm": 4.103271007537842, "learning_rate": 9.657534246575343e-07, "log_odds_chosen": 0.6991625428199768, "log_odds_ratio": -0.49338221549987793, "logits/chosen": 0.2439284473657608, "logits/rejected": 0.1742836982011795, "logps/chosen": -1.4747967720031738, "logps/rejected": -2.066518545150757, "loss": 2.3081, "nll_loss": 2.2588067054748535, "rewards/accuracies": 0.75, "rewards/chosen": -0.14747968316078186, "rewards/margins": 0.05917216092348099, "rewards/rejected": -0.20665183663368225, "step": 250 }, { "epoch": 0.6872005475701575, "grad_norm": 4.851373672485352, "learning_rate": 9.656164383561643e-07, "log_odds_chosen": 0.49505293369293213, "log_odds_ratio": -0.5109018087387085, "logits/chosen": -0.11137010902166367, "logits/rejected": -0.07439924776554108, "logps/chosen": -2.3758091926574707, "logps/rejected": -2.8177192211151123, "loss": 2.3654, "nll_loss": 2.3143253326416016, "rewards/accuracies": 0.875, "rewards/chosen": -0.23758092522621155, "rewards/margins": 0.04419100284576416, "rewards/rejected": -0.2817719280719757, "step": 251 }, { "epoch": 0.6899383983572895, "grad_norm": 4.194988250732422, "learning_rate": 9.654794520547945e-07, "log_odds_chosen": -0.16706587374210358, "log_odds_ratio": -0.9055578708648682, "logits/chosen": -0.032828111201524734, "logits/rejected": -0.08270447701215744, "logps/chosen": -2.642117500305176, "logps/rejected": -2.461799383163452, "loss": 2.3655, "nll_loss": 2.2749221324920654, "rewards/accuracies": 0.375, "rewards/chosen": -0.2642117738723755, "rewards/margins": -0.01803181692957878, "rewards/rejected": -0.2461799532175064, "step": 252 }, { "epoch": 0.6926762491444216, "grad_norm": 4.7835540771484375, "learning_rate": 9.653424657534247e-07, "log_odds_chosen": 0.014133721590042114, "log_odds_ratio": -0.8604475259780884, "logits/chosen": -0.12725526094436646, "logits/rejected": -0.16282197833061218, "logps/chosen": -3.15759539604187, "logps/rejected": -3.1173770427703857, "loss": 2.4233, "nll_loss": 2.337289810180664, "rewards/accuracies": 0.625, "rewards/chosen": -0.315759539604187, "rewards/margins": -0.004021856933832169, "rewards/rejected": -0.31173771619796753, "step": 253 }, { "epoch": 0.6954140999315537, "grad_norm": 5.804475784301758, "learning_rate": 9.652054794520549e-07, "log_odds_chosen": -0.697458028793335, "log_odds_ratio": -1.434964895248413, "logits/chosen": 0.12398525327444077, "logits/rejected": 0.23032936453819275, "logps/chosen": -3.9208734035491943, "logps/rejected": -3.2017884254455566, "loss": 2.6226, "nll_loss": 2.4791502952575684, "rewards/accuracies": 0.375, "rewards/chosen": -0.39208734035491943, "rewards/margins": -0.07190848141908646, "rewards/rejected": -0.3201788365840912, "step": 254 }, { "epoch": 0.6981519507186859, "grad_norm": 5.017906188964844, "learning_rate": 9.650684931506849e-07, "log_odds_chosen": -0.687363862991333, "log_odds_ratio": -1.1946018934249878, "logits/chosen": -0.09959954023361206, "logits/rejected": -0.113641157746315, "logps/chosen": -3.4275801181793213, "logps/rejected": -2.7506282329559326, "loss": 2.5603, "nll_loss": 2.4408223628997803, "rewards/accuracies": 0.125, "rewards/chosen": -0.34275805950164795, "rewards/margins": -0.06769519299268723, "rewards/rejected": -0.2750628590583801, "step": 255 }, { "epoch": 0.7008898015058179, "grad_norm": 5.27588415145874, "learning_rate": 9.64931506849315e-07, "log_odds_chosen": -0.06387044489383698, "log_odds_ratio": -0.8171401023864746, "logits/chosen": -0.001242200843989849, "logits/rejected": 0.04305408149957657, "logps/chosen": -3.3457534313201904, "logps/rejected": -3.2367782592773438, "loss": 2.5014, "nll_loss": 2.4197328090667725, "rewards/accuracies": 0.375, "rewards/chosen": -0.334575355052948, "rewards/margins": -0.0108975013718009, "rewards/rejected": -0.32367783784866333, "step": 256 }, { "epoch": 0.70362765229295, "grad_norm": 4.33807373046875, "learning_rate": 9.647945205479453e-07, "log_odds_chosen": -0.42685210704803467, "log_odds_ratio": -0.9862661361694336, "logits/chosen": -0.06481590121984482, "logits/rejected": -0.06549976766109467, "logps/chosen": -2.6907103061676025, "logps/rejected": -2.2821528911590576, "loss": 2.3664, "nll_loss": 2.267763137817383, "rewards/accuracies": 0.25, "rewards/chosen": -0.2690710127353668, "rewards/margins": -0.04085574299097061, "rewards/rejected": -0.2282152771949768, "step": 257 }, { "epoch": 0.7063655030800822, "grad_norm": 4.168562412261963, "learning_rate": 9.646575342465753e-07, "log_odds_chosen": 0.17367340624332428, "log_odds_ratio": -0.901726484298706, "logits/chosen": 0.04596823453903198, "logits/rejected": 0.009218793362379074, "logps/chosen": -2.410019636154175, "logps/rejected": -2.5284931659698486, "loss": 2.2642, "nll_loss": 2.174075126647949, "rewards/accuracies": 0.625, "rewards/chosen": -0.24100197851657867, "rewards/margins": 0.011847343295812607, "rewards/rejected": -0.2528493106365204, "step": 258 }, { "epoch": 0.7091033538672142, "grad_norm": 5.542229175567627, "learning_rate": 9.645205479452055e-07, "log_odds_chosen": -1.1772730350494385, "log_odds_ratio": -1.5301891565322876, "logits/chosen": 0.005259942263364792, "logits/rejected": 0.07962510734796524, "logps/chosen": -4.2319560050964355, "logps/rejected": -3.084150791168213, "loss": 2.6518, "nll_loss": 2.498778820037842, "rewards/accuracies": 0.25, "rewards/chosen": -0.42319560050964355, "rewards/margins": -0.1147804707288742, "rewards/rejected": -0.30841511487960815, "step": 259 }, { "epoch": 0.7118412046543463, "grad_norm": 4.7007036209106445, "learning_rate": 9.643835616438357e-07, "log_odds_chosen": 0.43698781728744507, "log_odds_ratio": -0.5893779397010803, "logits/chosen": 0.09531356394290924, "logits/rejected": 0.16362348198890686, "logps/chosen": -2.384662628173828, "logps/rejected": -2.753512382507324, "loss": 2.378, "nll_loss": 2.3190290927886963, "rewards/accuracies": 0.875, "rewards/chosen": -0.2384662926197052, "rewards/margins": 0.03688495233654976, "rewards/rejected": -0.27535122632980347, "step": 260 }, { "epoch": 0.7145790554414785, "grad_norm": 5.523900985717773, "learning_rate": 9.642465753424657e-07, "log_odds_chosen": -0.5140159130096436, "log_odds_ratio": -1.2176690101623535, "logits/chosen": -0.05991426482796669, "logits/rejected": 0.002989932894706726, "logps/chosen": -3.790980339050293, "logps/rejected": -3.274505138397217, "loss": 2.5624, "nll_loss": 2.440598487854004, "rewards/accuracies": 0.5, "rewards/chosen": -0.37909799814224243, "rewards/margins": -0.051647480577230453, "rewards/rejected": -0.32745054364204407, "step": 261 }, { "epoch": 0.7173169062286106, "grad_norm": 4.88407039642334, "learning_rate": 9.641095890410959e-07, "log_odds_chosen": -0.5485669374465942, "log_odds_ratio": -1.1416479349136353, "logits/chosen": -0.0016264989972114563, "logits/rejected": 0.006131622940301895, "logps/chosen": -3.1870853900909424, "logps/rejected": -2.6333229541778564, "loss": 2.3881, "nll_loss": 2.2739813327789307, "rewards/accuracies": 0.5, "rewards/chosen": -0.31870853900909424, "rewards/margins": -0.055376239120960236, "rewards/rejected": -0.2633323073387146, "step": 262 }, { "epoch": 0.7200547570157426, "grad_norm": 4.371847629547119, "learning_rate": 9.63972602739726e-07, "log_odds_chosen": -0.36282700300216675, "log_odds_ratio": -1.1005816459655762, "logits/chosen": -0.055019401013851166, "logits/rejected": -0.1498664766550064, "logps/chosen": -2.837162971496582, "logps/rejected": -2.482205390930176, "loss": 2.4246, "nll_loss": 2.314511775970459, "rewards/accuracies": 0.25, "rewards/chosen": -0.2837163209915161, "rewards/margins": -0.03549578785896301, "rewards/rejected": -0.2482205331325531, "step": 263 }, { "epoch": 0.7227926078028748, "grad_norm": 6.013926029205322, "learning_rate": 9.63835616438356e-07, "log_odds_chosen": -0.7580671310424805, "log_odds_ratio": -1.2269054651260376, "logits/chosen": 0.11311057209968567, "logits/rejected": 0.20190110802650452, "logps/chosen": -3.906663179397583, "logps/rejected": -3.1534194946289062, "loss": 2.5343, "nll_loss": 2.411609172821045, "rewards/accuracies": 0.25, "rewards/chosen": -0.39066633582115173, "rewards/margins": -0.07532437145709991, "rewards/rejected": -0.3153419494628906, "step": 264 }, { "epoch": 0.7255304585900069, "grad_norm": 5.960892200469971, "learning_rate": 9.636986301369863e-07, "log_odds_chosen": -1.0897349119186401, "log_odds_ratio": -1.5599761009216309, "logits/chosen": 0.04646892100572586, "logits/rejected": 0.12831038236618042, "logps/chosen": -4.525331020355225, "logps/rejected": -3.439751625061035, "loss": 2.6332, "nll_loss": 2.4772369861602783, "rewards/accuracies": 0.375, "rewards/chosen": -0.45253312587738037, "rewards/margins": -0.10855792462825775, "rewards/rejected": -0.34397515654563904, "step": 265 }, { "epoch": 0.7282683093771389, "grad_norm": 5.271402835845947, "learning_rate": 9.635616438356165e-07, "log_odds_chosen": 0.5599414110183716, "log_odds_ratio": -0.5822115540504456, "logits/chosen": 0.20522300899028778, "logits/rejected": 0.19670452177524567, "logps/chosen": -2.8880932331085205, "logps/rejected": -3.3950860500335693, "loss": 2.4394, "nll_loss": 2.3811845779418945, "rewards/accuracies": 0.75, "rewards/chosen": -0.28880932927131653, "rewards/margins": 0.050699301064014435, "rewards/rejected": -0.33950862288475037, "step": 266 }, { "epoch": 0.731006160164271, "grad_norm": 4.66467809677124, "learning_rate": 9.634246575342465e-07, "log_odds_chosen": -0.054757654666900635, "log_odds_ratio": -0.8105286359786987, "logits/chosen": -0.10566211491823196, "logits/rejected": -0.09994181990623474, "logps/chosen": -2.6612844467163086, "logps/rejected": -2.612445116043091, "loss": 2.4862, "nll_loss": 2.405160427093506, "rewards/accuracies": 0.5, "rewards/chosen": -0.26612845063209534, "rewards/margins": -0.004883935675024986, "rewards/rejected": -0.261244535446167, "step": 267 }, { "epoch": 0.7337440109514032, "grad_norm": 5.579357624053955, "learning_rate": 9.632876712328767e-07, "log_odds_chosen": -0.9269177913665771, "log_odds_ratio": -1.4138740301132202, "logits/chosen": 0.0033198557794094086, "logits/rejected": 0.08674738556146622, "logps/chosen": -3.816555976867676, "logps/rejected": -2.8881163597106934, "loss": 2.5845, "nll_loss": 2.4431471824645996, "rewards/accuracies": 0.25, "rewards/chosen": -0.38165560364723206, "rewards/margins": -0.09284396469593048, "rewards/rejected": -0.2888116240501404, "step": 268 }, { "epoch": 0.7364818617385352, "grad_norm": 5.113103866577148, "learning_rate": 9.631506849315067e-07, "log_odds_chosen": 0.6432868242263794, "log_odds_ratio": -0.7250003814697266, "logits/chosen": -0.10748923569917679, "logits/rejected": -0.14161139726638794, "logps/chosen": -2.9440712928771973, "logps/rejected": -3.5066967010498047, "loss": 2.3976, "nll_loss": 2.325108528137207, "rewards/accuracies": 0.625, "rewards/chosen": -0.2944071292877197, "rewards/margins": 0.0562625527381897, "rewards/rejected": -0.3506696820259094, "step": 269 }, { "epoch": 0.7392197125256673, "grad_norm": 4.729809284210205, "learning_rate": 9.630136986301369e-07, "log_odds_chosen": 0.057779461145401, "log_odds_ratio": -0.7561678290367126, "logits/chosen": 0.164201021194458, "logits/rejected": 0.17369025945663452, "logps/chosen": -2.567441463470459, "logps/rejected": -2.5884008407592773, "loss": 2.3461, "nll_loss": 2.2704381942749023, "rewards/accuracies": 0.625, "rewards/chosen": -0.2567441463470459, "rewards/margins": 0.002095947042107582, "rewards/rejected": -0.25884008407592773, "step": 270 }, { "epoch": 0.7419575633127995, "grad_norm": 5.445892333984375, "learning_rate": 9.62876712328767e-07, "log_odds_chosen": -0.3494798541069031, "log_odds_ratio": -1.0519611835479736, "logits/chosen": -0.013393672183156013, "logits/rejected": -8.747726678848267e-05, "logps/chosen": -3.219917058944702, "logps/rejected": -2.8621177673339844, "loss": 2.4702, "nll_loss": 2.364985466003418, "rewards/accuracies": 0.375, "rewards/chosen": -0.3219917416572571, "rewards/margins": -0.03577994182705879, "rewards/rejected": -0.2862117886543274, "step": 271 }, { "epoch": 0.7446954140999316, "grad_norm": 4.913464069366455, "learning_rate": 9.62739726027397e-07, "log_odds_chosen": -0.04950001835823059, "log_odds_ratio": -1.0411295890808105, "logits/chosen": -0.0507119856774807, "logits/rejected": -0.03279763460159302, "logps/chosen": -3.4097447395324707, "logps/rejected": -3.3496737480163574, "loss": 2.3964, "nll_loss": 2.292271375656128, "rewards/accuracies": 0.25, "rewards/chosen": -0.34097450971603394, "rewards/margins": -0.006007075309753418, "rewards/rejected": -0.33496740460395813, "step": 272 }, { "epoch": 0.7474332648870636, "grad_norm": 4.194859027862549, "learning_rate": 9.626027397260273e-07, "log_odds_chosen": 0.2737077474594116, "log_odds_ratio": -0.6369046568870544, "logits/chosen": -0.032620225101709366, "logits/rejected": -0.13435955345630646, "logps/chosen": -2.542234420776367, "logps/rejected": -2.792478084564209, "loss": 2.2923, "nll_loss": 2.2285609245300293, "rewards/accuracies": 0.75, "rewards/chosen": -0.25422343611717224, "rewards/margins": 0.025024380534887314, "rewards/rejected": -0.27924782037734985, "step": 273 }, { "epoch": 0.7501711156741958, "grad_norm": 5.725597858428955, "learning_rate": 9.624657534246575e-07, "log_odds_chosen": -0.6673886775970459, "log_odds_ratio": -1.1373510360717773, "logits/chosen": 0.10525862872600555, "logits/rejected": 0.16145560145378113, "logps/chosen": -3.808814287185669, "logps/rejected": -3.1579859256744385, "loss": 2.5108, "nll_loss": 2.3970324993133545, "rewards/accuracies": 0.25, "rewards/chosen": -0.3808814287185669, "rewards/margins": -0.0650828406214714, "rewards/rejected": -0.3157985806465149, "step": 274 }, { "epoch": 0.7529089664613279, "grad_norm": 5.065811634063721, "learning_rate": 9.623287671232875e-07, "log_odds_chosen": 1.0587977170944214, "log_odds_ratio": -0.8498696088790894, "logits/chosen": -0.009627774357795715, "logits/rejected": -0.14986708760261536, "logps/chosen": -3.0370395183563232, "logps/rejected": -4.017193794250488, "loss": 2.3549, "nll_loss": 2.2698974609375, "rewards/accuracies": 0.5, "rewards/chosen": -0.3037039637565613, "rewards/margins": 0.09801545739173889, "rewards/rejected": -0.40171942114830017, "step": 275 }, { "epoch": 0.75564681724846, "grad_norm": 5.303601264953613, "learning_rate": 9.621917808219177e-07, "log_odds_chosen": -0.11578185856342316, "log_odds_ratio": -0.995297908782959, "logits/chosen": 0.23089180886745453, "logits/rejected": 0.17972446978092194, "logps/chosen": -3.092944622039795, "logps/rejected": -2.9696149826049805, "loss": 2.4197, "nll_loss": 2.320190906524658, "rewards/accuracies": 0.5, "rewards/chosen": -0.3092944622039795, "rewards/margins": -0.012332966551184654, "rewards/rejected": -0.2969614863395691, "step": 276 }, { "epoch": 0.758384668035592, "grad_norm": 5.537681579589844, "learning_rate": 9.62054794520548e-07, "log_odds_chosen": -0.27272582054138184, "log_odds_ratio": -1.095238208770752, "logits/chosen": -0.016171781346201897, "logits/rejected": -0.03321462869644165, "logps/chosen": -3.737013101577759, "logps/rejected": -3.470128059387207, "loss": 2.5013, "nll_loss": 2.3918166160583496, "rewards/accuracies": 0.25, "rewards/chosen": -0.3737013041973114, "rewards/margins": -0.02668851986527443, "rewards/rejected": -0.34701278805732727, "step": 277 }, { "epoch": 0.7611225188227242, "grad_norm": 5.352672100067139, "learning_rate": 9.61917808219178e-07, "log_odds_chosen": 0.3073631823062897, "log_odds_ratio": -0.7442864179611206, "logits/chosen": 0.1629636287689209, "logits/rejected": 0.09259045869112015, "logps/chosen": -2.7903101444244385, "logps/rejected": -3.086893081665039, "loss": 2.4098, "nll_loss": 2.3354012966156006, "rewards/accuracies": 0.5, "rewards/chosen": -0.27903103828430176, "rewards/margins": 0.029658284038305283, "rewards/rejected": -0.30868929624557495, "step": 278 }, { "epoch": 0.7638603696098563, "grad_norm": 5.157726764678955, "learning_rate": 9.61780821917808e-07, "log_odds_chosen": -1.115432858467102, "log_odds_ratio": -1.5759310722351074, "logits/chosen": -0.10080602765083313, "logits/rejected": -0.03657233342528343, "logps/chosen": -3.8261032104492188, "logps/rejected": -2.731342077255249, "loss": 2.515, "nll_loss": 2.3574259281158447, "rewards/accuracies": 0.375, "rewards/chosen": -0.3826103210449219, "rewards/margins": -0.10947611182928085, "rewards/rejected": -0.2731342017650604, "step": 279 }, { "epoch": 0.7665982203969883, "grad_norm": 5.971823215484619, "learning_rate": 9.616438356164383e-07, "log_odds_chosen": 0.07333554327487946, "log_odds_ratio": -0.7277467846870422, "logits/chosen": 0.09004412591457367, "logits/rejected": 0.1593427062034607, "logps/chosen": -4.439670085906982, "logps/rejected": -4.512580871582031, "loss": 2.5519, "nll_loss": 2.479135513305664, "rewards/accuracies": 0.5, "rewards/chosen": -0.44396698474884033, "rewards/margins": 0.007291123270988464, "rewards/rejected": -0.45125812292099, "step": 280 }, { "epoch": 0.7693360711841205, "grad_norm": 4.171647548675537, "learning_rate": 9.615068493150685e-07, "log_odds_chosen": 0.30183449387550354, "log_odds_ratio": -0.6507082581520081, "logits/chosen": -0.004605688154697418, "logits/rejected": 0.007779296487569809, "logps/chosen": -2.037020683288574, "logps/rejected": -2.318331480026245, "loss": 2.2507, "nll_loss": 2.1856095790863037, "rewards/accuracies": 0.625, "rewards/chosen": -0.20370206236839294, "rewards/margins": 0.028131086379289627, "rewards/rejected": -0.23183314502239227, "step": 281 }, { "epoch": 0.7720739219712526, "grad_norm": 4.1006855964660645, "learning_rate": 9.613698630136985e-07, "log_odds_chosen": 0.49364665150642395, "log_odds_ratio": -0.6261439323425293, "logits/chosen": 0.24122962355613708, "logits/rejected": 0.15461409091949463, "logps/chosen": -2.1450886726379395, "logps/rejected": -2.578157901763916, "loss": 2.2666, "nll_loss": 2.2039778232574463, "rewards/accuracies": 0.875, "rewards/chosen": -0.21450886130332947, "rewards/margins": 0.04330693185329437, "rewards/rejected": -0.25781577825546265, "step": 282 }, { "epoch": 0.7748117727583846, "grad_norm": 4.4697184562683105, "learning_rate": 9.612328767123287e-07, "log_odds_chosen": 0.033054664731025696, "log_odds_ratio": -0.8718881607055664, "logits/chosen": 0.0022184960544109344, "logits/rejected": -0.03685139864683151, "logps/chosen": -3.0170602798461914, "logps/rejected": -3.010546922683716, "loss": 2.3809, "nll_loss": 2.2936973571777344, "rewards/accuracies": 0.5, "rewards/chosen": -0.3017060160636902, "rewards/margins": -0.000651337206363678, "rewards/rejected": -0.3010547161102295, "step": 283 }, { "epoch": 0.7775496235455168, "grad_norm": 5.207334041595459, "learning_rate": 9.61095890410959e-07, "log_odds_chosen": -0.09799926728010178, "log_odds_ratio": -0.7834513187408447, "logits/chosen": -0.1040889322757721, "logits/rejected": -0.1173517256975174, "logps/chosen": -2.687650442123413, "logps/rejected": -2.578873872756958, "loss": 2.3986, "nll_loss": 2.320225238800049, "rewards/accuracies": 0.5, "rewards/chosen": -0.26876506209373474, "rewards/margins": -0.010877659544348717, "rewards/rejected": -0.2578873932361603, "step": 284 }, { "epoch": 0.7802874743326489, "grad_norm": 4.490034580230713, "learning_rate": 9.609589041095891e-07, "log_odds_chosen": 0.86125248670578, "log_odds_ratio": -0.8177599906921387, "logits/chosen": -0.0003189966082572937, "logits/rejected": -0.09300240129232407, "logps/chosen": -2.765592575073242, "logps/rejected": -3.6236684322357178, "loss": 2.3583, "nll_loss": 2.2765398025512695, "rewards/accuracies": 0.625, "rewards/chosen": -0.2765592932701111, "rewards/margins": 0.08580759167671204, "rewards/rejected": -0.36236685514450073, "step": 285 }, { "epoch": 0.783025325119781, "grad_norm": 5.082967281341553, "learning_rate": 9.608219178082191e-07, "log_odds_chosen": -0.45209240913391113, "log_odds_ratio": -1.0655745267868042, "logits/chosen": -0.04843525215983391, "logits/rejected": 0.0009681060910224915, "logps/chosen": -3.02764892578125, "logps/rejected": -2.5716512203216553, "loss": 2.4024, "nll_loss": 2.2958335876464844, "rewards/accuracies": 0.5, "rewards/chosen": -0.302764892578125, "rewards/margins": -0.04559978470206261, "rewards/rejected": -0.2571651339530945, "step": 286 }, { "epoch": 0.785763175906913, "grad_norm": 5.601000785827637, "learning_rate": 9.606849315068493e-07, "log_odds_chosen": -1.3213807344436646, "log_odds_ratio": -1.802513837814331, "logits/chosen": -0.12075196951627731, "logits/rejected": -0.08959996700286865, "logps/chosen": -3.8492608070373535, "logps/rejected": -2.5614967346191406, "loss": 2.5062, "nll_loss": 2.325985908508301, "rewards/accuracies": 0.375, "rewards/chosen": -0.38492608070373535, "rewards/margins": -0.1287764012813568, "rewards/rejected": -0.25614964962005615, "step": 287 }, { "epoch": 0.7885010266940452, "grad_norm": 4.8353590965271, "learning_rate": 9.605479452054795e-07, "log_odds_chosen": -0.8575533032417297, "log_odds_ratio": -1.3608425855636597, "logits/chosen": -0.006437338888645172, "logits/rejected": 0.04307781159877777, "logps/chosen": -3.2894580364227295, "logps/rejected": -2.4804649353027344, "loss": 2.4624, "nll_loss": 2.326303243637085, "rewards/accuracies": 0.25, "rewards/chosen": -0.3289458155632019, "rewards/margins": -0.08089933544397354, "rewards/rejected": -0.24804648756980896, "step": 288 }, { "epoch": 0.7912388774811773, "grad_norm": 4.6036224365234375, "learning_rate": 9.604109589041095e-07, "log_odds_chosen": -0.35572776198387146, "log_odds_ratio": -0.9905093908309937, "logits/chosen": 0.1723451316356659, "logits/rejected": 0.1139339804649353, "logps/chosen": -2.626804828643799, "logps/rejected": -2.2760887145996094, "loss": 2.4015, "nll_loss": 2.302426338195801, "rewards/accuracies": 0.375, "rewards/chosen": -0.2626805305480957, "rewards/margins": -0.03507161885499954, "rewards/rejected": -0.22760888934135437, "step": 289 }, { "epoch": 0.7939767282683093, "grad_norm": 5.997448921203613, "learning_rate": 9.602739726027397e-07, "log_odds_chosen": -0.6636552810668945, "log_odds_ratio": -1.1821995973587036, "logits/chosen": 0.11151806265115738, "logits/rejected": 0.16801077127456665, "logps/chosen": -3.861210823059082, "logps/rejected": -3.220517635345459, "loss": 2.5005, "nll_loss": 2.382324457168579, "rewards/accuracies": 0.375, "rewards/chosen": -0.38612109422683716, "rewards/margins": -0.06406931579113007, "rewards/rejected": -0.3220517635345459, "step": 290 }, { "epoch": 0.7967145790554415, "grad_norm": 4.915792942047119, "learning_rate": 9.6013698630137e-07, "log_odds_chosen": 0.5165778994560242, "log_odds_ratio": -0.6945069432258606, "logits/chosen": 0.033130913972854614, "logits/rejected": -0.011484481394290924, "logps/chosen": -3.041409969329834, "logps/rejected": -3.5228190422058105, "loss": 2.2723, "nll_loss": 2.2028017044067383, "rewards/accuracies": 0.5, "rewards/chosen": -0.30414098501205444, "rewards/margins": 0.04814090207219124, "rewards/rejected": -0.3522818982601166, "step": 291 }, { "epoch": 0.7994524298425736, "grad_norm": 5.907946586608887, "learning_rate": 9.6e-07, "log_odds_chosen": -0.871310293674469, "log_odds_ratio": -1.3271843194961548, "logits/chosen": 0.09547516703605652, "logits/rejected": 0.13401171565055847, "logps/chosen": -3.6381285190582275, "logps/rejected": -2.781693935394287, "loss": 2.4717, "nll_loss": 2.3389854431152344, "rewards/accuracies": 0.125, "rewards/chosen": -0.3638128638267517, "rewards/margins": -0.0856434777379036, "rewards/rejected": -0.2781693935394287, "step": 292 }, { "epoch": 0.8021902806297057, "grad_norm": 5.3876543045043945, "learning_rate": 9.598630136986301e-07, "log_odds_chosen": -0.4607892632484436, "log_odds_ratio": -1.1936414241790771, "logits/chosen": -0.0900409147143364, "logits/rejected": -0.09709371626377106, "logps/chosen": -3.5312280654907227, "logps/rejected": -3.0799787044525146, "loss": 2.4336, "nll_loss": 2.3141932487487793, "rewards/accuracies": 0.5, "rewards/chosen": -0.3531228303909302, "rewards/margins": -0.04512493312358856, "rewards/rejected": -0.3079978823661804, "step": 293 }, { "epoch": 0.8049281314168378, "grad_norm": 4.3576459884643555, "learning_rate": 9.597260273972603e-07, "log_odds_chosen": 0.5842174887657166, "log_odds_ratio": -0.6583250761032104, "logits/chosen": 0.19146248698234558, "logits/rejected": 0.055396806448698044, "logps/chosen": -2.287510395050049, "logps/rejected": -2.82122802734375, "loss": 2.3155, "nll_loss": 2.2496654987335205, "rewards/accuracies": 0.625, "rewards/chosen": -0.2287510335445404, "rewards/margins": 0.053371790796518326, "rewards/rejected": -0.28212282061576843, "step": 294 }, { "epoch": 0.8076659822039699, "grad_norm": 5.243520259857178, "learning_rate": 9.595890410958903e-07, "log_odds_chosen": 0.13092747330665588, "log_odds_ratio": -0.726117730140686, "logits/chosen": 0.010461833328008652, "logits/rejected": -0.08922155201435089, "logps/chosen": -2.8917293548583984, "logps/rejected": -3.005275249481201, "loss": 2.3152, "nll_loss": 2.242614984512329, "rewards/accuracies": 0.5, "rewards/chosen": -0.2891729474067688, "rewards/margins": 0.011354565620422363, "rewards/rejected": -0.3005274832248688, "step": 295 }, { "epoch": 0.810403832991102, "grad_norm": 4.96592903137207, "learning_rate": 9.594520547945205e-07, "log_odds_chosen": -0.3602064251899719, "log_odds_ratio": -1.0353882312774658, "logits/chosen": 0.12664277851581573, "logits/rejected": 0.10804586857557297, "logps/chosen": -2.8549416065216064, "logps/rejected": -2.482717752456665, "loss": 2.3959, "nll_loss": 2.292374610900879, "rewards/accuracies": 0.5, "rewards/chosen": -0.2854941487312317, "rewards/margins": -0.03722238168120384, "rewards/rejected": -0.24827177822589874, "step": 296 }, { "epoch": 0.813141683778234, "grad_norm": 6.362805366516113, "learning_rate": 9.593150684931507e-07, "log_odds_chosen": -1.4061282873153687, "log_odds_ratio": -1.7661280632019043, "logits/chosen": 0.0010157302021980286, "logits/rejected": 0.11778803169727325, "logps/chosen": -4.2730817794799805, "logps/rejected": -2.9020869731903076, "loss": 2.5443, "nll_loss": 2.3676416873931885, "rewards/accuracies": 0.25, "rewards/chosen": -0.42730820178985596, "rewards/margins": -0.137099489569664, "rewards/rejected": -0.29020869731903076, "step": 297 }, { "epoch": 0.8158795345653662, "grad_norm": 5.648478984832764, "learning_rate": 9.591780821917807e-07, "log_odds_chosen": -0.501911461353302, "log_odds_ratio": -1.1685287952423096, "logits/chosen": -0.017198629677295685, "logits/rejected": 0.04100743308663368, "logps/chosen": -3.492643117904663, "logps/rejected": -2.9940648078918457, "loss": 2.4645, "nll_loss": 2.347686290740967, "rewards/accuracies": 0.375, "rewards/chosen": -0.34926432371139526, "rewards/margins": -0.04985782504081726, "rewards/rejected": -0.299406498670578, "step": 298 }, { "epoch": 0.8186173853524983, "grad_norm": 5.114130020141602, "learning_rate": 9.59041095890411e-07, "log_odds_chosen": -0.7076602578163147, "log_odds_ratio": -1.2366498708724976, "logits/chosen": -0.0709017962217331, "logits/rejected": -0.06748402118682861, "logps/chosen": -3.269031524658203, "logps/rejected": -2.5747718811035156, "loss": 2.4209, "nll_loss": 2.2972609996795654, "rewards/accuracies": 0.375, "rewards/chosen": -0.32690316438674927, "rewards/margins": -0.06942597031593323, "rewards/rejected": -0.25747716426849365, "step": 299 }, { "epoch": 0.8213552361396304, "grad_norm": 5.218059062957764, "learning_rate": 9.58904109589041e-07, "log_odds_chosen": 0.5643274188041687, "log_odds_ratio": -0.5072710514068604, "logits/chosen": -0.03024657443165779, "logits/rejected": -0.07481090724468231, "logps/chosen": -2.937727928161621, "logps/rejected": -3.4714736938476562, "loss": 2.3772, "nll_loss": 2.3265221118927, "rewards/accuracies": 0.75, "rewards/chosen": -0.29377278685569763, "rewards/margins": 0.05337456613779068, "rewards/rejected": -0.3471473455429077, "step": 300 }, { "epoch": 0.8240930869267625, "grad_norm": 4.645852088928223, "learning_rate": 9.587671232876711e-07, "log_odds_chosen": -0.41951990127563477, "log_odds_ratio": -1.0079249143600464, "logits/chosen": -0.025463759899139404, "logits/rejected": -0.00837738811969757, "logps/chosen": -2.62092924118042, "logps/rejected": -2.210888385772705, "loss": 2.3187, "nll_loss": 2.217885971069336, "rewards/accuracies": 0.5, "rewards/chosen": -0.2620929479598999, "rewards/margins": -0.04100406914949417, "rewards/rejected": -0.22108885645866394, "step": 301 }, { "epoch": 0.8268309377138946, "grad_norm": 5.4579176902771, "learning_rate": 9.586301369863013e-07, "log_odds_chosen": -0.660114049911499, "log_odds_ratio": -1.3913776874542236, "logits/chosen": 0.2030487060546875, "logits/rejected": 0.2519386410713196, "logps/chosen": -3.5199103355407715, "logps/rejected": -2.8358936309814453, "loss": 2.4378, "nll_loss": 2.298693895339966, "rewards/accuracies": 0.625, "rewards/chosen": -0.35199102759361267, "rewards/margins": -0.06840168684720993, "rewards/rejected": -0.28358936309814453, "step": 302 }, { "epoch": 0.8295687885010267, "grad_norm": 5.50700044631958, "learning_rate": 9.584931506849313e-07, "log_odds_chosen": -0.3091774582862854, "log_odds_ratio": -1.01149320602417, "logits/chosen": 0.11401257663965225, "logits/rejected": 0.1594378650188446, "logps/chosen": -2.840402841567993, "logps/rejected": -2.5535128116607666, "loss": 2.4566, "nll_loss": 2.355464458465576, "rewards/accuracies": 0.5, "rewards/chosen": -0.28404030203819275, "rewards/margins": -0.028689013794064522, "rewards/rejected": -0.2553512752056122, "step": 303 }, { "epoch": 0.8323066392881588, "grad_norm": 6.330807685852051, "learning_rate": 9.583561643835615e-07, "log_odds_chosen": -0.4099533259868622, "log_odds_ratio": -1.0310155153274536, "logits/chosen": 0.07630832493305206, "logits/rejected": 0.21171417832374573, "logps/chosen": -4.072067737579346, "logps/rejected": -3.62715482711792, "loss": 2.4355, "nll_loss": 2.3323659896850586, "rewards/accuracies": 0.375, "rewards/chosen": -0.40720677375793457, "rewards/margins": -0.044491276144981384, "rewards/rejected": -0.36271554231643677, "step": 304 }, { "epoch": 0.8350444900752909, "grad_norm": 5.247747898101807, "learning_rate": 9.582191780821917e-07, "log_odds_chosen": -0.12782229483127594, "log_odds_ratio": -0.8028196692466736, "logits/chosen": 0.03584502264857292, "logits/rejected": 0.10656116902828217, "logps/chosen": -2.8593220710754395, "logps/rejected": -2.7303953170776367, "loss": 2.3425, "nll_loss": 2.262221336364746, "rewards/accuracies": 0.375, "rewards/chosen": -0.28593218326568604, "rewards/margins": -0.012892641127109528, "rewards/rejected": -0.2730395793914795, "step": 305 }, { "epoch": 0.837782340862423, "grad_norm": 4.311183452606201, "learning_rate": 9.58082191780822e-07, "log_odds_chosen": -0.27973055839538574, "log_odds_ratio": -0.8907830715179443, "logits/chosen": 0.08888589590787888, "logits/rejected": -0.04322202876210213, "logps/chosen": -2.7225003242492676, "logps/rejected": -2.458800792694092, "loss": 2.3054, "nll_loss": 2.2163686752319336, "rewards/accuracies": 0.375, "rewards/chosen": -0.2722500264644623, "rewards/margins": -0.0263699721544981, "rewards/rejected": -0.24588006734848022, "step": 306 }, { "epoch": 0.840520191649555, "grad_norm": 4.505098342895508, "learning_rate": 9.57945205479452e-07, "log_odds_chosen": 0.84199059009552, "log_odds_ratio": -0.41251176595687866, "logits/chosen": 0.04607824608683586, "logits/rejected": -0.03395092487335205, "logps/chosen": -2.0615885257720947, "logps/rejected": -2.808293342590332, "loss": 2.1893, "nll_loss": 2.1480579376220703, "rewards/accuracies": 0.875, "rewards/chosen": -0.206158846616745, "rewards/margins": 0.07467049360275269, "rewards/rejected": -0.2808293402194977, "step": 307 }, { "epoch": 0.8432580424366872, "grad_norm": 4.829372406005859, "learning_rate": 9.578082191780821e-07, "log_odds_chosen": -0.4718676507472992, "log_odds_ratio": -1.0874550342559814, "logits/chosen": -0.10707922279834747, "logits/rejected": -0.11742310971021652, "logps/chosen": -2.7625694274902344, "logps/rejected": -2.292126178741455, "loss": 2.355, "nll_loss": 2.2462258338928223, "rewards/accuracies": 0.5, "rewards/chosen": -0.2762569189071655, "rewards/margins": -0.04704432189464569, "rewards/rejected": -0.22921261191368103, "step": 308 }, { "epoch": 0.8459958932238193, "grad_norm": 4.754515171051025, "learning_rate": 9.576712328767123e-07, "log_odds_chosen": 0.11311192810535431, "log_odds_ratio": -0.7260119915008545, "logits/chosen": 0.010975673794746399, "logits/rejected": -0.002652886090800166, "logps/chosen": -2.6639933586120605, "logps/rejected": -2.7610440254211426, "loss": 2.2791, "nll_loss": 2.2065305709838867, "rewards/accuracies": 0.5, "rewards/chosen": -0.2663993239402771, "rewards/margins": 0.009705064818263054, "rewards/rejected": -0.2761043906211853, "step": 309 }, { "epoch": 0.8487337440109514, "grad_norm": 4.719974994659424, "learning_rate": 9.575342465753423e-07, "log_odds_chosen": 0.9609853625297546, "log_odds_ratio": -0.6268738508224487, "logits/chosen": -0.04425850510597229, "logits/rejected": -0.13681691884994507, "logps/chosen": -2.340785026550293, "logps/rejected": -3.2263131141662598, "loss": 2.2377, "nll_loss": 2.1749725341796875, "rewards/accuracies": 0.625, "rewards/chosen": -0.234078511595726, "rewards/margins": 0.0885528028011322, "rewards/rejected": -0.322631299495697, "step": 310 }, { "epoch": 0.8514715947980835, "grad_norm": 4.615502834320068, "learning_rate": 9.573972602739725e-07, "log_odds_chosen": 1.26601243019104, "log_odds_ratio": -0.5222892165184021, "logits/chosen": -0.1766570806503296, "logits/rejected": -0.23226293921470642, "logps/chosen": -2.1028199195861816, "logps/rejected": -3.2804388999938965, "loss": 2.2562, "nll_loss": 2.2039592266082764, "rewards/accuracies": 0.75, "rewards/chosen": -0.21028198301792145, "rewards/margins": 0.11776190251111984, "rewards/rejected": -0.3280438780784607, "step": 311 }, { "epoch": 0.8542094455852156, "grad_norm": 5.290574550628662, "learning_rate": 9.572602739726027e-07, "log_odds_chosen": 0.12040485441684723, "log_odds_ratio": -0.822729229927063, "logits/chosen": -0.0002451203763484955, "logits/rejected": -0.026701632887125015, "logps/chosen": -3.242685079574585, "logps/rejected": -3.3489584922790527, "loss": 2.3978, "nll_loss": 2.315546989440918, "rewards/accuracies": 0.5, "rewards/chosen": -0.32426851987838745, "rewards/margins": 0.010627355426549911, "rewards/rejected": -0.3348958492279053, "step": 312 }, { "epoch": 0.8569472963723477, "grad_norm": 5.083124160766602, "learning_rate": 9.57123287671233e-07, "log_odds_chosen": -0.2976799011230469, "log_odds_ratio": -1.1048885583877563, "logits/chosen": -0.019574467092752457, "logits/rejected": -0.05745375156402588, "logps/chosen": -3.1671009063720703, "logps/rejected": -2.8431358337402344, "loss": 2.37, "nll_loss": 2.2594971656799316, "rewards/accuracies": 0.5, "rewards/chosen": -0.31671005487442017, "rewards/margins": -0.03239649534225464, "rewards/rejected": -0.2843135893344879, "step": 313 }, { "epoch": 0.8596851471594799, "grad_norm": 5.899298667907715, "learning_rate": 9.56986301369863e-07, "log_odds_chosen": -1.2578356266021729, "log_odds_ratio": -1.6289786100387573, "logits/chosen": -0.06131450831890106, "logits/rejected": 0.07506481558084488, "logps/chosen": -3.5959060192108154, "logps/rejected": -2.37673020362854, "loss": 2.4201, "nll_loss": 2.257204532623291, "rewards/accuracies": 0.25, "rewards/chosen": -0.3595905900001526, "rewards/margins": -0.12191757559776306, "rewards/rejected": -0.23767304420471191, "step": 314 }, { "epoch": 0.8624229979466119, "grad_norm": 5.433193206787109, "learning_rate": 9.568493150684931e-07, "log_odds_chosen": -0.3293965756893158, "log_odds_ratio": -0.9595505595207214, "logits/chosen": 0.0994613915681839, "logits/rejected": 0.18779830634593964, "logps/chosen": -2.97963809967041, "logps/rejected": -2.660881280899048, "loss": 2.3687, "nll_loss": 2.272714138031006, "rewards/accuracies": 0.625, "rewards/chosen": -0.29796379804611206, "rewards/margins": -0.03187567740678787, "rewards/rejected": -0.2660881280899048, "step": 315 }, { "epoch": 0.865160848733744, "grad_norm": 5.567021369934082, "learning_rate": 9.567123287671234e-07, "log_odds_chosen": -0.7451454997062683, "log_odds_ratio": -1.3156299591064453, "logits/chosen": 0.11195940524339676, "logits/rejected": 0.13121852278709412, "logps/chosen": -3.552522897720337, "logps/rejected": -2.8092174530029297, "loss": 2.4123, "nll_loss": 2.2807695865631104, "rewards/accuracies": 0.375, "rewards/chosen": -0.35525232553482056, "rewards/margins": -0.07433056086301804, "rewards/rejected": -0.2809217572212219, "step": 316 }, { "epoch": 0.8678986995208761, "grad_norm": 4.7793049812316895, "learning_rate": 9.565753424657533e-07, "log_odds_chosen": -0.046151138842105865, "log_odds_ratio": -0.786890983581543, "logits/chosen": 0.22416552901268005, "logits/rejected": 0.12725169956684113, "logps/chosen": -2.4080018997192383, "logps/rejected": -2.3321533203125, "loss": 2.2648, "nll_loss": 2.1860973834991455, "rewards/accuracies": 0.5, "rewards/chosen": -0.24080020189285278, "rewards/margins": -0.007584873586893082, "rewards/rejected": -0.2332153171300888, "step": 317 }, { "epoch": 0.8706365503080082, "grad_norm": 4.898028373718262, "learning_rate": 9.564383561643836e-07, "log_odds_chosen": 0.08327362686395645, "log_odds_ratio": -0.7198626399040222, "logits/chosen": -0.03377828374505043, "logits/rejected": -0.022671794518828392, "logps/chosen": -2.612813711166382, "logps/rejected": -2.6613190174102783, "loss": 2.271, "nll_loss": 2.199025869369507, "rewards/accuracies": 0.625, "rewards/chosen": -0.2612813413143158, "rewards/margins": 0.0048505403101444244, "rewards/rejected": -0.2661319077014923, "step": 318 }, { "epoch": 0.8733744010951403, "grad_norm": 5.062870025634766, "learning_rate": 9.563013698630138e-07, "log_odds_chosen": -1.0355581045150757, "log_odds_ratio": -1.6094142198562622, "logits/chosen": -0.1247478574514389, "logits/rejected": -0.1356336623430252, "logps/chosen": -3.2561497688293457, "logps/rejected": -2.2252707481384277, "loss": 2.3723, "nll_loss": 2.211331367492676, "rewards/accuracies": 0.375, "rewards/chosen": -0.32561495900154114, "rewards/margins": -0.1030879020690918, "rewards/rejected": -0.22252708673477173, "step": 319 }, { "epoch": 0.8761122518822724, "grad_norm": 5.154851913452148, "learning_rate": 9.561643835616437e-07, "log_odds_chosen": -0.18569780886173248, "log_odds_ratio": -0.877605140209198, "logits/chosen": -0.12625563144683838, "logits/rejected": -0.07753349840641022, "logps/chosen": -2.874152183532715, "logps/rejected": -2.7231051921844482, "loss": 2.4114, "nll_loss": 2.3236260414123535, "rewards/accuracies": 0.25, "rewards/chosen": -0.2874152362346649, "rewards/margins": -0.015104670077562332, "rewards/rejected": -0.2723105549812317, "step": 320 }, { "epoch": 0.8788501026694046, "grad_norm": 6.551784038543701, "learning_rate": 9.56027397260274e-07, "log_odds_chosen": -0.6758964657783508, "log_odds_ratio": -1.3456844091415405, "logits/chosen": 0.1223127618432045, "logits/rejected": 0.24480509757995605, "logps/chosen": -3.7319862842559814, "logps/rejected": -3.0844740867614746, "loss": 2.4694, "nll_loss": 2.3348233699798584, "rewards/accuracies": 0.25, "rewards/chosen": -0.37319862842559814, "rewards/margins": -0.06475124508142471, "rewards/rejected": -0.3084474205970764, "step": 321 }, { "epoch": 0.8815879534565366, "grad_norm": 5.321468830108643, "learning_rate": 9.558904109589042e-07, "log_odds_chosen": 0.29341819882392883, "log_odds_ratio": -0.7392905950546265, "logits/chosen": 0.10301992297172546, "logits/rejected": -0.0007280409336090088, "logps/chosen": -3.2099881172180176, "logps/rejected": -3.4847755432128906, "loss": 2.3682, "nll_loss": 2.294274091720581, "rewards/accuracies": 0.625, "rewards/chosen": -0.32099878787994385, "rewards/margins": 0.027478758245706558, "rewards/rejected": -0.3484775722026825, "step": 322 }, { "epoch": 0.8843258042436687, "grad_norm": 4.853733539581299, "learning_rate": 9.557534246575342e-07, "log_odds_chosen": -0.37773317098617554, "log_odds_ratio": -0.9806450605392456, "logits/chosen": 0.10240510106086731, "logits/rejected": 0.15055835247039795, "logps/chosen": -2.566410541534424, "logps/rejected": -2.214423179626465, "loss": 2.3218, "nll_loss": 2.223698854446411, "rewards/accuracies": 0.25, "rewards/chosen": -0.25664106011390686, "rewards/margins": -0.03519873321056366, "rewards/rejected": -0.2214423418045044, "step": 323 }, { "epoch": 0.8870636550308009, "grad_norm": 5.717052459716797, "learning_rate": 9.556164383561644e-07, "log_odds_chosen": -0.2182992696762085, "log_odds_ratio": -1.019476056098938, "logits/chosen": 0.08792001754045486, "logits/rejected": 0.10372407734394073, "logps/chosen": -3.804266929626465, "logps/rejected": -3.5790350437164307, "loss": 2.3862, "nll_loss": 2.284247875213623, "rewards/accuracies": 0.625, "rewards/chosen": -0.38042670488357544, "rewards/margins": -0.02252320572733879, "rewards/rejected": -0.35790348052978516, "step": 324 }, { "epoch": 0.8898015058179329, "grad_norm": 5.944345951080322, "learning_rate": 9.554794520547946e-07, "log_odds_chosen": -0.8188538551330566, "log_odds_ratio": -1.4038007259368896, "logits/chosen": 0.06603960692882538, "logits/rejected": 0.15175960958003998, "logps/chosen": -4.001237392425537, "logps/rejected": -3.1746068000793457, "loss": 2.4974, "nll_loss": 2.3570423126220703, "rewards/accuracies": 0.375, "rewards/chosen": -0.4001237750053406, "rewards/margins": -0.08266308903694153, "rewards/rejected": -0.31746065616607666, "step": 325 }, { "epoch": 0.892539356605065, "grad_norm": 4.901820182800293, "learning_rate": 9.553424657534246e-07, "log_odds_chosen": 0.11882494390010834, "log_odds_ratio": -0.764499306678772, "logits/chosen": 0.15142086148262024, "logits/rejected": 0.15988872945308685, "logps/chosen": -2.655449867248535, "logps/rejected": -2.740319013595581, "loss": 2.2843, "nll_loss": 2.2078118324279785, "rewards/accuracies": 0.375, "rewards/chosen": -0.26554495096206665, "rewards/margins": 0.00848693959414959, "rewards/rejected": -0.2740319073200226, "step": 326 }, { "epoch": 0.8952772073921971, "grad_norm": 5.18091344833374, "learning_rate": 9.552054794520548e-07, "log_odds_chosen": 1.0611191987991333, "log_odds_ratio": -0.6154732704162598, "logits/chosen": -0.12742829322814941, "logits/rejected": -0.10809778422117233, "logps/chosen": -2.8802084922790527, "logps/rejected": -3.921889543533325, "loss": 2.2855, "nll_loss": 2.2239651679992676, "rewards/accuracies": 0.625, "rewards/chosen": -0.2880208492279053, "rewards/margins": 0.1041681319475174, "rewards/rejected": -0.3921889662742615, "step": 327 }, { "epoch": 0.8980150581793293, "grad_norm": 5.231921195983887, "learning_rate": 9.55068493150685e-07, "log_odds_chosen": -0.5966106057167053, "log_odds_ratio": -1.1943600177764893, "logits/chosen": -0.16573409736156464, "logits/rejected": -0.15498748421669006, "logps/chosen": -3.346494674682617, "logps/rejected": -2.777541160583496, "loss": 2.4279, "nll_loss": 2.3084208965301514, "rewards/accuracies": 0.25, "rewards/chosen": -0.3346494734287262, "rewards/margins": -0.05689536780118942, "rewards/rejected": -0.2777540683746338, "step": 328 }, { "epoch": 0.9007529089664613, "grad_norm": 5.385656356811523, "learning_rate": 9.54931506849315e-07, "log_odds_chosen": 0.2303374856710434, "log_odds_ratio": -0.773637592792511, "logits/chosen": 0.1677294224500656, "logits/rejected": 0.1792013794183731, "logps/chosen": -3.2331080436706543, "logps/rejected": -3.409407138824463, "loss": 2.2679, "nll_loss": 2.190498113632202, "rewards/accuracies": 0.75, "rewards/chosen": -0.32331082224845886, "rewards/margins": 0.01762991026043892, "rewards/rejected": -0.3409407436847687, "step": 329 }, { "epoch": 0.9034907597535934, "grad_norm": 4.799583435058594, "learning_rate": 9.547945205479452e-07, "log_odds_chosen": -0.27554500102996826, "log_odds_ratio": -0.9181267023086548, "logits/chosen": 0.041767120361328125, "logits/rejected": -0.014444475993514061, "logps/chosen": -2.5502965450286865, "logps/rejected": -2.2853384017944336, "loss": 2.3193, "nll_loss": 2.2275278568267822, "rewards/accuracies": 0.5, "rewards/chosen": -0.25502967834472656, "rewards/margins": -0.026495827361941338, "rewards/rejected": -0.2285338193178177, "step": 330 }, { "epoch": 0.9062286105407256, "grad_norm": 4.895698070526123, "learning_rate": 9.546575342465752e-07, "log_odds_chosen": 1.3653838634490967, "log_odds_ratio": -0.5460960268974304, "logits/chosen": -0.14247268438339233, "logits/rejected": -0.19229257106781006, "logps/chosen": -2.386273145675659, "logps/rejected": -3.695711135864258, "loss": 2.2489, "nll_loss": 2.1942408084869385, "rewards/accuracies": 0.625, "rewards/chosen": -0.23862731456756592, "rewards/margins": 0.13094381988048553, "rewards/rejected": -0.36957114934921265, "step": 331 }, { "epoch": 0.9089664613278576, "grad_norm": 4.797516822814941, "learning_rate": 9.545205479452054e-07, "log_odds_chosen": -0.3804118037223816, "log_odds_ratio": -1.1656230688095093, "logits/chosen": 0.0786200687289238, "logits/rejected": 0.12935501337051392, "logps/chosen": -2.9785313606262207, "logps/rejected": -2.5760998725891113, "loss": 2.3178, "nll_loss": 2.2012691497802734, "rewards/accuracies": 0.75, "rewards/chosen": -0.29785311222076416, "rewards/margins": -0.04024314880371094, "rewards/rejected": -0.2576099634170532, "step": 332 }, { "epoch": 0.9117043121149897, "grad_norm": 6.2913970947265625, "learning_rate": 9.543835616438356e-07, "log_odds_chosen": -0.8000460863113403, "log_odds_ratio": -1.2626487016677856, "logits/chosen": 0.08815865218639374, "logits/rejected": 0.16732512414455414, "logps/chosen": -3.7077362537384033, "logps/rejected": -2.9244329929351807, "loss": 2.3853, "nll_loss": 2.2590513229370117, "rewards/accuracies": 0.25, "rewards/chosen": -0.3707736134529114, "rewards/margins": -0.07833033800125122, "rewards/rejected": -0.29244330525398254, "step": 333 }, { "epoch": 0.9144421629021219, "grad_norm": 4.983987331390381, "learning_rate": 9.542465753424656e-07, "log_odds_chosen": -0.4633328914642334, "log_odds_ratio": -1.1321659088134766, "logits/chosen": -0.13777944445610046, "logits/rejected": -0.14659631252288818, "logps/chosen": -2.577911615371704, "logps/rejected": -2.135427951812744, "loss": 2.3344, "nll_loss": 2.2211802005767822, "rewards/accuracies": 0.375, "rewards/chosen": -0.257791131734848, "rewards/margins": -0.04424834996461868, "rewards/rejected": -0.21354278922080994, "step": 334 }, { "epoch": 0.917180013689254, "grad_norm": 4.437222957611084, "learning_rate": 9.541095890410958e-07, "log_odds_chosen": 0.010298371315002441, "log_odds_ratio": -0.910836398601532, "logits/chosen": -0.10378243029117584, "logits/rejected": -0.13240619003772736, "logps/chosen": -2.481032609939575, "logps/rejected": -2.45648193359375, "loss": 2.2722, "nll_loss": 2.1810975074768066, "rewards/accuracies": 0.625, "rewards/chosen": -0.24810326099395752, "rewards/margins": -0.002455044537782669, "rewards/rejected": -0.24564820528030396, "step": 335 }, { "epoch": 0.919917864476386, "grad_norm": 5.739270210266113, "learning_rate": 9.53972602739726e-07, "log_odds_chosen": -1.1916135549545288, "log_odds_ratio": -1.7055732011795044, "logits/chosen": 0.004351543262600899, "logits/rejected": 0.07183244824409485, "logps/chosen": -4.552479267120361, "logps/rejected": -3.3523738384246826, "loss": 2.4949, "nll_loss": 2.324347972869873, "rewards/accuracies": 0.375, "rewards/chosen": -0.4552479386329651, "rewards/margins": -0.12001055479049683, "rewards/rejected": -0.33523738384246826, "step": 336 }, { "epoch": 0.9226557152635181, "grad_norm": 5.7922682762146, "learning_rate": 9.538356164383562e-07, "log_odds_chosen": -0.8809901475906372, "log_odds_ratio": -1.271897792816162, "logits/chosen": -0.17501579225063324, "logits/rejected": -0.01147528737783432, "logps/chosen": -3.374772548675537, "logps/rejected": -2.566555976867676, "loss": 2.3702, "nll_loss": 2.2430295944213867, "rewards/accuracies": 0.125, "rewards/chosen": -0.33747726678848267, "rewards/margins": -0.08082166314125061, "rewards/rejected": -0.25665563344955444, "step": 337 }, { "epoch": 0.9253935660506503, "grad_norm": 5.975912094116211, "learning_rate": 9.536986301369862e-07, "log_odds_chosen": -0.5684641003608704, "log_odds_ratio": -1.132134199142456, "logits/chosen": -0.013607076369225979, "logits/rejected": 0.05458371713757515, "logps/chosen": -3.3884620666503906, "logps/rejected": -2.8331570625305176, "loss": 2.3155, "nll_loss": 2.202335834503174, "rewards/accuracies": 0.375, "rewards/chosen": -0.33884620666503906, "rewards/margins": -0.05553050339221954, "rewards/rejected": -0.2833157181739807, "step": 338 }, { "epoch": 0.9281314168377823, "grad_norm": 5.594021797180176, "learning_rate": 9.535616438356165e-07, "log_odds_chosen": -0.8190889954566956, "log_odds_ratio": -1.2853299379348755, "logits/chosen": -0.08884536474943161, "logits/rejected": -0.023794464766979218, "logps/chosen": -3.3419182300567627, "logps/rejected": -2.5423333644866943, "loss": 2.4155, "nll_loss": 2.2869820594787598, "rewards/accuracies": 0.25, "rewards/chosen": -0.33419179916381836, "rewards/margins": -0.0799584686756134, "rewards/rejected": -0.25423333048820496, "step": 339 }, { "epoch": 0.9308692676249144, "grad_norm": 5.67239236831665, "learning_rate": 9.534246575342465e-07, "log_odds_chosen": -0.41745567321777344, "log_odds_ratio": -1.0849279165267944, "logits/chosen": 0.01635558158159256, "logits/rejected": 0.0991840660572052, "logps/chosen": -2.7249538898468018, "logps/rejected": -2.2929837703704834, "loss": 2.2896, "nll_loss": 2.1811320781707764, "rewards/accuracies": 0.5, "rewards/chosen": -0.2724953889846802, "rewards/margins": -0.043197017163038254, "rewards/rejected": -0.229298397898674, "step": 340 }, { "epoch": 0.9336071184120466, "grad_norm": 5.256378173828125, "learning_rate": 9.532876712328767e-07, "log_odds_chosen": 0.009260844439268112, "log_odds_ratio": -0.7381439208984375, "logits/chosen": 0.04067285358905792, "logits/rejected": 0.02105458825826645, "logps/chosen": -3.016925811767578, "logps/rejected": -3.0191824436187744, "loss": 2.2983, "nll_loss": 2.2244977951049805, "rewards/accuracies": 0.5, "rewards/chosen": -0.30169254541397095, "rewards/margins": 0.0002256873995065689, "rewards/rejected": -0.30191826820373535, "step": 341 }, { "epoch": 0.9363449691991786, "grad_norm": 6.628493309020996, "learning_rate": 9.531506849315069e-07, "log_odds_chosen": -0.7066621780395508, "log_odds_ratio": -1.3402385711669922, "logits/chosen": 0.11084595322608948, "logits/rejected": 0.1974620819091797, "logps/chosen": -3.9820895195007324, "logps/rejected": -3.2528743743896484, "loss": 2.4516, "nll_loss": 2.3175907135009766, "rewards/accuracies": 0.375, "rewards/chosen": -0.39820897579193115, "rewards/margins": -0.0729215145111084, "rewards/rejected": -0.32528746128082275, "step": 342 }, { "epoch": 0.9390828199863107, "grad_norm": 6.3991780281066895, "learning_rate": 9.530136986301369e-07, "log_odds_chosen": -0.5773782134056091, "log_odds_ratio": -1.1056394577026367, "logits/chosen": -0.08321630209684372, "logits/rejected": -0.004794776439666748, "logps/chosen": -3.5885889530181885, "logps/rejected": -3.0151424407958984, "loss": 2.3395, "nll_loss": 2.228929042816162, "rewards/accuracies": 0.25, "rewards/chosen": -0.3588588833808899, "rewards/margins": -0.05734465643763542, "rewards/rejected": -0.30151423811912537, "step": 343 }, { "epoch": 0.9418206707734429, "grad_norm": 6.2557597160339355, "learning_rate": 9.528767123287671e-07, "log_odds_chosen": 0.16504356265068054, "log_odds_ratio": -0.7717517018318176, "logits/chosen": 0.05918828770518303, "logits/rejected": 0.16545718908309937, "logps/chosen": -3.323375701904297, "logps/rejected": -3.4893641471862793, "loss": 2.3175, "nll_loss": 2.240293264389038, "rewards/accuracies": 0.375, "rewards/chosen": -0.3323376178741455, "rewards/margins": 0.01659882627427578, "rewards/rejected": -0.34893640875816345, "step": 344 }, { "epoch": 0.944558521560575, "grad_norm": 5.268024921417236, "learning_rate": 9.527397260273973e-07, "log_odds_chosen": -0.27311640977859497, "log_odds_ratio": -0.9755730628967285, "logits/chosen": -0.14798273146152496, "logits/rejected": -0.0898270308971405, "logps/chosen": -2.6541237831115723, "logps/rejected": -2.386308431625366, "loss": 2.2622, "nll_loss": 2.1646010875701904, "rewards/accuracies": 0.25, "rewards/chosen": -0.2654123902320862, "rewards/margins": -0.02678152360022068, "rewards/rejected": -0.23863084614276886, "step": 345 }, { "epoch": 0.947296372347707, "grad_norm": 5.007688522338867, "learning_rate": 9.526027397260274e-07, "log_odds_chosen": 0.06483595073223114, "log_odds_ratio": -0.7240989804267883, "logits/chosen": -0.07847673445940018, "logits/rejected": -0.07095802575349808, "logps/chosen": -2.6890830993652344, "logps/rejected": -2.7486016750335693, "loss": 2.3331, "nll_loss": 2.2606635093688965, "rewards/accuracies": 0.5, "rewards/chosen": -0.26890829205513, "rewards/margins": 0.005951846018433571, "rewards/rejected": -0.274860143661499, "step": 346 }, { "epoch": 0.9500342231348392, "grad_norm": 5.458993911743164, "learning_rate": 9.524657534246575e-07, "log_odds_chosen": -0.6827676296234131, "log_odds_ratio": -1.2118771076202393, "logits/chosen": -0.15365365147590637, "logits/rejected": -0.08766742050647736, "logps/chosen": -3.717073440551758, "logps/rejected": -3.0317013263702393, "loss": 2.3276, "nll_loss": 2.206434726715088, "rewards/accuracies": 0.375, "rewards/chosen": -0.37170737981796265, "rewards/margins": -0.06853723526000977, "rewards/rejected": -0.3031701445579529, "step": 347 }, { "epoch": 0.9527720739219713, "grad_norm": 4.88341760635376, "learning_rate": 9.523287671232876e-07, "log_odds_chosen": 0.06853142380714417, "log_odds_ratio": -0.6980263590812683, "logits/chosen": 0.10769668966531754, "logits/rejected": 0.02027609758079052, "logps/chosen": -2.64170503616333, "logps/rejected": -2.6718909740448, "loss": 2.2921, "nll_loss": 2.2223403453826904, "rewards/accuracies": 0.5, "rewards/chosen": -0.2641705274581909, "rewards/margins": 0.0030185654759407043, "rewards/rejected": -0.267189085483551, "step": 348 }, { "epoch": 0.9555099247091033, "grad_norm": 5.740365028381348, "learning_rate": 9.521917808219178e-07, "log_odds_chosen": -0.292941689491272, "log_odds_ratio": -0.9868268370628357, "logits/chosen": -0.0700315535068512, "logits/rejected": -0.022724878042936325, "logps/chosen": -3.4038584232330322, "logps/rejected": -3.10543155670166, "loss": 2.297, "nll_loss": 2.1983563899993896, "rewards/accuracies": 0.375, "rewards/chosen": -0.3403858542442322, "rewards/margins": -0.02984270080924034, "rewards/rejected": -0.3105431795120239, "step": 349 }, { "epoch": 0.9582477754962354, "grad_norm": 5.307773113250732, "learning_rate": 9.520547945205479e-07, "log_odds_chosen": -0.5950849056243896, "log_odds_ratio": -1.1488677263259888, "logits/chosen": 0.020130090415477753, "logits/rejected": 0.08824379742145538, "logps/chosen": -3.156156539916992, "logps/rejected": -2.5941579341888428, "loss": 2.2753, "nll_loss": 2.1604318618774414, "rewards/accuracies": 0.375, "rewards/chosen": -0.3156156539916992, "rewards/margins": -0.056199852377176285, "rewards/rejected": -0.25941580533981323, "step": 350 }, { "epoch": 0.9609856262833676, "grad_norm": 6.419719219207764, "learning_rate": 9.51917808219178e-07, "log_odds_chosen": -0.7696012854576111, "log_odds_ratio": -1.3250603675842285, "logits/chosen": 0.02090797945857048, "logits/rejected": 0.14003735780715942, "logps/chosen": -4.037397861480713, "logps/rejected": -3.274852752685547, "loss": 2.3561, "nll_loss": 2.223580837249756, "rewards/accuracies": 0.5, "rewards/chosen": -0.4037398099899292, "rewards/margins": -0.07625453174114227, "rewards/rejected": -0.32748526334762573, "step": 351 }, { "epoch": 0.9637234770704997, "grad_norm": 6.622101783752441, "learning_rate": 9.517808219178082e-07, "log_odds_chosen": -1.1681026220321655, "log_odds_ratio": -1.5039796829223633, "logits/chosen": 0.009297149255871773, "logits/rejected": 0.15774960815906525, "logps/chosen": -4.130267143249512, "logps/rejected": -2.9922025203704834, "loss": 2.369, "nll_loss": 2.2185757160186768, "rewards/accuracies": 0.125, "rewards/chosen": -0.41302672028541565, "rewards/margins": -0.11380644142627716, "rewards/rejected": -0.2992202639579773, "step": 352 }, { "epoch": 0.9664613278576317, "grad_norm": 4.818763732910156, "learning_rate": 9.516438356164384e-07, "log_odds_chosen": 0.44318872690200806, "log_odds_ratio": -0.5535244941711426, "logits/chosen": 0.09398294985294342, "logits/rejected": 0.0383463054895401, "logps/chosen": -2.407134771347046, "logps/rejected": -2.7987306118011475, "loss": 2.1678, "nll_loss": 2.1124489307403564, "rewards/accuracies": 0.75, "rewards/chosen": -0.2407134771347046, "rewards/margins": 0.03915958106517792, "rewards/rejected": -0.2798730731010437, "step": 353 }, { "epoch": 0.9691991786447639, "grad_norm": 4.645961284637451, "learning_rate": 9.515068493150684e-07, "log_odds_chosen": -0.08634324371814728, "log_odds_ratio": -0.7639809846878052, "logits/chosen": -0.059451863169670105, "logits/rejected": -0.07406061142683029, "logps/chosen": -2.1483335494995117, "logps/rejected": -2.090611696243286, "loss": 2.116, "nll_loss": 2.0395572185516357, "rewards/accuracies": 0.5, "rewards/chosen": -0.2148333489894867, "rewards/margins": -0.005772178992629051, "rewards/rejected": -0.2090611755847931, "step": 354 }, { "epoch": 0.971937029431896, "grad_norm": 5.574306964874268, "learning_rate": 9.513698630136986e-07, "log_odds_chosen": -0.3418336510658264, "log_odds_ratio": -1.0843228101730347, "logits/chosen": -0.057479240000247955, "logits/rejected": -0.01798519492149353, "logps/chosen": -3.341172218322754, "logps/rejected": -2.9868106842041016, "loss": 2.3549, "nll_loss": 2.24649715423584, "rewards/accuracies": 0.625, "rewards/chosen": -0.33411723375320435, "rewards/margins": -0.035436153411865234, "rewards/rejected": -0.2986810803413391, "step": 355 }, { "epoch": 0.974674880219028, "grad_norm": 4.5619988441467285, "learning_rate": 9.512328767123288e-07, "log_odds_chosen": 0.6582419276237488, "log_odds_ratio": -0.629913866519928, "logits/chosen": 0.09678729623556137, "logits/rejected": -0.029396414756774902, "logps/chosen": -2.38191556930542, "logps/rejected": -2.9781270027160645, "loss": 2.1725, "nll_loss": 2.1094653606414795, "rewards/accuracies": 0.625, "rewards/chosen": -0.23819157481193542, "rewards/margins": 0.059621136635541916, "rewards/rejected": -0.29781270027160645, "step": 356 }, { "epoch": 0.9774127310061602, "grad_norm": 5.832407474517822, "learning_rate": 9.510958904109588e-07, "log_odds_chosen": -0.09191014617681503, "log_odds_ratio": -0.8381401896476746, "logits/chosen": -0.029282663017511368, "logits/rejected": 0.03670237213373184, "logps/chosen": -3.4979770183563232, "logps/rejected": -3.3997788429260254, "loss": 2.3047, "nll_loss": 2.2209343910217285, "rewards/accuracies": 0.625, "rewards/chosen": -0.34979769587516785, "rewards/margins": -0.009819839149713516, "rewards/rejected": -0.33997786045074463, "step": 357 }, { "epoch": 0.9801505817932923, "grad_norm": 5.725776195526123, "learning_rate": 9.50958904109589e-07, "log_odds_chosen": 0.31033462285995483, "log_odds_ratio": -0.7854690551757812, "logits/chosen": -0.023276569321751595, "logits/rejected": -0.002595525234937668, "logps/chosen": -2.929433822631836, "logps/rejected": -3.1762735843658447, "loss": 2.2683, "nll_loss": 2.189744472503662, "rewards/accuracies": 0.5, "rewards/chosen": -0.29294341802597046, "rewards/margins": 0.024683967232704163, "rewards/rejected": -0.3176273703575134, "step": 358 }, { "epoch": 0.9828884325804244, "grad_norm": 5.677575588226318, "learning_rate": 9.508219178082192e-07, "log_odds_chosen": -0.13016286492347717, "log_odds_ratio": -0.98359215259552, "logits/chosen": -0.00787341594696045, "logits/rejected": 0.027395177632570267, "logps/chosen": -3.33793306350708, "logps/rejected": -3.2057416439056396, "loss": 2.3227, "nll_loss": 2.2243897914886475, "rewards/accuracies": 0.5, "rewards/chosen": -0.3337933123111725, "rewards/margins": -0.013219144195318222, "rewards/rejected": -0.32057416439056396, "step": 359 }, { "epoch": 0.9856262833675564, "grad_norm": 6.0647501945495605, "learning_rate": 9.506849315068493e-07, "log_odds_chosen": -0.5732727646827698, "log_odds_ratio": -1.1222838163375854, "logits/chosen": 0.04890625923871994, "logits/rejected": 0.07325194031000137, "logps/chosen": -3.5555171966552734, "logps/rejected": -2.992178201675415, "loss": 2.2959, "nll_loss": 2.1836347579956055, "rewards/accuracies": 0.25, "rewards/chosen": -0.35555174946784973, "rewards/margins": -0.05633392184972763, "rewards/rejected": -0.2992178201675415, "step": 360 }, { "epoch": 0.9883641341546886, "grad_norm": 4.726506233215332, "learning_rate": 9.505479452054794e-07, "log_odds_chosen": 0.7112345695495605, "log_odds_ratio": -0.5187444090843201, "logits/chosen": 0.01306251809000969, "logits/rejected": 0.021586567163467407, "logps/chosen": -2.46525502204895, "logps/rejected": -3.134573459625244, "loss": 2.2558, "nll_loss": 2.203972578048706, "rewards/accuracies": 0.625, "rewards/chosen": -0.24652552604675293, "rewards/margins": 0.066931813955307, "rewards/rejected": -0.31345731019973755, "step": 361 }, { "epoch": 0.9911019849418207, "grad_norm": 4.827758312225342, "learning_rate": 9.504109589041095e-07, "log_odds_chosen": -0.06671053171157837, "log_odds_ratio": -0.8086488246917725, "logits/chosen": -0.25182294845581055, "logits/rejected": -0.315932035446167, "logps/chosen": -2.327711343765259, "logps/rejected": -2.2355027198791504, "loss": 2.1774, "nll_loss": 2.0965657234191895, "rewards/accuracies": 0.5, "rewards/chosen": -0.2327711433172226, "rewards/margins": -0.009220858104526997, "rewards/rejected": -0.22355028986930847, "step": 362 }, { "epoch": 0.9938398357289527, "grad_norm": 5.516923427581787, "learning_rate": 9.502739726027397e-07, "log_odds_chosen": -0.2747182846069336, "log_odds_ratio": -0.9125169515609741, "logits/chosen": -0.006361015141010284, "logits/rejected": 0.08698927611112595, "logps/chosen": -3.080440044403076, "logps/rejected": -2.780668020248413, "loss": 2.2509, "nll_loss": 2.1596384048461914, "rewards/accuracies": 0.375, "rewards/chosen": -0.3080440163612366, "rewards/margins": -0.029977208003401756, "rewards/rejected": -0.2780667841434479, "step": 363 }, { "epoch": 0.9965776865160849, "grad_norm": 4.671178817749023, "learning_rate": 9.501369863013698e-07, "log_odds_chosen": 0.23100151121616364, "log_odds_ratio": -0.6295503973960876, "logits/chosen": 0.08636001497507095, "logits/rejected": 0.02857334539294243, "logps/chosen": -2.27949857711792, "logps/rejected": -2.468498706817627, "loss": 2.1175, "nll_loss": 2.054551839828491, "rewards/accuracies": 0.75, "rewards/chosen": -0.227949857711792, "rewards/margins": 0.018900003284215927, "rewards/rejected": -0.24684986472129822, "step": 364 }, { "epoch": 0.999315537303217, "grad_norm": 5.593679428100586, "learning_rate": 9.499999999999999e-07, "log_odds_chosen": -0.23811371624469757, "log_odds_ratio": -0.8712306022644043, "logits/chosen": -0.09325552731752396, "logits/rejected": -0.10491539537906647, "logps/chosen": -3.1868064403533936, "logps/rejected": -2.9556736946105957, "loss": 2.2956, "nll_loss": 2.208437919616699, "rewards/accuracies": 0.5, "rewards/chosen": -0.31868064403533936, "rewards/margins": -0.023113282397389412, "rewards/rejected": -0.2955673635005951, "step": 365 }, { "epoch": 1.002053388090349, "grad_norm": 6.279559135437012, "learning_rate": 9.498630136986301e-07, "log_odds_chosen": -0.422732949256897, "log_odds_ratio": -1.2646654844284058, "logits/chosen": -0.014727042987942696, "logits/rejected": -0.0710483193397522, "logps/chosen": -4.123187065124512, "logps/rejected": -3.6817288398742676, "loss": 2.2765, "nll_loss": 2.15000057220459, "rewards/accuracies": 0.375, "rewards/chosen": -0.41231870651245117, "rewards/margins": -0.044145792722702026, "rewards/rejected": -0.36817291378974915, "step": 366 }, { "epoch": 1.0047912388774811, "grad_norm": 6.191728591918945, "learning_rate": 9.497260273972603e-07, "log_odds_chosen": -0.5764631032943726, "log_odds_ratio": -1.3061985969543457, "logits/chosen": -0.06200553849339485, "logits/rejected": 0.022157706320285797, "logps/chosen": -3.763303518295288, "logps/rejected": -3.1535959243774414, "loss": 2.2478, "nll_loss": 2.117177724838257, "rewards/accuracies": 0.375, "rewards/chosen": -0.3763303756713867, "rewards/margins": -0.06097078323364258, "rewards/rejected": -0.31535959243774414, "step": 367 }, { "epoch": 1.0075290896646132, "grad_norm": 5.379656791687012, "learning_rate": 9.495890410958903e-07, "log_odds_chosen": 0.6105749011039734, "log_odds_ratio": -0.7581477761268616, "logits/chosen": -0.06203547492623329, "logits/rejected": -0.1271355152130127, "logps/chosen": -2.788794994354248, "logps/rejected": -3.3369476795196533, "loss": 2.1759, "nll_loss": 2.1000876426696777, "rewards/accuracies": 0.75, "rewards/chosen": -0.2788795232772827, "rewards/margins": 0.05481523275375366, "rewards/rejected": -0.3336947560310364, "step": 368 }, { "epoch": 1.0102669404517455, "grad_norm": 6.694376468658447, "learning_rate": 9.494520547945205e-07, "log_odds_chosen": -0.6535106897354126, "log_odds_ratio": -1.167740821838379, "logits/chosen": 0.10443469882011414, "logits/rejected": 0.2365357130765915, "logps/chosen": -3.8433470726013184, "logps/rejected": -3.2266154289245605, "loss": 2.4069, "nll_loss": 2.290112257003784, "rewards/accuracies": 0.25, "rewards/chosen": -0.3843347132205963, "rewards/margins": -0.061673182994127274, "rewards/rejected": -0.32266151905059814, "step": 369 }, { "epoch": 1.0130047912388775, "grad_norm": 5.852021217346191, "learning_rate": 9.493150684931507e-07, "log_odds_chosen": -0.6095138788223267, "log_odds_ratio": -1.2458113431930542, "logits/chosen": 0.10048819333314896, "logits/rejected": 0.15863817930221558, "logps/chosen": -3.394094944000244, "logps/rejected": -2.7836079597473145, "loss": 2.246, "nll_loss": 2.1214592456817627, "rewards/accuracies": 0.375, "rewards/chosen": -0.3394095003604889, "rewards/margins": -0.0610487200319767, "rewards/rejected": -0.2783608138561249, "step": 370 }, { "epoch": 1.0157426420260096, "grad_norm": 5.916851043701172, "learning_rate": 9.491780821917807e-07, "log_odds_chosen": -0.22826547920703888, "log_odds_ratio": -0.9888208508491516, "logits/chosen": -0.03743167966604233, "logits/rejected": 0.03522946685552597, "logps/chosen": -3.406202793121338, "logps/rejected": -3.1677603721618652, "loss": 2.2247, "nll_loss": 2.125782012939453, "rewards/accuracies": 0.5, "rewards/chosen": -0.3406202793121338, "rewards/margins": -0.02384425699710846, "rewards/rejected": -0.3167760372161865, "step": 371 }, { "epoch": 1.0184804928131417, "grad_norm": 4.96109676361084, "learning_rate": 9.490410958904109e-07, "log_odds_chosen": 0.13526496291160583, "log_odds_ratio": -0.7085267901420593, "logits/chosen": 0.06318745762109756, "logits/rejected": 0.03270937502384186, "logps/chosen": -2.280144214630127, "logps/rejected": -2.357900381088257, "loss": 2.1891, "nll_loss": 2.1182007789611816, "rewards/accuracies": 0.5, "rewards/chosen": -0.22801443934440613, "rewards/margins": 0.007775610312819481, "rewards/rejected": -0.23579004406929016, "step": 372 }, { "epoch": 1.0212183436002737, "grad_norm": 6.59883451461792, "learning_rate": 9.489041095890411e-07, "log_odds_chosen": -1.6295442581176758, "log_odds_ratio": -1.9736595153808594, "logits/chosen": -0.0356208011507988, "logits/rejected": 0.173897385597229, "logps/chosen": -3.996497631072998, "logps/rejected": -2.4787728786468506, "loss": 2.3821, "nll_loss": 2.1847691535949707, "rewards/accuracies": 0.25, "rewards/chosen": -0.3996497690677643, "rewards/margins": -0.15177249908447266, "rewards/rejected": -0.24787726998329163, "step": 373 }, { "epoch": 1.0239561943874058, "grad_norm": 5.145351886749268, "learning_rate": 9.487671232876712e-07, "log_odds_chosen": 0.06432276964187622, "log_odds_ratio": -0.8338844180107117, "logits/chosen": 0.009063374251127243, "logits/rejected": -0.01299557089805603, "logps/chosen": -2.6294586658477783, "logps/rejected": -2.6425058841705322, "loss": 2.1953, "nll_loss": 2.1119418144226074, "rewards/accuracies": 0.75, "rewards/chosen": -0.26294586062431335, "rewards/margins": 0.0013047289103269577, "rewards/rejected": -0.26425057649612427, "step": 374 }, { "epoch": 1.0266940451745379, "grad_norm": 5.881422519683838, "learning_rate": 9.486301369863013e-07, "log_odds_chosen": -0.681448221206665, "log_odds_ratio": -1.2976841926574707, "logits/chosen": 0.0011903513222932816, "logits/rejected": 0.07918676733970642, "logps/chosen": -3.458218812942505, "logps/rejected": -2.8210222721099854, "loss": 2.3209, "nll_loss": 2.191108465194702, "rewards/accuracies": 0.5, "rewards/chosen": -0.34582191705703735, "rewards/margins": -0.06371966004371643, "rewards/rejected": -0.28210222721099854, "step": 375 }, { "epoch": 1.0294318959616702, "grad_norm": 4.976439476013184, "learning_rate": 9.484931506849314e-07, "log_odds_chosen": 0.1655610203742981, "log_odds_ratio": -0.7419437170028687, "logits/chosen": -0.01494630053639412, "logits/rejected": -0.017691250890493393, "logps/chosen": -3.1713314056396484, "logps/rejected": -3.3026206493377686, "loss": 2.2045, "nll_loss": 2.130342960357666, "rewards/accuracies": 0.625, "rewards/chosen": -0.3171331286430359, "rewards/margins": 0.01312890276312828, "rewards/rejected": -0.33026206493377686, "step": 376 }, { "epoch": 1.0321697467488022, "grad_norm": 5.828282833099365, "learning_rate": 9.483561643835616e-07, "log_odds_chosen": 0.32865434885025024, "log_odds_ratio": -0.7673771381378174, "logits/chosen": -0.05540797486901283, "logits/rejected": -0.07019752264022827, "logps/chosen": -2.939112663269043, "logps/rejected": -3.220870018005371, "loss": 2.1686, "nll_loss": 2.091871976852417, "rewards/accuracies": 0.5, "rewards/chosen": -0.29391127824783325, "rewards/margins": 0.028175704181194305, "rewards/rejected": -0.32208698987960815, "step": 377 }, { "epoch": 1.0349075975359343, "grad_norm": 5.789738655090332, "learning_rate": 9.482191780821917e-07, "log_odds_chosen": -0.703098475933075, "log_odds_ratio": -1.1747751235961914, "logits/chosen": 0.023495415225625038, "logits/rejected": 0.06789467483758926, "logps/chosen": -3.3891220092773438, "logps/rejected": -2.704770088195801, "loss": 2.2736, "nll_loss": 2.1560826301574707, "rewards/accuracies": 0.375, "rewards/chosen": -0.3389121890068054, "rewards/margins": -0.0684351995587349, "rewards/rejected": -0.2704769968986511, "step": 378 }, { "epoch": 1.0376454483230664, "grad_norm": 5.374542713165283, "learning_rate": 9.480821917808218e-07, "log_odds_chosen": 0.0702207013964653, "log_odds_ratio": -0.6875378489494324, "logits/chosen": 0.017208397388458252, "logits/rejected": -4.343688488006592e-06, "logps/chosen": -2.5110480785369873, "logps/rejected": -2.569131851196289, "loss": 2.1412, "nll_loss": 2.0724925994873047, "rewards/accuracies": 0.75, "rewards/chosen": -0.25110483169555664, "rewards/margins": 0.005808372050523758, "rewards/rejected": -0.2569131851196289, "step": 379 }, { "epoch": 1.0403832991101984, "grad_norm": 5.632322311401367, "learning_rate": 9.47945205479452e-07, "log_odds_chosen": 0.04004937410354614, "log_odds_ratio": -1.2147538661956787, "logits/chosen": -0.11641165614128113, "logits/rejected": -0.15800291299819946, "logps/chosen": -3.6457934379577637, "logps/rejected": -3.656465530395508, "loss": 2.2407, "nll_loss": 2.119175672531128, "rewards/accuracies": 0.625, "rewards/chosen": -0.36457934975624084, "rewards/margins": 0.0010671988129615784, "rewards/rejected": -0.3656465411186218, "step": 380 }, { "epoch": 1.0431211498973305, "grad_norm": 5.451156139373779, "learning_rate": 9.478082191780822e-07, "log_odds_chosen": 0.011429652571678162, "log_odds_ratio": -0.8331680297851562, "logits/chosen": -0.04583968222141266, "logits/rejected": -0.06568676233291626, "logps/chosen": -2.824504852294922, "logps/rejected": -2.819434881210327, "loss": 2.1589, "nll_loss": 2.0756020545959473, "rewards/accuracies": 0.5, "rewards/chosen": -0.28245049715042114, "rewards/margins": -0.000506984069943428, "rewards/rejected": -0.28194350004196167, "step": 381 }, { "epoch": 1.0458590006844628, "grad_norm": 5.144972801208496, "learning_rate": 9.476712328767122e-07, "log_odds_chosen": -0.008890479803085327, "log_odds_ratio": -0.8907365798950195, "logits/chosen": -0.005655102431774139, "logits/rejected": -0.021994899958372116, "logps/chosen": -2.6335482597351074, "logps/rejected": -2.569493055343628, "loss": 2.1496, "nll_loss": 2.060502529144287, "rewards/accuracies": 0.625, "rewards/chosen": -0.2633548378944397, "rewards/margins": -0.006405523046851158, "rewards/rejected": -0.2569493055343628, "step": 382 }, { "epoch": 1.0485968514715949, "grad_norm": 5.083721160888672, "learning_rate": 9.475342465753424e-07, "log_odds_chosen": 0.28610697388648987, "log_odds_ratio": -0.5737445950508118, "logits/chosen": -0.0340486578643322, "logits/rejected": -0.06483778357505798, "logps/chosen": -2.3133811950683594, "logps/rejected": -2.5623412132263184, "loss": 2.1047, "nll_loss": 2.0473177433013916, "rewards/accuracies": 0.875, "rewards/chosen": -0.23133814334869385, "rewards/margins": 0.024895980954170227, "rewards/rejected": -0.2562341094017029, "step": 383 }, { "epoch": 1.051334702258727, "grad_norm": 5.801252841949463, "learning_rate": 9.473972602739726e-07, "log_odds_chosen": 0.3514469861984253, "log_odds_ratio": -0.7892253398895264, "logits/chosen": 0.0270476546138525, "logits/rejected": -0.005554933100938797, "logps/chosen": -3.2237493991851807, "logps/rejected": -3.5495598316192627, "loss": 2.179, "nll_loss": 2.100031852722168, "rewards/accuracies": 0.5, "rewards/chosen": -0.32237496972084045, "rewards/margins": 0.03258104994893074, "rewards/rejected": -0.3549560308456421, "step": 384 }, { "epoch": 1.054072553045859, "grad_norm": 5.236269950866699, "learning_rate": 9.472602739726026e-07, "log_odds_chosen": -0.17136290669441223, "log_odds_ratio": -0.9341999888420105, "logits/chosen": -0.10181965678930283, "logits/rejected": -0.08249533921480179, "logps/chosen": -2.812511920928955, "logps/rejected": -2.613804340362549, "loss": 2.1995, "nll_loss": 2.106050968170166, "rewards/accuracies": 0.5, "rewards/chosen": -0.2812512218952179, "rewards/margins": -0.019870752468705177, "rewards/rejected": -0.2613804340362549, "step": 385 }, { "epoch": 1.056810403832991, "grad_norm": 4.907290935516357, "learning_rate": 9.471232876712328e-07, "log_odds_chosen": 0.871002733707428, "log_odds_ratio": -0.5526246428489685, "logits/chosen": -0.018742520362138748, "logits/rejected": -0.07749088108539581, "logps/chosen": -2.2034404277801514, "logps/rejected": -2.985111713409424, "loss": 2.1213, "nll_loss": 2.0660576820373535, "rewards/accuracies": 0.75, "rewards/chosen": -0.22034403681755066, "rewards/margins": 0.0781671404838562, "rewards/rejected": -0.29851120710372925, "step": 386 }, { "epoch": 1.0595482546201231, "grad_norm": 6.850405216217041, "learning_rate": 9.46986301369863e-07, "log_odds_chosen": -0.902972936630249, "log_odds_ratio": -1.789198875427246, "logits/chosen": -0.040647465735673904, "logits/rejected": -0.03355865180492401, "logps/chosen": -3.7885541915893555, "logps/rejected": -2.8774518966674805, "loss": 2.2234, "nll_loss": 2.0444600582122803, "rewards/accuracies": 0.375, "rewards/chosen": -0.3788554072380066, "rewards/margins": -0.0911102369427681, "rewards/rejected": -0.2877451777458191, "step": 387 }, { "epoch": 1.0622861054072552, "grad_norm": 5.479655742645264, "learning_rate": 9.46849315068493e-07, "log_odds_chosen": -0.07434894889593124, "log_odds_ratio": -0.8717222213745117, "logits/chosen": -0.12447405606508255, "logits/rejected": -0.14101877808570862, "logps/chosen": -3.054076671600342, "logps/rejected": -2.962691307067871, "loss": 2.2015, "nll_loss": 2.1143455505371094, "rewards/accuracies": 0.5, "rewards/chosen": -0.30540764331817627, "rewards/margins": -0.00913851149380207, "rewards/rejected": -0.29626911878585815, "step": 388 }, { "epoch": 1.0650239561943875, "grad_norm": 5.908736705780029, "learning_rate": 9.467123287671232e-07, "log_odds_chosen": -0.6928707361221313, "log_odds_ratio": -1.1467478275299072, "logits/chosen": -0.04995908588171005, "logits/rejected": 0.012963347136974335, "logps/chosen": -3.4274001121520996, "logps/rejected": -2.7557482719421387, "loss": 2.3087, "nll_loss": 2.1940340995788574, "rewards/accuracies": 0.125, "rewards/chosen": -0.342739999294281, "rewards/margins": -0.06716515123844147, "rewards/rejected": -0.27557483315467834, "step": 389 }, { "epoch": 1.0677618069815196, "grad_norm": 5.666234493255615, "learning_rate": 9.465753424657534e-07, "log_odds_chosen": -0.21910825371742249, "log_odds_ratio": -0.9592845439910889, "logits/chosen": 0.01275249570608139, "logits/rejected": 0.03885611146688461, "logps/chosen": -3.4070510864257812, "logps/rejected": -3.2055306434631348, "loss": 2.3305, "nll_loss": 2.234617233276367, "rewards/accuracies": 0.5, "rewards/chosen": -0.34070512652397156, "rewards/margins": -0.02015206776559353, "rewards/rejected": -0.3205530643463135, "step": 390 }, { "epoch": 1.0704996577686516, "grad_norm": 5.478916168212891, "learning_rate": 9.464383561643835e-07, "log_odds_chosen": -1.3545541763305664, "log_odds_ratio": -1.902097463607788, "logits/chosen": -0.15497848391532898, "logits/rejected": -0.19112300872802734, "logps/chosen": -3.96440052986145, "logps/rejected": -2.579429864883423, "loss": 2.2868, "nll_loss": 2.0966129302978516, "rewards/accuracies": 0.5, "rewards/chosen": -0.3964400589466095, "rewards/margins": -0.13849706947803497, "rewards/rejected": -0.25794297456741333, "step": 391 }, { "epoch": 1.0732375085557837, "grad_norm": 4.9121198654174805, "learning_rate": 9.463013698630136e-07, "log_odds_chosen": 0.14259514212608337, "log_odds_ratio": -0.661129355430603, "logits/chosen": 0.04186528921127319, "logits/rejected": -0.010606206953525543, "logps/chosen": -2.357736110687256, "logps/rejected": -2.448392868041992, "loss": 2.1197, "nll_loss": 2.0536224842071533, "rewards/accuracies": 0.75, "rewards/chosen": -0.23577363789081573, "rewards/margins": 0.009065652266144753, "rewards/rejected": -0.24483928084373474, "step": 392 }, { "epoch": 1.0759753593429158, "grad_norm": 5.854374885559082, "learning_rate": 9.461643835616437e-07, "log_odds_chosen": -0.7438597679138184, "log_odds_ratio": -1.3572442531585693, "logits/chosen": 0.0410967692732811, "logits/rejected": 0.09944278001785278, "logps/chosen": -3.527475118637085, "logps/rejected": -2.8109045028686523, "loss": 2.2032, "nll_loss": 2.0674614906311035, "rewards/accuracies": 0.25, "rewards/chosen": -0.35274752974510193, "rewards/margins": -0.07165704667568207, "rewards/rejected": -0.28109049797058105, "step": 393 }, { "epoch": 1.0787132101300478, "grad_norm": 5.462269306182861, "learning_rate": 9.460273972602739e-07, "log_odds_chosen": 0.09996678680181503, "log_odds_ratio": -0.7003674507141113, "logits/chosen": -0.09036475419998169, "logits/rejected": -0.043192215263843536, "logps/chosen": -2.825531482696533, "logps/rejected": -2.8701937198638916, "loss": 2.0948, "nll_loss": 2.0247340202331543, "rewards/accuracies": 0.375, "rewards/chosen": -0.28255313634872437, "rewards/margins": 0.0044662076979875565, "rewards/rejected": -0.28701937198638916, "step": 394 }, { "epoch": 1.0814510609171801, "grad_norm": 5.046082496643066, "learning_rate": 9.45890410958904e-07, "log_odds_chosen": 0.31338536739349365, "log_odds_ratio": -0.7271140217781067, "logits/chosen": 0.041142139583826065, "logits/rejected": -0.03146664798259735, "logps/chosen": -2.566237211227417, "logps/rejected": -2.8819165229797363, "loss": 2.1279, "nll_loss": 2.055203914642334, "rewards/accuracies": 0.625, "rewards/chosen": -0.2566237151622772, "rewards/margins": 0.03156793490052223, "rewards/rejected": -0.28819164633750916, "step": 395 }, { "epoch": 1.0841889117043122, "grad_norm": 5.937069892883301, "learning_rate": 9.457534246575341e-07, "log_odds_chosen": -0.7760448455810547, "log_odds_ratio": -1.361546516418457, "logits/chosen": -0.14536871016025543, "logits/rejected": -0.1603129655122757, "logps/chosen": -3.5592846870422363, "logps/rejected": -2.7969655990600586, "loss": 2.3273, "nll_loss": 2.191152572631836, "rewards/accuracies": 0.375, "rewards/chosen": -0.3559284806251526, "rewards/margins": -0.07623191177845001, "rewards/rejected": -0.2796965539455414, "step": 396 }, { "epoch": 1.0869267624914443, "grad_norm": 5.30023193359375, "learning_rate": 9.456164383561643e-07, "log_odds_chosen": -0.2445082664489746, "log_odds_ratio": -1.2004892826080322, "logits/chosen": -0.07519736140966415, "logits/rejected": -0.16660554707050323, "logps/chosen": -3.0509450435638428, "logps/rejected": -2.8097357749938965, "loss": 2.1919, "nll_loss": 2.0718963146209717, "rewards/accuracies": 0.375, "rewards/chosen": -0.30509454011917114, "rewards/margins": -0.02412094548344612, "rewards/rejected": -0.2809735834598541, "step": 397 }, { "epoch": 1.0896646132785763, "grad_norm": 5.028170108795166, "learning_rate": 9.454794520547946e-07, "log_odds_chosen": 0.23993021249771118, "log_odds_ratio": -0.7785637378692627, "logits/chosen": -0.03898364678025246, "logits/rejected": -0.07312134653329849, "logps/chosen": -2.2886154651641846, "logps/rejected": -2.502896308898926, "loss": 2.1729, "nll_loss": 2.095046043395996, "rewards/accuracies": 0.625, "rewards/chosen": -0.22886154055595398, "rewards/margins": 0.021428102627396584, "rewards/rejected": -0.250289648771286, "step": 398 }, { "epoch": 1.0924024640657084, "grad_norm": 6.429267883300781, "learning_rate": 9.453424657534245e-07, "log_odds_chosen": -0.7260942459106445, "log_odds_ratio": -1.39812433719635, "logits/chosen": -0.04076787829399109, "logits/rejected": -0.040327541530132294, "logps/chosen": -3.925908088684082, "logps/rejected": -3.1913862228393555, "loss": 2.2274, "nll_loss": 2.087563991546631, "rewards/accuracies": 0.375, "rewards/chosen": -0.39259082078933716, "rewards/margins": -0.07345215976238251, "rewards/rejected": -0.31913864612579346, "step": 399 }, { "epoch": 1.0951403148528405, "grad_norm": 4.724008083343506, "learning_rate": 9.452054794520548e-07, "log_odds_chosen": 0.4232317805290222, "log_odds_ratio": -0.5727472305297852, "logits/chosen": 0.05408839136362076, "logits/rejected": -0.05872774124145508, "logps/chosen": -2.0978763103485107, "logps/rejected": -2.5016510486602783, "loss": 2.0495, "nll_loss": 1.9922386407852173, "rewards/accuracies": 0.625, "rewards/chosen": -0.20978762209415436, "rewards/margins": 0.04037749022245407, "rewards/rejected": -0.25016510486602783, "step": 400 }, { "epoch": 1.0978781656399725, "grad_norm": 6.49908447265625, "learning_rate": 9.45068493150685e-07, "log_odds_chosen": -0.5668092370033264, "log_odds_ratio": -1.0895040035247803, "logits/chosen": -0.0053877197206020355, "logits/rejected": 0.12180618941783905, "logps/chosen": -3.3749632835388184, "logps/rejected": -2.8420252799987793, "loss": 2.2128, "nll_loss": 2.1038522720336914, "rewards/accuracies": 0.375, "rewards/chosen": -0.3374963402748108, "rewards/margins": -0.053293805569410324, "rewards/rejected": -0.284202516078949, "step": 401 }, { "epoch": 1.1006160164271048, "grad_norm": 6.272973537445068, "learning_rate": 9.44931506849315e-07, "log_odds_chosen": -0.7030003070831299, "log_odds_ratio": -1.4144715070724487, "logits/chosen": -0.038520149886608124, "logits/rejected": 0.05266977474093437, "logps/chosen": -3.8986783027648926, "logps/rejected": -3.186516761779785, "loss": 2.2626, "nll_loss": 2.121189594268799, "rewards/accuracies": 0.5, "rewards/chosen": -0.3898678421974182, "rewards/margins": -0.0712161511182785, "rewards/rejected": -0.3186516761779785, "step": 402 }, { "epoch": 1.103353867214237, "grad_norm": 6.945471286773682, "learning_rate": 9.447945205479452e-07, "log_odds_chosen": -0.6807181239128113, "log_odds_ratio": -1.335251808166504, "logits/chosen": 0.08620329201221466, "logits/rejected": 0.09104853868484497, "logps/chosen": -4.121284484863281, "logps/rejected": -3.423562526702881, "loss": 2.165, "nll_loss": 2.0314888954162598, "rewards/accuracies": 0.5, "rewards/chosen": -0.4121284484863281, "rewards/margins": -0.06977219879627228, "rewards/rejected": -0.34235623478889465, "step": 403 }, { "epoch": 1.106091718001369, "grad_norm": 5.411383152008057, "learning_rate": 9.446575342465754e-07, "log_odds_chosen": 0.10090063512325287, "log_odds_ratio": -0.8686198592185974, "logits/chosen": 0.06522078067064285, "logits/rejected": 0.058814678341150284, "logps/chosen": -2.764681100845337, "logps/rejected": -2.8492345809936523, "loss": 2.2049, "nll_loss": 2.1180777549743652, "rewards/accuracies": 0.375, "rewards/chosen": -0.2764681279659271, "rewards/margins": 0.008455328643321991, "rewards/rejected": -0.2849234640598297, "step": 404 }, { "epoch": 1.108829568788501, "grad_norm": 5.244421005249023, "learning_rate": 9.445205479452055e-07, "log_odds_chosen": -0.17915162444114685, "log_odds_ratio": -0.8435153365135193, "logits/chosen": -0.1625255048274994, "logits/rejected": -0.1477990597486496, "logps/chosen": -2.950761318206787, "logps/rejected": -2.7712841033935547, "loss": 2.1861, "nll_loss": 2.1017343997955322, "rewards/accuracies": 0.5, "rewards/chosen": -0.2950761318206787, "rewards/margins": -0.0179477259516716, "rewards/rejected": -0.2771283984184265, "step": 405 }, { "epoch": 1.111567419575633, "grad_norm": 5.409100532531738, "learning_rate": 9.443835616438356e-07, "log_odds_chosen": 0.979373574256897, "log_odds_ratio": -0.5835527181625366, "logits/chosen": -0.04839053750038147, "logits/rejected": -0.08283739537000656, "logps/chosen": -2.472601890563965, "logps/rejected": -3.4015636444091797, "loss": 2.031, "nll_loss": 1.972622275352478, "rewards/accuracies": 0.625, "rewards/chosen": -0.2472601979970932, "rewards/margins": 0.09289620071649551, "rewards/rejected": -0.3401564061641693, "step": 406 }, { "epoch": 1.1143052703627652, "grad_norm": NaN, "learning_rate": 9.443835616438356e-07, "log_odds_chosen": -0.4704110026359558, "log_odds_ratio": -1.0704874992370605, "logits/chosen": -0.0884602963924408, "logits/rejected": -0.022332392632961273, "logps/chosen": -3.6985878944396973, "logps/rejected": -3.229856491088867, "loss": 2.1725, "nll_loss": 2.0654850006103516, "rewards/accuracies": 0.25, "rewards/chosen": -0.3698588013648987, "rewards/margins": -0.046873174607753754, "rewards/rejected": -0.3229856491088867, "step": 407 }, { "epoch": 1.1170431211498972, "grad_norm": 6.297947883605957, "learning_rate": 9.442465753424657e-07, "log_odds_chosen": -0.9987748265266418, "log_odds_ratio": -1.387382984161377, "logits/chosen": -0.12450941652059555, "logits/rejected": -0.10021807253360748, "logps/chosen": -3.819575309753418, "logps/rejected": -2.856168746948242, "loss": 2.2414, "nll_loss": 2.1026716232299805, "rewards/accuracies": 0.125, "rewards/chosen": -0.3819575309753418, "rewards/margins": -0.09634068608283997, "rewards/rejected": -0.2856168746948242, "step": 408 }, { "epoch": 1.1197809719370295, "grad_norm": 7.034818649291992, "learning_rate": 9.441095890410959e-07, "log_odds_chosen": -1.185797929763794, "log_odds_ratio": -1.5070958137512207, "logits/chosen": -0.06136221066117287, "logits/rejected": 0.06524252891540527, "logps/chosen": -4.402207374572754, "logps/rejected": -3.241363525390625, "loss": 2.2858, "nll_loss": 2.135087490081787, "rewards/accuracies": 0.125, "rewards/chosen": -0.4402207136154175, "rewards/margins": -0.11608438938856125, "rewards/rejected": -0.324136346578598, "step": 409 }, { "epoch": 1.1225188227241616, "grad_norm": 5.127902030944824, "learning_rate": 9.43972602739726e-07, "log_odds_chosen": 0.13494613766670227, "log_odds_ratio": -0.7459206581115723, "logits/chosen": 0.12319779396057129, "logits/rejected": 0.11250339448451996, "logps/chosen": -2.4002676010131836, "logps/rejected": -2.5306005477905273, "loss": 2.0984, "nll_loss": 2.0237679481506348, "rewards/accuracies": 0.375, "rewards/chosen": -0.24002675712108612, "rewards/margins": 0.0130333062261343, "rewards/rejected": -0.25306007266044617, "step": 410 }, { "epoch": 1.1252566735112937, "grad_norm": 5.6266937255859375, "learning_rate": 9.438356164383561e-07, "log_odds_chosen": 0.3070373833179474, "log_odds_ratio": -0.7651762366294861, "logits/chosen": -0.19131547212600708, "logits/rejected": -0.2040310651063919, "logps/chosen": -2.7605955600738525, "logps/rejected": -3.0613040924072266, "loss": 2.1227, "nll_loss": 2.046225070953369, "rewards/accuracies": 0.625, "rewards/chosen": -0.2760595679283142, "rewards/margins": 0.030070839449763298, "rewards/rejected": -0.30613040924072266, "step": 411 }, { "epoch": 1.1279945242984257, "grad_norm": 5.5291314125061035, "learning_rate": 9.436986301369863e-07, "log_odds_chosen": -0.46412166953086853, "log_odds_ratio": -1.0802685022354126, "logits/chosen": -0.11705497652292252, "logits/rejected": -0.1353626549243927, "logps/chosen": -3.115537405014038, "logps/rejected": -2.656769275665283, "loss": 2.1769, "nll_loss": 2.068831443786621, "rewards/accuracies": 0.375, "rewards/chosen": -0.3115537762641907, "rewards/margins": -0.04587683826684952, "rewards/rejected": -0.26567691564559937, "step": 412 }, { "epoch": 1.1307323750855578, "grad_norm": 5.7770586013793945, "learning_rate": 9.435616438356165e-07, "log_odds_chosen": 0.18708190321922302, "log_odds_ratio": -0.6482164859771729, "logits/chosen": -0.18061377108097076, "logits/rejected": -0.09510186314582825, "logps/chosen": -2.2445759773254395, "logps/rejected": -2.389604091644287, "loss": 2.0394, "nll_loss": 1.974579930305481, "rewards/accuracies": 0.75, "rewards/chosen": -0.22445759177207947, "rewards/margins": 0.014502833597362041, "rewards/rejected": -0.2389604151248932, "step": 413 }, { "epoch": 1.1334702258726899, "grad_norm": 5.818032741546631, "learning_rate": 9.434246575342465e-07, "log_odds_chosen": -0.04200027883052826, "log_odds_ratio": -0.8197177648544312, "logits/chosen": 0.0010757073760032654, "logits/rejected": 0.03288683295249939, "logps/chosen": -3.1948647499084473, "logps/rejected": -3.1554126739501953, "loss": 2.0925, "nll_loss": 2.0104856491088867, "rewards/accuracies": 0.5, "rewards/chosen": -0.31948646903038025, "rewards/margins": -0.003945186734199524, "rewards/rejected": -0.31554126739501953, "step": 414 }, { "epoch": 1.136208076659822, "grad_norm": 5.821584701538086, "learning_rate": 9.432876712328767e-07, "log_odds_chosen": -0.5058469772338867, "log_odds_ratio": -1.1424765586853027, "logits/chosen": -0.03707996755838394, "logits/rejected": 0.004206802695989609, "logps/chosen": -3.258704662322998, "logps/rejected": -2.7671430110931396, "loss": 2.185, "nll_loss": 2.0707342624664307, "rewards/accuracies": 0.25, "rewards/chosen": -0.32587048411369324, "rewards/margins": -0.04915616288781166, "rewards/rejected": -0.2767143249511719, "step": 415 }, { "epoch": 1.1389459274469542, "grad_norm": 6.551729679107666, "learning_rate": 9.431506849315069e-07, "log_odds_chosen": -0.46798479557037354, "log_odds_ratio": -1.0684633255004883, "logits/chosen": -0.06988595426082611, "logits/rejected": 0.010737976990640163, "logps/chosen": -3.2186896800994873, "logps/rejected": -2.7532660961151123, "loss": 2.1459, "nll_loss": 2.039046287536621, "rewards/accuracies": 0.375, "rewards/chosen": -0.32186901569366455, "rewards/margins": -0.04654238373041153, "rewards/rejected": -0.27532660961151123, "step": 416 }, { "epoch": 1.1416837782340863, "grad_norm": 5.79766845703125, "learning_rate": 9.430136986301369e-07, "log_odds_chosen": -0.3660900592803955, "log_odds_ratio": -0.9719778895378113, "logits/chosen": 0.031044865027070045, "logits/rejected": 0.07826905697584152, "logps/chosen": -3.1238653659820557, "logps/rejected": -2.7942416667938232, "loss": 2.2525, "nll_loss": 2.155285358428955, "rewards/accuracies": 0.125, "rewards/chosen": -0.31238654255867004, "rewards/margins": -0.032962366938591, "rewards/rejected": -0.27942419052124023, "step": 417 }, { "epoch": 1.1444216290212184, "grad_norm": 6.041375637054443, "learning_rate": 9.428767123287671e-07, "log_odds_chosen": 0.043172404170036316, "log_odds_ratio": -0.7130857706069946, "logits/chosen": -0.028544694185256958, "logits/rejected": -0.07245392352342606, "logps/chosen": -2.893998146057129, "logps/rejected": -2.955742359161377, "loss": 2.1199, "nll_loss": 2.0486252307891846, "rewards/accuracies": 0.5, "rewards/chosen": -0.28939980268478394, "rewards/margins": 0.006174440495669842, "rewards/rejected": -0.29557424783706665, "step": 418 }, { "epoch": 1.1471594798083504, "grad_norm": 6.450129985809326, "learning_rate": 9.427397260273973e-07, "log_odds_chosen": 0.13051161170005798, "log_odds_ratio": -0.7629696726799011, "logits/chosen": -0.029743889346718788, "logits/rejected": 0.013210724107921124, "logps/chosen": -3.2297117710113525, "logps/rejected": -3.306828737258911, "loss": 2.0651, "nll_loss": 1.9888288974761963, "rewards/accuracies": 0.625, "rewards/chosen": -0.3229711651802063, "rewards/margins": 0.007711689919233322, "rewards/rejected": -0.3306828737258911, "step": 419 }, { "epoch": 1.1498973305954825, "grad_norm": 5.852695941925049, "learning_rate": 9.426027397260274e-07, "log_odds_chosen": 0.40372517704963684, "log_odds_ratio": -0.6353563666343689, "logits/chosen": -0.036743298172950745, "logits/rejected": -0.0237591490149498, "logps/chosen": -2.6840708255767822, "logps/rejected": -3.0065150260925293, "loss": 2.0248, "nll_loss": 1.9612170457839966, "rewards/accuracies": 0.625, "rewards/chosen": -0.26840707659721375, "rewards/margins": 0.03224443644285202, "rewards/rejected": -0.30065152049064636, "step": 420 }, { "epoch": 1.1526351813826146, "grad_norm": 6.474692344665527, "learning_rate": 9.424657534246575e-07, "log_odds_chosen": -0.1912587583065033, "log_odds_ratio": -0.8849354982376099, "logits/chosen": -0.13530796766281128, "logits/rejected": -0.05604903772473335, "logps/chosen": -3.783574104309082, "logps/rejected": -3.6061418056488037, "loss": 2.2544, "nll_loss": 2.1659183502197266, "rewards/accuracies": 0.375, "rewards/chosen": -0.3783574104309082, "rewards/margins": -0.01774320937693119, "rewards/rejected": -0.36061418056488037, "step": 421 }, { "epoch": 1.1553730321697468, "grad_norm": 5.53525972366333, "learning_rate": 9.423287671232877e-07, "log_odds_chosen": 0.031443215906620026, "log_odds_ratio": -0.8043985366821289, "logits/chosen": -0.1104322075843811, "logits/rejected": -0.11145613342523575, "logps/chosen": -2.9888367652893066, "logps/rejected": -2.989877700805664, "loss": 2.1122, "nll_loss": 2.0317940711975098, "rewards/accuracies": 0.625, "rewards/chosen": -0.29888370633125305, "rewards/margins": 0.00010409951210021973, "rewards/rejected": -0.29898780584335327, "step": 422 }, { "epoch": 1.158110882956879, "grad_norm": 5.040380477905273, "learning_rate": 9.421917808219178e-07, "log_odds_chosen": 0.6290126442909241, "log_odds_ratio": -0.8198691606521606, "logits/chosen": 0.020665444433689117, "logits/rejected": -0.04401346668601036, "logps/chosen": -2.5501351356506348, "logps/rejected": -3.10945987701416, "loss": 2.1065, "nll_loss": 2.024463653564453, "rewards/accuracies": 0.875, "rewards/chosen": -0.25501352548599243, "rewards/margins": 0.055932458490133286, "rewards/rejected": -0.310945987701416, "step": 423 }, { "epoch": 1.160848733744011, "grad_norm": 5.552083969116211, "learning_rate": 9.420547945205479e-07, "log_odds_chosen": -0.016317464411258698, "log_odds_ratio": -0.8246505260467529, "logits/chosen": 0.10068154335021973, "logits/rejected": 0.05642801523208618, "logps/chosen": -2.604311466217041, "logps/rejected": -2.539991617202759, "loss": 2.0622, "nll_loss": 1.9797115325927734, "rewards/accuracies": 0.5, "rewards/chosen": -0.260431170463562, "rewards/margins": -0.006432009860873222, "rewards/rejected": -0.25399914383888245, "step": 424 }, { "epoch": 1.163586584531143, "grad_norm": 6.401136875152588, "learning_rate": 9.41917808219178e-07, "log_odds_chosen": -0.04189436882734299, "log_odds_ratio": -0.7573281526565552, "logits/chosen": -0.10038471221923828, "logits/rejected": -0.030990052968263626, "logps/chosen": -3.101898670196533, "logps/rejected": -3.057220697402954, "loss": 2.1315, "nll_loss": 2.055776357650757, "rewards/accuracies": 0.5, "rewards/chosen": -0.3101898729801178, "rewards/margins": -0.004467794671654701, "rewards/rejected": -0.30572208762168884, "step": 425 }, { "epoch": 1.1663244353182751, "grad_norm": 6.737373352050781, "learning_rate": 9.417808219178082e-07, "log_odds_chosen": 0.30863723158836365, "log_odds_ratio": -1.1540844440460205, "logits/chosen": -0.06145595759153366, "logits/rejected": 0.022942055016756058, "logps/chosen": -3.9621572494506836, "logps/rejected": -4.196833610534668, "loss": 2.1851, "nll_loss": 2.06966495513916, "rewards/accuracies": 0.375, "rewards/chosen": -0.3962157070636749, "rewards/margins": 0.02346765622496605, "rewards/rejected": -0.4196833372116089, "step": 426 }, { "epoch": 1.1690622861054072, "grad_norm": 5.735945701599121, "learning_rate": 9.416438356164384e-07, "log_odds_chosen": -0.22595860064029694, "log_odds_ratio": -1.071522831916809, "logits/chosen": 0.043928131461143494, "logits/rejected": 0.1051948145031929, "logps/chosen": -3.051159381866455, "logps/rejected": -2.8286232948303223, "loss": 2.1131, "nll_loss": 2.005960464477539, "rewards/accuracies": 0.625, "rewards/chosen": -0.3051159381866455, "rewards/margins": -0.022253625094890594, "rewards/rejected": -0.2828623056411743, "step": 427 }, { "epoch": 1.1718001368925393, "grad_norm": 6.885965347290039, "learning_rate": 9.415068493150684e-07, "log_odds_chosen": -1.3043023347854614, "log_odds_ratio": -1.7355300188064575, "logits/chosen": 0.044758137315511703, "logits/rejected": 0.03927639126777649, "logps/chosen": -4.0309295654296875, "logps/rejected": -2.741992712020874, "loss": 2.2226, "nll_loss": 2.049060344696045, "rewards/accuracies": 0.375, "rewards/chosen": -0.4030929505825043, "rewards/margins": -0.1288936734199524, "rewards/rejected": -0.2741992771625519, "step": 428 }, { "epoch": 1.1745379876796715, "grad_norm": 5.847698211669922, "learning_rate": 9.413698630136986e-07, "log_odds_chosen": -0.3165764808654785, "log_odds_ratio": -0.9970992207527161, "logits/chosen": -0.09879711270332336, "logits/rejected": -0.08798207342624664, "logps/chosen": -2.8348872661590576, "logps/rejected": -2.5023365020751953, "loss": 2.0987, "nll_loss": 1.999013900756836, "rewards/accuracies": 0.5, "rewards/chosen": -0.2834887206554413, "rewards/margins": -0.03325507789850235, "rewards/rejected": -0.25023365020751953, "step": 429 }, { "epoch": 1.1772758384668036, "grad_norm": 6.410250186920166, "learning_rate": 9.412328767123288e-07, "log_odds_chosen": -0.1632748544216156, "log_odds_ratio": -0.9232394695281982, "logits/chosen": 0.03422340750694275, "logits/rejected": 0.11932813376188278, "logps/chosen": -3.317654609680176, "logps/rejected": -3.126004219055176, "loss": 2.1704, "nll_loss": 2.078064441680908, "rewards/accuracies": 0.375, "rewards/chosen": -0.33176547288894653, "rewards/margins": -0.019165022298693657, "rewards/rejected": -0.31260043382644653, "step": 430 }, { "epoch": 1.1800136892539357, "grad_norm": 5.152364253997803, "learning_rate": 9.410958904109588e-07, "log_odds_chosen": 0.5926710367202759, "log_odds_ratio": -0.5220581889152527, "logits/chosen": -0.16013583540916443, "logits/rejected": -0.1691088080406189, "logps/chosen": -2.0544824600219727, "logps/rejected": -2.5400290489196777, "loss": 1.9556, "nll_loss": 1.903435468673706, "rewards/accuracies": 0.75, "rewards/chosen": -0.20544825494289398, "rewards/margins": 0.048554640263319016, "rewards/rejected": -0.2540028989315033, "step": 431 }, { "epoch": 1.1827515400410678, "grad_norm": 5.786583423614502, "learning_rate": 9.40958904109589e-07, "log_odds_chosen": -0.17259401082992554, "log_odds_ratio": -1.0347139835357666, "logits/chosen": -0.08460815995931625, "logits/rejected": -0.10206609219312668, "logps/chosen": -3.139163017272949, "logps/rejected": -2.920278310775757, "loss": 2.1097, "nll_loss": 2.0062503814697266, "rewards/accuracies": 0.625, "rewards/chosen": -0.31391632556915283, "rewards/margins": -0.02188848704099655, "rewards/rejected": -0.2920278310775757, "step": 432 }, { "epoch": 1.1854893908281998, "grad_norm": 5.450777053833008, "learning_rate": 9.408219178082192e-07, "log_odds_chosen": 0.5693631172180176, "log_odds_ratio": -0.8387799263000488, "logits/chosen": -0.02373652160167694, "logits/rejected": -0.1462545394897461, "logps/chosen": -2.482243537902832, "logps/rejected": -2.987240791320801, "loss": 2.1193, "nll_loss": 2.035463809967041, "rewards/accuracies": 0.75, "rewards/chosen": -0.24822434782981873, "rewards/margins": 0.05049975961446762, "rewards/rejected": -0.29872411489486694, "step": 433 }, { "epoch": 1.1882272416153319, "grad_norm": 5.136311054229736, "learning_rate": 9.406849315068493e-07, "log_odds_chosen": -0.11008358001708984, "log_odds_ratio": -0.7972437739372253, "logits/chosen": -0.15907825529575348, "logits/rejected": -0.2277897596359253, "logps/chosen": -2.4339709281921387, "logps/rejected": -2.3226962089538574, "loss": 2.1195, "nll_loss": 2.039771318435669, "rewards/accuracies": 0.625, "rewards/chosen": -0.24339711666107178, "rewards/margins": -0.011127470061182976, "rewards/rejected": -0.23226964473724365, "step": 434 }, { "epoch": 1.1909650924024642, "grad_norm": 6.994316577911377, "learning_rate": 9.405479452054794e-07, "log_odds_chosen": -0.631445050239563, "log_odds_ratio": -1.1810026168823242, "logits/chosen": -0.08246169984340668, "logits/rejected": 0.0016830787062644958, "logps/chosen": -3.514801502227783, "logps/rejected": -2.900592565536499, "loss": 2.0771, "nll_loss": 1.9590280055999756, "rewards/accuracies": 0.375, "rewards/chosen": -0.3514801561832428, "rewards/margins": -0.061420902609825134, "rewards/rejected": -0.29005926847457886, "step": 435 }, { "epoch": 1.1937029431895962, "grad_norm": 5.436774253845215, "learning_rate": 9.404109589041096e-07, "log_odds_chosen": 0.6457853317260742, "log_odds_ratio": -0.5519204139709473, "logits/chosen": -0.044890396296978, "logits/rejected": -0.1925698220729828, "logps/chosen": -3.2375857830047607, "logps/rejected": -3.805898904800415, "loss": 2.069, "nll_loss": 2.013828992843628, "rewards/accuracies": 0.5, "rewards/chosen": -0.323758602142334, "rewards/margins": 0.05683130398392677, "rewards/rejected": -0.38058990240097046, "step": 436 }, { "epoch": 1.1964407939767283, "grad_norm": 5.9308624267578125, "learning_rate": 9.402739726027397e-07, "log_odds_chosen": 0.021418988704681396, "log_odds_ratio": -0.8320231437683105, "logits/chosen": -0.13456906378269196, "logits/rejected": -0.1350402534008026, "logps/chosen": -2.676574468612671, "logps/rejected": -2.6679062843322754, "loss": 2.103, "nll_loss": 2.019815683364868, "rewards/accuracies": 0.5, "rewards/chosen": -0.26765742897987366, "rewards/margins": -0.0008668005466461182, "rewards/rejected": -0.26679062843322754, "step": 437 }, { "epoch": 1.1991786447638604, "grad_norm": 5.881263256072998, "learning_rate": 9.401369863013698e-07, "log_odds_chosen": -0.34654074907302856, "log_odds_ratio": -0.9725050926208496, "logits/chosen": -0.15042859315872192, "logits/rejected": -0.15439127385616302, "logps/chosen": -3.3132987022399902, "logps/rejected": -2.9604275226593018, "loss": 2.0803, "nll_loss": 1.9830660820007324, "rewards/accuracies": 0.5, "rewards/chosen": -0.3313298225402832, "rewards/margins": -0.035287097096443176, "rewards/rejected": -0.2960427403450012, "step": 438 }, { "epoch": 1.2019164955509924, "grad_norm": 4.814217567443848, "learning_rate": 9.399999999999999e-07, "log_odds_chosen": 0.9027681350708008, "log_odds_ratio": -0.6628704071044922, "logits/chosen": 0.12547887861728668, "logits/rejected": 0.015761297196149826, "logps/chosen": -1.8946895599365234, "logps/rejected": -2.770899772644043, "loss": 1.9691, "nll_loss": 1.9028476476669312, "rewards/accuracies": 0.625, "rewards/chosen": -0.18946895003318787, "rewards/margins": 0.08762101083993912, "rewards/rejected": -0.2770899534225464, "step": 439 }, { "epoch": 1.2046543463381245, "grad_norm": 7.488702297210693, "learning_rate": 9.398630136986301e-07, "log_odds_chosen": -0.4997009038925171, "log_odds_ratio": -1.1346989870071411, "logits/chosen": 0.03548038750886917, "logits/rejected": 0.1644054651260376, "logps/chosen": -3.6240973472595215, "logps/rejected": -3.1595261096954346, "loss": 2.1541, "nll_loss": 2.0406572818756104, "rewards/accuracies": 0.25, "rewards/chosen": -0.36240971088409424, "rewards/margins": -0.04645712673664093, "rewards/rejected": -0.3159526288509369, "step": 440 }, { "epoch": 1.2073921971252566, "grad_norm": 7.340654373168945, "learning_rate": 9.397260273972603e-07, "log_odds_chosen": -1.1285154819488525, "log_odds_ratio": -1.8008668422698975, "logits/chosen": -0.028152383863925934, "logits/rejected": 0.011786498129367828, "logps/chosen": -4.544304847717285, "logps/rejected": -3.4420790672302246, "loss": 2.2091, "nll_loss": 2.0289714336395264, "rewards/accuracies": 0.25, "rewards/chosen": -0.4544305205345154, "rewards/margins": -0.11022260040044785, "rewards/rejected": -0.34420791268348694, "step": 441 }, { "epoch": 1.2101300479123887, "grad_norm": 6.757407188415527, "learning_rate": 9.395890410958903e-07, "log_odds_chosen": 0.035901233553886414, "log_odds_ratio": -0.725590705871582, "logits/chosen": -0.06844629347324371, "logits/rejected": -0.037490472197532654, "logps/chosen": -3.2024168968200684, "logps/rejected": -3.253554105758667, "loss": 2.0491, "nll_loss": 1.9765706062316895, "rewards/accuracies": 0.5, "rewards/chosen": -0.3202417194843292, "rewards/margins": 0.005113700404763222, "rewards/rejected": -0.3253554105758667, "step": 442 }, { "epoch": 1.212867898699521, "grad_norm": 6.079657554626465, "learning_rate": 9.394520547945205e-07, "log_odds_chosen": -0.21565848588943481, "log_odds_ratio": -0.8647938966751099, "logits/chosen": -0.17321538925170898, "logits/rejected": -0.12760041654109955, "logps/chosen": -2.8801796436309814, "logps/rejected": -2.6632232666015625, "loss": 2.0599, "nll_loss": 1.9734067916870117, "rewards/accuracies": 0.375, "rewards/chosen": -0.28801798820495605, "rewards/margins": -0.021695630624890327, "rewards/rejected": -0.26632237434387207, "step": 443 }, { "epoch": 1.215605749486653, "grad_norm": 6.319919109344482, "learning_rate": 9.393150684931507e-07, "log_odds_chosen": -0.40626442432403564, "log_odds_ratio": -1.1698627471923828, "logits/chosen": 0.12668152153491974, "logits/rejected": 0.19231495261192322, "logps/chosen": -3.042750597000122, "logps/rejected": -2.6063332557678223, "loss": 2.0062, "nll_loss": 1.8891661167144775, "rewards/accuracies": 0.5, "rewards/chosen": -0.3042750954627991, "rewards/margins": -0.04364175349473953, "rewards/rejected": -0.26063331961631775, "step": 444 }, { "epoch": 1.218343600273785, "grad_norm": 6.5045390129089355, "learning_rate": 9.391780821917807e-07, "log_odds_chosen": 0.09232018887996674, "log_odds_ratio": -0.7028149366378784, "logits/chosen": 0.12206407636404037, "logits/rejected": 0.21447038650512695, "logps/chosen": -2.7421281337738037, "logps/rejected": -2.8154656887054443, "loss": 2.0146, "nll_loss": 1.9443120956420898, "rewards/accuracies": 0.625, "rewards/chosen": -0.2742128074169159, "rewards/margins": 0.007333770394325256, "rewards/rejected": -0.28154656291007996, "step": 445 }, { "epoch": 1.2210814510609171, "grad_norm": 6.493402004241943, "learning_rate": 9.390410958904109e-07, "log_odds_chosen": 0.04663806036114693, "log_odds_ratio": -0.7509204149246216, "logits/chosen": 0.006917964667081833, "logits/rejected": -0.024275533854961395, "logps/chosen": -2.812053918838501, "logps/rejected": -2.8087968826293945, "loss": 2.0402, "nll_loss": 1.9651203155517578, "rewards/accuracies": 0.625, "rewards/chosen": -0.2812053859233856, "rewards/margins": -0.0003256918862462044, "rewards/rejected": -0.2808796763420105, "step": 446 }, { "epoch": 1.2238193018480492, "grad_norm": 4.9620819091796875, "learning_rate": 9.389041095890411e-07, "log_odds_chosen": 0.20451225340366364, "log_odds_ratio": -0.6426928043365479, "logits/chosen": -0.010977678000926971, "logits/rejected": -0.12467227876186371, "logps/chosen": -2.081716537475586, "logps/rejected": -2.256412982940674, "loss": 1.9828, "nll_loss": 1.9185168743133545, "rewards/accuracies": 0.5, "rewards/chosen": -0.20817166566848755, "rewards/margins": 0.0174696184694767, "rewards/rejected": -0.22564125061035156, "step": 447 }, { "epoch": 1.2265571526351815, "grad_norm": 5.2874956130981445, "learning_rate": 9.387671232876712e-07, "log_odds_chosen": 0.12184207141399384, "log_odds_ratio": -0.6956649422645569, "logits/chosen": 0.06886376440525055, "logits/rejected": 0.06067454442381859, "logps/chosen": -2.2705492973327637, "logps/rejected": -2.3747851848602295, "loss": 2.0274, "nll_loss": 1.9578691720962524, "rewards/accuracies": 0.75, "rewards/chosen": -0.22705495357513428, "rewards/margins": 0.010423587635159492, "rewards/rejected": -0.23747850954532623, "step": 448 }, { "epoch": 1.2292950034223136, "grad_norm": 6.69230318069458, "learning_rate": 9.386301369863013e-07, "log_odds_chosen": 0.26338082551956177, "log_odds_ratio": -0.6788816452026367, "logits/chosen": 0.06091475486755371, "logits/rejected": 0.12080074846744537, "logps/chosen": -2.9769725799560547, "logps/rejected": -3.1810965538024902, "loss": 2.0469, "nll_loss": 1.9790256023406982, "rewards/accuracies": 0.625, "rewards/chosen": -0.2976972460746765, "rewards/margins": 0.020412389189004898, "rewards/rejected": -0.3181096315383911, "step": 449 }, { "epoch": 1.2320328542094456, "grad_norm": 5.773446083068848, "learning_rate": 9.384931506849315e-07, "log_odds_chosen": -0.08613263815641403, "log_odds_ratio": -0.9096523523330688, "logits/chosen": -0.24005576968193054, "logits/rejected": -0.29048845171928406, "logps/chosen": -3.0421714782714844, "logps/rejected": -2.9797208309173584, "loss": 2.0425, "nll_loss": 1.9515671730041504, "rewards/accuracies": 0.375, "rewards/chosen": -0.3042171597480774, "rewards/margins": -0.006245063617825508, "rewards/rejected": -0.29797208309173584, "step": 450 }, { "epoch": 1.2347707049965777, "grad_norm": 7.8787126541137695, "learning_rate": 9.383561643835616e-07, "log_odds_chosen": -0.9100937247276306, "log_odds_ratio": -1.5326261520385742, "logits/chosen": 0.04988580569624901, "logits/rejected": 0.09799322485923767, "logps/chosen": -4.179008960723877, "logps/rejected": -3.2686705589294434, "loss": 2.106, "nll_loss": 1.9527744054794312, "rewards/accuracies": 0.375, "rewards/chosen": -0.4179009199142456, "rewards/margins": -0.09103383868932724, "rewards/rejected": -0.3268670439720154, "step": 451 }, { "epoch": 1.2375085557837098, "grad_norm": 5.698753356933594, "learning_rate": 9.382191780821917e-07, "log_odds_chosen": 0.49230045080184937, "log_odds_ratio": -0.5523064732551575, "logits/chosen": 0.04897916316986084, "logits/rejected": -0.07734450697898865, "logps/chosen": -2.4449479579925537, "logps/rejected": -2.8827767372131348, "loss": 1.999, "nll_loss": 1.943744421005249, "rewards/accuracies": 0.625, "rewards/chosen": -0.24449479579925537, "rewards/margins": 0.04378288611769676, "rewards/rejected": -0.28827768564224243, "step": 452 }, { "epoch": 1.2402464065708418, "grad_norm": 6.204151153564453, "learning_rate": 9.380821917808219e-07, "log_odds_chosen": -0.1110333800315857, "log_odds_ratio": -1.0313379764556885, "logits/chosen": 0.04237861558794975, "logits/rejected": -0.04812121018767357, "logps/chosen": -2.978623151779175, "logps/rejected": -2.8304948806762695, "loss": 2.0467, "nll_loss": 1.9435625076293945, "rewards/accuracies": 0.625, "rewards/chosen": -0.29786235094070435, "rewards/margins": -0.014812842011451721, "rewards/rejected": -0.28304949402809143, "step": 453 }, { "epoch": 1.242984257357974, "grad_norm": 5.854977607727051, "learning_rate": 9.37945205479452e-07, "log_odds_chosen": -0.3142443001270294, "log_odds_ratio": -0.9079422950744629, "logits/chosen": -0.28308725357055664, "logits/rejected": -0.28391793370246887, "logps/chosen": -2.817986011505127, "logps/rejected": -2.5325801372528076, "loss": 2.0234, "nll_loss": 1.932644009590149, "rewards/accuracies": 0.5, "rewards/chosen": -0.2817986011505127, "rewards/margins": -0.028540577739477158, "rewards/rejected": -0.2532580494880676, "step": 454 }, { "epoch": 1.245722108145106, "grad_norm": 6.426697254180908, "learning_rate": 9.378082191780822e-07, "log_odds_chosen": -0.03509041666984558, "log_odds_ratio": -0.8634753823280334, "logits/chosen": -0.04950820654630661, "logits/rejected": -0.09758874028921127, "logps/chosen": -3.426693916320801, "logps/rejected": -3.3446202278137207, "loss": 2.0779, "nll_loss": 1.9915213584899902, "rewards/accuracies": 0.625, "rewards/chosen": -0.34266942739486694, "rewards/margins": -0.008207365870475769, "rewards/rejected": -0.33446204662323, "step": 455 }, { "epoch": 1.2484599589322383, "grad_norm": 6.469742774963379, "learning_rate": 9.376712328767122e-07, "log_odds_chosen": 0.06363630294799805, "log_odds_ratio": -0.7756506204605103, "logits/chosen": -0.08502018451690674, "logits/rejected": -0.034950610250234604, "logps/chosen": -2.6381595134735107, "logps/rejected": -2.6913375854492188, "loss": 2.0683, "nll_loss": 1.9907376766204834, "rewards/accuracies": 0.5, "rewards/chosen": -0.2638159692287445, "rewards/margins": 0.005317792296409607, "rewards/rejected": -0.2691337466239929, "step": 456 }, { "epoch": 1.2511978097193703, "grad_norm": 6.407381057739258, "learning_rate": 9.375342465753424e-07, "log_odds_chosen": 0.24105948209762573, "log_odds_ratio": -0.7588833570480347, "logits/chosen": -0.0903555229306221, "logits/rejected": -0.02907221019268036, "logps/chosen": -3.3369240760803223, "logps/rejected": -3.547560691833496, "loss": 2.0213, "nll_loss": 1.9453697204589844, "rewards/accuracies": 0.75, "rewards/chosen": -0.33369240164756775, "rewards/margins": 0.02106366492807865, "rewards/rejected": -0.35475605726242065, "step": 457 }, { "epoch": 1.2539356605065024, "grad_norm": 6.960292339324951, "learning_rate": 9.373972602739726e-07, "log_odds_chosen": -0.12514717876911163, "log_odds_ratio": -0.7933743596076965, "logits/chosen": -0.15552698075771332, "logits/rejected": -0.11155910789966583, "logps/chosen": -3.173758029937744, "logps/rejected": -3.0493950843811035, "loss": 2.0117, "nll_loss": 1.9323163032531738, "rewards/accuracies": 0.375, "rewards/chosen": -0.3173758089542389, "rewards/margins": -0.012436304241418839, "rewards/rejected": -0.30493947863578796, "step": 458 }, { "epoch": 1.2566735112936345, "grad_norm": 6.510374069213867, "learning_rate": 9.372602739726026e-07, "log_odds_chosen": 0.2924765944480896, "log_odds_ratio": -0.6108769178390503, "logits/chosen": -0.14339639246463776, "logits/rejected": -0.13839857280254364, "logps/chosen": -2.842043399810791, "logps/rejected": -3.1145248413085938, "loss": 1.9988, "nll_loss": 1.9376938343048096, "rewards/accuracies": 0.75, "rewards/chosen": -0.284204363822937, "rewards/margins": 0.027248132973909378, "rewards/rejected": -0.3114524781703949, "step": 459 }, { "epoch": 1.2594113620807665, "grad_norm": 5.638012886047363, "learning_rate": 9.371232876712328e-07, "log_odds_chosen": 0.42478007078170776, "log_odds_ratio": -0.607079267501831, "logits/chosen": -0.010891899466514587, "logits/rejected": -0.18172305822372437, "logps/chosen": -2.406454086303711, "logps/rejected": -2.792642116546631, "loss": 1.9961, "nll_loss": 1.9354207515716553, "rewards/accuracies": 0.625, "rewards/chosen": -0.2406454086303711, "rewards/margins": 0.038618799299001694, "rewards/rejected": -0.2792642116546631, "step": 460 }, { "epoch": 1.2621492128678988, "grad_norm": 5.584589004516602, "learning_rate": 9.36986301369863e-07, "log_odds_chosen": -0.04340885579586029, "log_odds_ratio": -0.7541563510894775, "logits/chosen": -0.012220669537782669, "logits/rejected": -0.045330800116062164, "logps/chosen": -2.2999682426452637, "logps/rejected": -2.2767858505249023, "loss": 1.9278, "nll_loss": 1.8523595333099365, "rewards/accuracies": 0.625, "rewards/chosen": -0.22999683022499084, "rewards/margins": -0.0023182546719908714, "rewards/rejected": -0.2276785671710968, "step": 461 }, { "epoch": 1.264887063655031, "grad_norm": 6.1609787940979, "learning_rate": 9.36849315068493e-07, "log_odds_chosen": 0.47539669275283813, "log_odds_ratio": -0.6814793348312378, "logits/chosen": -0.1603679358959198, "logits/rejected": -0.17644177377223969, "logps/chosen": -2.9854612350463867, "logps/rejected": -3.420835018157959, "loss": 1.9787, "nll_loss": 1.9105539321899414, "rewards/accuracies": 0.375, "rewards/chosen": -0.29854610562324524, "rewards/margins": 0.043537385761737823, "rewards/rejected": -0.34208351373672485, "step": 462 }, { "epoch": 1.267624914442163, "grad_norm": 5.533017635345459, "learning_rate": 9.367123287671232e-07, "log_odds_chosen": 0.8908612728118896, "log_odds_ratio": -0.4206203818321228, "logits/chosen": 0.05054266005754471, "logits/rejected": -0.08628642559051514, "logps/chosen": -1.944718599319458, "logps/rejected": -2.768383502960205, "loss": 1.8716, "nll_loss": 1.829517126083374, "rewards/accuracies": 0.875, "rewards/chosen": -0.19447186589241028, "rewards/margins": 0.08236651867628098, "rewards/rejected": -0.27683836221694946, "step": 463 }, { "epoch": 1.270362765229295, "grad_norm": 5.910041332244873, "learning_rate": 9.365753424657534e-07, "log_odds_chosen": 1.7115483283996582, "log_odds_ratio": -0.49788156151771545, "logits/chosen": -0.09633143991231918, "logits/rejected": -0.16721968352794647, "logps/chosen": -2.4308230876922607, "logps/rejected": -4.0902252197265625, "loss": 1.9524, "nll_loss": 1.902604579925537, "rewards/accuracies": 0.75, "rewards/chosen": -0.24308231472969055, "rewards/margins": 0.16594024002552032, "rewards/rejected": -0.4090225398540497, "step": 464 }, { "epoch": 1.273100616016427, "grad_norm": 8.899232864379883, "learning_rate": 9.364383561643835e-07, "log_odds_chosen": -0.9563772082328796, "log_odds_ratio": -1.3160090446472168, "logits/chosen": 0.005281224846839905, "logits/rejected": 0.13414308428764343, "logps/chosen": -4.451850414276123, "logps/rejected": -3.515002727508545, "loss": 2.0255, "nll_loss": 1.8939392566680908, "rewards/accuracies": 0.0, "rewards/chosen": -0.44518500566482544, "rewards/margins": -0.09368473291397095, "rewards/rejected": -0.3515003025531769, "step": 465 }, { "epoch": 1.2758384668035592, "grad_norm": 6.659613132476807, "learning_rate": 9.363013698630136e-07, "log_odds_chosen": -0.2895565629005432, "log_odds_ratio": -0.9934070706367493, "logits/chosen": -0.09768948704004288, "logits/rejected": -0.12581753730773926, "logps/chosen": -3.2767295837402344, "logps/rejected": -2.9686388969421387, "loss": 1.9591, "nll_loss": 1.859762191772461, "rewards/accuracies": 0.375, "rewards/chosen": -0.32767295837402344, "rewards/margins": -0.030809074640274048, "rewards/rejected": -0.2968639135360718, "step": 466 }, { "epoch": 1.2785763175906912, "grad_norm": 6.497788906097412, "learning_rate": 9.361643835616438e-07, "log_odds_chosen": -0.15405265986919403, "log_odds_ratio": -0.8683485388755798, "logits/chosen": -0.16778963804244995, "logits/rejected": -0.17338134348392487, "logps/chosen": -3.062025308609009, "logps/rejected": -2.8903799057006836, "loss": 1.9388, "nll_loss": 1.85194730758667, "rewards/accuracies": 0.375, "rewards/chosen": -0.3062025308609009, "rewards/margins": -0.01716454327106476, "rewards/rejected": -0.2890380024909973, "step": 467 }, { "epoch": 1.2813141683778233, "grad_norm": 8.953910827636719, "learning_rate": 9.360273972602739e-07, "log_odds_chosen": -1.2042899131774902, "log_odds_ratio": -1.6898422241210938, "logits/chosen": 0.019595958292484283, "logits/rejected": 0.08149012923240662, "logps/chosen": -4.544984340667725, "logps/rejected": -3.3163485527038574, "loss": 2.0616, "nll_loss": 1.8926358222961426, "rewards/accuracies": 0.25, "rewards/chosen": -0.45449844002723694, "rewards/margins": -0.12286358326673508, "rewards/rejected": -0.33163484930992126, "step": 468 }, { "epoch": 1.2840520191649554, "grad_norm": 6.777801513671875, "learning_rate": 9.358904109589041e-07, "log_odds_chosen": -0.11635860800743103, "log_odds_ratio": -0.9190118908882141, "logits/chosen": -0.19978220760822296, "logits/rejected": -0.16790983080863953, "logps/chosen": -2.979928493499756, "logps/rejected": -2.837279796600342, "loss": 1.9591, "nll_loss": 1.8672131299972534, "rewards/accuracies": 0.375, "rewards/chosen": -0.29799288511276245, "rewards/margins": -0.014264890924096107, "rewards/rejected": -0.2837279736995697, "step": 469 }, { "epoch": 1.2867898699520877, "grad_norm": 6.175589561462402, "learning_rate": 9.357534246575341e-07, "log_odds_chosen": 0.6604171395301819, "log_odds_ratio": -0.5916960835456848, "logits/chosen": -0.19348955154418945, "logits/rejected": -0.1812255084514618, "logps/chosen": -2.4682421684265137, "logps/rejected": -3.015697717666626, "loss": 1.9603, "nll_loss": 1.9011505842208862, "rewards/accuracies": 0.5, "rewards/chosen": -0.2468242347240448, "rewards/margins": 0.05474554002285004, "rewards/rejected": -0.30156975984573364, "step": 470 }, { "epoch": 1.2895277207392197, "grad_norm": 7.826776504516602, "learning_rate": 9.356164383561643e-07, "log_odds_chosen": 0.35809651017189026, "log_odds_ratio": -0.7291797399520874, "logits/chosen": -0.022199753671884537, "logits/rejected": -0.04909362643957138, "logps/chosen": -3.2560818195343018, "logps/rejected": -3.5845046043395996, "loss": 1.9188, "nll_loss": 1.845914363861084, "rewards/accuracies": 0.75, "rewards/chosen": -0.32560819387435913, "rewards/margins": 0.03284227475523949, "rewards/rejected": -0.35845044255256653, "step": 471 }, { "epoch": 1.2922655715263518, "grad_norm": 6.880213737487793, "learning_rate": 9.354794520547945e-07, "log_odds_chosen": -0.4959982633590698, "log_odds_ratio": -1.0327560901641846, "logits/chosen": -0.20341797173023224, "logits/rejected": -0.16166944801807404, "logps/chosen": -3.1981024742126465, "logps/rejected": -2.7144336700439453, "loss": 2.0512, "nll_loss": 1.9479148387908936, "rewards/accuracies": 0.375, "rewards/chosen": -0.31981027126312256, "rewards/margins": -0.04836687073111534, "rewards/rejected": -0.27144336700439453, "step": 472 }, { "epoch": 1.2950034223134839, "grad_norm": 7.14364767074585, "learning_rate": 9.353424657534245e-07, "log_odds_chosen": -0.5145339965820312, "log_odds_ratio": -1.0290336608886719, "logits/chosen": -0.17104440927505493, "logits/rejected": -0.12096111476421356, "logps/chosen": -3.1967878341674805, "logps/rejected": -2.7153730392456055, "loss": 2.0314, "nll_loss": 1.9285097122192383, "rewards/accuracies": 0.25, "rewards/chosen": -0.31967878341674805, "rewards/margins": -0.048141494393348694, "rewards/rejected": -0.27153730392456055, "step": 473 }, { "epoch": 1.2977412731006162, "grad_norm": 7.361808776855469, "learning_rate": 9.352054794520547e-07, "log_odds_chosen": 0.21981282532215118, "log_odds_ratio": -0.7919771671295166, "logits/chosen": -0.12605595588684082, "logits/rejected": -0.09748886525630951, "logps/chosen": -2.5607523918151855, "logps/rejected": -2.77260684967041, "loss": 1.9285, "nll_loss": 1.849346399307251, "rewards/accuracies": 0.625, "rewards/chosen": -0.2560752332210541, "rewards/margins": 0.021185442805290222, "rewards/rejected": -0.2772606909275055, "step": 474 }, { "epoch": 1.3004791238877482, "grad_norm": 6.261199474334717, "learning_rate": 9.350684931506849e-07, "log_odds_chosen": -0.5677189826965332, "log_odds_ratio": -1.0763823986053467, "logits/chosen": -0.1138366088271141, "logits/rejected": -0.1569441705942154, "logps/chosen": -2.796691417694092, "logps/rejected": -2.2817792892456055, "loss": 2.0023, "nll_loss": 1.8946810960769653, "rewards/accuracies": 0.125, "rewards/chosen": -0.2796691358089447, "rewards/margins": -0.05149122700095177, "rewards/rejected": -0.22817791998386383, "step": 475 }, { "epoch": 1.3032169746748803, "grad_norm": 6.684975624084473, "learning_rate": 9.349315068493149e-07, "log_odds_chosen": 0.22174468636512756, "log_odds_ratio": -0.7004354596138, "logits/chosen": -0.18674685060977936, "logits/rejected": -0.1968977153301239, "logps/chosen": -2.5615410804748535, "logps/rejected": -2.77352237701416, "loss": 1.9546, "nll_loss": 1.8845723867416382, "rewards/accuracies": 0.5, "rewards/chosen": -0.2561541199684143, "rewards/margins": 0.021198146045207977, "rewards/rejected": -0.2773522734642029, "step": 476 }, { "epoch": 1.3059548254620124, "grad_norm": 8.099464416503906, "learning_rate": 9.347945205479451e-07, "log_odds_chosen": -0.7531293034553528, "log_odds_ratio": -1.452790379524231, "logits/chosen": -0.13186681270599365, "logits/rejected": -0.09719499200582504, "logps/chosen": -3.8020665645599365, "logps/rejected": -3.0498311519622803, "loss": 1.9962, "nll_loss": 1.8509273529052734, "rewards/accuracies": 0.5, "rewards/chosen": -0.3802066743373871, "rewards/margins": -0.07522356510162354, "rewards/rejected": -0.30498310923576355, "step": 477 }, { "epoch": 1.3086926762491444, "grad_norm": 8.562336921691895, "learning_rate": 9.346575342465753e-07, "log_odds_chosen": -0.725472092628479, "log_odds_ratio": -1.1602264642715454, "logits/chosen": -0.10637436807155609, "logits/rejected": -0.016863610595464706, "logps/chosen": -4.1517863273620605, "logps/rejected": -3.4416885375976562, "loss": 2.0364, "nll_loss": 1.9203903675079346, "rewards/accuracies": 0.25, "rewards/chosen": -0.4151786267757416, "rewards/margins": -0.07100974023342133, "rewards/rejected": -0.34416890144348145, "step": 478 }, { "epoch": 1.3114305270362765, "grad_norm": 7.883884429931641, "learning_rate": 9.345205479452054e-07, "log_odds_chosen": -0.744693398475647, "log_odds_ratio": -1.2721235752105713, "logits/chosen": 0.007406502962112427, "logits/rejected": 0.03488468378782272, "logps/chosen": -3.558108329772949, "logps/rejected": -2.823091983795166, "loss": 2.0378, "nll_loss": 1.9105961322784424, "rewards/accuracies": 0.5, "rewards/chosen": -0.35581082105636597, "rewards/margins": -0.07350163161754608, "rewards/rejected": -0.2823092043399811, "step": 479 }, { "epoch": 1.3141683778234086, "grad_norm": 7.193782806396484, "learning_rate": 9.343835616438355e-07, "log_odds_chosen": 0.320141077041626, "log_odds_ratio": -0.6728485822677612, "logits/chosen": -0.16155694425106049, "logits/rejected": -0.11167629063129425, "logps/chosen": -2.7378180027008057, "logps/rejected": -2.9716618061065674, "loss": 1.9189, "nll_loss": 1.8516018390655518, "rewards/accuracies": 0.625, "rewards/chosen": -0.27378183603286743, "rewards/margins": 0.02338438481092453, "rewards/rejected": -0.29716619849205017, "step": 480 }, { "epoch": 1.3169062286105406, "grad_norm": 7.200674057006836, "learning_rate": 9.342465753424658e-07, "log_odds_chosen": 0.09067685902118683, "log_odds_ratio": -0.769990086555481, "logits/chosen": 0.0009598154574632645, "logits/rejected": 0.042895641177892685, "logps/chosen": -3.0947647094726562, "logps/rejected": -3.172114849090576, "loss": 1.8984, "nll_loss": 1.8214020729064941, "rewards/accuracies": 0.625, "rewards/chosen": -0.30947649478912354, "rewards/margins": 0.00773501954972744, "rewards/rejected": -0.3172115087509155, "step": 481 }, { "epoch": 1.3196440793976727, "grad_norm": 7.217822551727295, "learning_rate": 9.341095890410959e-07, "log_odds_chosen": -0.365786075592041, "log_odds_ratio": -1.0960357189178467, "logits/chosen": -0.1376594603061676, "logits/rejected": -0.1156233623623848, "logps/chosen": -3.305227041244507, "logps/rejected": -2.986506700515747, "loss": 1.9774, "nll_loss": 1.8677680492401123, "rewards/accuracies": 0.375, "rewards/chosen": -0.33052271604537964, "rewards/margins": -0.03187201917171478, "rewards/rejected": -0.29865068197250366, "step": 482 }, { "epoch": 1.322381930184805, "grad_norm": 7.208761692047119, "learning_rate": 9.33972602739726e-07, "log_odds_chosen": -1.0405683517456055, "log_odds_ratio": -1.557146668434143, "logits/chosen": -0.23741120100021362, "logits/rejected": -0.23669129610061646, "logps/chosen": -3.9245071411132812, "logps/rejected": -2.8922557830810547, "loss": 2.0517, "nll_loss": 1.8960065841674805, "rewards/accuracies": 0.375, "rewards/chosen": -0.392450749874115, "rewards/margins": -0.10322517156600952, "rewards/rejected": -0.28922557830810547, "step": 483 }, { "epoch": 1.325119780971937, "grad_norm": 8.543277740478516, "learning_rate": 9.338356164383562e-07, "log_odds_chosen": -0.22174957394599915, "log_odds_ratio": -1.110335111618042, "logits/chosen": -0.11517457664012909, "logits/rejected": -0.042325329035520554, "logps/chosen": -3.808152675628662, "logps/rejected": -3.5843088626861572, "loss": 2.0104, "nll_loss": 1.8993616104125977, "rewards/accuracies": 0.375, "rewards/chosen": -0.3808152675628662, "rewards/margins": -0.02238435298204422, "rewards/rejected": -0.3584308922290802, "step": 484 }, { "epoch": 1.3278576317590691, "grad_norm": 7.230460166931152, "learning_rate": 9.336986301369863e-07, "log_odds_chosen": -0.8628613948822021, "log_odds_ratio": -1.3262708187103271, "logits/chosen": -0.19540947675704956, "logits/rejected": -0.15178778767585754, "logps/chosen": -3.825709819793701, "logps/rejected": -2.9839069843292236, "loss": 1.9618, "nll_loss": 1.8291535377502441, "rewards/accuracies": 0.25, "rewards/chosen": -0.3825710117816925, "rewards/margins": -0.08418029546737671, "rewards/rejected": -0.2983907163143158, "step": 485 }, { "epoch": 1.3305954825462012, "grad_norm": 8.714103698730469, "learning_rate": 9.335616438356165e-07, "log_odds_chosen": -0.9611393809318542, "log_odds_ratio": -1.3747693300247192, "logits/chosen": -0.12686191499233246, "logits/rejected": -0.06625960767269135, "logps/chosen": -3.996443748474121, "logps/rejected": -3.0781121253967285, "loss": 2.0586, "nll_loss": 1.9211595058441162, "rewards/accuracies": 0.125, "rewards/chosen": -0.3996443748474121, "rewards/margins": -0.09183315932750702, "rewards/rejected": -0.3078112006187439, "step": 486 }, { "epoch": 1.3333333333333333, "grad_norm": 6.028587818145752, "learning_rate": 9.334246575342465e-07, "log_odds_chosen": -0.12490344047546387, "log_odds_ratio": -0.871067464351654, "logits/chosen": -0.0964454859495163, "logits/rejected": -0.19763849675655365, "logps/chosen": -2.5178415775299072, "logps/rejected": -2.3895692825317383, "loss": 1.9314, "nll_loss": 1.8442789316177368, "rewards/accuracies": 0.625, "rewards/chosen": -0.25178417563438416, "rewards/margins": -0.01282723993062973, "rewards/rejected": -0.23895695805549622, "step": 487 }, { "epoch": 1.3360711841204655, "grad_norm": 6.8826446533203125, "learning_rate": 9.332876712328767e-07, "log_odds_chosen": -0.8946650624275208, "log_odds_ratio": -1.3259575366973877, "logits/chosen": -0.2864667773246765, "logits/rejected": -0.22799307107925415, "logps/chosen": -3.306166648864746, "logps/rejected": -2.460308790206909, "loss": 2.0266, "nll_loss": 1.8940458297729492, "rewards/accuracies": 0.125, "rewards/chosen": -0.33061665296554565, "rewards/margins": -0.08458579331636429, "rewards/rejected": -0.24603088200092316, "step": 488 }, { "epoch": 1.3388090349075976, "grad_norm": 7.602329730987549, "learning_rate": 9.331506849315069e-07, "log_odds_chosen": -0.5487110614776611, "log_odds_ratio": -1.183469295501709, "logits/chosen": -0.12768831849098206, "logits/rejected": -0.09388339519500732, "logps/chosen": -3.899545907974243, "logps/rejected": -3.335409164428711, "loss": 1.9326, "nll_loss": 1.8142814636230469, "rewards/accuracies": 0.375, "rewards/chosen": -0.3899546265602112, "rewards/margins": -0.0564136803150177, "rewards/rejected": -0.3335409462451935, "step": 489 }, { "epoch": 1.3415468856947297, "grad_norm": 7.2812957763671875, "learning_rate": 9.330136986301369e-07, "log_odds_chosen": -0.7379436492919922, "log_odds_ratio": -1.2372934818267822, "logits/chosen": -0.17892585694789886, "logits/rejected": -0.09605428576469421, "logps/chosen": -3.553781509399414, "logps/rejected": -2.8235063552856445, "loss": 1.9222, "nll_loss": 1.7984462976455688, "rewards/accuracies": 0.125, "rewards/chosen": -0.3553781509399414, "rewards/margins": -0.07302749902009964, "rewards/rejected": -0.2823506295681, "step": 490 }, { "epoch": 1.3442847364818618, "grad_norm": 6.5149383544921875, "learning_rate": 9.328767123287671e-07, "log_odds_chosen": -0.8146211504936218, "log_odds_ratio": -1.4150505065917969, "logits/chosen": -0.13134470582008362, "logits/rejected": -0.05989505350589752, "logps/chosen": -3.202047824859619, "logps/rejected": -2.3990559577941895, "loss": 2.0268, "nll_loss": 1.885280966758728, "rewards/accuracies": 0.625, "rewards/chosen": -0.32020479440689087, "rewards/margins": -0.08029919117689133, "rewards/rejected": -0.23990559577941895, "step": 491 }, { "epoch": 1.3470225872689938, "grad_norm": 7.0301947593688965, "learning_rate": 9.327397260273973e-07, "log_odds_chosen": -0.16811034083366394, "log_odds_ratio": -1.0497184991836548, "logits/chosen": -0.11134379357099533, "logits/rejected": -0.18351911008358002, "logps/chosen": -3.247535228729248, "logps/rejected": -3.100637674331665, "loss": 1.9464, "nll_loss": 1.8414453268051147, "rewards/accuracies": 0.25, "rewards/chosen": -0.3247535228729248, "rewards/margins": -0.014689743518829346, "rewards/rejected": -0.31006374955177307, "step": 492 }, { "epoch": 1.3497604380561259, "grad_norm": 7.718425273895264, "learning_rate": 9.326027397260274e-07, "log_odds_chosen": 0.023541517555713654, "log_odds_ratio": -0.9629849195480347, "logits/chosen": -0.16487565636634827, "logits/rejected": -0.06112153083086014, "logps/chosen": -3.7480697631835938, "logps/rejected": -3.8010783195495605, "loss": 1.9202, "nll_loss": 1.8238873481750488, "rewards/accuracies": 0.375, "rewards/chosen": -0.3748069703578949, "rewards/margins": 0.005300883203744888, "rewards/rejected": -0.3801078200340271, "step": 493 }, { "epoch": 1.352498288843258, "grad_norm": 7.021302700042725, "learning_rate": 9.324657534246575e-07, "log_odds_chosen": 0.17051492631435394, "log_odds_ratio": -0.6285663843154907, "logits/chosen": 0.032014086842536926, "logits/rejected": 0.03530222177505493, "logps/chosen": -3.5435688495635986, "logps/rejected": -3.7093255519866943, "loss": 1.9298, "nll_loss": 1.8669371604919434, "rewards/accuracies": 0.5, "rewards/chosen": -0.35435688495635986, "rewards/margins": 0.016575662419199944, "rewards/rejected": -0.37093254923820496, "step": 494 }, { "epoch": 1.35523613963039, "grad_norm": 7.1578240394592285, "learning_rate": 9.323287671232877e-07, "log_odds_chosen": -0.2901650071144104, "log_odds_ratio": -0.9430963397026062, "logits/chosen": -0.05520801618695259, "logits/rejected": -0.01988614723086357, "logps/chosen": -3.484797477722168, "logps/rejected": -3.1656582355499268, "loss": 1.9472, "nll_loss": 1.8528724908828735, "rewards/accuracies": 0.375, "rewards/chosen": -0.3484797477722168, "rewards/margins": -0.03191390633583069, "rewards/rejected": -0.3165658414363861, "step": 495 }, { "epoch": 1.3579739904175223, "grad_norm": 6.603648662567139, "learning_rate": 9.321917808219178e-07, "log_odds_chosen": -0.018344581127166748, "log_odds_ratio": -0.7642827033996582, "logits/chosen": -0.08410815894603729, "logits/rejected": -0.07840962707996368, "logps/chosen": -2.8814172744750977, "logps/rejected": -2.8533122539520264, "loss": 1.902, "nll_loss": 1.825595498085022, "rewards/accuracies": 0.5, "rewards/chosen": -0.28814175724983215, "rewards/margins": -0.0028105024248361588, "rewards/rejected": -0.28533124923706055, "step": 496 }, { "epoch": 1.3607118412046544, "grad_norm": 5.945331573486328, "learning_rate": 9.320547945205479e-07, "log_odds_chosen": -0.06949543952941895, "log_odds_ratio": -0.808373212814331, "logits/chosen": -0.14304819703102112, "logits/rejected": -0.2121325433254242, "logps/chosen": -2.662160634994507, "logps/rejected": -2.573479652404785, "loss": 1.8672, "nll_loss": 1.7863216400146484, "rewards/accuracies": 0.5, "rewards/chosen": -0.26621606945991516, "rewards/margins": -0.008868115022778511, "rewards/rejected": -0.2573479413986206, "step": 497 }, { "epoch": 1.3634496919917864, "grad_norm": 5.559112071990967, "learning_rate": 9.319178082191781e-07, "log_odds_chosen": 0.3176124691963196, "log_odds_ratio": -0.6344820261001587, "logits/chosen": -0.025096740573644638, "logits/rejected": -0.11225530505180359, "logps/chosen": -1.9171744585037231, "logps/rejected": -2.159975290298462, "loss": 1.9105, "nll_loss": 1.8470896482467651, "rewards/accuracies": 0.625, "rewards/chosen": -0.1917174756526947, "rewards/margins": 0.02428007312119007, "rewards/rejected": -0.21599753201007843, "step": 498 }, { "epoch": 1.3661875427789185, "grad_norm": 7.360130310058594, "learning_rate": 9.317808219178082e-07, "log_odds_chosen": 0.24277403950691223, "log_odds_ratio": -0.830397367477417, "logits/chosen": -0.04304021596908569, "logits/rejected": -0.04949511960148811, "logps/chosen": -3.093879222869873, "logps/rejected": -3.305297613143921, "loss": 1.8729, "nll_loss": 1.7899013757705688, "rewards/accuracies": 0.5, "rewards/chosen": -0.3093879222869873, "rewards/margins": 0.021141840144991875, "rewards/rejected": -0.3305297791957855, "step": 499 }, { "epoch": 1.3689253935660506, "grad_norm": 5.775421142578125, "learning_rate": 9.316438356164384e-07, "log_odds_chosen": 0.11468780040740967, "log_odds_ratio": -0.7015857696533203, "logits/chosen": -0.1363600492477417, "logits/rejected": -0.3366256356239319, "logps/chosen": -2.8594560623168945, "logps/rejected": -2.9631412029266357, "loss": 1.8583, "nll_loss": 1.7881652116775513, "rewards/accuracies": 0.375, "rewards/chosen": -0.2859455943107605, "rewards/margins": 0.01036851480603218, "rewards/rejected": -0.2963141202926636, "step": 500 }, { "epoch": 1.3716632443531829, "grad_norm": 6.825518608093262, "learning_rate": 9.315068493150684e-07, "log_odds_chosen": -0.038352616131305695, "log_odds_ratio": -0.8423122763633728, "logits/chosen": -0.060829609632492065, "logits/rejected": -0.03439291566610336, "logps/chosen": -3.3272416591644287, "logps/rejected": -3.279733657836914, "loss": 1.9917, "nll_loss": 1.9074459075927734, "rewards/accuracies": 0.625, "rewards/chosen": -0.3327241539955139, "rewards/margins": -0.004750791937112808, "rewards/rejected": -0.3279733657836914, "step": 501 }, { "epoch": 1.374401095140315, "grad_norm": 7.0353312492370605, "learning_rate": 9.313698630136986e-07, "log_odds_chosen": -0.6353121399879456, "log_odds_ratio": -1.1348755359649658, "logits/chosen": -0.16987238824367523, "logits/rejected": -0.10134944319725037, "logps/chosen": -3.6414265632629395, "logps/rejected": -3.0183029174804688, "loss": 1.9177, "nll_loss": 1.8041832447052002, "rewards/accuracies": 0.25, "rewards/chosen": -0.36414265632629395, "rewards/margins": -0.06231236457824707, "rewards/rejected": -0.3018302917480469, "step": 502 }, { "epoch": 1.377138945927447, "grad_norm": 5.682284832000732, "learning_rate": 9.312328767123288e-07, "log_odds_chosen": 0.33405357599258423, "log_odds_ratio": -0.598849892616272, "logits/chosen": -0.1838800609111786, "logits/rejected": -0.2200966775417328, "logps/chosen": -2.1419496536254883, "logps/rejected": -2.4530038833618164, "loss": 1.8519, "nll_loss": 1.7920557260513306, "rewards/accuracies": 0.875, "rewards/chosen": -0.21419498324394226, "rewards/margins": 0.03110542520880699, "rewards/rejected": -0.24530041217803955, "step": 503 }, { "epoch": 1.379876796714579, "grad_norm": 6.463913440704346, "learning_rate": 9.310958904109588e-07, "log_odds_chosen": -0.06747068464756012, "log_odds_ratio": -0.8716768622398376, "logits/chosen": -0.07705385982990265, "logits/rejected": -0.08040977269411087, "logps/chosen": -3.138439178466797, "logps/rejected": -3.047764301300049, "loss": 1.8835, "nll_loss": 1.7963037490844727, "rewards/accuracies": 0.625, "rewards/chosen": -0.3138439655303955, "rewards/margins": -0.00906747579574585, "rewards/rejected": -0.30477645993232727, "step": 504 }, { "epoch": 1.3826146475017111, "grad_norm": 6.367725849151611, "learning_rate": 9.30958904109589e-07, "log_odds_chosen": 0.2323857545852661, "log_odds_ratio": -0.7213320732116699, "logits/chosen": -0.016175922006368637, "logits/rejected": 0.033310528844594955, "logps/chosen": -2.854224920272827, "logps/rejected": -3.048591375350952, "loss": 1.873, "nll_loss": 1.8008240461349487, "rewards/accuracies": 0.5, "rewards/chosen": -0.28542250394821167, "rewards/margins": 0.019436649978160858, "rewards/rejected": -0.3048591613769531, "step": 505 }, { "epoch": 1.3853524982888432, "grad_norm": 6.864601135253906, "learning_rate": 9.308219178082192e-07, "log_odds_chosen": -0.07736103236675262, "log_odds_ratio": -0.8157106637954712, "logits/chosen": -0.13142496347427368, "logits/rejected": -0.08467303961515427, "logps/chosen": -2.9414358139038086, "logps/rejected": -2.8427183628082275, "loss": 1.8495, "nll_loss": 1.7679654359817505, "rewards/accuracies": 0.625, "rewards/chosen": -0.29414358735084534, "rewards/margins": -0.00987173430621624, "rewards/rejected": -0.28427183628082275, "step": 506 }, { "epoch": 1.3880903490759753, "grad_norm": 5.46818208694458, "learning_rate": 9.306849315068493e-07, "log_odds_chosen": -0.013668008148670197, "log_odds_ratio": -0.7362710237503052, "logits/chosen": -0.17484793066978455, "logits/rejected": -0.22779104113578796, "logps/chosen": -2.443589687347412, "logps/rejected": -2.4108264446258545, "loss": 1.8195, "nll_loss": 1.7458734512329102, "rewards/accuracies": 0.5, "rewards/chosen": -0.24435898661613464, "rewards/margins": -0.0032763294875621796, "rewards/rejected": -0.24108265340328217, "step": 507 }, { "epoch": 1.3908281998631074, "grad_norm": 6.900245666503906, "learning_rate": 9.305479452054794e-07, "log_odds_chosen": 0.5123583674430847, "log_odds_ratio": -0.5658900141716003, "logits/chosen": -0.14258015155792236, "logits/rejected": -0.21959835290908813, "logps/chosen": -3.2560112476348877, "logps/rejected": -3.7247257232666016, "loss": 1.81, "nll_loss": 1.7534537315368652, "rewards/accuracies": 0.75, "rewards/chosen": -0.32560113072395325, "rewards/margins": 0.04687146842479706, "rewards/rejected": -0.3724725842475891, "step": 508 }, { "epoch": 1.3935660506502396, "grad_norm": 6.306366443634033, "learning_rate": 9.304109589041096e-07, "log_odds_chosen": -0.3472515344619751, "log_odds_ratio": -0.9133239984512329, "logits/chosen": -0.22892658412456512, "logits/rejected": -0.16704991459846497, "logps/chosen": -2.7405052185058594, "logps/rejected": -2.4129254817962646, "loss": 1.8684, "nll_loss": 1.7770404815673828, "rewards/accuracies": 0.375, "rewards/chosen": -0.2740505337715149, "rewards/margins": -0.03275798633694649, "rewards/rejected": -0.2412925660610199, "step": 509 }, { "epoch": 1.3963039014373717, "grad_norm": 6.004973411560059, "learning_rate": 9.302739726027397e-07, "log_odds_chosen": 0.012471430003643036, "log_odds_ratio": -0.7452807426452637, "logits/chosen": -0.1110643744468689, "logits/rejected": -0.11155823618173599, "logps/chosen": -2.5373992919921875, "logps/rejected": -2.5644726753234863, "loss": 1.9057, "nll_loss": 1.8311920166015625, "rewards/accuracies": 0.375, "rewards/chosen": -0.2537398934364319, "rewards/margins": 0.0027073565870523453, "rewards/rejected": -0.25644728541374207, "step": 510 }, { "epoch": 1.3990417522245038, "grad_norm": 7.807291507720947, "learning_rate": 9.301369863013698e-07, "log_odds_chosen": -0.7094260454177856, "log_odds_ratio": -1.1975713968276978, "logits/chosen": -0.17076170444488525, "logits/rejected": 0.03700067102909088, "logps/chosen": -3.6247141361236572, "logps/rejected": -2.9776721000671387, "loss": 1.8738, "nll_loss": 1.7540829181671143, "rewards/accuracies": 0.25, "rewards/chosen": -0.36247143149375916, "rewards/margins": -0.06470417976379395, "rewards/rejected": -0.2977672219276428, "step": 511 }, { "epoch": 1.4017796030116358, "grad_norm": 5.87154483795166, "learning_rate": 9.3e-07, "log_odds_chosen": 0.47411930561065674, "log_odds_ratio": -0.6444377899169922, "logits/chosen": -0.14215198159217834, "logits/rejected": -0.17891371250152588, "logps/chosen": -2.495349407196045, "logps/rejected": -2.9282641410827637, "loss": 1.8632, "nll_loss": 1.7988008260726929, "rewards/accuracies": 0.625, "rewards/chosen": -0.2495349794626236, "rewards/margins": 0.04329146444797516, "rewards/rejected": -0.29282644391059875, "step": 512 }, { "epoch": 1.404517453798768, "grad_norm": 6.520447254180908, "learning_rate": 9.298630136986301e-07, "log_odds_chosen": 0.3169657588005066, "log_odds_ratio": -0.8223369121551514, "logits/chosen": -0.16076982021331787, "logits/rejected": -0.14188358187675476, "logps/chosen": -3.3770711421966553, "logps/rejected": -3.6308133602142334, "loss": 1.8713, "nll_loss": 1.789089322090149, "rewards/accuracies": 0.625, "rewards/chosen": -0.33770713210105896, "rewards/margins": 0.025374190881848335, "rewards/rejected": -0.36308133602142334, "step": 513 }, { "epoch": 1.4072553045859002, "grad_norm": 7.561253070831299, "learning_rate": 9.297260273972603e-07, "log_odds_chosen": -0.2891818881034851, "log_odds_ratio": -1.1427243947982788, "logits/chosen": 0.05558343976736069, "logits/rejected": 0.05628300830721855, "logps/chosen": -4.026388645172119, "logps/rejected": -3.7224321365356445, "loss": 1.9002, "nll_loss": 1.785917043685913, "rewards/accuracies": 0.375, "rewards/chosen": -0.40263885259628296, "rewards/margins": -0.03039562702178955, "rewards/rejected": -0.3722432255744934, "step": 514 }, { "epoch": 1.4099931553730323, "grad_norm": 6.0637407302856445, "learning_rate": 9.295890410958903e-07, "log_odds_chosen": 0.4294165372848511, "log_odds_ratio": -0.545200526714325, "logits/chosen": -0.11790597438812256, "logits/rejected": -0.08752311021089554, "logps/chosen": -2.7575266361236572, "logps/rejected": -3.119748115539551, "loss": 1.8112, "nll_loss": 1.7567181587219238, "rewards/accuracies": 0.875, "rewards/chosen": -0.2757526636123657, "rewards/margins": 0.03622213751077652, "rewards/rejected": -0.31197479367256165, "step": 515 }, { "epoch": 1.4127310061601643, "grad_norm": 5.727234363555908, "learning_rate": 9.294520547945205e-07, "log_odds_chosen": 0.10074132680892944, "log_odds_ratio": -0.8092735409736633, "logits/chosen": -0.02855614386498928, "logits/rejected": -0.1778666377067566, "logps/chosen": -2.3660547733306885, "logps/rejected": -2.4443225860595703, "loss": 1.7662, "nll_loss": 1.6852859258651733, "rewards/accuracies": 0.75, "rewards/chosen": -0.2366054654121399, "rewards/margins": 0.007826797664165497, "rewards/rejected": -0.244432270526886, "step": 516 }, { "epoch": 1.4154688569472964, "grad_norm": 6.189049243927002, "learning_rate": 9.293150684931507e-07, "log_odds_chosen": -0.8267070055007935, "log_odds_ratio": -1.2659249305725098, "logits/chosen": -0.18069812655448914, "logits/rejected": -0.13325420022010803, "logps/chosen": -3.597414970397949, "logps/rejected": -2.794869899749756, "loss": 1.9445, "nll_loss": 1.8179419040679932, "rewards/accuracies": 0.25, "rewards/chosen": -0.3597415089607239, "rewards/margins": -0.08025451749563217, "rewards/rejected": -0.2794869840145111, "step": 517 }, { "epoch": 1.4182067077344285, "grad_norm": 6.044815540313721, "learning_rate": 9.291780821917807e-07, "log_odds_chosen": 0.3338850140571594, "log_odds_ratio": -1.3512110710144043, "logits/chosen": -0.10719111561775208, "logits/rejected": -0.1980738341808319, "logps/chosen": -3.655078887939453, "logps/rejected": -3.927063465118408, "loss": 1.9041, "nll_loss": 1.7689526081085205, "rewards/accuracies": 0.625, "rewards/chosen": -0.36550790071487427, "rewards/margins": 0.027198461815714836, "rewards/rejected": -0.39270639419555664, "step": 518 }, { "epoch": 1.4209445585215605, "grad_norm": 6.452555179595947, "learning_rate": 9.290410958904109e-07, "log_odds_chosen": 0.2632027268409729, "log_odds_ratio": -0.7571596503257751, "logits/chosen": -0.10076509416103363, "logits/rejected": -0.07893940061330795, "logps/chosen": -2.976759672164917, "logps/rejected": -3.175258159637451, "loss": 1.8459, "nll_loss": 1.7702305316925049, "rewards/accuracies": 0.5, "rewards/chosen": -0.2976759672164917, "rewards/margins": 0.01984981819987297, "rewards/rejected": -0.31752580404281616, "step": 519 }, { "epoch": 1.4236824093086926, "grad_norm": 6.0527119636535645, "learning_rate": 9.289041095890411e-07, "log_odds_chosen": 0.26865503191947937, "log_odds_ratio": -0.7317936420440674, "logits/chosen": -0.132125124335289, "logits/rejected": -0.08778606355190277, "logps/chosen": -2.478879451751709, "logps/rejected": -2.763338088989258, "loss": 1.8896, "nll_loss": 1.8164587020874023, "rewards/accuracies": 0.375, "rewards/chosen": -0.24788793921470642, "rewards/margins": 0.028445856645703316, "rewards/rejected": -0.2763338088989258, "step": 520 }, { "epoch": 1.4264202600958247, "grad_norm": 6.343183994293213, "learning_rate": 9.287671232876712e-07, "log_odds_chosen": 0.17724066972732544, "log_odds_ratio": -0.685905933380127, "logits/chosen": -0.05254068225622177, "logits/rejected": -0.06649415194988251, "logps/chosen": -3.4183969497680664, "logps/rejected": -3.5724997520446777, "loss": 1.8063, "nll_loss": 1.7376681566238403, "rewards/accuracies": 0.5, "rewards/chosen": -0.3418397009372711, "rewards/margins": 0.015410270541906357, "rewards/rejected": -0.3572499752044678, "step": 521 }, { "epoch": 1.4291581108829567, "grad_norm": 5.935749053955078, "learning_rate": 9.286301369863013e-07, "log_odds_chosen": 0.11765901744365692, "log_odds_ratio": -0.7434380054473877, "logits/chosen": -0.18853992223739624, "logits/rejected": -0.1423201858997345, "logps/chosen": -2.9160048961639404, "logps/rejected": -2.9928174018859863, "loss": 1.836, "nll_loss": 1.7617055177688599, "rewards/accuracies": 0.625, "rewards/chosen": -0.2916004955768585, "rewards/margins": 0.007681244984269142, "rewards/rejected": -0.2992817461490631, "step": 522 }, { "epoch": 1.431895961670089, "grad_norm": 7.17549467086792, "learning_rate": 9.284931506849315e-07, "log_odds_chosen": -1.0713610649108887, "log_odds_ratio": -1.5791380405426025, "logits/chosen": -0.09738627076148987, "logits/rejected": -0.005906190723180771, "logps/chosen": -4.640854835510254, "logps/rejected": -3.588733434677124, "loss": 1.9488, "nll_loss": 1.7909235954284668, "rewards/accuracies": 0.125, "rewards/chosen": -0.46408548951148987, "rewards/margins": -0.10521210730075836, "rewards/rejected": -0.3588733673095703, "step": 523 }, { "epoch": 1.434633812457221, "grad_norm": 6.9330620765686035, "learning_rate": 9.283561643835616e-07, "log_odds_chosen": -0.2555004954338074, "log_odds_ratio": -0.9830788969993591, "logits/chosen": -0.1678553968667984, "logits/rejected": -0.11134406924247742, "logps/chosen": -3.2835116386413574, "logps/rejected": -3.0151724815368652, "loss": 1.8184, "nll_loss": 1.7201242446899414, "rewards/accuracies": 0.375, "rewards/chosen": -0.32835114002227783, "rewards/margins": -0.026833876967430115, "rewards/rejected": -0.3015172779560089, "step": 524 }, { "epoch": 1.4373716632443532, "grad_norm": 6.70466423034668, "learning_rate": 9.282191780821917e-07, "log_odds_chosen": 0.7802824974060059, "log_odds_ratio": -0.8381357192993164, "logits/chosen": -0.16392114758491516, "logits/rejected": -0.15698175132274628, "logps/chosen": -3.5673065185546875, "logps/rejected": -4.302981853485107, "loss": 1.8576, "nll_loss": 1.7738333940505981, "rewards/accuracies": 0.375, "rewards/chosen": -0.3567306697368622, "rewards/margins": 0.07356751710176468, "rewards/rejected": -0.4302981495857239, "step": 525 }, { "epoch": 1.4401095140314852, "grad_norm": 6.135169506072998, "learning_rate": 9.280821917808219e-07, "log_odds_chosen": -0.3871970772743225, "log_odds_ratio": -1.0024518966674805, "logits/chosen": -0.151174396276474, "logits/rejected": -0.1863129436969757, "logps/chosen": -2.905648708343506, "logps/rejected": -2.5259201526641846, "loss": 1.8675, "nll_loss": 1.7672173976898193, "rewards/accuracies": 0.5, "rewards/chosen": -0.2905648648738861, "rewards/margins": -0.03797284513711929, "rewards/rejected": -0.2525920271873474, "step": 526 }, { "epoch": 1.4428473648186173, "grad_norm": 5.715885639190674, "learning_rate": 9.27945205479452e-07, "log_odds_chosen": -0.008864432573318481, "log_odds_ratio": -0.9270391464233398, "logits/chosen": -0.18869474530220032, "logits/rejected": -0.3174169361591339, "logps/chosen": -2.986379623413086, "logps/rejected": -2.964163303375244, "loss": 1.8021, "nll_loss": 1.709371566772461, "rewards/accuracies": 0.25, "rewards/chosen": -0.2986379861831665, "rewards/margins": -0.0022216197103261948, "rewards/rejected": -0.29641634225845337, "step": 527 }, { "epoch": 1.4455852156057496, "grad_norm": 5.416377067565918, "learning_rate": 9.278082191780822e-07, "log_odds_chosen": 0.4862583577632904, "log_odds_ratio": -0.5379292964935303, "logits/chosen": -0.047462668269872665, "logits/rejected": -0.08481195569038391, "logps/chosen": -2.4279286861419678, "logps/rejected": -2.8551597595214844, "loss": 1.7741, "nll_loss": 1.7202813625335693, "rewards/accuracies": 0.75, "rewards/chosen": -0.24279287457466125, "rewards/margins": 0.04272310063242912, "rewards/rejected": -0.2855159640312195, "step": 528 }, { "epoch": 1.4483230663928817, "grad_norm": 5.993664264678955, "learning_rate": 9.276712328767123e-07, "log_odds_chosen": 0.18967588245868683, "log_odds_ratio": -0.8578121662139893, "logits/chosen": -0.10959651321172714, "logits/rejected": -0.10998973250389099, "logps/chosen": -2.81959867477417, "logps/rejected": -2.9616992473602295, "loss": 1.8001, "nll_loss": 1.7143685817718506, "rewards/accuracies": 0.625, "rewards/chosen": -0.2819598913192749, "rewards/margins": 0.014210028573870659, "rewards/rejected": -0.2961699366569519, "step": 529 }, { "epoch": 1.4510609171800137, "grad_norm": 5.041894912719727, "learning_rate": 9.275342465753424e-07, "log_odds_chosen": 0.20683610439300537, "log_odds_ratio": -0.7079383730888367, "logits/chosen": 0.06609141826629639, "logits/rejected": -0.11854762583971024, "logps/chosen": -2.089445114135742, "logps/rejected": -2.263516902923584, "loss": 1.8068, "nll_loss": 1.7360498905181885, "rewards/accuracies": 0.75, "rewards/chosen": -0.20894451439380646, "rewards/margins": 0.017407158389687538, "rewards/rejected": -0.22635169327259064, "step": 530 }, { "epoch": 1.4537987679671458, "grad_norm": 7.515732765197754, "learning_rate": 9.273972602739726e-07, "log_odds_chosen": -0.6173474788665771, "log_odds_ratio": -1.2546589374542236, "logits/chosen": -0.08717181533575058, "logits/rejected": -0.043430015444755554, "logps/chosen": -3.4816527366638184, "logps/rejected": -2.8917078971862793, "loss": 1.8527, "nll_loss": 1.7272634506225586, "rewards/accuracies": 0.375, "rewards/chosen": -0.34816527366638184, "rewards/margins": -0.05899447202682495, "rewards/rejected": -0.2891708016395569, "step": 531 }, { "epoch": 1.4565366187542779, "grad_norm": 5.373440265655518, "learning_rate": 9.272602739726026e-07, "log_odds_chosen": 0.13046443462371826, "log_odds_ratio": -0.719249963760376, "logits/chosen": -0.1316080540418625, "logits/rejected": -0.15338227152824402, "logps/chosen": -2.780801296234131, "logps/rejected": -2.9059531688690186, "loss": 1.8301, "nll_loss": 1.7581965923309326, "rewards/accuracies": 0.625, "rewards/chosen": -0.27808016538619995, "rewards/margins": 0.012515183538198471, "rewards/rejected": -0.29059532284736633, "step": 532 }, { "epoch": 1.45927446954141, "grad_norm": 5.585673809051514, "learning_rate": 9.271232876712328e-07, "log_odds_chosen": 0.2711012065410614, "log_odds_ratio": -0.658095121383667, "logits/chosen": -0.09800702333450317, "logits/rejected": -0.1050950139760971, "logps/chosen": -2.189837694168091, "logps/rejected": -2.4175779819488525, "loss": 1.8123, "nll_loss": 1.7464838027954102, "rewards/accuracies": 0.5, "rewards/chosen": -0.21898379921913147, "rewards/margins": 0.022774022072553635, "rewards/rejected": -0.24175779521465302, "step": 533 }, { "epoch": 1.462012320328542, "grad_norm": 6.117877006530762, "learning_rate": 9.26986301369863e-07, "log_odds_chosen": -0.6715766191482544, "log_odds_ratio": -1.1992292404174805, "logits/chosen": -0.07815425843000412, "logits/rejected": -0.062102966010570526, "logps/chosen": -3.56046199798584, "logps/rejected": -2.9371654987335205, "loss": 1.9344, "nll_loss": 1.8144670724868774, "rewards/accuracies": 0.25, "rewards/chosen": -0.35604622960090637, "rewards/margins": -0.062329649925231934, "rewards/rejected": -0.29371657967567444, "step": 534 }, { "epoch": 1.464750171115674, "grad_norm": 5.455132961273193, "learning_rate": 9.268493150684931e-07, "log_odds_chosen": -0.6987391710281372, "log_odds_ratio": -1.2118675708770752, "logits/chosen": -0.18613553047180176, "logits/rejected": -0.18734683096408844, "logps/chosen": -2.572129964828491, "logps/rejected": -1.9251549243927002, "loss": 1.8154, "nll_loss": 1.6942195892333984, "rewards/accuracies": 0.25, "rewards/chosen": -0.2572129964828491, "rewards/margins": -0.0646975189447403, "rewards/rejected": -0.19251549243927002, "step": 535 }, { "epoch": 1.4674880219028064, "grad_norm": 7.148718357086182, "learning_rate": 9.267123287671232e-07, "log_odds_chosen": -0.3244037926197052, "log_odds_ratio": -1.0423084497451782, "logits/chosen": -0.10151444375514984, "logits/rejected": 0.024173758924007416, "logps/chosen": -3.592799186706543, "logps/rejected": -3.2928106784820557, "loss": 1.7988, "nll_loss": 1.6945264339447021, "rewards/accuracies": 0.375, "rewards/chosen": -0.35927993059158325, "rewards/margins": -0.029998833313584328, "rewards/rejected": -0.3292810916900635, "step": 536 }, { "epoch": 1.4702258726899384, "grad_norm": 5.466585159301758, "learning_rate": 9.265753424657534e-07, "log_odds_chosen": -0.2726173996925354, "log_odds_ratio": -0.9753192663192749, "logits/chosen": -0.2829759418964386, "logits/rejected": -0.303254097700119, "logps/chosen": -2.8115644454956055, "logps/rejected": -2.5437331199645996, "loss": 1.8698, "nll_loss": 1.77226984500885, "rewards/accuracies": 0.625, "rewards/chosen": -0.28115642070770264, "rewards/margins": -0.02678312361240387, "rewards/rejected": -0.25437331199645996, "step": 537 }, { "epoch": 1.4729637234770705, "grad_norm": 5.135889530181885, "learning_rate": 9.264383561643835e-07, "log_odds_chosen": 0.7422135472297668, "log_odds_ratio": -0.4604189991950989, "logits/chosen": 0.03350858390331268, "logits/rejected": -0.09341827034950256, "logps/chosen": -2.1128628253936768, "logps/rejected": -2.7955799102783203, "loss": 1.7261, "nll_loss": 1.6800477504730225, "rewards/accuracies": 0.75, "rewards/chosen": -0.2112862765789032, "rewards/margins": 0.06827171891927719, "rewards/rejected": -0.279558002948761, "step": 538 }, { "epoch": 1.4757015742642026, "grad_norm": 6.394707202911377, "learning_rate": 9.263013698630136e-07, "log_odds_chosen": 0.2723426818847656, "log_odds_ratio": -0.7640278935432434, "logits/chosen": -0.2907554507255554, "logits/rejected": -0.31568050384521484, "logps/chosen": -3.490527391433716, "logps/rejected": -3.714501142501831, "loss": 1.8339, "nll_loss": 1.7574796676635742, "rewards/accuracies": 0.5, "rewards/chosen": -0.349052757024765, "rewards/margins": 0.022397397086024284, "rewards/rejected": -0.37145012617111206, "step": 539 }, { "epoch": 1.4784394250513346, "grad_norm": 5.812414646148682, "learning_rate": 9.261643835616438e-07, "log_odds_chosen": 0.0717703253030777, "log_odds_ratio": -0.7048478126525879, "logits/chosen": -0.07665485888719559, "logits/rejected": -0.019888602197170258, "logps/chosen": -2.7548797130584717, "logps/rejected": -2.812361240386963, "loss": 1.7383, "nll_loss": 1.6678588390350342, "rewards/accuracies": 0.5, "rewards/chosen": -0.2754879593849182, "rewards/margins": 0.005748138763010502, "rewards/rejected": -0.28123611211776733, "step": 540 }, { "epoch": 1.481177275838467, "grad_norm": 5.832167148590088, "learning_rate": 9.260273972602739e-07, "log_odds_chosen": 0.4031076729297638, "log_odds_ratio": -0.5855600833892822, "logits/chosen": -0.02292744070291519, "logits/rejected": 0.044335972517728806, "logps/chosen": -2.8210806846618652, "logps/rejected": -3.1926090717315674, "loss": 1.823, "nll_loss": 1.7644712924957275, "rewards/accuracies": 0.75, "rewards/chosen": -0.2821080982685089, "rewards/margins": 0.03715282678604126, "rewards/rejected": -0.3192608952522278, "step": 541 }, { "epoch": 1.483915126625599, "grad_norm": 5.591067790985107, "learning_rate": 9.258904109589041e-07, "log_odds_chosen": -0.11322362720966339, "log_odds_ratio": -0.8454899787902832, "logits/chosen": -0.1442733108997345, "logits/rejected": -0.19522807002067566, "logps/chosen": -2.834972858428955, "logps/rejected": -2.7100329399108887, "loss": 1.806, "nll_loss": 1.7214466333389282, "rewards/accuracies": 0.375, "rewards/chosen": -0.28349730372428894, "rewards/margins": -0.012493995949625969, "rewards/rejected": -0.2710033059120178, "step": 542 }, { "epoch": 1.486652977412731, "grad_norm": 6.287854194641113, "learning_rate": 9.257534246575342e-07, "log_odds_chosen": -0.04992825537919998, "log_odds_ratio": -0.9168726205825806, "logits/chosen": -0.04068797081708908, "logits/rejected": -0.005922503769397736, "logps/chosen": -2.7617132663726807, "logps/rejected": -2.6343729496002197, "loss": 1.7481, "nll_loss": 1.6563990116119385, "rewards/accuracies": 0.625, "rewards/chosen": -0.27617132663726807, "rewards/margins": -0.012734021060168743, "rewards/rejected": -0.26343733072280884, "step": 543 }, { "epoch": 1.4893908281998631, "grad_norm": 6.907861709594727, "learning_rate": 9.256164383561643e-07, "log_odds_chosen": 0.16681405901908875, "log_odds_ratio": -0.7319722175598145, "logits/chosen": -0.0691518783569336, "logits/rejected": -0.04169797524809837, "logps/chosen": -3.297506332397461, "logps/rejected": -3.450967788696289, "loss": 1.7572, "nll_loss": 1.6840307712554932, "rewards/accuracies": 0.5, "rewards/chosen": -0.329750657081604, "rewards/margins": 0.015346154570579529, "rewards/rejected": -0.3450968265533447, "step": 544 }, { "epoch": 1.4921286789869952, "grad_norm": 5.9958696365356445, "learning_rate": 9.254794520547945e-07, "log_odds_chosen": 0.1915203332901001, "log_odds_ratio": -0.6380472183227539, "logits/chosen": -0.22041070461273193, "logits/rejected": -0.29295265674591064, "logps/chosen": -3.1381731033325195, "logps/rejected": -3.3077945709228516, "loss": 1.8001, "nll_loss": 1.7362632751464844, "rewards/accuracies": 0.5, "rewards/chosen": -0.3138173222541809, "rewards/margins": 0.016962165012955666, "rewards/rejected": -0.33077946305274963, "step": 545 }, { "epoch": 1.4948665297741273, "grad_norm": 5.471934795379639, "learning_rate": 9.253424657534245e-07, "log_odds_chosen": 0.16862481832504272, "log_odds_ratio": -0.8866100907325745, "logits/chosen": 0.03179502114653587, "logits/rejected": -0.052607737481594086, "logps/chosen": -2.7214157581329346, "logps/rejected": -2.863248825073242, "loss": 1.7577, "nll_loss": 1.6690033674240112, "rewards/accuracies": 0.5, "rewards/chosen": -0.27214157581329346, "rewards/margins": 0.014183301478624344, "rewards/rejected": -0.2863248586654663, "step": 546 }, { "epoch": 1.4976043805612593, "grad_norm": 5.338510990142822, "learning_rate": 9.252054794520547e-07, "log_odds_chosen": 0.07100953161716461, "log_odds_ratio": -0.7227138876914978, "logits/chosen": -0.06616472452878952, "logits/rejected": -0.09601257741451263, "logps/chosen": -2.3040504455566406, "logps/rejected": -2.3356432914733887, "loss": 1.7679, "nll_loss": 1.6956390142440796, "rewards/accuracies": 0.5, "rewards/chosen": -0.2304050624370575, "rewards/margins": 0.003159269690513611, "rewards/rejected": -0.2335643321275711, "step": 547 }, { "epoch": 1.5003422313483914, "grad_norm": 6.383164405822754, "learning_rate": 9.250684931506849e-07, "log_odds_chosen": -0.6175957918167114, "log_odds_ratio": -1.2004969120025635, "logits/chosen": -0.16202865540981293, "logits/rejected": -0.09759972989559174, "logps/chosen": -3.346682071685791, "logps/rejected": -2.725285053253174, "loss": 1.8206, "nll_loss": 1.7005807161331177, "rewards/accuracies": 0.375, "rewards/chosen": -0.33466821908950806, "rewards/margins": -0.062139712274074554, "rewards/rejected": -0.2725285291671753, "step": 548 }, { "epoch": 1.5030800821355235, "grad_norm": 5.987927436828613, "learning_rate": 9.249315068493149e-07, "log_odds_chosen": -0.35137298703193665, "log_odds_ratio": -0.9766762256622314, "logits/chosen": -0.21356219053268433, "logits/rejected": -0.19094018638134003, "logps/chosen": -3.1450257301330566, "logps/rejected": -2.8172216415405273, "loss": 1.8116, "nll_loss": 1.713894248008728, "rewards/accuracies": 0.5, "rewards/chosen": -0.3145025670528412, "rewards/margins": -0.03278038650751114, "rewards/rejected": -0.28172218799591064, "step": 549 }, { "epoch": 1.5058179329226558, "grad_norm": 6.189830780029297, "learning_rate": 9.247945205479451e-07, "log_odds_chosen": -0.286736398935318, "log_odds_ratio": -0.9074016213417053, "logits/chosen": -0.20040811598300934, "logits/rejected": -0.09779270738363266, "logps/chosen": -2.875703811645508, "logps/rejected": -2.6156387329101562, "loss": 1.8122, "nll_loss": 1.721508502960205, "rewards/accuracies": 0.25, "rewards/chosen": -0.28757038712501526, "rewards/margins": -0.02600651979446411, "rewards/rejected": -0.26156389713287354, "step": 550 }, { "epoch": 1.5085557837097878, "grad_norm": 5.313136100769043, "learning_rate": 9.246575342465753e-07, "log_odds_chosen": -0.19887912273406982, "log_odds_ratio": -0.8729656934738159, "logits/chosen": 0.004645846784114838, "logits/rejected": -0.04999992623925209, "logps/chosen": -2.7659268379211426, "logps/rejected": -2.563260555267334, "loss": 1.7644, "nll_loss": 1.6771303415298462, "rewards/accuracies": 0.625, "rewards/chosen": -0.2765927016735077, "rewards/margins": -0.02026664838194847, "rewards/rejected": -0.2563260495662689, "step": 551 }, { "epoch": 1.51129363449692, "grad_norm": 6.6751861572265625, "learning_rate": 9.245205479452054e-07, "log_odds_chosen": -0.6525740623474121, "log_odds_ratio": -1.3846880197525024, "logits/chosen": -0.13442906737327576, "logits/rejected": -0.11592091619968414, "logps/chosen": -3.779143810272217, "logps/rejected": -3.1325645446777344, "loss": 1.7685, "nll_loss": 1.6300336122512817, "rewards/accuracies": 0.25, "rewards/chosen": -0.3779143989086151, "rewards/margins": -0.06465792655944824, "rewards/rejected": -0.31325647234916687, "step": 552 }, { "epoch": 1.5140314852840522, "grad_norm": 6.434945106506348, "learning_rate": 9.243835616438355e-07, "log_odds_chosen": -0.049044154584407806, "log_odds_ratio": -0.8271571397781372, "logits/chosen": -0.1287490576505661, "logits/rejected": -0.09885425865650177, "logps/chosen": -2.8920063972473145, "logps/rejected": -2.8115453720092773, "loss": 1.7084, "nll_loss": 1.625664234161377, "rewards/accuracies": 0.75, "rewards/chosen": -0.28920063376426697, "rewards/margins": -0.008046082220971584, "rewards/rejected": -0.2811545729637146, "step": 553 }, { "epoch": 1.5167693360711842, "grad_norm": 6.199353218078613, "learning_rate": 9.242465753424657e-07, "log_odds_chosen": -0.1876334249973297, "log_odds_ratio": -0.8521504402160645, "logits/chosen": -0.16706526279449463, "logits/rejected": -0.10036782175302505, "logps/chosen": -2.9355533123016357, "logps/rejected": -2.7473700046539307, "loss": 1.7647, "nll_loss": 1.6795284748077393, "rewards/accuracies": 0.375, "rewards/chosen": -0.2935553193092346, "rewards/margins": -0.018818335607647896, "rewards/rejected": -0.27473700046539307, "step": 554 }, { "epoch": 1.5195071868583163, "grad_norm": 5.973548889160156, "learning_rate": 9.241095890410958e-07, "log_odds_chosen": 0.39928534626960754, "log_odds_ratio": -0.6544954180717468, "logits/chosen": -0.17233629524707794, "logits/rejected": -0.24981755018234253, "logps/chosen": -3.6028504371643066, "logps/rejected": -3.969956874847412, "loss": 1.7957, "nll_loss": 1.7302567958831787, "rewards/accuracies": 0.625, "rewards/chosen": -0.36028504371643066, "rewards/margins": 0.03671063110232353, "rewards/rejected": -0.3969956934452057, "step": 555 }, { "epoch": 1.5222450376454484, "grad_norm": 5.64979362487793, "learning_rate": 9.239726027397259e-07, "log_odds_chosen": -0.0012522116303443909, "log_odds_ratio": -0.7738747596740723, "logits/chosen": -0.12888294458389282, "logits/rejected": -0.17685559391975403, "logps/chosen": -2.4307022094726562, "logps/rejected": -2.4086828231811523, "loss": 1.6957, "nll_loss": 1.6182790994644165, "rewards/accuracies": 0.75, "rewards/chosen": -0.24307024478912354, "rewards/margins": -0.0022019390016794205, "rewards/rejected": -0.24086830019950867, "step": 556 }, { "epoch": 1.5249828884325805, "grad_norm": 5.65573263168335, "learning_rate": 9.238356164383561e-07, "log_odds_chosen": -0.616959810256958, "log_odds_ratio": -1.243466854095459, "logits/chosen": -0.13047541677951813, "logits/rejected": -0.1840771585702896, "logps/chosen": -3.353749990463257, "logps/rejected": -2.7493021488189697, "loss": 1.8188, "nll_loss": 1.694498062133789, "rewards/accuracies": 0.25, "rewards/chosen": -0.33537501096725464, "rewards/margins": -0.06044478714466095, "rewards/rejected": -0.2749302089214325, "step": 557 }, { "epoch": 1.5277207392197125, "grad_norm": 5.91209602355957, "learning_rate": 9.236986301369862e-07, "log_odds_chosen": 0.1895839124917984, "log_odds_ratio": -0.6153408288955688, "logits/chosen": -0.14035889506340027, "logits/rejected": -0.16124913096427917, "logps/chosen": -2.682772159576416, "logps/rejected": -2.855562686920166, "loss": 1.7166, "nll_loss": 1.6550180912017822, "rewards/accuracies": 0.875, "rewards/chosen": -0.26827722787857056, "rewards/margins": 0.01727902702987194, "rewards/rejected": -0.28555625677108765, "step": 558 }, { "epoch": 1.5304585900068446, "grad_norm": 5.301373481750488, "learning_rate": 9.235616438356164e-07, "log_odds_chosen": -0.32780173420906067, "log_odds_ratio": -0.9651700258255005, "logits/chosen": -0.19480831921100616, "logits/rejected": -0.2517542541027069, "logps/chosen": -2.6590614318847656, "logps/rejected": -2.3234493732452393, "loss": 1.7821, "nll_loss": 1.685545802116394, "rewards/accuracies": 0.375, "rewards/chosen": -0.2659061849117279, "rewards/margins": -0.03356122970581055, "rewards/rejected": -0.23234494030475616, "step": 559 }, { "epoch": 1.5331964407939767, "grad_norm": 6.743518829345703, "learning_rate": 9.234246575342465e-07, "log_odds_chosen": -0.18966224789619446, "log_odds_ratio": -0.904726505279541, "logits/chosen": -0.11199507117271423, "logits/rejected": -0.05495178699493408, "logps/chosen": -3.6883130073547363, "logps/rejected": -3.4952311515808105, "loss": 1.6913, "nll_loss": 1.600874662399292, "rewards/accuracies": 0.375, "rewards/chosen": -0.3688312768936157, "rewards/margins": -0.01930820941925049, "rewards/rejected": -0.34952312707901, "step": 560 }, { "epoch": 1.5359342915811087, "grad_norm": 5.980684757232666, "learning_rate": 9.232876712328766e-07, "log_odds_chosen": 0.04901620000600815, "log_odds_ratio": -0.7894347906112671, "logits/chosen": 0.02157159335911274, "logits/rejected": 0.06557025015354156, "logps/chosen": -2.9291012287139893, "logps/rejected": -2.970494270324707, "loss": 1.6997, "nll_loss": 1.6207613945007324, "rewards/accuracies": 0.5, "rewards/chosen": -0.292910099029541, "rewards/margins": 0.004139300435781479, "rewards/rejected": -0.2970494329929352, "step": 561 }, { "epoch": 1.5386721423682408, "grad_norm": 6.67162561416626, "learning_rate": 9.231506849315069e-07, "log_odds_chosen": -0.5187985301017761, "log_odds_ratio": -1.321599006652832, "logits/chosen": -0.11481209099292755, "logits/rejected": -0.08149734139442444, "logps/chosen": -4.030457496643066, "logps/rejected": -3.5172111988067627, "loss": 1.7681, "nll_loss": 1.6359822750091553, "rewards/accuracies": 0.5, "rewards/chosen": -0.40304574370384216, "rewards/margins": -0.05132461339235306, "rewards/rejected": -0.3517211675643921, "step": 562 }, { "epoch": 1.541409993155373, "grad_norm": 5.670852184295654, "learning_rate": 9.230136986301368e-07, "log_odds_chosen": -0.4203439950942993, "log_odds_ratio": -1.057837724685669, "logits/chosen": -0.2435297667980194, "logits/rejected": -0.23006315529346466, "logps/chosen": -2.903317451477051, "logps/rejected": -2.484002113342285, "loss": 1.7071, "nll_loss": 1.601316213607788, "rewards/accuracies": 0.5, "rewards/chosen": -0.29033175110816956, "rewards/margins": -0.04193154722452164, "rewards/rejected": -0.24840021133422852, "step": 563 }, { "epoch": 1.5441478439425051, "grad_norm": 6.101190090179443, "learning_rate": 9.22876712328767e-07, "log_odds_chosen": -0.41476768255233765, "log_odds_ratio": -1.0184760093688965, "logits/chosen": -0.1659492552280426, "logits/rejected": -0.1298617422580719, "logps/chosen": -3.195120096206665, "logps/rejected": -2.801478624343872, "loss": 1.7836, "nll_loss": 1.6817649602890015, "rewards/accuracies": 0.375, "rewards/chosen": -0.3195120096206665, "rewards/margins": -0.03936413303017616, "rewards/rejected": -0.28014785051345825, "step": 564 }, { "epoch": 1.5468856947296372, "grad_norm": 6.294743537902832, "learning_rate": 9.227397260273973e-07, "log_odds_chosen": -0.12835779786109924, "log_odds_ratio": -1.0219401121139526, "logits/chosen": -0.1567625105381012, "logits/rejected": -0.21275931596755981, "logps/chosen": -3.4084722995758057, "logps/rejected": -3.264997720718384, "loss": 1.7544, "nll_loss": 1.6522489786148071, "rewards/accuracies": 0.5, "rewards/chosen": -0.3408472537994385, "rewards/margins": -0.014347447082400322, "rewards/rejected": -0.3264997899532318, "step": 565 }, { "epoch": 1.5496235455167693, "grad_norm": 5.357804775238037, "learning_rate": 9.226027397260274e-07, "log_odds_chosen": 0.9128842353820801, "log_odds_ratio": -0.6979804635047913, "logits/chosen": -0.010756319388747215, "logits/rejected": -0.007239237427711487, "logps/chosen": -2.5241928100585938, "logps/rejected": -3.321934223175049, "loss": 1.7097, "nll_loss": 1.6398688554763794, "rewards/accuracies": 0.75, "rewards/chosen": -0.25241929292678833, "rewards/margins": 0.07977411150932312, "rewards/rejected": -0.33219340443611145, "step": 566 }, { "epoch": 1.5523613963039016, "grad_norm": 5.147227764129639, "learning_rate": 9.224657534246575e-07, "log_odds_chosen": 0.01978437602519989, "log_odds_ratio": -0.8200136423110962, "logits/chosen": -0.13163521885871887, "logits/rejected": -0.18843534588813782, "logps/chosen": -2.7130305767059326, "logps/rejected": -2.6804986000061035, "loss": 1.7631, "nll_loss": 1.6810940504074097, "rewards/accuracies": 0.625, "rewards/chosen": -0.27130305767059326, "rewards/margins": -0.0032531702890992165, "rewards/rejected": -0.2680498957633972, "step": 567 }, { "epoch": 1.5550992470910336, "grad_norm": 4.895539283752441, "learning_rate": 9.223287671232877e-07, "log_odds_chosen": 0.4459351897239685, "log_odds_ratio": -0.6188564300537109, "logits/chosen": -0.20007944107055664, "logits/rejected": -0.3016687035560608, "logps/chosen": -2.1633529663085938, "logps/rejected": -2.4953293800354004, "loss": 1.699, "nll_loss": 1.6371245384216309, "rewards/accuracies": 0.625, "rewards/chosen": -0.21633529663085938, "rewards/margins": 0.03319764882326126, "rewards/rejected": -0.24953295290470123, "step": 568 }, { "epoch": 1.5578370978781657, "grad_norm": 5.773138999938965, "learning_rate": 9.221917808219178e-07, "log_odds_chosen": 0.2614701986312866, "log_odds_ratio": -0.648228645324707, "logits/chosen": -0.04636281728744507, "logits/rejected": -0.07004893571138382, "logps/chosen": -3.0675783157348633, "logps/rejected": -3.3069138526916504, "loss": 1.7543, "nll_loss": 1.6895087957382202, "rewards/accuracies": 0.75, "rewards/chosen": -0.3067578077316284, "rewards/margins": 0.023933546617627144, "rewards/rejected": -0.3306913375854492, "step": 569 }, { "epoch": 1.5605749486652978, "grad_norm": 5.868766784667969, "learning_rate": 9.220547945205479e-07, "log_odds_chosen": 0.12449084222316742, "log_odds_ratio": -0.700771152973175, "logits/chosen": -0.03384159877896309, "logits/rejected": -0.04082486405968666, "logps/chosen": -3.2579197883605957, "logps/rejected": -3.3500514030456543, "loss": 1.6314, "nll_loss": 1.5612932443618774, "rewards/accuracies": 0.5, "rewards/chosen": -0.32579201459884644, "rewards/margins": 0.00921315886080265, "rewards/rejected": -0.33500516414642334, "step": 570 }, { "epoch": 1.5633127994524298, "grad_norm": 6.887150287628174, "learning_rate": 9.219178082191781e-07, "log_odds_chosen": -0.3615228831768036, "log_odds_ratio": -1.0083014965057373, "logits/chosen": -0.09729386121034622, "logits/rejected": -0.0034700408577919006, "logps/chosen": -3.5638575553894043, "logps/rejected": -3.204775333404541, "loss": 1.6905, "nll_loss": 1.5896718502044678, "rewards/accuracies": 0.375, "rewards/chosen": -0.3563857674598694, "rewards/margins": -0.03590821474790573, "rewards/rejected": -0.32047754526138306, "step": 571 }, { "epoch": 1.566050650239562, "grad_norm": 5.8733344078063965, "learning_rate": 9.217808219178082e-07, "log_odds_chosen": 0.31549978256225586, "log_odds_ratio": -0.6049168109893799, "logits/chosen": -0.20444127917289734, "logits/rejected": -0.07118034362792969, "logps/chosen": -2.599975109100342, "logps/rejected": -2.8938331604003906, "loss": 1.6525, "nll_loss": 1.5919787883758545, "rewards/accuracies": 0.75, "rewards/chosen": -0.25999751687049866, "rewards/margins": 0.029385795816779137, "rewards/rejected": -0.28938332200050354, "step": 572 }, { "epoch": 1.568788501026694, "grad_norm": 6.827439308166504, "learning_rate": 9.216438356164384e-07, "log_odds_chosen": -0.3058663010597229, "log_odds_ratio": -1.0957685708999634, "logits/chosen": -0.054412953555583954, "logits/rejected": -0.12372830510139465, "logps/chosen": -3.2115793228149414, "logps/rejected": -2.8790762424468994, "loss": 1.6631, "nll_loss": 1.5535147190093994, "rewards/accuracies": 0.375, "rewards/chosen": -0.32115796208381653, "rewards/margins": -0.0332503467798233, "rewards/rejected": -0.28790760040283203, "step": 573 }, { "epoch": 1.571526351813826, "grad_norm": 5.3873491287231445, "learning_rate": 9.215068493150685e-07, "log_odds_chosen": 0.22389879822731018, "log_odds_ratio": -0.6506476402282715, "logits/chosen": -0.14367379248142242, "logits/rejected": -0.16043692827224731, "logps/chosen": -2.359098196029663, "logps/rejected": -2.527118682861328, "loss": 1.6407, "nll_loss": 1.5756522417068481, "rewards/accuracies": 0.375, "rewards/chosen": -0.2359098345041275, "rewards/margins": 0.016802042722702026, "rewards/rejected": -0.25271186232566833, "step": 574 }, { "epoch": 1.5742642026009581, "grad_norm": 5.71211051940918, "learning_rate": 9.213698630136986e-07, "log_odds_chosen": 0.8340956568717957, "log_odds_ratio": -0.5813170671463013, "logits/chosen": -0.1741708666086197, "logits/rejected": -0.22509752213954926, "logps/chosen": -3.342867851257324, "logps/rejected": -4.143290996551514, "loss": 1.7115, "nll_loss": 1.6533641815185547, "rewards/accuracies": 0.625, "rewards/chosen": -0.33428680896759033, "rewards/margins": 0.08004231005907059, "rewards/rejected": -0.4143291115760803, "step": 575 }, { "epoch": 1.5770020533880902, "grad_norm": 6.2557244300842285, "learning_rate": 9.212328767123288e-07, "log_odds_chosen": -0.02790084481239319, "log_odds_ratio": -0.774125874042511, "logits/chosen": -0.11701609194278717, "logits/rejected": -0.045969463884830475, "logps/chosen": -2.749264717102051, "logps/rejected": -2.6912100315093994, "loss": 1.6712, "nll_loss": 1.5938174724578857, "rewards/accuracies": 0.5, "rewards/chosen": -0.27492645382881165, "rewards/margins": -0.005805464461445808, "rewards/rejected": -0.269120991230011, "step": 576 }, { "epoch": 1.5797399041752225, "grad_norm": 4.948258876800537, "learning_rate": 9.210958904109588e-07, "log_odds_chosen": 0.12445394694805145, "log_odds_ratio": -0.7006542682647705, "logits/chosen": -0.1819838583469391, "logits/rejected": -0.16300907731056213, "logps/chosen": -2.4778151512145996, "logps/rejected": -2.5618793964385986, "loss": 1.7781, "nll_loss": 1.7080490589141846, "rewards/accuracies": 0.375, "rewards/chosen": -0.24778153002262115, "rewards/margins": 0.008406398817896843, "rewards/rejected": -0.25618791580200195, "step": 577 }, { "epoch": 1.5824777549623545, "grad_norm": 5.7212677001953125, "learning_rate": 9.20958904109589e-07, "log_odds_chosen": 0.9202393293380737, "log_odds_ratio": -0.358228862285614, "logits/chosen": -0.07554472982883453, "logits/rejected": -0.01716550812125206, "logps/chosen": -2.048530101776123, "logps/rejected": -2.8485910892486572, "loss": 1.5251, "nll_loss": 1.4892743825912476, "rewards/accuracies": 1.0, "rewards/chosen": -0.20485299825668335, "rewards/margins": 0.08000612258911133, "rewards/rejected": -0.2848591208457947, "step": 578 }, { "epoch": 1.5852156057494866, "grad_norm": 5.111844062805176, "learning_rate": 9.208219178082192e-07, "log_odds_chosen": 0.1980634182691574, "log_odds_ratio": -0.6500450372695923, "logits/chosen": -0.10326512902975082, "logits/rejected": -0.16045205295085907, "logps/chosen": -2.6828384399414062, "logps/rejected": -2.8414525985717773, "loss": 1.7212, "nll_loss": 1.656214714050293, "rewards/accuracies": 0.75, "rewards/chosen": -0.2682838439941406, "rewards/margins": 0.01586141064763069, "rewards/rejected": -0.2841452956199646, "step": 579 }, { "epoch": 1.587953456536619, "grad_norm": 5.140942573547363, "learning_rate": 9.206849315068493e-07, "log_odds_chosen": 0.07188227772712708, "log_odds_ratio": -0.7304526567459106, "logits/chosen": -0.09617263078689575, "logits/rejected": -0.11400917172431946, "logps/chosen": -2.7817444801330566, "logps/rejected": -2.8392319679260254, "loss": 1.6668, "nll_loss": 1.593776822090149, "rewards/accuracies": 0.375, "rewards/chosen": -0.27817443013191223, "rewards/margins": 0.005748753435909748, "rewards/rejected": -0.2839232087135315, "step": 580 }, { "epoch": 1.590691307323751, "grad_norm": 5.995230197906494, "learning_rate": 9.205479452054794e-07, "log_odds_chosen": 0.19295679032802582, "log_odds_ratio": -0.7972798943519592, "logits/chosen": -0.1397612988948822, "logits/rejected": -0.07367043942213058, "logps/chosen": -3.288881778717041, "logps/rejected": -3.476897716522217, "loss": 1.6644, "nll_loss": 1.584678053855896, "rewards/accuracies": 0.625, "rewards/chosen": -0.3288882076740265, "rewards/margins": 0.018801569938659668, "rewards/rejected": -0.34768977761268616, "step": 581 }, { "epoch": 1.593429158110883, "grad_norm": 5.387901782989502, "learning_rate": 9.204109589041096e-07, "log_odds_chosen": 0.2207847535610199, "log_odds_ratio": -0.6064544916152954, "logits/chosen": -0.198918879032135, "logits/rejected": -0.27749747037887573, "logps/chosen": -2.715135097503662, "logps/rejected": -2.913048267364502, "loss": 1.648, "nll_loss": 1.5873608589172363, "rewards/accuracies": 0.625, "rewards/chosen": -0.27151352167129517, "rewards/margins": 0.019791321828961372, "rewards/rejected": -0.2913048267364502, "step": 582 }, { "epoch": 1.596167008898015, "grad_norm": 6.657504558563232, "learning_rate": 9.202739726027397e-07, "log_odds_chosen": -0.8963413238525391, "log_odds_ratio": -1.3302841186523438, "logits/chosen": -0.14749102294445038, "logits/rejected": -0.1563408374786377, "logps/chosen": -3.2985432147979736, "logps/rejected": -2.438049077987671, "loss": 1.7174, "nll_loss": 1.5843815803527832, "rewards/accuracies": 0.125, "rewards/chosen": -0.3298543691635132, "rewards/margins": -0.08604943007230759, "rewards/rejected": -0.2438049167394638, "step": 583 }, { "epoch": 1.5989048596851472, "grad_norm": 6.300824165344238, "learning_rate": 9.201369863013698e-07, "log_odds_chosen": -0.06973327696323395, "log_odds_ratio": -1.0830323696136475, "logits/chosen": -0.11784392595291138, "logits/rejected": -0.06804263591766357, "logps/chosen": -3.5877110958099365, "logps/rejected": -3.4909558296203613, "loss": 1.6876, "nll_loss": 1.5793201923370361, "rewards/accuracies": 0.375, "rewards/chosen": -0.35877111554145813, "rewards/margins": -0.009675553068518639, "rewards/rejected": -0.34909558296203613, "step": 584 }, { "epoch": 1.6016427104722792, "grad_norm": 5.36124324798584, "learning_rate": 9.2e-07, "log_odds_chosen": 0.2815913259983063, "log_odds_ratio": -0.7372855544090271, "logits/chosen": 0.030539557337760925, "logits/rejected": 0.04574422538280487, "logps/chosen": -2.503431797027588, "logps/rejected": -2.7312886714935303, "loss": 1.6404, "nll_loss": 1.5666637420654297, "rewards/accuracies": 0.625, "rewards/chosen": -0.2503431737422943, "rewards/margins": 0.022785693407058716, "rewards/rejected": -0.273128867149353, "step": 585 }, { "epoch": 1.6043805612594113, "grad_norm": 8.418536186218262, "learning_rate": 9.198630136986301e-07, "log_odds_chosen": -0.7743750214576721, "log_odds_ratio": -1.4363672733306885, "logits/chosen": -0.029308674857020378, "logits/rejected": 0.022293850779533386, "logps/chosen": -4.675793170928955, "logps/rejected": -3.901719093322754, "loss": 1.7423, "nll_loss": 1.5986313819885254, "rewards/accuracies": 0.5, "rewards/chosen": -0.46757933497428894, "rewards/margins": -0.07740740478038788, "rewards/rejected": -0.39017191529273987, "step": 586 }, { "epoch": 1.6071184120465434, "grad_norm": 6.1586737632751465, "learning_rate": 9.197260273972603e-07, "log_odds_chosen": -0.5202854871749878, "log_odds_ratio": -1.089689016342163, "logits/chosen": -0.08953694254159927, "logits/rejected": -0.036313511431217194, "logps/chosen": -3.5655806064605713, "logps/rejected": -3.0308637619018555, "loss": 1.7163, "nll_loss": 1.6073002815246582, "rewards/accuracies": 0.5, "rewards/chosen": -0.35655805468559265, "rewards/margins": -0.0534716472029686, "rewards/rejected": -0.30308640003204346, "step": 587 }, { "epoch": 1.6098562628336754, "grad_norm": 5.265634059906006, "learning_rate": 9.195890410958904e-07, "log_odds_chosen": 0.4456843435764313, "log_odds_ratio": -0.59765625, "logits/chosen": -0.01034482941031456, "logits/rejected": 0.02895023301243782, "logps/chosen": -2.651923894882202, "logps/rejected": -3.063772678375244, "loss": 1.6264, "nll_loss": 1.5666345357894897, "rewards/accuracies": 0.75, "rewards/chosen": -0.2651923894882202, "rewards/margins": 0.04118485748767853, "rewards/rejected": -0.30637726187705994, "step": 588 }, { "epoch": 1.6125941136208075, "grad_norm": 5.86743688583374, "learning_rate": 9.194520547945205e-07, "log_odds_chosen": -0.1487271785736084, "log_odds_ratio": -0.9040473699569702, "logits/chosen": 0.050581954419612885, "logits/rejected": -0.001621730625629425, "logps/chosen": -2.9321351051330566, "logps/rejected": -2.7420578002929688, "loss": 1.7456, "nll_loss": 1.6551471948623657, "rewards/accuracies": 0.625, "rewards/chosen": -0.29321354627609253, "rewards/margins": -0.019007738679647446, "rewards/rejected": -0.2742058038711548, "step": 589 }, { "epoch": 1.6153319644079398, "grad_norm": 4.946707248687744, "learning_rate": 9.193150684931507e-07, "log_odds_chosen": -0.10562245547771454, "log_odds_ratio": -0.7689120769500732, "logits/chosen": -0.12946254014968872, "logits/rejected": -0.19102001190185547, "logps/chosen": -3.1823315620422363, "logps/rejected": -3.0798728466033936, "loss": 1.7178, "nll_loss": 1.6408894062042236, "rewards/accuracies": 0.125, "rewards/chosen": -0.3182331919670105, "rewards/margins": -0.01024589128792286, "rewards/rejected": -0.3079872727394104, "step": 590 }, { "epoch": 1.6180698151950719, "grad_norm": 5.420330047607422, "learning_rate": 9.191780821917808e-07, "log_odds_chosen": 0.23436690866947174, "log_odds_ratio": -0.6702085137367249, "logits/chosen": -0.07390481233596802, "logits/rejected": -0.06985104829072952, "logps/chosen": -2.825086832046509, "logps/rejected": -3.0248773097991943, "loss": 1.644, "nll_loss": 1.5769726037979126, "rewards/accuracies": 0.5, "rewards/chosen": -0.2825087010860443, "rewards/margins": 0.019979044795036316, "rewards/rejected": -0.30248773097991943, "step": 591 }, { "epoch": 1.620807665982204, "grad_norm": 5.777070045471191, "learning_rate": 9.190410958904109e-07, "log_odds_chosen": 0.4491695165634155, "log_odds_ratio": -0.6843962073326111, "logits/chosen": -0.1328037530183792, "logits/rejected": -0.16870945692062378, "logps/chosen": -3.247144937515259, "logps/rejected": -3.6810927391052246, "loss": 1.6771, "nll_loss": 1.6086113452911377, "rewards/accuracies": 0.625, "rewards/chosen": -0.3247145116329193, "rewards/margins": 0.0433947890996933, "rewards/rejected": -0.3681092858314514, "step": 592 }, { "epoch": 1.6235455167693362, "grad_norm": 6.617186546325684, "learning_rate": 9.189041095890411e-07, "log_odds_chosen": 0.11982697993516922, "log_odds_ratio": -0.6906243562698364, "logits/chosen": -0.06285107135772705, "logits/rejected": 0.05270655080676079, "logps/chosen": -3.440761089324951, "logps/rejected": -3.5614449977874756, "loss": 1.6035, "nll_loss": 1.5344257354736328, "rewards/accuracies": 0.625, "rewards/chosen": -0.34407609701156616, "rewards/margins": 0.01206839270889759, "rewards/rejected": -0.3561444878578186, "step": 593 }, { "epoch": 1.6262833675564683, "grad_norm": 6.865300178527832, "learning_rate": 9.187671232876712e-07, "log_odds_chosen": -0.5955897569656372, "log_odds_ratio": -1.3210490942001343, "logits/chosen": -0.00735735148191452, "logits/rejected": 0.07753807306289673, "logps/chosen": -3.6951422691345215, "logps/rejected": -3.1250646114349365, "loss": 1.6005, "nll_loss": 1.46839439868927, "rewards/accuracies": 0.375, "rewards/chosen": -0.36951422691345215, "rewards/margins": -0.05700777471065521, "rewards/rejected": -0.31250643730163574, "step": 594 }, { "epoch": 1.6290212183436004, "grad_norm": 5.350645065307617, "learning_rate": 9.186301369863013e-07, "log_odds_chosen": 0.35773569345474243, "log_odds_ratio": -0.5584431886672974, "logits/chosen": -0.03796667233109474, "logits/rejected": 0.00853799283504486, "logps/chosen": -1.8138012886047363, "logps/rejected": -2.1038835048675537, "loss": 1.5771, "nll_loss": 1.5212936401367188, "rewards/accuracies": 0.875, "rewards/chosen": -0.18138013780117035, "rewards/margins": 0.029008222743868828, "rewards/rejected": -0.21038836240768433, "step": 595 }, { "epoch": 1.6317590691307324, "grad_norm": 5.699516773223877, "learning_rate": 9.184931506849315e-07, "log_odds_chosen": -0.48873472213745117, "log_odds_ratio": -1.067962646484375, "logits/chosen": -0.15441395342350006, "logits/rejected": -0.09353327751159668, "logps/chosen": -3.0943734645843506, "logps/rejected": -2.6236824989318848, "loss": 1.6538, "nll_loss": 1.5470339059829712, "rewards/accuracies": 0.5, "rewards/chosen": -0.3094373643398285, "rewards/margins": -0.04706910252571106, "rewards/rejected": -0.26236823201179504, "step": 596 }, { "epoch": 1.6344969199178645, "grad_norm": 5.18526554107666, "learning_rate": 9.183561643835616e-07, "log_odds_chosen": -0.5083591938018799, "log_odds_ratio": -1.0808441638946533, "logits/chosen": -0.013002146035432816, "logits/rejected": -0.0004228837788105011, "logps/chosen": -2.6292057037353516, "logps/rejected": -2.158355474472046, "loss": 1.7146, "nll_loss": 1.6065617799758911, "rewards/accuracies": 0.25, "rewards/chosen": -0.2629205584526062, "rewards/margins": -0.047085002064704895, "rewards/rejected": -0.2158355414867401, "step": 597 }, { "epoch": 1.6372347707049966, "grad_norm": 5.408290386199951, "learning_rate": 9.182191780821917e-07, "log_odds_chosen": 0.4361186623573303, "log_odds_ratio": -0.5443177819252014, "logits/chosen": -0.026608657091856003, "logits/rejected": 0.017698638141155243, "logps/chosen": -2.728701591491699, "logps/rejected": -3.1139166355133057, "loss": 1.5589, "nll_loss": 1.504428505897522, "rewards/accuracies": 0.75, "rewards/chosen": -0.27287015318870544, "rewards/margins": 0.038521502166986465, "rewards/rejected": -0.3113916516304016, "step": 598 }, { "epoch": 1.6399726214921286, "grad_norm": 6.265475273132324, "learning_rate": 9.180821917808219e-07, "log_odds_chosen": -0.09191690385341644, "log_odds_ratio": -1.0200823545455933, "logits/chosen": -0.11044599115848541, "logits/rejected": -0.13376860320568085, "logps/chosen": -3.130113124847412, "logps/rejected": -2.9712584018707275, "loss": 1.6207, "nll_loss": 1.5187301635742188, "rewards/accuracies": 0.5, "rewards/chosen": -0.3130113184452057, "rewards/margins": -0.015885457396507263, "rewards/rejected": -0.29712584614753723, "step": 599 }, { "epoch": 1.6427104722792607, "grad_norm": 6.122819423675537, "learning_rate": 9.17945205479452e-07, "log_odds_chosen": 0.5793256759643555, "log_odds_ratio": -0.6254332661628723, "logits/chosen": -0.05587322637438774, "logits/rejected": -0.06221120432019234, "logps/chosen": -2.912888526916504, "logps/rejected": -3.429121494293213, "loss": 1.5332, "nll_loss": 1.4706605672836304, "rewards/accuracies": 0.625, "rewards/chosen": -0.2912888526916504, "rewards/margins": 0.051623307168483734, "rewards/rejected": -0.34291213750839233, "step": 600 }, { "epoch": 1.6454483230663928, "grad_norm": 5.874113082885742, "learning_rate": 9.178082191780822e-07, "log_odds_chosen": -0.14976640045642853, "log_odds_ratio": -0.8223494291305542, "logits/chosen": -0.11990250647068024, "logits/rejected": -0.0287339948117733, "logps/chosen": -2.9006311893463135, "logps/rejected": -2.769075393676758, "loss": 1.5642, "nll_loss": 1.4819457530975342, "rewards/accuracies": 0.375, "rewards/chosen": -0.29006314277648926, "rewards/margins": -0.013155581429600716, "rewards/rejected": -0.2769075632095337, "step": 601 }, { "epoch": 1.6481861738535248, "grad_norm": 5.799101829528809, "learning_rate": 9.176712328767123e-07, "log_odds_chosen": -0.41039395332336426, "log_odds_ratio": -1.0071704387664795, "logits/chosen": -0.13956767320632935, "logits/rejected": -0.12278331816196442, "logps/chosen": -3.5598807334899902, "logps/rejected": -3.1625003814697266, "loss": 1.6991, "nll_loss": 1.598389744758606, "rewards/accuracies": 0.375, "rewards/chosen": -0.355988085269928, "rewards/margins": -0.0397380106151104, "rewards/rejected": -0.3162500262260437, "step": 602 }, { "epoch": 1.6509240246406571, "grad_norm": 6.176170825958252, "learning_rate": 9.175342465753424e-07, "log_odds_chosen": 0.2335076928138733, "log_odds_ratio": -0.7827848196029663, "logits/chosen": 0.07109338790178299, "logits/rejected": 0.03633267804980278, "logps/chosen": -3.104407787322998, "logps/rejected": -3.259334087371826, "loss": 1.5611, "nll_loss": 1.482800006866455, "rewards/accuracies": 0.5, "rewards/chosen": -0.3104407787322998, "rewards/margins": 0.015492644160985947, "rewards/rejected": -0.32593342661857605, "step": 603 }, { "epoch": 1.6536618754277892, "grad_norm": 5.437766075134277, "learning_rate": 9.173972602739726e-07, "log_odds_chosen": 0.3834731876850128, "log_odds_ratio": -0.8956271409988403, "logits/chosen": -0.09375391900539398, "logits/rejected": -0.03290870785713196, "logps/chosen": -2.7443671226501465, "logps/rejected": -3.0971083641052246, "loss": 1.6192, "nll_loss": 1.5296282768249512, "rewards/accuracies": 0.5, "rewards/chosen": -0.27443671226501465, "rewards/margins": 0.03527410328388214, "rewards/rejected": -0.3097108006477356, "step": 604 }, { "epoch": 1.6563997262149213, "grad_norm": 6.118452548980713, "learning_rate": 9.172602739726027e-07, "log_odds_chosen": -0.21042820811271667, "log_odds_ratio": -0.9593822956085205, "logits/chosen": -0.025633398443460464, "logits/rejected": 0.04593691974878311, "logps/chosen": -3.014700412750244, "logps/rejected": -2.8203670978546143, "loss": 1.5116, "nll_loss": 1.4156451225280762, "rewards/accuracies": 0.625, "rewards/chosen": -0.3014700412750244, "rewards/margins": -0.01943332329392433, "rewards/rejected": -0.2820367217063904, "step": 605 }, { "epoch": 1.6591375770020536, "grad_norm": 5.080877780914307, "learning_rate": 9.171232876712328e-07, "log_odds_chosen": 0.6664682030677795, "log_odds_ratio": -0.4848993718624115, "logits/chosen": -0.09553079307079315, "logits/rejected": -0.1077350452542305, "logps/chosen": -1.677633285522461, "logps/rejected": -2.2250101566314697, "loss": 1.5598, "nll_loss": 1.5113152265548706, "rewards/accuracies": 0.875, "rewards/chosen": -0.1677633374929428, "rewards/margins": 0.05473768338561058, "rewards/rejected": -0.2225010097026825, "step": 606 }, { "epoch": 1.6618754277891856, "grad_norm": 5.0682148933410645, "learning_rate": 9.16986301369863e-07, "log_odds_chosen": -0.013035416603088379, "log_odds_ratio": -0.8426463007926941, "logits/chosen": -0.03630916029214859, "logits/rejected": -0.06789195537567139, "logps/chosen": -2.732898235321045, "logps/rejected": -2.7183356285095215, "loss": 1.6289, "nll_loss": 1.5446592569351196, "rewards/accuracies": 0.625, "rewards/chosen": -0.27328982949256897, "rewards/margins": -0.0014562606811523438, "rewards/rejected": -0.271833598613739, "step": 607 }, { "epoch": 1.6646132785763177, "grad_norm": 5.526639461517334, "learning_rate": 9.168493150684931e-07, "log_odds_chosen": -0.3553718030452728, "log_odds_ratio": -1.0430439710617065, "logits/chosen": 0.005685336887836456, "logits/rejected": 0.05490195006132126, "logps/chosen": -3.3237342834472656, "logps/rejected": -2.9521193504333496, "loss": 1.7093, "nll_loss": 1.604962944984436, "rewards/accuracies": 0.5, "rewards/chosen": -0.3323734402656555, "rewards/margins": -0.037161462008953094, "rewards/rejected": -0.2952119708061218, "step": 608 }, { "epoch": 1.6673511293634498, "grad_norm": 5.2227911949157715, "learning_rate": 9.167123287671232e-07, "log_odds_chosen": 0.5679910778999329, "log_odds_ratio": -0.5127849578857422, "logits/chosen": -0.13264510035514832, "logits/rejected": -0.11106493324041367, "logps/chosen": -2.597869873046875, "logps/rejected": -3.1220006942749023, "loss": 1.5238, "nll_loss": 1.472508192062378, "rewards/accuracies": 0.75, "rewards/chosen": -0.2597869634628296, "rewards/margins": 0.052413105964660645, "rewards/rejected": -0.3122000992298126, "step": 609 }, { "epoch": 1.6700889801505818, "grad_norm": 6.2200446128845215, "learning_rate": 9.165753424657534e-07, "log_odds_chosen": 0.729231059551239, "log_odds_ratio": -0.5090463161468506, "logits/chosen": -0.015351220965385437, "logits/rejected": 0.06166470795869827, "logps/chosen": -2.3098504543304443, "logps/rejected": -2.95786714553833, "loss": 1.5279, "nll_loss": 1.4769670963287354, "rewards/accuracies": 0.75, "rewards/chosen": -0.23098506033420563, "rewards/margins": 0.0648016631603241, "rewards/rejected": -0.2957867383956909, "step": 610 }, { "epoch": 1.672826830937714, "grad_norm": 5.039375305175781, "learning_rate": 9.164383561643835e-07, "log_odds_chosen": -0.03872007131576538, "log_odds_ratio": -0.7970093488693237, "logits/chosen": -0.11532717943191528, "logits/rejected": -0.19295720756053925, "logps/chosen": -2.66862154006958, "logps/rejected": -2.62162709236145, "loss": 1.6775, "nll_loss": 1.5978127717971802, "rewards/accuracies": 0.625, "rewards/chosen": -0.2668621242046356, "rewards/margins": -0.004699430428445339, "rewards/rejected": -0.2621626853942871, "step": 611 }, { "epoch": 1.675564681724846, "grad_norm": 5.7827372550964355, "learning_rate": 9.163013698630136e-07, "log_odds_chosen": -0.616951584815979, "log_odds_ratio": -1.1393768787384033, "logits/chosen": 0.029959287494421005, "logits/rejected": 0.08004105091094971, "logps/chosen": -3.093214988708496, "logps/rejected": -2.51406192779541, "loss": 1.5866, "nll_loss": 1.4726779460906982, "rewards/accuracies": 0.375, "rewards/chosen": -0.3093215227127075, "rewards/margins": -0.05791531130671501, "rewards/rejected": -0.251406192779541, "step": 612 }, { "epoch": 1.678302532511978, "grad_norm": 5.736118793487549, "learning_rate": 9.161643835616438e-07, "log_odds_chosen": -0.35032981634140015, "log_odds_ratio": -1.0981552600860596, "logits/chosen": -0.03381669521331787, "logits/rejected": -0.006166486069560051, "logps/chosen": -3.4634034633636475, "logps/rejected": -3.1021580696105957, "loss": 1.6158, "nll_loss": 1.5060179233551025, "rewards/accuracies": 0.375, "rewards/chosen": -0.3463403582572937, "rewards/margins": -0.03612452745437622, "rewards/rejected": -0.3102158308029175, "step": 613 }, { "epoch": 1.68104038329911, "grad_norm": 5.28913688659668, "learning_rate": 9.160273972602739e-07, "log_odds_chosen": 0.25360628962516785, "log_odds_ratio": -0.6763423681259155, "logits/chosen": -0.12703751027584076, "logits/rejected": -0.19882607460021973, "logps/chosen": -2.412409782409668, "logps/rejected": -2.636446475982666, "loss": 1.5794, "nll_loss": 1.511770248413086, "rewards/accuracies": 0.625, "rewards/chosen": -0.241240993142128, "rewards/margins": 0.02240367792546749, "rewards/rejected": -0.2636446952819824, "step": 614 }, { "epoch": 1.6837782340862422, "grad_norm": 5.049253940582275, "learning_rate": 9.158904109589041e-07, "log_odds_chosen": -0.14426255226135254, "log_odds_ratio": -0.8707355260848999, "logits/chosen": -0.13974446058273315, "logits/rejected": -0.13984838128089905, "logps/chosen": -2.8807971477508545, "logps/rejected": -2.737337112426758, "loss": 1.6878, "nll_loss": 1.600682020187378, "rewards/accuracies": 0.5, "rewards/chosen": -0.2880796790122986, "rewards/margins": -0.01434599980711937, "rewards/rejected": -0.2737337052822113, "step": 615 }, { "epoch": 1.6865160848733745, "grad_norm": 4.4845709800720215, "learning_rate": 9.157534246575342e-07, "log_odds_chosen": 0.6815441846847534, "log_odds_ratio": -0.5713246464729309, "logits/chosen": -0.021792009472846985, "logits/rejected": -0.1723494827747345, "logps/chosen": -2.174199104309082, "logps/rejected": -2.80963397026062, "loss": 1.6182, "nll_loss": 1.5610325336456299, "rewards/accuracies": 0.75, "rewards/chosen": -0.21741992235183716, "rewards/margins": 0.06354348361492157, "rewards/rejected": -0.28096339106559753, "step": 616 }, { "epoch": 1.6892539356605065, "grad_norm": 5.605014801025391, "learning_rate": 9.156164383561643e-07, "log_odds_chosen": 0.12568266689777374, "log_odds_ratio": -0.8004844188690186, "logits/chosen": 0.035184115171432495, "logits/rejected": 0.08742070943117142, "logps/chosen": -2.9811482429504395, "logps/rejected": -3.0615758895874023, "loss": 1.577, "nll_loss": 1.4969863891601562, "rewards/accuracies": 0.75, "rewards/chosen": -0.2981148064136505, "rewards/margins": 0.00804278627038002, "rewards/rejected": -0.30615758895874023, "step": 617 }, { "epoch": 1.6919917864476386, "grad_norm": 4.782644271850586, "learning_rate": 9.154794520547945e-07, "log_odds_chosen": 0.3735918402671814, "log_odds_ratio": -0.6292477250099182, "logits/chosen": -0.020856741815805435, "logits/rejected": -0.07886208593845367, "logps/chosen": -2.4124791622161865, "logps/rejected": -2.7361745834350586, "loss": 1.6139, "nll_loss": 1.5509281158447266, "rewards/accuracies": 0.75, "rewards/chosen": -0.24124793708324432, "rewards/margins": 0.032369524240493774, "rewards/rejected": -0.2736174464225769, "step": 618 }, { "epoch": 1.6947296372347707, "grad_norm": 5.747493267059326, "learning_rate": 9.153424657534246e-07, "log_odds_chosen": 0.20971931517124176, "log_odds_ratio": -0.6522649526596069, "logits/chosen": -0.20417636632919312, "logits/rejected": -0.1593794971704483, "logps/chosen": -2.672605037689209, "logps/rejected": -2.8598055839538574, "loss": 1.5642, "nll_loss": 1.4990122318267822, "rewards/accuracies": 0.625, "rewards/chosen": -0.26726049184799194, "rewards/margins": 0.018720097839832306, "rewards/rejected": -0.28598058223724365, "step": 619 }, { "epoch": 1.697467488021903, "grad_norm": 5.833572864532471, "learning_rate": 9.152054794520547e-07, "log_odds_chosen": -0.08016437292098999, "log_odds_ratio": -0.8148267865180969, "logits/chosen": -0.05957941338419914, "logits/rejected": 0.010561231523752213, "logps/chosen": -3.2641236782073975, "logps/rejected": -3.1679768562316895, "loss": 1.6489, "nll_loss": 1.5674004554748535, "rewards/accuracies": 0.5, "rewards/chosen": -0.3264123499393463, "rewards/margins": -0.009614676237106323, "rewards/rejected": -0.3167977035045624, "step": 620 }, { "epoch": 1.700205338809035, "grad_norm": 5.37935733795166, "learning_rate": 9.150684931506849e-07, "log_odds_chosen": 0.507736086845398, "log_odds_ratio": -0.6939529180526733, "logits/chosen": -0.10819629579782486, "logits/rejected": -0.10410093516111374, "logps/chosen": -2.551767587661743, "logps/rejected": -2.9519214630126953, "loss": 1.5363, "nll_loss": 1.4669041633605957, "rewards/accuracies": 0.625, "rewards/chosen": -0.25517672300338745, "rewards/margins": 0.04001540690660477, "rewards/rejected": -0.2951921224594116, "step": 621 }, { "epoch": 1.702943189596167, "grad_norm": 5.953371524810791, "learning_rate": 9.149315068493151e-07, "log_odds_chosen": -0.4248729348182678, "log_odds_ratio": -1.2529798746109009, "logits/chosen": -0.1419530212879181, "logits/rejected": -0.11698786914348602, "logps/chosen": -3.406350612640381, "logps/rejected": -2.9897923469543457, "loss": 1.7203, "nll_loss": 1.594953179359436, "rewards/accuracies": 0.5, "rewards/chosen": -0.3406350612640381, "rewards/margins": -0.04165581613779068, "rewards/rejected": -0.2989792823791504, "step": 622 }, { "epoch": 1.7056810403832992, "grad_norm": 4.79548454284668, "learning_rate": 9.147945205479451e-07, "log_odds_chosen": 0.45546862483024597, "log_odds_ratio": -0.5916121006011963, "logits/chosen": -0.09719938039779663, "logits/rejected": -0.1215277910232544, "logps/chosen": -2.72908878326416, "logps/rejected": -3.152897834777832, "loss": 1.5988, "nll_loss": 1.5396642684936523, "rewards/accuracies": 0.75, "rewards/chosen": -0.27290886640548706, "rewards/margins": 0.0423809289932251, "rewards/rejected": -0.31528979539871216, "step": 623 }, { "epoch": 1.7084188911704312, "grad_norm": 6.8021745681762695, "learning_rate": 9.146575342465753e-07, "log_odds_chosen": -0.1782481074333191, "log_odds_ratio": -1.1300065517425537, "logits/chosen": -0.07894186675548553, "logits/rejected": 0.054212819784879684, "logps/chosen": -3.5169622898101807, "logps/rejected": -3.339001178741455, "loss": 1.4786, "nll_loss": 1.3656033277511597, "rewards/accuracies": 0.375, "rewards/chosen": -0.35169628262519836, "rewards/margins": -0.017796112224459648, "rewards/rejected": -0.33390012383461, "step": 624 }, { "epoch": 1.7111567419575633, "grad_norm": 5.6848273277282715, "learning_rate": 9.145205479452054e-07, "log_odds_chosen": -0.6175681352615356, "log_odds_ratio": -1.2025762796401978, "logits/chosen": -0.17699119448661804, "logits/rejected": -0.0724896714091301, "logps/chosen": -2.966163158416748, "logps/rejected": -2.379868745803833, "loss": 1.5978, "nll_loss": 1.4775376319885254, "rewards/accuracies": 0.375, "rewards/chosen": -0.2966163456439972, "rewards/margins": -0.058629486709833145, "rewards/rejected": -0.23798686265945435, "step": 625 }, { "epoch": 1.7138945927446954, "grad_norm": 4.294968605041504, "learning_rate": 9.143835616438355e-07, "log_odds_chosen": 0.4842681586742401, "log_odds_ratio": -0.5196366310119629, "logits/chosen": -0.09469746053218842, "logits/rejected": -0.26791107654571533, "logps/chosen": -1.8945358991622925, "logps/rejected": -2.2968051433563232, "loss": 1.5811, "nll_loss": 1.5291539430618286, "rewards/accuracies": 0.75, "rewards/chosen": -0.189453586935997, "rewards/margins": 0.04022693634033203, "rewards/rejected": -0.22968053817749023, "step": 626 }, { "epoch": 1.7166324435318274, "grad_norm": 5.996635437011719, "learning_rate": 9.142465753424657e-07, "log_odds_chosen": -0.7239983081817627, "log_odds_ratio": -1.1875786781311035, "logits/chosen": -0.12546329200267792, "logits/rejected": -0.1286468505859375, "logps/chosen": -3.024376630783081, "logps/rejected": -2.3630449771881104, "loss": 1.6563, "nll_loss": 1.5375810861587524, "rewards/accuracies": 0.125, "rewards/chosen": -0.3024376630783081, "rewards/margins": -0.06613317877054214, "rewards/rejected": -0.23630449175834656, "step": 627 }, { "epoch": 1.7193702943189595, "grad_norm": 5.7118306159973145, "learning_rate": 9.141095890410958e-07, "log_odds_chosen": -0.47826284170150757, "log_odds_ratio": -1.1018884181976318, "logits/chosen": -0.00018169358372688293, "logits/rejected": 0.030751777812838554, "logps/chosen": -3.4404239654541016, "logps/rejected": -2.9552900791168213, "loss": 1.6213, "nll_loss": 1.5110690593719482, "rewards/accuracies": 0.25, "rewards/chosen": -0.3440423905849457, "rewards/margins": -0.04851337522268295, "rewards/rejected": -0.29552900791168213, "step": 628 }, { "epoch": 1.7221081451060916, "grad_norm": 5.567117214202881, "learning_rate": 9.13972602739726e-07, "log_odds_chosen": -0.43790575861930847, "log_odds_ratio": -0.9823772311210632, "logits/chosen": 0.046743184328079224, "logits/rejected": 0.004703156650066376, "logps/chosen": -2.98936128616333, "logps/rejected": -2.5708391666412354, "loss": 1.6834, "nll_loss": 1.5851716995239258, "rewards/accuracies": 0.25, "rewards/chosen": -0.298936128616333, "rewards/margins": -0.04185221344232559, "rewards/rejected": -0.257083922624588, "step": 629 }, { "epoch": 1.7248459958932238, "grad_norm": 4.8468217849731445, "learning_rate": 9.138356164383561e-07, "log_odds_chosen": 0.5684337615966797, "log_odds_ratio": -0.5841518044471741, "logits/chosen": 0.05970226600766182, "logits/rejected": -0.029591232538223267, "logps/chosen": -2.706258535385132, "logps/rejected": -3.258774518966675, "loss": 1.5539, "nll_loss": 1.4954745769500732, "rewards/accuracies": 0.625, "rewards/chosen": -0.27062585949897766, "rewards/margins": 0.055251605808734894, "rewards/rejected": -0.32587748765945435, "step": 630 }, { "epoch": 1.727583846680356, "grad_norm": 6.0442986488342285, "learning_rate": 9.136986301369862e-07, "log_odds_chosen": 0.03920602798461914, "log_odds_ratio": -1.0303237438201904, "logits/chosen": 0.03587164729833603, "logits/rejected": 0.06457212567329407, "logps/chosen": -3.5029029846191406, "logps/rejected": -3.527432918548584, "loss": 1.5798, "nll_loss": 1.4768092632293701, "rewards/accuracies": 0.375, "rewards/chosen": -0.35029029846191406, "rewards/margins": 0.002452995628118515, "rewards/rejected": -0.3527432978153229, "step": 631 }, { "epoch": 1.730321697467488, "grad_norm": 5.308730125427246, "learning_rate": 9.135616438356164e-07, "log_odds_chosen": 0.091062992811203, "log_odds_ratio": -0.6920521855354309, "logits/chosen": -0.09476649761199951, "logits/rejected": -0.052843548357486725, "logps/chosen": -2.4552457332611084, "logps/rejected": -2.5362906455993652, "loss": 1.4986, "nll_loss": 1.4294145107269287, "rewards/accuracies": 0.75, "rewards/chosen": -0.2455245852470398, "rewards/margins": 0.008104484528303146, "rewards/rejected": -0.25362905859947205, "step": 632 }, { "epoch": 1.7330595482546203, "grad_norm": 4.848819255828857, "learning_rate": 9.134246575342465e-07, "log_odds_chosen": -0.009899675846099854, "log_odds_ratio": -0.79334557056427, "logits/chosen": -0.02596355229616165, "logits/rejected": 0.003405548632144928, "logps/chosen": -2.273939371109009, "logps/rejected": -2.2530550956726074, "loss": 1.5128, "nll_loss": 1.4334434270858765, "rewards/accuracies": 0.625, "rewards/chosen": -0.22739392518997192, "rewards/margins": -0.0020884256809949875, "rewards/rejected": -0.22530551254749298, "step": 633 }, { "epoch": 1.7357973990417523, "grad_norm": 5.759427070617676, "learning_rate": 9.132876712328766e-07, "log_odds_chosen": -0.48140454292297363, "log_odds_ratio": -1.047340989112854, "logits/chosen": -0.06752091646194458, "logits/rejected": -0.003614405170083046, "logps/chosen": -3.2239317893981934, "logps/rejected": -2.729910373687744, "loss": 1.5176, "nll_loss": 1.412827491760254, "rewards/accuracies": 0.25, "rewards/chosen": -0.32239317893981934, "rewards/margins": -0.0494021512567997, "rewards/rejected": -0.27299103140830994, "step": 634 }, { "epoch": 1.7385352498288844, "grad_norm": 6.924006462097168, "learning_rate": 9.131506849315068e-07, "log_odds_chosen": -0.3367644250392914, "log_odds_ratio": -1.1251044273376465, "logits/chosen": -0.040151286870241165, "logits/rejected": 0.0675031840801239, "logps/chosen": -3.408801555633545, "logps/rejected": -3.0800180435180664, "loss": 1.5814, "nll_loss": 1.4689342975616455, "rewards/accuracies": 0.375, "rewards/chosen": -0.3408801555633545, "rewards/margins": -0.032878320664167404, "rewards/rejected": -0.308001846075058, "step": 635 }, { "epoch": 1.7412731006160165, "grad_norm": 5.201977252960205, "learning_rate": 9.13013698630137e-07, "log_odds_chosen": 0.32624316215515137, "log_odds_ratio": -0.6018314361572266, "logits/chosen": -0.00737902894616127, "logits/rejected": -0.013625355437397957, "logps/chosen": -2.3522839546203613, "logps/rejected": -2.6545331478118896, "loss": 1.5322, "nll_loss": 1.4720032215118408, "rewards/accuracies": 0.625, "rewards/chosen": -0.23522840440273285, "rewards/margins": 0.03022490255534649, "rewards/rejected": -0.2654533088207245, "step": 636 }, { "epoch": 1.7440109514031485, "grad_norm": 5.357319355010986, "learning_rate": 9.12876712328767e-07, "log_odds_chosen": 1.306748390197754, "log_odds_ratio": -0.47695064544677734, "logits/chosen": -0.07636993378400803, "logits/rejected": -0.09717495739459991, "logps/chosen": -2.186087131500244, "logps/rejected": -3.406926155090332, "loss": 1.4041, "nll_loss": 1.3564081192016602, "rewards/accuracies": 0.75, "rewards/chosen": -0.21860872209072113, "rewards/margins": 0.12208393216133118, "rewards/rejected": -0.3406926393508911, "step": 637 }, { "epoch": 1.7467488021902806, "grad_norm": 5.309232711791992, "learning_rate": 9.127397260273972e-07, "log_odds_chosen": -0.4226312041282654, "log_odds_ratio": -1.059153437614441, "logits/chosen": -0.1674061268568039, "logits/rejected": -0.12128480523824692, "logps/chosen": -2.89150333404541, "logps/rejected": -2.481520414352417, "loss": 1.6823, "nll_loss": 1.5764044523239136, "rewards/accuracies": 0.625, "rewards/chosen": -0.28915032744407654, "rewards/margins": -0.04099828004837036, "rewards/rejected": -0.24815204739570618, "step": 638 }, { "epoch": 1.7494866529774127, "grad_norm": 6.279641628265381, "learning_rate": 9.126027397260273e-07, "log_odds_chosen": 0.6309162378311157, "log_odds_ratio": -0.5032315850257874, "logits/chosen": 0.042344290763139725, "logits/rejected": 0.08682425320148468, "logps/chosen": -3.1266140937805176, "logps/rejected": -3.7328238487243652, "loss": 1.4637, "nll_loss": 1.413388729095459, "rewards/accuracies": 0.875, "rewards/chosen": -0.31266143918037415, "rewards/margins": 0.06062096729874611, "rewards/rejected": -0.37328240275382996, "step": 639 }, { "epoch": 1.7522245037645447, "grad_norm": 5.3455634117126465, "learning_rate": 9.124657534246574e-07, "log_odds_chosen": 0.6167086958885193, "log_odds_ratio": -0.9554412961006165, "logits/chosen": -0.1398146003484726, "logits/rejected": -0.1349382996559143, "logps/chosen": -2.6489133834838867, "logps/rejected": -3.248161554336548, "loss": 1.5922, "nll_loss": 1.496696949005127, "rewards/accuracies": 0.5, "rewards/chosen": -0.2648913562297821, "rewards/margins": 0.059924811124801636, "rewards/rejected": -0.32481616735458374, "step": 640 }, { "epoch": 1.7549623545516768, "grad_norm": 6.587216854095459, "learning_rate": 9.123287671232876e-07, "log_odds_chosen": -0.9520765542984009, "log_odds_ratio": -1.4423816204071045, "logits/chosen": -0.12121690809726715, "logits/rejected": -0.1083223968744278, "logps/chosen": -3.877058982849121, "logps/rejected": -2.9211368560791016, "loss": 1.6136, "nll_loss": 1.4693139791488647, "rewards/accuracies": 0.25, "rewards/chosen": -0.38770589232444763, "rewards/margins": -0.09559221565723419, "rewards/rejected": -0.29211366176605225, "step": 641 }, { "epoch": 1.7577002053388089, "grad_norm": 6.410890579223633, "learning_rate": 9.121917808219177e-07, "log_odds_chosen": -0.22964981198310852, "log_odds_ratio": -1.1372755765914917, "logits/chosen": 0.06556223332881927, "logits/rejected": 0.00031011924147605896, "logps/chosen": -3.333115577697754, "logps/rejected": -3.103482246398926, "loss": 1.6248, "nll_loss": 1.5111209154129028, "rewards/accuracies": 0.75, "rewards/chosen": -0.33331161737442017, "rewards/margins": -0.0229633878916502, "rewards/rejected": -0.3103482127189636, "step": 642 }, { "epoch": 1.7604380561259412, "grad_norm": 5.813877582550049, "learning_rate": 9.120547945205478e-07, "log_odds_chosen": -0.30914306640625, "log_odds_ratio": -0.9694010019302368, "logits/chosen": 0.04094249755144119, "logits/rejected": 0.02646537683904171, "logps/chosen": -3.1654303073883057, "logps/rejected": -2.843841791152954, "loss": 1.5002, "nll_loss": 1.403235912322998, "rewards/accuracies": 0.625, "rewards/chosen": -0.3165430426597595, "rewards/margins": -0.03215885907411575, "rewards/rejected": -0.284384161233902, "step": 643 }, { "epoch": 1.7631759069130732, "grad_norm": 4.767459869384766, "learning_rate": 9.11917808219178e-07, "log_odds_chosen": 0.29252368211746216, "log_odds_ratio": -0.6342282295227051, "logits/chosen": -0.025571994483470917, "logits/rejected": -0.11796710640192032, "logps/chosen": -2.2705936431884766, "logps/rejected": -2.531604766845703, "loss": 1.6445, "nll_loss": 1.5810836553573608, "rewards/accuracies": 0.5, "rewards/chosen": -0.22705936431884766, "rewards/margins": 0.026101117953658104, "rewards/rejected": -0.2531604766845703, "step": 644 }, { "epoch": 1.7659137577002053, "grad_norm": 5.036641597747803, "learning_rate": 9.117808219178082e-07, "log_odds_chosen": -0.10228584706783295, "log_odds_ratio": -0.8116579055786133, "logits/chosen": -0.06154690310359001, "logits/rejected": -0.11925802379846573, "logps/chosen": -2.233509063720703, "logps/rejected": -2.142942428588867, "loss": 1.6265, "nll_loss": 1.5453495979309082, "rewards/accuracies": 0.5, "rewards/chosen": -0.2233509123325348, "rewards/margins": -0.009056685492396355, "rewards/rejected": -0.2142942249774933, "step": 645 }, { "epoch": 1.7686516084873376, "grad_norm": 5.677955627441406, "learning_rate": 9.116438356164384e-07, "log_odds_chosen": -0.3491785526275635, "log_odds_ratio": -1.0875624418258667, "logits/chosen": -0.11161894351243973, "logits/rejected": -0.10372621566057205, "logps/chosen": -3.126011848449707, "logps/rejected": -2.7472853660583496, "loss": 1.5394, "nll_loss": 1.4306830167770386, "rewards/accuracies": 0.5, "rewards/chosen": -0.3126011788845062, "rewards/margins": -0.03787263482809067, "rewards/rejected": -0.27472853660583496, "step": 646 }, { "epoch": 1.7713894592744697, "grad_norm": 5.38969087600708, "learning_rate": 9.115068493150685e-07, "log_odds_chosen": 0.11171749234199524, "log_odds_ratio": -0.7073543071746826, "logits/chosen": -0.010598741471767426, "logits/rejected": -0.10256434977054596, "logps/chosen": -2.808579921722412, "logps/rejected": -2.894369602203369, "loss": 1.5413, "nll_loss": 1.4705238342285156, "rewards/accuracies": 0.75, "rewards/chosen": -0.28085801005363464, "rewards/margins": 0.008578957989811897, "rewards/rejected": -0.2894369661808014, "step": 647 }, { "epoch": 1.7741273100616017, "grad_norm": 6.590665817260742, "learning_rate": 9.113698630136986e-07, "log_odds_chosen": 0.13419978320598602, "log_odds_ratio": -0.6509923934936523, "logits/chosen": 0.057119857519865036, "logits/rejected": 0.1451394259929657, "logps/chosen": -3.3608572483062744, "logps/rejected": -3.4754178524017334, "loss": 1.3666, "nll_loss": 1.3015038967132568, "rewards/accuracies": 0.5, "rewards/chosen": -0.336085706949234, "rewards/margins": 0.011456064879894257, "rewards/rejected": -0.34754177927970886, "step": 648 }, { "epoch": 1.7768651608487338, "grad_norm": 4.929500579833984, "learning_rate": 9.112328767123288e-07, "log_odds_chosen": 0.28391391038894653, "log_odds_ratio": -0.5903414487838745, "logits/chosen": 0.08955908566713333, "logits/rejected": 0.038200996816158295, "logps/chosen": -2.141958475112915, "logps/rejected": -2.395690441131592, "loss": 1.4519, "nll_loss": 1.3928791284561157, "rewards/accuracies": 0.875, "rewards/chosen": -0.2141958773136139, "rewards/margins": 0.025373198091983795, "rewards/rejected": -0.2395690679550171, "step": 649 }, { "epoch": 1.7796030116358659, "grad_norm": 5.82886266708374, "learning_rate": 9.11095890410959e-07, "log_odds_chosen": 0.6725531816482544, "log_odds_ratio": -0.7186357975006104, "logits/chosen": -0.1111975908279419, "logits/rejected": -0.08468898385763168, "logps/chosen": -3.1325795650482178, "logps/rejected": -3.744040012359619, "loss": 1.5451, "nll_loss": 1.473215937614441, "rewards/accuracies": 0.75, "rewards/chosen": -0.31325799226760864, "rewards/margins": 0.06114604324102402, "rewards/rejected": -0.37440401315689087, "step": 650 }, { "epoch": 1.782340862422998, "grad_norm": 5.584893703460693, "learning_rate": 9.10958904109589e-07, "log_odds_chosen": -0.8181859850883484, "log_odds_ratio": -1.2437121868133545, "logits/chosen": 0.02459101378917694, "logits/rejected": 0.03632257133722305, "logps/chosen": -3.3315470218658447, "logps/rejected": -2.534430980682373, "loss": 1.5903, "nll_loss": 1.4659347534179688, "rewards/accuracies": 0.375, "rewards/chosen": -0.33315467834472656, "rewards/margins": -0.07971159368753433, "rewards/rejected": -0.2534431219100952, "step": 651 }, { "epoch": 1.78507871321013, "grad_norm": 5.006797790527344, "learning_rate": 9.108219178082192e-07, "log_odds_chosen": 0.05552089214324951, "log_odds_ratio": -0.8307293057441711, "logits/chosen": -0.05533658340573311, "logits/rejected": -0.11125098168849945, "logps/chosen": -2.7151594161987305, "logps/rejected": -2.6914398670196533, "loss": 1.5227, "nll_loss": 1.4395889043807983, "rewards/accuracies": 0.5, "rewards/chosen": -0.27151593565940857, "rewards/margins": -0.0023719556629657745, "rewards/rejected": -0.2691439688205719, "step": 652 }, { "epoch": 1.787816563997262, "grad_norm": 5.2942609786987305, "learning_rate": 9.106849315068493e-07, "log_odds_chosen": 0.34146708250045776, "log_odds_ratio": -0.7271745204925537, "logits/chosen": -0.10549738258123398, "logits/rejected": -0.11698459833860397, "logps/chosen": -2.5093770027160645, "logps/rejected": -2.8336732387542725, "loss": 1.5412, "nll_loss": 1.4685208797454834, "rewards/accuracies": 0.375, "rewards/chosen": -0.25093770027160645, "rewards/margins": 0.03242962807416916, "rewards/rejected": -0.2833673357963562, "step": 653 }, { "epoch": 1.7905544147843941, "grad_norm": 5.335822582244873, "learning_rate": 9.105479452054794e-07, "log_odds_chosen": -0.17567187547683716, "log_odds_ratio": -0.9972118735313416, "logits/chosen": 0.020832199603319168, "logits/rejected": 0.07645418494939804, "logps/chosen": -2.6313083171844482, "logps/rejected": -2.4692740440368652, "loss": 1.5113, "nll_loss": 1.4115619659423828, "rewards/accuracies": 0.75, "rewards/chosen": -0.2631308436393738, "rewards/margins": -0.01620342954993248, "rewards/rejected": -0.246927410364151, "step": 654 }, { "epoch": 1.7932922655715262, "grad_norm": 5.623659133911133, "learning_rate": 9.104109589041096e-07, "log_odds_chosen": -0.0037474632263183594, "log_odds_ratio": -0.7883797287940979, "logits/chosen": -0.08578136563301086, "logits/rejected": -0.05227535590529442, "logps/chosen": -2.6411948204040527, "logps/rejected": -2.6271610260009766, "loss": 1.5193, "nll_loss": 1.4404553174972534, "rewards/accuracies": 0.5, "rewards/chosen": -0.2641195058822632, "rewards/margins": -0.0014033950865268707, "rewards/rejected": -0.2627160847187042, "step": 655 }, { "epoch": 1.7960301163586585, "grad_norm": 5.077853202819824, "learning_rate": 9.102739726027397e-07, "log_odds_chosen": -0.07459799945354462, "log_odds_ratio": -0.803326427936554, "logits/chosen": -0.0992116928100586, "logits/rejected": -0.15951773524284363, "logps/chosen": -3.026482105255127, "logps/rejected": -2.951725721359253, "loss": 1.6511, "nll_loss": 1.5707931518554688, "rewards/accuracies": 0.625, "rewards/chosen": -0.3026482164859772, "rewards/margins": -0.007475624792277813, "rewards/rejected": -0.2951726019382477, "step": 656 }, { "epoch": 1.7987679671457906, "grad_norm": 5.85186243057251, "learning_rate": 9.101369863013698e-07, "log_odds_chosen": 0.14047449827194214, "log_odds_ratio": -0.7501985430717468, "logits/chosen": -0.10254140198230743, "logits/rejected": -0.021219681948423386, "logps/chosen": -3.160635471343994, "logps/rejected": -3.2920055389404297, "loss": 1.4524, "nll_loss": 1.3774018287658691, "rewards/accuracies": 0.5, "rewards/chosen": -0.3160635530948639, "rewards/margins": 0.013137001544237137, "rewards/rejected": -0.3292005658149719, "step": 657 }, { "epoch": 1.8015058179329226, "grad_norm": 5.363225936889648, "learning_rate": 9.1e-07, "log_odds_chosen": 0.3643951714038849, "log_odds_ratio": -0.6437596082687378, "logits/chosen": -0.08925898373126984, "logits/rejected": -0.11280902475118637, "logps/chosen": -2.872692108154297, "logps/rejected": -3.19831919670105, "loss": 1.4513, "nll_loss": 1.3869301080703735, "rewards/accuracies": 0.875, "rewards/chosen": -0.2872692048549652, "rewards/margins": 0.032562702894210815, "rewards/rejected": -0.319831907749176, "step": 658 }, { "epoch": 1.8042436687200547, "grad_norm": 6.833887577056885, "learning_rate": 9.098630136986301e-07, "log_odds_chosen": 0.45717141032218933, "log_odds_ratio": -0.7003172636032104, "logits/chosen": 0.0036919545382261276, "logits/rejected": 0.031185103580355644, "logps/chosen": -3.745260238647461, "logps/rejected": -4.154576301574707, "loss": 1.4272, "nll_loss": 1.357198715209961, "rewards/accuracies": 0.875, "rewards/chosen": -0.3745259940624237, "rewards/margins": 0.04093163087964058, "rewards/rejected": -0.4154576361179352, "step": 659 }, { "epoch": 1.806981519507187, "grad_norm": 6.940057754516602, "learning_rate": 9.097260273972603e-07, "log_odds_chosen": -0.41814833879470825, "log_odds_ratio": -1.1401383876800537, "logits/chosen": -0.030439700931310654, "logits/rejected": 0.060267411172389984, "logps/chosen": -3.543327808380127, "logps/rejected": -3.1142005920410156, "loss": 1.5676, "nll_loss": 1.4535818099975586, "rewards/accuracies": 0.375, "rewards/chosen": -0.3543328046798706, "rewards/margins": -0.04291275888681412, "rewards/rejected": -0.3114200532436371, "step": 660 }, { "epoch": 1.809719370294319, "grad_norm": 5.504570007324219, "learning_rate": 9.095890410958904e-07, "log_odds_chosen": 0.10122275352478027, "log_odds_ratio": -0.7736507654190063, "logits/chosen": -0.0003253147006034851, "logits/rejected": -0.020755112171173096, "logps/chosen": -2.9866836071014404, "logps/rejected": -3.0508909225463867, "loss": 1.5402, "nll_loss": 1.462859869003296, "rewards/accuracies": 0.5, "rewards/chosen": -0.29866838455200195, "rewards/margins": 0.006420735269784927, "rewards/rejected": -0.3050891160964966, "step": 661 }, { "epoch": 1.8124572210814511, "grad_norm": 5.654898643493652, "learning_rate": 9.094520547945205e-07, "log_odds_chosen": 0.24952836334705353, "log_odds_ratio": -0.6600240468978882, "logits/chosen": -0.08602763712406158, "logits/rejected": -0.08942260593175888, "logps/chosen": -3.2066240310668945, "logps/rejected": -3.3975112438201904, "loss": 1.5247, "nll_loss": 1.4586726427078247, "rewards/accuracies": 0.5, "rewards/chosen": -0.3206624388694763, "rewards/margins": 0.01908871904015541, "rewards/rejected": -0.33975112438201904, "step": 662 }, { "epoch": 1.8151950718685832, "grad_norm": 5.054746627807617, "learning_rate": 9.093150684931507e-07, "log_odds_chosen": 0.20859596133232117, "log_odds_ratio": -0.7681059837341309, "logits/chosen": -0.05864652991294861, "logits/rejected": -0.09940380603075027, "logps/chosen": -3.172478199005127, "logps/rejected": -3.357316493988037, "loss": 1.5647, "nll_loss": 1.4878506660461426, "rewards/accuracies": 0.625, "rewards/chosen": -0.31724783778190613, "rewards/margins": 0.018483862280845642, "rewards/rejected": -0.3357316851615906, "step": 663 }, { "epoch": 1.8179329226557153, "grad_norm": 5.577322959899902, "learning_rate": 9.091780821917808e-07, "log_odds_chosen": 0.6049891710281372, "log_odds_ratio": -0.679424524307251, "logits/chosen": -0.0009948872029781342, "logits/rejected": -0.0618572011590004, "logps/chosen": -2.7985076904296875, "logps/rejected": -3.2828471660614014, "loss": 1.4682, "nll_loss": 1.400278925895691, "rewards/accuracies": 0.625, "rewards/chosen": -0.2798507809638977, "rewards/margins": 0.04843396693468094, "rewards/rejected": -0.32828474044799805, "step": 664 }, { "epoch": 1.8206707734428473, "grad_norm": 5.309604167938232, "learning_rate": 9.090410958904109e-07, "log_odds_chosen": 0.4014589488506317, "log_odds_ratio": -0.577696681022644, "logits/chosen": 0.10746753960847855, "logits/rejected": 0.11326368153095245, "logps/chosen": -2.8572700023651123, "logps/rejected": -3.2096052169799805, "loss": 1.5718, "nll_loss": 1.5140039920806885, "rewards/accuracies": 0.875, "rewards/chosen": -0.28572702407836914, "rewards/margins": 0.03523349389433861, "rewards/rejected": -0.32096052169799805, "step": 665 }, { "epoch": 1.8234086242299794, "grad_norm": 4.846029758453369, "learning_rate": 9.089041095890411e-07, "log_odds_chosen": 0.6693605184555054, "log_odds_ratio": -0.48912447690963745, "logits/chosen": 0.03080824948847294, "logits/rejected": -0.00917840376496315, "logps/chosen": -2.0311808586120605, "logps/rejected": -2.607931613922119, "loss": 1.4662, "nll_loss": 1.4173270463943481, "rewards/accuracies": 0.625, "rewards/chosen": -0.20311810076236725, "rewards/margins": 0.057675063610076904, "rewards/rejected": -0.26079317927360535, "step": 666 }, { "epoch": 1.8261464750171115, "grad_norm": 6.482645034790039, "learning_rate": 9.087671232876713e-07, "log_odds_chosen": -1.3503708839416504, "log_odds_ratio": -1.7552711963653564, "logits/chosen": -0.10322017222642899, "logits/rejected": -0.020904213190078735, "logps/chosen": -4.086149215698242, "logps/rejected": -2.7634267807006836, "loss": 1.5432, "nll_loss": 1.3676856756210327, "rewards/accuracies": 0.375, "rewards/chosen": -0.4086149334907532, "rewards/margins": -0.13227224349975586, "rewards/rejected": -0.2763426899909973, "step": 667 }, { "epoch": 1.8288843258042435, "grad_norm": 4.914063453674316, "learning_rate": 9.086301369863013e-07, "log_odds_chosen": 0.33388495445251465, "log_odds_ratio": -0.5944373607635498, "logits/chosen": -0.08168817311525345, "logits/rejected": -0.08207378536462784, "logps/chosen": -2.0484519004821777, "logps/rejected": -2.3402516841888428, "loss": 1.4704, "nll_loss": 1.4109102487564087, "rewards/accuracies": 0.625, "rewards/chosen": -0.20484521985054016, "rewards/margins": 0.02917996048927307, "rewards/rejected": -0.23402516543865204, "step": 668 }, { "epoch": 1.8316221765913756, "grad_norm": 5.260858058929443, "learning_rate": 9.084931506849315e-07, "log_odds_chosen": 1.144741415977478, "log_odds_ratio": -0.49830013513565063, "logits/chosen": 0.0239294171333313, "logits/rejected": -0.060355715453624725, "logps/chosen": -2.5692529678344727, "logps/rejected": -3.645220994949341, "loss": 1.494, "nll_loss": 1.4441473484039307, "rewards/accuracies": 0.75, "rewards/chosen": -0.2569252848625183, "rewards/margins": 0.10759677737951279, "rewards/rejected": -0.3645220994949341, "step": 669 }, { "epoch": 1.834360027378508, "grad_norm": 5.925809860229492, "learning_rate": 9.083561643835616e-07, "log_odds_chosen": -0.7952602505683899, "log_odds_ratio": -1.3938279151916504, "logits/chosen": -0.1466570347547531, "logits/rejected": -0.2113867849111557, "logps/chosen": -3.327324867248535, "logps/rejected": -2.5273871421813965, "loss": 1.5581, "nll_loss": 1.418681263923645, "rewards/accuracies": 0.5, "rewards/chosen": -0.33273249864578247, "rewards/margins": -0.07999378442764282, "rewards/rejected": -0.25273871421813965, "step": 670 }, { "epoch": 1.83709787816564, "grad_norm": 6.987085819244385, "learning_rate": 9.082191780821917e-07, "log_odds_chosen": -0.29451701045036316, "log_odds_ratio": -1.0555942058563232, "logits/chosen": 0.10966044664382935, "logits/rejected": 0.13864004611968994, "logps/chosen": -3.562844753265381, "logps/rejected": -3.250976085662842, "loss": 1.4793, "nll_loss": 1.373764157295227, "rewards/accuracies": 0.5, "rewards/chosen": -0.356284499168396, "rewards/margins": -0.03118688054382801, "rewards/rejected": -0.32509759068489075, "step": 671 }, { "epoch": 1.839835728952772, "grad_norm": 5.0482988357543945, "learning_rate": 9.080821917808219e-07, "log_odds_chosen": 0.7617295384407043, "log_odds_ratio": -0.5910096168518066, "logits/chosen": 0.14962682127952576, "logits/rejected": 0.10111347585916519, "logps/chosen": -2.664689064025879, "logps/rejected": -3.36889386177063, "loss": 1.5284, "nll_loss": 1.4693479537963867, "rewards/accuracies": 0.625, "rewards/chosen": -0.26646894216537476, "rewards/margins": 0.07042047381401062, "rewards/rejected": -0.336889386177063, "step": 672 }, { "epoch": 1.8425735797399043, "grad_norm": 5.600563049316406, "learning_rate": 9.07945205479452e-07, "log_odds_chosen": 0.04316091537475586, "log_odds_ratio": -0.7167925238609314, "logits/chosen": 0.0904848501086235, "logits/rejected": 0.13499051332473755, "logps/chosen": -2.990915536880493, "logps/rejected": -3.0252392292022705, "loss": 1.5029, "nll_loss": 1.431251049041748, "rewards/accuracies": 0.5, "rewards/chosen": -0.2990915775299072, "rewards/margins": 0.003432365134358406, "rewards/rejected": -0.3025239408016205, "step": 673 }, { "epoch": 1.8453114305270364, "grad_norm": 5.422566890716553, "learning_rate": 9.078082191780822e-07, "log_odds_chosen": -0.18240699172019958, "log_odds_ratio": -0.8417048454284668, "logits/chosen": -0.0890912115573883, "logits/rejected": -0.004785165190696716, "logps/chosen": -2.6000356674194336, "logps/rejected": -2.444877862930298, "loss": 1.4856, "nll_loss": 1.4013798236846924, "rewards/accuracies": 0.5, "rewards/chosen": -0.26000356674194336, "rewards/margins": -0.01551579125225544, "rewards/rejected": -0.24448779225349426, "step": 674 }, { "epoch": 1.8480492813141685, "grad_norm": 5.276905536651611, "learning_rate": 9.076712328767123e-07, "log_odds_chosen": 0.11571663618087769, "log_odds_ratio": -0.7881863117218018, "logits/chosen": -0.0955650582909584, "logits/rejected": -0.06706997007131577, "logps/chosen": -2.933373212814331, "logps/rejected": -3.0302135944366455, "loss": 1.4581, "nll_loss": 1.3792818784713745, "rewards/accuracies": 0.375, "rewards/chosen": -0.293337345123291, "rewards/margins": 0.009684037417173386, "rewards/rejected": -0.3030213713645935, "step": 675 }, { "epoch": 1.8507871321013005, "grad_norm": 5.715381622314453, "learning_rate": 9.075342465753424e-07, "log_odds_chosen": 0.001506127417087555, "log_odds_ratio": -0.7880832552909851, "logits/chosen": -0.13315293192863464, "logits/rejected": -0.10912713408470154, "logps/chosen": -3.444039821624756, "logps/rejected": -3.4436612129211426, "loss": 1.4422, "nll_loss": 1.3634259700775146, "rewards/accuracies": 0.625, "rewards/chosen": -0.3444039821624756, "rewards/margins": -3.78880649805069e-05, "rewards/rejected": -0.3443661034107208, "step": 676 }, { "epoch": 1.8535249828884326, "grad_norm": 5.60575532913208, "learning_rate": 9.073972602739726e-07, "log_odds_chosen": 0.16457024216651917, "log_odds_ratio": -0.7115508317947388, "logits/chosen": 0.24752426147460938, "logits/rejected": 0.2456921935081482, "logps/chosen": -2.5762734413146973, "logps/rejected": -2.6908180713653564, "loss": 1.4133, "nll_loss": 1.342123031616211, "rewards/accuracies": 0.875, "rewards/chosen": -0.25762733817100525, "rewards/margins": 0.011454434134066105, "rewards/rejected": -0.2690817713737488, "step": 677 }, { "epoch": 1.8562628336755647, "grad_norm": 5.179043292999268, "learning_rate": 9.072602739726027e-07, "log_odds_chosen": 1.452834129333496, "log_odds_ratio": -0.4853561818599701, "logits/chosen": 0.07435858249664307, "logits/rejected": 0.0032295547425746918, "logps/chosen": -2.788511276245117, "logps/rejected": -4.18669319152832, "loss": 1.4008, "nll_loss": 1.3522506952285767, "rewards/accuracies": 0.875, "rewards/chosen": -0.27885112166404724, "rewards/margins": 0.1398181915283203, "rewards/rejected": -0.41866934299468994, "step": 678 }, { "epoch": 1.8590006844626967, "grad_norm": 5.897744178771973, "learning_rate": 9.071232876712328e-07, "log_odds_chosen": 0.38844919204711914, "log_odds_ratio": -0.6188111305236816, "logits/chosen": 0.09557788074016571, "logits/rejected": 0.025463160127401352, "logps/chosen": -2.7911038398742676, "logps/rejected": -3.135162353515625, "loss": 1.3452, "nll_loss": 1.2833528518676758, "rewards/accuracies": 0.625, "rewards/chosen": -0.2791104018688202, "rewards/margins": 0.03440584987401962, "rewards/rejected": -0.3135162591934204, "step": 679 }, { "epoch": 1.8617385352498288, "grad_norm": 4.971566677093506, "learning_rate": 9.06986301369863e-07, "log_odds_chosen": 0.7108525037765503, "log_odds_ratio": -0.4787586033344269, "logits/chosen": 0.053261175751686096, "logits/rejected": 0.027192339301109314, "logps/chosen": -2.191807270050049, "logps/rejected": -2.8529176712036133, "loss": 1.3912, "nll_loss": 1.3432872295379639, "rewards/accuracies": 0.875, "rewards/chosen": -0.21918071806430817, "rewards/margins": 0.06611104309558868, "rewards/rejected": -0.28529176115989685, "step": 680 }, { "epoch": 1.8644763860369609, "grad_norm": 6.658471584320068, "learning_rate": 9.068493150684932e-07, "log_odds_chosen": -0.35641735792160034, "log_odds_ratio": -1.0324467420578003, "logits/chosen": 0.05681641772389412, "logits/rejected": 0.13573451340198517, "logps/chosen": -3.38175892829895, "logps/rejected": -3.0334274768829346, "loss": 1.3666, "nll_loss": 1.2633942365646362, "rewards/accuracies": 0.375, "rewards/chosen": -0.3381759226322174, "rewards/margins": -0.0348331592977047, "rewards/rejected": -0.3033427596092224, "step": 681 }, { "epoch": 1.867214236824093, "grad_norm": 7.072437286376953, "learning_rate": 9.067123287671232e-07, "log_odds_chosen": -0.11137673258781433, "log_odds_ratio": -0.9550151824951172, "logits/chosen": 0.014401115477085114, "logits/rejected": 0.00963558629155159, "logps/chosen": -3.3506500720977783, "logps/rejected": -3.2060177326202393, "loss": 1.4781, "nll_loss": 1.3825916051864624, "rewards/accuracies": 0.75, "rewards/chosen": -0.33506500720977783, "rewards/margins": -0.01446324773132801, "rewards/rejected": -0.32060176134109497, "step": 682 }, { "epoch": 1.8699520876112252, "grad_norm": 5.37047815322876, "learning_rate": 9.065753424657534e-07, "log_odds_chosen": -0.1516505628824234, "log_odds_ratio": -0.8369318246841431, "logits/chosen": -0.0432191863656044, "logits/rejected": -0.05718949809670448, "logps/chosen": -2.638025999069214, "logps/rejected": -2.488192319869995, "loss": 1.4784, "nll_loss": 1.3946772813796997, "rewards/accuracies": 0.5, "rewards/chosen": -0.2638026177883148, "rewards/margins": -0.01498337835073471, "rewards/rejected": -0.24881921708583832, "step": 683 }, { "epoch": 1.8726899383983573, "grad_norm": 6.646732807159424, "learning_rate": 9.064383561643835e-07, "log_odds_chosen": -0.26602140069007874, "log_odds_ratio": -1.0221748352050781, "logits/chosen": 0.0022870078682899475, "logits/rejected": 0.1264944225549698, "logps/chosen": -3.2809741497039795, "logps/rejected": -3.002633571624756, "loss": 1.4411, "nll_loss": 1.3389105796813965, "rewards/accuracies": 0.375, "rewards/chosen": -0.328097403049469, "rewards/margins": -0.027834035456180573, "rewards/rejected": -0.300263375043869, "step": 684 }, { "epoch": 1.8754277891854894, "grad_norm": 7.33802604675293, "learning_rate": 9.063013698630136e-07, "log_odds_chosen": -1.1654667854309082, "log_odds_ratio": -1.6415400505065918, "logits/chosen": 0.20381250977516174, "logits/rejected": 0.270859956741333, "logps/chosen": -4.020402431488037, "logps/rejected": -2.8707261085510254, "loss": 1.5675, "nll_loss": 1.403337001800537, "rewards/accuracies": 0.375, "rewards/chosen": -0.4020402431488037, "rewards/margins": -0.11496762931346893, "rewards/rejected": -0.2870725989341736, "step": 685 }, { "epoch": 1.8781656399726216, "grad_norm": 5.064664363861084, "learning_rate": 9.061643835616438e-07, "log_odds_chosen": -0.0908486619591713, "log_odds_ratio": -0.8055988550186157, "logits/chosen": -0.10139546543359756, "logits/rejected": -0.1284540891647339, "logps/chosen": -2.605619192123413, "logps/rejected": -2.5322952270507812, "loss": 1.4637, "nll_loss": 1.3830955028533936, "rewards/accuracies": 0.375, "rewards/chosen": -0.26056191325187683, "rewards/margins": -0.007332415319979191, "rewards/rejected": -0.2532294988632202, "step": 686 }, { "epoch": 1.8809034907597537, "grad_norm": 5.181798458099365, "learning_rate": 9.060273972602739e-07, "log_odds_chosen": -0.0494438111782074, "log_odds_ratio": -0.8015796542167664, "logits/chosen": -0.01553078182041645, "logits/rejected": 0.016203219071030617, "logps/chosen": -2.6092638969421387, "logps/rejected": -2.5559921264648438, "loss": 1.4232, "nll_loss": 1.3430756330490112, "rewards/accuracies": 0.5, "rewards/chosen": -0.26092639565467834, "rewards/margins": -0.005327204242348671, "rewards/rejected": -0.2555992007255554, "step": 687 }, { "epoch": 1.8836413415468858, "grad_norm": 5.335884094238281, "learning_rate": 9.058904109589041e-07, "log_odds_chosen": 1.127673864364624, "log_odds_ratio": -0.7189455628395081, "logits/chosen": 0.16638804972171783, "logits/rejected": 0.06820660084486008, "logps/chosen": -2.861783504486084, "logps/rejected": -3.96897554397583, "loss": 1.4156, "nll_loss": 1.343717336654663, "rewards/accuracies": 0.5, "rewards/chosen": -0.2861783504486084, "rewards/margins": 0.11071918159723282, "rewards/rejected": -0.3968975245952606, "step": 688 }, { "epoch": 1.8863791923340179, "grad_norm": 6.277740955352783, "learning_rate": 9.057534246575342e-07, "log_odds_chosen": -0.5790262222290039, "log_odds_ratio": -1.2261275053024292, "logits/chosen": -0.06253896653652191, "logits/rejected": -0.06169700622558594, "logps/chosen": -3.9161789417266846, "logps/rejected": -3.336483955383301, "loss": 1.4815, "nll_loss": 1.3589322566986084, "rewards/accuracies": 0.25, "rewards/chosen": -0.39161789417266846, "rewards/margins": -0.057969506829977036, "rewards/rejected": -0.3336483836174011, "step": 689 }, { "epoch": 1.88911704312115, "grad_norm": 6.60009241104126, "learning_rate": 9.056164383561643e-07, "log_odds_chosen": 0.27583998441696167, "log_odds_ratio": -0.7687781453132629, "logits/chosen": 0.06904526054859161, "logits/rejected": 0.14970800280570984, "logps/chosen": -3.344876766204834, "logps/rejected": -3.590242862701416, "loss": 1.3453, "nll_loss": 1.2684061527252197, "rewards/accuracies": 0.625, "rewards/chosen": -0.3344876766204834, "rewards/margins": 0.024536635726690292, "rewards/rejected": -0.359024316072464, "step": 690 }, { "epoch": 1.891854893908282, "grad_norm": 5.479407787322998, "learning_rate": 9.054794520547945e-07, "log_odds_chosen": 0.6191139221191406, "log_odds_ratio": -0.8306854367256165, "logits/chosen": 0.10416008532047272, "logits/rejected": 0.04664982110261917, "logps/chosen": -2.299818277359009, "logps/rejected": -2.858431816101074, "loss": 1.4593, "nll_loss": 1.3762235641479492, "rewards/accuracies": 0.5, "rewards/chosen": -0.22998183965682983, "rewards/margins": 0.05586136132478714, "rewards/rejected": -0.2858431935310364, "step": 691 }, { "epoch": 1.894592744695414, "grad_norm": 5.9776835441589355, "learning_rate": 9.053424657534246e-07, "log_odds_chosen": -0.46445056796073914, "log_odds_ratio": -1.070489525794983, "logits/chosen": -0.027264514937996864, "logits/rejected": 0.0016882065683603287, "logps/chosen": -3.483415126800537, "logps/rejected": -3.0315566062927246, "loss": 1.4948, "nll_loss": 1.3877136707305908, "rewards/accuracies": 0.25, "rewards/chosen": -0.34834152460098267, "rewards/margins": -0.0451858714222908, "rewards/rejected": -0.30315569043159485, "step": 692 }, { "epoch": 1.8973305954825461, "grad_norm": 6.323030948638916, "learning_rate": 9.052054794520547e-07, "log_odds_chosen": -0.3475892245769501, "log_odds_ratio": -1.0237077474594116, "logits/chosen": -0.028685782104730606, "logits/rejected": -0.0813964456319809, "logps/chosen": -3.3272857666015625, "logps/rejected": -2.963954210281372, "loss": 1.4346, "nll_loss": 1.3322420120239258, "rewards/accuracies": 0.625, "rewards/chosen": -0.3327285945415497, "rewards/margins": -0.036333173513412476, "rewards/rejected": -0.2963954210281372, "step": 693 }, { "epoch": 1.9000684462696782, "grad_norm": 4.544037342071533, "learning_rate": 9.050684931506849e-07, "log_odds_chosen": 0.7664134502410889, "log_odds_ratio": -0.6509377956390381, "logits/chosen": 0.03004857338964939, "logits/rejected": -0.12832550704479218, "logps/chosen": -2.491732597351074, "logps/rejected": -3.2213962078094482, "loss": 1.4743, "nll_loss": 1.4091763496398926, "rewards/accuracies": 0.625, "rewards/chosen": -0.24917323887348175, "rewards/margins": 0.07296635955572128, "rewards/rejected": -0.3221396207809448, "step": 694 }, { "epoch": 1.9028062970568103, "grad_norm": 5.938077926635742, "learning_rate": 9.049315068493151e-07, "log_odds_chosen": -0.5637698769569397, "log_odds_ratio": -1.0839579105377197, "logits/chosen": -0.19381234049797058, "logits/rejected": -0.07374812662601471, "logps/chosen": -3.2832016944885254, "logps/rejected": -2.7543625831604004, "loss": 1.4033, "nll_loss": 1.2949001789093018, "rewards/accuracies": 0.25, "rewards/chosen": -0.328320175409317, "rewards/margins": -0.05288391560316086, "rewards/rejected": -0.27543628215789795, "step": 695 }, { "epoch": 1.9055441478439425, "grad_norm": 5.335797309875488, "learning_rate": 9.047945205479451e-07, "log_odds_chosen": 0.36244165897369385, "log_odds_ratio": -0.7341945171356201, "logits/chosen": -0.0930773913860321, "logits/rejected": -0.05831214040517807, "logps/chosen": -2.73793888092041, "logps/rejected": -3.0861120223999023, "loss": 1.4784, "nll_loss": 1.4049444198608398, "rewards/accuracies": 0.75, "rewards/chosen": -0.27379387617111206, "rewards/margins": 0.03481731563806534, "rewards/rejected": -0.3086112141609192, "step": 696 }, { "epoch": 1.9082819986310746, "grad_norm": 5.532586097717285, "learning_rate": 9.046575342465753e-07, "log_odds_chosen": -0.10024464875459671, "log_odds_ratio": -0.7870299816131592, "logits/chosen": -0.03694760426878929, "logits/rejected": -0.07761485129594803, "logps/chosen": -3.274655342102051, "logps/rejected": -3.176161289215088, "loss": 1.3691, "nll_loss": 1.2903740406036377, "rewards/accuracies": 0.625, "rewards/chosen": -0.3274655342102051, "rewards/margins": -0.009849390015006065, "rewards/rejected": -0.31761613488197327, "step": 697 }, { "epoch": 1.9110198494182067, "grad_norm": 4.953485488891602, "learning_rate": 9.045205479452055e-07, "log_odds_chosen": 0.296719491481781, "log_odds_ratio": -0.6926387548446655, "logits/chosen": 0.07025185227394104, "logits/rejected": -0.001472368836402893, "logps/chosen": -2.7332804203033447, "logps/rejected": -2.9969658851623535, "loss": 1.4514, "nll_loss": 1.3821041584014893, "rewards/accuracies": 0.75, "rewards/chosen": -0.2733280658721924, "rewards/margins": 0.02636851742863655, "rewards/rejected": -0.29969659447669983, "step": 698 }, { "epoch": 1.913757700205339, "grad_norm": 5.707128047943115, "learning_rate": 9.043835616438355e-07, "log_odds_chosen": 0.4480306804180145, "log_odds_ratio": -0.7713850736618042, "logits/chosen": -0.021557297557592392, "logits/rejected": -0.0515114888548851, "logps/chosen": -3.285853624343872, "logps/rejected": -3.7194502353668213, "loss": 1.4658, "nll_loss": 1.3887054920196533, "rewards/accuracies": 0.375, "rewards/chosen": -0.3285853862762451, "rewards/margins": 0.04335964471101761, "rewards/rejected": -0.37194502353668213, "step": 699 }, { "epoch": 1.916495550992471, "grad_norm": 5.781423091888428, "learning_rate": 9.042465753424657e-07, "log_odds_chosen": -0.154222771525383, "log_odds_ratio": -0.86244136095047, "logits/chosen": -0.023783594369888306, "logits/rejected": 0.13482746481895447, "logps/chosen": -3.059206962585449, "logps/rejected": -2.9323158264160156, "loss": 1.3747, "nll_loss": 1.288415551185608, "rewards/accuracies": 0.375, "rewards/chosen": -0.30592072010040283, "rewards/margins": -0.01268911361694336, "rewards/rejected": -0.2932316064834595, "step": 700 }, { "epoch": 1.919233401779603, "grad_norm": 5.191410064697266, "learning_rate": 9.041095890410958e-07, "log_odds_chosen": 0.05883319675922394, "log_odds_ratio": -0.6841819882392883, "logits/chosen": 0.05130798742175102, "logits/rejected": 0.006596057675778866, "logps/chosen": -2.932727098464966, "logps/rejected": -3.0018670558929443, "loss": 1.5087, "nll_loss": 1.4402399063110352, "rewards/accuracies": 0.625, "rewards/chosen": -0.2932727038860321, "rewards/margins": 0.006913982331752777, "rewards/rejected": -0.3001866936683655, "step": 701 }, { "epoch": 1.9219712525667352, "grad_norm": 5.428308963775635, "learning_rate": 9.03972602739726e-07, "log_odds_chosen": -0.5888969898223877, "log_odds_ratio": -1.2365148067474365, "logits/chosen": -0.14065206050872803, "logits/rejected": -0.1350269615650177, "logps/chosen": -2.8180952072143555, "logps/rejected": -2.2225208282470703, "loss": 1.4667, "nll_loss": 1.3430548906326294, "rewards/accuracies": 0.5, "rewards/chosen": -0.2818095088005066, "rewards/margins": -0.05955743044614792, "rewards/rejected": -0.22225208580493927, "step": 702 }, { "epoch": 1.9247091033538672, "grad_norm": 5.27585506439209, "learning_rate": 9.038356164383561e-07, "log_odds_chosen": 0.11716128140687943, "log_odds_ratio": -0.6789137721061707, "logits/chosen": 0.12455764412879944, "logits/rejected": 0.07678598165512085, "logps/chosen": -2.8438608646392822, "logps/rejected": -2.9388766288757324, "loss": 1.3769, "nll_loss": 1.3089745044708252, "rewards/accuracies": 0.625, "rewards/chosen": -0.2843860685825348, "rewards/margins": 0.009501596912741661, "rewards/rejected": -0.2938876748085022, "step": 703 }, { "epoch": 1.9274469541409993, "grad_norm": 5.429149627685547, "learning_rate": 9.036986301369862e-07, "log_odds_chosen": 0.31716102361679077, "log_odds_ratio": -0.8243668079376221, "logits/chosen": -0.04598249867558479, "logits/rejected": -0.10112249851226807, "logps/chosen": -3.207644462585449, "logps/rejected": -3.4685232639312744, "loss": 1.5083, "nll_loss": 1.4258219003677368, "rewards/accuracies": 0.375, "rewards/chosen": -0.3207644820213318, "rewards/margins": 0.02608785778284073, "rewards/rejected": -0.3468523323535919, "step": 704 }, { "epoch": 1.9301848049281314, "grad_norm": 5.633625030517578, "learning_rate": 9.035616438356164e-07, "log_odds_chosen": -0.10992775857448578, "log_odds_ratio": -0.8535380363464355, "logits/chosen": 0.07904985547065735, "logits/rejected": 0.04553481191396713, "logps/chosen": -2.6678504943847656, "logps/rejected": -2.5109448432922363, "loss": 1.3469, "nll_loss": 1.261580467224121, "rewards/accuracies": 0.5, "rewards/chosen": -0.2667850852012634, "rewards/margins": -0.015690581873059273, "rewards/rejected": -0.2510944902896881, "step": 705 }, { "epoch": 1.9329226557152634, "grad_norm": 5.103896617889404, "learning_rate": 9.034246575342465e-07, "log_odds_chosen": 0.2787693738937378, "log_odds_ratio": -0.6108972430229187, "logits/chosen": 0.06949642300605774, "logits/rejected": -0.05782187730073929, "logps/chosen": -2.599191665649414, "logps/rejected": -2.8290345668792725, "loss": 1.4882, "nll_loss": 1.4270678758621216, "rewards/accuracies": 0.625, "rewards/chosen": -0.2599191665649414, "rewards/margins": 0.022984279319643974, "rewards/rejected": -0.2829034626483917, "step": 706 }, { "epoch": 1.9356605065023955, "grad_norm": 4.954079627990723, "learning_rate": 9.032876712328766e-07, "log_odds_chosen": 0.5931088924407959, "log_odds_ratio": -0.5001386404037476, "logits/chosen": 0.1574324518442154, "logits/rejected": 0.11415641754865646, "logps/chosen": -2.2653000354766846, "logps/rejected": -2.7948222160339355, "loss": 1.3342, "nll_loss": 1.2842158079147339, "rewards/accuracies": 0.75, "rewards/chosen": -0.22652998566627502, "rewards/margins": 0.052952226251363754, "rewards/rejected": -0.2794822156429291, "step": 707 }, { "epoch": 1.9383983572895276, "grad_norm": 5.139930725097656, "learning_rate": 9.031506849315068e-07, "log_odds_chosen": -0.09868277609348297, "log_odds_ratio": -0.8648868203163147, "logits/chosen": -0.07960756868124008, "logits/rejected": -0.052679553627967834, "logps/chosen": -2.807288885116577, "logps/rejected": -2.6544837951660156, "loss": 1.3943, "nll_loss": 1.3077625036239624, "rewards/accuracies": 0.5, "rewards/chosen": -0.28072887659072876, "rewards/margins": -0.015280498191714287, "rewards/rejected": -0.26544836163520813, "step": 708 }, { "epoch": 1.9411362080766599, "grad_norm": 5.526027202606201, "learning_rate": 9.03013698630137e-07, "log_odds_chosen": 0.2777063846588135, "log_odds_ratio": -0.7096993923187256, "logits/chosen": 0.12888634204864502, "logits/rejected": 0.16947682201862335, "logps/chosen": -3.0711050033569336, "logps/rejected": -3.341740608215332, "loss": 1.3669, "nll_loss": 1.295935869216919, "rewards/accuracies": 0.625, "rewards/chosen": -0.3071104884147644, "rewards/margins": 0.027063589543104172, "rewards/rejected": -0.3341740667819977, "step": 709 }, { "epoch": 1.943874058863792, "grad_norm": 5.009483337402344, "learning_rate": 9.02876712328767e-07, "log_odds_chosen": 0.040998674929142, "log_odds_ratio": -0.7559510469436646, "logits/chosen": 0.1451537311077118, "logits/rejected": 0.12054234743118286, "logps/chosen": -2.5787696838378906, "logps/rejected": -2.644221782684326, "loss": 1.5026, "nll_loss": 1.427046537399292, "rewards/accuracies": 0.5, "rewards/chosen": -0.2578769624233246, "rewards/margins": 0.006545182317495346, "rewards/rejected": -0.26442214846611023, "step": 710 }, { "epoch": 1.946611909650924, "grad_norm": 4.665572166442871, "learning_rate": 9.027397260273972e-07, "log_odds_chosen": 0.1344120353460312, "log_odds_ratio": -0.6883367300033569, "logits/chosen": 0.2535417079925537, "logits/rejected": 0.23883871734142303, "logps/chosen": -2.1757614612579346, "logps/rejected": -2.314286231994629, "loss": 1.4746, "nll_loss": 1.4057214260101318, "rewards/accuracies": 0.75, "rewards/chosen": -0.21757617592811584, "rewards/margins": 0.013852465897798538, "rewards/rejected": -0.2314286231994629, "step": 711 }, { "epoch": 1.949349760438056, "grad_norm": 5.493515491485596, "learning_rate": 9.026027397260274e-07, "log_odds_chosen": 0.15460321307182312, "log_odds_ratio": -0.7468448877334595, "logits/chosen": 0.28676488995552063, "logits/rejected": 0.31733205914497375, "logps/chosen": -2.8361728191375732, "logps/rejected": -2.991764545440674, "loss": 1.2697, "nll_loss": 1.1950604915618896, "rewards/accuracies": 0.5, "rewards/chosen": -0.2836172878742218, "rewards/margins": 0.015559162944555283, "rewards/rejected": -0.2991764545440674, "step": 712 }, { "epoch": 1.9520876112251884, "grad_norm": 5.831904411315918, "learning_rate": 9.024657534246574e-07, "log_odds_chosen": 0.21369953453540802, "log_odds_ratio": -0.7570252418518066, "logits/chosen": 0.010992461815476418, "logits/rejected": 0.08986039459705353, "logps/chosen": -2.92350172996521, "logps/rejected": -3.0593206882476807, "loss": 1.2857, "nll_loss": 1.2100131511688232, "rewards/accuracies": 0.625, "rewards/chosen": -0.292350172996521, "rewards/margins": 0.01358192041516304, "rewards/rejected": -0.30593210458755493, "step": 713 }, { "epoch": 1.9548254620123204, "grad_norm": 5.453153133392334, "learning_rate": 9.023287671232876e-07, "log_odds_chosen": -0.17374679446220398, "log_odds_ratio": -0.860346794128418, "logits/chosen": 0.1793844848871231, "logits/rejected": 0.20531633496284485, "logps/chosen": -3.0872302055358887, "logps/rejected": -2.927117347717285, "loss": 1.4324, "nll_loss": 1.3463839292526245, "rewards/accuracies": 0.5, "rewards/chosen": -0.3087230324745178, "rewards/margins": -0.016011280938982964, "rewards/rejected": -0.2927117347717285, "step": 714 }, { "epoch": 1.9575633127994525, "grad_norm": 6.367825031280518, "learning_rate": 9.021917808219177e-07, "log_odds_chosen": 0.13886205852031708, "log_odds_ratio": -0.8062055706977844, "logits/chosen": 0.23236291110515594, "logits/rejected": 0.32395386695861816, "logps/chosen": -3.7972183227539062, "logps/rejected": -3.9340062141418457, "loss": 1.2996, "nll_loss": 1.2189728021621704, "rewards/accuracies": 0.5, "rewards/chosen": -0.37972187995910645, "rewards/margins": 0.013678746297955513, "rewards/rejected": -0.3934006094932556, "step": 715 }, { "epoch": 1.9603011635865846, "grad_norm": 5.116738319396973, "learning_rate": 9.020547945205479e-07, "log_odds_chosen": 1.2709922790527344, "log_odds_ratio": -0.5237890481948853, "logits/chosen": -0.057327352464199066, "logits/rejected": -0.06993715465068817, "logps/chosen": -2.832953453063965, "logps/rejected": -4.057226181030273, "loss": 1.453, "nll_loss": 1.4005920886993408, "rewards/accuracies": 0.875, "rewards/chosen": -0.2832953631877899, "rewards/margins": 0.12242724746465683, "rewards/rejected": -0.40572261810302734, "step": 716 }, { "epoch": 1.9630390143737166, "grad_norm": 5.751468658447266, "learning_rate": 9.01917808219178e-07, "log_odds_chosen": 0.48223990201950073, "log_odds_ratio": -0.5135727524757385, "logits/chosen": 0.09022780507802963, "logits/rejected": 0.1299647092819214, "logps/chosen": -2.9113645553588867, "logps/rejected": -3.366241455078125, "loss": 1.2357, "nll_loss": 1.1843072175979614, "rewards/accuracies": 0.875, "rewards/chosen": -0.2911364436149597, "rewards/margins": 0.04548773169517517, "rewards/rejected": -0.3366241455078125, "step": 717 }, { "epoch": 1.9657768651608487, "grad_norm": 5.64945125579834, "learning_rate": 9.017808219178081e-07, "log_odds_chosen": 0.0662359893321991, "log_odds_ratio": -0.7005355358123779, "logits/chosen": 0.018241895362734795, "logits/rejected": 0.09684480726718903, "logps/chosen": -2.711322784423828, "logps/rejected": -2.760908603668213, "loss": 1.4027, "nll_loss": 1.3326865434646606, "rewards/accuracies": 0.625, "rewards/chosen": -0.2711322605609894, "rewards/margins": 0.004958587698638439, "rewards/rejected": -0.2760908603668213, "step": 718 }, { "epoch": 1.9685147159479808, "grad_norm": 5.352878093719482, "learning_rate": 9.016438356164383e-07, "log_odds_chosen": 0.716213583946228, "log_odds_ratio": -0.43918752670288086, "logits/chosen": 0.017017507925629616, "logits/rejected": 0.07751792669296265, "logps/chosen": -2.428471565246582, "logps/rejected": -3.0787200927734375, "loss": 1.2966, "nll_loss": 1.252656102180481, "rewards/accuracies": 0.875, "rewards/chosen": -0.24284714460372925, "rewards/margins": 0.06502486020326614, "rewards/rejected": -0.3078719973564148, "step": 719 }, { "epoch": 1.9712525667351128, "grad_norm": 4.959578990936279, "learning_rate": 9.015068493150684e-07, "log_odds_chosen": 0.7564728856086731, "log_odds_ratio": -0.49339744448661804, "logits/chosen": -0.06984781473875046, "logits/rejected": -0.061174530535936356, "logps/chosen": -2.610013961791992, "logps/rejected": -3.3174314498901367, "loss": 1.3974, "nll_loss": 1.3480935096740723, "rewards/accuracies": 0.875, "rewards/chosen": -0.2610014081001282, "rewards/margins": 0.07074173539876938, "rewards/rejected": -0.33174315094947815, "step": 720 }, { "epoch": 1.973990417522245, "grad_norm": 6.843796730041504, "learning_rate": 9.013698630136985e-07, "log_odds_chosen": -0.5916363000869751, "log_odds_ratio": -1.199575662612915, "logits/chosen": 0.10227596014738083, "logits/rejected": 0.22869256138801575, "logps/chosen": -3.7110397815704346, "logps/rejected": -3.121666669845581, "loss": 1.3416, "nll_loss": 1.2216343879699707, "rewards/accuracies": 0.25, "rewards/chosen": -0.371103972196579, "rewards/margins": -0.058937303721904755, "rewards/rejected": -0.312166690826416, "step": 721 }, { "epoch": 1.976728268309377, "grad_norm": 4.561530590057373, "learning_rate": 9.012328767123287e-07, "log_odds_chosen": 0.331025630235672, "log_odds_ratio": -0.6234476566314697, "logits/chosen": 0.09764464944601059, "logits/rejected": -0.015529446303844452, "logps/chosen": -1.9922713041305542, "logps/rejected": -2.275864601135254, "loss": 1.3572, "nll_loss": 1.2948906421661377, "rewards/accuracies": 0.75, "rewards/chosen": -0.19922712445259094, "rewards/margins": 0.02835935354232788, "rewards/rejected": -0.22758647799491882, "step": 722 }, { "epoch": 1.9794661190965093, "grad_norm": 6.0921630859375, "learning_rate": 9.01095890410959e-07, "log_odds_chosen": -0.26984599232673645, "log_odds_ratio": -1.1493384838104248, "logits/chosen": 0.014323432929813862, "logits/rejected": 0.02036524936556816, "logps/chosen": -3.5398683547973633, "logps/rejected": -3.2465991973876953, "loss": 1.3774, "nll_loss": 1.2624733448028564, "rewards/accuracies": 0.375, "rewards/chosen": -0.35398685932159424, "rewards/margins": -0.029326919466257095, "rewards/rejected": -0.32465994358062744, "step": 723 }, { "epoch": 1.9822039698836413, "grad_norm": 5.651134490966797, "learning_rate": 9.00958904109589e-07, "log_odds_chosen": -0.11236253380775452, "log_odds_ratio": -0.8487135767936707, "logits/chosen": -0.04535786435008049, "logits/rejected": 0.00556109519675374, "logps/chosen": -3.075338840484619, "logps/rejected": -2.9639134407043457, "loss": 1.3754, "nll_loss": 1.2905514240264893, "rewards/accuracies": 0.625, "rewards/chosen": -0.3075338900089264, "rewards/margins": -0.011142509058117867, "rewards/rejected": -0.2963913679122925, "step": 724 }, { "epoch": 1.9849418206707734, "grad_norm": 5.259877681732178, "learning_rate": 9.008219178082192e-07, "log_odds_chosen": -0.047013916075229645, "log_odds_ratio": -0.7948074340820312, "logits/chosen": 0.08526169508695602, "logits/rejected": 0.1438683420419693, "logps/chosen": -2.5833301544189453, "logps/rejected": -2.522395133972168, "loss": 1.3114, "nll_loss": 1.231959581375122, "rewards/accuracies": 0.625, "rewards/chosen": -0.2583330273628235, "rewards/margins": -0.006093526259064674, "rewards/rejected": -0.25223949551582336, "step": 725 }, { "epoch": 1.9876796714579057, "grad_norm": 5.279478073120117, "learning_rate": 9.006849315068494e-07, "log_odds_chosen": 0.23362916707992554, "log_odds_ratio": -0.7921596169471741, "logits/chosen": 0.04760390520095825, "logits/rejected": -0.010610546916723251, "logps/chosen": -2.788951873779297, "logps/rejected": -3.0158028602600098, "loss": 1.3653, "nll_loss": 1.2861254215240479, "rewards/accuracies": 0.5, "rewards/chosen": -0.27889519929885864, "rewards/margins": 0.022685080766677856, "rewards/rejected": -0.3015803098678589, "step": 726 }, { "epoch": 1.9904175222450378, "grad_norm": 6.241796493530273, "learning_rate": 9.005479452054794e-07, "log_odds_chosen": 0.09953173995018005, "log_odds_ratio": -0.7816530466079712, "logits/chosen": 0.11804452538490295, "logits/rejected": 0.12489339709281921, "logps/chosen": -3.2249464988708496, "logps/rejected": -3.2978763580322266, "loss": 1.2515, "nll_loss": 1.1732988357543945, "rewards/accuracies": 0.5, "rewards/chosen": -0.32249465584754944, "rewards/margins": 0.007293010130524635, "rewards/rejected": -0.3297876715660095, "step": 727 }, { "epoch": 1.9931553730321698, "grad_norm": 4.682016849517822, "learning_rate": 9.004109589041096e-07, "log_odds_chosen": 0.4860062599182129, "log_odds_ratio": -0.5105868577957153, "logits/chosen": -0.0012889280915260315, "logits/rejected": -0.024035386741161346, "logps/chosen": -2.3320631980895996, "logps/rejected": -2.789804220199585, "loss": 1.3356, "nll_loss": 1.2845706939697266, "rewards/accuracies": 0.875, "rewards/chosen": -0.23320633172988892, "rewards/margins": 0.04577409848570824, "rewards/rejected": -0.27898043394088745, "step": 728 }, { "epoch": 1.995893223819302, "grad_norm": 4.970042705535889, "learning_rate": 9.002739726027398e-07, "log_odds_chosen": -0.09150045365095139, "log_odds_ratio": -0.792113184928894, "logits/chosen": -0.0036946851760149, "logits/rejected": -0.046423882246017456, "logps/chosen": -2.4002981185913086, "logps/rejected": -2.3226230144500732, "loss": 1.4113, "nll_loss": 1.3320897817611694, "rewards/accuracies": 0.5, "rewards/chosen": -0.24002978205680847, "rewards/margins": -0.007767495699226856, "rewards/rejected": -0.2322622835636139, "step": 729 }, { "epoch": 1.998631074606434, "grad_norm": 6.299317359924316, "learning_rate": 9.001369863013698e-07, "log_odds_chosen": 0.44303321838378906, "log_odds_ratio": -0.9957646727561951, "logits/chosen": 0.03044775500893593, "logits/rejected": 0.030638404190540314, "logps/chosen": -3.4656105041503906, "logps/rejected": -3.8642399311065674, "loss": 1.3907, "nll_loss": 1.2911337614059448, "rewards/accuracies": 0.375, "rewards/chosen": -0.346561074256897, "rewards/margins": 0.03986295312643051, "rewards/rejected": -0.3864240050315857, "step": 730 }, { "epoch": 2.001368925393566, "grad_norm": 5.283613681793213, "learning_rate": 9e-07, "log_odds_chosen": 0.43056368827819824, "log_odds_ratio": -0.8053244948387146, "logits/chosen": 0.1233825534582138, "logits/rejected": 0.16076429188251495, "logps/chosen": -3.3823485374450684, "logps/rejected": -3.8040988445281982, "loss": 1.3758, "nll_loss": 1.2953163385391235, "rewards/accuracies": 0.625, "rewards/chosen": -0.3382348418235779, "rewards/margins": 0.0421750582754612, "rewards/rejected": -0.3804098963737488, "step": 731 }, { "epoch": 2.004106776180698, "grad_norm": 6.495110034942627, "learning_rate": 8.998630136986301e-07, "log_odds_chosen": 0.3320707678794861, "log_odds_ratio": -0.6577354669570923, "logits/chosen": 0.14688736200332642, "logits/rejected": 0.17339760065078735, "logps/chosen": -3.173590660095215, "logps/rejected": -3.4716854095458984, "loss": 1.2723, "nll_loss": 1.2065684795379639, "rewards/accuracies": 0.75, "rewards/chosen": -0.3173590898513794, "rewards/margins": 0.029809467494487762, "rewards/rejected": -0.34716853499412537, "step": 732 }, { "epoch": 2.00684462696783, "grad_norm": 5.66990852355957, "learning_rate": 8.997260273972603e-07, "log_odds_chosen": 0.20701278746128082, "log_odds_ratio": -0.6786509156227112, "logits/chosen": 0.08212488889694214, "logits/rejected": 0.15510308742523193, "logps/chosen": -3.0821406841278076, "logps/rejected": -3.264516830444336, "loss": 1.2457, "nll_loss": 1.1778582334518433, "rewards/accuracies": 0.625, "rewards/chosen": -0.30821406841278076, "rewards/margins": 0.01823764108121395, "rewards/rejected": -0.32645171880722046, "step": 733 }, { "epoch": 2.0095824777549622, "grad_norm": 4.821579456329346, "learning_rate": 8.995890410958904e-07, "log_odds_chosen": 1.2003228664398193, "log_odds_ratio": -0.54002845287323, "logits/chosen": 0.16695694625377655, "logits/rejected": 0.13439856469631195, "logps/chosen": -2.3265793323516846, "logps/rejected": -3.476959705352783, "loss": 1.3004, "nll_loss": 1.2463959455490112, "rewards/accuracies": 0.875, "rewards/chosen": -0.23265792429447174, "rewards/margins": 0.11503802984952927, "rewards/rejected": -0.3476959466934204, "step": 734 }, { "epoch": 2.0123203285420943, "grad_norm": 5.6029462814331055, "learning_rate": 8.994520547945205e-07, "log_odds_chosen": -0.13149911165237427, "log_odds_ratio": -0.8775892853736877, "logits/chosen": 0.15991425514221191, "logits/rejected": 0.10891716182231903, "logps/chosen": -3.008467435836792, "logps/rejected": -2.8568365573883057, "loss": 1.2892, "nll_loss": 1.2014837265014648, "rewards/accuracies": 0.5, "rewards/chosen": -0.30084675550460815, "rewards/margins": -0.015163097530603409, "rewards/rejected": -0.28568363189697266, "step": 735 }, { "epoch": 2.0150581793292264, "grad_norm": 6.033130168914795, "learning_rate": 8.993150684931507e-07, "log_odds_chosen": 0.0541347861289978, "log_odds_ratio": -0.8157953023910522, "logits/chosen": 0.15234798192977905, "logits/rejected": 0.21986165642738342, "logps/chosen": -2.840641736984253, "logps/rejected": -2.8197436332702637, "loss": 1.3064, "nll_loss": 1.2248612642288208, "rewards/accuracies": 0.75, "rewards/chosen": -0.2840641736984253, "rewards/margins": -0.0020898208022117615, "rewards/rejected": -0.2819743752479553, "step": 736 }, { "epoch": 2.0177960301163584, "grad_norm": 4.557549476623535, "learning_rate": 8.991780821917808e-07, "log_odds_chosen": 0.6827643513679504, "log_odds_ratio": -0.5105754137039185, "logits/chosen": 0.18229086697101593, "logits/rejected": 0.11982382833957672, "logps/chosen": -1.9933099746704102, "logps/rejected": -2.630342483520508, "loss": 1.3665, "nll_loss": 1.3154534101486206, "rewards/accuracies": 0.75, "rewards/chosen": -0.19933100044727325, "rewards/margins": 0.06370324641466141, "rewards/rejected": -0.26303425431251526, "step": 737 }, { "epoch": 2.020533880903491, "grad_norm": 4.919419288635254, "learning_rate": 8.990410958904109e-07, "log_odds_chosen": 0.07590354979038239, "log_odds_ratio": -0.6930575370788574, "logits/chosen": 0.11989861726760864, "logits/rejected": 0.10893838852643967, "logps/chosen": -2.185469388961792, "logps/rejected": -2.27152419090271, "loss": 1.3071, "nll_loss": 1.2377715110778809, "rewards/accuracies": 0.375, "rewards/chosen": -0.21854692697525024, "rewards/margins": 0.008605491369962692, "rewards/rejected": -0.22715240716934204, "step": 738 }, { "epoch": 2.023271731690623, "grad_norm": 5.944755554199219, "learning_rate": 8.989041095890411e-07, "log_odds_chosen": -0.3465481400489807, "log_odds_ratio": -1.1728792190551758, "logits/chosen": 0.17242610454559326, "logits/rejected": 0.1754530966281891, "logps/chosen": -3.385354995727539, "logps/rejected": -3.0039308071136475, "loss": 1.4068, "nll_loss": 1.2894644737243652, "rewards/accuracies": 0.75, "rewards/chosen": -0.33853551745414734, "rewards/margins": -0.03814244270324707, "rewards/rejected": -0.30039307475090027, "step": 739 }, { "epoch": 2.026009582477755, "grad_norm": 5.351871013641357, "learning_rate": 8.987671232876713e-07, "log_odds_chosen": 0.37228846549987793, "log_odds_ratio": -0.589410126209259, "logits/chosen": 0.06563717126846313, "logits/rejected": 0.09139084815979004, "logps/chosen": -3.070939064025879, "logps/rejected": -3.427781581878662, "loss": 1.2826, "nll_loss": 1.2236794233322144, "rewards/accuracies": 0.75, "rewards/chosen": -0.30709391832351685, "rewards/margins": 0.0356842540204525, "rewards/rejected": -0.34277817606925964, "step": 740 }, { "epoch": 2.028747433264887, "grad_norm": 5.177361011505127, "learning_rate": 8.986301369863013e-07, "log_odds_chosen": 0.2198687195777893, "log_odds_ratio": -0.8263553977012634, "logits/chosen": 0.2574143707752228, "logits/rejected": 0.1972324252128601, "logps/chosen": -2.7890236377716064, "logps/rejected": -2.9709219932556152, "loss": 1.2134, "nll_loss": 1.13076913356781, "rewards/accuracies": 0.875, "rewards/chosen": -0.2789023816585541, "rewards/margins": 0.018189843744039536, "rewards/rejected": -0.2970921993255615, "step": 741 }, { "epoch": 2.0314852840520192, "grad_norm": 4.889588356018066, "learning_rate": 8.984931506849315e-07, "log_odds_chosen": 0.6201960444450378, "log_odds_ratio": -0.6362556219100952, "logits/chosen": -0.018340758979320526, "logits/rejected": -0.032712820917367935, "logps/chosen": -2.939131498336792, "logps/rejected": -3.509791612625122, "loss": 1.3421, "nll_loss": 1.2785131931304932, "rewards/accuracies": 0.625, "rewards/chosen": -0.2939131259918213, "rewards/margins": 0.057065993547439575, "rewards/rejected": -0.35097914934158325, "step": 742 }, { "epoch": 2.0342231348391513, "grad_norm": 5.225351333618164, "learning_rate": 8.983561643835617e-07, "log_odds_chosen": -0.27584248781204224, "log_odds_ratio": -0.983191728591919, "logits/chosen": 0.13281700015068054, "logits/rejected": 0.1608809381723404, "logps/chosen": -3.0761899948120117, "logps/rejected": -2.7889342308044434, "loss": 1.2965, "nll_loss": 1.1981585025787354, "rewards/accuracies": 0.5, "rewards/chosen": -0.30761897563934326, "rewards/margins": -0.028725571930408478, "rewards/rejected": -0.2788934111595154, "step": 743 }, { "epoch": 2.0369609856262834, "grad_norm": 4.931036472320557, "learning_rate": 8.982191780821917e-07, "log_odds_chosen": -0.30404919385910034, "log_odds_ratio": -1.022444486618042, "logits/chosen": 0.23019695281982422, "logits/rejected": 0.2095191925764084, "logps/chosen": -2.305319309234619, "logps/rejected": -1.9880481958389282, "loss": 1.4774, "nll_loss": 1.3751254081726074, "rewards/accuracies": 0.625, "rewards/chosen": -0.2305319458246231, "rewards/margins": -0.03172711655497551, "rewards/rejected": -0.1988048255443573, "step": 744 }, { "epoch": 2.0396988364134154, "grad_norm": 4.393705368041992, "learning_rate": 8.980821917808219e-07, "log_odds_chosen": 0.732088565826416, "log_odds_ratio": -0.4303528070449829, "logits/chosen": 0.2735850512981415, "logits/rejected": 0.17223931849002838, "logps/chosen": -2.3306238651275635, "logps/rejected": -3.0241103172302246, "loss": 1.3316, "nll_loss": 1.2885501384735107, "rewards/accuracies": 0.875, "rewards/chosen": -0.23306238651275635, "rewards/margins": 0.06934864819049835, "rewards/rejected": -0.3024110198020935, "step": 745 }, { "epoch": 2.0424366872005475, "grad_norm": 6.294990062713623, "learning_rate": 8.97945205479452e-07, "log_odds_chosen": -0.42591744661331177, "log_odds_ratio": -1.0400059223175049, "logits/chosen": 0.19007955491542816, "logits/rejected": 0.24285799264907837, "logps/chosen": -3.646456003189087, "logps/rejected": -3.2299306392669678, "loss": 1.2359, "nll_loss": 1.1318929195404053, "rewards/accuracies": 0.125, "rewards/chosen": -0.3646456003189087, "rewards/margins": -0.04165252670645714, "rewards/rejected": -0.32299306988716125, "step": 746 }, { "epoch": 2.0451745379876796, "grad_norm": 4.532721996307373, "learning_rate": 8.978082191780822e-07, "log_odds_chosen": 0.21949028968811035, "log_odds_ratio": -0.629106342792511, "logits/chosen": 0.09157636761665344, "logits/rejected": 0.0779542326927185, "logps/chosen": -2.390249252319336, "logps/rejected": -2.580483913421631, "loss": 1.2385, "nll_loss": 1.1756136417388916, "rewards/accuracies": 0.625, "rewards/chosen": -0.23902490735054016, "rewards/margins": 0.019023466855287552, "rewards/rejected": -0.2580483853816986, "step": 747 }, { "epoch": 2.0479123887748116, "grad_norm": 4.414247035980225, "learning_rate": 8.976712328767123e-07, "log_odds_chosen": 0.5414022207260132, "log_odds_ratio": -0.5894378423690796, "logits/chosen": 0.06629331409931183, "logits/rejected": 0.03270904719829559, "logps/chosen": -2.337780714035034, "logps/rejected": -2.807697296142578, "loss": 1.3716, "nll_loss": 1.3126802444458008, "rewards/accuracies": 0.75, "rewards/chosen": -0.23377807438373566, "rewards/margins": 0.04699166864156723, "rewards/rejected": -0.2807697355747223, "step": 748 }, { "epoch": 2.0506502395619437, "grad_norm": 6.41048002243042, "learning_rate": 8.975342465753424e-07, "log_odds_chosen": -0.16883055865764618, "log_odds_ratio": -0.967837929725647, "logits/chosen": 0.13504107296466827, "logits/rejected": 0.1572374701499939, "logps/chosen": -3.9118599891662598, "logps/rejected": -3.7395858764648438, "loss": 1.2564, "nll_loss": 1.1595709323883057, "rewards/accuracies": 0.375, "rewards/chosen": -0.391185998916626, "rewards/margins": -0.01722744293510914, "rewards/rejected": -0.3739585876464844, "step": 749 }, { "epoch": 2.0533880903490758, "grad_norm": 5.133131504058838, "learning_rate": 8.973972602739726e-07, "log_odds_chosen": -0.37966591119766235, "log_odds_ratio": -1.0743495225906372, "logits/chosen": 0.1705031543970108, "logits/rejected": 0.2352445423603058, "logps/chosen": -2.8758997917175293, "logps/rejected": -2.502350091934204, "loss": 1.3306, "nll_loss": 1.2231404781341553, "rewards/accuracies": 0.375, "rewards/chosen": -0.287589967250824, "rewards/margins": -0.037354957312345505, "rewards/rejected": -0.25023502111434937, "step": 750 }, { "epoch": 2.0561259411362083, "grad_norm": 4.424262046813965, "learning_rate": 8.972602739726027e-07, "log_odds_chosen": 0.35844409465789795, "log_odds_ratio": -0.5818377733230591, "logits/chosen": 0.20401808619499207, "logits/rejected": 0.1569913923740387, "logps/chosen": -2.534381151199341, "logps/rejected": -2.8861210346221924, "loss": 1.3539, "nll_loss": 1.29574716091156, "rewards/accuracies": 0.5, "rewards/chosen": -0.25343814492225647, "rewards/margins": 0.035173967480659485, "rewards/rejected": -0.28861212730407715, "step": 751 }, { "epoch": 2.0588637919233403, "grad_norm": 4.629861354827881, "learning_rate": 8.971232876712328e-07, "log_odds_chosen": 0.15983566641807556, "log_odds_ratio": -0.7787998914718628, "logits/chosen": 0.2890493869781494, "logits/rejected": 0.24481432139873505, "logps/chosen": -2.672872304916382, "logps/rejected": -2.7837424278259277, "loss": 1.3866, "nll_loss": 1.3087210655212402, "rewards/accuracies": 0.75, "rewards/chosen": -0.2672872245311737, "rewards/margins": 0.011087022721767426, "rewards/rejected": -0.27837425470352173, "step": 752 }, { "epoch": 2.0616016427104724, "grad_norm": 5.3198628425598145, "learning_rate": 8.96986301369863e-07, "log_odds_chosen": -0.2301061600446701, "log_odds_ratio": -0.8683526515960693, "logits/chosen": 0.04299547150731087, "logits/rejected": 0.12470471858978271, "logps/chosen": -3.0358595848083496, "logps/rejected": -2.8172385692596436, "loss": 1.2753, "nll_loss": 1.188460111618042, "rewards/accuracies": 0.5, "rewards/chosen": -0.303585946559906, "rewards/margins": -0.021862123161554337, "rewards/rejected": -0.28172385692596436, "step": 753 }, { "epoch": 2.0643394934976045, "grad_norm": 4.86634635925293, "learning_rate": 8.968493150684932e-07, "log_odds_chosen": 0.5779427886009216, "log_odds_ratio": -0.5856115818023682, "logits/chosen": 0.09867766499519348, "logits/rejected": 0.07674181461334229, "logps/chosen": -2.6936874389648438, "logps/rejected": -3.236093044281006, "loss": 1.2788, "nll_loss": 1.2201919555664062, "rewards/accuracies": 0.625, "rewards/chosen": -0.2693687677383423, "rewards/margins": 0.054240547120571136, "rewards/rejected": -0.323609322309494, "step": 754 }, { "epoch": 2.0670773442847366, "grad_norm": 4.981109619140625, "learning_rate": 8.967123287671232e-07, "log_odds_chosen": 0.3680479824542999, "log_odds_ratio": -0.7387081384658813, "logits/chosen": 0.1894269734621048, "logits/rejected": 0.17601843178272247, "logps/chosen": -2.8597183227539062, "logps/rejected": -3.1725080013275146, "loss": 1.2264, "nll_loss": 1.1525769233703613, "rewards/accuracies": 0.75, "rewards/chosen": -0.28597182035446167, "rewards/margins": 0.03127897158265114, "rewards/rejected": -0.3172507882118225, "step": 755 }, { "epoch": 2.0698151950718686, "grad_norm": 5.434076309204102, "learning_rate": 8.965753424657534e-07, "log_odds_chosen": -0.42769837379455566, "log_odds_ratio": -1.0008834600448608, "logits/chosen": 0.24655166268348694, "logits/rejected": 0.25035426020622253, "logps/chosen": -3.331869602203369, "logps/rejected": -2.939735174179077, "loss": 1.3261, "nll_loss": 1.2259647846221924, "rewards/accuracies": 0.25, "rewards/chosen": -0.3331869840621948, "rewards/margins": -0.039213452488183975, "rewards/rejected": -0.29397350549697876, "step": 756 }, { "epoch": 2.0725530458590007, "grad_norm": 5.660238265991211, "learning_rate": 8.964383561643836e-07, "log_odds_chosen": 0.2670910358428955, "log_odds_ratio": -0.7278347015380859, "logits/chosen": 0.0710853785276413, "logits/rejected": 0.1044294685125351, "logps/chosen": -3.8083136081695557, "logps/rejected": -4.052874565124512, "loss": 1.3532, "nll_loss": 1.2803733348846436, "rewards/accuracies": 0.75, "rewards/chosen": -0.38083136081695557, "rewards/margins": 0.024456094950437546, "rewards/rejected": -0.4052874445915222, "step": 757 }, { "epoch": 2.0752908966461328, "grad_norm": 5.994651794433594, "learning_rate": 8.963013698630136e-07, "log_odds_chosen": 0.5834388732910156, "log_odds_ratio": -0.6090536117553711, "logits/chosen": 0.21377503871917725, "logits/rejected": 0.3100004196166992, "logps/chosen": -2.945065498352051, "logps/rejected": -3.4935028553009033, "loss": 1.1668, "nll_loss": 1.105944275856018, "rewards/accuracies": 0.5, "rewards/chosen": -0.2945065200328827, "rewards/margins": 0.054843753576278687, "rewards/rejected": -0.3493502736091614, "step": 758 }, { "epoch": 2.078028747433265, "grad_norm": 5.019863605499268, "learning_rate": 8.961643835616438e-07, "log_odds_chosen": 0.9702685475349426, "log_odds_ratio": -0.4555622637271881, "logits/chosen": 0.28344303369522095, "logits/rejected": 0.2753596901893616, "logps/chosen": -2.5034844875335693, "logps/rejected": -3.405777931213379, "loss": 1.136, "nll_loss": 1.0904383659362793, "rewards/accuracies": 0.875, "rewards/chosen": -0.25034844875335693, "rewards/margins": 0.0902293398976326, "rewards/rejected": -0.3405778110027313, "step": 759 }, { "epoch": 2.080766598220397, "grad_norm": 5.114793300628662, "learning_rate": 8.96027397260274e-07, "log_odds_chosen": -0.020998291671276093, "log_odds_ratio": -0.9036746621131897, "logits/chosen": 0.08192002028226852, "logits/rejected": 0.11945746093988419, "logps/chosen": -3.473956823348999, "logps/rejected": -3.4563562870025635, "loss": 1.3339, "nll_loss": 1.2434988021850586, "rewards/accuracies": 0.5, "rewards/chosen": -0.347395658493042, "rewards/margins": -0.001760026440024376, "rewards/rejected": -0.34563565254211426, "step": 760 }, { "epoch": 2.083504449007529, "grad_norm": 4.508927345275879, "learning_rate": 8.958904109589041e-07, "log_odds_chosen": 0.38539430499076843, "log_odds_ratio": -0.5748023986816406, "logits/chosen": 0.2045350819826126, "logits/rejected": 0.12543079257011414, "logps/chosen": -2.4554030895233154, "logps/rejected": -2.8335022926330566, "loss": 1.3396, "nll_loss": 1.2820777893066406, "rewards/accuracies": 0.625, "rewards/chosen": -0.2455402910709381, "rewards/margins": 0.03780992701649666, "rewards/rejected": -0.28335022926330566, "step": 761 }, { "epoch": 2.086242299794661, "grad_norm": 4.842044353485107, "learning_rate": 8.957534246575342e-07, "log_odds_chosen": 0.15305879712104797, "log_odds_ratio": -0.8354778289794922, "logits/chosen": 0.1520063877105713, "logits/rejected": 0.15982957184314728, "logps/chosen": -2.8716554641723633, "logps/rejected": -2.9796595573425293, "loss": 1.3782, "nll_loss": 1.294631004333496, "rewards/accuracies": 0.75, "rewards/chosen": -0.2871655523777008, "rewards/margins": 0.010800410062074661, "rewards/rejected": -0.29796597361564636, "step": 762 }, { "epoch": 2.088980150581793, "grad_norm": 4.43699836730957, "learning_rate": 8.956164383561643e-07, "log_odds_chosen": 0.33090996742248535, "log_odds_ratio": -0.5876561403274536, "logits/chosen": 0.14366218447685242, "logits/rejected": 0.17685741186141968, "logps/chosen": -2.3099894523620605, "logps/rejected": -2.60636043548584, "loss": 1.3786, "nll_loss": 1.3198790550231934, "rewards/accuracies": 0.625, "rewards/chosen": -0.2309989482164383, "rewards/margins": 0.029637068510055542, "rewards/rejected": -0.26063603162765503, "step": 763 }, { "epoch": 2.0917180013689256, "grad_norm": 4.6550493240356445, "learning_rate": 8.954794520547945e-07, "log_odds_chosen": 0.32274481654167175, "log_odds_ratio": -0.6882232427597046, "logits/chosen": -0.02637295611202717, "logits/rejected": -0.06774499267339706, "logps/chosen": -2.859055995941162, "logps/rejected": -3.1407814025878906, "loss": 1.3034, "nll_loss": 1.2345343828201294, "rewards/accuracies": 0.5, "rewards/chosen": -0.2859055995941162, "rewards/margins": 0.02817252092063427, "rewards/rejected": -0.31407812237739563, "step": 764 }, { "epoch": 2.0944558521560577, "grad_norm": 4.938938140869141, "learning_rate": 8.953424657534246e-07, "log_odds_chosen": 0.48552244901657104, "log_odds_ratio": -0.6125648021697998, "logits/chosen": -0.017213519662618637, "logits/rejected": 0.0030004680156707764, "logps/chosen": -2.5366873741149902, "logps/rejected": -3.0091352462768555, "loss": 1.2912, "nll_loss": 1.229957103729248, "rewards/accuracies": 0.5, "rewards/chosen": -0.25366872549057007, "rewards/margins": 0.04724480211734772, "rewards/rejected": -0.300913542509079, "step": 765 }, { "epoch": 2.0971937029431897, "grad_norm": 4.662557125091553, "learning_rate": 8.952054794520547e-07, "log_odds_chosen": 0.44075456261634827, "log_odds_ratio": -0.5705784559249878, "logits/chosen": 0.28531551361083984, "logits/rejected": 0.27292436361312866, "logps/chosen": -2.89277720451355, "logps/rejected": -3.2855498790740967, "loss": 1.2339, "nll_loss": 1.176797866821289, "rewards/accuracies": 0.625, "rewards/chosen": -0.28927773237228394, "rewards/margins": 0.03927726298570633, "rewards/rejected": -0.32855498790740967, "step": 766 }, { "epoch": 2.099931553730322, "grad_norm": 5.207242488861084, "learning_rate": 8.950684931506849e-07, "log_odds_chosen": 0.29960405826568604, "log_odds_ratio": -0.664844810962677, "logits/chosen": 0.19275566935539246, "logits/rejected": 0.34033453464508057, "logps/chosen": -2.9760642051696777, "logps/rejected": -3.268838405609131, "loss": 1.2064, "nll_loss": 1.139905571937561, "rewards/accuracies": 0.625, "rewards/chosen": -0.2976064085960388, "rewards/margins": 0.02927745133638382, "rewards/rejected": -0.32688388228416443, "step": 767 }, { "epoch": 2.102669404517454, "grad_norm": 5.7074785232543945, "learning_rate": 8.949315068493151e-07, "log_odds_chosen": 1.342594027519226, "log_odds_ratio": -0.6215930581092834, "logits/chosen": 0.2135944366455078, "logits/rejected": 0.23318710923194885, "logps/chosen": -2.4357666969299316, "logps/rejected": -3.6802706718444824, "loss": 1.3158, "nll_loss": 1.2535969018936157, "rewards/accuracies": 0.625, "rewards/chosen": -0.24357670545578003, "rewards/margins": 0.12445036321878433, "rewards/rejected": -0.3680270314216614, "step": 768 }, { "epoch": 2.105407255304586, "grad_norm": 4.50938606262207, "learning_rate": 8.947945205479451e-07, "log_odds_chosen": 0.6260785460472107, "log_odds_ratio": -0.5475093722343445, "logits/chosen": 0.22870199382305145, "logits/rejected": 0.16511477530002594, "logps/chosen": -2.5139594078063965, "logps/rejected": -3.083627223968506, "loss": 1.3003, "nll_loss": 1.2455946207046509, "rewards/accuracies": 0.75, "rewards/chosen": -0.25139594078063965, "rewards/margins": 0.05696678161621094, "rewards/rejected": -0.3083627223968506, "step": 769 }, { "epoch": 2.108145106091718, "grad_norm": 4.718268394470215, "learning_rate": 8.946575342465753e-07, "log_odds_chosen": 0.10158873349428177, "log_odds_ratio": -0.7052299976348877, "logits/chosen": -0.008919578045606613, "logits/rejected": -0.0083538219332695, "logps/chosen": -2.7089033126831055, "logps/rejected": -2.776913642883301, "loss": 1.278, "nll_loss": 1.2074980735778809, "rewards/accuracies": 0.625, "rewards/chosen": -0.27089035511016846, "rewards/margins": 0.006800994277000427, "rewards/rejected": -0.2776913642883301, "step": 770 }, { "epoch": 2.11088295687885, "grad_norm": 5.118640422821045, "learning_rate": 8.945205479452055e-07, "log_odds_chosen": 1.4179736375808716, "log_odds_ratio": -0.5993789434432983, "logits/chosen": 0.17570224404335022, "logits/rejected": 0.17504331469535828, "logps/chosen": -3.081997871398926, "logps/rejected": -4.410439491271973, "loss": 1.1687, "nll_loss": 1.1087932586669922, "rewards/accuracies": 0.75, "rewards/chosen": -0.30819979310035706, "rewards/margins": 0.13284413516521454, "rewards/rejected": -0.4410439729690552, "step": 771 }, { "epoch": 2.113620807665982, "grad_norm": 6.269607067108154, "learning_rate": 8.943835616438355e-07, "log_odds_chosen": -0.7152857780456543, "log_odds_ratio": -1.1790685653686523, "logits/chosen": 0.026714568957686424, "logits/rejected": 0.041978009045124054, "logps/chosen": -3.307645320892334, "logps/rejected": -2.6045756340026855, "loss": 1.3763, "nll_loss": 1.258392095565796, "rewards/accuracies": 0.25, "rewards/chosen": -0.3307645320892334, "rewards/margins": -0.07030695676803589, "rewards/rejected": -0.2604575753211975, "step": 772 }, { "epoch": 2.116358658453114, "grad_norm": 5.394425868988037, "learning_rate": 8.942465753424657e-07, "log_odds_chosen": -0.15819919109344482, "log_odds_ratio": -0.891361653804779, "logits/chosen": 0.1236012727022171, "logits/rejected": 0.15314379334449768, "logps/chosen": -2.8689393997192383, "logps/rejected": -2.728336811065674, "loss": 1.3407, "nll_loss": 1.2515449523925781, "rewards/accuracies": 0.5, "rewards/chosen": -0.28689396381378174, "rewards/margins": -0.01406027004122734, "rewards/rejected": -0.2728336751461029, "step": 773 }, { "epoch": 2.1190965092402463, "grad_norm": 5.299673080444336, "learning_rate": 8.941095890410959e-07, "log_odds_chosen": 0.09890826046466827, "log_odds_ratio": -0.7495562434196472, "logits/chosen": 0.21679097414016724, "logits/rejected": 0.243691548705101, "logps/chosen": -3.092214584350586, "logps/rejected": -3.183499336242676, "loss": 1.177, "nll_loss": 1.1020116806030273, "rewards/accuracies": 0.75, "rewards/chosen": -0.30922141671180725, "rewards/margins": 0.009128481149673462, "rewards/rejected": -0.3183498978614807, "step": 774 }, { "epoch": 2.1218343600273784, "grad_norm": 5.092208385467529, "learning_rate": 8.93972602739726e-07, "log_odds_chosen": -0.24508413672447205, "log_odds_ratio": -0.9975707530975342, "logits/chosen": 0.17875458300113678, "logits/rejected": 0.23648957908153534, "logps/chosen": -3.77182674407959, "logps/rejected": -3.51094126701355, "loss": 1.2176, "nll_loss": 1.1178895235061646, "rewards/accuracies": 0.5, "rewards/chosen": -0.3771826922893524, "rewards/margins": -0.026088541373610497, "rewards/rejected": -0.351094126701355, "step": 775 }, { "epoch": 2.1245722108145104, "grad_norm": 5.800390720367432, "learning_rate": 8.938356164383561e-07, "log_odds_chosen": -0.6856015920639038, "log_odds_ratio": -1.245908260345459, "logits/chosen": 0.04575151950120926, "logits/rejected": 0.012885775417089462, "logps/chosen": -3.6050052642822266, "logps/rejected": -2.9348087310791016, "loss": 1.4476, "nll_loss": 1.3230358362197876, "rewards/accuracies": 0.375, "rewards/chosen": -0.3605005145072937, "rewards/margins": -0.06701962649822235, "rewards/rejected": -0.29348090291023254, "step": 776 }, { "epoch": 2.1273100616016425, "grad_norm": 4.788737773895264, "learning_rate": 8.936986301369862e-07, "log_odds_chosen": 0.22363856434822083, "log_odds_ratio": -0.6862109899520874, "logits/chosen": 0.032040227204561234, "logits/rejected": 0.05266629904508591, "logps/chosen": -3.1949663162231445, "logps/rejected": -3.415492057800293, "loss": 1.2214, "nll_loss": 1.1528180837631226, "rewards/accuracies": 0.375, "rewards/chosen": -0.31949663162231445, "rewards/margins": 0.022052565589547157, "rewards/rejected": -0.34154921770095825, "step": 777 }, { "epoch": 2.130047912388775, "grad_norm": 4.512456893920898, "learning_rate": 8.935616438356164e-07, "log_odds_chosen": -0.016751155257225037, "log_odds_ratio": -0.721876859664917, "logits/chosen": 0.23714134097099304, "logits/rejected": 0.18544255197048187, "logps/chosen": -2.538547992706299, "logps/rejected": -2.5206122398376465, "loss": 1.3556, "nll_loss": 1.2834365367889404, "rewards/accuracies": 0.375, "rewards/chosen": -0.25385481119155884, "rewards/margins": -0.0017936043441295624, "rewards/rejected": -0.25206121802330017, "step": 778 }, { "epoch": 2.132785763175907, "grad_norm": 4.977276802062988, "learning_rate": 8.934246575342465e-07, "log_odds_chosen": -0.03649844229221344, "log_odds_ratio": -0.7807019352912903, "logits/chosen": 0.03659871593117714, "logits/rejected": -0.0013313144445419312, "logps/chosen": -2.934330463409424, "logps/rejected": -2.9289684295654297, "loss": 1.3472, "nll_loss": 1.269100546836853, "rewards/accuracies": 0.375, "rewards/chosen": -0.2934330403804779, "rewards/margins": -0.0005362033843994141, "rewards/rejected": -0.2928968369960785, "step": 779 }, { "epoch": 2.135523613963039, "grad_norm": 4.151784896850586, "learning_rate": 8.932876712328766e-07, "log_odds_chosen": 1.3772779703140259, "log_odds_ratio": -0.40810978412628174, "logits/chosen": 0.1571195423603058, "logits/rejected": -0.07289554178714752, "logps/chosen": -1.9543461799621582, "logps/rejected": -3.2364025115966797, "loss": 1.2597, "nll_loss": 1.2188911437988281, "rewards/accuracies": 0.875, "rewards/chosen": -0.19543461501598358, "rewards/margins": 0.12820564210414886, "rewards/rejected": -0.32364022731781006, "step": 780 }, { "epoch": 2.138261464750171, "grad_norm": 4.2961249351501465, "learning_rate": 8.931506849315068e-07, "log_odds_chosen": 0.42253342270851135, "log_odds_ratio": -0.5678352117538452, "logits/chosen": 0.10751461982727051, "logits/rejected": 0.1374383270740509, "logps/chosen": -2.4857077598571777, "logps/rejected": -2.888190746307373, "loss": 1.2522, "nll_loss": 1.195438265800476, "rewards/accuracies": 0.75, "rewards/chosen": -0.2485707849264145, "rewards/margins": 0.04024830460548401, "rewards/rejected": -0.2888190746307373, "step": 781 }, { "epoch": 2.1409993155373033, "grad_norm": 4.853588104248047, "learning_rate": 8.93013698630137e-07, "log_odds_chosen": 0.5444348454475403, "log_odds_ratio": -0.543678879737854, "logits/chosen": 0.11591611802577972, "logits/rejected": 0.03832443803548813, "logps/chosen": -2.6653833389282227, "logps/rejected": -3.1996185779571533, "loss": 1.3386, "nll_loss": 1.2841964960098267, "rewards/accuracies": 0.625, "rewards/chosen": -0.2665383219718933, "rewards/margins": 0.05342353135347366, "rewards/rejected": -0.31996187567710876, "step": 782 }, { "epoch": 2.1437371663244353, "grad_norm": 5.907915115356445, "learning_rate": 8.92876712328767e-07, "log_odds_chosen": 0.14274190366268158, "log_odds_ratio": -0.8096694946289062, "logits/chosen": 0.14067062735557556, "logits/rejected": 0.05478588119149208, "logps/chosen": -2.616501569747925, "logps/rejected": -2.6832196712493896, "loss": 1.3219, "nll_loss": 1.2409660816192627, "rewards/accuracies": 0.75, "rewards/chosen": -0.2616502046585083, "rewards/margins": 0.006671786308288574, "rewards/rejected": -0.2683219611644745, "step": 783 }, { "epoch": 2.1464750171115674, "grad_norm": 3.911010980606079, "learning_rate": 8.927397260273972e-07, "log_odds_chosen": 1.0478942394256592, "log_odds_ratio": -0.39020872116088867, "logits/chosen": 0.18286196887493134, "logits/rejected": 0.039699528366327286, "logps/chosen": -2.331778049468994, "logps/rejected": -3.2890877723693848, "loss": 1.193, "nll_loss": 1.1539347171783447, "rewards/accuracies": 0.875, "rewards/chosen": -0.23317782580852509, "rewards/margins": 0.0957309901714325, "rewards/rejected": -0.3289088010787964, "step": 784 }, { "epoch": 2.1492128678986995, "grad_norm": 5.593517780303955, "learning_rate": 8.926027397260274e-07, "log_odds_chosen": -0.36094972491264343, "log_odds_ratio": -0.9807063341140747, "logits/chosen": 0.11791570484638214, "logits/rejected": 0.2304394245147705, "logps/chosen": -2.8134775161743164, "logps/rejected": -2.4774603843688965, "loss": 1.3302, "nll_loss": 1.23212468624115, "rewards/accuracies": 0.5, "rewards/chosen": -0.28134775161743164, "rewards/margins": -0.033601704984903336, "rewards/rejected": -0.2477460503578186, "step": 785 }, { "epoch": 2.1519507186858315, "grad_norm": 4.924951553344727, "learning_rate": 8.924657534246574e-07, "log_odds_chosen": 0.19253545999526978, "log_odds_ratio": -1.0432682037353516, "logits/chosen": 0.1382690966129303, "logits/rejected": 0.17841793596744537, "logps/chosen": -3.2538301944732666, "logps/rejected": -3.424445152282715, "loss": 1.2958, "nll_loss": 1.19150972366333, "rewards/accuracies": 0.75, "rewards/chosen": -0.3253830373287201, "rewards/margins": 0.01706152968108654, "rewards/rejected": -0.3424445390701294, "step": 786 }, { "epoch": 2.1546885694729636, "grad_norm": 4.409965515136719, "learning_rate": 8.923287671232876e-07, "log_odds_chosen": 0.0445222407579422, "log_odds_ratio": -0.7051782608032227, "logits/chosen": 0.2181883454322815, "logits/rejected": 0.20472204685211182, "logps/chosen": -2.5344576835632324, "logps/rejected": -2.5840682983398438, "loss": 1.2302, "nll_loss": 1.1597098112106323, "rewards/accuracies": 0.625, "rewards/chosen": -0.2534457743167877, "rewards/margins": 0.004961068741977215, "rewards/rejected": -0.2584068179130554, "step": 787 }, { "epoch": 2.1574264202600957, "grad_norm": 5.8205790519714355, "learning_rate": 8.921917808219178e-07, "log_odds_chosen": -0.19199423491954803, "log_odds_ratio": -0.9255800247192383, "logits/chosen": 0.2108745127916336, "logits/rejected": 0.26697543263435364, "logps/chosen": -3.0604074001312256, "logps/rejected": -2.8611838817596436, "loss": 1.2406, "nll_loss": 1.1480913162231445, "rewards/accuracies": 0.375, "rewards/chosen": -0.3060407340526581, "rewards/margins": -0.019922347739338875, "rewards/rejected": -0.28611838817596436, "step": 788 }, { "epoch": 2.1601642710472277, "grad_norm": 6.0206522941589355, "learning_rate": 8.920547945205479e-07, "log_odds_chosen": 0.07833706587553024, "log_odds_ratio": -0.9142032861709595, "logits/chosen": 0.20537137985229492, "logits/rejected": 0.15019087493419647, "logps/chosen": -2.974855422973633, "logps/rejected": -3.0308589935302734, "loss": 1.357, "nll_loss": 1.2655916213989258, "rewards/accuracies": 0.625, "rewards/chosen": -0.2974855303764343, "rewards/margins": 0.0056003667414188385, "rewards/rejected": -0.30308592319488525, "step": 789 }, { "epoch": 2.1629021218343603, "grad_norm": 4.454682350158691, "learning_rate": 8.91917808219178e-07, "log_odds_chosen": 0.30270320177078247, "log_odds_ratio": -0.7778173685073853, "logits/chosen": 0.1873440444469452, "logits/rejected": 0.1379052698612213, "logps/chosen": -2.491983652114868, "logps/rejected": -2.7619330883026123, "loss": 1.3413, "nll_loss": 1.2635257244110107, "rewards/accuracies": 0.625, "rewards/chosen": -0.24919837713241577, "rewards/margins": 0.02699493058025837, "rewards/rejected": -0.2761932909488678, "step": 790 }, { "epoch": 2.1656399726214923, "grad_norm": 5.77728796005249, "learning_rate": 8.917808219178081e-07, "log_odds_chosen": -0.16545359790325165, "log_odds_ratio": -1.0058244466781616, "logits/chosen": 0.24367055296897888, "logits/rejected": 0.19061455130577087, "logps/chosen": -2.963730812072754, "logps/rejected": -2.782404899597168, "loss": 1.3017, "nll_loss": 1.2011221647262573, "rewards/accuracies": 0.625, "rewards/chosen": -0.29637306928634644, "rewards/margins": -0.018132580444216728, "rewards/rejected": -0.27824050188064575, "step": 791 }, { "epoch": 2.1683778234086244, "grad_norm": 5.524450778961182, "learning_rate": 8.916438356164383e-07, "log_odds_chosen": -0.30696648359298706, "log_odds_ratio": -1.0699728727340698, "logits/chosen": 0.2573409080505371, "logits/rejected": 0.3057798445224762, "logps/chosen": -3.7715835571289062, "logps/rejected": -3.4812350273132324, "loss": 1.2451, "nll_loss": 1.1381077766418457, "rewards/accuracies": 0.375, "rewards/chosen": -0.37715837359428406, "rewards/margins": -0.029034888371825218, "rewards/rejected": -0.3481234908103943, "step": 792 }, { "epoch": 2.1711156741957565, "grad_norm": 3.9504220485687256, "learning_rate": 8.915068493150684e-07, "log_odds_chosen": 0.9375959634780884, "log_odds_ratio": -0.462388277053833, "logits/chosen": 0.20878826081752777, "logits/rejected": 0.13839665055274963, "logps/chosen": -2.2855796813964844, "logps/rejected": -3.1716456413269043, "loss": 1.3232, "nll_loss": 1.277010202407837, "rewards/accuracies": 1.0, "rewards/chosen": -0.22855797410011292, "rewards/margins": 0.0886065885424614, "rewards/rejected": -0.3171645998954773, "step": 793 }, { "epoch": 2.1738535249828885, "grad_norm": 4.205752849578857, "learning_rate": 8.913698630136985e-07, "log_odds_chosen": 0.7273598313331604, "log_odds_ratio": -0.48262813687324524, "logits/chosen": 0.1545216590166092, "logits/rejected": 0.15729738771915436, "logps/chosen": -2.3710741996765137, "logps/rejected": -3.0292413234710693, "loss": 1.232, "nll_loss": 1.183739185333252, "rewards/accuracies": 0.875, "rewards/chosen": -0.23710741102695465, "rewards/margins": 0.0658167153596878, "rewards/rejected": -0.30292415618896484, "step": 794 }, { "epoch": 2.1765913757700206, "grad_norm": 4.687727928161621, "learning_rate": 8.912328767123287e-07, "log_odds_chosen": 0.24616564810276031, "log_odds_ratio": -0.6659517288208008, "logits/chosen": 0.303886353969574, "logits/rejected": 0.32362332940101624, "logps/chosen": -2.7447564601898193, "logps/rejected": -2.9564273357391357, "loss": 1.2964, "nll_loss": 1.2297686338424683, "rewards/accuracies": 0.75, "rewards/chosen": -0.27447566390037537, "rewards/margins": 0.021167077124118805, "rewards/rejected": -0.2956427335739136, "step": 795 }, { "epoch": 2.1793292265571527, "grad_norm": 4.612395286560059, "learning_rate": 8.910958904109589e-07, "log_odds_chosen": 0.010161079466342926, "log_odds_ratio": -0.7145543098449707, "logits/chosen": 0.08369151502847672, "logits/rejected": 0.08803396672010422, "logps/chosen": -2.386390447616577, "logps/rejected": -2.3772993087768555, "loss": 1.2968, "nll_loss": 1.2253352403640747, "rewards/accuracies": 0.5, "rewards/chosen": -0.23863905668258667, "rewards/margins": -0.0009091272950172424, "rewards/rejected": -0.23772993683815002, "step": 796 }, { "epoch": 2.1820670773442847, "grad_norm": 4.138397693634033, "learning_rate": 8.909589041095889e-07, "log_odds_chosen": 0.2548587918281555, "log_odds_ratio": -0.690574586391449, "logits/chosen": 0.38267630338668823, "logits/rejected": 0.4134277403354645, "logps/chosen": -2.4964632987976074, "logps/rejected": -2.7376365661621094, "loss": 1.2442, "nll_loss": 1.1751596927642822, "rewards/accuracies": 0.625, "rewards/chosen": -0.24964633584022522, "rewards/margins": 0.02411733753979206, "rewards/rejected": -0.27376365661621094, "step": 797 }, { "epoch": 2.184804928131417, "grad_norm": 3.78987193107605, "learning_rate": 8.908219178082191e-07, "log_odds_chosen": 0.4213353395462036, "log_odds_ratio": -0.5749386548995972, "logits/chosen": 0.26996004581451416, "logits/rejected": 0.212189719080925, "logps/chosen": -2.2377099990844727, "logps/rejected": -2.6166138648986816, "loss": 1.2701, "nll_loss": 1.212592601776123, "rewards/accuracies": 0.625, "rewards/chosen": -0.22377099096775055, "rewards/margins": 0.03789038211107254, "rewards/rejected": -0.2616613805294037, "step": 798 }, { "epoch": 2.187542778918549, "grad_norm": 4.990018367767334, "learning_rate": 8.906849315068493e-07, "log_odds_chosen": -0.3676973581314087, "log_odds_ratio": -1.0641875267028809, "logits/chosen": 0.2707580029964447, "logits/rejected": 0.28581321239471436, "logps/chosen": -3.7731354236602783, "logps/rejected": -3.4268546104431152, "loss": 1.2234, "nll_loss": 1.116940975189209, "rewards/accuracies": 0.375, "rewards/chosen": -0.3773135840892792, "rewards/margins": -0.03462807834148407, "rewards/rejected": -0.3426854908466339, "step": 799 }, { "epoch": 2.190280629705681, "grad_norm": 6.236916542053223, "learning_rate": 8.905479452054793e-07, "log_odds_chosen": -0.3512398600578308, "log_odds_ratio": -0.9719126224517822, "logits/chosen": 0.2054756134748459, "logits/rejected": 0.1714905947446823, "logps/chosen": -3.6100122928619385, "logps/rejected": -3.254624843597412, "loss": 1.263, "nll_loss": 1.1658036708831787, "rewards/accuracies": 0.5, "rewards/chosen": -0.36100125312805176, "rewards/margins": -0.03553876280784607, "rewards/rejected": -0.3254624605178833, "step": 800 }, { "epoch": 2.193018480492813, "grad_norm": 4.009089469909668, "learning_rate": 8.904109589041095e-07, "log_odds_chosen": 0.9133141040802002, "log_odds_ratio": -0.3680410087108612, "logits/chosen": 0.26433777809143066, "logits/rejected": 0.2821387052536011, "logps/chosen": -2.103114128112793, "logps/rejected": -2.9452767372131348, "loss": 1.1323, "nll_loss": 1.095457673072815, "rewards/accuracies": 1.0, "rewards/chosen": -0.2103114128112793, "rewards/margins": 0.08421623706817627, "rewards/rejected": -0.29452764987945557, "step": 801 }, { "epoch": 2.195756331279945, "grad_norm": 4.528045177459717, "learning_rate": 8.902739726027398e-07, "log_odds_chosen": 0.4632868468761444, "log_odds_ratio": -0.512218713760376, "logits/chosen": 0.289989709854126, "logits/rejected": 0.3024742603302002, "logps/chosen": -2.6955347061157227, "logps/rejected": -3.114680051803589, "loss": 1.1933, "nll_loss": 1.142075777053833, "rewards/accuracies": 0.75, "rewards/chosen": -0.2695534825325012, "rewards/margins": 0.04191455617547035, "rewards/rejected": -0.3114680349826813, "step": 802 }, { "epoch": 2.198494182067077, "grad_norm": 5.684357643127441, "learning_rate": 8.901369863013697e-07, "log_odds_chosen": -0.6545548439025879, "log_odds_ratio": -1.3369730710983276, "logits/chosen": 0.23103779554367065, "logits/rejected": 0.3074863851070404, "logps/chosen": -3.3970727920532227, "logps/rejected": -2.6836276054382324, "loss": 1.2832, "nll_loss": 1.149518609046936, "rewards/accuracies": 0.5, "rewards/chosen": -0.33970728516578674, "rewards/margins": -0.0713445246219635, "rewards/rejected": -0.26836276054382324, "step": 803 }, { "epoch": 2.2012320328542097, "grad_norm": 4.258436679840088, "learning_rate": 8.9e-07, "log_odds_chosen": 0.9690814018249512, "log_odds_ratio": -0.5631700158119202, "logits/chosen": 0.18712350726127625, "logits/rejected": 0.19572140276432037, "logps/chosen": -3.1552071571350098, "logps/rejected": -4.072460651397705, "loss": 1.2308, "nll_loss": 1.1745141744613647, "rewards/accuracies": 0.75, "rewards/chosen": -0.315520703792572, "rewards/margins": 0.09172537922859192, "rewards/rejected": -0.40724611282348633, "step": 804 }, { "epoch": 2.2039698836413417, "grad_norm": 3.9318594932556152, "learning_rate": 8.898630136986302e-07, "log_odds_chosen": 0.26920318603515625, "log_odds_ratio": -0.6400718092918396, "logits/chosen": 0.17814002931118011, "logits/rejected": 0.09742511808872223, "logps/chosen": -2.1458005905151367, "logps/rejected": -2.363921642303467, "loss": 1.2181, "nll_loss": 1.1540687084197998, "rewards/accuracies": 0.625, "rewards/chosen": -0.21458004415035248, "rewards/margins": 0.021812118589878082, "rewards/rejected": -0.23639215528964996, "step": 805 }, { "epoch": 2.206707734428474, "grad_norm": 5.648294925689697, "learning_rate": 8.897260273972603e-07, "log_odds_chosen": -0.19534413516521454, "log_odds_ratio": -1.0567574501037598, "logits/chosen": 0.2446812391281128, "logits/rejected": 0.31203967332839966, "logps/chosen": -3.8577799797058105, "logps/rejected": -3.6415064334869385, "loss": 1.2842, "nll_loss": 1.1785235404968262, "rewards/accuracies": 0.5, "rewards/chosen": -0.38577800989151, "rewards/margins": -0.021627360954880714, "rewards/rejected": -0.36415064334869385, "step": 806 }, { "epoch": 2.209445585215606, "grad_norm": 4.382638454437256, "learning_rate": 8.895890410958904e-07, "log_odds_chosen": 0.7909027338027954, "log_odds_ratio": -0.5576706528663635, "logits/chosen": 0.2937115728855133, "logits/rejected": 0.23764312267303467, "logps/chosen": -2.657820701599121, "logps/rejected": -3.397806167602539, "loss": 1.2149, "nll_loss": 1.1591613292694092, "rewards/accuracies": 0.625, "rewards/chosen": -0.26578205823898315, "rewards/margins": 0.07399856299161911, "rewards/rejected": -0.33978062868118286, "step": 807 }, { "epoch": 2.212183436002738, "grad_norm": 4.576396942138672, "learning_rate": 8.894520547945205e-07, "log_odds_chosen": 0.11233630776405334, "log_odds_ratio": -0.7864761352539062, "logits/chosen": 0.3015047013759613, "logits/rejected": 0.3792676329612732, "logps/chosen": -2.6864404678344727, "logps/rejected": -2.7795422077178955, "loss": 1.2176, "nll_loss": 1.1389769315719604, "rewards/accuracies": 0.5, "rewards/chosen": -0.2686440348625183, "rewards/margins": 0.009310189634561539, "rewards/rejected": -0.27795422077178955, "step": 808 }, { "epoch": 2.21492128678987, "grad_norm": 4.874603748321533, "learning_rate": 8.893150684931507e-07, "log_odds_chosen": -0.2227960079908371, "log_odds_ratio": -0.8781441450119019, "logits/chosen": 0.3785267472267151, "logits/rejected": 0.37975984811782837, "logps/chosen": -2.906431198120117, "logps/rejected": -2.6699576377868652, "loss": 1.203, "nll_loss": 1.1152137517929077, "rewards/accuracies": 0.625, "rewards/chosen": -0.2906431257724762, "rewards/margins": -0.02364737167954445, "rewards/rejected": -0.26699575781822205, "step": 809 }, { "epoch": 2.217659137577002, "grad_norm": 4.129993915557861, "learning_rate": 8.891780821917809e-07, "log_odds_chosen": -0.13341745734214783, "log_odds_ratio": -0.813193142414093, "logits/chosen": 0.21855691075325012, "logits/rejected": 0.17311525344848633, "logps/chosen": -2.9657816886901855, "logps/rejected": -2.840402126312256, "loss": 1.1898, "nll_loss": 1.1085259914398193, "rewards/accuracies": 0.375, "rewards/chosen": -0.29657816886901855, "rewards/margins": -0.01253795437514782, "rewards/rejected": -0.2840402126312256, "step": 810 }, { "epoch": 2.220396988364134, "grad_norm": 6.043828964233398, "learning_rate": 8.890410958904109e-07, "log_odds_chosen": 0.39872995018959045, "log_odds_ratio": -0.7151721119880676, "logits/chosen": 0.2707556188106537, "logits/rejected": 0.33684617280960083, "logps/chosen": -2.9149117469787598, "logps/rejected": -3.335930347442627, "loss": 1.0996, "nll_loss": 1.0281257629394531, "rewards/accuracies": 0.5, "rewards/chosen": -0.29149121046066284, "rewards/margins": 0.04210186004638672, "rewards/rejected": -0.3335930407047272, "step": 811 }, { "epoch": 2.223134839151266, "grad_norm": 4.753869533538818, "learning_rate": 8.889041095890411e-07, "log_odds_chosen": -0.1911095678806305, "log_odds_ratio": -0.9298152327537537, "logits/chosen": 0.1918092668056488, "logits/rejected": 0.1209590882062912, "logps/chosen": -3.0206053256988525, "logps/rejected": -2.7935776710510254, "loss": 1.3139, "nll_loss": 1.2209457159042358, "rewards/accuracies": 0.5, "rewards/chosen": -0.3020605444908142, "rewards/margins": -0.02270275354385376, "rewards/rejected": -0.27935779094696045, "step": 812 }, { "epoch": 2.2258726899383983, "grad_norm": 4.991621971130371, "learning_rate": 8.887671232876713e-07, "log_odds_chosen": -0.005410909652709961, "log_odds_ratio": -0.9392939805984497, "logits/chosen": 0.3428516089916229, "logits/rejected": 0.39428991079330444, "logps/chosen": -3.6632556915283203, "logps/rejected": -3.644801139831543, "loss": 1.2306, "nll_loss": 1.1366627216339111, "rewards/accuracies": 0.625, "rewards/chosen": -0.3663255572319031, "rewards/margins": -0.001845426857471466, "rewards/rejected": -0.3644801080226898, "step": 813 }, { "epoch": 2.2286105407255303, "grad_norm": 4.520697116851807, "learning_rate": 8.886301369863013e-07, "log_odds_chosen": -0.13523142039775848, "log_odds_ratio": -0.8746631145477295, "logits/chosen": 0.07508675754070282, "logits/rejected": 0.07040715217590332, "logps/chosen": -3.113558292388916, "logps/rejected": -2.9872326850891113, "loss": 1.3054, "nll_loss": 1.2178876399993896, "rewards/accuracies": 0.25, "rewards/chosen": -0.3113558292388916, "rewards/margins": -0.012632550671696663, "rewards/rejected": -0.2987232804298401, "step": 814 }, { "epoch": 2.2313483915126624, "grad_norm": 4.718256950378418, "learning_rate": 8.884931506849315e-07, "log_odds_chosen": 0.019419223070144653, "log_odds_ratio": -0.8003710508346558, "logits/chosen": 0.22124236822128296, "logits/rejected": 0.24793069064617157, "logps/chosen": -2.9287877082824707, "logps/rejected": -2.9189183712005615, "loss": 1.117, "nll_loss": 1.037006139755249, "rewards/accuracies": 0.375, "rewards/chosen": -0.29287877678871155, "rewards/margins": -0.0009869392961263657, "rewards/rejected": -0.29189181327819824, "step": 815 }, { "epoch": 2.2340862422997945, "grad_norm": 4.229555606842041, "learning_rate": 8.883561643835617e-07, "log_odds_chosen": 0.40206730365753174, "log_odds_ratio": -0.6257250308990479, "logits/chosen": 0.3038738965988159, "logits/rejected": 0.29864048957824707, "logps/chosen": -2.349759578704834, "logps/rejected": -2.731750249862671, "loss": 1.2249, "nll_loss": 1.1623661518096924, "rewards/accuracies": 0.75, "rewards/chosen": -0.23497596383094788, "rewards/margins": 0.0381990522146225, "rewards/rejected": -0.27317503094673157, "step": 816 }, { "epoch": 2.236824093086927, "grad_norm": 4.447842121124268, "learning_rate": 8.882191780821917e-07, "log_odds_chosen": 0.43791434168815613, "log_odds_ratio": -0.7856631278991699, "logits/chosen": 0.29771214723587036, "logits/rejected": 0.33801722526550293, "logps/chosen": -2.6619648933410645, "logps/rejected": -3.0388355255126953, "loss": 1.1759, "nll_loss": 1.097362995147705, "rewards/accuracies": 0.5, "rewards/chosen": -0.26619648933410645, "rewards/margins": 0.03768705949187279, "rewards/rejected": -0.30388355255126953, "step": 817 }, { "epoch": 2.239561943874059, "grad_norm": 5.081240177154541, "learning_rate": 8.880821917808219e-07, "log_odds_chosen": -0.28355956077575684, "log_odds_ratio": -0.9683966636657715, "logits/chosen": 0.27914249897003174, "logits/rejected": 0.3154461681842804, "logps/chosen": -3.545057535171509, "logps/rejected": -3.2632060050964355, "loss": 1.1165, "nll_loss": 1.0196444988250732, "rewards/accuracies": 0.5, "rewards/chosen": -0.3545057773590088, "rewards/margins": -0.028185158967971802, "rewards/rejected": -0.3263206481933594, "step": 818 }, { "epoch": 2.242299794661191, "grad_norm": 4.216420650482178, "learning_rate": 8.879452054794521e-07, "log_odds_chosen": 0.2424268126487732, "log_odds_ratio": -0.7313874363899231, "logits/chosen": 0.20800939202308655, "logits/rejected": 0.2383778989315033, "logps/chosen": -2.4836277961730957, "logps/rejected": -2.7178823947906494, "loss": 1.254, "nll_loss": 1.180862545967102, "rewards/accuracies": 0.625, "rewards/chosen": -0.24836277961730957, "rewards/margins": 0.023425457999110222, "rewards/rejected": -0.27178823947906494, "step": 819 }, { "epoch": 2.245037645448323, "grad_norm": 4.0482707023620605, "learning_rate": 8.878082191780822e-07, "log_odds_chosen": 0.4552541971206665, "log_odds_ratio": -0.6212545037269592, "logits/chosen": 0.2587054669857025, "logits/rejected": 0.24870939552783966, "logps/chosen": -2.512481212615967, "logps/rejected": -2.94116473197937, "loss": 1.0919, "nll_loss": 1.029822826385498, "rewards/accuracies": 0.75, "rewards/chosen": -0.2512481212615967, "rewards/margins": 0.042868372052907944, "rewards/rejected": -0.2941164970397949, "step": 820 }, { "epoch": 2.2477754962354553, "grad_norm": 3.9564952850341797, "learning_rate": 8.876712328767123e-07, "log_odds_chosen": 0.24074503779411316, "log_odds_ratio": -0.6536583304405212, "logits/chosen": 0.21135172247886658, "logits/rejected": 0.1961507499217987, "logps/chosen": -2.3464195728302, "logps/rejected": -2.5252349376678467, "loss": 1.2709, "nll_loss": 1.2055602073669434, "rewards/accuracies": 0.625, "rewards/chosen": -0.23464198410511017, "rewards/margins": 0.017881521955132484, "rewards/rejected": -0.2525234818458557, "step": 821 }, { "epoch": 2.2505133470225873, "grad_norm": 4.181813716888428, "learning_rate": 8.875342465753424e-07, "log_odds_chosen": 0.23842394351959229, "log_odds_ratio": -0.7005211710929871, "logits/chosen": 0.28567132353782654, "logits/rejected": 0.30921822786331177, "logps/chosen": -2.715711832046509, "logps/rejected": -2.9378981590270996, "loss": 1.2005, "nll_loss": 1.1304428577423096, "rewards/accuracies": 0.625, "rewards/chosen": -0.27157118916511536, "rewards/margins": 0.02221866510808468, "rewards/rejected": -0.2937898337841034, "step": 822 }, { "epoch": 2.2532511978097194, "grad_norm": 4.416650295257568, "learning_rate": 8.873972602739726e-07, "log_odds_chosen": 0.030228257179260254, "log_odds_ratio": -0.7678040266036987, "logits/chosen": 0.17505782842636108, "logits/rejected": 0.0774485319852829, "logps/chosen": -2.4597067832946777, "logps/rejected": -2.4728245735168457, "loss": 1.3824, "nll_loss": 1.3056336641311646, "rewards/accuracies": 0.5, "rewards/chosen": -0.24597066640853882, "rewards/margins": 0.0013117771595716476, "rewards/rejected": -0.24728243052959442, "step": 823 }, { "epoch": 2.2559890485968515, "grad_norm": 3.766728639602661, "learning_rate": 8.872602739726027e-07, "log_odds_chosen": 0.838970422744751, "log_odds_ratio": -0.7520425319671631, "logits/chosen": 0.17780202627182007, "logits/rejected": 0.1704092174768448, "logps/chosen": -2.2356958389282227, "logps/rejected": -3.111241102218628, "loss": 1.1653, "nll_loss": 1.0901083946228027, "rewards/accuracies": 0.5, "rewards/chosen": -0.2235696017742157, "rewards/margins": 0.08755452185869217, "rewards/rejected": -0.31112414598464966, "step": 824 }, { "epoch": 2.2587268993839835, "grad_norm": 3.798835515975952, "learning_rate": 8.871232876712328e-07, "log_odds_chosen": 0.41794031858444214, "log_odds_ratio": -0.5695558190345764, "logits/chosen": 0.2512071132659912, "logits/rejected": 0.2277752161026001, "logps/chosen": -2.2655856609344482, "logps/rejected": -2.644033670425415, "loss": 1.1606, "nll_loss": 1.1036185026168823, "rewards/accuracies": 0.625, "rewards/chosen": -0.22655856609344482, "rewards/margins": 0.037844784557819366, "rewards/rejected": -0.2644033432006836, "step": 825 }, { "epoch": 2.2614647501711156, "grad_norm": 4.271836280822754, "learning_rate": 8.86986301369863e-07, "log_odds_chosen": 0.4578820466995239, "log_odds_ratio": -0.6424186825752258, "logits/chosen": 0.1383102536201477, "logits/rejected": 0.1283649057149887, "logps/chosen": -2.907660484313965, "logps/rejected": -3.3344454765319824, "loss": 1.14, "nll_loss": 1.075740933418274, "rewards/accuracies": 0.625, "rewards/chosen": -0.29076606035232544, "rewards/margins": 0.04267851635813713, "rewards/rejected": -0.33344459533691406, "step": 826 }, { "epoch": 2.2642026009582477, "grad_norm": 3.741272211074829, "learning_rate": 8.868493150684932e-07, "log_odds_chosen": 0.5716442465782166, "log_odds_ratio": -0.564804196357727, "logits/chosen": 0.2021690309047699, "logits/rejected": 0.13552391529083252, "logps/chosen": -2.3452422618865967, "logps/rejected": -2.847899913787842, "loss": 1.1925, "nll_loss": 1.1359845399856567, "rewards/accuracies": 0.75, "rewards/chosen": -0.2345242202281952, "rewards/margins": 0.050265781581401825, "rewards/rejected": -0.2847900092601776, "step": 827 }, { "epoch": 2.2669404517453797, "grad_norm": 4.948389530181885, "learning_rate": 8.867123287671232e-07, "log_odds_chosen": 0.31968480348587036, "log_odds_ratio": -0.789307713508606, "logits/chosen": 0.40050947666168213, "logits/rejected": 0.3613700866699219, "logps/chosen": -2.828704595565796, "logps/rejected": -3.05818247795105, "loss": 1.3137, "nll_loss": 1.234772801399231, "rewards/accuracies": 0.625, "rewards/chosen": -0.28287047147750854, "rewards/margins": 0.022947803139686584, "rewards/rejected": -0.3058182895183563, "step": 828 }, { "epoch": 2.269678302532512, "grad_norm": 4.334080696105957, "learning_rate": 8.865753424657534e-07, "log_odds_chosen": 0.11413444578647614, "log_odds_ratio": -0.719417154788971, "logits/chosen": 0.18170848488807678, "logits/rejected": 0.12944212555885315, "logps/chosen": -3.158416509628296, "logps/rejected": -3.2407705783843994, "loss": 1.2333, "nll_loss": 1.1613919734954834, "rewards/accuracies": 0.5, "rewards/chosen": -0.3158416152000427, "rewards/margins": 0.008235396817326546, "rewards/rejected": -0.3240770399570465, "step": 829 }, { "epoch": 2.272416153319644, "grad_norm": 4.244831085205078, "learning_rate": 8.864383561643836e-07, "log_odds_chosen": -0.40381914377212524, "log_odds_ratio": -0.9795816540718079, "logits/chosen": 0.21059799194335938, "logits/rejected": 0.30482983589172363, "logps/chosen": -2.5521223545074463, "logps/rejected": -2.1927828788757324, "loss": 1.2253, "nll_loss": 1.127376914024353, "rewards/accuracies": 0.125, "rewards/chosen": -0.2552122473716736, "rewards/margins": -0.03593393787741661, "rewards/rejected": -0.21927830576896667, "step": 830 }, { "epoch": 2.2751540041067764, "grad_norm": 4.853871822357178, "learning_rate": 8.863013698630136e-07, "log_odds_chosen": 0.020286381244659424, "log_odds_ratio": -0.7628781199455261, "logits/chosen": 0.23911339044570923, "logits/rejected": 0.2811461389064789, "logps/chosen": -2.9671788215637207, "logps/rejected": -2.994666337966919, "loss": 1.1355, "nll_loss": 1.0592528581619263, "rewards/accuracies": 0.5, "rewards/chosen": -0.29671788215637207, "rewards/margins": 0.0027487482875585556, "rewards/rejected": -0.299466609954834, "step": 831 }, { "epoch": 2.2778918548939084, "grad_norm": 3.9489195346832275, "learning_rate": 8.861643835616438e-07, "log_odds_chosen": 1.1019811630249023, "log_odds_ratio": -0.645405113697052, "logits/chosen": 0.1612415313720703, "logits/rejected": 0.17050214111804962, "logps/chosen": -2.322230100631714, "logps/rejected": -3.3351874351501465, "loss": 1.1438, "nll_loss": 1.0792889595031738, "rewards/accuracies": 0.625, "rewards/chosen": -0.2322230339050293, "rewards/margins": 0.10129570960998535, "rewards/rejected": -0.33351874351501465, "step": 832 }, { "epoch": 2.2806297056810405, "grad_norm": 4.1392974853515625, "learning_rate": 8.86027397260274e-07, "log_odds_chosen": -0.12111705541610718, "log_odds_ratio": -0.891708493232727, "logits/chosen": 0.19910545647144318, "logits/rejected": 0.19125516712665558, "logps/chosen": -2.25384259223938, "logps/rejected": -2.158836841583252, "loss": 1.1481, "nll_loss": 1.0589264631271362, "rewards/accuracies": 0.5, "rewards/chosen": -0.22538425028324127, "rewards/margins": -0.00950055941939354, "rewards/rejected": -0.21588370203971863, "step": 833 }, { "epoch": 2.2833675564681726, "grad_norm": 4.292203426361084, "learning_rate": 8.858904109589041e-07, "log_odds_chosen": 0.15538975596427917, "log_odds_ratio": -0.6739310622215271, "logits/chosen": 0.2595665156841278, "logits/rejected": 0.28163719177246094, "logps/chosen": -2.974184274673462, "logps/rejected": -3.07338285446167, "loss": 1.2512, "nll_loss": 1.183814287185669, "rewards/accuracies": 0.625, "rewards/chosen": -0.2974184453487396, "rewards/margins": 0.009919862262904644, "rewards/rejected": -0.30733829736709595, "step": 834 }, { "epoch": 2.2861054072553046, "grad_norm": 5.9683990478515625, "learning_rate": 8.857534246575342e-07, "log_odds_chosen": -0.41094955801963806, "log_odds_ratio": -1.210140585899353, "logits/chosen": 0.32831627130508423, "logits/rejected": 0.3966616988182068, "logps/chosen": -3.6287286281585693, "logps/rejected": -3.220411539077759, "loss": 1.2428, "nll_loss": 1.121774435043335, "rewards/accuracies": 0.375, "rewards/chosen": -0.362872838973999, "rewards/margins": -0.04083171859383583, "rewards/rejected": -0.3220411539077759, "step": 835 }, { "epoch": 2.2888432580424367, "grad_norm": 3.895313024520874, "learning_rate": 8.856164383561644e-07, "log_odds_chosen": 2.0168330669403076, "log_odds_ratio": -0.45837903022766113, "logits/chosen": 0.36847853660583496, "logits/rejected": 0.3349771499633789, "logps/chosen": -2.568063497543335, "logps/rejected": -4.538311958312988, "loss": 1.2561, "nll_loss": 1.2102396488189697, "rewards/accuracies": 0.625, "rewards/chosen": -0.2568063735961914, "rewards/margins": 0.19702483713626862, "rewards/rejected": -0.45383119583129883, "step": 836 }, { "epoch": 2.291581108829569, "grad_norm": 4.050755500793457, "learning_rate": 8.854794520547945e-07, "log_odds_chosen": 0.377902090549469, "log_odds_ratio": -0.6525757312774658, "logits/chosen": 0.2614974081516266, "logits/rejected": 0.21062885224819183, "logps/chosen": -2.436537742614746, "logps/rejected": -2.7471468448638916, "loss": 1.2747, "nll_loss": 1.2094448804855347, "rewards/accuracies": 0.75, "rewards/chosen": -0.2436537742614746, "rewards/margins": 0.03106091357767582, "rewards/rejected": -0.27471470832824707, "step": 837 }, { "epoch": 2.294318959616701, "grad_norm": 4.99714994430542, "learning_rate": 8.853424657534246e-07, "log_odds_chosen": -0.4383818805217743, "log_odds_ratio": -1.118710994720459, "logits/chosen": 0.23766009509563446, "logits/rejected": 0.2652480900287628, "logps/chosen": -3.4056267738342285, "logps/rejected": -2.9735546112060547, "loss": 1.1911, "nll_loss": 1.0792067050933838, "rewards/accuracies": 0.375, "rewards/chosen": -0.3405626714229584, "rewards/margins": -0.043207235634326935, "rewards/rejected": -0.29735544323921204, "step": 838 }, { "epoch": 2.297056810403833, "grad_norm": 4.22019624710083, "learning_rate": 8.852054794520547e-07, "log_odds_chosen": -0.13427701592445374, "log_odds_ratio": -0.7873132228851318, "logits/chosen": 0.20983214676380157, "logits/rejected": 0.25337111949920654, "logps/chosen": -3.049161195755005, "logps/rejected": -2.925199508666992, "loss": 1.2355, "nll_loss": 1.1568158864974976, "rewards/accuracies": 0.25, "rewards/chosen": -0.3049161434173584, "rewards/margins": -0.012396188452839851, "rewards/rejected": -0.2925199270248413, "step": 839 }, { "epoch": 2.299794661190965, "grad_norm": 4.657027721405029, "learning_rate": 8.850684931506849e-07, "log_odds_chosen": -0.17556536197662354, "log_odds_ratio": -0.8598760366439819, "logits/chosen": 0.338517963886261, "logits/rejected": 0.3472851514816284, "logps/chosen": -3.27640438079834, "logps/rejected": -3.0972399711608887, "loss": 1.3037, "nll_loss": 1.2176966667175293, "rewards/accuracies": 0.625, "rewards/chosen": -0.32764044404029846, "rewards/margins": -0.01791643165051937, "rewards/rejected": -0.30972403287887573, "step": 840 }, { "epoch": 2.302532511978097, "grad_norm": 3.6850180625915527, "learning_rate": 8.849315068493151e-07, "log_odds_chosen": 0.5171332359313965, "log_odds_ratio": -0.6367411017417908, "logits/chosen": 0.18576888740062714, "logits/rejected": 0.09976528584957123, "logps/chosen": -2.4447202682495117, "logps/rejected": -2.896334171295166, "loss": 1.2102, "nll_loss": 1.1465295553207397, "rewards/accuracies": 0.875, "rewards/chosen": -0.24447202682495117, "rewards/margins": 0.04516139626502991, "rewards/rejected": -0.2896334230899811, "step": 841 }, { "epoch": 2.305270362765229, "grad_norm": 4.15344762802124, "learning_rate": 8.847945205479451e-07, "log_odds_chosen": 0.4658608138561249, "log_odds_ratio": -0.5440917015075684, "logits/chosen": 0.32213708758354187, "logits/rejected": 0.34741660952568054, "logps/chosen": -2.4472665786743164, "logps/rejected": -2.857211112976074, "loss": 1.0602, "nll_loss": 1.0058037042617798, "rewards/accuracies": 0.75, "rewards/chosen": -0.24472662806510925, "rewards/margins": 0.04099445790052414, "rewards/rejected": -0.285721093416214, "step": 842 }, { "epoch": 2.3080082135523616, "grad_norm": 4.522626876831055, "learning_rate": 8.846575342465753e-07, "log_odds_chosen": -0.0903787612915039, "log_odds_ratio": -1.0232127904891968, "logits/chosen": 0.38478872179985046, "logits/rejected": 0.3195003271102905, "logps/chosen": -2.80446195602417, "logps/rejected": -2.7008509635925293, "loss": 1.2067, "nll_loss": 1.1043643951416016, "rewards/accuracies": 0.375, "rewards/chosen": -0.28044623136520386, "rewards/margins": -0.01036110706627369, "rewards/rejected": -0.2700851261615753, "step": 843 }, { "epoch": 2.3107460643394937, "grad_norm": 4.810157299041748, "learning_rate": 8.845205479452055e-07, "log_odds_chosen": -0.4375784993171692, "log_odds_ratio": -1.0568616390228271, "logits/chosen": 0.2672443389892578, "logits/rejected": 0.31697481870651245, "logps/chosen": -3.693817615509033, "logps/rejected": -3.2373878955841064, "loss": 1.1157, "nll_loss": 1.009971022605896, "rewards/accuracies": 0.25, "rewards/chosen": -0.36938172578811646, "rewards/margins": -0.04564295709133148, "rewards/rejected": -0.32373881340026855, "step": 844 }, { "epoch": 2.3134839151266258, "grad_norm": 4.367068290710449, "learning_rate": 8.843835616438355e-07, "log_odds_chosen": 0.0644705593585968, "log_odds_ratio": -0.885228157043457, "logits/chosen": 0.3330419361591339, "logits/rejected": 0.378709614276886, "logps/chosen": -3.270693063735962, "logps/rejected": -3.318817377090454, "loss": 1.1185, "nll_loss": 1.029943585395813, "rewards/accuracies": 0.375, "rewards/chosen": -0.3270692825317383, "rewards/margins": 0.00481245294213295, "rewards/rejected": -0.3318817615509033, "step": 845 }, { "epoch": 2.316221765913758, "grad_norm": 4.119396686553955, "learning_rate": 8.842465753424657e-07, "log_odds_chosen": 0.2901872992515564, "log_odds_ratio": -0.5999538898468018, "logits/chosen": 0.3181821405887604, "logits/rejected": 0.26362353563308716, "logps/chosen": -2.4554271697998047, "logps/rejected": -2.722111701965332, "loss": 1.3234, "nll_loss": 1.2634003162384033, "rewards/accuracies": 0.375, "rewards/chosen": -0.2455427348613739, "rewards/margins": 0.026668451726436615, "rewards/rejected": -0.2722111940383911, "step": 846 }, { "epoch": 2.31895961670089, "grad_norm": 4.4039306640625, "learning_rate": 8.841095890410959e-07, "log_odds_chosen": 0.676754355430603, "log_odds_ratio": -0.7103768587112427, "logits/chosen": 0.16063643991947174, "logits/rejected": 0.2224469929933548, "logps/chosen": -2.7106494903564453, "logps/rejected": -3.3451318740844727, "loss": 1.1361, "nll_loss": 1.065026879310608, "rewards/accuracies": 0.375, "rewards/chosen": -0.27106496691703796, "rewards/margins": 0.0634482130408287, "rewards/rejected": -0.33451318740844727, "step": 847 }, { "epoch": 2.321697467488022, "grad_norm": 4.338608264923096, "learning_rate": 8.83972602739726e-07, "log_odds_chosen": 0.026153698563575745, "log_odds_ratio": -0.7826018333435059, "logits/chosen": 0.38745006918907166, "logits/rejected": 0.3663393259048462, "logps/chosen": -2.874915838241577, "logps/rejected": -2.881577491760254, "loss": 1.2731, "nll_loss": 1.1948442459106445, "rewards/accuracies": 0.5, "rewards/chosen": -0.2874915599822998, "rewards/margins": 0.0006661638617515564, "rewards/rejected": -0.28815773129463196, "step": 848 }, { "epoch": 2.324435318275154, "grad_norm": 3.5942883491516113, "learning_rate": 8.838356164383561e-07, "log_odds_chosen": 0.6906033754348755, "log_odds_ratio": -0.4477773606777191, "logits/chosen": 0.33938297629356384, "logits/rejected": 0.2283974140882492, "logps/chosen": -1.7273848056793213, "logps/rejected": -2.305757999420166, "loss": 1.2441, "nll_loss": 1.199329137802124, "rewards/accuracies": 0.75, "rewards/chosen": -0.1727384626865387, "rewards/margins": 0.0578373447060585, "rewards/rejected": -0.2305757999420166, "step": 849 }, { "epoch": 2.327173169062286, "grad_norm": 4.0916523933410645, "learning_rate": 8.836986301369863e-07, "log_odds_chosen": 0.6439347267150879, "log_odds_ratio": -0.5588213801383972, "logits/chosen": 0.20867820084095, "logits/rejected": 0.19218948483467102, "logps/chosen": -2.703446388244629, "logps/rejected": -3.3196966648101807, "loss": 1.1467, "nll_loss": 1.0907680988311768, "rewards/accuracies": 0.75, "rewards/chosen": -0.270344614982605, "rewards/margins": 0.06162504106760025, "rewards/rejected": -0.331969678401947, "step": 850 }, { "epoch": 2.329911019849418, "grad_norm": 4.00573205947876, "learning_rate": 8.835616438356164e-07, "log_odds_chosen": 0.40768104791641235, "log_odds_ratio": -0.5389717221260071, "logits/chosen": 0.2891215682029724, "logits/rejected": 0.25816285610198975, "logps/chosen": -2.5201454162597656, "logps/rejected": -2.8895962238311768, "loss": 1.1354, "nll_loss": 1.0814682245254517, "rewards/accuracies": 0.75, "rewards/chosen": -0.25201451778411865, "rewards/margins": 0.03694510459899902, "rewards/rejected": -0.2889596223831177, "step": 851 }, { "epoch": 2.3326488706365502, "grad_norm": 4.622708320617676, "learning_rate": 8.834246575342465e-07, "log_odds_chosen": 0.3787213861942291, "log_odds_ratio": -0.9498030543327332, "logits/chosen": 0.41318970918655396, "logits/rejected": 0.40274903178215027, "logps/chosen": -3.825165271759033, "logps/rejected": -4.1870903968811035, "loss": 1.1408, "nll_loss": 1.0458059310913086, "rewards/accuracies": 0.75, "rewards/chosen": -0.3825165331363678, "rewards/margins": 0.036192506551742554, "rewards/rejected": -0.41870903968811035, "step": 852 }, { "epoch": 2.3353867214236823, "grad_norm": 4.243980407714844, "learning_rate": 8.832876712328766e-07, "log_odds_chosen": 0.4070814251899719, "log_odds_ratio": -0.6544533967971802, "logits/chosen": 0.29667970538139343, "logits/rejected": 0.3817858099937439, "logps/chosen": -2.9862239360809326, "logps/rejected": -3.389719247817993, "loss": 1.0546, "nll_loss": 0.9891664981842041, "rewards/accuracies": 0.625, "rewards/chosen": -0.29862239956855774, "rewards/margins": 0.04034953564405441, "rewards/rejected": -0.33897191286087036, "step": 853 }, { "epoch": 2.3381245722108144, "grad_norm": 4.671756267547607, "learning_rate": 8.831506849315068e-07, "log_odds_chosen": 0.2165871113538742, "log_odds_ratio": -0.7329490184783936, "logits/chosen": 0.329012393951416, "logits/rejected": 0.3169752061367035, "logps/chosen": -3.149639129638672, "logps/rejected": -3.348390579223633, "loss": 1.0637, "nll_loss": 0.9904398322105408, "rewards/accuracies": 0.5, "rewards/chosen": -0.3149639070034027, "rewards/margins": 0.01987515389919281, "rewards/rejected": -0.3348390460014343, "step": 854 }, { "epoch": 2.3408624229979464, "grad_norm": 4.1202545166015625, "learning_rate": 8.83013698630137e-07, "log_odds_chosen": 0.32041752338409424, "log_odds_ratio": -0.7052744626998901, "logits/chosen": 0.5078393816947937, "logits/rejected": 0.5018351674079895, "logps/chosen": -3.005303382873535, "logps/rejected": -3.29598069190979, "loss": 1.0615, "nll_loss": 0.9910147190093994, "rewards/accuracies": 0.75, "rewards/chosen": -0.3005303144454956, "rewards/margins": 0.0290677547454834, "rewards/rejected": -0.3295980989933014, "step": 855 }, { "epoch": 2.3436002737850785, "grad_norm": 3.99776554107666, "learning_rate": 8.82876712328767e-07, "log_odds_chosen": 0.557375967502594, "log_odds_ratio": -0.5205920338630676, "logits/chosen": 0.20308613777160645, "logits/rejected": 0.1340932697057724, "logps/chosen": -2.1393232345581055, "logps/rejected": -2.6262834072113037, "loss": 1.2528, "nll_loss": 1.200693130493164, "rewards/accuracies": 0.75, "rewards/chosen": -0.2139323204755783, "rewards/margins": 0.04869602248072624, "rewards/rejected": -0.26262834668159485, "step": 856 }, { "epoch": 2.3463381245722106, "grad_norm": 3.4021899700164795, "learning_rate": 8.827397260273972e-07, "log_odds_chosen": 0.49344953894615173, "log_odds_ratio": -0.520706295967102, "logits/chosen": 0.2606581151485443, "logits/rejected": 0.247109055519104, "logps/chosen": -1.9976985454559326, "logps/rejected": -2.4428043365478516, "loss": 1.2104, "nll_loss": 1.1583302021026611, "rewards/accuracies": 0.875, "rewards/chosen": -0.19976983964443207, "rewards/margins": 0.04451059550046921, "rewards/rejected": -0.24428045749664307, "step": 857 }, { "epoch": 2.349075975359343, "grad_norm": 3.6812198162078857, "learning_rate": 8.826027397260274e-07, "log_odds_chosen": 0.07608180493116379, "log_odds_ratio": -0.7736831903457642, "logits/chosen": 0.38344013690948486, "logits/rejected": 0.33896347880363464, "logps/chosen": -2.588531494140625, "logps/rejected": -2.61486554145813, "loss": 1.206, "nll_loss": 1.1286017894744873, "rewards/accuracies": 0.625, "rewards/chosen": -0.25885313749313354, "rewards/margins": 0.0026334114372730255, "rewards/rejected": -0.26148658990859985, "step": 858 }, { "epoch": 2.351813826146475, "grad_norm": 5.0627264976501465, "learning_rate": 8.824657534246574e-07, "log_odds_chosen": 0.35733017325401306, "log_odds_ratio": -0.6206645965576172, "logits/chosen": 0.41834741830825806, "logits/rejected": 0.47604963183403015, "logps/chosen": -3.1576857566833496, "logps/rejected": -3.4824042320251465, "loss": 0.9775, "nll_loss": 0.9154727458953857, "rewards/accuracies": 0.625, "rewards/chosen": -0.3157685697078705, "rewards/margins": 0.03247182071208954, "rewards/rejected": -0.3482404053211212, "step": 859 }, { "epoch": 2.3545516769336072, "grad_norm": 4.01427698135376, "learning_rate": 8.823287671232876e-07, "log_odds_chosen": 0.04164442420005798, "log_odds_ratio": -0.7360037565231323, "logits/chosen": 0.36436226963996887, "logits/rejected": 0.35835227370262146, "logps/chosen": -2.8283629417419434, "logps/rejected": -2.8665080070495605, "loss": 1.2319, "nll_loss": 1.1582906246185303, "rewards/accuracies": 0.5, "rewards/chosen": -0.28283628821372986, "rewards/margins": 0.003814505413174629, "rewards/rejected": -0.28665077686309814, "step": 860 }, { "epoch": 2.3572895277207393, "grad_norm": 4.014832019805908, "learning_rate": 8.821917808219178e-07, "log_odds_chosen": 0.6124835014343262, "log_odds_ratio": -0.49616456031799316, "logits/chosen": 0.26722896099090576, "logits/rejected": 0.3114715814590454, "logps/chosen": -2.2916269302368164, "logps/rejected": -2.8538920879364014, "loss": 1.0221, "nll_loss": 0.9724429845809937, "rewards/accuracies": 0.75, "rewards/chosen": -0.22916270792484283, "rewards/margins": 0.05622650310397148, "rewards/rejected": -0.2853892147541046, "step": 861 }, { "epoch": 2.3600273785078714, "grad_norm": 4.221312999725342, "learning_rate": 8.820547945205479e-07, "log_odds_chosen": 0.37264204025268555, "log_odds_ratio": -0.7393958568572998, "logits/chosen": 0.30941975116729736, "logits/rejected": 0.2984125018119812, "logps/chosen": -2.7213141918182373, "logps/rejected": -3.049481153488159, "loss": 1.2314, "nll_loss": 1.1574159860610962, "rewards/accuracies": 0.625, "rewards/chosen": -0.27213144302368164, "rewards/margins": 0.032816700637340546, "rewards/rejected": -0.3049481213092804, "step": 862 }, { "epoch": 2.3627652292950034, "grad_norm": 5.18445348739624, "learning_rate": 8.81917808219178e-07, "log_odds_chosen": 0.56104576587677, "log_odds_ratio": -0.7379016280174255, "logits/chosen": 0.28258615732192993, "logits/rejected": 0.34832724928855896, "logps/chosen": -3.51951003074646, "logps/rejected": -4.050446510314941, "loss": 1.0922, "nll_loss": 1.018376111984253, "rewards/accuracies": 0.375, "rewards/chosen": -0.351951003074646, "rewards/margins": 0.05309368669986725, "rewards/rejected": -0.40504470467567444, "step": 863 }, { "epoch": 2.3655030800821355, "grad_norm": 3.8825693130493164, "learning_rate": 8.817808219178082e-07, "log_odds_chosen": 0.1337340623140335, "log_odds_ratio": -0.7056595087051392, "logits/chosen": 0.2273675799369812, "logits/rejected": 0.264245867729187, "logps/chosen": -3.5200729370117188, "logps/rejected": -3.631091356277466, "loss": 1.1271, "nll_loss": 1.056571125984192, "rewards/accuracies": 0.5, "rewards/chosen": -0.35200732946395874, "rewards/margins": 0.011101845651865005, "rewards/rejected": -0.36310917139053345, "step": 864 }, { "epoch": 2.3682409308692676, "grad_norm": 4.534926891326904, "learning_rate": 8.816438356164383e-07, "log_odds_chosen": -0.3211335241794586, "log_odds_ratio": -0.966694712638855, "logits/chosen": 0.209919273853302, "logits/rejected": 0.30177855491638184, "logps/chosen": -3.079280138015747, "logps/rejected": -2.7670412063598633, "loss": 1.1116, "nll_loss": 1.0149235725402832, "rewards/accuracies": 0.375, "rewards/chosen": -0.30792802572250366, "rewards/margins": -0.03122389130294323, "rewards/rejected": -0.2767041325569153, "step": 865 }, { "epoch": 2.3709787816563996, "grad_norm": 3.942984104156494, "learning_rate": 8.815068493150684e-07, "log_odds_chosen": 0.3347827196121216, "log_odds_ratio": -0.7134612202644348, "logits/chosen": 0.18679970502853394, "logits/rejected": 0.16892096400260925, "logps/chosen": -2.785262107849121, "logps/rejected": -3.093461513519287, "loss": 1.1449, "nll_loss": 1.0735869407653809, "rewards/accuracies": 0.625, "rewards/chosen": -0.2785262167453766, "rewards/margins": 0.030819935724139214, "rewards/rejected": -0.30934613943099976, "step": 866 }, { "epoch": 2.3737166324435317, "grad_norm": 4.476312160491943, "learning_rate": 8.813698630136986e-07, "log_odds_chosen": 0.2641916871070862, "log_odds_ratio": -0.6801977157592773, "logits/chosen": 0.32832610607147217, "logits/rejected": 0.2832249402999878, "logps/chosen": -3.3660852909088135, "logps/rejected": -3.5868980884552, "loss": 1.145, "nll_loss": 1.0769939422607422, "rewards/accuracies": 0.625, "rewards/chosen": -0.33660855889320374, "rewards/margins": 0.022081270813941956, "rewards/rejected": -0.3586898446083069, "step": 867 }, { "epoch": 2.3764544832306638, "grad_norm": 5.035402297973633, "learning_rate": 8.812328767123287e-07, "log_odds_chosen": -0.3235182762145996, "log_odds_ratio": -1.0256117582321167, "logits/chosen": 0.32656407356262207, "logits/rejected": 0.42201995849609375, "logps/chosen": -3.49269437789917, "logps/rejected": -3.165583372116089, "loss": 1.0835, "nll_loss": 0.9808977246284485, "rewards/accuracies": 0.5, "rewards/chosen": -0.34926941990852356, "rewards/margins": -0.03271108865737915, "rewards/rejected": -0.3165583610534668, "step": 868 }, { "epoch": 2.379192334017796, "grad_norm": 4.525251865386963, "learning_rate": 8.810958904109589e-07, "log_odds_chosen": 0.3607591688632965, "log_odds_ratio": -0.5926408767700195, "logits/chosen": 0.3597237169742584, "logits/rejected": 0.4653986394405365, "logps/chosen": -2.805363893508911, "logps/rejected": -3.143956184387207, "loss": 0.976, "nll_loss": 0.9167004227638245, "rewards/accuracies": 0.625, "rewards/chosen": -0.280536413192749, "rewards/margins": 0.033859193325042725, "rewards/rejected": -0.31439560651779175, "step": 869 }, { "epoch": 2.3819301848049284, "grad_norm": 4.1716227531433105, "learning_rate": 8.809589041095889e-07, "log_odds_chosen": 0.37920641899108887, "log_odds_ratio": -0.7761470675468445, "logits/chosen": 0.3291575610637665, "logits/rejected": 0.24214713275432587, "logps/chosen": -3.306577205657959, "logps/rejected": -3.645813465118408, "loss": 1.0671, "nll_loss": 0.9895344376564026, "rewards/accuracies": 0.5, "rewards/chosen": -0.3306577205657959, "rewards/margins": 0.03392362594604492, "rewards/rejected": -0.3645813465118408, "step": 870 }, { "epoch": 2.3846680355920604, "grad_norm": 4.844790458679199, "learning_rate": 8.808219178082191e-07, "log_odds_chosen": 0.2533387243747711, "log_odds_ratio": -0.8218502402305603, "logits/chosen": 0.24379053711891174, "logits/rejected": 0.07225751876831055, "logps/chosen": -2.7975382804870605, "logps/rejected": -2.971221923828125, "loss": 1.214, "nll_loss": 1.131794810295105, "rewards/accuracies": 0.625, "rewards/chosen": -0.2797538638114929, "rewards/margins": 0.017368346452713013, "rewards/rejected": -0.29712218046188354, "step": 871 }, { "epoch": 2.3874058863791925, "grad_norm": 3.806370496749878, "learning_rate": 8.806849315068493e-07, "log_odds_chosen": 0.9370751976966858, "log_odds_ratio": -0.4054386019706726, "logits/chosen": 0.22702644765377045, "logits/rejected": 0.15037894248962402, "logps/chosen": -2.078981876373291, "logps/rejected": -2.9519598484039307, "loss": 1.1761, "nll_loss": 1.135574460029602, "rewards/accuracies": 0.75, "rewards/chosen": -0.20789819955825806, "rewards/margins": 0.08729778230190277, "rewards/rejected": -0.295195996761322, "step": 872 }, { "epoch": 2.3901437371663246, "grad_norm": 3.665649175643921, "learning_rate": 8.805479452054793e-07, "log_odds_chosen": -0.20698274672031403, "log_odds_ratio": -0.8768577575683594, "logits/chosen": 0.32313454151153564, "logits/rejected": 0.3310317397117615, "logps/chosen": -2.355107069015503, "logps/rejected": -2.1708319187164307, "loss": 1.1801, "nll_loss": 1.092434048652649, "rewards/accuracies": 0.5, "rewards/chosen": -0.2355107218027115, "rewards/margins": -0.018427502363920212, "rewards/rejected": -0.2170831859111786, "step": 873 }, { "epoch": 2.3928815879534566, "grad_norm": 5.024196147918701, "learning_rate": 8.804109589041095e-07, "log_odds_chosen": -0.5182594656944275, "log_odds_ratio": -1.1768114566802979, "logits/chosen": 0.48396730422973633, "logits/rejected": 0.555535078048706, "logps/chosen": -3.227865695953369, "logps/rejected": -2.660250663757324, "loss": 1.2362, "nll_loss": 1.1185683012008667, "rewards/accuracies": 0.625, "rewards/chosen": -0.3227865397930145, "rewards/margins": -0.0567614883184433, "rewards/rejected": -0.2660250663757324, "step": 874 }, { "epoch": 2.3956194387405887, "grad_norm": 5.177992343902588, "learning_rate": 8.802739726027397e-07, "log_odds_chosen": -0.5046101808547974, "log_odds_ratio": -1.0930798053741455, "logits/chosen": 0.2448456734418869, "logits/rejected": 0.23337894678115845, "logps/chosen": -3.0933942794799805, "logps/rejected": -2.587678909301758, "loss": 1.4013, "nll_loss": 1.2920054197311401, "rewards/accuracies": 0.5, "rewards/chosen": -0.30933940410614014, "rewards/margins": -0.0505714938044548, "rewards/rejected": -0.25876790285110474, "step": 875 }, { "epoch": 2.3983572895277208, "grad_norm": 3.749516725540161, "learning_rate": 8.801369863013698e-07, "log_odds_chosen": 0.4667005240917206, "log_odds_ratio": -0.5701796412467957, "logits/chosen": 0.40415048599243164, "logits/rejected": 0.33242732286453247, "logps/chosen": -2.425330400466919, "logps/rejected": -2.856865882873535, "loss": 1.1909, "nll_loss": 1.133906602859497, "rewards/accuracies": 0.75, "rewards/chosen": -0.24253304302692413, "rewards/margins": 0.043153584003448486, "rewards/rejected": -0.2856866121292114, "step": 876 }, { "epoch": 2.401095140314853, "grad_norm": 4.5149664878845215, "learning_rate": 8.799999999999999e-07, "log_odds_chosen": 0.2046229988336563, "log_odds_ratio": -0.641913652420044, "logits/chosen": 0.36532846093177795, "logits/rejected": 0.36438173055648804, "logps/chosen": -2.374809741973877, "logps/rejected": -2.5501275062561035, "loss": 1.207, "nll_loss": 1.1428260803222656, "rewards/accuracies": 0.625, "rewards/chosen": -0.2374810129404068, "rewards/margins": 0.01753174141049385, "rewards/rejected": -0.25501275062561035, "step": 877 }, { "epoch": 2.403832991101985, "grad_norm": 4.137566566467285, "learning_rate": 8.798630136986301e-07, "log_odds_chosen": -0.2693946063518524, "log_odds_ratio": -0.9514328837394714, "logits/chosen": 0.09122303128242493, "logits/rejected": 0.04799286648631096, "logps/chosen": -3.579646348953247, "logps/rejected": -3.3035073280334473, "loss": 1.2858, "nll_loss": 1.1906569004058838, "rewards/accuracies": 0.5, "rewards/chosen": -0.3579646646976471, "rewards/margins": -0.027613919228315353, "rewards/rejected": -0.33035075664520264, "step": 878 }, { "epoch": 2.406570841889117, "grad_norm": 5.127172946929932, "learning_rate": 8.797260273972602e-07, "log_odds_chosen": -0.26432815194129944, "log_odds_ratio": -0.945257306098938, "logits/chosen": 0.2904512882232666, "logits/rejected": 0.3287566006183624, "logps/chosen": -3.0501842498779297, "logps/rejected": -2.786421775817871, "loss": 1.1286, "nll_loss": 1.034073829650879, "rewards/accuracies": 0.5, "rewards/chosen": -0.30501842498779297, "rewards/margins": -0.02637624926865101, "rewards/rejected": -0.2786421775817871, "step": 879 }, { "epoch": 2.409308692676249, "grad_norm": 4.106842517852783, "learning_rate": 8.795890410958903e-07, "log_odds_chosen": 0.2853701710700989, "log_odds_ratio": -0.5831829309463501, "logits/chosen": 0.2975057363510132, "logits/rejected": 0.35997945070266724, "logps/chosen": -3.4546329975128174, "logps/rejected": -3.7251830101013184, "loss": 1.1382, "nll_loss": 1.0799174308776855, "rewards/accuracies": 0.75, "rewards/chosen": -0.3454633057117462, "rewards/margins": 0.02705501951277256, "rewards/rejected": -0.37251830101013184, "step": 880 }, { "epoch": 2.412046543463381, "grad_norm": 3.387890338897705, "learning_rate": 8.794520547945205e-07, "log_odds_chosen": 0.9372319579124451, "log_odds_ratio": -0.3955986499786377, "logits/chosen": 0.23065248131752014, "logits/rejected": 0.14099402725696564, "logps/chosen": -2.162283420562744, "logps/rejected": -3.0233354568481445, "loss": 1.1693, "nll_loss": 1.129712700843811, "rewards/accuracies": 1.0, "rewards/chosen": -0.21622833609580994, "rewards/margins": 0.08610521256923676, "rewards/rejected": -0.3023335635662079, "step": 881 }, { "epoch": 2.414784394250513, "grad_norm": 4.426196098327637, "learning_rate": 8.793150684931506e-07, "log_odds_chosen": 0.1158103197813034, "log_odds_ratio": -0.7514698505401611, "logits/chosen": 0.6133549213409424, "logits/rejected": 0.6234482526779175, "logps/chosen": -2.527411460876465, "logps/rejected": -2.5819668769836426, "loss": 1.2153, "nll_loss": 1.1401550769805908, "rewards/accuracies": 0.375, "rewards/chosen": -0.25274115800857544, "rewards/margins": 0.005455542355775833, "rewards/rejected": -0.2581966817378998, "step": 882 }, { "epoch": 2.4175222450376452, "grad_norm": 3.851839303970337, "learning_rate": 8.791780821917809e-07, "log_odds_chosen": 0.7024786472320557, "log_odds_ratio": -0.49198654294013977, "logits/chosen": 0.37649962306022644, "logits/rejected": 0.40337398648262024, "logps/chosen": -2.6902661323547363, "logps/rejected": -3.366769552230835, "loss": 1.1051, "nll_loss": 1.0559052228927612, "rewards/accuracies": 0.625, "rewards/chosen": -0.26902663707733154, "rewards/margins": 0.06765033304691315, "rewards/rejected": -0.3366769552230835, "step": 883 }, { "epoch": 2.4202600958247773, "grad_norm": 4.199366092681885, "learning_rate": 8.790410958904108e-07, "log_odds_chosen": 0.058483608067035675, "log_odds_ratio": -0.7245901823043823, "logits/chosen": 0.44048771262168884, "logits/rejected": 0.5174397230148315, "logps/chosen": -3.2741780281066895, "logps/rejected": -3.3053364753723145, "loss": 1.0218, "nll_loss": 0.9493733048439026, "rewards/accuracies": 0.625, "rewards/chosen": -0.3274178206920624, "rewards/margins": 0.0031158458441495895, "rewards/rejected": -0.3305336534976959, "step": 884 }, { "epoch": 2.42299794661191, "grad_norm": 3.717189311981201, "learning_rate": 8.78904109589041e-07, "log_odds_chosen": -0.1415756195783615, "log_odds_ratio": -0.8415112495422363, "logits/chosen": 0.14931046962738037, "logits/rejected": 0.14729677140712738, "logps/chosen": -2.7705607414245605, "logps/rejected": -2.611438035964966, "loss": 1.191, "nll_loss": 1.1068259477615356, "rewards/accuracies": 0.25, "rewards/chosen": -0.2770560383796692, "rewards/margins": -0.015912257134914398, "rewards/rejected": -0.2611438035964966, "step": 885 }, { "epoch": 2.425735797399042, "grad_norm": 4.139999866485596, "learning_rate": 8.787671232876713e-07, "log_odds_chosen": 0.8971349596977234, "log_odds_ratio": -0.5163242220878601, "logits/chosen": 0.4320654571056366, "logits/rejected": 0.4881656765937805, "logps/chosen": -2.4734432697296143, "logps/rejected": -3.3087544441223145, "loss": 1.0871, "nll_loss": 1.0354504585266113, "rewards/accuracies": 0.75, "rewards/chosen": -0.24734432995319366, "rewards/margins": 0.08353111147880554, "rewards/rejected": -0.3308754563331604, "step": 886 }, { "epoch": 2.428473648186174, "grad_norm": 3.721459150314331, "learning_rate": 8.786301369863012e-07, "log_odds_chosen": 0.731206476688385, "log_odds_ratio": -0.589991569519043, "logits/chosen": 0.25259286165237427, "logits/rejected": 0.35964876413345337, "logps/chosen": -2.3570058345794678, "logps/rejected": -3.055077075958252, "loss": 1.0465, "nll_loss": 0.9874899387359619, "rewards/accuracies": 0.875, "rewards/chosen": -0.2357005774974823, "rewards/margins": 0.06980712711811066, "rewards/rejected": -0.30550771951675415, "step": 887 }, { "epoch": 2.431211498973306, "grad_norm": 3.738811492919922, "learning_rate": 8.784931506849315e-07, "log_odds_chosen": 1.2540262937545776, "log_odds_ratio": -0.36828747391700745, "logits/chosen": 0.4036809206008911, "logits/rejected": 0.41540294885635376, "logps/chosen": -2.2946724891662598, "logps/rejected": -3.4296839237213135, "loss": 1.1232, "nll_loss": 1.0863542556762695, "rewards/accuracies": 0.875, "rewards/chosen": -0.2294672727584839, "rewards/margins": 0.11350112408399582, "rewards/rejected": -0.3429684042930603, "step": 888 }, { "epoch": 2.433949349760438, "grad_norm": 4.053799152374268, "learning_rate": 8.783561643835617e-07, "log_odds_chosen": 0.16643917560577393, "log_odds_ratio": -0.7448707818984985, "logits/chosen": 0.14129292964935303, "logits/rejected": 0.1571558266878128, "logps/chosen": -2.271559238433838, "logps/rejected": -2.449450969696045, "loss": 1.1316, "nll_loss": 1.0570738315582275, "rewards/accuracies": 0.25, "rewards/chosen": -0.2271559238433838, "rewards/margins": 0.01778917945921421, "rewards/rejected": -0.24494510889053345, "step": 889 }, { "epoch": 2.43668720054757, "grad_norm": 3.9500632286071777, "learning_rate": 8.782191780821917e-07, "log_odds_chosen": 0.09476472437381744, "log_odds_ratio": -0.7577037215232849, "logits/chosen": 0.26031753420829773, "logits/rejected": 0.2775767743587494, "logps/chosen": -2.734624147415161, "logps/rejected": -2.8052515983581543, "loss": 1.123, "nll_loss": 1.0472586154937744, "rewards/accuracies": 0.625, "rewards/chosen": -0.2734624147415161, "rewards/margins": 0.00706273689866066, "rewards/rejected": -0.2805251479148865, "step": 890 }, { "epoch": 2.439425051334702, "grad_norm": 4.400454521179199, "learning_rate": 8.780821917808219e-07, "log_odds_chosen": 0.5650774240493774, "log_odds_ratio": -0.6980885863304138, "logits/chosen": 0.485653281211853, "logits/rejected": 0.4223332703113556, "logps/chosen": -2.8743896484375, "logps/rejected": -3.3891758918762207, "loss": 1.2392, "nll_loss": 1.1693679094314575, "rewards/accuracies": 0.625, "rewards/chosen": -0.2874389588832855, "rewards/margins": 0.05147865042090416, "rewards/rejected": -0.33891761302948, "step": 891 }, { "epoch": 2.4421629021218343, "grad_norm": 3.52514386177063, "learning_rate": 8.779452054794521e-07, "log_odds_chosen": 1.3726422786712646, "log_odds_ratio": -0.4838668406009674, "logits/chosen": 0.33383801579475403, "logits/rejected": 0.37781625986099243, "logps/chosen": -2.379582405090332, "logps/rejected": -3.6908531188964844, "loss": 1.0385, "nll_loss": 0.9900933504104614, "rewards/accuracies": 0.75, "rewards/chosen": -0.23795825242996216, "rewards/margins": 0.13112705945968628, "rewards/rejected": -0.36908531188964844, "step": 892 }, { "epoch": 2.4449007529089664, "grad_norm": 3.6955909729003906, "learning_rate": 8.778082191780822e-07, "log_odds_chosen": 0.37321144342422485, "log_odds_ratio": -0.5721533894538879, "logits/chosen": 0.1553991734981537, "logits/rejected": 0.11758971959352493, "logps/chosen": -2.6770851612091064, "logps/rejected": -3.0236403942108154, "loss": 1.2058, "nll_loss": 1.1485836505889893, "rewards/accuracies": 0.75, "rewards/chosen": -0.26770853996276855, "rewards/margins": 0.03465551137924194, "rewards/rejected": -0.3023640513420105, "step": 893 }, { "epoch": 2.4476386036960984, "grad_norm": 4.558709621429443, "learning_rate": 8.776712328767123e-07, "log_odds_chosen": 0.3408234119415283, "log_odds_ratio": -0.8237005472183228, "logits/chosen": 0.37904661893844604, "logits/rejected": 0.350790798664093, "logps/chosen": -3.253699541091919, "logps/rejected": -3.5551135540008545, "loss": 1.1197, "nll_loss": 1.037315011024475, "rewards/accuracies": 0.5, "rewards/chosen": -0.3253699541091919, "rewards/margins": 0.03014140948653221, "rewards/rejected": -0.355511337518692, "step": 894 }, { "epoch": 2.4503764544832305, "grad_norm": 3.565260410308838, "learning_rate": 8.775342465753425e-07, "log_odds_chosen": 1.1935137510299683, "log_odds_ratio": -0.4474736154079437, "logits/chosen": 0.45350199937820435, "logits/rejected": 0.39185765385627747, "logps/chosen": -1.8192555904388428, "logps/rejected": -2.8964014053344727, "loss": 1.1529, "nll_loss": 1.1081676483154297, "rewards/accuracies": 0.875, "rewards/chosen": -0.18192556500434875, "rewards/margins": 0.10771457850933075, "rewards/rejected": -0.2896401286125183, "step": 895 }, { "epoch": 2.453114305270363, "grad_norm": 3.90655517578125, "learning_rate": 8.773972602739726e-07, "log_odds_chosen": 0.3784773647785187, "log_odds_ratio": -0.6126785278320312, "logits/chosen": 0.41408923268318176, "logits/rejected": 0.44684648513793945, "logps/chosen": -2.8614516258239746, "logps/rejected": -3.1793150901794434, "loss": 1.0112, "nll_loss": 0.9499492645263672, "rewards/accuracies": 0.75, "rewards/chosen": -0.2861451506614685, "rewards/margins": 0.03178633749485016, "rewards/rejected": -0.31793153285980225, "step": 896 }, { "epoch": 2.455852156057495, "grad_norm": 3.886380195617676, "learning_rate": 8.772602739726027e-07, "log_odds_chosen": -0.49337905645370483, "log_odds_ratio": -1.0920134782791138, "logits/chosen": 0.2747105360031128, "logits/rejected": 0.26481324434280396, "logps/chosen": -3.6012349128723145, "logps/rejected": -3.1159772872924805, "loss": 1.1762, "nll_loss": 1.0669831037521362, "rewards/accuracies": 0.5, "rewards/chosen": -0.36012351512908936, "rewards/margins": -0.04852576553821564, "rewards/rejected": -0.3115977346897125, "step": 897 }, { "epoch": 2.458590006844627, "grad_norm": 3.786419630050659, "learning_rate": 8.771232876712329e-07, "log_odds_chosen": 0.46699589490890503, "log_odds_ratio": -0.5515797734260559, "logits/chosen": 0.4210437536239624, "logits/rejected": 0.49568071961402893, "logps/chosen": -2.525913953781128, "logps/rejected": -2.944054365158081, "loss": 0.9578, "nll_loss": 0.9026799201965332, "rewards/accuracies": 0.75, "rewards/chosen": -0.2525913715362549, "rewards/margins": 0.041814059019088745, "rewards/rejected": -0.294405460357666, "step": 898 }, { "epoch": 2.461327857631759, "grad_norm": 4.7903151512146, "learning_rate": 8.76986301369863e-07, "log_odds_chosen": -0.194955974817276, "log_odds_ratio": -0.8593860864639282, "logits/chosen": 0.19144803285598755, "logits/rejected": 0.16627636551856995, "logps/chosen": -2.6186861991882324, "logps/rejected": -2.432060956954956, "loss": 1.2132, "nll_loss": 1.1272823810577393, "rewards/accuracies": 0.5, "rewards/chosen": -0.2618686258792877, "rewards/margins": -0.01866251416504383, "rewards/rejected": -0.24320611357688904, "step": 899 }, { "epoch": 2.4640657084188913, "grad_norm": 4.023550987243652, "learning_rate": 8.768493150684932e-07, "log_odds_chosen": 0.4981613755226135, "log_odds_ratio": -0.5281745791435242, "logits/chosen": 0.2633056342601776, "logits/rejected": 0.2674955427646637, "logps/chosen": -2.769486427307129, "logps/rejected": -3.2306249141693115, "loss": 1.1549, "nll_loss": 1.1020547151565552, "rewards/accuracies": 0.75, "rewards/chosen": -0.2769486606121063, "rewards/margins": 0.04611384868621826, "rewards/rejected": -0.3230625092983246, "step": 900 }, { "epoch": 2.4668035592060233, "grad_norm": 3.7667958736419678, "learning_rate": 8.767123287671232e-07, "log_odds_chosen": 0.9695900082588196, "log_odds_ratio": -0.5200580358505249, "logits/chosen": 0.25869178771972656, "logits/rejected": 0.16943767666816711, "logps/chosen": -2.194153070449829, "logps/rejected": -3.0520548820495605, "loss": 1.0884, "nll_loss": 1.0364199876785278, "rewards/accuracies": 0.875, "rewards/chosen": -0.2194153070449829, "rewards/margins": 0.08579020202159882, "rewards/rejected": -0.3052055239677429, "step": 901 }, { "epoch": 2.4695414099931554, "grad_norm": 3.875136137008667, "learning_rate": 8.765753424657534e-07, "log_odds_chosen": 0.5241028070449829, "log_odds_ratio": -0.5878549814224243, "logits/chosen": 0.37409037351608276, "logits/rejected": 0.37806788086891174, "logps/chosen": -2.4639530181884766, "logps/rejected": -2.9445652961730957, "loss": 1.1869, "nll_loss": 1.1280814409255981, "rewards/accuracies": 0.75, "rewards/chosen": -0.2463952898979187, "rewards/margins": 0.04806126281619072, "rewards/rejected": -0.2944565713405609, "step": 902 }, { "epoch": 2.4722792607802875, "grad_norm": 4.259725570678711, "learning_rate": 8.764383561643836e-07, "log_odds_chosen": -0.22822996973991394, "log_odds_ratio": -1.0318892002105713, "logits/chosen": 0.35001474618911743, "logits/rejected": 0.3834169805049896, "logps/chosen": -3.27950119972229, "logps/rejected": -3.084498882293701, "loss": 1.0539, "nll_loss": 0.9506862759590149, "rewards/accuracies": 0.5, "rewards/chosen": -0.327950119972229, "rewards/margins": -0.01950022019445896, "rewards/rejected": -0.308449923992157, "step": 903 }, { "epoch": 2.4750171115674195, "grad_norm": 4.540726184844971, "learning_rate": 8.763013698630136e-07, "log_odds_chosen": -0.6156405806541443, "log_odds_ratio": -1.2716076374053955, "logits/chosen": 0.18773964047431946, "logits/rejected": 0.3193349838256836, "logps/chosen": -3.854684352874756, "logps/rejected": -3.2727575302124023, "loss": 1.1325, "nll_loss": 1.0053597688674927, "rewards/accuracies": 0.5, "rewards/chosen": -0.38546842336654663, "rewards/margins": -0.058192670345306396, "rewards/rejected": -0.327275812625885, "step": 904 }, { "epoch": 2.4777549623545516, "grad_norm": 3.804590940475464, "learning_rate": 8.761643835616438e-07, "log_odds_chosen": 0.6492381691932678, "log_odds_ratio": -0.5178972482681274, "logits/chosen": 0.5231284499168396, "logits/rejected": 0.5422770977020264, "logps/chosen": -2.3030447959899902, "logps/rejected": -2.8770699501037598, "loss": 1.1128, "nll_loss": 1.0609796047210693, "rewards/accuracies": 0.625, "rewards/chosen": -0.2303045094013214, "rewards/margins": 0.05740249156951904, "rewards/rejected": -0.28770697116851807, "step": 905 }, { "epoch": 2.4804928131416837, "grad_norm": 3.850466012954712, "learning_rate": 8.76027397260274e-07, "log_odds_chosen": 0.5023159384727478, "log_odds_ratio": -0.5420414805412292, "logits/chosen": 0.2159680426120758, "logits/rejected": 0.14213934540748596, "logps/chosen": -1.958101749420166, "logps/rejected": -2.427647352218628, "loss": 1.1997, "nll_loss": 1.145472526550293, "rewards/accuracies": 0.5, "rewards/chosen": -0.19581016898155212, "rewards/margins": 0.04695454612374306, "rewards/rejected": -0.24276471138000488, "step": 906 }, { "epoch": 2.4832306639288158, "grad_norm": 3.894545316696167, "learning_rate": 8.758904109589041e-07, "log_odds_chosen": 1.0979807376861572, "log_odds_ratio": -0.3555375933647156, "logits/chosen": 0.3290530741214752, "logits/rejected": 0.3484322428703308, "logps/chosen": -2.2794880867004395, "logps/rejected": -3.2947566509246826, "loss": 1.0469, "nll_loss": 1.0113147497177124, "rewards/accuracies": 1.0, "rewards/chosen": -0.22794879972934723, "rewards/margins": 0.10152687877416611, "rewards/rejected": -0.3294757008552551, "step": 907 }, { "epoch": 2.485968514715948, "grad_norm": 4.7469987869262695, "learning_rate": 8.757534246575342e-07, "log_odds_chosen": 0.36371803283691406, "log_odds_ratio": -0.6543880701065063, "logits/chosen": 0.40829092264175415, "logits/rejected": 0.4302661418914795, "logps/chosen": -2.976466655731201, "logps/rejected": -3.256739616394043, "loss": 0.9745, "nll_loss": 0.9090616703033447, "rewards/accuracies": 0.75, "rewards/chosen": -0.2976466715335846, "rewards/margins": 0.02802729792892933, "rewards/rejected": -0.3256739675998688, "step": 908 }, { "epoch": 2.48870636550308, "grad_norm": 3.8195619583129883, "learning_rate": 8.756164383561644e-07, "log_odds_chosen": 0.5024462938308716, "log_odds_ratio": -0.5205686092376709, "logits/chosen": 0.247841939330101, "logits/rejected": 0.24624378979206085, "logps/chosen": -2.547534942626953, "logps/rejected": -3.001030921936035, "loss": 1.0647, "nll_loss": 1.0126503705978394, "rewards/accuracies": 0.875, "rewards/chosen": -0.2547535002231598, "rewards/margins": 0.045349590480327606, "rewards/rejected": -0.3001030683517456, "step": 909 }, { "epoch": 2.491444216290212, "grad_norm": 3.7791588306427, "learning_rate": 8.754794520547945e-07, "log_odds_chosen": 1.210889220237732, "log_odds_ratio": -0.42041629552841187, "logits/chosen": 0.28330525755882263, "logits/rejected": 0.1094992607831955, "logps/chosen": -2.1590094566345215, "logps/rejected": -3.2444474697113037, "loss": 1.1997, "nll_loss": 1.157642126083374, "rewards/accuracies": 0.875, "rewards/chosen": -0.21590092778205872, "rewards/margins": 0.10854381322860718, "rewards/rejected": -0.3244447410106659, "step": 910 }, { "epoch": 2.4941820670773445, "grad_norm": 3.6001787185668945, "learning_rate": 8.753424657534246e-07, "log_odds_chosen": 1.2228163480758667, "log_odds_ratio": -0.393801748752594, "logits/chosen": 0.17462433874607086, "logits/rejected": 0.13991189002990723, "logps/chosen": -2.551919460296631, "logps/rejected": -3.6806342601776123, "loss": 1.1214, "nll_loss": 1.0820114612579346, "rewards/accuracies": 0.875, "rewards/chosen": -0.25519195199012756, "rewards/margins": 0.11287149786949158, "rewards/rejected": -0.36806344985961914, "step": 911 }, { "epoch": 2.4969199178644765, "grad_norm": 3.609205484390259, "learning_rate": 8.752054794520548e-07, "log_odds_chosen": 1.2636823654174805, "log_odds_ratio": -0.47099700570106506, "logits/chosen": 0.43032175302505493, "logits/rejected": 0.3497822880744934, "logps/chosen": -2.0826616287231445, "logps/rejected": -3.2535152435302734, "loss": 1.2142, "nll_loss": 1.167056918144226, "rewards/accuracies": 0.875, "rewards/chosen": -0.20826616883277893, "rewards/margins": 0.11708536744117737, "rewards/rejected": -0.3253515362739563, "step": 912 }, { "epoch": 2.4996577686516086, "grad_norm": 4.047729015350342, "learning_rate": 8.750684931506849e-07, "log_odds_chosen": 0.09750109910964966, "log_odds_ratio": -0.68327796459198, "logits/chosen": 0.41550499200820923, "logits/rejected": 0.31984150409698486, "logps/chosen": -3.3633620738983154, "logps/rejected": -3.461750030517578, "loss": 1.0989, "nll_loss": 1.0306085348129272, "rewards/accuracies": 0.625, "rewards/chosen": -0.336336225271225, "rewards/margins": 0.009838800877332687, "rewards/rejected": -0.34617501497268677, "step": 913 }, { "epoch": 2.5023956194387407, "grad_norm": 4.321293830871582, "learning_rate": 8.749315068493151e-07, "log_odds_chosen": 0.28215327858924866, "log_odds_ratio": -0.6446617841720581, "logits/chosen": 0.35844510793685913, "logits/rejected": 0.29387128353118896, "logps/chosen": -2.9532766342163086, "logps/rejected": -3.1974337100982666, "loss": 1.1205, "nll_loss": 1.0560717582702637, "rewards/accuracies": 0.625, "rewards/chosen": -0.29532766342163086, "rewards/margins": 0.024415723979473114, "rewards/rejected": -0.31974339485168457, "step": 914 }, { "epoch": 2.5051334702258727, "grad_norm": 4.121154308319092, "learning_rate": 8.747945205479451e-07, "log_odds_chosen": 0.47529786825180054, "log_odds_ratio": -0.5828242301940918, "logits/chosen": 0.4772248864173889, "logits/rejected": 0.4718673527240753, "logps/chosen": -2.8201262950897217, "logps/rejected": -3.21810245513916, "loss": 1.0461, "nll_loss": 0.987812340259552, "rewards/accuracies": 0.625, "rewards/chosen": -0.2820126414299011, "rewards/margins": 0.03979760408401489, "rewards/rejected": -0.321810245513916, "step": 915 }, { "epoch": 2.507871321013005, "grad_norm": 3.7055015563964844, "learning_rate": 8.746575342465753e-07, "log_odds_chosen": 0.35241398215293884, "log_odds_ratio": -0.5483764410018921, "logits/chosen": 0.34885722398757935, "logits/rejected": 0.26970839500427246, "logps/chosen": -2.035677433013916, "logps/rejected": -2.3367130756378174, "loss": 1.2411, "nll_loss": 1.186220407485962, "rewards/accuracies": 0.75, "rewards/chosen": -0.2035677134990692, "rewards/margins": 0.030103590339422226, "rewards/rejected": -0.23367132246494293, "step": 916 }, { "epoch": 2.510609171800137, "grad_norm": 3.431450605392456, "learning_rate": 8.745205479452055e-07, "log_odds_chosen": 0.5086055994033813, "log_odds_ratio": -0.4926159381866455, "logits/chosen": 0.46638914942741394, "logits/rejected": 0.40606728196144104, "logps/chosen": -2.086212158203125, "logps/rejected": -2.52724552154541, "loss": 1.1879, "nll_loss": 1.1386574506759644, "rewards/accuracies": 0.75, "rewards/chosen": -0.20862123370170593, "rewards/margins": 0.044103316962718964, "rewards/rejected": -0.2527245581150055, "step": 917 }, { "epoch": 2.513347022587269, "grad_norm": 4.448832035064697, "learning_rate": 8.743835616438355e-07, "log_odds_chosen": 0.02383383736014366, "log_odds_ratio": -0.8429958820343018, "logits/chosen": 0.480202853679657, "logits/rejected": 0.5051396489143372, "logps/chosen": -3.3647751808166504, "logps/rejected": -3.4052798748016357, "loss": 1.2672, "nll_loss": 1.1828981637954712, "rewards/accuracies": 0.375, "rewards/chosen": -0.33647751808166504, "rewards/margins": 0.0040504708886146545, "rewards/rejected": -0.3405280113220215, "step": 918 }, { "epoch": 2.516084873374401, "grad_norm": 4.089791297912598, "learning_rate": 8.742465753424657e-07, "log_odds_chosen": -0.08107157051563263, "log_odds_ratio": -0.8371883034706116, "logits/chosen": 0.41507816314697266, "logits/rejected": 0.357200026512146, "logps/chosen": -3.120643138885498, "logps/rejected": -3.0361857414245605, "loss": 1.1136, "nll_loss": 1.0298852920532227, "rewards/accuracies": 0.625, "rewards/chosen": -0.3120642900466919, "rewards/margins": -0.008445696905255318, "rewards/rejected": -0.3036186099052429, "step": 919 }, { "epoch": 2.518822724161533, "grad_norm": 3.78804612159729, "learning_rate": 8.741095890410959e-07, "log_odds_chosen": 1.0213868618011475, "log_odds_ratio": -0.3509508967399597, "logits/chosen": 0.2628326117992401, "logits/rejected": 0.3066411316394806, "logps/chosen": -1.9384870529174805, "logps/rejected": -2.7985048294067383, "loss": 1.1201, "nll_loss": 1.0849725008010864, "rewards/accuracies": 1.0, "rewards/chosen": -0.19384869933128357, "rewards/margins": 0.08600179851055145, "rewards/rejected": -0.27985048294067383, "step": 920 }, { "epoch": 2.521560574948665, "grad_norm": 3.8717830181121826, "learning_rate": 8.73972602739726e-07, "log_odds_chosen": 0.2571493983268738, "log_odds_ratio": -0.632619321346283, "logits/chosen": 0.29633814096450806, "logits/rejected": 0.2965143620967865, "logps/chosen": -3.344987392425537, "logps/rejected": -3.5887694358825684, "loss": 1.1335, "nll_loss": 1.070268154144287, "rewards/accuracies": 0.625, "rewards/chosen": -0.3344987630844116, "rewards/margins": 0.024378184229135513, "rewards/rejected": -0.3588769733905792, "step": 921 }, { "epoch": 2.5242984257357977, "grad_norm": 3.7340164184570312, "learning_rate": 8.738356164383561e-07, "log_odds_chosen": 0.21828319132328033, "log_odds_ratio": -0.8170323371887207, "logits/chosen": 0.33769094944000244, "logits/rejected": 0.31070223450660706, "logps/chosen": -2.1469693183898926, "logps/rejected": -2.296175003051758, "loss": 1.2, "nll_loss": 1.1183319091796875, "rewards/accuracies": 0.625, "rewards/chosen": -0.21469691395759583, "rewards/margins": 0.014920578338205814, "rewards/rejected": -0.22961750626564026, "step": 922 }, { "epoch": 2.5270362765229297, "grad_norm": 3.9663562774658203, "learning_rate": 8.736986301369863e-07, "log_odds_chosen": 0.4752591550350189, "log_odds_ratio": -0.6446377038955688, "logits/chosen": 0.24632218480110168, "logits/rejected": 0.3117445111274719, "logps/chosen": -2.9312644004821777, "logps/rejected": -3.3704895973205566, "loss": 1.142, "nll_loss": 1.0774909257888794, "rewards/accuracies": 0.75, "rewards/chosen": -0.2931264340877533, "rewards/margins": 0.043922536075115204, "rewards/rejected": -0.3370489478111267, "step": 923 }, { "epoch": 2.529774127310062, "grad_norm": 3.738842248916626, "learning_rate": 8.735616438356164e-07, "log_odds_chosen": 0.41297370195388794, "log_odds_ratio": -0.6230188608169556, "logits/chosen": 0.36724919080734253, "logits/rejected": 0.41762545704841614, "logps/chosen": -2.756991386413574, "logps/rejected": -3.108048915863037, "loss": 1.2419, "nll_loss": 1.1795964241027832, "rewards/accuracies": 0.75, "rewards/chosen": -0.2756991386413574, "rewards/margins": 0.03510577231645584, "rewards/rejected": -0.31080490350723267, "step": 924 }, { "epoch": 2.532511978097194, "grad_norm": 3.7948973178863525, "learning_rate": 8.734246575342465e-07, "log_odds_chosen": 0.9630244374275208, "log_odds_ratio": -0.5512706637382507, "logits/chosen": 0.4025777578353882, "logits/rejected": 0.4235612750053406, "logps/chosen": -2.3306074142456055, "logps/rejected": -3.13962459564209, "loss": 1.1781, "nll_loss": 1.1229920387268066, "rewards/accuracies": 0.75, "rewards/chosen": -0.23306073248386383, "rewards/margins": 0.08090171962976456, "rewards/rejected": -0.313962459564209, "step": 925 }, { "epoch": 2.535249828884326, "grad_norm": 5.051830768585205, "learning_rate": 8.732876712328767e-07, "log_odds_chosen": -0.07900747656822205, "log_odds_ratio": -0.9812551736831665, "logits/chosen": 0.40267413854599, "logits/rejected": 0.3758985996246338, "logps/chosen": -3.3866820335388184, "logps/rejected": -3.2679295539855957, "loss": 1.09, "nll_loss": 0.9918323755264282, "rewards/accuracies": 0.375, "rewards/chosen": -0.33866819739341736, "rewards/margins": -0.011875258758664131, "rewards/rejected": -0.32679295539855957, "step": 926 }, { "epoch": 2.537987679671458, "grad_norm": 6.349517345428467, "learning_rate": 8.731506849315068e-07, "log_odds_chosen": -0.2669910490512848, "log_odds_ratio": -0.9029740691184998, "logits/chosen": 0.20199084281921387, "logits/rejected": 0.24803489446640015, "logps/chosen": -3.0678248405456543, "logps/rejected": -2.8005778789520264, "loss": 1.1085, "nll_loss": 1.018189787864685, "rewards/accuracies": 0.375, "rewards/chosen": -0.30678248405456543, "rewards/margins": -0.026724696159362793, "rewards/rejected": -0.28005778789520264, "step": 927 }, { "epoch": 2.54072553045859, "grad_norm": 3.6724934577941895, "learning_rate": 8.73013698630137e-07, "log_odds_chosen": 0.31108248233795166, "log_odds_ratio": -0.667360782623291, "logits/chosen": 0.3254307508468628, "logits/rejected": 0.2913845181465149, "logps/chosen": -2.32161283493042, "logps/rejected": -2.599966526031494, "loss": 1.2076, "nll_loss": 1.1408929824829102, "rewards/accuracies": 0.625, "rewards/chosen": -0.23216129839420319, "rewards/margins": 0.027835331857204437, "rewards/rejected": -0.259996622800827, "step": 928 }, { "epoch": 2.543463381245722, "grad_norm": 3.7869856357574463, "learning_rate": 8.72876712328767e-07, "log_odds_chosen": 0.37646329402923584, "log_odds_ratio": -0.5554633140563965, "logits/chosen": 0.19346559047698975, "logits/rejected": 0.13923563063144684, "logps/chosen": -2.507424831390381, "logps/rejected": -2.834233522415161, "loss": 1.1472, "nll_loss": 1.091684103012085, "rewards/accuracies": 0.75, "rewards/chosen": -0.25074249505996704, "rewards/margins": 0.03268086910247803, "rewards/rejected": -0.28342336416244507, "step": 929 }, { "epoch": 2.546201232032854, "grad_norm": 3.7451655864715576, "learning_rate": 8.727397260273972e-07, "log_odds_chosen": 0.5227420330047607, "log_odds_ratio": -0.5430536270141602, "logits/chosen": 0.4876434803009033, "logits/rejected": 0.4485798180103302, "logps/chosen": -2.470982789993286, "logps/rejected": -2.9384748935699463, "loss": 1.2187, "nll_loss": 1.1644142866134644, "rewards/accuracies": 0.875, "rewards/chosen": -0.24709826707839966, "rewards/margins": 0.046749211847782135, "rewards/rejected": -0.2938474714756012, "step": 930 }, { "epoch": 2.5489390828199863, "grad_norm": 3.740854501724243, "learning_rate": 8.726027397260274e-07, "log_odds_chosen": 0.8015692830085754, "log_odds_ratio": -0.42269235849380493, "logits/chosen": 0.2930297553539276, "logits/rejected": 0.26209771633148193, "logps/chosen": -2.1647138595581055, "logps/rejected": -2.8843812942504883, "loss": 1.1327, "nll_loss": 1.090388298034668, "rewards/accuracies": 0.875, "rewards/chosen": -0.21647138893604279, "rewards/margins": 0.0719667375087738, "rewards/rejected": -0.2884381115436554, "step": 931 }, { "epoch": 2.5516769336071183, "grad_norm": 3.5558526515960693, "learning_rate": 8.724657534246574e-07, "log_odds_chosen": 0.07005350291728973, "log_odds_ratio": -0.772821843624115, "logits/chosen": 0.33249256014823914, "logits/rejected": 0.24795454740524292, "logps/chosen": -2.634204864501953, "logps/rejected": -2.7034294605255127, "loss": 1.2696, "nll_loss": 1.1922706365585327, "rewards/accuracies": 0.625, "rewards/chosen": -0.2634204626083374, "rewards/margins": 0.006922483444213867, "rewards/rejected": -0.27034297585487366, "step": 932 }, { "epoch": 2.5544147843942504, "grad_norm": 3.815103769302368, "learning_rate": 8.723287671232876e-07, "log_odds_chosen": 0.6164628863334656, "log_odds_ratio": -0.5655177235603333, "logits/chosen": 0.35364654660224915, "logits/rejected": 0.42473435401916504, "logps/chosen": -2.7093703746795654, "logps/rejected": -3.2943873405456543, "loss": 1.0114, "nll_loss": 0.954875111579895, "rewards/accuracies": 0.625, "rewards/chosen": -0.27093705534935, "rewards/margins": 0.05850166827440262, "rewards/rejected": -0.3294387459754944, "step": 933 }, { "epoch": 2.5571526351813825, "grad_norm": 3.788505792617798, "learning_rate": 8.721917808219178e-07, "log_odds_chosen": -0.19490563869476318, "log_odds_ratio": -0.917385995388031, "logits/chosen": 0.2623198926448822, "logits/rejected": 0.23577743768692017, "logps/chosen": -2.5615451335906982, "logps/rejected": -2.349138021469116, "loss": 1.1449, "nll_loss": 1.0531905889511108, "rewards/accuracies": 0.75, "rewards/chosen": -0.25615450739860535, "rewards/margins": -0.021240709349513054, "rewards/rejected": -0.23491381108760834, "step": 934 }, { "epoch": 2.5598904859685145, "grad_norm": 3.9198858737945557, "learning_rate": 8.720547945205479e-07, "log_odds_chosen": 0.6428700089454651, "log_odds_ratio": -0.5662970542907715, "logits/chosen": 0.325772762298584, "logits/rejected": 0.31038373708724976, "logps/chosen": -2.8640806674957275, "logps/rejected": -3.4548757076263428, "loss": 1.0919, "nll_loss": 1.0352281332015991, "rewards/accuracies": 0.75, "rewards/chosen": -0.28640809655189514, "rewards/margins": 0.059079498052597046, "rewards/rejected": -0.3454875946044922, "step": 935 }, { "epoch": 2.5626283367556466, "grad_norm": 4.353514194488525, "learning_rate": 8.71917808219178e-07, "log_odds_chosen": 0.0086745023727417, "log_odds_ratio": -0.7587241530418396, "logits/chosen": 0.3566962480545044, "logits/rejected": 0.4169294834136963, "logps/chosen": -2.975309371948242, "logps/rejected": -2.979015827178955, "loss": 1.0226, "nll_loss": 0.9467710852622986, "rewards/accuracies": 0.5, "rewards/chosen": -0.2975309491157532, "rewards/margins": 0.0003706458956003189, "rewards/rejected": -0.29790157079696655, "step": 936 }, { "epoch": 2.5653661875427787, "grad_norm": 3.5442545413970947, "learning_rate": 8.717808219178082e-07, "log_odds_chosen": 0.04656077176332474, "log_odds_ratio": -0.7366598844528198, "logits/chosen": 0.2705864906311035, "logits/rejected": 0.2939482629299164, "logps/chosen": -2.3784449100494385, "logps/rejected": -2.3945977687835693, "loss": 1.1966, "nll_loss": 1.122982382774353, "rewards/accuracies": 0.375, "rewards/chosen": -0.23784449696540833, "rewards/margins": 0.0016152849420905113, "rewards/rejected": -0.2394597828388214, "step": 937 }, { "epoch": 2.5681040383299107, "grad_norm": 3.674135446548462, "learning_rate": 8.716438356164383e-07, "log_odds_chosen": 0.5295191407203674, "log_odds_ratio": -0.5093889236450195, "logits/chosen": 0.3142595887184143, "logits/rejected": 0.2923296391963959, "logps/chosen": -2.3798511028289795, "logps/rejected": -2.89229679107666, "loss": 1.1364, "nll_loss": 1.0854339599609375, "rewards/accuracies": 0.875, "rewards/chosen": -0.23798510432243347, "rewards/margins": 0.05124456435441971, "rewards/rejected": -0.2892296612262726, "step": 938 }, { "epoch": 2.5708418891170433, "grad_norm": 5.414066791534424, "learning_rate": 8.715068493150684e-07, "log_odds_chosen": 0.4200727939605713, "log_odds_ratio": -0.6181355118751526, "logits/chosen": 0.40436434745788574, "logits/rejected": 0.4135226607322693, "logps/chosen": -2.961635112762451, "logps/rejected": -3.366765022277832, "loss": 1.119, "nll_loss": 1.057234764099121, "rewards/accuracies": 0.75, "rewards/chosen": -0.29616349935531616, "rewards/margins": 0.04051302745938301, "rewards/rejected": -0.33667653799057007, "step": 939 }, { "epoch": 2.5735797399041753, "grad_norm": 3.7004988193511963, "learning_rate": 8.713698630136986e-07, "log_odds_chosen": 0.3817538619041443, "log_odds_ratio": -0.6409371495246887, "logits/chosen": 0.3889644742012024, "logits/rejected": 0.32324719429016113, "logps/chosen": -2.411602735519409, "logps/rejected": -2.7368154525756836, "loss": 1.1924, "nll_loss": 1.1283220052719116, "rewards/accuracies": 0.75, "rewards/chosen": -0.24116027355194092, "rewards/margins": 0.032521262764930725, "rewards/rejected": -0.2736815810203552, "step": 940 }, { "epoch": 2.5763175906913074, "grad_norm": 4.610921382904053, "learning_rate": 8.712328767123287e-07, "log_odds_chosen": 0.03004223108291626, "log_odds_ratio": -0.8304569125175476, "logits/chosen": 0.39970672130584717, "logits/rejected": 0.393799364566803, "logps/chosen": -2.5893642902374268, "logps/rejected": -2.5828897953033447, "loss": 1.2754, "nll_loss": 1.192352533340454, "rewards/accuracies": 0.5, "rewards/chosen": -0.25893643498420715, "rewards/margins": -0.000647442415356636, "rewards/rejected": -0.2582889795303345, "step": 941 }, { "epoch": 2.5790554414784395, "grad_norm": 3.8385610580444336, "learning_rate": 8.710958904109589e-07, "log_odds_chosen": 1.2206907272338867, "log_odds_ratio": -0.5150556564331055, "logits/chosen": 0.20513948798179626, "logits/rejected": 0.1392330825328827, "logps/chosen": -2.154935836791992, "logps/rejected": -3.324951648712158, "loss": 1.2148, "nll_loss": 1.1632781028747559, "rewards/accuracies": 0.75, "rewards/chosen": -0.2154935896396637, "rewards/margins": 0.11700160056352615, "rewards/rejected": -0.33249515295028687, "step": 942 }, { "epoch": 2.5817932922655715, "grad_norm": 3.400402784347534, "learning_rate": 8.70958904109589e-07, "log_odds_chosen": 0.6499602198600769, "log_odds_ratio": -0.5734089016914368, "logits/chosen": 0.2717213034629822, "logits/rejected": 0.28201398253440857, "logps/chosen": -2.6608469486236572, "logps/rejected": -3.2389297485351562, "loss": 1.0744, "nll_loss": 1.0170402526855469, "rewards/accuracies": 0.75, "rewards/chosen": -0.2660847008228302, "rewards/margins": 0.05780826136469841, "rewards/rejected": -0.3238929510116577, "step": 943 }, { "epoch": 2.5845311430527036, "grad_norm": 3.800589084625244, "learning_rate": 8.708219178082191e-07, "log_odds_chosen": 0.9339314699172974, "log_odds_ratio": -0.3914600908756256, "logits/chosen": 0.37252482771873474, "logits/rejected": 0.30396249890327454, "logps/chosen": -1.9028892517089844, "logps/rejected": -2.7298507690429688, "loss": 1.1379, "nll_loss": 1.0987797975540161, "rewards/accuracies": 0.875, "rewards/chosen": -0.19028893113136292, "rewards/margins": 0.08269616961479187, "rewards/rejected": -0.2729851007461548, "step": 944 }, { "epoch": 2.5872689938398357, "grad_norm": 3.4432260990142822, "learning_rate": 8.706849315068493e-07, "log_odds_chosen": 0.6013880968093872, "log_odds_ratio": -0.5033451914787292, "logits/chosen": 0.247817724943161, "logits/rejected": 0.1839980036020279, "logps/chosen": -2.1700096130371094, "logps/rejected": -2.6976113319396973, "loss": 1.1411, "nll_loss": 1.0907858610153198, "rewards/accuracies": 0.75, "rewards/chosen": -0.21700096130371094, "rewards/margins": 0.05276019126176834, "rewards/rejected": -0.2697611451148987, "step": 945 }, { "epoch": 2.5900068446269677, "grad_norm": 3.9556381702423096, "learning_rate": 8.705479452054793e-07, "log_odds_chosen": 0.36169663071632385, "log_odds_ratio": -0.54817795753479, "logits/chosen": 0.43565499782562256, "logits/rejected": 0.434080570936203, "logps/chosen": -2.447514772415161, "logps/rejected": -2.782167911529541, "loss": 1.089, "nll_loss": 1.0342061519622803, "rewards/accuracies": 0.625, "rewards/chosen": -0.2447514683008194, "rewards/margins": 0.03346532583236694, "rewards/rejected": -0.27821677923202515, "step": 946 }, { "epoch": 2.5927446954141, "grad_norm": 3.834749221801758, "learning_rate": 8.704109589041095e-07, "log_odds_chosen": 0.40521371364593506, "log_odds_ratio": -0.5912455916404724, "logits/chosen": 0.2859989404678345, "logits/rejected": 0.24564120173454285, "logps/chosen": -2.3571724891662598, "logps/rejected": -2.7426252365112305, "loss": 1.1867, "nll_loss": 1.1275560855865479, "rewards/accuracies": 0.5, "rewards/chosen": -0.23571723699569702, "rewards/margins": 0.03854529559612274, "rewards/rejected": -0.27426254749298096, "step": 947 }, { "epoch": 2.5954825462012323, "grad_norm": 5.295761585235596, "learning_rate": 8.702739726027397e-07, "log_odds_chosen": -0.004900813102722168, "log_odds_ratio": -0.9627442955970764, "logits/chosen": 0.4535790681838989, "logits/rejected": 0.4772096276283264, "logps/chosen": -3.0880236625671387, "logps/rejected": -2.9950900077819824, "loss": 1.0476, "nll_loss": 0.9513465166091919, "rewards/accuracies": 0.625, "rewards/chosen": -0.3088023364543915, "rewards/margins": -0.009293343871831894, "rewards/rejected": -0.2995089888572693, "step": 948 }, { "epoch": 2.5982203969883644, "grad_norm": 4.568530559539795, "learning_rate": 8.701369863013698e-07, "log_odds_chosen": -0.0423399992287159, "log_odds_ratio": -0.7542793154716492, "logits/chosen": 0.3823825716972351, "logits/rejected": 0.39560815691947937, "logps/chosen": -3.012662887573242, "logps/rejected": -2.9719395637512207, "loss": 1.1577, "nll_loss": 1.0822718143463135, "rewards/accuracies": 0.5, "rewards/chosen": -0.30126631259918213, "rewards/margins": -0.004072336480021477, "rewards/rejected": -0.2971939742565155, "step": 949 }, { "epoch": 2.6009582477754964, "grad_norm": 4.605199337005615, "learning_rate": 8.699999999999999e-07, "log_odds_chosen": 0.06567013263702393, "log_odds_ratio": -0.7494388222694397, "logits/chosen": 0.4850051701068878, "logits/rejected": 0.41276854276657104, "logps/chosen": -2.16894268989563, "logps/rejected": -2.174161195755005, "loss": 1.2425, "nll_loss": 1.1675522327423096, "rewards/accuracies": 0.75, "rewards/chosen": -0.216894268989563, "rewards/margins": 0.0005218563601374626, "rewards/rejected": -0.21741613745689392, "step": 950 }, { "epoch": 2.6036960985626285, "grad_norm": 3.820310115814209, "learning_rate": 8.698630136986301e-07, "log_odds_chosen": 0.09505495429039001, "log_odds_ratio": -0.7217053174972534, "logits/chosen": 0.3124602437019348, "logits/rejected": 0.29235604405403137, "logps/chosen": -2.884272813796997, "logps/rejected": -3.0055603981018066, "loss": 1.1843, "nll_loss": 1.112176537513733, "rewards/accuracies": 0.5, "rewards/chosen": -0.28842729330062866, "rewards/margins": 0.012128761038184166, "rewards/rejected": -0.3005560338497162, "step": 951 }, { "epoch": 2.6064339493497606, "grad_norm": 3.9159867763519287, "learning_rate": 8.697260273972602e-07, "log_odds_chosen": 0.5898807048797607, "log_odds_ratio": -0.5462708473205566, "logits/chosen": 0.29387563467025757, "logits/rejected": 0.18412846326828003, "logps/chosen": -2.271440029144287, "logps/rejected": -2.8071646690368652, "loss": 1.13, "nll_loss": 1.0753836631774902, "rewards/accuracies": 0.5, "rewards/chosen": -0.2271440178155899, "rewards/margins": 0.05357244238257408, "rewards/rejected": -0.2807164788246155, "step": 952 }, { "epoch": 2.6091718001368926, "grad_norm": 4.114648342132568, "learning_rate": 8.695890410958903e-07, "log_odds_chosen": 0.3701379895210266, "log_odds_ratio": -0.6826469898223877, "logits/chosen": 0.3567069172859192, "logits/rejected": 0.34847238659858704, "logps/chosen": -2.8597006797790527, "logps/rejected": -3.1513848304748535, "loss": 1.2613, "nll_loss": 1.1930328607559204, "rewards/accuracies": 0.75, "rewards/chosen": -0.2859700620174408, "rewards/margins": 0.029168399050831795, "rewards/rejected": -0.31513848900794983, "step": 953 }, { "epoch": 2.6119096509240247, "grad_norm": 3.6280229091644287, "learning_rate": 8.694520547945205e-07, "log_odds_chosen": 0.34403321146965027, "log_odds_ratio": -0.5884383916854858, "logits/chosen": 0.3283945322036743, "logits/rejected": 0.291289359331131, "logps/chosen": -2.258697032928467, "logps/rejected": -2.571664333343506, "loss": 1.1374, "nll_loss": 1.078508734703064, "rewards/accuracies": 0.625, "rewards/chosen": -0.22586970031261444, "rewards/margins": 0.0312967412173748, "rewards/rejected": -0.25716644525527954, "step": 954 }, { "epoch": 2.614647501711157, "grad_norm": 4.057251930236816, "learning_rate": 8.693150684931506e-07, "log_odds_chosen": 0.6310727000236511, "log_odds_ratio": -0.5284004807472229, "logits/chosen": 0.47562384605407715, "logits/rejected": 0.4767580032348633, "logps/chosen": -2.914304733276367, "logps/rejected": -3.5043740272521973, "loss": 1.075, "nll_loss": 1.0221308469772339, "rewards/accuracies": 0.75, "rewards/chosen": -0.2914304733276367, "rewards/margins": 0.0590069442987442, "rewards/rejected": -0.3504374027252197, "step": 955 }, { "epoch": 2.617385352498289, "grad_norm": 3.856518030166626, "learning_rate": 8.691780821917808e-07, "log_odds_chosen": 0.17999118566513062, "log_odds_ratio": -0.6775301694869995, "logits/chosen": 0.33721089363098145, "logits/rejected": 0.32166343927383423, "logps/chosen": -2.5816056728363037, "logps/rejected": -2.732226848602295, "loss": 1.1278, "nll_loss": 1.0600435733795166, "rewards/accuracies": 0.5, "rewards/chosen": -0.2581605911254883, "rewards/margins": 0.015062117949128151, "rewards/rejected": -0.2732226848602295, "step": 956 }, { "epoch": 2.620123203285421, "grad_norm": 4.284952640533447, "learning_rate": 8.690410958904109e-07, "log_odds_chosen": 0.14903700351715088, "log_odds_ratio": -0.8900966644287109, "logits/chosen": 0.3829025626182556, "logits/rejected": 0.3640248775482178, "logps/chosen": -2.901625156402588, "logps/rejected": -2.9815287590026855, "loss": 1.0771, "nll_loss": 0.9880964756011963, "rewards/accuracies": 0.75, "rewards/chosen": -0.2901625335216522, "rewards/margins": 0.007990354672074318, "rewards/rejected": -0.2981528639793396, "step": 957 }, { "epoch": 2.622861054072553, "grad_norm": 4.44187068939209, "learning_rate": 8.68904109589041e-07, "log_odds_chosen": 0.19857901334762573, "log_odds_ratio": -0.6774380803108215, "logits/chosen": 0.47656702995300293, "logits/rejected": 0.45436152815818787, "logps/chosen": -2.5328176021575928, "logps/rejected": -2.694801092147827, "loss": 1.1661, "nll_loss": 1.0984001159667969, "rewards/accuracies": 0.625, "rewards/chosen": -0.25328177213668823, "rewards/margins": 0.01619834080338478, "rewards/rejected": -0.2694801092147827, "step": 958 }, { "epoch": 2.625598904859685, "grad_norm": 3.9473936557769775, "learning_rate": 8.687671232876712e-07, "log_odds_chosen": 0.1344480961561203, "log_odds_ratio": -0.6438948512077332, "logits/chosen": 0.26536768674850464, "logits/rejected": 0.2847352921962738, "logps/chosen": -2.6188464164733887, "logps/rejected": -2.725189208984375, "loss": 1.1373, "nll_loss": 1.0729410648345947, "rewards/accuracies": 0.75, "rewards/chosen": -0.2618846595287323, "rewards/margins": 0.010634263977408409, "rewards/rejected": -0.27251893281936646, "step": 959 }, { "epoch": 2.628336755646817, "grad_norm": 3.6596198081970215, "learning_rate": 8.686301369863012e-07, "log_odds_chosen": 0.40449970960617065, "log_odds_ratio": -0.5815064907073975, "logits/chosen": 0.33329638838768005, "logits/rejected": 0.295706570148468, "logps/chosen": -2.26314115524292, "logps/rejected": -2.6293463706970215, "loss": 1.206, "nll_loss": 1.1478873491287231, "rewards/accuracies": 0.75, "rewards/chosen": -0.22631412744522095, "rewards/margins": 0.03662052005529404, "rewards/rejected": -0.2629346549510956, "step": 960 }, { "epoch": 2.631074606433949, "grad_norm": 3.757051944732666, "learning_rate": 8.684931506849314e-07, "log_odds_chosen": 0.3959204852581024, "log_odds_ratio": -0.5711815357208252, "logits/chosen": 0.3524714410305023, "logits/rejected": 0.3270834684371948, "logps/chosen": -2.4346189498901367, "logps/rejected": -2.766864538192749, "loss": 1.1691, "nll_loss": 1.1119471788406372, "rewards/accuracies": 0.75, "rewards/chosen": -0.24346190690994263, "rewards/margins": 0.033224571496248245, "rewards/rejected": -0.27668648958206177, "step": 961 }, { "epoch": 2.6338124572210813, "grad_norm": 5.069285869598389, "learning_rate": 8.683561643835616e-07, "log_odds_chosen": -0.26279181241989136, "log_odds_ratio": -0.9216510057449341, "logits/chosen": 0.465939998626709, "logits/rejected": 0.5843415856361389, "logps/chosen": -3.674560070037842, "logps/rejected": -3.414813995361328, "loss": 1.0786, "nll_loss": 0.986436128616333, "rewards/accuracies": 0.375, "rewards/chosen": -0.36745601892471313, "rewards/margins": -0.02597460150718689, "rewards/rejected": -0.34148141741752625, "step": 962 }, { "epoch": 2.6365503080082133, "grad_norm": 3.371615171432495, "learning_rate": 8.682191780821917e-07, "log_odds_chosen": 0.9481348991394043, "log_odds_ratio": -0.40714728832244873, "logits/chosen": 0.3932586908340454, "logits/rejected": 0.34875214099884033, "logps/chosen": -2.625084400177002, "logps/rejected": -3.540806770324707, "loss": 1.1245, "nll_loss": 1.0838109254837036, "rewards/accuracies": 0.875, "rewards/chosen": -0.26250848174095154, "rewards/margins": 0.09157222509384155, "rewards/rejected": -0.3540807068347931, "step": 963 }, { "epoch": 2.6392881587953454, "grad_norm": 4.01673698425293, "learning_rate": 8.680821917808218e-07, "log_odds_chosen": 0.24900028109550476, "log_odds_ratio": -0.6911453604698181, "logits/chosen": 0.34510716795921326, "logits/rejected": 0.4258682131767273, "logps/chosen": -2.965573787689209, "logps/rejected": -3.225306510925293, "loss": 1.13, "nll_loss": 1.0608444213867188, "rewards/accuracies": 0.625, "rewards/chosen": -0.29655739665031433, "rewards/margins": 0.025973286479711533, "rewards/rejected": -0.32253068685531616, "step": 964 }, { "epoch": 2.642026009582478, "grad_norm": 4.110629558563232, "learning_rate": 8.67945205479452e-07, "log_odds_chosen": -0.11828593164682388, "log_odds_ratio": -0.8017216920852661, "logits/chosen": 0.2328415960073471, "logits/rejected": 0.2646716237068176, "logps/chosen": -2.894686698913574, "logps/rejected": -2.7831125259399414, "loss": 1.0945, "nll_loss": 1.0143637657165527, "rewards/accuracies": 0.5, "rewards/chosen": -0.2894686758518219, "rewards/margins": -0.01115739718079567, "rewards/rejected": -0.27831125259399414, "step": 965 }, { "epoch": 2.64476386036961, "grad_norm": 3.9772696495056152, "learning_rate": 8.678082191780822e-07, "log_odds_chosen": 0.26384004950523376, "log_odds_ratio": -0.693263590335846, "logits/chosen": 0.4499971568584442, "logits/rejected": 0.41705361008644104, "logps/chosen": -2.2474939823150635, "logps/rejected": -2.484981060028076, "loss": 1.1193, "nll_loss": 1.0500200986862183, "rewards/accuracies": 0.5, "rewards/chosen": -0.2247493863105774, "rewards/margins": 0.023748718202114105, "rewards/rejected": -0.2484981119632721, "step": 966 }, { "epoch": 2.647501711156742, "grad_norm": 3.6444945335388184, "learning_rate": 8.676712328767122e-07, "log_odds_chosen": 0.4972327649593353, "log_odds_ratio": -0.7035251259803772, "logits/chosen": 0.41911065578460693, "logits/rejected": 0.47952529788017273, "logps/chosen": -2.344231367111206, "logps/rejected": -2.807969808578491, "loss": 1.0773, "nll_loss": 1.006963849067688, "rewards/accuracies": 0.625, "rewards/chosen": -0.23442311584949493, "rewards/margins": 0.046373847872018814, "rewards/rejected": -0.28079697489738464, "step": 967 }, { "epoch": 2.650239561943874, "grad_norm": 3.6324143409729004, "learning_rate": 8.675342465753425e-07, "log_odds_chosen": 0.24387381970882416, "log_odds_ratio": -0.6049452424049377, "logits/chosen": 0.5256701111793518, "logits/rejected": 0.49622657895088196, "logps/chosen": -2.4402527809143066, "logps/rejected": -2.6499428749084473, "loss": 1.1446, "nll_loss": 1.0840656757354736, "rewards/accuracies": 0.75, "rewards/chosen": -0.24402526021003723, "rewards/margins": 0.020969035103917122, "rewards/rejected": -0.2649942934513092, "step": 968 }, { "epoch": 2.652977412731006, "grad_norm": 4.465682029724121, "learning_rate": 8.673972602739726e-07, "log_odds_chosen": 0.9556630849838257, "log_odds_ratio": -0.3652229905128479, "logits/chosen": 0.5347252488136292, "logits/rejected": 0.450582355260849, "logps/chosen": -2.324366331100464, "logps/rejected": -3.2048544883728027, "loss": 1.0218, "nll_loss": 0.9852750301361084, "rewards/accuracies": 1.0, "rewards/chosen": -0.2324366420507431, "rewards/margins": 0.08804880082607269, "rewards/rejected": -0.3204854726791382, "step": 969 }, { "epoch": 2.6557152635181382, "grad_norm": 4.147716999053955, "learning_rate": 8.672602739726028e-07, "log_odds_chosen": 1.3332282304763794, "log_odds_ratio": -0.5088048577308655, "logits/chosen": 0.4055696129798889, "logits/rejected": 0.3259620666503906, "logps/chosen": -2.192638397216797, "logps/rejected": -3.4778802394866943, "loss": 1.1502, "nll_loss": 1.0993541479110718, "rewards/accuracies": 0.875, "rewards/chosen": -0.21926385164260864, "rewards/margins": 0.12852418422698975, "rewards/rejected": -0.347788006067276, "step": 970 }, { "epoch": 2.6584531143052703, "grad_norm": 4.353591442108154, "learning_rate": 8.671232876712329e-07, "log_odds_chosen": 0.9823398590087891, "log_odds_ratio": -0.6245760321617126, "logits/chosen": 0.2816828191280365, "logits/rejected": 0.3336850106716156, "logps/chosen": -2.807692527770996, "logps/rejected": -3.7596821784973145, "loss": 1.102, "nll_loss": 1.039534568786621, "rewards/accuracies": 0.75, "rewards/chosen": -0.2807692587375641, "rewards/margins": 0.09519895166158676, "rewards/rejected": -0.37596818804740906, "step": 971 }, { "epoch": 2.6611909650924024, "grad_norm": 4.558338165283203, "learning_rate": 8.66986301369863e-07, "log_odds_chosen": -0.009463503956794739, "log_odds_ratio": -0.8007328510284424, "logits/chosen": 0.3287925720214844, "logits/rejected": 0.3369809687137604, "logps/chosen": -2.902215003967285, "logps/rejected": -2.857785701751709, "loss": 1.0741, "nll_loss": 0.9940000176429749, "rewards/accuracies": 0.75, "rewards/chosen": -0.2902214825153351, "rewards/margins": -0.004442932084202766, "rewards/rejected": -0.28577855229377747, "step": 972 }, { "epoch": 2.6639288158795345, "grad_norm": 4.379016876220703, "learning_rate": 8.668493150684932e-07, "log_odds_chosen": -0.4806560277938843, "log_odds_ratio": -1.0400646924972534, "logits/chosen": 0.2379290759563446, "logits/rejected": 0.2499394565820694, "logps/chosen": -2.7919116020202637, "logps/rejected": -2.3485069274902344, "loss": 1.192, "nll_loss": 1.087996244430542, "rewards/accuracies": 0.5, "rewards/chosen": -0.27919116616249084, "rewards/margins": -0.04434044659137726, "rewards/rejected": -0.2348506897687912, "step": 973 }, { "epoch": 2.6666666666666665, "grad_norm": 4.2779459953308105, "learning_rate": 8.667123287671233e-07, "log_odds_chosen": 0.2718527317047119, "log_odds_ratio": -0.6021555662155151, "logits/chosen": 0.46199703216552734, "logits/rejected": 0.44143179059028625, "logps/chosen": -2.5188181400299072, "logps/rejected": -2.7706665992736816, "loss": 1.0268, "nll_loss": 0.9665722250938416, "rewards/accuracies": 0.875, "rewards/chosen": -0.25188180804252625, "rewards/margins": 0.025184888392686844, "rewards/rejected": -0.2770666778087616, "step": 974 }, { "epoch": 2.669404517453799, "grad_norm": 4.441427707672119, "learning_rate": 8.665753424657534e-07, "log_odds_chosen": -0.2182489037513733, "log_odds_ratio": -0.9269323348999023, "logits/chosen": 0.454006165266037, "logits/rejected": 0.5885692834854126, "logps/chosen": -3.1501519680023193, "logps/rejected": -2.947600841522217, "loss": 0.991, "nll_loss": 0.8983458280563354, "rewards/accuracies": 0.5, "rewards/chosen": -0.31501519680023193, "rewards/margins": -0.020255113020539284, "rewards/rejected": -0.2947600781917572, "step": 975 }, { "epoch": 2.672142368240931, "grad_norm": 3.7582318782806396, "learning_rate": 8.664383561643836e-07, "log_odds_chosen": 0.6695517897605896, "log_odds_ratio": -0.5417008399963379, "logits/chosen": 0.38013339042663574, "logits/rejected": 0.3133183717727661, "logps/chosen": -2.369859218597412, "logps/rejected": -2.9714131355285645, "loss": 1.1476, "nll_loss": 1.0934207439422607, "rewards/accuracies": 0.75, "rewards/chosen": -0.2369859218597412, "rewards/margins": 0.06015538424253464, "rewards/rejected": -0.29714131355285645, "step": 976 }, { "epoch": 2.674880219028063, "grad_norm": 4.714694976806641, "learning_rate": 8.663013698630136e-07, "log_odds_chosen": 0.12601763010025024, "log_odds_ratio": -0.6989340782165527, "logits/chosen": 0.45681577920913696, "logits/rejected": 0.4134732484817505, "logps/chosen": -2.9267263412475586, "logps/rejected": -3.0034995079040527, "loss": 1.1081, "nll_loss": 1.038251280784607, "rewards/accuracies": 0.625, "rewards/chosen": -0.29267263412475586, "rewards/margins": 0.007677330635488033, "rewards/rejected": -0.3003499507904053, "step": 977 }, { "epoch": 2.6776180698151952, "grad_norm": 3.818593978881836, "learning_rate": 8.661643835616438e-07, "log_odds_chosen": 0.8796499371528625, "log_odds_ratio": -0.4054209887981415, "logits/chosen": 0.42658108472824097, "logits/rejected": 0.4028100371360779, "logps/chosen": -1.8845546245574951, "logps/rejected": -2.6692628860473633, "loss": 1.0838, "nll_loss": 1.0432932376861572, "rewards/accuracies": 0.875, "rewards/chosen": -0.1884554624557495, "rewards/margins": 0.07847082614898682, "rewards/rejected": -0.26692628860473633, "step": 978 }, { "epoch": 2.6803559206023273, "grad_norm": 3.78771710395813, "learning_rate": 8.66027397260274e-07, "log_odds_chosen": -0.013202115893363953, "log_odds_ratio": -0.7456290125846863, "logits/chosen": 0.4940486550331116, "logits/rejected": 0.48202788829803467, "logps/chosen": -2.3419694900512695, "logps/rejected": -2.317321538925171, "loss": 1.128, "nll_loss": 1.0534733533859253, "rewards/accuracies": 0.5, "rewards/chosen": -0.2341969609260559, "rewards/margins": -0.0024648047983646393, "rewards/rejected": -0.23173215985298157, "step": 979 }, { "epoch": 2.6830937713894594, "grad_norm": 4.52482271194458, "learning_rate": 8.658904109589041e-07, "log_odds_chosen": 0.5099413394927979, "log_odds_ratio": -0.6494305729866028, "logits/chosen": 0.4990518093109131, "logits/rejected": 0.5484512448310852, "logps/chosen": -3.1680469512939453, "logps/rejected": -3.667041301727295, "loss": 0.9428, "nll_loss": 0.8779059648513794, "rewards/accuracies": 0.75, "rewards/chosen": -0.3168046772480011, "rewards/margins": 0.04989945888519287, "rewards/rejected": -0.36670416593551636, "step": 980 }, { "epoch": 2.6858316221765914, "grad_norm": 5.772551536560059, "learning_rate": 8.657534246575342e-07, "log_odds_chosen": -0.13107003271579742, "log_odds_ratio": -0.9531735777854919, "logits/chosen": 0.5423656105995178, "logits/rejected": 0.6665937900543213, "logps/chosen": -3.5498387813568115, "logps/rejected": -3.3941993713378906, "loss": 1.0752, "nll_loss": 0.979928731918335, "rewards/accuracies": 0.375, "rewards/chosen": -0.3549838960170746, "rewards/margins": -0.015563949942588806, "rewards/rejected": -0.3394199311733246, "step": 981 }, { "epoch": 2.6885694729637235, "grad_norm": 6.057487487792969, "learning_rate": 8.656164383561644e-07, "log_odds_chosen": -0.24389228224754333, "log_odds_ratio": -0.95733642578125, "logits/chosen": 0.4065546691417694, "logits/rejected": 0.5241173505783081, "logps/chosen": -3.4708070755004883, "logps/rejected": -3.2135751247406006, "loss": 1.0448, "nll_loss": 0.9490290880203247, "rewards/accuracies": 0.625, "rewards/chosen": -0.34708070755004883, "rewards/margins": -0.02572319097816944, "rewards/rejected": -0.32135751843452454, "step": 982 }, { "epoch": 2.6913073237508556, "grad_norm": 4.793175220489502, "learning_rate": 8.654794520547945e-07, "log_odds_chosen": -0.2938598394393921, "log_odds_ratio": -0.9993425607681274, "logits/chosen": 0.3334425389766693, "logits/rejected": 0.4168260991573334, "logps/chosen": -3.156846761703491, "logps/rejected": -2.8595633506774902, "loss": 1.1771, "nll_loss": 1.0772134065628052, "rewards/accuracies": 0.5, "rewards/chosen": -0.3156846761703491, "rewards/margins": -0.029728341847658157, "rewards/rejected": -0.28595635294914246, "step": 983 }, { "epoch": 2.6940451745379876, "grad_norm": 5.904815196990967, "learning_rate": 8.653424657534246e-07, "log_odds_chosen": -0.6432310342788696, "log_odds_ratio": -1.1332125663757324, "logits/chosen": 0.613412618637085, "logits/rejected": 0.5866624116897583, "logps/chosen": -3.667738437652588, "logps/rejected": -3.0465588569641113, "loss": 1.0009, "nll_loss": 0.887599527835846, "rewards/accuracies": 0.125, "rewards/chosen": -0.3667738437652588, "rewards/margins": -0.062117915600538254, "rewards/rejected": -0.30465590953826904, "step": 984 }, { "epoch": 2.6967830253251197, "grad_norm": 3.8495304584503174, "learning_rate": 8.652054794520548e-07, "log_odds_chosen": 1.031179666519165, "log_odds_ratio": -0.3791695535182953, "logits/chosen": 0.21962901949882507, "logits/rejected": 0.1663374900817871, "logps/chosen": -1.820354700088501, "logps/rejected": -2.7528042793273926, "loss": 1.0409, "nll_loss": 1.0029710531234741, "rewards/accuracies": 0.875, "rewards/chosen": -0.18203546106815338, "rewards/margins": 0.09324497729539871, "rewards/rejected": -0.2752804458141327, "step": 985 }, { "epoch": 2.6995208761122518, "grad_norm": 4.69005823135376, "learning_rate": 8.650684931506849e-07, "log_odds_chosen": 0.7276585102081299, "log_odds_ratio": -0.4670488238334656, "logits/chosen": 0.5400041341781616, "logits/rejected": 0.5500075221061707, "logps/chosen": -2.29183292388916, "logps/rejected": -2.9599483013153076, "loss": 0.9907, "nll_loss": 0.944026529788971, "rewards/accuracies": 0.625, "rewards/chosen": -0.22918331623077393, "rewards/margins": 0.06681152433156967, "rewards/rejected": -0.2959948480129242, "step": 986 }, { "epoch": 2.702258726899384, "grad_norm": 5.167577743530273, "learning_rate": 8.649315068493151e-07, "log_odds_chosen": 0.18756964802742004, "log_odds_ratio": -0.6848562359809875, "logits/chosen": 0.3040257692337036, "logits/rejected": 0.30575430393218994, "logps/chosen": -2.7426600456237793, "logps/rejected": -2.907179355621338, "loss": 1.1747, "nll_loss": 1.1062183380126953, "rewards/accuracies": 0.5, "rewards/chosen": -0.27426600456237793, "rewards/margins": 0.016451917588710785, "rewards/rejected": -0.2907179594039917, "step": 987 }, { "epoch": 2.704996577686516, "grad_norm": 3.7427563667297363, "learning_rate": 8.647945205479452e-07, "log_odds_chosen": 0.7677860260009766, "log_odds_ratio": -0.4391302466392517, "logits/chosen": 0.4675982594490051, "logits/rejected": 0.5412451028823853, "logps/chosen": -2.593210220336914, "logps/rejected": -3.30289888381958, "loss": 1.0609, "nll_loss": 1.0169646739959717, "rewards/accuracies": 0.875, "rewards/chosen": -0.25932103395462036, "rewards/margins": 0.0709688663482666, "rewards/rejected": -0.33028990030288696, "step": 988 }, { "epoch": 2.707734428473648, "grad_norm": 4.160985946655273, "learning_rate": 8.646575342465753e-07, "log_odds_chosen": 0.20714649558067322, "log_odds_ratio": -0.6463730335235596, "logits/chosen": 0.6344631910324097, "logits/rejected": 0.6146570444107056, "logps/chosen": -2.781032085418701, "logps/rejected": -2.9699454307556152, "loss": 1.0075, "nll_loss": 0.9428866505622864, "rewards/accuracies": 0.5, "rewards/chosen": -0.27810320258140564, "rewards/margins": 0.01889132708311081, "rewards/rejected": -0.29699453711509705, "step": 989 }, { "epoch": 2.71047227926078, "grad_norm": 3.4757518768310547, "learning_rate": 8.645205479452055e-07, "log_odds_chosen": 0.7370429635047913, "log_odds_ratio": -0.592406690120697, "logits/chosen": 0.4748738408088684, "logits/rejected": 0.4541902542114258, "logps/chosen": -2.6131503582000732, "logps/rejected": -3.2801904678344727, "loss": 1.0149, "nll_loss": 0.9556347131729126, "rewards/accuracies": 0.625, "rewards/chosen": -0.2613150477409363, "rewards/margins": 0.06670399755239487, "rewards/rejected": -0.32801905274391174, "step": 990 }, { "epoch": 2.713210130047912, "grad_norm": 3.646462917327881, "learning_rate": 8.643835616438355e-07, "log_odds_chosen": 0.7263153195381165, "log_odds_ratio": -0.4165315628051758, "logits/chosen": 0.43223559856414795, "logits/rejected": 0.4819892644882202, "logps/chosen": -2.3613929748535156, "logps/rejected": -3.0168800354003906, "loss": 1.0496, "nll_loss": 1.0079721212387085, "rewards/accuracies": 1.0, "rewards/chosen": -0.23613929748535156, "rewards/margins": 0.06554870307445526, "rewards/rejected": -0.30168798565864563, "step": 991 }, { "epoch": 2.7159479808350446, "grad_norm": 3.4865918159484863, "learning_rate": 8.642465753424657e-07, "log_odds_chosen": 0.4441293179988861, "log_odds_ratio": -0.5261403918266296, "logits/chosen": 0.43770527839660645, "logits/rejected": 0.439876914024353, "logps/chosen": -2.302553653717041, "logps/rejected": -2.7147018909454346, "loss": 1.034, "nll_loss": 0.9813830852508545, "rewards/accuracies": 0.75, "rewards/chosen": -0.2302553653717041, "rewards/margins": 0.04121483862400055, "rewards/rejected": -0.27147018909454346, "step": 992 }, { "epoch": 2.7186858316221767, "grad_norm": 5.888017654418945, "learning_rate": 8.641095890410959e-07, "log_odds_chosen": 0.22347180545330048, "log_odds_ratio": -0.8388725519180298, "logits/chosen": 0.4353110194206238, "logits/rejected": 0.3727915287017822, "logps/chosen": -2.962675094604492, "logps/rejected": -3.1540908813476562, "loss": 1.0768, "nll_loss": 0.9929169416427612, "rewards/accuracies": 0.75, "rewards/chosen": -0.2962675094604492, "rewards/margins": 0.019141584634780884, "rewards/rejected": -0.3154090642929077, "step": 993 }, { "epoch": 2.7214236824093088, "grad_norm": 5.060014247894287, "learning_rate": 8.63972602739726e-07, "log_odds_chosen": 0.17679162323474884, "log_odds_ratio": -0.7361570596694946, "logits/chosen": 0.5043162703514099, "logits/rejected": 0.6204589009284973, "logps/chosen": -2.8750452995300293, "logps/rejected": -3.0286307334899902, "loss": 0.9279, "nll_loss": 0.8543144464492798, "rewards/accuracies": 0.75, "rewards/chosen": -0.28750452399253845, "rewards/margins": 0.015358572825789452, "rewards/rejected": -0.30286309123039246, "step": 994 }, { "epoch": 2.724161533196441, "grad_norm": 4.41898250579834, "learning_rate": 8.638356164383561e-07, "log_odds_chosen": 0.5628901720046997, "log_odds_ratio": -0.5556312799453735, "logits/chosen": 0.49345070123672485, "logits/rejected": 0.42863157391548157, "logps/chosen": -2.3781330585479736, "logps/rejected": -2.867119789123535, "loss": 0.9017, "nll_loss": 0.8461341857910156, "rewards/accuracies": 0.625, "rewards/chosen": -0.2378132939338684, "rewards/margins": 0.04889865964651108, "rewards/rejected": -0.2867119610309601, "step": 995 }, { "epoch": 2.726899383983573, "grad_norm": 3.4058613777160645, "learning_rate": 8.636986301369863e-07, "log_odds_chosen": 0.6819813847541809, "log_odds_ratio": -0.43057429790496826, "logits/chosen": 0.3610661029815674, "logits/rejected": 0.28793326020240784, "logps/chosen": -2.488356590270996, "logps/rejected": -3.1244921684265137, "loss": 1.0572, "nll_loss": 1.0141425132751465, "rewards/accuracies": 0.875, "rewards/chosen": -0.24883565306663513, "rewards/margins": 0.06361354887485504, "rewards/rejected": -0.31244921684265137, "step": 996 }, { "epoch": 2.729637234770705, "grad_norm": 3.8646647930145264, "learning_rate": 8.635616438356164e-07, "log_odds_chosen": -0.029070481657981873, "log_odds_ratio": -0.7531240582466125, "logits/chosen": 0.45290666818618774, "logits/rejected": 0.5692889094352722, "logps/chosen": -2.542440414428711, "logps/rejected": -2.497110605239868, "loss": 0.9785, "nll_loss": 0.9031814336776733, "rewards/accuracies": 0.75, "rewards/chosen": -0.25424402952194214, "rewards/margins": -0.0045329853892326355, "rewards/rejected": -0.2497110664844513, "step": 997 }, { "epoch": 2.732375085557837, "grad_norm": 4.845486640930176, "learning_rate": 8.634246575342465e-07, "log_odds_chosen": 0.41829919815063477, "log_odds_ratio": -0.6768577694892883, "logits/chosen": 0.5678642988204956, "logits/rejected": 0.6574139595031738, "logps/chosen": -2.9611918926239014, "logps/rejected": -3.3605589866638184, "loss": 0.9445, "nll_loss": 0.8767824769020081, "rewards/accuracies": 0.5, "rewards/chosen": -0.29611921310424805, "rewards/margins": 0.03993667662143707, "rewards/rejected": -0.3360559046268463, "step": 998 }, { "epoch": 2.735112936344969, "grad_norm": 3.8609189987182617, "learning_rate": 8.632876712328767e-07, "log_odds_chosen": 0.05445735901594162, "log_odds_ratio": -0.7447786927223206, "logits/chosen": 0.4362819194793701, "logits/rejected": 0.35346245765686035, "logps/chosen": -2.5430245399475098, "logps/rejected": -2.583261489868164, "loss": 1.1214, "nll_loss": 1.0468875169754028, "rewards/accuracies": 0.5, "rewards/chosen": -0.2543024718761444, "rewards/margins": 0.004023673012852669, "rewards/rejected": -0.25832614302635193, "step": 999 }, { "epoch": 2.737850787132101, "grad_norm": 3.3129212856292725, "learning_rate": 8.631506849315068e-07, "log_odds_chosen": 1.059866189956665, "log_odds_ratio": -0.3160780668258667, "logits/chosen": 0.5169958472251892, "logits/rejected": 0.5228908061981201, "logps/chosen": -2.3107314109802246, "logps/rejected": -3.2814290523529053, "loss": 1.0166, "nll_loss": 0.9850308895111084, "rewards/accuracies": 1.0, "rewards/chosen": -0.23107314109802246, "rewards/margins": 0.09706976264715195, "rewards/rejected": -0.3281428813934326, "step": 1000 }, { "epoch": 2.7405886379192332, "grad_norm": 3.918519973754883, "learning_rate": 8.63013698630137e-07, "log_odds_chosen": 0.9739512205123901, "log_odds_ratio": -0.3763517737388611, "logits/chosen": 0.4957456886768341, "logits/rejected": 0.5894877910614014, "logps/chosen": -2.6197562217712402, "logps/rejected": -3.546821117401123, "loss": 0.9469, "nll_loss": 0.9093111753463745, "rewards/accuracies": 0.875, "rewards/chosen": -0.26197561621665955, "rewards/margins": 0.09270650893449783, "rewards/rejected": -0.35468214750289917, "step": 1001 }, { "epoch": 2.7433264887063658, "grad_norm": 3.919041156768799, "learning_rate": 8.628767123287671e-07, "log_odds_chosen": 0.16011664271354675, "log_odds_ratio": -0.7283333539962769, "logits/chosen": 0.4511462152004242, "logits/rejected": 0.38744059205055237, "logps/chosen": -2.706813097000122, "logps/rejected": -2.8276619911193848, "loss": 1.2205, "nll_loss": 1.1476876735687256, "rewards/accuracies": 0.75, "rewards/chosen": -0.27068132162094116, "rewards/margins": 0.012084903195500374, "rewards/rejected": -0.2827662229537964, "step": 1002 }, { "epoch": 2.746064339493498, "grad_norm": 3.4328246116638184, "learning_rate": 8.627397260273972e-07, "log_odds_chosen": 1.1710790395736694, "log_odds_ratio": -0.34494543075561523, "logits/chosen": 0.5608347654342651, "logits/rejected": 0.5917471647262573, "logps/chosen": -2.630976915359497, "logps/rejected": -3.7501049041748047, "loss": 0.9403, "nll_loss": 0.9057616591453552, "rewards/accuracies": 0.875, "rewards/chosen": -0.26309770345687866, "rewards/margins": 0.1119127869606018, "rewards/rejected": -0.37501049041748047, "step": 1003 }, { "epoch": 2.74880219028063, "grad_norm": 4.020617961883545, "learning_rate": 8.626027397260274e-07, "log_odds_chosen": 0.7907995581626892, "log_odds_ratio": -0.48392462730407715, "logits/chosen": 0.6732481122016907, "logits/rejected": 0.6868646144866943, "logps/chosen": -2.3718149662017822, "logps/rejected": -3.0918326377868652, "loss": 0.9748, "nll_loss": 0.9264560341835022, "rewards/accuracies": 0.875, "rewards/chosen": -0.23718151450157166, "rewards/margins": 0.07200175523757935, "rewards/rejected": -0.309183269739151, "step": 1004 }, { "epoch": 2.751540041067762, "grad_norm": 3.7928109169006348, "learning_rate": 8.624657534246575e-07, "log_odds_chosen": 0.6083040237426758, "log_odds_ratio": -0.461262047290802, "logits/chosen": 0.5216106176376343, "logits/rejected": 0.42686179280281067, "logps/chosen": -1.7180390357971191, "logps/rejected": -2.250305652618408, "loss": 1.18, "nll_loss": 1.1339128017425537, "rewards/accuracies": 0.875, "rewards/chosen": -0.17180390655994415, "rewards/margins": 0.05322665348649025, "rewards/rejected": -0.2250305414199829, "step": 1005 }, { "epoch": 2.754277891854894, "grad_norm": 4.360437870025635, "learning_rate": 8.623287671232876e-07, "log_odds_chosen": -0.29095903038978577, "log_odds_ratio": -0.9734717011451721, "logits/chosen": 0.4374402165412903, "logits/rejected": 0.47280818223953247, "logps/chosen": -3.337092399597168, "logps/rejected": -3.021697998046875, "loss": 1.0553, "nll_loss": 0.9580014944076538, "rewards/accuracies": 0.5, "rewards/chosen": -0.3337092697620392, "rewards/margins": -0.03153946250677109, "rewards/rejected": -0.3021697998046875, "step": 1006 }, { "epoch": 2.757015742642026, "grad_norm": 4.5044403076171875, "learning_rate": 8.621917808219178e-07, "log_odds_chosen": 0.3329174518585205, "log_odds_ratio": -0.5943809747695923, "logits/chosen": 0.2719760835170746, "logits/rejected": 0.2145296186208725, "logps/chosen": -2.8221426010131836, "logps/rejected": -3.130143880844116, "loss": 1.1034, "nll_loss": 1.0439246892929077, "rewards/accuracies": 0.75, "rewards/chosen": -0.28221428394317627, "rewards/margins": 0.03080012835562229, "rewards/rejected": -0.3130143880844116, "step": 1007 }, { "epoch": 2.759753593429158, "grad_norm": 3.8111460208892822, "learning_rate": 8.620547945205479e-07, "log_odds_chosen": -0.2604409456253052, "log_odds_ratio": -0.9708919525146484, "logits/chosen": 0.442291796207428, "logits/rejected": 0.5490685701370239, "logps/chosen": -3.14495849609375, "logps/rejected": -2.9156343936920166, "loss": 1.0616, "nll_loss": 0.9644662141799927, "rewards/accuracies": 0.125, "rewards/chosen": -0.31449589133262634, "rewards/margins": -0.022932425141334534, "rewards/rejected": -0.2915634512901306, "step": 1008 }, { "epoch": 2.7624914442162902, "grad_norm": 3.9177262783050537, "learning_rate": 8.61917808219178e-07, "log_odds_chosen": 0.40263432264328003, "log_odds_ratio": -0.5775529146194458, "logits/chosen": 0.47943082451820374, "logits/rejected": 0.5055013298988342, "logps/chosen": -2.6692147254943848, "logps/rejected": -3.0374083518981934, "loss": 0.9828, "nll_loss": 0.9250034689903259, "rewards/accuracies": 0.875, "rewards/chosen": -0.2669214904308319, "rewards/margins": 0.036819376051425934, "rewards/rejected": -0.30374085903167725, "step": 1009 }, { "epoch": 2.7652292950034223, "grad_norm": 4.20900821685791, "learning_rate": 8.617808219178082e-07, "log_odds_chosen": 0.5989261269569397, "log_odds_ratio": -0.5256392955780029, "logits/chosen": 0.6020945310592651, "logits/rejected": 0.44763511419296265, "logps/chosen": -2.305462598800659, "logps/rejected": -2.8496952056884766, "loss": 1.1305, "nll_loss": 1.0779640674591064, "rewards/accuracies": 0.75, "rewards/chosen": -0.2305462658405304, "rewards/margins": 0.0544232502579689, "rewards/rejected": -0.2849695086479187, "step": 1010 }, { "epoch": 2.7679671457905544, "grad_norm": 3.9374101161956787, "learning_rate": 8.616438356164383e-07, "log_odds_chosen": -0.028553009033203125, "log_odds_ratio": -0.8040643930435181, "logits/chosen": 0.5682849884033203, "logits/rejected": 0.6321057677268982, "logps/chosen": -2.3068783283233643, "logps/rejected": -2.274231433868408, "loss": 1.0645, "nll_loss": 0.984123706817627, "rewards/accuracies": 0.625, "rewards/chosen": -0.23068784177303314, "rewards/margins": -0.003264687955379486, "rewards/rejected": -0.22742313146591187, "step": 1011 }, { "epoch": 2.7707049965776864, "grad_norm": 4.816159725189209, "learning_rate": 8.615068493150684e-07, "log_odds_chosen": 0.07830113172531128, "log_odds_ratio": -0.7807722687721252, "logits/chosen": 0.5387289524078369, "logits/rejected": 0.5055743455886841, "logps/chosen": -2.6073157787323, "logps/rejected": -2.6897521018981934, "loss": 1.0702, "nll_loss": 0.9921385049819946, "rewards/accuracies": 0.375, "rewards/chosen": -0.26073157787323, "rewards/margins": 0.008243624120950699, "rewards/rejected": -0.2689751982688904, "step": 1012 }, { "epoch": 2.7734428473648185, "grad_norm": 3.788273334503174, "learning_rate": 8.613698630136986e-07, "log_odds_chosen": 1.076197862625122, "log_odds_ratio": -0.33726638555526733, "logits/chosen": 0.397507905960083, "logits/rejected": 0.38299793004989624, "logps/chosen": -2.426114797592163, "logps/rejected": -3.389629364013672, "loss": 0.967, "nll_loss": 0.9332801103591919, "rewards/accuracies": 1.0, "rewards/chosen": -0.24261149764060974, "rewards/margins": 0.09635143727064133, "rewards/rejected": -0.3389629125595093, "step": 1013 }, { "epoch": 2.7761806981519506, "grad_norm": 3.4434831142425537, "learning_rate": 8.612328767123287e-07, "log_odds_chosen": -0.10252058506011963, "log_odds_ratio": -0.7801321744918823, "logits/chosen": 0.294537752866745, "logits/rejected": 0.2779568135738373, "logps/chosen": -2.4193692207336426, "logps/rejected": -2.319550037384033, "loss": 1.0666, "nll_loss": 0.9885531663894653, "rewards/accuracies": 0.5, "rewards/chosen": -0.24193692207336426, "rewards/margins": -0.009981920942664146, "rewards/rejected": -0.23195500671863556, "step": 1014 }, { "epoch": 2.7789185489390826, "grad_norm": 5.299360752105713, "learning_rate": 8.610958904109589e-07, "log_odds_chosen": 0.530623733997345, "log_odds_ratio": -0.5702595114707947, "logits/chosen": 0.5937362313270569, "logits/rejected": 0.6658370494842529, "logps/chosen": -2.9171462059020996, "logps/rejected": -3.401301383972168, "loss": 0.9269, "nll_loss": 0.86983323097229, "rewards/accuracies": 0.75, "rewards/chosen": -0.2917146384716034, "rewards/margins": 0.048415523022413254, "rewards/rejected": -0.34013018012046814, "step": 1015 }, { "epoch": 2.7816563997262147, "grad_norm": 5.757147789001465, "learning_rate": 8.60958904109589e-07, "log_odds_chosen": -0.004635661840438843, "log_odds_ratio": -0.8339478969573975, "logits/chosen": 0.5254438519477844, "logits/rejected": 0.5672013163566589, "logps/chosen": -3.1999855041503906, "logps/rejected": -3.189032554626465, "loss": 1.1505, "nll_loss": 1.0671366453170776, "rewards/accuracies": 0.625, "rewards/chosen": -0.3199986219406128, "rewards/margins": -0.0010953154414892197, "rewards/rejected": -0.31890326738357544, "step": 1016 }, { "epoch": 2.7843942505133468, "grad_norm": 4.916522026062012, "learning_rate": 8.608219178082191e-07, "log_odds_chosen": 0.6025272011756897, "log_odds_ratio": -0.4886900782585144, "logits/chosen": 0.5409669876098633, "logits/rejected": 0.5345628261566162, "logps/chosen": -2.713399887084961, "logps/rejected": -3.262211322784424, "loss": 1.0059, "nll_loss": 0.9569985270500183, "rewards/accuracies": 0.875, "rewards/chosen": -0.271340012550354, "rewards/margins": 0.054881125688552856, "rewards/rejected": -0.3262211084365845, "step": 1017 }, { "epoch": 2.7871321013004793, "grad_norm": 3.724862813949585, "learning_rate": 8.606849315068493e-07, "log_odds_chosen": 0.6305993795394897, "log_odds_ratio": -0.6126658916473389, "logits/chosen": 0.5297539234161377, "logits/rejected": 0.5722271203994751, "logps/chosen": -2.46732234954834, "logps/rejected": -2.9987053871154785, "loss": 1.0989, "nll_loss": 1.037605881690979, "rewards/accuracies": 0.875, "rewards/chosen": -0.24673223495483398, "rewards/margins": 0.05313831567764282, "rewards/rejected": -0.2998705506324768, "step": 1018 }, { "epoch": 2.7898699520876113, "grad_norm": 4.6636457443237305, "learning_rate": 8.605479452054794e-07, "log_odds_chosen": -0.05924539268016815, "log_odds_ratio": -0.8904281258583069, "logits/chosen": 0.4456421732902527, "logits/rejected": 0.4581106901168823, "logps/chosen": -3.078984260559082, "logps/rejected": -2.989079475402832, "loss": 1.0692, "nll_loss": 0.9801506996154785, "rewards/accuracies": 0.5, "rewards/chosen": -0.3078984022140503, "rewards/margins": -0.008990485221147537, "rewards/rejected": -0.29890793561935425, "step": 1019 }, { "epoch": 2.7926078028747434, "grad_norm": 3.7724509239196777, "learning_rate": 8.604109589041095e-07, "log_odds_chosen": 0.3617746829986572, "log_odds_ratio": -0.7191122174263, "logits/chosen": 0.4625523090362549, "logits/rejected": 0.40756118297576904, "logps/chosen": -2.7302615642547607, "logps/rejected": -3.0691094398498535, "loss": 1.18, "nll_loss": 1.1081008911132812, "rewards/accuracies": 0.5, "rewards/chosen": -0.27302616834640503, "rewards/margins": 0.03388476371765137, "rewards/rejected": -0.3069109320640564, "step": 1020 }, { "epoch": 2.7953456536618755, "grad_norm": 3.7425014972686768, "learning_rate": 8.602739726027397e-07, "log_odds_chosen": -0.2388334572315216, "log_odds_ratio": -0.9091558456420898, "logits/chosen": 0.4957258105278015, "logits/rejected": 0.44509822130203247, "logps/chosen": -2.5928080081939697, "logps/rejected": -2.3437910079956055, "loss": 1.1452, "nll_loss": 1.0542747974395752, "rewards/accuracies": 0.625, "rewards/chosen": -0.259280800819397, "rewards/margins": -0.024901695549488068, "rewards/rejected": -0.2343790978193283, "step": 1021 }, { "epoch": 2.7980835044490076, "grad_norm": 3.4136669635772705, "learning_rate": 8.601369863013698e-07, "log_odds_chosen": 0.7805733680725098, "log_odds_ratio": -0.5393334031105042, "logits/chosen": 0.4708919823169708, "logits/rejected": 0.31471750140190125, "logps/chosen": -1.5949876308441162, "logps/rejected": -2.300947904586792, "loss": 1.1615, "nll_loss": 1.1075873374938965, "rewards/accuracies": 0.875, "rewards/chosen": -0.15949876606464386, "rewards/margins": 0.07059602439403534, "rewards/rejected": -0.230094775557518, "step": 1022 }, { "epoch": 2.8008213552361396, "grad_norm": 3.9804646968841553, "learning_rate": 8.599999999999999e-07, "log_odds_chosen": 0.1585044413805008, "log_odds_ratio": -0.7572554349899292, "logits/chosen": 0.40039461851119995, "logits/rejected": 0.29960381984710693, "logps/chosen": -2.3753623962402344, "logps/rejected": -2.494499444961548, "loss": 1.2081, "nll_loss": 1.1323692798614502, "rewards/accuracies": 0.75, "rewards/chosen": -0.2375362664461136, "rewards/margins": 0.011913683265447617, "rewards/rejected": -0.2494499534368515, "step": 1023 }, { "epoch": 2.8035592060232717, "grad_norm": 3.671894073486328, "learning_rate": 8.598630136986301e-07, "log_odds_chosen": 0.19787541031837463, "log_odds_ratio": -0.6812405586242676, "logits/chosen": 0.44346362352371216, "logits/rejected": 0.36941856145858765, "logps/chosen": -2.3859822750091553, "logps/rejected": -2.541959047317505, "loss": 1.174, "nll_loss": 1.1058317422866821, "rewards/accuracies": 0.75, "rewards/chosen": -0.23859825730323792, "rewards/margins": 0.015597661957144737, "rewards/rejected": -0.254195898771286, "step": 1024 }, { "epoch": 2.8062970568104038, "grad_norm": 3.757769823074341, "learning_rate": 8.597260273972602e-07, "log_odds_chosen": 0.33457043766975403, "log_odds_ratio": -0.6064164638519287, "logits/chosen": 0.47508376836776733, "logits/rejected": 0.5187005996704102, "logps/chosen": -2.625645637512207, "logps/rejected": -2.941118001937866, "loss": 0.9686, "nll_loss": 0.9079211950302124, "rewards/accuracies": 0.75, "rewards/chosen": -0.2625645697116852, "rewards/margins": 0.03154722973704338, "rewards/rejected": -0.29411178827285767, "step": 1025 }, { "epoch": 2.809034907597536, "grad_norm": 4.965365886688232, "learning_rate": 8.595890410958903e-07, "log_odds_chosen": 0.8836232423782349, "log_odds_ratio": -0.5474430322647095, "logits/chosen": 0.4560718238353729, "logits/rejected": 0.4936066269874573, "logps/chosen": -2.7233448028564453, "logps/rejected": -3.5338897705078125, "loss": 1.0328, "nll_loss": 0.9781002998352051, "rewards/accuracies": 0.75, "rewards/chosen": -0.272334486246109, "rewards/margins": 0.08105449378490448, "rewards/rejected": -0.3533889949321747, "step": 1026 }, { "epoch": 2.811772758384668, "grad_norm": 3.819434881210327, "learning_rate": 8.594520547945205e-07, "log_odds_chosen": 1.3428502082824707, "log_odds_ratio": -0.362268328666687, "logits/chosen": 0.5243929624557495, "logits/rejected": 0.42175155878067017, "logps/chosen": -2.05509877204895, "logps/rejected": -3.289425849914551, "loss": 1.0465, "nll_loss": 1.010269284248352, "rewards/accuracies": 0.875, "rewards/chosen": -0.20550987124443054, "rewards/margins": 0.12343274056911469, "rewards/rejected": -0.32894259691238403, "step": 1027 }, { "epoch": 2.8145106091718004, "grad_norm": 3.4486026763916016, "learning_rate": 8.593150684931506e-07, "log_odds_chosen": 0.44065314531326294, "log_odds_ratio": -0.600486159324646, "logits/chosen": 0.610465943813324, "logits/rejected": 0.553566575050354, "logps/chosen": -2.3715906143188477, "logps/rejected": -2.7307796478271484, "loss": 1.1876, "nll_loss": 1.1275991201400757, "rewards/accuracies": 0.75, "rewards/chosen": -0.23715907335281372, "rewards/margins": 0.03591892868280411, "rewards/rejected": -0.27307796478271484, "step": 1028 }, { "epoch": 2.8172484599589325, "grad_norm": 4.045375823974609, "learning_rate": 8.591780821917808e-07, "log_odds_chosen": -0.12911216914653778, "log_odds_ratio": -0.8517482876777649, "logits/chosen": 0.5635278820991516, "logits/rejected": 0.6730515360832214, "logps/chosen": -3.4580962657928467, "logps/rejected": -3.335106372833252, "loss": 0.9575, "nll_loss": 0.8723008036613464, "rewards/accuracies": 0.5, "rewards/chosen": -0.3458096385002136, "rewards/margins": -0.012298988178372383, "rewards/rejected": -0.3335106670856476, "step": 1029 }, { "epoch": 2.8199863107460645, "grad_norm": 4.395636558532715, "learning_rate": 8.590410958904109e-07, "log_odds_chosen": -0.32013511657714844, "log_odds_ratio": -0.9644442200660706, "logits/chosen": 0.5589276552200317, "logits/rejected": 0.6241569519042969, "logps/chosen": -2.9325919151306152, "logps/rejected": -2.611981153488159, "loss": 1.0705, "nll_loss": 0.9740274548530579, "rewards/accuracies": 0.5, "rewards/chosen": -0.29325923323631287, "rewards/margins": -0.03206109255552292, "rewards/rejected": -0.26119813323020935, "step": 1030 }, { "epoch": 2.8227241615331966, "grad_norm": 3.27669620513916, "learning_rate": 8.58904109589041e-07, "log_odds_chosen": 1.3069908618927002, "log_odds_ratio": -0.3086714744567871, "logits/chosen": 0.5354055166244507, "logits/rejected": 0.5288839340209961, "logps/chosen": -2.359381675720215, "logps/rejected": -3.5466580390930176, "loss": 0.9252, "nll_loss": 0.8943270444869995, "rewards/accuracies": 1.0, "rewards/chosen": -0.235938161611557, "rewards/margins": 0.1187276616692543, "rewards/rejected": -0.3546658158302307, "step": 1031 }, { "epoch": 2.8254620123203287, "grad_norm": 3.6992204189300537, "learning_rate": 8.587671232876712e-07, "log_odds_chosen": 0.5649818181991577, "log_odds_ratio": -0.5002472996711731, "logits/chosen": 0.5451610088348389, "logits/rejected": 0.596004068851471, "logps/chosen": -2.8845577239990234, "logps/rejected": -3.40645694732666, "loss": 0.9489, "nll_loss": 0.8988779187202454, "rewards/accuracies": 0.875, "rewards/chosen": -0.2884557247161865, "rewards/margins": 0.05218993127346039, "rewards/rejected": -0.3406457006931305, "step": 1032 }, { "epoch": 2.8281998631074607, "grad_norm": 3.6559319496154785, "learning_rate": 8.586301369863013e-07, "log_odds_chosen": 0.5066375732421875, "log_odds_ratio": -0.5596879720687866, "logits/chosen": 0.5462527871131897, "logits/rejected": 0.6542672514915466, "logps/chosen": -2.786935806274414, "logps/rejected": -3.2534773349761963, "loss": 1.0023, "nll_loss": 0.9463064670562744, "rewards/accuracies": 0.75, "rewards/chosen": -0.27869361639022827, "rewards/margins": 0.046654146164655685, "rewards/rejected": -0.32534775137901306, "step": 1033 }, { "epoch": 2.830937713894593, "grad_norm": 3.421713352203369, "learning_rate": 8.584931506849314e-07, "log_odds_chosen": 0.15732762217521667, "log_odds_ratio": -0.7202560901641846, "logits/chosen": 0.4300024211406708, "logits/rejected": 0.3820986747741699, "logps/chosen": -2.1821093559265137, "logps/rejected": -2.317335367202759, "loss": 1.1468, "nll_loss": 1.0747689008712769, "rewards/accuracies": 0.625, "rewards/chosen": -0.21821093559265137, "rewards/margins": 0.013522610068321228, "rewards/rejected": -0.2317335456609726, "step": 1034 }, { "epoch": 2.833675564681725, "grad_norm": 4.6587371826171875, "learning_rate": 8.583561643835616e-07, "log_odds_chosen": 0.13994556665420532, "log_odds_ratio": -0.7596888542175293, "logits/chosen": 0.6130975484848022, "logits/rejected": 0.677849292755127, "logps/chosen": -3.4793927669525146, "logps/rejected": -3.602951765060425, "loss": 0.9374, "nll_loss": 0.8614628911018372, "rewards/accuracies": 0.5, "rewards/chosen": -0.34793925285339355, "rewards/margins": 0.012355921790003777, "rewards/rejected": -0.36029520630836487, "step": 1035 }, { "epoch": 2.836413415468857, "grad_norm": 4.063802242279053, "learning_rate": 8.582191780821918e-07, "log_odds_chosen": 1.2766255140304565, "log_odds_ratio": -0.44797325134277344, "logits/chosen": 0.5841920375823975, "logits/rejected": 0.5797046422958374, "logps/chosen": -3.2214293479919434, "logps/rejected": -4.464583873748779, "loss": 0.9437, "nll_loss": 0.8989427089691162, "rewards/accuracies": 0.75, "rewards/chosen": -0.32214295864105225, "rewards/margins": 0.12431541830301285, "rewards/rejected": -0.4464583694934845, "step": 1036 }, { "epoch": 2.839151266255989, "grad_norm": 4.244247913360596, "learning_rate": 8.580821917808218e-07, "log_odds_chosen": -0.4653699994087219, "log_odds_ratio": -1.1229873895645142, "logits/chosen": 0.4330012798309326, "logits/rejected": 0.49547189474105835, "logps/chosen": -3.1984243392944336, "logps/rejected": -2.747178554534912, "loss": 1.1992, "nll_loss": 1.0868735313415527, "rewards/accuracies": 0.5, "rewards/chosen": -0.3198423981666565, "rewards/margins": -0.04512454569339752, "rewards/rejected": -0.27471789717674255, "step": 1037 }, { "epoch": 2.841889117043121, "grad_norm": 3.9188249111175537, "learning_rate": 8.57945205479452e-07, "log_odds_chosen": 1.7052079439163208, "log_odds_ratio": -0.5385009050369263, "logits/chosen": 0.48477357625961304, "logits/rejected": 0.5372616052627563, "logps/chosen": -3.036135196685791, "logps/rejected": -4.682023048400879, "loss": 1.0668, "nll_loss": 1.0129119157791138, "rewards/accuracies": 0.75, "rewards/chosen": -0.303613543510437, "rewards/margins": 0.1645887941122055, "rewards/rejected": -0.4682023525238037, "step": 1038 }, { "epoch": 2.844626967830253, "grad_norm": 4.299984455108643, "learning_rate": 8.578082191780821e-07, "log_odds_chosen": 0.32549619674682617, "log_odds_ratio": -0.7844842672348022, "logits/chosen": 0.4117787778377533, "logits/rejected": 0.272124320268631, "logps/chosen": -2.3953452110290527, "logps/rejected": -2.7046141624450684, "loss": 1.1591, "nll_loss": 1.080655574798584, "rewards/accuracies": 0.5, "rewards/chosen": -0.23953452706336975, "rewards/margins": 0.030926909297704697, "rewards/rejected": -0.27046144008636475, "step": 1039 }, { "epoch": 2.847364818617385, "grad_norm": 4.051291465759277, "learning_rate": 8.576712328767122e-07, "log_odds_chosen": 0.05284050852060318, "log_odds_ratio": -0.7312915325164795, "logits/chosen": 0.47449231147766113, "logits/rejected": 0.585274875164032, "logps/chosen": -2.631303310394287, "logps/rejected": -2.6684231758117676, "loss": 1.0645, "nll_loss": 0.9913376569747925, "rewards/accuracies": 0.625, "rewards/chosen": -0.2631303668022156, "rewards/margins": 0.0037119612097740173, "rewards/rejected": -0.2668423056602478, "step": 1040 }, { "epoch": 2.8501026694045173, "grad_norm": 3.975262403488159, "learning_rate": 8.575342465753424e-07, "log_odds_chosen": 0.5442509055137634, "log_odds_ratio": -0.5159283876419067, "logits/chosen": 0.44343018531799316, "logits/rejected": 0.4158746302127838, "logps/chosen": -2.7672770023345947, "logps/rejected": -3.25533390045166, "loss": 1.0801, "nll_loss": 1.0285391807556152, "rewards/accuracies": 0.875, "rewards/chosen": -0.27672770619392395, "rewards/margins": 0.04880569875240326, "rewards/rejected": -0.3255334198474884, "step": 1041 }, { "epoch": 2.8528405201916494, "grad_norm": 3.6050286293029785, "learning_rate": 8.573972602739725e-07, "log_odds_chosen": 0.4291483163833618, "log_odds_ratio": -0.7388418316841125, "logits/chosen": 0.6847472190856934, "logits/rejected": 0.764018714427948, "logps/chosen": -3.138817548751831, "logps/rejected": -3.5710244178771973, "loss": 0.9799, "nll_loss": 0.9059978127479553, "rewards/accuracies": 0.375, "rewards/chosen": -0.3138817846775055, "rewards/margins": 0.04322066530585289, "rewards/rejected": -0.3571024537086487, "step": 1042 }, { "epoch": 2.8555783709787814, "grad_norm": 4.813912391662598, "learning_rate": 8.572602739726027e-07, "log_odds_chosen": -0.01780731976032257, "log_odds_ratio": -0.8546100854873657, "logits/chosen": 0.4757990837097168, "logits/rejected": 0.44789740443229675, "logps/chosen": -2.271620035171509, "logps/rejected": -2.224241256713867, "loss": 1.1633, "nll_loss": 1.0778462886810303, "rewards/accuracies": 0.75, "rewards/chosen": -0.22716201841831207, "rewards/margins": -0.004737894982099533, "rewards/rejected": -0.22242411971092224, "step": 1043 }, { "epoch": 2.8583162217659135, "grad_norm": 4.40690279006958, "learning_rate": 8.571232876712328e-07, "log_odds_chosen": -0.028547421097755432, "log_odds_ratio": -0.8207007646560669, "logits/chosen": 0.3819478750228882, "logits/rejected": 0.3900260031223297, "logps/chosen": -3.327298164367676, "logps/rejected": -3.286348342895508, "loss": 1.0833, "nll_loss": 1.0012385845184326, "rewards/accuracies": 0.25, "rewards/chosen": -0.3327298164367676, "rewards/margins": -0.004094959236681461, "rewards/rejected": -0.3286348581314087, "step": 1044 }, { "epoch": 2.861054072553046, "grad_norm": 3.3529868125915527, "learning_rate": 8.569863013698629e-07, "log_odds_chosen": 0.7860445976257324, "log_odds_ratio": -0.43990087509155273, "logits/chosen": 0.5054593086242676, "logits/rejected": 0.4898329973220825, "logps/chosen": -2.905916213989258, "logps/rejected": -3.636094570159912, "loss": 1.1414, "nll_loss": 1.0973680019378662, "rewards/accuracies": 0.75, "rewards/chosen": -0.29059162735939026, "rewards/margins": 0.07301781326532364, "rewards/rejected": -0.3636094331741333, "step": 1045 }, { "epoch": 2.863791923340178, "grad_norm": 3.93229603767395, "learning_rate": 8.568493150684932e-07, "log_odds_chosen": -0.4525734484195709, "log_odds_ratio": -0.9771037101745605, "logits/chosen": 0.3393443524837494, "logits/rejected": 0.3000895380973816, "logps/chosen": -2.7081856727600098, "logps/rejected": -2.293483257293701, "loss": 1.1951, "nll_loss": 1.0974195003509521, "rewards/accuracies": 0.125, "rewards/chosen": -0.2708185613155365, "rewards/margins": -0.0414702445268631, "rewards/rejected": -0.2293483316898346, "step": 1046 }, { "epoch": 2.86652977412731, "grad_norm": 4.806082248687744, "learning_rate": 8.567123287671233e-07, "log_odds_chosen": 0.17469242215156555, "log_odds_ratio": -0.7397005558013916, "logits/chosen": 0.3289763331413269, "logits/rejected": 0.3518941104412079, "logps/chosen": -2.8177034854888916, "logps/rejected": -2.954655647277832, "loss": 1.1033, "nll_loss": 1.0293265581130981, "rewards/accuracies": 0.375, "rewards/chosen": -0.28177034854888916, "rewards/margins": 0.013695216737687588, "rewards/rejected": -0.2954655587673187, "step": 1047 }, { "epoch": 2.869267624914442, "grad_norm": 4.6314802169799805, "learning_rate": 8.565753424657534e-07, "log_odds_chosen": 0.3975088894367218, "log_odds_ratio": -0.6760126352310181, "logits/chosen": 0.5317812561988831, "logits/rejected": 0.6135949492454529, "logps/chosen": -2.581355094909668, "logps/rejected": -2.930572509765625, "loss": 0.981, "nll_loss": 0.9133665561676025, "rewards/accuracies": 0.5, "rewards/chosen": -0.25813552737236023, "rewards/margins": 0.034921735525131226, "rewards/rejected": -0.29305726289749146, "step": 1048 }, { "epoch": 2.8720054757015743, "grad_norm": 4.389389991760254, "learning_rate": 8.564383561643836e-07, "log_odds_chosen": -0.07807792723178864, "log_odds_ratio": -0.8231141567230225, "logits/chosen": 0.5649828910827637, "logits/rejected": 0.5851969122886658, "logps/chosen": -2.9649386405944824, "logps/rejected": -2.859652519226074, "loss": 0.9851, "nll_loss": 0.90276700258255, "rewards/accuracies": 0.375, "rewards/chosen": -0.29649388790130615, "rewards/margins": -0.010528624057769775, "rewards/rejected": -0.2859652638435364, "step": 1049 }, { "epoch": 2.8747433264887063, "grad_norm": 5.509857177734375, "learning_rate": 8.563013698630138e-07, "log_odds_chosen": -0.37648892402648926, "log_odds_ratio": -1.0541104078292847, "logits/chosen": 0.5646281242370605, "logits/rejected": 0.642844021320343, "logps/chosen": -3.2172183990478516, "logps/rejected": -2.86961030960083, "loss": 1.1055, "nll_loss": 1.0000542402267456, "rewards/accuracies": 0.375, "rewards/chosen": -0.3217218518257141, "rewards/margins": -0.034760814160108566, "rewards/rejected": -0.28696101903915405, "step": 1050 }, { "epoch": 2.8774811772758384, "grad_norm": 3.653001546859741, "learning_rate": 8.561643835616438e-07, "log_odds_chosen": 0.5747493505477905, "log_odds_ratio": -0.5630573630332947, "logits/chosen": 0.397123783826828, "logits/rejected": 0.47480103373527527, "logps/chosen": -2.632547378540039, "logps/rejected": -3.1624670028686523, "loss": 1.0199, "nll_loss": 0.9635666608810425, "rewards/accuracies": 0.875, "rewards/chosen": -0.2632547616958618, "rewards/margins": 0.05299194157123566, "rewards/rejected": -0.3162466883659363, "step": 1051 }, { "epoch": 2.8802190280629705, "grad_norm": 3.361938714981079, "learning_rate": 8.56027397260274e-07, "log_odds_chosen": 0.4167420268058777, "log_odds_ratio": -0.6221295595169067, "logits/chosen": 0.4867136478424072, "logits/rejected": 0.4370116591453552, "logps/chosen": -2.2466235160827637, "logps/rejected": -2.647214412689209, "loss": 1.1555, "nll_loss": 1.0932985544204712, "rewards/accuracies": 0.5, "rewards/chosen": -0.22466233372688293, "rewards/margins": 0.04005908966064453, "rewards/rejected": -0.26472145318984985, "step": 1052 }, { "epoch": 2.8829568788501025, "grad_norm": 3.1126649379730225, "learning_rate": 8.558904109589041e-07, "log_odds_chosen": 0.4790472388267517, "log_odds_ratio": -0.5870336294174194, "logits/chosen": 0.6108031868934631, "logits/rejected": 0.591171145439148, "logps/chosen": -2.2497310638427734, "logps/rejected": -2.7207725048065186, "loss": 0.9635, "nll_loss": 0.9047872424125671, "rewards/accuracies": 0.5, "rewards/chosen": -0.22497311234474182, "rewards/margins": 0.0471041314303875, "rewards/rejected": -0.2720772325992584, "step": 1053 }, { "epoch": 2.8856947296372346, "grad_norm": 3.6686837673187256, "learning_rate": 8.557534246575342e-07, "log_odds_chosen": 0.35681578516960144, "log_odds_ratio": -0.5884965658187866, "logits/chosen": 0.3756280541419983, "logits/rejected": 0.31318387389183044, "logps/chosen": -2.635599136352539, "logps/rejected": -2.9688057899475098, "loss": 1.0942, "nll_loss": 1.0353070497512817, "rewards/accuracies": 0.875, "rewards/chosen": -0.26355990767478943, "rewards/margins": 0.03332067281007767, "rewards/rejected": -0.2968806028366089, "step": 1054 }, { "epoch": 2.888432580424367, "grad_norm": 3.295316696166992, "learning_rate": 8.556164383561644e-07, "log_odds_chosen": 0.5437831878662109, "log_odds_ratio": -0.5294989347457886, "logits/chosen": 0.4913109540939331, "logits/rejected": 0.3760920763015747, "logps/chosen": -2.1192498207092285, "logps/rejected": -2.624934196472168, "loss": 1.1342, "nll_loss": 1.081299901008606, "rewards/accuracies": 0.75, "rewards/chosen": -0.21192499995231628, "rewards/margins": 0.05056843161582947, "rewards/rejected": -0.26249343156814575, "step": 1055 }, { "epoch": 2.891170431211499, "grad_norm": 3.1751859188079834, "learning_rate": 8.554794520547945e-07, "log_odds_chosen": 0.603735625743866, "log_odds_ratio": -0.5458619594573975, "logits/chosen": 0.41608119010925293, "logits/rejected": 0.5124208331108093, "logps/chosen": -2.6569855213165283, "logps/rejected": -3.2434520721435547, "loss": 1.0587, "nll_loss": 1.0041074752807617, "rewards/accuracies": 0.75, "rewards/chosen": -0.26569855213165283, "rewards/margins": 0.05864666774868965, "rewards/rejected": -0.3243452310562134, "step": 1056 }, { "epoch": 2.8939082819986313, "grad_norm": 4.053317546844482, "learning_rate": 8.553424657534247e-07, "log_odds_chosen": 0.10722124576568604, "log_odds_ratio": -0.758916974067688, "logits/chosen": 0.40645918250083923, "logits/rejected": 0.37616896629333496, "logps/chosen": -2.98848295211792, "logps/rejected": -3.057655096054077, "loss": 1.037, "nll_loss": 0.961073100566864, "rewards/accuracies": 0.625, "rewards/chosen": -0.2988482713699341, "rewards/margins": 0.0069172196090221405, "rewards/rejected": -0.3057655096054077, "step": 1057 }, { "epoch": 2.8966461327857633, "grad_norm": 4.713573455810547, "learning_rate": 8.552054794520548e-07, "log_odds_chosen": 0.09975528717041016, "log_odds_ratio": -0.799121618270874, "logits/chosen": 0.48305684328079224, "logits/rejected": 0.653564453125, "logps/chosen": -2.880378484725952, "logps/rejected": -2.982727289199829, "loss": 0.9762, "nll_loss": 0.8962457180023193, "rewards/accuracies": 0.75, "rewards/chosen": -0.28803783655166626, "rewards/margins": 0.010234903544187546, "rewards/rejected": -0.2982727587223053, "step": 1058 }, { "epoch": 2.8993839835728954, "grad_norm": 3.080852746963501, "learning_rate": 8.550684931506849e-07, "log_odds_chosen": 0.8194645047187805, "log_odds_ratio": -0.4987361431121826, "logits/chosen": 0.3005235195159912, "logits/rejected": 0.2857028543949127, "logps/chosen": -2.028237819671631, "logps/rejected": -2.800900936126709, "loss": 1.1311, "nll_loss": 1.0811957120895386, "rewards/accuracies": 0.75, "rewards/chosen": -0.20282377302646637, "rewards/margins": 0.07726635038852692, "rewards/rejected": -0.2800901234149933, "step": 1059 }, { "epoch": 2.9021218343600275, "grad_norm": 6.273251056671143, "learning_rate": 8.549315068493151e-07, "log_odds_chosen": -0.6821486353874207, "log_odds_ratio": -1.1946194171905518, "logits/chosen": 0.47301626205444336, "logits/rejected": 0.45858511328697205, "logps/chosen": -3.101133346557617, "logps/rejected": -2.4573116302490234, "loss": 1.0921, "nll_loss": 0.9726449847221375, "rewards/accuracies": 0.25, "rewards/chosen": -0.3101133406162262, "rewards/margins": -0.06438218057155609, "rewards/rejected": -0.2457311749458313, "step": 1060 }, { "epoch": 2.9048596851471595, "grad_norm": 3.2670469284057617, "learning_rate": 8.547945205479452e-07, "log_odds_chosen": 0.7383815050125122, "log_odds_ratio": -0.42638176679611206, "logits/chosen": 0.30119559168815613, "logits/rejected": 0.3225591778755188, "logps/chosen": -2.3852384090423584, "logps/rejected": -3.052255392074585, "loss": 1.1282, "nll_loss": 1.0855928659439087, "rewards/accuracies": 0.875, "rewards/chosen": -0.23852385580539703, "rewards/margins": 0.06670171022415161, "rewards/rejected": -0.30522555112838745, "step": 1061 }, { "epoch": 2.9075975359342916, "grad_norm": 4.363396644592285, "learning_rate": 8.546575342465753e-07, "log_odds_chosen": -0.09334895014762878, "log_odds_ratio": -0.8527834415435791, "logits/chosen": 0.43440377712249756, "logits/rejected": 0.3801512122154236, "logps/chosen": -2.751237630844116, "logps/rejected": -2.653074026107788, "loss": 1.0993, "nll_loss": 1.0140196084976196, "rewards/accuracies": 0.5, "rewards/chosen": -0.2751237750053406, "rewards/margins": -0.009816372767090797, "rewards/rejected": -0.26530739665031433, "step": 1062 }, { "epoch": 2.9103353867214237, "grad_norm": 3.48946213722229, "learning_rate": 8.545205479452055e-07, "log_odds_chosen": 1.4752938747406006, "log_odds_ratio": -0.32994920015335083, "logits/chosen": 0.6497585773468018, "logits/rejected": 0.662778913974762, "logps/chosen": -1.9224541187286377, "logps/rejected": -3.268187999725342, "loss": 0.8873, "nll_loss": 0.8543117046356201, "rewards/accuracies": 1.0, "rewards/chosen": -0.19224542379379272, "rewards/margins": 0.13457337021827698, "rewards/rejected": -0.3268188238143921, "step": 1063 }, { "epoch": 2.9130732375085557, "grad_norm": 3.3010647296905518, "learning_rate": 8.543835616438357e-07, "log_odds_chosen": 0.4925878643989563, "log_odds_ratio": -0.5908398628234863, "logits/chosen": 0.5922267436981201, "logits/rejected": 0.5304136276245117, "logps/chosen": -2.0212838649749756, "logps/rejected": -2.4839694499969482, "loss": 1.0225, "nll_loss": 0.9634159803390503, "rewards/accuracies": 0.625, "rewards/chosen": -0.20212838053703308, "rewards/margins": 0.04626856744289398, "rewards/rejected": -0.24839694797992706, "step": 1064 }, { "epoch": 2.915811088295688, "grad_norm": 3.797384262084961, "learning_rate": 8.542465753424657e-07, "log_odds_chosen": 0.8026271462440491, "log_odds_ratio": -0.48846229910850525, "logits/chosen": 0.5084589719772339, "logits/rejected": 0.4302089810371399, "logps/chosen": -1.8551805019378662, "logps/rejected": -2.57186222076416, "loss": 1.1188, "nll_loss": 1.0699455738067627, "rewards/accuracies": 0.75, "rewards/chosen": -0.1855180561542511, "rewards/margins": 0.07166815549135208, "rewards/rejected": -0.2571862041950226, "step": 1065 }, { "epoch": 2.91854893908282, "grad_norm": 4.078408241271973, "learning_rate": 8.541095890410959e-07, "log_odds_chosen": 0.7064328193664551, "log_odds_ratio": -0.5448713302612305, "logits/chosen": 0.6802238821983337, "logits/rejected": 0.6278221607208252, "logps/chosen": -3.253472328186035, "logps/rejected": -3.930792808532715, "loss": 0.9504, "nll_loss": 0.8959028720855713, "rewards/accuracies": 0.75, "rewards/chosen": -0.32534724473953247, "rewards/margins": 0.06773205101490021, "rewards/rejected": -0.3930792808532715, "step": 1066 }, { "epoch": 2.921286789869952, "grad_norm": 4.384359359741211, "learning_rate": 8.53972602739726e-07, "log_odds_chosen": 0.30823251605033875, "log_odds_ratio": -0.8085429668426514, "logits/chosen": 0.6573145985603333, "logits/rejected": 0.5381350517272949, "logps/chosen": -2.7810075283050537, "logps/rejected": -3.07643723487854, "loss": 1.1251, "nll_loss": 1.044291615486145, "rewards/accuracies": 0.5, "rewards/chosen": -0.27810075879096985, "rewards/margins": 0.029542986303567886, "rewards/rejected": -0.30764374136924744, "step": 1067 }, { "epoch": 2.924024640657084, "grad_norm": 3.9421064853668213, "learning_rate": 8.538356164383561e-07, "log_odds_chosen": 0.4871816337108612, "log_odds_ratio": -0.5428178906440735, "logits/chosen": 0.37968385219573975, "logits/rejected": 0.4714009463787079, "logps/chosen": -2.5873911380767822, "logps/rejected": -3.022930145263672, "loss": 1.0438, "nll_loss": 0.9894930124282837, "rewards/accuracies": 0.75, "rewards/chosen": -0.2587391138076782, "rewards/margins": 0.04355388507246971, "rewards/rejected": -0.30229300260543823, "step": 1068 }, { "epoch": 2.926762491444216, "grad_norm": 4.993864059448242, "learning_rate": 8.536986301369863e-07, "log_odds_chosen": -0.2071785032749176, "log_odds_ratio": -0.8481540083885193, "logits/chosen": 0.43042418360710144, "logits/rejected": 0.40801486372947693, "logps/chosen": -2.6240291595458984, "logps/rejected": -2.428447723388672, "loss": 1.289, "nll_loss": 1.2041938304901123, "rewards/accuracies": 0.375, "rewards/chosen": -0.2624029219150543, "rewards/margins": -0.019558150321245193, "rewards/rejected": -0.24284477531909943, "step": 1069 }, { "epoch": 2.929500342231348, "grad_norm": 3.29895281791687, "learning_rate": 8.535616438356164e-07, "log_odds_chosen": 0.23840053379535675, "log_odds_ratio": -0.6393522024154663, "logits/chosen": 0.4322320222854614, "logits/rejected": 0.40155673027038574, "logps/chosen": -2.556253433227539, "logps/rejected": -2.7643656730651855, "loss": 1.0599, "nll_loss": 0.9959251880645752, "rewards/accuracies": 0.5, "rewards/chosen": -0.25562533736228943, "rewards/margins": 0.020811233669519424, "rewards/rejected": -0.27643656730651855, "step": 1070 }, { "epoch": 2.9322381930184807, "grad_norm": 3.7840707302093506, "learning_rate": 8.534246575342465e-07, "log_odds_chosen": 0.35269564390182495, "log_odds_ratio": -0.6308413743972778, "logits/chosen": 0.4135705828666687, "logits/rejected": 0.33618050813674927, "logps/chosen": -2.9897685050964355, "logps/rejected": -3.322016477584839, "loss": 1.1811, "nll_loss": 1.1179828643798828, "rewards/accuracies": 0.5, "rewards/chosen": -0.2989768385887146, "rewards/margins": 0.03322482109069824, "rewards/rejected": -0.33220165967941284, "step": 1071 }, { "epoch": 2.9349760438056127, "grad_norm": 4.0006561279296875, "learning_rate": 8.532876712328767e-07, "log_odds_chosen": 0.8018959164619446, "log_odds_ratio": -0.6808776259422302, "logits/chosen": 0.4688799977302551, "logits/rejected": 0.3034588694572449, "logps/chosen": -2.0894973278045654, "logps/rejected": -2.793344020843506, "loss": 1.1649, "nll_loss": 1.0967683792114258, "rewards/accuracies": 0.875, "rewards/chosen": -0.2089497447013855, "rewards/margins": 0.0703846663236618, "rewards/rejected": -0.2793343961238861, "step": 1072 }, { "epoch": 2.937713894592745, "grad_norm": 3.5168616771698, "learning_rate": 8.531506849315068e-07, "log_odds_chosen": 0.42379191517829895, "log_odds_ratio": -0.5324863791465759, "logits/chosen": 0.3916153013706207, "logits/rejected": 0.41514724493026733, "logps/chosen": -2.3661630153656006, "logps/rejected": -2.7538909912109375, "loss": 1.0396, "nll_loss": 0.9863921999931335, "rewards/accuracies": 0.625, "rewards/chosen": -0.23661629855632782, "rewards/margins": 0.038772813975811005, "rewards/rejected": -0.2753891050815582, "step": 1073 }, { "epoch": 2.940451745379877, "grad_norm": 5.223083019256592, "learning_rate": 8.53013698630137e-07, "log_odds_chosen": 0.9763792753219604, "log_odds_ratio": -0.41705623269081116, "logits/chosen": 0.5676807165145874, "logits/rejected": 0.6365630030632019, "logps/chosen": -2.1330697536468506, "logps/rejected": -3.0625898838043213, "loss": 0.9858, "nll_loss": 0.9441126585006714, "rewards/accuracies": 0.875, "rewards/chosen": -0.2133069634437561, "rewards/margins": 0.09295200556516647, "rewards/rejected": -0.3062589764595032, "step": 1074 }, { "epoch": 2.943189596167009, "grad_norm": 3.6809353828430176, "learning_rate": 8.528767123287671e-07, "log_odds_chosen": 0.36977502703666687, "log_odds_ratio": -0.7116491198539734, "logits/chosen": 0.5625091791152954, "logits/rejected": 0.4810377359390259, "logps/chosen": -2.4540293216705322, "logps/rejected": -2.7821807861328125, "loss": 1.178, "nll_loss": 1.1068673133850098, "rewards/accuracies": 0.5, "rewards/chosen": -0.24540293216705322, "rewards/margins": 0.03281515836715698, "rewards/rejected": -0.2782180905342102, "step": 1075 }, { "epoch": 2.945927446954141, "grad_norm": 3.8984744548797607, "learning_rate": 8.527397260273972e-07, "log_odds_chosen": -0.030005037784576416, "log_odds_ratio": -0.8449327945709229, "logits/chosen": 0.5378350019454956, "logits/rejected": 0.4865046739578247, "logps/chosen": -2.421260356903076, "logps/rejected": -2.428743362426758, "loss": 1.1221, "nll_loss": 1.037613034248352, "rewards/accuracies": 0.5, "rewards/chosen": -0.24212606251239777, "rewards/margins": 0.0007482767105102539, "rewards/rejected": -0.24287432432174683, "step": 1076 }, { "epoch": 2.948665297741273, "grad_norm": 2.977863073348999, "learning_rate": 8.526027397260274e-07, "log_odds_chosen": 0.8288807272911072, "log_odds_ratio": -0.4474060833454132, "logits/chosen": 0.3512522578239441, "logits/rejected": 0.21185189485549927, "logps/chosen": -2.415830612182617, "logps/rejected": -3.1725873947143555, "loss": 1.1696, "nll_loss": 1.1248904466629028, "rewards/accuracies": 0.75, "rewards/chosen": -0.24158304929733276, "rewards/margins": 0.07567566633224487, "rewards/rejected": -0.3172587454319, "step": 1077 }, { "epoch": 2.951403148528405, "grad_norm": 3.6360490322113037, "learning_rate": 8.524657534246575e-07, "log_odds_chosen": -0.04286421462893486, "log_odds_ratio": -0.7887413501739502, "logits/chosen": 0.5256032347679138, "logits/rejected": 0.5227595567703247, "logps/chosen": -2.4303886890411377, "logps/rejected": -2.3758721351623535, "loss": 1.0539, "nll_loss": 0.9750450849533081, "rewards/accuracies": 0.625, "rewards/chosen": -0.24303887784481049, "rewards/margins": -0.005451652221381664, "rewards/rejected": -0.23758721351623535, "step": 1078 }, { "epoch": 2.954140999315537, "grad_norm": 4.2144904136657715, "learning_rate": 8.523287671232876e-07, "log_odds_chosen": -0.4755293130874634, "log_odds_ratio": -1.2417197227478027, "logits/chosen": 0.6065382957458496, "logits/rejected": 0.6974039673805237, "logps/chosen": -3.3167948722839355, "logps/rejected": -2.8259730339050293, "loss": 1.0938, "nll_loss": 0.9696172475814819, "rewards/accuracies": 0.5, "rewards/chosen": -0.3316795229911804, "rewards/margins": -0.0490821897983551, "rewards/rejected": -0.28259730339050293, "step": 1079 }, { "epoch": 2.9568788501026693, "grad_norm": 4.4452104568481445, "learning_rate": 8.521917808219178e-07, "log_odds_chosen": 0.6456948518753052, "log_odds_ratio": -0.5530738830566406, "logits/chosen": 0.5226997137069702, "logits/rejected": 0.6442939043045044, "logps/chosen": -2.613295555114746, "logps/rejected": -3.2016611099243164, "loss": 0.9626, "nll_loss": 0.9073024392127991, "rewards/accuracies": 0.75, "rewards/chosen": -0.2613295614719391, "rewards/margins": 0.058836549520492554, "rewards/rejected": -0.32016611099243164, "step": 1080 }, { "epoch": 2.9596167008898018, "grad_norm": 3.197638750076294, "learning_rate": 8.52054794520548e-07, "log_odds_chosen": 0.9719197750091553, "log_odds_ratio": -0.44028130173683167, "logits/chosen": 0.4570087492465973, "logits/rejected": 0.4812011122703552, "logps/chosen": -2.8867075443267822, "logps/rejected": -3.8180899620056152, "loss": 1.0051, "nll_loss": 0.9610713720321655, "rewards/accuracies": 0.75, "rewards/chosen": -0.28867077827453613, "rewards/margins": 0.09313822537660599, "rewards/rejected": -0.3818089962005615, "step": 1081 }, { "epoch": 2.962354551676934, "grad_norm": 5.022212028503418, "learning_rate": 8.51917808219178e-07, "log_odds_chosen": 0.05481252074241638, "log_odds_ratio": -0.843172013759613, "logits/chosen": 0.5449603796005249, "logits/rejected": 0.6571956872940063, "logps/chosen": -3.156665325164795, "logps/rejected": -3.1873135566711426, "loss": 1.0138, "nll_loss": 0.9294751882553101, "rewards/accuracies": 0.625, "rewards/chosen": -0.3156665563583374, "rewards/margins": 0.0030647944658994675, "rewards/rejected": -0.3187313377857208, "step": 1082 }, { "epoch": 2.965092402464066, "grad_norm": 3.8022632598876953, "learning_rate": 8.517808219178082e-07, "log_odds_chosen": 0.15918958187103271, "log_odds_ratio": -0.6590489149093628, "logits/chosen": 0.4743727445602417, "logits/rejected": 0.5148096084594727, "logps/chosen": -2.9966273307800293, "logps/rejected": -3.1493301391601562, "loss": 0.9583, "nll_loss": 0.8923465013504028, "rewards/accuracies": 0.5, "rewards/chosen": -0.2996627390384674, "rewards/margins": 0.01527027040719986, "rewards/rejected": -0.31493300199508667, "step": 1083 }, { "epoch": 2.967830253251198, "grad_norm": 4.162055492401123, "learning_rate": 8.516438356164383e-07, "log_odds_chosen": 0.5865716934204102, "log_odds_ratio": -0.5133551359176636, "logits/chosen": 0.46484702825546265, "logits/rejected": 0.5441806316375732, "logps/chosen": -2.928227424621582, "logps/rejected": -3.4540927410125732, "loss": 1.0248, "nll_loss": 0.9734206199645996, "rewards/accuracies": 0.75, "rewards/chosen": -0.2928227484226227, "rewards/margins": 0.05258653312921524, "rewards/rejected": -0.3454092741012573, "step": 1084 }, { "epoch": 2.97056810403833, "grad_norm": 3.791292905807495, "learning_rate": 8.515068493150684e-07, "log_odds_chosen": 0.2955265939235687, "log_odds_ratio": -0.6474123597145081, "logits/chosen": 0.35366344451904297, "logits/rejected": 0.38920652866363525, "logps/chosen": -2.435291051864624, "logps/rejected": -2.708982467651367, "loss": 1.1478, "nll_loss": 1.0831047296524048, "rewards/accuracies": 0.5, "rewards/chosen": -0.24352911114692688, "rewards/margins": 0.027369149029254913, "rewards/rejected": -0.2708982825279236, "step": 1085 }, { "epoch": 2.973305954825462, "grad_norm": 4.0268120765686035, "learning_rate": 8.513698630136986e-07, "log_odds_chosen": 0.3077060282230377, "log_odds_ratio": -0.7114794254302979, "logits/chosen": 0.37503331899642944, "logits/rejected": 0.3613373637199402, "logps/chosen": -2.277714967727661, "logps/rejected": -2.5318849086761475, "loss": 1.1521, "nll_loss": 1.0809416770935059, "rewards/accuracies": 0.625, "rewards/chosen": -0.22777150571346283, "rewards/margins": 0.025416992604732513, "rewards/rejected": -0.25318849086761475, "step": 1086 }, { "epoch": 2.976043805612594, "grad_norm": 4.485495567321777, "learning_rate": 8.512328767123287e-07, "log_odds_chosen": 0.7268153429031372, "log_odds_ratio": -0.49667179584503174, "logits/chosen": 0.6288422346115112, "logits/rejected": 0.6796680688858032, "logps/chosen": -3.3050856590270996, "logps/rejected": -4.010418891906738, "loss": 1.0086, "nll_loss": 0.9589480757713318, "rewards/accuracies": 0.75, "rewards/chosen": -0.3305085599422455, "rewards/margins": 0.07053330540657043, "rewards/rejected": -0.4010418653488159, "step": 1087 }, { "epoch": 2.9787816563997263, "grad_norm": 4.365832328796387, "learning_rate": 8.510958904109589e-07, "log_odds_chosen": 0.38716235756874084, "log_odds_ratio": -0.7282390594482422, "logits/chosen": 0.6061268448829651, "logits/rejected": 0.6151992082595825, "logps/chosen": -2.8461713790893555, "logps/rejected": -3.1894261837005615, "loss": 1.0008, "nll_loss": 0.927998960018158, "rewards/accuracies": 0.75, "rewards/chosen": -0.2846171259880066, "rewards/margins": 0.03432551026344299, "rewards/rejected": -0.3189426362514496, "step": 1088 }, { "epoch": 2.9815195071868583, "grad_norm": 3.9274022579193115, "learning_rate": 8.50958904109589e-07, "log_odds_chosen": 0.5269902944564819, "log_odds_ratio": -0.5996541976928711, "logits/chosen": 0.4190492331981659, "logits/rejected": 0.3950091600418091, "logps/chosen": -2.8854880332946777, "logps/rejected": -3.401651382446289, "loss": 1.1698, "nll_loss": 1.1097946166992188, "rewards/accuracies": 0.5, "rewards/chosen": -0.2885488271713257, "rewards/margins": 0.05161634087562561, "rewards/rejected": -0.3401651382446289, "step": 1089 }, { "epoch": 2.9842573579739904, "grad_norm": 3.3061819076538086, "learning_rate": 8.508219178082191e-07, "log_odds_chosen": 0.07693883031606674, "log_odds_ratio": -0.6973108053207397, "logits/chosen": 0.5077610015869141, "logits/rejected": 0.4975038766860962, "logps/chosen": -2.298525094985962, "logps/rejected": -2.3417956829071045, "loss": 1.0483, "nll_loss": 0.9785549640655518, "rewards/accuracies": 0.625, "rewards/chosen": -0.22985251247882843, "rewards/margins": 0.0043270522728562355, "rewards/rejected": -0.23417958617210388, "step": 1090 }, { "epoch": 2.9869952087611225, "grad_norm": 3.1914360523223877, "learning_rate": 8.506849315068493e-07, "log_odds_chosen": 0.5533992648124695, "log_odds_ratio": -0.5011102557182312, "logits/chosen": 0.4544840157032013, "logits/rejected": 0.42294806241989136, "logps/chosen": -1.9828040599822998, "logps/rejected": -2.4640955924987793, "loss": 1.054, "nll_loss": 1.0038530826568604, "rewards/accuracies": 0.875, "rewards/chosen": -0.19828039407730103, "rewards/margins": 0.04812914878129959, "rewards/rejected": -0.2464095652103424, "step": 1091 }, { "epoch": 2.9897330595482545, "grad_norm": 3.9452853202819824, "learning_rate": 8.505479452054794e-07, "log_odds_chosen": -0.2647382616996765, "log_odds_ratio": -0.9143063426017761, "logits/chosen": 0.5235718488693237, "logits/rejected": 0.5267866253852844, "logps/chosen": -2.8351006507873535, "logps/rejected": -2.567516326904297, "loss": 1.1197, "nll_loss": 1.0282855033874512, "rewards/accuracies": 0.375, "rewards/chosen": -0.2835100591182709, "rewards/margins": -0.026758410036563873, "rewards/rejected": -0.2567516565322876, "step": 1092 }, { "epoch": 2.9924709103353866, "grad_norm": 5.39211368560791, "learning_rate": 8.504109589041095e-07, "log_odds_chosen": 0.7094399929046631, "log_odds_ratio": -0.6670178174972534, "logits/chosen": 0.64239102602005, "logits/rejected": 0.7971584796905518, "logps/chosen": -2.513106346130371, "logps/rejected": -3.140624761581421, "loss": 0.9754, "nll_loss": 0.9086589813232422, "rewards/accuracies": 0.75, "rewards/chosen": -0.25131064653396606, "rewards/margins": 0.06275186687707901, "rewards/rejected": -0.3140624761581421, "step": 1093 }, { "epoch": 2.9952087611225187, "grad_norm": 3.4524178504943848, "learning_rate": 8.502739726027397e-07, "log_odds_chosen": 0.5233734250068665, "log_odds_ratio": -0.537068784236908, "logits/chosen": 0.4421326518058777, "logits/rejected": 0.4454457461833954, "logps/chosen": -2.9293999671936035, "logps/rejected": -3.392819404602051, "loss": 1.04, "nll_loss": 0.9862737655639648, "rewards/accuracies": 0.75, "rewards/chosen": -0.2929399907588959, "rewards/margins": 0.0463419184088707, "rewards/rejected": -0.33928191661834717, "step": 1094 }, { "epoch": 2.9979466119096507, "grad_norm": 3.2548770904541016, "learning_rate": 8.501369863013699e-07, "log_odds_chosen": 0.34214478731155396, "log_odds_ratio": -0.7832180857658386, "logits/chosen": 0.4537121653556824, "logits/rejected": 0.3365020155906677, "logps/chosen": -3.0519113540649414, "logps/rejected": -3.3636727333068848, "loss": 1.1178, "nll_loss": 1.0394915342330933, "rewards/accuracies": 0.625, "rewards/chosen": -0.30519112944602966, "rewards/margins": 0.031176134943962097, "rewards/rejected": -0.33636724948883057, "step": 1095 }, { "epoch": 3.0006844626967832, "grad_norm": 6.0535054206848145, "learning_rate": 8.499999999999999e-07, "log_odds_chosen": -0.001589938998222351, "log_odds_ratio": -0.8077638745307922, "logits/chosen": 0.5640908479690552, "logits/rejected": 0.6124911308288574, "logps/chosen": -3.140289068222046, "logps/rejected": -3.1282594203948975, "loss": 1.0398, "nll_loss": 0.9590079188346863, "rewards/accuracies": 0.625, "rewards/chosen": -0.31402888894081116, "rewards/margins": -0.0012029726058244705, "rewards/rejected": -0.3128259479999542, "step": 1096 }, { "epoch": 3.0034223134839153, "grad_norm": 4.726376056671143, "learning_rate": 8.498630136986301e-07, "log_odds_chosen": 0.57806396484375, "log_odds_ratio": -0.7368850708007812, "logits/chosen": 0.6973724365234375, "logits/rejected": 0.793111264705658, "logps/chosen": -2.6684017181396484, "logps/rejected": -3.2288577556610107, "loss": 0.9891, "nll_loss": 0.9153734445571899, "rewards/accuracies": 0.5, "rewards/chosen": -0.2668401598930359, "rewards/margins": 0.05604559928178787, "rewards/rejected": -0.32288578152656555, "step": 1097 }, { "epoch": 3.0061601642710474, "grad_norm": 3.4259703159332275, "learning_rate": 8.497260273972602e-07, "log_odds_chosen": 0.6085277795791626, "log_odds_ratio": -0.6239432096481323, "logits/chosen": 0.4121125340461731, "logits/rejected": 0.3813566565513611, "logps/chosen": -2.445899486541748, "logps/rejected": -2.962026596069336, "loss": 0.966, "nll_loss": 0.9036445617675781, "rewards/accuracies": 0.625, "rewards/chosen": -0.24458998441696167, "rewards/margins": 0.05161269009113312, "rewards/rejected": -0.2962026596069336, "step": 1098 }, { "epoch": 3.0088980150581794, "grad_norm": 4.146224498748779, "learning_rate": 8.495890410958903e-07, "log_odds_chosen": 0.39737996459007263, "log_odds_ratio": -0.7897013425827026, "logits/chosen": 0.33591243624687195, "logits/rejected": 0.2399958074092865, "logps/chosen": -2.75510311126709, "logps/rejected": -3.1327121257781982, "loss": 1.0717, "nll_loss": 0.9927670955657959, "rewards/accuracies": 0.625, "rewards/chosen": -0.275510311126709, "rewards/margins": 0.037760913372039795, "rewards/rejected": -0.3132712244987488, "step": 1099 }, { "epoch": 3.0116358658453115, "grad_norm": 3.756699800491333, "learning_rate": 8.494520547945205e-07, "log_odds_chosen": 0.03354492038488388, "log_odds_ratio": -0.7056920528411865, "logits/chosen": 0.41779953241348267, "logits/rejected": 0.28730982542037964, "logps/chosen": -2.455413341522217, "logps/rejected": -2.4727373123168945, "loss": 1.155, "nll_loss": 1.0844541788101196, "rewards/accuracies": 0.375, "rewards/chosen": -0.24554131925106049, "rewards/margins": 0.0017324090003967285, "rewards/rejected": -0.2472737431526184, "step": 1100 }, { "epoch": 3.0143737166324436, "grad_norm": 3.426560163497925, "learning_rate": 8.493150684931506e-07, "log_odds_chosen": 1.14304518699646, "log_odds_ratio": -0.32867303490638733, "logits/chosen": 0.38789671659469604, "logits/rejected": 0.2928937077522278, "logps/chosen": -2.201029062271118, "logps/rejected": -3.2653026580810547, "loss": 1.063, "nll_loss": 1.030144214630127, "rewards/accuracies": 1.0, "rewards/chosen": -0.220102921128273, "rewards/margins": 0.10642734915018082, "rewards/rejected": -0.3265302777290344, "step": 1101 }, { "epoch": 3.0171115674195756, "grad_norm": 3.1700260639190674, "learning_rate": 8.491780821917808e-07, "log_odds_chosen": 0.22919118404388428, "log_odds_ratio": -0.6311253309249878, "logits/chosen": 0.4985957145690918, "logits/rejected": 0.4779033064842224, "logps/chosen": -2.0788381099700928, "logps/rejected": -2.280059814453125, "loss": 1.0277, "nll_loss": 0.9646130800247192, "rewards/accuracies": 0.625, "rewards/chosen": -0.2078838050365448, "rewards/margins": 0.020122166723012924, "rewards/rejected": -0.22800597548484802, "step": 1102 }, { "epoch": 3.0198494182067077, "grad_norm": 3.5106823444366455, "learning_rate": 8.490410958904109e-07, "log_odds_chosen": 0.5534350872039795, "log_odds_ratio": -0.49511316418647766, "logits/chosen": 0.5134620070457458, "logits/rejected": 0.4680269658565521, "logps/chosen": -2.132432460784912, "logps/rejected": -2.6191024780273438, "loss": 1.0029, "nll_loss": 0.953347384929657, "rewards/accuracies": 0.875, "rewards/chosen": -0.21324323117733002, "rewards/margins": 0.04866701364517212, "rewards/rejected": -0.26191025972366333, "step": 1103 }, { "epoch": 3.02258726899384, "grad_norm": 3.775783061981201, "learning_rate": 8.48904109589041e-07, "log_odds_chosen": 0.2498878836631775, "log_odds_ratio": -0.8223579525947571, "logits/chosen": 0.5680817365646362, "logits/rejected": 0.6398448944091797, "logps/chosen": -3.1269307136535645, "logps/rejected": -3.333174467086792, "loss": 1.1691, "nll_loss": 1.0868760347366333, "rewards/accuracies": 0.625, "rewards/chosen": -0.3126930892467499, "rewards/margins": 0.020624402910470963, "rewards/rejected": -0.33331745862960815, "step": 1104 }, { "epoch": 3.025325119780972, "grad_norm": 3.00822377204895, "learning_rate": 8.487671232876712e-07, "log_odds_chosen": 0.9969393014907837, "log_odds_ratio": -0.44675880670547485, "logits/chosen": 0.4104051887989044, "logits/rejected": 0.23276513814926147, "logps/chosen": -2.044386863708496, "logps/rejected": -2.9697442054748535, "loss": 1.1088, "nll_loss": 1.064155101776123, "rewards/accuracies": 0.875, "rewards/chosen": -0.204438716173172, "rewards/margins": 0.09253573417663574, "rewards/rejected": -0.29697442054748535, "step": 1105 }, { "epoch": 3.028062970568104, "grad_norm": 3.5713679790496826, "learning_rate": 8.486301369863013e-07, "log_odds_chosen": 0.3572862148284912, "log_odds_ratio": -0.6101360321044922, "logits/chosen": 0.664476215839386, "logits/rejected": 0.7052580118179321, "logps/chosen": -3.5207433700561523, "logps/rejected": -3.8541417121887207, "loss": 1.0183, "nll_loss": 0.9573332071304321, "rewards/accuracies": 0.625, "rewards/chosen": -0.3520743250846863, "rewards/margins": 0.03333984687924385, "rewards/rejected": -0.385414183139801, "step": 1106 }, { "epoch": 3.030800821355236, "grad_norm": 3.404196262359619, "learning_rate": 8.484931506849314e-07, "log_odds_chosen": 0.2209721803665161, "log_odds_ratio": -0.7348361015319824, "logits/chosen": 0.6376866698265076, "logits/rejected": 0.6323840618133545, "logps/chosen": -2.8681559562683105, "logps/rejected": -3.0585708618164062, "loss": 0.9883, "nll_loss": 0.914781391620636, "rewards/accuracies": 0.875, "rewards/chosen": -0.2868156135082245, "rewards/margins": 0.019041478633880615, "rewards/rejected": -0.3058570921421051, "step": 1107 }, { "epoch": 3.033538672142368, "grad_norm": 3.0443174839019775, "learning_rate": 8.483561643835616e-07, "log_odds_chosen": 0.51139897108078, "log_odds_ratio": -0.5390260219573975, "logits/chosen": 0.6192806363105774, "logits/rejected": 0.5346142053604126, "logps/chosen": -2.0043811798095703, "logps/rejected": -2.4665400981903076, "loss": 1.0737, "nll_loss": 1.0198242664337158, "rewards/accuracies": 0.625, "rewards/chosen": -0.20043812692165375, "rewards/margins": 0.04621588811278343, "rewards/rejected": -0.24665400385856628, "step": 1108 }, { "epoch": 3.0362765229295006, "grad_norm": 3.2648422718048096, "learning_rate": 8.482191780821918e-07, "log_odds_chosen": 0.4260786175727844, "log_odds_ratio": -0.5939290523529053, "logits/chosen": 0.6660647988319397, "logits/rejected": 0.7238752841949463, "logps/chosen": -2.532808780670166, "logps/rejected": -2.9400548934936523, "loss": 0.8541, "nll_loss": 0.794695258140564, "rewards/accuracies": 0.625, "rewards/chosen": -0.2532808780670166, "rewards/margins": 0.04072463512420654, "rewards/rejected": -0.29400551319122314, "step": 1109 }, { "epoch": 3.0390143737166326, "grad_norm": 4.295602321624756, "learning_rate": 8.480821917808218e-07, "log_odds_chosen": 0.5865780711174011, "log_odds_ratio": -0.5243850350379944, "logits/chosen": 0.5729355216026306, "logits/rejected": 0.5345748066902161, "logps/chosen": -2.8284425735473633, "logps/rejected": -3.385775327682495, "loss": 1.0938, "nll_loss": 1.0413932800292969, "rewards/accuracies": 0.75, "rewards/chosen": -0.28284427523612976, "rewards/margins": 0.055733270943164825, "rewards/rejected": -0.338577538728714, "step": 1110 }, { "epoch": 3.0417522245037647, "grad_norm": 5.051039695739746, "learning_rate": 8.47945205479452e-07, "log_odds_chosen": -0.23330746591091156, "log_odds_ratio": -0.874622642993927, "logits/chosen": 0.6712799072265625, "logits/rejected": 0.7373582720756531, "logps/chosen": -3.215064764022827, "logps/rejected": -2.978973865509033, "loss": 1.0782, "nll_loss": 0.9907592535018921, "rewards/accuracies": 0.375, "rewards/chosen": -0.32150647044181824, "rewards/margins": -0.02360909804701805, "rewards/rejected": -0.2978973686695099, "step": 1111 }, { "epoch": 3.0444900752908968, "grad_norm": 3.710304021835327, "learning_rate": 8.478082191780822e-07, "log_odds_chosen": 0.35424190759658813, "log_odds_ratio": -0.6321895718574524, "logits/chosen": 0.5914495587348938, "logits/rejected": 0.5660364627838135, "logps/chosen": -2.2433698177337646, "logps/rejected": -2.514340877532959, "loss": 1.0476, "nll_loss": 0.9843549728393555, "rewards/accuracies": 0.75, "rewards/chosen": -0.22433698177337646, "rewards/margins": 0.027097126469016075, "rewards/rejected": -0.2514341175556183, "step": 1112 }, { "epoch": 3.047227926078029, "grad_norm": 3.3077330589294434, "learning_rate": 8.476712328767122e-07, "log_odds_chosen": 0.6286577582359314, "log_odds_ratio": -0.4489762485027313, "logits/chosen": 0.5013689994812012, "logits/rejected": 0.42337220907211304, "logps/chosen": -1.8398702144622803, "logps/rejected": -2.3834872245788574, "loss": 1.0972, "nll_loss": 1.0522527694702148, "rewards/accuracies": 1.0, "rewards/chosen": -0.18398702144622803, "rewards/margins": 0.05436168611049652, "rewards/rejected": -0.23834872245788574, "step": 1113 }, { "epoch": 3.049965776865161, "grad_norm": 3.1743390560150146, "learning_rate": 8.475342465753424e-07, "log_odds_chosen": 0.6729664206504822, "log_odds_ratio": -0.4914724826812744, "logits/chosen": 0.4908197522163391, "logits/rejected": 0.48938310146331787, "logps/chosen": -2.3321943283081055, "logps/rejected": -2.9717583656311035, "loss": 1.0065, "nll_loss": 0.9573644995689392, "rewards/accuracies": 0.75, "rewards/chosen": -0.23321941494941711, "rewards/margins": 0.06395639479160309, "rewards/rejected": -0.2971758246421814, "step": 1114 }, { "epoch": 3.052703627652293, "grad_norm": 3.1655213832855225, "learning_rate": 8.473972602739725e-07, "log_odds_chosen": 0.9763436317443848, "log_odds_ratio": -0.49852198362350464, "logits/chosen": 0.4712010324001312, "logits/rejected": 0.4062073528766632, "logps/chosen": -2.3181614875793457, "logps/rejected": -3.2250783443450928, "loss": 1.0815, "nll_loss": 1.031599998474121, "rewards/accuracies": 0.75, "rewards/chosen": -0.2318161427974701, "rewards/margins": 0.0906916931271553, "rewards/rejected": -0.3225078582763672, "step": 1115 }, { "epoch": 3.055441478439425, "grad_norm": 3.763450860977173, "learning_rate": 8.472602739726027e-07, "log_odds_chosen": 0.3431289494037628, "log_odds_ratio": -0.7512652277946472, "logits/chosen": 0.4377306401729584, "logits/rejected": 0.4558102786540985, "logps/chosen": -2.517193555831909, "logps/rejected": -2.808777093887329, "loss": 1.0082, "nll_loss": 0.9330406188964844, "rewards/accuracies": 0.875, "rewards/chosen": -0.2517193853855133, "rewards/margins": 0.029158350080251694, "rewards/rejected": -0.2808777093887329, "step": 1116 }, { "epoch": 3.058179329226557, "grad_norm": 5.866756916046143, "learning_rate": 8.471232876712328e-07, "log_odds_chosen": 0.2879028916358948, "log_odds_ratio": -0.6467634439468384, "logits/chosen": 0.7811543941497803, "logits/rejected": 0.8341972231864929, "logps/chosen": -3.6663596630096436, "logps/rejected": -3.9478530883789062, "loss": 0.8937, "nll_loss": 0.829001247882843, "rewards/accuracies": 0.625, "rewards/chosen": -0.3666359484195709, "rewards/margins": 0.02814934402704239, "rewards/rejected": -0.3947852849960327, "step": 1117 }, { "epoch": 3.060917180013689, "grad_norm": 4.480005264282227, "learning_rate": 8.469863013698629e-07, "log_odds_chosen": 0.4946569502353668, "log_odds_ratio": -0.5579112768173218, "logits/chosen": 0.390430748462677, "logits/rejected": 0.2795793414115906, "logps/chosen": -2.0588581562042236, "logps/rejected": -2.482666015625, "loss": 1.0981, "nll_loss": 1.042302131652832, "rewards/accuracies": 0.625, "rewards/chosen": -0.20588581264019012, "rewards/margins": 0.04238080605864525, "rewards/rejected": -0.24826662242412567, "step": 1118 }, { "epoch": 3.0636550308008212, "grad_norm": 3.693934202194214, "learning_rate": 8.468493150684931e-07, "log_odds_chosen": 0.35298749804496765, "log_odds_ratio": -0.5898404121398926, "logits/chosen": 0.5834856033325195, "logits/rejected": 0.45177167654037476, "logps/chosen": -1.7600584030151367, "logps/rejected": -2.077310562133789, "loss": 1.166, "nll_loss": 1.1070064306259155, "rewards/accuracies": 0.75, "rewards/chosen": -0.17600582540035248, "rewards/margins": 0.03172522038221359, "rewards/rejected": -0.20773106813430786, "step": 1119 }, { "epoch": 3.0663928815879533, "grad_norm": 3.6788928508758545, "learning_rate": 8.467123287671232e-07, "log_odds_chosen": 0.3678836226463318, "log_odds_ratio": -0.6151273846626282, "logits/chosen": 0.6460170745849609, "logits/rejected": 0.7269938588142395, "logps/chosen": -3.118347406387329, "logps/rejected": -3.467020034790039, "loss": 0.9033, "nll_loss": 0.8417918682098389, "rewards/accuracies": 0.5, "rewards/chosen": -0.31183478236198425, "rewards/margins": 0.03486723452806473, "rewards/rejected": -0.3467020094394684, "step": 1120 }, { "epoch": 3.0691307323750854, "grad_norm": 4.4642181396484375, "learning_rate": 8.465753424657533e-07, "log_odds_chosen": -0.022346049547195435, "log_odds_ratio": -0.8297942280769348, "logits/chosen": 0.5223039984703064, "logits/rejected": 0.5400476455688477, "logps/chosen": -2.5448951721191406, "logps/rejected": -2.4994707107543945, "loss": 1.0241, "nll_loss": 0.9411049485206604, "rewards/accuracies": 0.5, "rewards/chosen": -0.2544895112514496, "rewards/margins": -0.004542442969977856, "rewards/rejected": -0.24994707107543945, "step": 1121 }, { "epoch": 3.0718685831622174, "grad_norm": 3.3612165451049805, "learning_rate": 8.464383561643835e-07, "log_odds_chosen": 0.3986866772174835, "log_odds_ratio": -0.7867669463157654, "logits/chosen": 0.5345878601074219, "logits/rejected": 0.6122986674308777, "logps/chosen": -2.626110553741455, "logps/rejected": -2.955906391143799, "loss": 1.0559, "nll_loss": 0.9771985411643982, "rewards/accuracies": 0.625, "rewards/chosen": -0.26261106133461, "rewards/margins": 0.032979585230350494, "rewards/rejected": -0.2955906391143799, "step": 1122 }, { "epoch": 3.07460643394935, "grad_norm": 3.1661601066589355, "learning_rate": 8.463013698630137e-07, "log_odds_chosen": 0.6782318353652954, "log_odds_ratio": -0.5477718710899353, "logits/chosen": 0.44308632612228394, "logits/rejected": 0.41611504554748535, "logps/chosen": -2.1601650714874268, "logps/rejected": -2.795252561569214, "loss": 1.0705, "nll_loss": 1.0156947374343872, "rewards/accuracies": 0.625, "rewards/chosen": -0.2160165160894394, "rewards/margins": 0.06350873410701752, "rewards/rejected": -0.2795252501964569, "step": 1123 }, { "epoch": 3.077344284736482, "grad_norm": 3.528416633605957, "learning_rate": 8.461643835616437e-07, "log_odds_chosen": 0.7622799873352051, "log_odds_ratio": -0.5611851811408997, "logits/chosen": 0.5269727110862732, "logits/rejected": 0.5690693259239197, "logps/chosen": -2.232332706451416, "logps/rejected": -2.9445760250091553, "loss": 0.9868, "nll_loss": 0.9306381940841675, "rewards/accuracies": 0.625, "rewards/chosen": -0.22323325276374817, "rewards/margins": 0.07122434675693512, "rewards/rejected": -0.2944576144218445, "step": 1124 }, { "epoch": 3.080082135523614, "grad_norm": 3.2414777278900146, "learning_rate": 8.46027397260274e-07, "log_odds_chosen": 0.7983092665672302, "log_odds_ratio": -0.446860671043396, "logits/chosen": 0.5438159704208374, "logits/rejected": 0.5540189743041992, "logps/chosen": -2.314030170440674, "logps/rejected": -3.083449363708496, "loss": 1.0373, "nll_loss": 0.9926042556762695, "rewards/accuracies": 0.875, "rewards/chosen": -0.23140299320220947, "rewards/margins": 0.07694193720817566, "rewards/rejected": -0.30834493041038513, "step": 1125 }, { "epoch": 3.082819986310746, "grad_norm": 3.269784450531006, "learning_rate": 8.458904109589042e-07, "log_odds_chosen": 0.19807294011116028, "log_odds_ratio": -0.6675459742546082, "logits/chosen": 0.42985406517982483, "logits/rejected": 0.5160555243492126, "logps/chosen": -2.157597064971924, "logps/rejected": -2.327061414718628, "loss": 0.981, "nll_loss": 0.914249062538147, "rewards/accuracies": 0.625, "rewards/chosen": -0.21575969457626343, "rewards/margins": 0.01694643869996071, "rewards/rejected": -0.23270614445209503, "step": 1126 }, { "epoch": 3.0855578370978782, "grad_norm": 4.395181655883789, "learning_rate": 8.457534246575341e-07, "log_odds_chosen": 0.7379732728004456, "log_odds_ratio": -0.48244068026542664, "logits/chosen": 0.5811586976051331, "logits/rejected": 0.6288551092147827, "logps/chosen": -2.519834518432617, "logps/rejected": -3.216531276702881, "loss": 0.9985, "nll_loss": 0.9502270221710205, "rewards/accuracies": 0.75, "rewards/chosen": -0.2519834637641907, "rewards/margins": 0.06966965645551682, "rewards/rejected": -0.3216531276702881, "step": 1127 }, { "epoch": 3.0882956878850103, "grad_norm": 3.562067985534668, "learning_rate": 8.456164383561644e-07, "log_odds_chosen": 0.5587092041969299, "log_odds_ratio": -0.6655362844467163, "logits/chosen": 0.6637605428695679, "logits/rejected": 0.64602130651474, "logps/chosen": -2.651646614074707, "logps/rejected": -3.159034252166748, "loss": 1.09, "nll_loss": 1.0234935283660889, "rewards/accuracies": 0.625, "rewards/chosen": -0.26516467332839966, "rewards/margins": 0.050738781690597534, "rewards/rejected": -0.3159034252166748, "step": 1128 }, { "epoch": 3.0910335386721424, "grad_norm": 6.33751106262207, "learning_rate": 8.454794520547945e-07, "log_odds_chosen": 0.23218083381652832, "log_odds_ratio": -0.7769579887390137, "logits/chosen": 0.5823476314544678, "logits/rejected": 0.5093380212783813, "logps/chosen": -2.7528269290924072, "logps/rejected": -2.9115371704101562, "loss": 1.1568, "nll_loss": 1.0790950059890747, "rewards/accuracies": 0.625, "rewards/chosen": -0.27528268098831177, "rewards/margins": 0.015871021896600723, "rewards/rejected": -0.2911537289619446, "step": 1129 }, { "epoch": 3.0937713894592744, "grad_norm": 4.609651565551758, "learning_rate": 8.453424657534247e-07, "log_odds_chosen": 0.6088266372680664, "log_odds_ratio": -0.5134369134902954, "logits/chosen": 0.5895996689796448, "logits/rejected": 0.6385110020637512, "logps/chosen": -2.710078239440918, "logps/rejected": -3.2685201168060303, "loss": 0.9875, "nll_loss": 0.9361519813537598, "rewards/accuracies": 0.75, "rewards/chosen": -0.27100783586502075, "rewards/margins": 0.05584417283535004, "rewards/rejected": -0.326852023601532, "step": 1130 }, { "epoch": 3.0965092402464065, "grad_norm": 3.5598251819610596, "learning_rate": 8.452054794520548e-07, "log_odds_chosen": 0.5815107226371765, "log_odds_ratio": -0.6030528545379639, "logits/chosen": 0.6752219200134277, "logits/rejected": 0.6585665941238403, "logps/chosen": -3.3054943084716797, "logps/rejected": -3.824535369873047, "loss": 1.0617, "nll_loss": 1.0013630390167236, "rewards/accuracies": 0.5, "rewards/chosen": -0.3305494487285614, "rewards/margins": 0.05190407484769821, "rewards/rejected": -0.3824535310268402, "step": 1131 }, { "epoch": 3.0992470910335386, "grad_norm": 5.901770114898682, "learning_rate": 8.450684931506849e-07, "log_odds_chosen": 0.5550639033317566, "log_odds_ratio": -0.6909765005111694, "logits/chosen": 0.6632296442985535, "logits/rejected": 0.6464582085609436, "logps/chosen": -2.3217082023620605, "logps/rejected": -2.803985357284546, "loss": 1.1023, "nll_loss": 1.0331640243530273, "rewards/accuracies": 0.875, "rewards/chosen": -0.23217082023620605, "rewards/margins": 0.048227738589048386, "rewards/rejected": -0.28039854764938354, "step": 1132 }, { "epoch": 3.1019849418206706, "grad_norm": 6.715277671813965, "learning_rate": 8.449315068493151e-07, "log_odds_chosen": -0.6650886535644531, "log_odds_ratio": -1.196366310119629, "logits/chosen": 0.3635711967945099, "logits/rejected": 0.3454825282096863, "logps/chosen": -3.0744447708129883, "logps/rejected": -2.493701457977295, "loss": 1.1631, "nll_loss": 1.0434269905090332, "rewards/accuracies": 0.375, "rewards/chosen": -0.3074444532394409, "rewards/margins": -0.05807431787252426, "rewards/rejected": -0.24937014281749725, "step": 1133 }, { "epoch": 3.1047227926078027, "grad_norm": 3.7002947330474854, "learning_rate": 8.447945205479452e-07, "log_odds_chosen": 0.2387392520904541, "log_odds_ratio": -0.6223721504211426, "logits/chosen": 0.578086793422699, "logits/rejected": 0.539973258972168, "logps/chosen": -2.4193763732910156, "logps/rejected": -2.5892348289489746, "loss": 0.9811, "nll_loss": 0.918875515460968, "rewards/accuracies": 0.75, "rewards/chosen": -0.24193763732910156, "rewards/margins": 0.01698584482073784, "rewards/rejected": -0.2589234709739685, "step": 1134 }, { "epoch": 3.1074606433949348, "grad_norm": 2.9790194034576416, "learning_rate": 8.446575342465753e-07, "log_odds_chosen": 1.2809771299362183, "log_odds_ratio": -0.4248483180999756, "logits/chosen": 0.4220963418483734, "logits/rejected": 0.34446704387664795, "logps/chosen": -2.431321382522583, "logps/rejected": -3.625465154647827, "loss": 0.9921, "nll_loss": 0.9496314525604248, "rewards/accuracies": 0.75, "rewards/chosen": -0.24313214421272278, "rewards/margins": 0.11941438913345337, "rewards/rejected": -0.36254656314849854, "step": 1135 }, { "epoch": 3.1101984941820673, "grad_norm": 5.817500591278076, "learning_rate": 8.445205479452055e-07, "log_odds_chosen": 0.1393355429172516, "log_odds_ratio": -0.7073988914489746, "logits/chosen": 0.5702903866767883, "logits/rejected": 0.555990993976593, "logps/chosen": -3.055426597595215, "logps/rejected": -3.1726605892181396, "loss": 1.012, "nll_loss": 0.9412121772766113, "rewards/accuracies": 0.625, "rewards/chosen": -0.30554264783859253, "rewards/margins": 0.01172342523932457, "rewards/rejected": -0.3172661066055298, "step": 1136 }, { "epoch": 3.1129363449691994, "grad_norm": 3.481398582458496, "learning_rate": 8.443835616438357e-07, "log_odds_chosen": 0.8278591632843018, "log_odds_ratio": -0.6001187562942505, "logits/chosen": 0.6233021020889282, "logits/rejected": 0.5454235076904297, "logps/chosen": -2.515840530395508, "logps/rejected": -3.278111457824707, "loss": 0.9936, "nll_loss": 0.9335380792617798, "rewards/accuracies": 0.75, "rewards/chosen": -0.25158408284187317, "rewards/margins": 0.07622706890106201, "rewards/rejected": -0.3278111517429352, "step": 1137 }, { "epoch": 3.1156741957563314, "grad_norm": 4.11680793762207, "learning_rate": 8.442465753424657e-07, "log_odds_chosen": 0.1717197448015213, "log_odds_ratio": -0.6779240369796753, "logits/chosen": 0.4759448766708374, "logits/rejected": 0.5004411339759827, "logps/chosen": -2.5086212158203125, "logps/rejected": -2.66562557220459, "loss": 1.1237, "nll_loss": 1.0559382438659668, "rewards/accuracies": 0.75, "rewards/chosen": -0.25086212158203125, "rewards/margins": 0.015700427815318108, "rewards/rejected": -0.2665625512599945, "step": 1138 }, { "epoch": 3.1184120465434635, "grad_norm": 2.9439806938171387, "learning_rate": 8.441095890410959e-07, "log_odds_chosen": 0.17637899518013, "log_odds_ratio": -0.6593902111053467, "logits/chosen": 0.5665013790130615, "logits/rejected": 0.5281385183334351, "logps/chosen": -2.556077003479004, "logps/rejected": -2.7096424102783203, "loss": 1.097, "nll_loss": 1.031071662902832, "rewards/accuracies": 0.375, "rewards/chosen": -0.2556077241897583, "rewards/margins": 0.015356512740254402, "rewards/rejected": -0.27096420526504517, "step": 1139 }, { "epoch": 3.1211498973305956, "grad_norm": 5.211888790130615, "learning_rate": 8.439726027397261e-07, "log_odds_chosen": 0.6001451015472412, "log_odds_ratio": -0.915149450302124, "logits/chosen": 0.47940701246261597, "logits/rejected": 0.4486429691314697, "logps/chosen": -3.603834629058838, "logps/rejected": -4.129693984985352, "loss": 1.0995, "nll_loss": 1.0079972743988037, "rewards/accuracies": 0.5, "rewards/chosen": -0.3603834807872772, "rewards/margins": 0.0525859072804451, "rewards/rejected": -0.4129694104194641, "step": 1140 }, { "epoch": 3.1238877481177276, "grad_norm": 3.878466844558716, "learning_rate": 8.438356164383561e-07, "log_odds_chosen": 0.6532665491104126, "log_odds_ratio": -0.5801600813865662, "logits/chosen": 0.5253893136978149, "logits/rejected": 0.40325963497161865, "logps/chosen": -2.5293631553649902, "logps/rejected": -3.126859188079834, "loss": 1.0262, "nll_loss": 0.9681591987609863, "rewards/accuracies": 0.625, "rewards/chosen": -0.25293630361557007, "rewards/margins": 0.05974959209561348, "rewards/rejected": -0.31268593668937683, "step": 1141 }, { "epoch": 3.1266255989048597, "grad_norm": 3.192523717880249, "learning_rate": 8.436986301369863e-07, "log_odds_chosen": 1.396941065788269, "log_odds_ratio": -0.5574900507926941, "logits/chosen": 0.5536915063858032, "logits/rejected": 0.6207373142242432, "logps/chosen": -3.3294413089752197, "logps/rejected": -4.689769268035889, "loss": 1.0492, "nll_loss": 0.9934303760528564, "rewards/accuracies": 0.75, "rewards/chosen": -0.3329441547393799, "rewards/margins": 0.1360328197479248, "rewards/rejected": -0.4689769744873047, "step": 1142 }, { "epoch": 3.1293634496919918, "grad_norm": 3.2050745487213135, "learning_rate": 8.435616438356165e-07, "log_odds_chosen": -0.2810118794441223, "log_odds_ratio": -0.941288948059082, "logits/chosen": 0.5339374542236328, "logits/rejected": 0.5576730966567993, "logps/chosen": -2.588663101196289, "logps/rejected": -2.2870421409606934, "loss": 1.0738, "nll_loss": 0.9796304106712341, "rewards/accuracies": 0.25, "rewards/chosen": -0.2588663101196289, "rewards/margins": -0.03016209602355957, "rewards/rejected": -0.22870421409606934, "step": 1143 }, { "epoch": 3.132101300479124, "grad_norm": 3.1469807624816895, "learning_rate": 8.434246575342465e-07, "log_odds_chosen": 1.250098705291748, "log_odds_ratio": -0.38913577795028687, "logits/chosen": 0.49997878074645996, "logits/rejected": 0.45360881090164185, "logps/chosen": -2.3974101543426514, "logps/rejected": -3.558445930480957, "loss": 0.9745, "nll_loss": 0.9355682730674744, "rewards/accuracies": 0.875, "rewards/chosen": -0.2397410124540329, "rewards/margins": 0.11610355973243713, "rewards/rejected": -0.35584455728530884, "step": 1144 }, { "epoch": 3.134839151266256, "grad_norm": 4.273054599761963, "learning_rate": 8.432876712328767e-07, "log_odds_chosen": 0.3928726613521576, "log_odds_ratio": -0.6186171770095825, "logits/chosen": 0.5191553831100464, "logits/rejected": 0.5243052840232849, "logps/chosen": -2.4021530151367188, "logps/rejected": -2.771925449371338, "loss": 1.0522, "nll_loss": 0.9903357028961182, "rewards/accuracies": 0.625, "rewards/chosen": -0.24021530151367188, "rewards/margins": 0.03697725385427475, "rewards/rejected": -0.2771925628185272, "step": 1145 }, { "epoch": 3.137577002053388, "grad_norm": 2.9782297611236572, "learning_rate": 8.431506849315068e-07, "log_odds_chosen": 0.9767871499061584, "log_odds_ratio": -0.4062323570251465, "logits/chosen": 0.6367130279541016, "logits/rejected": 0.6128479242324829, "logps/chosen": -2.35170841217041, "logps/rejected": -3.2705414295196533, "loss": 0.9721, "nll_loss": 0.9314630031585693, "rewards/accuracies": 0.875, "rewards/chosen": -0.23517084121704102, "rewards/margins": 0.09188328683376312, "rewards/rejected": -0.32705414295196533, "step": 1146 }, { "epoch": 3.14031485284052, "grad_norm": 4.022621154785156, "learning_rate": 8.43013698630137e-07, "log_odds_chosen": 0.28423988819122314, "log_odds_ratio": -0.694941520690918, "logits/chosen": 0.508102536201477, "logits/rejected": 0.33900192379951477, "logps/chosen": -2.721961259841919, "logps/rejected": -2.9682459831237793, "loss": 1.094, "nll_loss": 1.024521827697754, "rewards/accuracies": 0.625, "rewards/chosen": -0.27219611406326294, "rewards/margins": 0.02462848648428917, "rewards/rejected": -0.2968246340751648, "step": 1147 }, { "epoch": 3.143052703627652, "grad_norm": 3.603731155395508, "learning_rate": 8.428767123287671e-07, "log_odds_chosen": 0.8020815849304199, "log_odds_ratio": -0.5610989332199097, "logits/chosen": 0.645967960357666, "logits/rejected": 0.6200474500656128, "logps/chosen": -2.8159773349761963, "logps/rejected": -3.613504648208618, "loss": 0.9994, "nll_loss": 0.9432761669158936, "rewards/accuracies": 0.625, "rewards/chosen": -0.28159773349761963, "rewards/margins": 0.07975274324417114, "rewards/rejected": -0.36135047674179077, "step": 1148 }, { "epoch": 3.145790554414784, "grad_norm": 4.980031967163086, "learning_rate": 8.427397260273972e-07, "log_odds_chosen": 0.37841421365737915, "log_odds_ratio": -0.6799055337905884, "logits/chosen": 0.6616822481155396, "logits/rejected": 0.6285028457641602, "logps/chosen": -2.267583131790161, "logps/rejected": -2.625894069671631, "loss": 1.0744, "nll_loss": 1.0064494609832764, "rewards/accuracies": 0.75, "rewards/chosen": -0.22675831615924835, "rewards/margins": 0.03583107888698578, "rewards/rejected": -0.26258939504623413, "step": 1149 }, { "epoch": 3.1485284052019167, "grad_norm": 3.5605998039245605, "learning_rate": 8.426027397260274e-07, "log_odds_chosen": -0.03489533066749573, "log_odds_ratio": -0.9553460478782654, "logits/chosen": 0.5045968294143677, "logits/rejected": 0.5775602459907532, "logps/chosen": -2.3208749294281006, "logps/rejected": -2.2452940940856934, "loss": 1.0145, "nll_loss": 0.9189352989196777, "rewards/accuracies": 0.75, "rewards/chosen": -0.23208747804164886, "rewards/margins": -0.007558068260550499, "rewards/rejected": -0.2245294153690338, "step": 1150 }, { "epoch": 3.1512662559890487, "grad_norm": 4.309014797210693, "learning_rate": 8.424657534246576e-07, "log_odds_chosen": 1.1080362796783447, "log_odds_ratio": -0.4289659857749939, "logits/chosen": 0.5283740162849426, "logits/rejected": 0.539058268070221, "logps/chosen": -2.2350423336029053, "logps/rejected": -3.2413430213928223, "loss": 0.8942, "nll_loss": 0.8512918949127197, "rewards/accuracies": 0.75, "rewards/chosen": -0.2235042303800583, "rewards/margins": 0.10063006728887558, "rewards/rejected": -0.32413429021835327, "step": 1151 }, { "epoch": 3.154004106776181, "grad_norm": 3.4010515213012695, "learning_rate": 8.423287671232876e-07, "log_odds_chosen": 1.0920284986495972, "log_odds_ratio": -0.5678749084472656, "logits/chosen": 0.5657914876937866, "logits/rejected": 0.5422050356864929, "logps/chosen": -2.4861741065979004, "logps/rejected": -3.47581148147583, "loss": 1.0619, "nll_loss": 1.0050981044769287, "rewards/accuracies": 0.75, "rewards/chosen": -0.24861741065979004, "rewards/margins": 0.09896372258663177, "rewards/rejected": -0.3475811183452606, "step": 1152 }, { "epoch": 3.156741957563313, "grad_norm": 3.230746030807495, "learning_rate": 8.421917808219178e-07, "log_odds_chosen": 1.4109044075012207, "log_odds_ratio": -0.3066349923610687, "logits/chosen": 0.450685977935791, "logits/rejected": 0.32157081365585327, "logps/chosen": -2.3547399044036865, "logps/rejected": -3.6835153102874756, "loss": 1.0578, "nll_loss": 1.0271530151367188, "rewards/accuracies": 0.875, "rewards/chosen": -0.23547399044036865, "rewards/margins": 0.13287752866744995, "rewards/rejected": -0.3683515191078186, "step": 1153 }, { "epoch": 3.159479808350445, "grad_norm": 4.370506763458252, "learning_rate": 8.42054794520548e-07, "log_odds_chosen": 1.0438683032989502, "log_odds_ratio": -0.6117171049118042, "logits/chosen": 0.44422808289527893, "logits/rejected": 0.3453867435455322, "logps/chosen": -2.5605854988098145, "logps/rejected": -3.532125473022461, "loss": 1.0805, "nll_loss": 1.0193206071853638, "rewards/accuracies": 0.625, "rewards/chosen": -0.25605854392051697, "rewards/margins": 0.09715400636196136, "rewards/rejected": -0.3532125651836395, "step": 1154 }, { "epoch": 3.162217659137577, "grad_norm": 3.4181530475616455, "learning_rate": 8.41917808219178e-07, "log_odds_chosen": 0.5608840584754944, "log_odds_ratio": -0.5547353625297546, "logits/chosen": 0.6199135184288025, "logits/rejected": 0.6320797204971313, "logps/chosen": -2.741856575012207, "logps/rejected": -3.272139549255371, "loss": 0.9894, "nll_loss": 0.9339380860328674, "rewards/accuracies": 0.625, "rewards/chosen": -0.2741856873035431, "rewards/margins": 0.05302827060222626, "rewards/rejected": -0.32721394300460815, "step": 1155 }, { "epoch": 3.164955509924709, "grad_norm": 3.2737576961517334, "learning_rate": 8.417808219178082e-07, "log_odds_chosen": 1.1752904653549194, "log_odds_ratio": -0.37423092126846313, "logits/chosen": 0.6291098594665527, "logits/rejected": 0.5704956650733948, "logps/chosen": -2.248500108718872, "logps/rejected": -3.374368667602539, "loss": 1.0635, "nll_loss": 1.0260854959487915, "rewards/accuracies": 0.875, "rewards/chosen": -0.22485002875328064, "rewards/margins": 0.1125868409872055, "rewards/rejected": -0.33743688464164734, "step": 1156 }, { "epoch": 3.167693360711841, "grad_norm": 3.163365602493286, "learning_rate": 8.416438356164384e-07, "log_odds_chosen": 0.3205642104148865, "log_odds_ratio": -0.6869747638702393, "logits/chosen": 0.3805606961250305, "logits/rejected": 0.401184618473053, "logps/chosen": -2.417410373687744, "logps/rejected": -2.684072494506836, "loss": 1.0198, "nll_loss": 0.9511077404022217, "rewards/accuracies": 0.75, "rewards/chosen": -0.24174104630947113, "rewards/margins": 0.026666205376386642, "rewards/rejected": -0.26840725541114807, "step": 1157 }, { "epoch": 3.1704312114989732, "grad_norm": 3.7631218433380127, "learning_rate": 8.415068493150684e-07, "log_odds_chosen": 0.5418609976768494, "log_odds_ratio": -0.5444076657295227, "logits/chosen": 0.45212724804878235, "logits/rejected": 0.40457233786582947, "logps/chosen": -2.4395477771759033, "logps/rejected": -2.9478139877319336, "loss": 1.0048, "nll_loss": 0.9503476023674011, "rewards/accuracies": 0.75, "rewards/chosen": -0.24395477771759033, "rewards/margins": 0.05082662031054497, "rewards/rejected": -0.2947813868522644, "step": 1158 }, { "epoch": 3.1731690622861053, "grad_norm": 2.958590269088745, "learning_rate": 8.413698630136986e-07, "log_odds_chosen": 1.152883529663086, "log_odds_ratio": -0.48166191577911377, "logits/chosen": 0.5098235607147217, "logits/rejected": 0.487266480922699, "logps/chosen": -2.223052501678467, "logps/rejected": -3.273777723312378, "loss": 0.9624, "nll_loss": 0.9142172336578369, "rewards/accuracies": 0.875, "rewards/chosen": -0.22230523824691772, "rewards/margins": 0.10507254302501678, "rewards/rejected": -0.3273777961730957, "step": 1159 }, { "epoch": 3.1759069130732374, "grad_norm": 3.895568370819092, "learning_rate": 8.412328767123287e-07, "log_odds_chosen": 0.3264722228050232, "log_odds_ratio": -0.6254526376724243, "logits/chosen": 0.6753031611442566, "logits/rejected": 0.7640225291252136, "logps/chosen": -2.9319820404052734, "logps/rejected": -3.25061297416687, "loss": 0.9398, "nll_loss": 0.8772203326225281, "rewards/accuracies": 0.5, "rewards/chosen": -0.29319822788238525, "rewards/margins": 0.031863078474998474, "rewards/rejected": -0.32506126165390015, "step": 1160 }, { "epoch": 3.1786447638603694, "grad_norm": 3.8556079864501953, "learning_rate": 8.410958904109589e-07, "log_odds_chosen": 1.091657042503357, "log_odds_ratio": -0.4151701331138611, "logits/chosen": 0.37273159623146057, "logits/rejected": 0.39676961302757263, "logps/chosen": -2.2575221061706543, "logps/rejected": -3.2949328422546387, "loss": 1.0148, "nll_loss": 0.973235547542572, "rewards/accuracies": 0.875, "rewards/chosen": -0.22575220465660095, "rewards/margins": 0.10374107956886292, "rewards/rejected": -0.32949331402778625, "step": 1161 }, { "epoch": 3.181382614647502, "grad_norm": 3.652646064758301, "learning_rate": 8.40958904109589e-07, "log_odds_chosen": 0.3381471633911133, "log_odds_ratio": -0.7353479862213135, "logits/chosen": 0.7190311551094055, "logits/rejected": 0.7184081077575684, "logps/chosen": -2.279715061187744, "logps/rejected": -2.5308351516723633, "loss": 1.1057, "nll_loss": 1.0321637392044067, "rewards/accuracies": 0.875, "rewards/chosen": -0.22797150909900665, "rewards/margins": 0.02511202171444893, "rewards/rejected": -0.2530835270881653, "step": 1162 }, { "epoch": 3.184120465434634, "grad_norm": 3.3405230045318604, "learning_rate": 8.408219178082191e-07, "log_odds_chosen": 0.49616196751594543, "log_odds_ratio": -0.5288609266281128, "logits/chosen": 0.5493028163909912, "logits/rejected": 0.4023642838001251, "logps/chosen": -2.263735771179199, "logps/rejected": -2.6895461082458496, "loss": 1.1171, "nll_loss": 1.0642112493515015, "rewards/accuracies": 0.75, "rewards/chosen": -0.2263736128807068, "rewards/margins": 0.04258102551102638, "rewards/rejected": -0.26895463466644287, "step": 1163 }, { "epoch": 3.186858316221766, "grad_norm": 3.7952311038970947, "learning_rate": 8.406849315068493e-07, "log_odds_chosen": 0.757154107093811, "log_odds_ratio": -0.4285428524017334, "logits/chosen": 0.4008275270462036, "logits/rejected": 0.36049965023994446, "logps/chosen": -2.1583595275878906, "logps/rejected": -2.839430809020996, "loss": 1.0155, "nll_loss": 0.9726892113685608, "rewards/accuracies": 0.875, "rewards/chosen": -0.21583594381809235, "rewards/margins": 0.06810714304447174, "rewards/rejected": -0.2839430868625641, "step": 1164 }, { "epoch": 3.189596167008898, "grad_norm": 3.7794578075408936, "learning_rate": 8.405479452054794e-07, "log_odds_chosen": 0.5582541823387146, "log_odds_ratio": -0.5277585983276367, "logits/chosen": 0.6968433856964111, "logits/rejected": 0.6340785622596741, "logps/chosen": -1.7372485399246216, "logps/rejected": -2.207808494567871, "loss": 1.1332, "nll_loss": 1.080374836921692, "rewards/accuracies": 0.625, "rewards/chosen": -0.17372484505176544, "rewards/margins": 0.04705601930618286, "rewards/rejected": -0.2207808792591095, "step": 1165 }, { "epoch": 3.19233401779603, "grad_norm": 4.287808418273926, "learning_rate": 8.404109589041095e-07, "log_odds_chosen": 0.3539426326751709, "log_odds_ratio": -0.708553671836853, "logits/chosen": 0.5771481394767761, "logits/rejected": 0.6876918077468872, "logps/chosen": -2.8822736740112305, "logps/rejected": -3.186002731323242, "loss": 0.9137, "nll_loss": 0.8428400158882141, "rewards/accuracies": 0.5, "rewards/chosen": -0.2882273495197296, "rewards/margins": 0.030372926965355873, "rewards/rejected": -0.31860029697418213, "step": 1166 }, { "epoch": 3.1950718685831623, "grad_norm": 3.3835299015045166, "learning_rate": 8.402739726027397e-07, "log_odds_chosen": 0.6571429371833801, "log_odds_ratio": -0.5445737242698669, "logits/chosen": 0.4786193370819092, "logits/rejected": 0.5253209471702576, "logps/chosen": -2.152750253677368, "logps/rejected": -2.716878890991211, "loss": 0.9301, "nll_loss": 0.8755969405174255, "rewards/accuracies": 0.75, "rewards/chosen": -0.21527501940727234, "rewards/margins": 0.05641286075115204, "rewards/rejected": -0.2716878652572632, "step": 1167 }, { "epoch": 3.1978097193702943, "grad_norm": 3.9285647869110107, "learning_rate": 8.401369863013699e-07, "log_odds_chosen": 0.11385534703731537, "log_odds_ratio": -0.7181975841522217, "logits/chosen": 0.6695053577423096, "logits/rejected": 0.5461560487747192, "logps/chosen": -2.8385848999023438, "logps/rejected": -2.9390745162963867, "loss": 1.0842, "nll_loss": 1.01239013671875, "rewards/accuracies": 0.375, "rewards/chosen": -0.2838585078716278, "rewards/margins": 0.010048963129520416, "rewards/rejected": -0.2939074635505676, "step": 1168 }, { "epoch": 3.2005475701574264, "grad_norm": 3.1923398971557617, "learning_rate": 8.399999999999999e-07, "log_odds_chosen": 0.6570796370506287, "log_odds_ratio": -0.4488825500011444, "logits/chosen": 0.4779622554779053, "logits/rejected": 0.4624187648296356, "logps/chosen": -2.011756181716919, "logps/rejected": -2.5746560096740723, "loss": 1.0479, "nll_loss": 1.0030094385147095, "rewards/accuracies": 1.0, "rewards/chosen": -0.20117561519145966, "rewards/margins": 0.05629000440239906, "rewards/rejected": -0.2574656307697296, "step": 1169 }, { "epoch": 3.2032854209445585, "grad_norm": 3.3883087635040283, "learning_rate": 8.398630136986301e-07, "log_odds_chosen": 0.0903017669916153, "log_odds_ratio": -0.7342845797538757, "logits/chosen": 0.6166641116142273, "logits/rejected": 0.6426904201507568, "logps/chosen": -2.371718645095825, "logps/rejected": -2.4841794967651367, "loss": 1.0864, "nll_loss": 1.0129393339157104, "rewards/accuracies": 0.625, "rewards/chosen": -0.23717188835144043, "rewards/margins": 0.011246100068092346, "rewards/rejected": -0.2484179586172104, "step": 1170 }, { "epoch": 3.2060232717316905, "grad_norm": 3.4625155925750732, "learning_rate": 8.397260273972603e-07, "log_odds_chosen": 0.31567347049713135, "log_odds_ratio": -0.610160231590271, "logits/chosen": 0.4731537997722626, "logits/rejected": 0.36160922050476074, "logps/chosen": -2.417248249053955, "logps/rejected": -2.6945810317993164, "loss": 1.0983, "nll_loss": 1.037315845489502, "rewards/accuracies": 0.875, "rewards/chosen": -0.24172481894493103, "rewards/margins": 0.02773331105709076, "rewards/rejected": -0.2694581151008606, "step": 1171 }, { "epoch": 3.2087611225188226, "grad_norm": 3.067950963973999, "learning_rate": 8.395890410958903e-07, "log_odds_chosen": -0.10806485265493393, "log_odds_ratio": -0.8119451999664307, "logits/chosen": 0.40942007303237915, "logits/rejected": 0.39904069900512695, "logps/chosen": -2.4515609741210938, "logps/rejected": -2.3291027545928955, "loss": 1.0984, "nll_loss": 1.0172195434570312, "rewards/accuracies": 0.625, "rewards/chosen": -0.24515610933303833, "rewards/margins": -0.012245826423168182, "rewards/rejected": -0.23291027545928955, "step": 1172 }, { "epoch": 3.2114989733059547, "grad_norm": 2.952549934387207, "learning_rate": 8.394520547945205e-07, "log_odds_chosen": 1.1779874563217163, "log_odds_ratio": -0.36679714918136597, "logits/chosen": 0.5187496542930603, "logits/rejected": 0.5176383852958679, "logps/chosen": -2.377537727355957, "logps/rejected": -3.493704319000244, "loss": 0.9581, "nll_loss": 0.9213951826095581, "rewards/accuracies": 0.875, "rewards/chosen": -0.23775377869606018, "rewards/margins": 0.11161666363477707, "rewards/rejected": -0.34937041997909546, "step": 1173 }, { "epoch": 3.2142368240930868, "grad_norm": 5.957642555236816, "learning_rate": 8.393150684931507e-07, "log_odds_chosen": 0.5507926940917969, "log_odds_ratio": -0.5236073732376099, "logits/chosen": 0.3984740078449249, "logits/rejected": 0.414145827293396, "logps/chosen": -2.3362157344818115, "logps/rejected": -2.853482246398926, "loss": 1.0257, "nll_loss": 0.973313570022583, "rewards/accuracies": 0.75, "rewards/chosen": -0.23362159729003906, "rewards/margins": 0.05172666162252426, "rewards/rejected": -0.28534823656082153, "step": 1174 }, { "epoch": 3.216974674880219, "grad_norm": 3.701232671737671, "learning_rate": 8.391780821917808e-07, "log_odds_chosen": 0.27651020884513855, "log_odds_ratio": -0.6394177675247192, "logits/chosen": 0.4874808192253113, "logits/rejected": 0.5087249279022217, "logps/chosen": -2.643376350402832, "logps/rejected": -2.871535301208496, "loss": 1.07, "nll_loss": 1.006100058555603, "rewards/accuracies": 0.75, "rewards/chosen": -0.2643376290798187, "rewards/margins": 0.0228158887475729, "rewards/rejected": -0.2871535122394562, "step": 1175 }, { "epoch": 3.2197125256673513, "grad_norm": 4.185032844543457, "learning_rate": 8.390410958904109e-07, "log_odds_chosen": 0.36589014530181885, "log_odds_ratio": -0.6635315418243408, "logits/chosen": 0.6274884939193726, "logits/rejected": 0.6823572516441345, "logps/chosen": -2.522030830383301, "logps/rejected": -2.784400463104248, "loss": 0.923, "nll_loss": 0.8566129207611084, "rewards/accuracies": 0.5, "rewards/chosen": -0.2522030770778656, "rewards/margins": 0.02623697742819786, "rewards/rejected": -0.27844002842903137, "step": 1176 }, { "epoch": 3.2224503764544834, "grad_norm": 3.289764165878296, "learning_rate": 8.38904109589041e-07, "log_odds_chosen": 1.0129709243774414, "log_odds_ratio": -0.3891192376613617, "logits/chosen": 0.6957451701164246, "logits/rejected": 0.7814404964447021, "logps/chosen": -2.462287425994873, "logps/rejected": -3.389801502227783, "loss": 0.8294, "nll_loss": 0.7904958724975586, "rewards/accuracies": 0.875, "rewards/chosen": -0.24622875452041626, "rewards/margins": 0.0927513986825943, "rewards/rejected": -0.33898013830184937, "step": 1177 }, { "epoch": 3.2251882272416155, "grad_norm": 3.9450268745422363, "learning_rate": 8.387671232876712e-07, "log_odds_chosen": 1.4868381023406982, "log_odds_ratio": -0.35019728541374207, "logits/chosen": 0.6177685856819153, "logits/rejected": 0.5679274797439575, "logps/chosen": -2.3269243240356445, "logps/rejected": -3.7182092666625977, "loss": 1.0192, "nll_loss": 0.9841915369033813, "rewards/accuracies": 0.875, "rewards/chosen": -0.2326924204826355, "rewards/margins": 0.13912849128246307, "rewards/rejected": -0.37182092666625977, "step": 1178 }, { "epoch": 3.2279260780287475, "grad_norm": 3.070026159286499, "learning_rate": 8.386301369863013e-07, "log_odds_chosen": 0.9558675289154053, "log_odds_ratio": -0.430971622467041, "logits/chosen": 0.4859681725502014, "logits/rejected": 0.4857335388660431, "logps/chosen": -2.368553400039673, "logps/rejected": -3.2443017959594727, "loss": 1.0063, "nll_loss": 0.9631643295288086, "rewards/accuracies": 0.75, "rewards/chosen": -0.23685534298419952, "rewards/margins": 0.08757483959197998, "rewards/rejected": -0.3244301676750183, "step": 1179 }, { "epoch": 3.2306639288158796, "grad_norm": 3.5329532623291016, "learning_rate": 8.384931506849314e-07, "log_odds_chosen": 0.5646450519561768, "log_odds_ratio": -0.504761815071106, "logits/chosen": 0.42804405093193054, "logits/rejected": 0.3229437470436096, "logps/chosen": -1.9190171957015991, "logps/rejected": -2.425062894821167, "loss": 1.0225, "nll_loss": 0.9720078706741333, "rewards/accuracies": 0.875, "rewards/chosen": -0.19190174341201782, "rewards/margins": 0.05060456693172455, "rewards/rejected": -0.24250629544258118, "step": 1180 }, { "epoch": 3.2334017796030117, "grad_norm": 5.564960956573486, "learning_rate": 8.383561643835616e-07, "log_odds_chosen": 0.6807008385658264, "log_odds_ratio": -0.5495235323905945, "logits/chosen": 0.3879605531692505, "logits/rejected": 0.36724117398262024, "logps/chosen": -2.4480228424072266, "logps/rejected": -3.090698719024658, "loss": 1.0402, "nll_loss": 0.9852023124694824, "rewards/accuracies": 0.75, "rewards/chosen": -0.24480228126049042, "rewards/margins": 0.06426762789487839, "rewards/rejected": -0.3090699017047882, "step": 1181 }, { "epoch": 3.2361396303901437, "grad_norm": 3.9189655780792236, "learning_rate": 8.382191780821918e-07, "log_odds_chosen": 0.03466671705245972, "log_odds_ratio": -0.8070802092552185, "logits/chosen": 0.5283240079879761, "logits/rejected": 0.568591833114624, "logps/chosen": -3.024158477783203, "logps/rejected": -3.0568628311157227, "loss": 0.9711, "nll_loss": 0.8903626203536987, "rewards/accuracies": 0.625, "rewards/chosen": -0.3024158477783203, "rewards/margins": 0.0032704435288906097, "rewards/rejected": -0.3056862950325012, "step": 1182 }, { "epoch": 3.238877481177276, "grad_norm": 3.884150981903076, "learning_rate": 8.380821917808218e-07, "log_odds_chosen": 0.5854196548461914, "log_odds_ratio": -0.5576464533805847, "logits/chosen": 0.5892715454101562, "logits/rejected": 0.6030853986740112, "logps/chosen": -2.88773775100708, "logps/rejected": -3.4204347133636475, "loss": 0.9979, "nll_loss": 0.942125678062439, "rewards/accuracies": 0.625, "rewards/chosen": -0.288773775100708, "rewards/margins": 0.053269676864147186, "rewards/rejected": -0.3420434594154358, "step": 1183 }, { "epoch": 3.241615331964408, "grad_norm": 3.958899974822998, "learning_rate": 8.37945205479452e-07, "log_odds_chosen": 0.11224612593650818, "log_odds_ratio": -0.7016831636428833, "logits/chosen": 0.5754188299179077, "logits/rejected": 0.609736442565918, "logps/chosen": -2.013087749481201, "logps/rejected": -2.1272313594818115, "loss": 0.9601, "nll_loss": 0.8898884654045105, "rewards/accuracies": 0.5, "rewards/chosen": -0.20130878686904907, "rewards/margins": 0.011414358392357826, "rewards/rejected": -0.21272313594818115, "step": 1184 }, { "epoch": 3.24435318275154, "grad_norm": 3.990447521209717, "learning_rate": 8.378082191780822e-07, "log_odds_chosen": -0.18309833109378815, "log_odds_ratio": -0.9266247749328613, "logits/chosen": 0.5703368186950684, "logits/rejected": 0.6036785244941711, "logps/chosen": -2.3740062713623047, "logps/rejected": -2.26650333404541, "loss": 1.0786, "nll_loss": 0.9859504699707031, "rewards/accuracies": 0.25, "rewards/chosen": -0.237400621175766, "rewards/margins": -0.010750280693173409, "rewards/rejected": -0.22665034234523773, "step": 1185 }, { "epoch": 3.247091033538672, "grad_norm": 3.296630382537842, "learning_rate": 8.376712328767122e-07, "log_odds_chosen": 0.7365533709526062, "log_odds_ratio": -0.5060088038444519, "logits/chosen": 0.49488571286201477, "logits/rejected": 0.5030168294906616, "logps/chosen": -2.3356106281280518, "logps/rejected": -2.970965623855591, "loss": 1.1394, "nll_loss": 1.0887532234191895, "rewards/accuracies": 0.75, "rewards/chosen": -0.23356106877326965, "rewards/margins": 0.06353551894426346, "rewards/rejected": -0.2970966100692749, "step": 1186 }, { "epoch": 3.249828884325804, "grad_norm": 4.392245292663574, "learning_rate": 8.375342465753424e-07, "log_odds_chosen": -0.11431427299976349, "log_odds_ratio": -0.8491523265838623, "logits/chosen": 0.632102906703949, "logits/rejected": 0.6344226598739624, "logps/chosen": -2.9071736335754395, "logps/rejected": -2.7985429763793945, "loss": 1.0348, "nll_loss": 0.9499065279960632, "rewards/accuracies": 0.5, "rewards/chosen": -0.29071736335754395, "rewards/margins": -0.010863065719604492, "rewards/rejected": -0.27985432744026184, "step": 1187 }, { "epoch": 3.2525667351129366, "grad_norm": 3.8232715129852295, "learning_rate": 8.373972602739726e-07, "log_odds_chosen": 0.6622490286827087, "log_odds_ratio": -0.5332159996032715, "logits/chosen": 0.6385594606399536, "logits/rejected": 0.6727420091629028, "logps/chosen": -2.7317800521850586, "logps/rejected": -3.3439249992370605, "loss": 0.9485, "nll_loss": 0.8951795697212219, "rewards/accuracies": 0.875, "rewards/chosen": -0.2731780409812927, "rewards/margins": 0.06121444329619408, "rewards/rejected": -0.3343924880027771, "step": 1188 }, { "epoch": 3.2553045859000687, "grad_norm": 3.7957754135131836, "learning_rate": 8.372602739726027e-07, "log_odds_chosen": 0.6907114386558533, "log_odds_ratio": -0.6940135955810547, "logits/chosen": 0.7545594573020935, "logits/rejected": 0.7753646373748779, "logps/chosen": -2.5049102306365967, "logps/rejected": -3.071509599685669, "loss": 0.9669, "nll_loss": 0.8974531888961792, "rewards/accuracies": 0.625, "rewards/chosen": -0.25049105286598206, "rewards/margins": 0.05665989965200424, "rewards/rejected": -0.3071509599685669, "step": 1189 }, { "epoch": 3.2580424366872007, "grad_norm": 4.043917655944824, "learning_rate": 8.371232876712328e-07, "log_odds_chosen": 0.5151398181915283, "log_odds_ratio": -0.6156663298606873, "logits/chosen": 0.6789615154266357, "logits/rejected": 0.7122867107391357, "logps/chosen": -2.542114734649658, "logps/rejected": -3.0126047134399414, "loss": 0.8954, "nll_loss": 0.8338794112205505, "rewards/accuracies": 0.625, "rewards/chosen": -0.2542114853858948, "rewards/margins": 0.04704900085926056, "rewards/rejected": -0.30126047134399414, "step": 1190 }, { "epoch": 3.260780287474333, "grad_norm": 3.2564823627471924, "learning_rate": 8.369863013698629e-07, "log_odds_chosen": 0.4132945239543915, "log_odds_ratio": -0.5355610251426697, "logits/chosen": 0.5228750705718994, "logits/rejected": 0.4416748285293579, "logps/chosen": -2.0850472450256348, "logps/rejected": -2.4448835849761963, "loss": 1.0583, "nll_loss": 1.0047551393508911, "rewards/accuracies": 0.875, "rewards/chosen": -0.20850473642349243, "rewards/margins": 0.0359836108982563, "rewards/rejected": -0.24448834359645844, "step": 1191 }, { "epoch": 3.263518138261465, "grad_norm": 5.614818096160889, "learning_rate": 8.368493150684931e-07, "log_odds_chosen": 0.08031810820102692, "log_odds_ratio": -0.7330809831619263, "logits/chosen": 0.5127547383308411, "logits/rejected": 0.5512628555297852, "logps/chosen": -2.906808853149414, "logps/rejected": -2.9797372817993164, "loss": 0.9591, "nll_loss": 0.8857946991920471, "rewards/accuracies": 0.375, "rewards/chosen": -0.2906808853149414, "rewards/margins": 0.007292876020073891, "rewards/rejected": -0.29797375202178955, "step": 1192 }, { "epoch": 3.266255989048597, "grad_norm": 3.352886438369751, "learning_rate": 8.367123287671232e-07, "log_odds_chosen": 0.5824794769287109, "log_odds_ratio": -0.47560209035873413, "logits/chosen": 0.5483954548835754, "logits/rejected": 0.6199449896812439, "logps/chosen": -2.4910824298858643, "logps/rejected": -3.0276851654052734, "loss": 0.9737, "nll_loss": 0.9260973334312439, "rewards/accuracies": 0.875, "rewards/chosen": -0.24910825490951538, "rewards/margins": 0.05366026610136032, "rewards/rejected": -0.3027684986591339, "step": 1193 }, { "epoch": 3.268993839835729, "grad_norm": 3.4489033222198486, "learning_rate": 8.365753424657533e-07, "log_odds_chosen": 0.7455371618270874, "log_odds_ratio": -0.4385043680667877, "logits/chosen": 0.6567672491073608, "logits/rejected": 0.6343749165534973, "logps/chosen": -3.0505008697509766, "logps/rejected": -3.741577625274658, "loss": 0.9772, "nll_loss": 0.9333957433700562, "rewards/accuracies": 0.875, "rewards/chosen": -0.3050500750541687, "rewards/margins": 0.06910770386457443, "rewards/rejected": -0.37415778636932373, "step": 1194 }, { "epoch": 3.271731690622861, "grad_norm": 5.012925624847412, "learning_rate": 8.364383561643835e-07, "log_odds_chosen": 0.15273644030094147, "log_odds_ratio": -0.7860245704650879, "logits/chosen": 0.6654115319252014, "logits/rejected": 0.7260619401931763, "logps/chosen": -3.2246155738830566, "logps/rejected": -3.3352222442626953, "loss": 0.9649, "nll_loss": 0.8862940073013306, "rewards/accuracies": 0.5, "rewards/chosen": -0.3224615752696991, "rewards/margins": 0.011060696095228195, "rewards/rejected": -0.3335222601890564, "step": 1195 }, { "epoch": 3.274469541409993, "grad_norm": 3.506319046020508, "learning_rate": 8.363013698630137e-07, "log_odds_chosen": 0.7474589347839355, "log_odds_ratio": -0.45819252729415894, "logits/chosen": 0.5829417705535889, "logits/rejected": 0.551834762096405, "logps/chosen": -1.88369619846344, "logps/rejected": -2.5653979778289795, "loss": 0.9933, "nll_loss": 0.9474443197250366, "rewards/accuracies": 0.625, "rewards/chosen": -0.18836963176727295, "rewards/margins": 0.06817016750574112, "rewards/rejected": -0.25653979182243347, "step": 1196 }, { "epoch": 3.277207392197125, "grad_norm": 3.5547358989715576, "learning_rate": 8.361643835616437e-07, "log_odds_chosen": 0.7658452987670898, "log_odds_ratio": -0.46958404779434204, "logits/chosen": 0.4594544470310211, "logits/rejected": 0.4441809058189392, "logps/chosen": -2.1162843704223633, "logps/rejected": -2.793748617172241, "loss": 0.9973, "nll_loss": 0.950367271900177, "rewards/accuracies": 0.75, "rewards/chosen": -0.21162843704223633, "rewards/margins": 0.06774644553661346, "rewards/rejected": -0.279374897480011, "step": 1197 }, { "epoch": 3.2799452429842573, "grad_norm": 4.605419158935547, "learning_rate": 8.360273972602739e-07, "log_odds_chosen": 0.25821107625961304, "log_odds_ratio": -0.7012514472007751, "logits/chosen": 0.732366681098938, "logits/rejected": 0.8225964307785034, "logps/chosen": -3.0131659507751465, "logps/rejected": -3.2420225143432617, "loss": 1.1383, "nll_loss": 1.0681653022766113, "rewards/accuracies": 0.625, "rewards/chosen": -0.3013165593147278, "rewards/margins": 0.022885674610733986, "rewards/rejected": -0.3242022395133972, "step": 1198 }, { "epoch": 3.2826830937713893, "grad_norm": 3.9865939617156982, "learning_rate": 8.358904109589041e-07, "log_odds_chosen": 0.8120859265327454, "log_odds_ratio": -0.5468674302101135, "logits/chosen": 0.49628180265426636, "logits/rejected": 0.5305564999580383, "logps/chosen": -2.8623881340026855, "logps/rejected": -3.613471031188965, "loss": 1.0018, "nll_loss": 0.9471040964126587, "rewards/accuracies": 0.75, "rewards/chosen": -0.28623878955841064, "rewards/margins": 0.07510831952095032, "rewards/rejected": -0.36134713888168335, "step": 1199 }, { "epoch": 3.2854209445585214, "grad_norm": 3.513096809387207, "learning_rate": 8.357534246575341e-07, "log_odds_chosen": 0.3639751076698303, "log_odds_ratio": -0.6441094875335693, "logits/chosen": 0.7646625638008118, "logits/rejected": 0.7278956174850464, "logps/chosen": -2.23974347114563, "logps/rejected": -2.539661169052124, "loss": 1.1113, "nll_loss": 1.0468605756759644, "rewards/accuracies": 0.875, "rewards/chosen": -0.223974347114563, "rewards/margins": 0.02999177947640419, "rewards/rejected": -0.2539661228656769, "step": 1200 }, { "epoch": 3.2881587953456535, "grad_norm": 4.114560127258301, "learning_rate": 8.356164383561643e-07, "log_odds_chosen": 0.10777133703231812, "log_odds_ratio": -0.7290686368942261, "logits/chosen": 0.4761293828487396, "logits/rejected": 0.37857669591903687, "logps/chosen": -2.5422921180725098, "logps/rejected": -2.642415761947632, "loss": 1.1023, "nll_loss": 1.029363751411438, "rewards/accuracies": 0.5, "rewards/chosen": -0.25422921776771545, "rewards/margins": 0.010012365877628326, "rewards/rejected": -0.26424160599708557, "step": 1201 }, { "epoch": 3.2908966461327855, "grad_norm": 3.8303840160369873, "learning_rate": 8.354794520547945e-07, "log_odds_chosen": 0.4684981107711792, "log_odds_ratio": -0.5301193594932556, "logits/chosen": 0.49651819467544556, "logits/rejected": 0.4898267984390259, "logps/chosen": -2.1175222396850586, "logps/rejected": -2.5128374099731445, "loss": 0.8989, "nll_loss": 0.8459198474884033, "rewards/accuracies": 0.75, "rewards/chosen": -0.21175223588943481, "rewards/margins": 0.03953151777386665, "rewards/rejected": -0.25128376483917236, "step": 1202 }, { "epoch": 3.293634496919918, "grad_norm": 5.332648277282715, "learning_rate": 8.353424657534246e-07, "log_odds_chosen": 0.7836074829101562, "log_odds_ratio": -0.4589298963546753, "logits/chosen": 0.690497636795044, "logits/rejected": 0.7204298973083496, "logps/chosen": -2.7700910568237305, "logps/rejected": -3.51493501663208, "loss": 0.9321, "nll_loss": 0.8862180113792419, "rewards/accuracies": 0.75, "rewards/chosen": -0.27700912952423096, "rewards/margins": 0.07448439300060272, "rewards/rejected": -0.3514934778213501, "step": 1203 }, { "epoch": 3.29637234770705, "grad_norm": 4.330428600311279, "learning_rate": 8.352054794520547e-07, "log_odds_chosen": 0.10267674922943115, "log_odds_ratio": -0.6825987696647644, "logits/chosen": 0.7375929355621338, "logits/rejected": 0.7407664060592651, "logps/chosen": -2.3479561805725098, "logps/rejected": -2.403895854949951, "loss": 0.9731, "nll_loss": 0.9048486351966858, "rewards/accuracies": 0.5, "rewards/chosen": -0.23479561507701874, "rewards/margins": 0.00559399276971817, "rewards/rejected": -0.2403896152973175, "step": 1204 }, { "epoch": 3.299110198494182, "grad_norm": 4.45380973815918, "learning_rate": 8.350684931506848e-07, "log_odds_chosen": 0.6033481359481812, "log_odds_ratio": -0.4995094835758209, "logits/chosen": 0.6090818643569946, "logits/rejected": 0.5901279449462891, "logps/chosen": -2.703040361404419, "logps/rejected": -3.2775464057922363, "loss": 0.9903, "nll_loss": 0.9403658509254456, "rewards/accuracies": 0.875, "rewards/chosen": -0.27030402421951294, "rewards/margins": 0.057450633496046066, "rewards/rejected": -0.3277546763420105, "step": 1205 }, { "epoch": 3.3018480492813143, "grad_norm": 3.5310137271881104, "learning_rate": 8.34931506849315e-07, "log_odds_chosen": 0.5094343423843384, "log_odds_ratio": -0.7458832859992981, "logits/chosen": 0.6487100124359131, "logits/rejected": 0.6908708810806274, "logps/chosen": -2.9495139122009277, "logps/rejected": -3.4468584060668945, "loss": 0.8861, "nll_loss": 0.8114883303642273, "rewards/accuracies": 0.625, "rewards/chosen": -0.2949514091014862, "rewards/margins": 0.04973447695374489, "rewards/rejected": -0.3446858823299408, "step": 1206 }, { "epoch": 3.3045859000684463, "grad_norm": 5.354591369628906, "learning_rate": 8.347945205479451e-07, "log_odds_chosen": -0.3014082908630371, "log_odds_ratio": -0.9266042709350586, "logits/chosen": 0.4887927174568176, "logits/rejected": 0.5731726884841919, "logps/chosen": -3.1644208431243896, "logps/rejected": -2.853292942047119, "loss": 1.046, "nll_loss": 0.9533559083938599, "rewards/accuracies": 0.5, "rewards/chosen": -0.31644207239151, "rewards/margins": -0.03111279383301735, "rewards/rejected": -0.28532931208610535, "step": 1207 }, { "epoch": 3.3073237508555784, "grad_norm": 4.047664642333984, "learning_rate": 8.346575342465752e-07, "log_odds_chosen": 0.384906530380249, "log_odds_ratio": -0.5959039330482483, "logits/chosen": 0.4821227788925171, "logits/rejected": 0.51699298620224, "logps/chosen": -2.701526641845703, "logps/rejected": -3.052722930908203, "loss": 1.0296, "nll_loss": 0.9699903130531311, "rewards/accuracies": 0.875, "rewards/chosen": -0.27015265822410583, "rewards/margins": 0.03511960804462433, "rewards/rejected": -0.30527228116989136, "step": 1208 }, { "epoch": 3.3100616016427105, "grad_norm": 3.772576332092285, "learning_rate": 8.345205479452055e-07, "log_odds_chosen": 0.3884565532207489, "log_odds_ratio": -0.9116766452789307, "logits/chosen": 0.6220608353614807, "logits/rejected": 0.6589358448982239, "logps/chosen": -3.366215705871582, "logps/rejected": -3.7286019325256348, "loss": 0.9812, "nll_loss": 0.8900715112686157, "rewards/accuracies": 0.625, "rewards/chosen": -0.33662158250808716, "rewards/margins": 0.03623863309621811, "rewards/rejected": -0.3728601932525635, "step": 1209 }, { "epoch": 3.3127994524298425, "grad_norm": 3.3713998794555664, "learning_rate": 8.343835616438357e-07, "log_odds_chosen": 0.6787275075912476, "log_odds_ratio": -0.456128865480423, "logits/chosen": 0.5233676433563232, "logits/rejected": 0.44432154297828674, "logps/chosen": -2.176098108291626, "logps/rejected": -2.763820171356201, "loss": 1.0028, "nll_loss": 0.9571915864944458, "rewards/accuracies": 0.875, "rewards/chosen": -0.21760982275009155, "rewards/margins": 0.05877222865819931, "rewards/rejected": -0.2763820290565491, "step": 1210 }, { "epoch": 3.3155373032169746, "grad_norm": 4.325575828552246, "learning_rate": 8.342465753424657e-07, "log_odds_chosen": 0.26171720027923584, "log_odds_ratio": -0.6427972316741943, "logits/chosen": 0.7231416702270508, "logits/rejected": 0.6428864002227783, "logps/chosen": -2.2889115810394287, "logps/rejected": -2.489537477493286, "loss": 1.1221, "nll_loss": 1.0578371286392212, "rewards/accuracies": 0.75, "rewards/chosen": -0.22889116406440735, "rewards/margins": 0.020062588155269623, "rewards/rejected": -0.24895375967025757, "step": 1211 }, { "epoch": 3.3182751540041067, "grad_norm": 4.073899745941162, "learning_rate": 8.341095890410959e-07, "log_odds_chosen": 0.3796997368335724, "log_odds_ratio": -0.6708905100822449, "logits/chosen": 0.6780036687850952, "logits/rejected": 0.7507648468017578, "logps/chosen": -2.928544044494629, "logps/rejected": -3.2658584117889404, "loss": 0.8656, "nll_loss": 0.7985008358955383, "rewards/accuracies": 0.5, "rewards/chosen": -0.2928544282913208, "rewards/margins": 0.03373141959309578, "rewards/rejected": -0.3265858292579651, "step": 1212 }, { "epoch": 3.3210130047912387, "grad_norm": 3.4059042930603027, "learning_rate": 8.339726027397261e-07, "log_odds_chosen": 0.5821105241775513, "log_odds_ratio": -0.5607304573059082, "logits/chosen": 0.5203801989555359, "logits/rejected": 0.49237436056137085, "logps/chosen": -2.6351914405822754, "logps/rejected": -3.1916232109069824, "loss": 1.0115, "nll_loss": 0.9553873538970947, "rewards/accuracies": 0.75, "rewards/chosen": -0.26351913809776306, "rewards/margins": 0.05564320087432861, "rewards/rejected": -0.31916236877441406, "step": 1213 }, { "epoch": 3.323750855578371, "grad_norm": 3.297322988510132, "learning_rate": 8.338356164383561e-07, "log_odds_chosen": 0.4009625315666199, "log_odds_ratio": -0.5740178823471069, "logits/chosen": 0.7004504799842834, "logits/rejected": 0.7819386124610901, "logps/chosen": -2.6237311363220215, "logps/rejected": -3.0077099800109863, "loss": 0.8385, "nll_loss": 0.7810758352279663, "rewards/accuracies": 0.625, "rewards/chosen": -0.2623731195926666, "rewards/margins": 0.038397885859012604, "rewards/rejected": -0.30077099800109863, "step": 1214 }, { "epoch": 3.3264887063655033, "grad_norm": 3.1106905937194824, "learning_rate": 8.336986301369863e-07, "log_odds_chosen": 0.8013466596603394, "log_odds_ratio": -0.4331384599208832, "logits/chosen": 0.6228651404380798, "logits/rejected": 0.5643676519393921, "logps/chosen": -2.1009204387664795, "logps/rejected": -2.8356800079345703, "loss": 1.0514, "nll_loss": 1.0081285238265991, "rewards/accuracies": 0.875, "rewards/chosen": -0.21009203791618347, "rewards/margins": 0.07347597181797028, "rewards/rejected": -0.28356802463531494, "step": 1215 }, { "epoch": 3.3292265571526354, "grad_norm": 5.604055881500244, "learning_rate": 8.335616438356165e-07, "log_odds_chosen": -0.15769171714782715, "log_odds_ratio": -1.0890402793884277, "logits/chosen": 0.603325605392456, "logits/rejected": 0.6194043159484863, "logps/chosen": -3.514831066131592, "logps/rejected": -3.3228139877319336, "loss": 1.0837, "nll_loss": 0.9747976064682007, "rewards/accuracies": 0.5, "rewards/chosen": -0.3514831066131592, "rewards/margins": -0.019201716408133507, "rewards/rejected": -0.3322813808917999, "step": 1216 }, { "epoch": 3.3319644079397674, "grad_norm": 4.64747953414917, "learning_rate": 8.334246575342466e-07, "log_odds_chosen": 0.19139012694358826, "log_odds_ratio": -0.6934583783149719, "logits/chosen": 0.5291623473167419, "logits/rejected": 0.5649358630180359, "logps/chosen": -2.4497857093811035, "logps/rejected": -2.6222083568573, "loss": 1.0329, "nll_loss": 0.9635567665100098, "rewards/accuracies": 0.5, "rewards/chosen": -0.24497859179973602, "rewards/margins": 0.01724226027727127, "rewards/rejected": -0.2622208297252655, "step": 1217 }, { "epoch": 3.3347022587268995, "grad_norm": 3.4709856510162354, "learning_rate": 8.332876712328767e-07, "log_odds_chosen": 1.5321030616760254, "log_odds_ratio": -0.5048699378967285, "logits/chosen": 0.5340594053268433, "logits/rejected": 0.5252715945243835, "logps/chosen": -2.4155216217041016, "logps/rejected": -3.8131327629089355, "loss": 0.9344, "nll_loss": 0.8838937282562256, "rewards/accuracies": 0.75, "rewards/chosen": -0.2415521740913391, "rewards/margins": 0.1397610902786255, "rewards/rejected": -0.3813132643699646, "step": 1218 }, { "epoch": 3.3374401095140316, "grad_norm": 4.399225234985352, "learning_rate": 8.331506849315069e-07, "log_odds_chosen": 0.4968883991241455, "log_odds_ratio": -0.5293352603912354, "logits/chosen": 0.6826307773590088, "logits/rejected": 0.7589218020439148, "logps/chosen": -2.910630702972412, "logps/rejected": -3.3705027103424072, "loss": 0.9628, "nll_loss": 0.9098572731018066, "rewards/accuracies": 0.875, "rewards/chosen": -0.2910630702972412, "rewards/margins": 0.04598722234368324, "rewards/rejected": -0.33705028891563416, "step": 1219 }, { "epoch": 3.3401779603011637, "grad_norm": 4.194246768951416, "learning_rate": 8.33013698630137e-07, "log_odds_chosen": 0.23944751918315887, "log_odds_ratio": -0.7939628958702087, "logits/chosen": 0.639410138130188, "logits/rejected": 0.7030227780342102, "logps/chosen": -3.434751272201538, "logps/rejected": -3.6824333667755127, "loss": 0.9448, "nll_loss": 0.8654345273971558, "rewards/accuracies": 0.5, "rewards/chosen": -0.3434751331806183, "rewards/margins": 0.024768201634287834, "rewards/rejected": -0.36824333667755127, "step": 1220 }, { "epoch": 3.3429158110882957, "grad_norm": 3.702491283416748, "learning_rate": 8.328767123287671e-07, "log_odds_chosen": 0.514785647392273, "log_odds_ratio": -0.5410267114639282, "logits/chosen": 0.7703102231025696, "logits/rejected": 0.8227478861808777, "logps/chosen": -3.208714008331299, "logps/rejected": -3.7145943641662598, "loss": 0.8646, "nll_loss": 0.8104778528213501, "rewards/accuracies": 0.75, "rewards/chosen": -0.32087138295173645, "rewards/margins": 0.05058802664279938, "rewards/rejected": -0.371459424495697, "step": 1221 }, { "epoch": 3.345653661875428, "grad_norm": 3.4185290336608887, "learning_rate": 8.327397260273972e-07, "log_odds_chosen": 0.9177709817886353, "log_odds_ratio": -0.3928394615650177, "logits/chosen": 0.4754674434661865, "logits/rejected": 0.492038369178772, "logps/chosen": -2.0393893718719482, "logps/rejected": -2.8340723514556885, "loss": 1.0358, "nll_loss": 0.9964705109596252, "rewards/accuracies": 0.875, "rewards/chosen": -0.20393893122673035, "rewards/margins": 0.07946828752756119, "rewards/rejected": -0.2834072411060333, "step": 1222 }, { "epoch": 3.34839151266256, "grad_norm": 4.119829177856445, "learning_rate": 8.326027397260274e-07, "log_odds_chosen": 0.6046401262283325, "log_odds_ratio": -0.6436327695846558, "logits/chosen": 0.6039289832115173, "logits/rejected": 0.6517269015312195, "logps/chosen": -3.2022085189819336, "logps/rejected": -3.7632029056549072, "loss": 0.9172, "nll_loss": 0.8528561592102051, "rewards/accuracies": 0.75, "rewards/chosen": -0.32022085785865784, "rewards/margins": 0.05609945207834244, "rewards/rejected": -0.3763203024864197, "step": 1223 }, { "epoch": 3.351129363449692, "grad_norm": 3.9718453884124756, "learning_rate": 8.324657534246576e-07, "log_odds_chosen": 0.4689810276031494, "log_odds_ratio": -0.5391519069671631, "logits/chosen": 0.6670821309089661, "logits/rejected": 0.6979180574417114, "logps/chosen": -2.9479565620422363, "logps/rejected": -3.372066020965576, "loss": 0.9173, "nll_loss": 0.8633882999420166, "rewards/accuracies": 0.75, "rewards/chosen": -0.2947956621646881, "rewards/margins": 0.04241097718477249, "rewards/rejected": -0.3372066020965576, "step": 1224 }, { "epoch": 3.353867214236824, "grad_norm": 3.6131834983825684, "learning_rate": 8.323287671232876e-07, "log_odds_chosen": 0.7327512502670288, "log_odds_ratio": -0.4166860580444336, "logits/chosen": 0.7987618446350098, "logits/rejected": 0.817189633846283, "logps/chosen": -2.8244144916534424, "logps/rejected": -3.511815071105957, "loss": 0.9306, "nll_loss": 0.8889129161834717, "rewards/accuracies": 1.0, "rewards/chosen": -0.28244146704673767, "rewards/margins": 0.06874004006385803, "rewards/rejected": -0.3511814773082733, "step": 1225 }, { "epoch": 3.356605065023956, "grad_norm": 4.235471248626709, "learning_rate": 8.321917808219178e-07, "log_odds_chosen": -0.10855123400688171, "log_odds_ratio": -0.899331271648407, "logits/chosen": 0.5560705065727234, "logits/rejected": 0.5524415373802185, "logps/chosen": -3.2792317867279053, "logps/rejected": -3.1183042526245117, "loss": 1.0683, "nll_loss": 0.9783475995063782, "rewards/accuracies": 0.5, "rewards/chosen": -0.3279231786727905, "rewards/margins": -0.016092736274003983, "rewards/rejected": -0.31183040142059326, "step": 1226 }, { "epoch": 3.359342915811088, "grad_norm": 3.908247232437134, "learning_rate": 8.32054794520548e-07, "log_odds_chosen": 0.7588708400726318, "log_odds_ratio": -0.5689147710800171, "logits/chosen": 0.5737619996070862, "logits/rejected": 0.5770153403282166, "logps/chosen": -2.6470935344696045, "logps/rejected": -3.372607469558716, "loss": 0.998, "nll_loss": 0.9411096572875977, "rewards/accuracies": 0.75, "rewards/chosen": -0.26470935344696045, "rewards/margins": 0.07255140691995621, "rewards/rejected": -0.33726072311401367, "step": 1227 }, { "epoch": 3.36208076659822, "grad_norm": 4.7082414627075195, "learning_rate": 8.31917808219178e-07, "log_odds_chosen": -0.08630098402500153, "log_odds_ratio": -0.7876273393630981, "logits/chosen": 0.6404185891151428, "logits/rejected": 0.670769453048706, "logps/chosen": -2.9250173568725586, "logps/rejected": -2.8536813259124756, "loss": 1.0272, "nll_loss": 0.9484566450119019, "rewards/accuracies": 0.375, "rewards/chosen": -0.2925017476081848, "rewards/margins": -0.007133621722459793, "rewards/rejected": -0.2853681445121765, "step": 1228 }, { "epoch": 3.3648186173853523, "grad_norm": 3.7892091274261475, "learning_rate": 8.317808219178082e-07, "log_odds_chosen": 0.7892376184463501, "log_odds_ratio": -0.41471171379089355, "logits/chosen": 0.6767903566360474, "logits/rejected": 0.6849182844161987, "logps/chosen": -1.8555936813354492, "logps/rejected": -2.5307555198669434, "loss": 0.9557, "nll_loss": 0.9141960144042969, "rewards/accuracies": 0.875, "rewards/chosen": -0.18555936217308044, "rewards/margins": 0.06751620024442673, "rewards/rejected": -0.25307556986808777, "step": 1229 }, { "epoch": 3.3675564681724848, "grad_norm": 3.930997848510742, "learning_rate": 8.316438356164384e-07, "log_odds_chosen": 0.729240894317627, "log_odds_ratio": -0.4363866448402405, "logits/chosen": 0.5779477953910828, "logits/rejected": 0.462611585855484, "logps/chosen": -2.030454397201538, "logps/rejected": -2.684180736541748, "loss": 1.0684, "nll_loss": 1.0248074531555176, "rewards/accuracies": 0.875, "rewards/chosen": -0.20304545760154724, "rewards/margins": 0.06537263095378876, "rewards/rejected": -0.2684180736541748, "step": 1230 }, { "epoch": 3.370294318959617, "grad_norm": 2.7269232273101807, "learning_rate": 8.315068493150684e-07, "log_odds_chosen": 0.7531948089599609, "log_odds_ratio": -0.4990655481815338, "logits/chosen": 0.6516648530960083, "logits/rejected": 0.6508432626724243, "logps/chosen": -2.482257843017578, "logps/rejected": -3.195500373840332, "loss": 0.994, "nll_loss": 0.9441391825675964, "rewards/accuracies": 0.75, "rewards/chosen": -0.24822579324245453, "rewards/margins": 0.07132424414157867, "rewards/rejected": -0.3195500373840332, "step": 1231 }, { "epoch": 3.373032169746749, "grad_norm": 3.33778715133667, "learning_rate": 8.313698630136986e-07, "log_odds_chosen": 0.46644896268844604, "log_odds_ratio": -0.6041030287742615, "logits/chosen": 0.4763669967651367, "logits/rejected": 0.6033019423484802, "logps/chosen": -2.7879202365875244, "logps/rejected": -3.2658092975616455, "loss": 0.9743, "nll_loss": 0.9139378070831299, "rewards/accuracies": 0.625, "rewards/chosen": -0.27879202365875244, "rewards/margins": 0.04778888449072838, "rewards/rejected": -0.3265809118747711, "step": 1232 }, { "epoch": 3.375770020533881, "grad_norm": 3.3442487716674805, "learning_rate": 8.312328767123288e-07, "log_odds_chosen": 0.7274479866027832, "log_odds_ratio": -0.5004078149795532, "logits/chosen": 0.7514420747756958, "logits/rejected": 0.7482056617736816, "logps/chosen": -2.0492916107177734, "logps/rejected": -2.6866517066955566, "loss": 0.9363, "nll_loss": 0.8862723112106323, "rewards/accuracies": 0.75, "rewards/chosen": -0.2049291729927063, "rewards/margins": 0.06373599171638489, "rewards/rejected": -0.2686651647090912, "step": 1233 }, { "epoch": 3.378507871321013, "grad_norm": 4.458950519561768, "learning_rate": 8.310958904109589e-07, "log_odds_chosen": -0.07001128792762756, "log_odds_ratio": -0.8578697443008423, "logits/chosen": 0.7049985527992249, "logits/rejected": 0.6800980567932129, "logps/chosen": -2.401442527770996, "logps/rejected": -2.31974720954895, "loss": 1.0302, "nll_loss": 0.9444270730018616, "rewards/accuracies": 0.75, "rewards/chosen": -0.2401442527770996, "rewards/margins": -0.008169539272785187, "rewards/rejected": -0.2319747358560562, "step": 1234 }, { "epoch": 3.381245722108145, "grad_norm": 3.0236611366271973, "learning_rate": 8.30958904109589e-07, "log_odds_chosen": 0.7043226957321167, "log_odds_ratio": -0.47706329822540283, "logits/chosen": 0.5593956708908081, "logits/rejected": 0.5392420887947083, "logps/chosen": -2.5293595790863037, "logps/rejected": -3.183415412902832, "loss": 1.0042, "nll_loss": 0.9564509391784668, "rewards/accuracies": 0.875, "rewards/chosen": -0.2529359459877014, "rewards/margins": 0.06540559232234955, "rewards/rejected": -0.31834152340888977, "step": 1235 }, { "epoch": 3.383983572895277, "grad_norm": 3.578198194503784, "learning_rate": 8.308219178082191e-07, "log_odds_chosen": 1.0227386951446533, "log_odds_ratio": -0.529865026473999, "logits/chosen": 0.6096093654632568, "logits/rejected": 0.6059153079986572, "logps/chosen": -2.493260622024536, "logps/rejected": -3.4883034229278564, "loss": 0.9811, "nll_loss": 0.9281361103057861, "rewards/accuracies": 0.75, "rewards/chosen": -0.24932608008384705, "rewards/margins": 0.09950428456068039, "rewards/rejected": -0.34883037209510803, "step": 1236 }, { "epoch": 3.3867214236824092, "grad_norm": 5.474441051483154, "learning_rate": 8.306849315068493e-07, "log_odds_chosen": 0.927161693572998, "log_odds_ratio": -0.42795631289482117, "logits/chosen": 0.6704661846160889, "logits/rejected": 0.6689473986625671, "logps/chosen": -2.503528356552124, "logps/rejected": -3.388617515563965, "loss": 0.9689, "nll_loss": 0.9260873198509216, "rewards/accuracies": 0.875, "rewards/chosen": -0.2503528296947479, "rewards/margins": 0.08850891888141632, "rewards/rejected": -0.33886176347732544, "step": 1237 }, { "epoch": 3.3894592744695413, "grad_norm": 3.673889398574829, "learning_rate": 8.305479452054795e-07, "log_odds_chosen": 0.706443727016449, "log_odds_ratio": -0.45931145548820496, "logits/chosen": 0.6053794026374817, "logits/rejected": 0.6514294147491455, "logps/chosen": -2.991440773010254, "logps/rejected": -3.6519596576690674, "loss": 0.9778, "nll_loss": 0.9318442344665527, "rewards/accuracies": 0.75, "rewards/chosen": -0.29914408922195435, "rewards/margins": 0.06605187803506851, "rewards/rejected": -0.36519598960876465, "step": 1238 }, { "epoch": 3.3921971252566734, "grad_norm": 3.1307871341705322, "learning_rate": 8.304109589041095e-07, "log_odds_chosen": 0.6596871614456177, "log_odds_ratio": -0.49039945006370544, "logits/chosen": 0.6639996767044067, "logits/rejected": 0.674008846282959, "logps/chosen": -2.2823240756988525, "logps/rejected": -2.895846128463745, "loss": 0.9421, "nll_loss": 0.8930236101150513, "rewards/accuracies": 0.75, "rewards/chosen": -0.22823241353034973, "rewards/margins": 0.0613521933555603, "rewards/rejected": -0.28958460688591003, "step": 1239 }, { "epoch": 3.3949349760438055, "grad_norm": 3.16034197807312, "learning_rate": 8.302739726027397e-07, "log_odds_chosen": 0.7405762672424316, "log_odds_ratio": -0.426140695810318, "logits/chosen": 0.7437959313392639, "logits/rejected": 0.6865965127944946, "logps/chosen": -2.291469097137451, "logps/rejected": -2.9797987937927246, "loss": 0.9483, "nll_loss": 0.9056538343429565, "rewards/accuracies": 1.0, "rewards/chosen": -0.22914689779281616, "rewards/margins": 0.06883297860622406, "rewards/rejected": -0.2979798913002014, "step": 1240 }, { "epoch": 3.3976728268309375, "grad_norm": 4.103254795074463, "learning_rate": 8.301369863013699e-07, "log_odds_chosen": 0.6811065673828125, "log_odds_ratio": -0.4601294696331024, "logits/chosen": 0.748043417930603, "logits/rejected": 0.8305143713951111, "logps/chosen": -2.4382333755493164, "logps/rejected": -3.079183340072632, "loss": 0.7913, "nll_loss": 0.7452390193939209, "rewards/accuracies": 0.75, "rewards/chosen": -0.2438233345746994, "rewards/margins": 0.06409499794244766, "rewards/rejected": -0.30791833996772766, "step": 1241 }, { "epoch": 3.40041067761807, "grad_norm": 4.904205322265625, "learning_rate": 8.299999999999999e-07, "log_odds_chosen": 0.494849294424057, "log_odds_ratio": -0.5562218427658081, "logits/chosen": 0.7707716226577759, "logits/rejected": 0.8566265106201172, "logps/chosen": -3.1948604583740234, "logps/rejected": -3.667370080947876, "loss": 0.9798, "nll_loss": 0.9242036938667297, "rewards/accuracies": 0.75, "rewards/chosen": -0.3194860517978668, "rewards/margins": 0.04725097492337227, "rewards/rejected": -0.3667370080947876, "step": 1242 }, { "epoch": 3.403148528405202, "grad_norm": 3.9845151901245117, "learning_rate": 8.298630136986301e-07, "log_odds_chosen": -0.5507791638374329, "log_odds_ratio": -1.2255945205688477, "logits/chosen": 0.5687879323959351, "logits/rejected": 0.6044353246688843, "logps/chosen": -3.355733871459961, "logps/rejected": -2.763294219970703, "loss": 1.1074, "nll_loss": 0.9848486185073853, "rewards/accuracies": 0.5, "rewards/chosen": -0.3355734348297119, "rewards/margins": -0.059243977069854736, "rewards/rejected": -0.2763294279575348, "step": 1243 }, { "epoch": 3.405886379192334, "grad_norm": 4.2067952156066895, "learning_rate": 8.297260273972603e-07, "log_odds_chosen": 1.1438875198364258, "log_odds_ratio": -0.515210747718811, "logits/chosen": 0.4143923819065094, "logits/rejected": 0.35709935426712036, "logps/chosen": -2.1807758808135986, "logps/rejected": -3.2435548305511475, "loss": 1.0223, "nll_loss": 0.9707633256912231, "rewards/accuracies": 0.625, "rewards/chosen": -0.21807760000228882, "rewards/margins": 0.10627786815166473, "rewards/rejected": -0.32435548305511475, "step": 1244 }, { "epoch": 3.4086242299794662, "grad_norm": 3.3089160919189453, "learning_rate": 8.295890410958903e-07, "log_odds_chosen": 1.0537514686584473, "log_odds_ratio": -0.49829429388046265, "logits/chosen": 0.5450564622879028, "logits/rejected": 0.5883222222328186, "logps/chosen": -2.2457356452941895, "logps/rejected": -3.154751777648926, "loss": 0.91, "nll_loss": 0.8601272702217102, "rewards/accuracies": 0.75, "rewards/chosen": -0.22457355260849, "rewards/margins": 0.09090159833431244, "rewards/rejected": -0.315475195646286, "step": 1245 }, { "epoch": 3.4113620807665983, "grad_norm": 5.47108793258667, "learning_rate": 8.294520547945205e-07, "log_odds_chosen": 0.9370583891868591, "log_odds_ratio": -0.450946182012558, "logits/chosen": 0.6958523988723755, "logits/rejected": 0.6972953081130981, "logps/chosen": -3.202500104904175, "logps/rejected": -4.090814113616943, "loss": 0.9999, "nll_loss": 0.9548441171646118, "rewards/accuracies": 0.875, "rewards/chosen": -0.320250004529953, "rewards/margins": 0.08883139491081238, "rewards/rejected": -0.40908142924308777, "step": 1246 }, { "epoch": 3.4140999315537304, "grad_norm": 4.128076076507568, "learning_rate": 8.293150684931507e-07, "log_odds_chosen": 1.1529767513275146, "log_odds_ratio": -0.4972120523452759, "logits/chosen": 0.5306061506271362, "logits/rejected": 0.4817967414855957, "logps/chosen": -2.2637267112731934, "logps/rejected": -3.3513095378875732, "loss": 0.9873, "nll_loss": 0.9376154541969299, "rewards/accuracies": 0.75, "rewards/chosen": -0.22637268900871277, "rewards/margins": 0.10875827819108963, "rewards/rejected": -0.3351309895515442, "step": 1247 }, { "epoch": 3.4168377823408624, "grad_norm": 4.31764554977417, "learning_rate": 8.291780821917808e-07, "log_odds_chosen": 0.3385559618473053, "log_odds_ratio": -0.5947219729423523, "logits/chosen": 0.7174443006515503, "logits/rejected": 0.6936532258987427, "logps/chosen": -2.353806495666504, "logps/rejected": -2.626962184906006, "loss": 0.9404, "nll_loss": 0.8808807134628296, "rewards/accuracies": 0.625, "rewards/chosen": -0.23538067936897278, "rewards/margins": 0.027315564453601837, "rewards/rejected": -0.262696236371994, "step": 1248 }, { "epoch": 3.4195756331279945, "grad_norm": 4.41095495223999, "learning_rate": 8.290410958904109e-07, "log_odds_chosen": 1.086887240409851, "log_odds_ratio": -0.5241560339927673, "logits/chosen": 0.6693401336669922, "logits/rejected": 0.5859998464584351, "logps/chosen": -2.059399127960205, "logps/rejected": -3.0740227699279785, "loss": 1.0364, "nll_loss": 0.9840250015258789, "rewards/accuracies": 0.625, "rewards/chosen": -0.20593991875648499, "rewards/margins": 0.10146236419677734, "rewards/rejected": -0.30740225315093994, "step": 1249 }, { "epoch": 3.4223134839151266, "grad_norm": 4.660037517547607, "learning_rate": 8.289041095890411e-07, "log_odds_chosen": 0.9044642448425293, "log_odds_ratio": -0.39494141936302185, "logits/chosen": 0.5794812440872192, "logits/rejected": 0.5401406288146973, "logps/chosen": -2.2942280769348145, "logps/rejected": -3.1025772094726562, "loss": 0.8468, "nll_loss": 0.8072730302810669, "rewards/accuracies": 0.875, "rewards/chosen": -0.22942283749580383, "rewards/margins": 0.08083490282297134, "rewards/rejected": -0.3102577328681946, "step": 1250 }, { "epoch": 3.4250513347022586, "grad_norm": 3.986760377883911, "learning_rate": 8.287671232876712e-07, "log_odds_chosen": 0.41767001152038574, "log_odds_ratio": -0.5778652429580688, "logits/chosen": 0.686120331287384, "logits/rejected": 0.6444529294967651, "logps/chosen": -2.5292563438415527, "logps/rejected": -2.9035868644714355, "loss": 1.0068, "nll_loss": 0.9490564465522766, "rewards/accuracies": 0.75, "rewards/chosen": -0.2529256343841553, "rewards/margins": 0.03743303567171097, "rewards/rejected": -0.29035866260528564, "step": 1251 }, { "epoch": 3.4277891854893907, "grad_norm": 3.407118558883667, "learning_rate": 8.286301369863013e-07, "log_odds_chosen": 0.7478606700897217, "log_odds_ratio": -0.4861793518066406, "logits/chosen": 0.5684864521026611, "logits/rejected": 0.5557000637054443, "logps/chosen": -2.377578020095825, "logps/rejected": -3.037243604660034, "loss": 0.9053, "nll_loss": 0.8566672801971436, "rewards/accuracies": 0.875, "rewards/chosen": -0.23775780200958252, "rewards/margins": 0.06596657633781433, "rewards/rejected": -0.30372437834739685, "step": 1252 }, { "epoch": 3.430527036276523, "grad_norm": 2.905797004699707, "learning_rate": 8.284931506849314e-07, "log_odds_chosen": 0.7168738842010498, "log_odds_ratio": -0.4730181097984314, "logits/chosen": 0.6076016426086426, "logits/rejected": 0.583530843257904, "logps/chosen": -2.289836883544922, "logps/rejected": -2.9806599617004395, "loss": 0.8996, "nll_loss": 0.852263331413269, "rewards/accuracies": 0.875, "rewards/chosen": -0.22898370027542114, "rewards/margins": 0.06908230483531952, "rewards/rejected": -0.29806602001190186, "step": 1253 }, { "epoch": 3.433264887063655, "grad_norm": 3.280907392501831, "learning_rate": 8.283561643835616e-07, "log_odds_chosen": 0.6465619802474976, "log_odds_ratio": -0.5049428343772888, "logits/chosen": 0.5977112650871277, "logits/rejected": 0.4926482141017914, "logps/chosen": -1.9735379219055176, "logps/rejected": -2.541825294494629, "loss": 1.0379, "nll_loss": 0.9874235987663269, "rewards/accuracies": 0.75, "rewards/chosen": -0.1973538100719452, "rewards/margins": 0.056828729808330536, "rewards/rejected": -0.25418251752853394, "step": 1254 }, { "epoch": 3.436002737850787, "grad_norm": 4.116805553436279, "learning_rate": 8.282191780821918e-07, "log_odds_chosen": -0.17325715720653534, "log_odds_ratio": -0.87417072057724, "logits/chosen": 0.48880714178085327, "logits/rejected": 0.41004878282546997, "logps/chosen": -2.1747686862945557, "logps/rejected": -1.9928343296051025, "loss": 1.0546, "nll_loss": 0.9671581983566284, "rewards/accuracies": 0.5, "rewards/chosen": -0.21747688949108124, "rewards/margins": -0.018193457275629044, "rewards/rejected": -0.1992834210395813, "step": 1255 }, { "epoch": 3.4387405886379194, "grad_norm": 2.93910813331604, "learning_rate": 8.280821917808218e-07, "log_odds_chosen": 1.5439751148223877, "log_odds_ratio": -0.3785213530063629, "logits/chosen": 0.7705956697463989, "logits/rejected": 0.8321250081062317, "logps/chosen": -2.6072628498077393, "logps/rejected": -4.083583831787109, "loss": 0.8684, "nll_loss": 0.8305172920227051, "rewards/accuracies": 0.875, "rewards/chosen": -0.26072627305984497, "rewards/margins": 0.14763206243515015, "rewards/rejected": -0.4083583950996399, "step": 1256 }, { "epoch": 3.4414784394250515, "grad_norm": 3.5040555000305176, "learning_rate": 8.27945205479452e-07, "log_odds_chosen": 1.1048061847686768, "log_odds_ratio": -0.39025241136550903, "logits/chosen": 0.6676453351974487, "logits/rejected": 0.6592438817024231, "logps/chosen": -2.5013818740844727, "logps/rejected": -3.516967296600342, "loss": 1.0157, "nll_loss": 0.9766319394111633, "rewards/accuracies": 0.875, "rewards/chosen": -0.25013816356658936, "rewards/margins": 0.10155857354402542, "rewards/rejected": -0.35169675946235657, "step": 1257 }, { "epoch": 3.4442162902121836, "grad_norm": 3.1020264625549316, "learning_rate": 8.278082191780822e-07, "log_odds_chosen": 0.6364712119102478, "log_odds_ratio": -0.6030253767967224, "logits/chosen": 0.6054370403289795, "logits/rejected": 0.6405017375946045, "logps/chosen": -2.1711180210113525, "logps/rejected": -2.7702994346618652, "loss": 0.9921, "nll_loss": 0.9317710399627686, "rewards/accuracies": 0.75, "rewards/chosen": -0.21711181104183197, "rewards/margins": 0.059918150305747986, "rewards/rejected": -0.27702996134757996, "step": 1258 }, { "epoch": 3.4469541409993156, "grad_norm": 3.494417190551758, "learning_rate": 8.276712328767122e-07, "log_odds_chosen": 0.45127981901168823, "log_odds_ratio": -0.6377688646316528, "logits/chosen": 0.7518371343612671, "logits/rejected": 0.7540256977081299, "logps/chosen": -3.0064697265625, "logps/rejected": -3.3982481956481934, "loss": 0.9647, "nll_loss": 0.9009263515472412, "rewards/accuracies": 0.75, "rewards/chosen": -0.30064693093299866, "rewards/margins": 0.03917785733938217, "rewards/rejected": -0.3398247957229614, "step": 1259 }, { "epoch": 3.4496919917864477, "grad_norm": 3.503199815750122, "learning_rate": 8.275342465753424e-07, "log_odds_chosen": 0.4321105182170868, "log_odds_ratio": -0.6082203388214111, "logits/chosen": 0.6752294898033142, "logits/rejected": 0.7280045747756958, "logps/chosen": -2.8379342555999756, "logps/rejected": -3.2442941665649414, "loss": 0.9329, "nll_loss": 0.8720840215682983, "rewards/accuracies": 0.75, "rewards/chosen": -0.2837934195995331, "rewards/margins": 0.04063602536916733, "rewards/rejected": -0.324429452419281, "step": 1260 }, { "epoch": 3.4524298425735798, "grad_norm": 3.5399017333984375, "learning_rate": 8.273972602739726e-07, "log_odds_chosen": 0.9331747889518738, "log_odds_ratio": -0.3656681180000305, "logits/chosen": 0.6876106858253479, "logits/rejected": 0.7058961391448975, "logps/chosen": -2.198791027069092, "logps/rejected": -3.06177020072937, "loss": 0.8149, "nll_loss": 0.7783719301223755, "rewards/accuracies": 0.875, "rewards/chosen": -0.2198791354894638, "rewards/margins": 0.0862978845834732, "rewards/rejected": -0.306177020072937, "step": 1261 }, { "epoch": 3.455167693360712, "grad_norm": 3.6687867641448975, "learning_rate": 8.272602739726027e-07, "log_odds_chosen": 1.244520902633667, "log_odds_ratio": -0.3314476013183594, "logits/chosen": 0.45718568563461304, "logits/rejected": 0.44203487038612366, "logps/chosen": -2.080578565597534, "logps/rejected": -3.2161338329315186, "loss": 0.97, "nll_loss": 0.9368162155151367, "rewards/accuracies": 0.875, "rewards/chosen": -0.20805785059928894, "rewards/margins": 0.11355551332235336, "rewards/rejected": -0.3216133713722229, "step": 1262 }, { "epoch": 3.457905544147844, "grad_norm": 4.279610633850098, "learning_rate": 8.271232876712328e-07, "log_odds_chosen": 0.4862634837627411, "log_odds_ratio": -0.5436676740646362, "logits/chosen": 0.5502418279647827, "logits/rejected": 0.5922784805297852, "logps/chosen": -2.5132155418395996, "logps/rejected": -2.960116386413574, "loss": 0.9925, "nll_loss": 0.938114583492279, "rewards/accuracies": 0.75, "rewards/chosen": -0.25132158398628235, "rewards/margins": 0.044690072536468506, "rewards/rejected": -0.29601162672042847, "step": 1263 }, { "epoch": 3.460643394934976, "grad_norm": 4.245423316955566, "learning_rate": 8.26986301369863e-07, "log_odds_chosen": -0.07234300673007965, "log_odds_ratio": -0.7931567430496216, "logits/chosen": 0.5115898847579956, "logits/rejected": 0.4891294538974762, "logps/chosen": -2.646656036376953, "logps/rejected": -2.575556755065918, "loss": 1.0771, "nll_loss": 0.9978251457214355, "rewards/accuracies": 0.5, "rewards/chosen": -0.2646656036376953, "rewards/margins": -0.007109895348548889, "rewards/rejected": -0.25755569338798523, "step": 1264 }, { "epoch": 3.463381245722108, "grad_norm": 3.4109835624694824, "learning_rate": 8.268493150684931e-07, "log_odds_chosen": 0.8305820226669312, "log_odds_ratio": -0.40556472539901733, "logits/chosen": 0.7138262987136841, "logits/rejected": 0.747331976890564, "logps/chosen": -2.0332629680633545, "logps/rejected": -2.772399663925171, "loss": 1.019, "nll_loss": 0.9784538745880127, "rewards/accuracies": 0.875, "rewards/chosen": -0.2033262997865677, "rewards/margins": 0.07391366362571716, "rewards/rejected": -0.27723997831344604, "step": 1265 }, { "epoch": 3.46611909650924, "grad_norm": 4.563020706176758, "learning_rate": 8.267123287671232e-07, "log_odds_chosen": 0.4117274880409241, "log_odds_ratio": -0.6903125047683716, "logits/chosen": 0.6904593706130981, "logits/rejected": 0.6350846290588379, "logps/chosen": -2.460491418838501, "logps/rejected": -2.8194236755371094, "loss": 1.0673, "nll_loss": 0.9982364177703857, "rewards/accuracies": 0.75, "rewards/chosen": -0.24604913592338562, "rewards/margins": 0.03589322417974472, "rewards/rejected": -0.28194236755371094, "step": 1266 }, { "epoch": 3.468856947296372, "grad_norm": 3.785447597503662, "learning_rate": 8.265753424657533e-07, "log_odds_chosen": 0.6263097524642944, "log_odds_ratio": -0.5269737243652344, "logits/chosen": 0.6148589849472046, "logits/rejected": 0.6684092283248901, "logps/chosen": -2.846479892730713, "logps/rejected": -3.4448800086975098, "loss": 1.0218, "nll_loss": 0.969068706035614, "rewards/accuracies": 0.5, "rewards/chosen": -0.28464800119400024, "rewards/margins": 0.059840016067028046, "rewards/rejected": -0.3444879949092865, "step": 1267 }, { "epoch": 3.4715947980835047, "grad_norm": 6.770805358886719, "learning_rate": 8.264383561643835e-07, "log_odds_chosen": 1.0994033813476562, "log_odds_ratio": -0.42289361357688904, "logits/chosen": 0.7534750699996948, "logits/rejected": 0.7894294857978821, "logps/chosen": -2.6702404022216797, "logps/rejected": -3.7306525707244873, "loss": 0.8986, "nll_loss": 0.8562619686126709, "rewards/accuracies": 0.625, "rewards/chosen": -0.2670240104198456, "rewards/margins": 0.10604124516248703, "rewards/rejected": -0.3730652332305908, "step": 1268 }, { "epoch": 3.4743326488706368, "grad_norm": 2.918755054473877, "learning_rate": 8.263013698630137e-07, "log_odds_chosen": 1.6785333156585693, "log_odds_ratio": -0.2908470928668976, "logits/chosen": 0.4742478132247925, "logits/rejected": 0.48380380868911743, "logps/chosen": -2.096691846847534, "logps/rejected": -3.6711416244506836, "loss": 0.9563, "nll_loss": 0.9271688461303711, "rewards/accuracies": 0.875, "rewards/chosen": -0.20966918766498566, "rewards/margins": 0.15744496881961823, "rewards/rejected": -0.36711418628692627, "step": 1269 }, { "epoch": 3.477070499657769, "grad_norm": 3.2856411933898926, "learning_rate": 8.261643835616437e-07, "log_odds_chosen": 0.782578706741333, "log_odds_ratio": -0.41707417368888855, "logits/chosen": 0.48702430725097656, "logits/rejected": 0.4453278183937073, "logps/chosen": -2.163465738296509, "logps/rejected": -2.8792481422424316, "loss": 1.041, "nll_loss": 0.9993183016777039, "rewards/accuracies": 1.0, "rewards/chosen": -0.21634656190872192, "rewards/margins": 0.0715782642364502, "rewards/rejected": -0.2879248261451721, "step": 1270 }, { "epoch": 3.479808350444901, "grad_norm": 3.4425787925720215, "learning_rate": 8.260273972602739e-07, "log_odds_chosen": 0.6971399188041687, "log_odds_ratio": -0.4277856647968292, "logits/chosen": 0.5844355225563049, "logits/rejected": 0.6205289363861084, "logps/chosen": -2.450209379196167, "logps/rejected": -3.0783238410949707, "loss": 0.8915, "nll_loss": 0.8487180471420288, "rewards/accuracies": 0.875, "rewards/chosen": -0.24502095580101013, "rewards/margins": 0.0628114640712738, "rewards/rejected": -0.30783239006996155, "step": 1271 }, { "epoch": 3.482546201232033, "grad_norm": 5.0058207511901855, "learning_rate": 8.258904109589041e-07, "log_odds_chosen": 0.48996561765670776, "log_odds_ratio": -0.5051102638244629, "logits/chosen": 0.570440948009491, "logits/rejected": 0.5187564492225647, "logps/chosen": -2.49464750289917, "logps/rejected": -2.9537980556488037, "loss": 1.0162, "nll_loss": 0.9656546115875244, "rewards/accuracies": 0.875, "rewards/chosen": -0.249464750289917, "rewards/margins": 0.04591505229473114, "rewards/rejected": -0.2953798174858093, "step": 1272 }, { "epoch": 3.485284052019165, "grad_norm": 4.11045503616333, "learning_rate": 8.257534246575341e-07, "log_odds_chosen": 0.026136890053749084, "log_odds_ratio": -0.757068395614624, "logits/chosen": 0.8645973205566406, "logits/rejected": 0.8559261560440063, "logps/chosen": -2.767068386077881, "logps/rejected": -2.8310163021087646, "loss": 1.0084, "nll_loss": 0.9326961040496826, "rewards/accuracies": 0.5, "rewards/chosen": -0.27670687437057495, "rewards/margins": 0.006394794210791588, "rewards/rejected": -0.2831016182899475, "step": 1273 }, { "epoch": 3.488021902806297, "grad_norm": 3.3948347568511963, "learning_rate": 8.256164383561643e-07, "log_odds_chosen": 0.6940100193023682, "log_odds_ratio": -0.514113187789917, "logits/chosen": 0.8468614220619202, "logits/rejected": 0.8991842269897461, "logps/chosen": -2.5055432319641113, "logps/rejected": -3.115605354309082, "loss": 0.9103, "nll_loss": 0.8589301109313965, "rewards/accuracies": 0.75, "rewards/chosen": -0.25055432319641113, "rewards/margins": 0.06100623309612274, "rewards/rejected": -0.31156057119369507, "step": 1274 }, { "epoch": 3.490759753593429, "grad_norm": 3.8053839206695557, "learning_rate": 8.254794520547945e-07, "log_odds_chosen": 0.09885256737470627, "log_odds_ratio": -0.6630311012268066, "logits/chosen": 0.5398303270339966, "logits/rejected": 0.5496572256088257, "logps/chosen": -2.164888381958008, "logps/rejected": -2.239875555038452, "loss": 1.0523, "nll_loss": 0.9860134124755859, "rewards/accuracies": 0.5, "rewards/chosen": -0.21648883819580078, "rewards/margins": 0.0074987271800637245, "rewards/rejected": -0.22398756444454193, "step": 1275 }, { "epoch": 3.4934976043805612, "grad_norm": 4.130179405212402, "learning_rate": 8.253424657534246e-07, "log_odds_chosen": 0.7313936948776245, "log_odds_ratio": -0.5033732652664185, "logits/chosen": 0.6096269488334656, "logits/rejected": 0.5350373387336731, "logps/chosen": -2.6697006225585938, "logps/rejected": -3.348724842071533, "loss": 1.0016, "nll_loss": 0.9512546062469482, "rewards/accuracies": 0.75, "rewards/chosen": -0.26697006821632385, "rewards/margins": 0.06790241599082947, "rewards/rejected": -0.33487245440483093, "step": 1276 }, { "epoch": 3.4962354551676933, "grad_norm": 5.012160778045654, "learning_rate": 8.252054794520547e-07, "log_odds_chosen": 0.8347576856613159, "log_odds_ratio": -0.4291173815727234, "logits/chosen": 0.545703649520874, "logits/rejected": 0.5719910860061646, "logps/chosen": -2.663351058959961, "logps/rejected": -3.4500174522399902, "loss": 0.9701, "nll_loss": 0.9271754622459412, "rewards/accuracies": 1.0, "rewards/chosen": -0.266335129737854, "rewards/margins": 0.07866665720939636, "rewards/rejected": -0.345001757144928, "step": 1277 }, { "epoch": 3.4989733059548254, "grad_norm": 3.89709734916687, "learning_rate": 8.250684931506849e-07, "log_odds_chosen": 0.7266290187835693, "log_odds_ratio": -0.45683252811431885, "logits/chosen": 0.6833093762397766, "logits/rejected": 0.7052648663520813, "logps/chosen": -2.311957359313965, "logps/rejected": -2.965028762817383, "loss": 0.9157, "nll_loss": 0.8700528740882874, "rewards/accuracies": 0.875, "rewards/chosen": -0.23119574785232544, "rewards/margins": 0.06530715525150299, "rewards/rejected": -0.29650288820266724, "step": 1278 }, { "epoch": 3.5017111567419574, "grad_norm": 3.6533727645874023, "learning_rate": 8.24931506849315e-07, "log_odds_chosen": 0.1762131005525589, "log_odds_ratio": -0.8193487524986267, "logits/chosen": 0.6046522855758667, "logits/rejected": 0.6036309003829956, "logps/chosen": -2.3459086418151855, "logps/rejected": -2.54591703414917, "loss": 1.0854, "nll_loss": 1.0034416913986206, "rewards/accuracies": 0.5, "rewards/chosen": -0.23459087312221527, "rewards/margins": 0.020000841468572617, "rewards/rejected": -0.254591703414917, "step": 1279 }, { "epoch": 3.5044490075290895, "grad_norm": 4.225555896759033, "learning_rate": 8.247945205479451e-07, "log_odds_chosen": 0.3055926263332367, "log_odds_ratio": -0.5838691592216492, "logits/chosen": 0.4081953167915344, "logits/rejected": 0.3461018204689026, "logps/chosen": -2.356557607650757, "logps/rejected": -2.6142358779907227, "loss": 1.0505, "nll_loss": 0.9921250343322754, "rewards/accuracies": 0.75, "rewards/chosen": -0.2356557548046112, "rewards/margins": 0.02576783299446106, "rewards/rejected": -0.26142358779907227, "step": 1280 }, { "epoch": 3.5071868583162216, "grad_norm": 3.4799768924713135, "learning_rate": 8.246575342465753e-07, "log_odds_chosen": 1.1228752136230469, "log_odds_ratio": -0.48489633202552795, "logits/chosen": 0.7083851099014282, "logits/rejected": 0.6943188905715942, "logps/chosen": -2.6584722995758057, "logps/rejected": -3.7673282623291016, "loss": 1.0283, "nll_loss": 0.9798128604888916, "rewards/accuracies": 0.75, "rewards/chosen": -0.26584723591804504, "rewards/margins": 0.1108856052160263, "rewards/rejected": -0.37673282623291016, "step": 1281 }, { "epoch": 3.5099247091033536, "grad_norm": 3.4043567180633545, "learning_rate": 8.245205479452054e-07, "log_odds_chosen": 0.7175499200820923, "log_odds_ratio": -0.43186309933662415, "logits/chosen": 0.48353907465934753, "logits/rejected": 0.43692973256111145, "logps/chosen": -1.97560453414917, "logps/rejected": -2.6158084869384766, "loss": 1.0078, "nll_loss": 0.964611291885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.19756045937538147, "rewards/margins": 0.06402041018009186, "rewards/rejected": -0.26158085465431213, "step": 1282 }, { "epoch": 3.5126625598904857, "grad_norm": 4.496090888977051, "learning_rate": 8.243835616438356e-07, "log_odds_chosen": 0.48334574699401855, "log_odds_ratio": -0.6889634132385254, "logits/chosen": 0.7601168751716614, "logits/rejected": 0.8565002679824829, "logps/chosen": -3.571589708328247, "logps/rejected": -4.01831579208374, "loss": 0.9188, "nll_loss": 0.849931001663208, "rewards/accuracies": 0.75, "rewards/chosen": -0.35715898871421814, "rewards/margins": 0.04467261955142021, "rewards/rejected": -0.40183159708976746, "step": 1283 }, { "epoch": 3.515400410677618, "grad_norm": 4.090616703033447, "learning_rate": 8.242465753424656e-07, "log_odds_chosen": 0.5710161924362183, "log_odds_ratio": -0.6557639837265015, "logits/chosen": 0.6534960865974426, "logits/rejected": 0.7478735446929932, "logps/chosen": -3.153658866882324, "logps/rejected": -3.678150177001953, "loss": 1.0278, "nll_loss": 0.9622206687927246, "rewards/accuracies": 0.5, "rewards/chosen": -0.31536591053009033, "rewards/margins": 0.05244908481836319, "rewards/rejected": -0.3678150177001953, "step": 1284 }, { "epoch": 3.5181382614647503, "grad_norm": 3.8175487518310547, "learning_rate": 8.241095890410958e-07, "log_odds_chosen": 0.9433760643005371, "log_odds_ratio": -0.37250423431396484, "logits/chosen": 0.7047387957572937, "logits/rejected": 0.6994429230690002, "logps/chosen": -1.9884487390518188, "logps/rejected": -2.827906608581543, "loss": 0.8959, "nll_loss": 0.8586100339889526, "rewards/accuracies": 0.875, "rewards/chosen": -0.19884487986564636, "rewards/margins": 0.08394579589366913, "rewards/rejected": -0.2827906608581543, "step": 1285 }, { "epoch": 3.5208761122518824, "grad_norm": 3.932224988937378, "learning_rate": 8.23972602739726e-07, "log_odds_chosen": 0.8500500917434692, "log_odds_ratio": -0.5603893995285034, "logits/chosen": 0.750269889831543, "logits/rejected": 0.8422468900680542, "logps/chosen": -2.6391093730926514, "logps/rejected": -3.396348476409912, "loss": 0.8231, "nll_loss": 0.7670413255691528, "rewards/accuracies": 0.5, "rewards/chosen": -0.2639109492301941, "rewards/margins": 0.07572389394044876, "rewards/rejected": -0.33963483572006226, "step": 1286 }, { "epoch": 3.5236139630390144, "grad_norm": 3.4504268169403076, "learning_rate": 8.23835616438356e-07, "log_odds_chosen": 0.7796481251716614, "log_odds_ratio": -0.4697471857070923, "logits/chosen": 0.6498943567276001, "logits/rejected": 0.5427712202072144, "logps/chosen": -1.493283987045288, "logps/rejected": -2.1592864990234375, "loss": 1.0936, "nll_loss": 1.0466238260269165, "rewards/accuracies": 0.75, "rewards/chosen": -0.14932841062545776, "rewards/margins": 0.0666002407670021, "rewards/rejected": -0.21592864394187927, "step": 1287 }, { "epoch": 3.5263518138261465, "grad_norm": 3.9936649799346924, "learning_rate": 8.236986301369862e-07, "log_odds_chosen": 0.12541301548480988, "log_odds_ratio": -0.7362905144691467, "logits/chosen": 0.5427252054214478, "logits/rejected": 0.4994180202484131, "logps/chosen": -3.294032573699951, "logps/rejected": -3.3972296714782715, "loss": 1.0802, "nll_loss": 1.0065561532974243, "rewards/accuracies": 0.625, "rewards/chosen": -0.32940325140953064, "rewards/margins": 0.010319730266928673, "rewards/rejected": -0.33972299098968506, "step": 1288 }, { "epoch": 3.5290896646132786, "grad_norm": 4.567198753356934, "learning_rate": 8.235616438356165e-07, "log_odds_chosen": 0.3384245038032532, "log_odds_ratio": -0.707529604434967, "logits/chosen": 0.6513378024101257, "logits/rejected": 0.7042267918586731, "logps/chosen": -2.726301670074463, "logps/rejected": -3.051880359649658, "loss": 0.9285, "nll_loss": 0.8577573299407959, "rewards/accuracies": 0.5, "rewards/chosen": -0.2726301848888397, "rewards/margins": 0.03255785256624222, "rewards/rejected": -0.30518803000450134, "step": 1289 }, { "epoch": 3.5318275154004106, "grad_norm": 3.827019691467285, "learning_rate": 8.234246575342466e-07, "log_odds_chosen": 0.19887374341487885, "log_odds_ratio": -0.7058596611022949, "logits/chosen": 0.8468726873397827, "logits/rejected": 0.7834133505821228, "logps/chosen": -2.293914556503296, "logps/rejected": -2.415269136428833, "loss": 1.0324, "nll_loss": 0.9618603587150574, "rewards/accuracies": 0.75, "rewards/chosen": -0.2293914556503296, "rewards/margins": 0.0121354591101408, "rewards/rejected": -0.24152691662311554, "step": 1290 }, { "epoch": 3.5345653661875427, "grad_norm": 3.4305315017700195, "learning_rate": 8.232876712328767e-07, "log_odds_chosen": 0.8285069465637207, "log_odds_ratio": -0.4591794013977051, "logits/chosen": 0.7528200149536133, "logits/rejected": 0.5907526612281799, "logps/chosen": -2.7537920475006104, "logps/rejected": -3.5242671966552734, "loss": 1.0045, "nll_loss": 0.9585789442062378, "rewards/accuracies": 0.625, "rewards/chosen": -0.2753792405128479, "rewards/margins": 0.07704751193523407, "rewards/rejected": -0.3524267077445984, "step": 1291 }, { "epoch": 3.5373032169746748, "grad_norm": 3.9744179248809814, "learning_rate": 8.231506849315069e-07, "log_odds_chosen": 0.4050118923187256, "log_odds_ratio": -0.6103270649909973, "logits/chosen": 0.5403541922569275, "logits/rejected": 0.5016980767250061, "logps/chosen": -2.1993191242218018, "logps/rejected": -2.551623582839966, "loss": 1.0367, "nll_loss": 0.9756268858909607, "rewards/accuracies": 0.625, "rewards/chosen": -0.2199319303035736, "rewards/margins": 0.03523043543100357, "rewards/rejected": -0.2551623582839966, "step": 1292 }, { "epoch": 3.540041067761807, "grad_norm": 2.861215114593506, "learning_rate": 8.23013698630137e-07, "log_odds_chosen": 1.0255839824676514, "log_odds_ratio": -0.41622376441955566, "logits/chosen": 0.8335661888122559, "logits/rejected": 0.8046040534973145, "logps/chosen": -2.56364107131958, "logps/rejected": -3.5097296237945557, "loss": 0.8971, "nll_loss": 0.8554321527481079, "rewards/accuracies": 0.875, "rewards/chosen": -0.256364107131958, "rewards/margins": 0.0946088507771492, "rewards/rejected": -0.3509729504585266, "step": 1293 }, { "epoch": 3.5427789185489393, "grad_norm": 3.225048065185547, "learning_rate": 8.228767123287671e-07, "log_odds_chosen": 0.20414817333221436, "log_odds_ratio": -0.6562964916229248, "logits/chosen": 0.6470953822135925, "logits/rejected": 0.6577534079551697, "logps/chosen": -2.0635738372802734, "logps/rejected": -2.255235195159912, "loss": 1.0883, "nll_loss": 1.0227138996124268, "rewards/accuracies": 0.625, "rewards/chosen": -0.20635738968849182, "rewards/margins": 0.019166111946105957, "rewards/rejected": -0.22552350163459778, "step": 1294 }, { "epoch": 3.5455167693360714, "grad_norm": 4.193704605102539, "learning_rate": 8.227397260273973e-07, "log_odds_chosen": 1.4462591409683228, "log_odds_ratio": -0.39256054162979126, "logits/chosen": 0.516943097114563, "logits/rejected": 0.5621824860572815, "logps/chosen": -2.4532630443573, "logps/rejected": -3.809957504272461, "loss": 0.9577, "nll_loss": 0.9184845089912415, "rewards/accuracies": 0.875, "rewards/chosen": -0.24532631039619446, "rewards/margins": 0.13566944003105164, "rewards/rejected": -0.3809957504272461, "step": 1295 }, { "epoch": 3.5482546201232035, "grad_norm": 4.495364665985107, "learning_rate": 8.226027397260274e-07, "log_odds_chosen": 0.8008354306221008, "log_odds_ratio": -0.5149287581443787, "logits/chosen": 0.8260698318481445, "logits/rejected": 0.8410336375236511, "logps/chosen": -2.9358789920806885, "logps/rejected": -3.705543279647827, "loss": 1.0823, "nll_loss": 1.0308082103729248, "rewards/accuracies": 0.625, "rewards/chosen": -0.29358789324760437, "rewards/margins": 0.07696643471717834, "rewards/rejected": -0.3705543279647827, "step": 1296 }, { "epoch": 3.5509924709103355, "grad_norm": 5.111062049865723, "learning_rate": 8.224657534246576e-07, "log_odds_chosen": 0.13858406245708466, "log_odds_ratio": -0.7189644575119019, "logits/chosen": 0.8174499869346619, "logits/rejected": 0.7868099808692932, "logps/chosen": -2.5558860301971436, "logps/rejected": -2.651561737060547, "loss": 0.9097, "nll_loss": 0.8378499150276184, "rewards/accuracies": 0.75, "rewards/chosen": -0.2555885910987854, "rewards/margins": 0.009567582979798317, "rewards/rejected": -0.26515617966651917, "step": 1297 }, { "epoch": 3.5537303216974676, "grad_norm": 4.462648391723633, "learning_rate": 8.223287671232876e-07, "log_odds_chosen": 0.7531701922416687, "log_odds_ratio": -0.5100359320640564, "logits/chosen": 0.6959793567657471, "logits/rejected": 0.6194853186607361, "logps/chosen": -1.7100657224655151, "logps/rejected": -2.4115824699401855, "loss": 1.066, "nll_loss": 1.014985203742981, "rewards/accuracies": 0.75, "rewards/chosen": -0.17100657522678375, "rewards/margins": 0.0701516717672348, "rewards/rejected": -0.24115826189517975, "step": 1298 }, { "epoch": 3.5564681724845997, "grad_norm": 3.110408306121826, "learning_rate": 8.221917808219178e-07, "log_odds_chosen": 0.6717050075531006, "log_odds_ratio": -0.4387838840484619, "logits/chosen": 0.6636744141578674, "logits/rejected": 0.6221904754638672, "logps/chosen": -1.970229148864746, "logps/rejected": -2.5558691024780273, "loss": 0.9967, "nll_loss": 0.9528592824935913, "rewards/accuracies": 0.875, "rewards/chosen": -0.1970229148864746, "rewards/margins": 0.05856399983167648, "rewards/rejected": -0.2555869221687317, "step": 1299 }, { "epoch": 3.5592060232717317, "grad_norm": 3.7319164276123047, "learning_rate": 8.22054794520548e-07, "log_odds_chosen": 0.9188190698623657, "log_odds_ratio": -0.37606120109558105, "logits/chosen": 1.0001463890075684, "logits/rejected": 0.9763748645782471, "logps/chosen": -2.0961623191833496, "logps/rejected": -2.9128191471099854, "loss": 0.9681, "nll_loss": 0.9304698705673218, "rewards/accuracies": 0.875, "rewards/chosen": -0.20961622893810272, "rewards/margins": 0.08166567236185074, "rewards/rejected": -0.29128190875053406, "step": 1300 }, { "epoch": 3.561943874058864, "grad_norm": 4.554914474487305, "learning_rate": 8.21917808219178e-07, "log_odds_chosen": 0.8810086846351624, "log_odds_ratio": -0.5711371898651123, "logits/chosen": 0.8616582155227661, "logits/rejected": 0.8463441133499146, "logps/chosen": -2.792574644088745, "logps/rejected": -3.5900630950927734, "loss": 0.9039, "nll_loss": 0.8468256592750549, "rewards/accuracies": 0.625, "rewards/chosen": -0.27925747632980347, "rewards/margins": 0.07974881678819656, "rewards/rejected": -0.35900628566741943, "step": 1301 }, { "epoch": 3.564681724845996, "grad_norm": 3.5040252208709717, "learning_rate": 8.217808219178082e-07, "log_odds_chosen": 0.2711482644081116, "log_odds_ratio": -0.601468026638031, "logits/chosen": 0.6226244568824768, "logits/rejected": 0.6553098559379578, "logps/chosen": -2.477909564971924, "logps/rejected": -2.723240613937378, "loss": 1.0239, "nll_loss": 0.9637677669525146, "rewards/accuracies": 0.75, "rewards/chosen": -0.24779094755649567, "rewards/margins": 0.02453312836587429, "rewards/rejected": -0.2723240852355957, "step": 1302 }, { "epoch": 3.567419575633128, "grad_norm": 3.6691198348999023, "learning_rate": 8.216438356164384e-07, "log_odds_chosen": 0.19324424862861633, "log_odds_ratio": -0.8472708463668823, "logits/chosen": 0.6662058234214783, "logits/rejected": 0.6835699081420898, "logps/chosen": -2.805530548095703, "logps/rejected": -2.9745516777038574, "loss": 1.0303, "nll_loss": 0.94557124376297, "rewards/accuracies": 0.5, "rewards/chosen": -0.28055304288864136, "rewards/margins": 0.016902118921279907, "rewards/rejected": -0.29745519161224365, "step": 1303 }, { "epoch": 3.57015742642026, "grad_norm": 5.521420955657959, "learning_rate": 8.215068493150685e-07, "log_odds_chosen": 1.0113462209701538, "log_odds_ratio": -0.4862672686576843, "logits/chosen": 0.551056444644928, "logits/rejected": 0.518136739730835, "logps/chosen": -2.4680051803588867, "logps/rejected": -3.4066033363342285, "loss": 0.9545, "nll_loss": 0.9058670997619629, "rewards/accuracies": 0.75, "rewards/chosen": -0.2468005269765854, "rewards/margins": 0.09385982155799866, "rewards/rejected": -0.34066033363342285, "step": 1304 }, { "epoch": 3.572895277207392, "grad_norm": 3.2311880588531494, "learning_rate": 8.213698630136986e-07, "log_odds_chosen": 0.1479535549879074, "log_odds_ratio": -0.661974310874939, "logits/chosen": 0.7083045840263367, "logits/rejected": 0.8278903365135193, "logps/chosen": -2.6918680667877197, "logps/rejected": -2.806157350540161, "loss": 1.0963, "nll_loss": 1.030093789100647, "rewards/accuracies": 0.75, "rewards/chosen": -0.2691868245601654, "rewards/margins": 0.011428935453295708, "rewards/rejected": -0.28061574697494507, "step": 1305 }, { "epoch": 3.575633127994524, "grad_norm": 3.4656620025634766, "learning_rate": 8.212328767123288e-07, "log_odds_chosen": 0.43830427527427673, "log_odds_ratio": -0.6169594526290894, "logits/chosen": 0.5557557344436646, "logits/rejected": 0.5550580620765686, "logps/chosen": -2.4997599124908447, "logps/rejected": -2.888115406036377, "loss": 1.0129, "nll_loss": 0.9512195587158203, "rewards/accuracies": 0.5, "rewards/chosen": -0.2499760091304779, "rewards/margins": 0.03883552551269531, "rewards/rejected": -0.28881150484085083, "step": 1306 }, { "epoch": 3.578370978781656, "grad_norm": 3.5657026767730713, "learning_rate": 8.210958904109589e-07, "log_odds_chosen": -0.06788080930709839, "log_odds_ratio": -0.7832943201065063, "logits/chosen": 0.6700483560562134, "logits/rejected": 0.6597298383712769, "logps/chosen": -3.0716233253479004, "logps/rejected": -3.0028915405273438, "loss": 0.9609, "nll_loss": 0.8825544714927673, "rewards/accuracies": 0.5, "rewards/chosen": -0.307162344455719, "rewards/margins": -0.0068731773644685745, "rewards/rejected": -0.3002891540527344, "step": 1307 }, { "epoch": 3.5811088295687883, "grad_norm": 2.9276034832000732, "learning_rate": 8.20958904109589e-07, "log_odds_chosen": 0.8894855976104736, "log_odds_ratio": -0.4119408130645752, "logits/chosen": 0.5712522268295288, "logits/rejected": 0.5488261580467224, "logps/chosen": -1.7722376585006714, "logps/rejected": -2.5326333045959473, "loss": 0.9901, "nll_loss": 0.9489071369171143, "rewards/accuracies": 0.875, "rewards/chosen": -0.17722377181053162, "rewards/margins": 0.07603955268859863, "rewards/rejected": -0.25326332449913025, "step": 1308 }, { "epoch": 3.5838466803559204, "grad_norm": 3.740941047668457, "learning_rate": 8.208219178082192e-07, "log_odds_chosen": 0.7382615804672241, "log_odds_ratio": -0.46790969371795654, "logits/chosen": 0.6483280062675476, "logits/rejected": 0.6835891008377075, "logps/chosen": -2.8530335426330566, "logps/rejected": -3.5631866455078125, "loss": 0.9584, "nll_loss": 0.9115764498710632, "rewards/accuracies": 0.625, "rewards/chosen": -0.28530335426330566, "rewards/margins": 0.0710153579711914, "rewards/rejected": -0.3563186824321747, "step": 1309 }, { "epoch": 3.586584531143053, "grad_norm": 3.3658437728881836, "learning_rate": 8.206849315068493e-07, "log_odds_chosen": 0.1488807201385498, "log_odds_ratio": -0.7951323986053467, "logits/chosen": 0.5388407111167908, "logits/rejected": 0.5982544422149658, "logps/chosen": -2.5494697093963623, "logps/rejected": -2.729520559310913, "loss": 0.9907, "nll_loss": 0.9111968278884888, "rewards/accuracies": 0.375, "rewards/chosen": -0.2549469769001007, "rewards/margins": 0.01800510287284851, "rewards/rejected": -0.2729520797729492, "step": 1310 }, { "epoch": 3.589322381930185, "grad_norm": 3.6050240993499756, "learning_rate": 8.205479452054795e-07, "log_odds_chosen": 0.027347490191459656, "log_odds_ratio": -0.7691622376441956, "logits/chosen": 0.6583567261695862, "logits/rejected": 0.6066868305206299, "logps/chosen": -2.576063394546509, "logps/rejected": -2.5661497116088867, "loss": 0.9698, "nll_loss": 0.8928713798522949, "rewards/accuracies": 0.75, "rewards/chosen": -0.2576063275337219, "rewards/margins": -0.0009913723915815353, "rewards/rejected": -0.2566149830818176, "step": 1311 }, { "epoch": 3.592060232717317, "grad_norm": 3.7982306480407715, "learning_rate": 8.204109589041096e-07, "log_odds_chosen": 0.5722784996032715, "log_odds_ratio": -0.6154983043670654, "logits/chosen": 0.5744215250015259, "logits/rejected": 0.5715238451957703, "logps/chosen": -2.253079652786255, "logps/rejected": -2.784438133239746, "loss": 1.0682, "nll_loss": 1.0066728591918945, "rewards/accuracies": 0.5, "rewards/chosen": -0.22530798614025116, "rewards/margins": 0.053135838359594345, "rewards/rejected": -0.2784438133239746, "step": 1312 }, { "epoch": 3.594798083504449, "grad_norm": 3.7488434314727783, "learning_rate": 8.202739726027397e-07, "log_odds_chosen": 1.0051243305206299, "log_odds_ratio": -0.5188196897506714, "logits/chosen": 0.7840898036956787, "logits/rejected": 0.6794392466545105, "logps/chosen": -2.543715238571167, "logps/rejected": -3.4773714542388916, "loss": 0.9999, "nll_loss": 0.9480586647987366, "rewards/accuracies": 0.75, "rewards/chosen": -0.2543714940547943, "rewards/margins": 0.0933656245470047, "rewards/rejected": -0.3477371335029602, "step": 1313 }, { "epoch": 3.597535934291581, "grad_norm": 4.3888840675354, "learning_rate": 8.201369863013699e-07, "log_odds_chosen": 0.2561604678630829, "log_odds_ratio": -0.8145679235458374, "logits/chosen": 0.7768923044204712, "logits/rejected": 0.7903167605400085, "logps/chosen": -3.6037378311157227, "logps/rejected": -3.800177574157715, "loss": 0.9795, "nll_loss": 0.898029088973999, "rewards/accuracies": 0.75, "rewards/chosen": -0.3603737950325012, "rewards/margins": 0.01964394748210907, "rewards/rejected": -0.3800177574157715, "step": 1314 }, { "epoch": 3.600273785078713, "grad_norm": 5.035113334655762, "learning_rate": 8.199999999999999e-07, "log_odds_chosen": 0.015074729919433594, "log_odds_ratio": -0.8642418384552002, "logits/chosen": 0.638943076133728, "logits/rejected": 0.6108758449554443, "logps/chosen": -2.957930088043213, "logps/rejected": -2.955508232116699, "loss": 1.0077, "nll_loss": 0.9213079810142517, "rewards/accuracies": 0.625, "rewards/chosen": -0.29579299688339233, "rewards/margins": -0.00024218857288360596, "rewards/rejected": -0.2955508232116699, "step": 1315 }, { "epoch": 3.6030116358658453, "grad_norm": 3.1116809844970703, "learning_rate": 8.198630136986301e-07, "log_odds_chosen": 0.7002819776535034, "log_odds_ratio": -0.4509207606315613, "logits/chosen": 0.7946878671646118, "logits/rejected": 0.8429638147354126, "logps/chosen": -2.219167709350586, "logps/rejected": -2.8438637256622314, "loss": 0.8815, "nll_loss": 0.8364561796188354, "rewards/accuracies": 0.875, "rewards/chosen": -0.22191676497459412, "rewards/margins": 0.06246960908174515, "rewards/rejected": -0.28438639640808105, "step": 1316 }, { "epoch": 3.6057494866529773, "grad_norm": 3.6927521228790283, "learning_rate": 8.197260273972603e-07, "log_odds_chosen": 1.023077368736267, "log_odds_ratio": -0.6474753618240356, "logits/chosen": 0.6944629549980164, "logits/rejected": 0.6208499670028687, "logps/chosen": -2.294445037841797, "logps/rejected": -3.267932653427124, "loss": 1.0028, "nll_loss": 0.9380263090133667, "rewards/accuracies": 0.75, "rewards/chosen": -0.2294445037841797, "rewards/margins": 0.09734874963760376, "rewards/rejected": -0.32679325342178345, "step": 1317 }, { "epoch": 3.6084873374401094, "grad_norm": 4.376133918762207, "learning_rate": 8.195890410958903e-07, "log_odds_chosen": 0.8573211431503296, "log_odds_ratio": -0.42894211411476135, "logits/chosen": 0.7978036403656006, "logits/rejected": 0.8642981648445129, "logps/chosen": -2.7133538722991943, "logps/rejected": -3.5175938606262207, "loss": 0.9249, "nll_loss": 0.8820183277130127, "rewards/accuracies": 0.75, "rewards/chosen": -0.2713353633880615, "rewards/margins": 0.0804239884018898, "rewards/rejected": -0.3517593741416931, "step": 1318 }, { "epoch": 3.6112251882272415, "grad_norm": 2.828009605407715, "learning_rate": 8.194520547945205e-07, "log_odds_chosen": 1.664105772972107, "log_odds_ratio": -0.258822500705719, "logits/chosen": 0.5512083768844604, "logits/rejected": 0.5488293170928955, "logps/chosen": -1.623103380203247, "logps/rejected": -3.094320058822632, "loss": 0.9395, "nll_loss": 0.9136085510253906, "rewards/accuracies": 0.875, "rewards/chosen": -0.16231036186218262, "rewards/margins": 0.14712165296077728, "rewards/rejected": -0.3094320297241211, "step": 1319 }, { "epoch": 3.613963039014374, "grad_norm": 3.350775718688965, "learning_rate": 8.193150684931507e-07, "log_odds_chosen": 0.6221907734870911, "log_odds_ratio": -0.5094172954559326, "logits/chosen": 0.6569750905036926, "logits/rejected": 0.5919565558433533, "logps/chosen": -2.27519154548645, "logps/rejected": -2.8263018131256104, "loss": 1.0748, "nll_loss": 1.023841142654419, "rewards/accuracies": 0.75, "rewards/chosen": -0.22751915454864502, "rewards/margins": 0.05511102080345154, "rewards/rejected": -0.28263017535209656, "step": 1320 }, { "epoch": 3.616700889801506, "grad_norm": 3.5845565795898438, "learning_rate": 8.191780821917808e-07, "log_odds_chosen": 0.7868148684501648, "log_odds_ratio": -0.5430089831352234, "logits/chosen": 0.5979680418968201, "logits/rejected": 0.5500036478042603, "logps/chosen": -2.754641056060791, "logps/rejected": -3.5029690265655518, "loss": 0.9296, "nll_loss": 0.875311553478241, "rewards/accuracies": 0.75, "rewards/chosen": -0.27546411752700806, "rewards/margins": 0.07483280450105667, "rewards/rejected": -0.35029691457748413, "step": 1321 }, { "epoch": 3.619438740588638, "grad_norm": 3.6519603729248047, "learning_rate": 8.190410958904109e-07, "log_odds_chosen": 0.3266514241695404, "log_odds_ratio": -0.6753221750259399, "logits/chosen": 0.712598443031311, "logits/rejected": 0.7853584289550781, "logps/chosen": -2.7028706073760986, "logps/rejected": -2.95208740234375, "loss": 0.9324, "nll_loss": 0.8648732304573059, "rewards/accuracies": 0.625, "rewards/chosen": -0.27028706669807434, "rewards/margins": 0.02492169849574566, "rewards/rejected": -0.29520875215530396, "step": 1322 }, { "epoch": 3.62217659137577, "grad_norm": 2.8409786224365234, "learning_rate": 8.189041095890411e-07, "log_odds_chosen": 1.1555922031402588, "log_odds_ratio": -0.4034479856491089, "logits/chosen": 0.7954039573669434, "logits/rejected": 0.8217369318008423, "logps/chosen": -2.59344744682312, "logps/rejected": -3.689298152923584, "loss": 0.916, "nll_loss": 0.875645101070404, "rewards/accuracies": 0.875, "rewards/chosen": -0.25934475660324097, "rewards/margins": 0.10958507657051086, "rewards/rejected": -0.36892980337142944, "step": 1323 }, { "epoch": 3.6249144421629023, "grad_norm": 5.851385116577148, "learning_rate": 8.187671232876712e-07, "log_odds_chosen": 0.49267202615737915, "log_odds_ratio": -0.8051680326461792, "logits/chosen": 0.5164273381233215, "logits/rejected": 0.5036628246307373, "logps/chosen": -3.0436580181121826, "logps/rejected": -3.383796453475952, "loss": 1.006, "nll_loss": 0.9254984855651855, "rewards/accuracies": 0.625, "rewards/chosen": -0.3043658137321472, "rewards/margins": 0.03401384502649307, "rewards/rejected": -0.3383796215057373, "step": 1324 }, { "epoch": 3.6276522929500343, "grad_norm": 3.8291542530059814, "learning_rate": 8.186301369863013e-07, "log_odds_chosen": 0.5688902139663696, "log_odds_ratio": -0.5045555830001831, "logits/chosen": 0.5989254117012024, "logits/rejected": 0.6060289144515991, "logps/chosen": -2.0815649032592773, "logps/rejected": -2.6098711490631104, "loss": 1.0057, "nll_loss": 0.9552730321884155, "rewards/accuracies": 0.75, "rewards/chosen": -0.2081564962863922, "rewards/margins": 0.05283061042428017, "rewards/rejected": -0.2609871029853821, "step": 1325 }, { "epoch": 3.6303901437371664, "grad_norm": 3.7565550804138184, "learning_rate": 8.184931506849315e-07, "log_odds_chosen": 0.6498350501060486, "log_odds_ratio": -0.4384975731372833, "logits/chosen": 0.7705138921737671, "logits/rejected": 0.7783217430114746, "logps/chosen": -2.233677625656128, "logps/rejected": -2.831554412841797, "loss": 1.0325, "nll_loss": 0.9886217713356018, "rewards/accuracies": 1.0, "rewards/chosen": -0.22336776554584503, "rewards/margins": 0.05978766828775406, "rewards/rejected": -0.2831554412841797, "step": 1326 }, { "epoch": 3.6331279945242985, "grad_norm": 3.664558172225952, "learning_rate": 8.183561643835616e-07, "log_odds_chosen": 0.207563117146492, "log_odds_ratio": -0.7280468344688416, "logits/chosen": 0.6156086921691895, "logits/rejected": 0.5685959458351135, "logps/chosen": -2.2470946311950684, "logps/rejected": -2.408715009689331, "loss": 1.1086, "nll_loss": 1.0357606410980225, "rewards/accuracies": 0.625, "rewards/chosen": -0.22470948100090027, "rewards/margins": 0.016162022948265076, "rewards/rejected": -0.24087150394916534, "step": 1327 }, { "epoch": 3.6358658453114305, "grad_norm": 3.2546777725219727, "learning_rate": 8.182191780821918e-07, "log_odds_chosen": 0.8909977674484253, "log_odds_ratio": -0.41731494665145874, "logits/chosen": 0.5858973860740662, "logits/rejected": 0.5571103692054749, "logps/chosen": -2.379728317260742, "logps/rejected": -3.2208523750305176, "loss": 0.8783, "nll_loss": 0.8365910649299622, "rewards/accuracies": 0.875, "rewards/chosen": -0.23797282576560974, "rewards/margins": 0.08411240577697754, "rewards/rejected": -0.3220852315425873, "step": 1328 }, { "epoch": 3.6386036960985626, "grad_norm": 3.6139235496520996, "learning_rate": 8.180821917808218e-07, "log_odds_chosen": 0.785153865814209, "log_odds_ratio": -0.578330934047699, "logits/chosen": 0.6871808767318726, "logits/rejected": 0.779236376285553, "logps/chosen": -2.465059757232666, "logps/rejected": -3.194956064224243, "loss": 0.9659, "nll_loss": 0.908065915107727, "rewards/accuracies": 0.75, "rewards/chosen": -0.246506005525589, "rewards/margins": 0.07298959791660309, "rewards/rejected": -0.3194955885410309, "step": 1329 }, { "epoch": 3.6413415468856947, "grad_norm": 3.4407684803009033, "learning_rate": 8.17945205479452e-07, "log_odds_chosen": 0.7180018424987793, "log_odds_ratio": -0.42967164516448975, "logits/chosen": 0.7003710269927979, "logits/rejected": 0.6794020533561707, "logps/chosen": -1.9935003519058228, "logps/rejected": -2.633565902709961, "loss": 1.046, "nll_loss": 1.0030691623687744, "rewards/accuracies": 0.875, "rewards/chosen": -0.199350044131279, "rewards/margins": 0.06400655955076218, "rewards/rejected": -0.26335659623146057, "step": 1330 }, { "epoch": 3.6440793976728267, "grad_norm": 3.605430841445923, "learning_rate": 8.178082191780822e-07, "log_odds_chosen": -0.3928129971027374, "log_odds_ratio": -0.9404668211936951, "logits/chosen": 0.4705652594566345, "logits/rejected": 0.49966806173324585, "logps/chosen": -2.9311795234680176, "logps/rejected": -2.5596108436584473, "loss": 1.1295, "nll_loss": 1.035478949546814, "rewards/accuracies": 0.375, "rewards/chosen": -0.2931179404258728, "rewards/margins": -0.03715686500072479, "rewards/rejected": -0.2559610903263092, "step": 1331 }, { "epoch": 3.646817248459959, "grad_norm": 5.168254852294922, "learning_rate": 8.176712328767122e-07, "log_odds_chosen": 0.9961134791374207, "log_odds_ratio": -0.3928753733634949, "logits/chosen": 0.5846514105796814, "logits/rejected": 0.5881881713867188, "logps/chosen": -3.0418076515197754, "logps/rejected": -3.971769332885742, "loss": 1.0024, "nll_loss": 0.963158130645752, "rewards/accuracies": 0.875, "rewards/chosen": -0.304180771112442, "rewards/margins": 0.09299618750810623, "rewards/rejected": -0.39717692136764526, "step": 1332 }, { "epoch": 3.649555099247091, "grad_norm": 3.3367273807525635, "learning_rate": 8.175342465753424e-07, "log_odds_chosen": 1.0918045043945312, "log_odds_ratio": -0.3690078556537628, "logits/chosen": 0.7080539464950562, "logits/rejected": 0.7783058881759644, "logps/chosen": -2.554788112640381, "logps/rejected": -3.5780816078186035, "loss": 0.894, "nll_loss": 0.8571202754974365, "rewards/accuracies": 0.875, "rewards/chosen": -0.25547879934310913, "rewards/margins": 0.10232938826084137, "rewards/rejected": -0.3578081727027893, "step": 1333 }, { "epoch": 3.652292950034223, "grad_norm": 4.2374491691589355, "learning_rate": 8.173972602739726e-07, "log_odds_chosen": 0.11265163123607635, "log_odds_ratio": -0.7392686605453491, "logits/chosen": 0.6611244082450867, "logits/rejected": 0.6647407412528992, "logps/chosen": -2.3793203830718994, "logps/rejected": -2.474552869796753, "loss": 1.2222, "nll_loss": 1.1483227014541626, "rewards/accuracies": 0.625, "rewards/chosen": -0.23793205618858337, "rewards/margins": 0.009523238986730576, "rewards/rejected": -0.24745529890060425, "step": 1334 }, { "epoch": 3.655030800821355, "grad_norm": 3.5363292694091797, "learning_rate": 8.172602739726027e-07, "log_odds_chosen": 0.6072937250137329, "log_odds_ratio": -0.48047810792922974, "logits/chosen": 0.6266818046569824, "logits/rejected": 0.5902312994003296, "logps/chosen": -2.1850643157958984, "logps/rejected": -2.7405080795288086, "loss": 1.0155, "nll_loss": 0.9674887657165527, "rewards/accuracies": 0.75, "rewards/chosen": -0.21850645542144775, "rewards/margins": 0.05554436147212982, "rewards/rejected": -0.2740508019924164, "step": 1335 }, { "epoch": 3.657768651608487, "grad_norm": 3.860750436782837, "learning_rate": 8.171232876712328e-07, "log_odds_chosen": 1.382367730140686, "log_odds_ratio": -0.33916181325912476, "logits/chosen": 0.7026268243789673, "logits/rejected": 0.7023612856864929, "logps/chosen": -1.5373440980911255, "logps/rejected": -2.80294132232666, "loss": 0.8907, "nll_loss": 0.8567366600036621, "rewards/accuracies": 1.0, "rewards/chosen": -0.15373441576957703, "rewards/margins": 0.12655973434448242, "rewards/rejected": -0.28029415011405945, "step": 1336 }, { "epoch": 3.6605065023956196, "grad_norm": 3.659527540206909, "learning_rate": 8.16986301369863e-07, "log_odds_chosen": 0.5440173149108887, "log_odds_ratio": -0.47840797901153564, "logits/chosen": 0.6854917407035828, "logits/rejected": 0.6506971120834351, "logps/chosen": -2.2411117553710938, "logps/rejected": -2.7245545387268066, "loss": 0.974, "nll_loss": 0.9261727333068848, "rewards/accuracies": 0.875, "rewards/chosen": -0.2241111844778061, "rewards/margins": 0.048344291746616364, "rewards/rejected": -0.27245548367500305, "step": 1337 }, { "epoch": 3.6632443531827517, "grad_norm": 3.6797280311584473, "learning_rate": 8.168493150684931e-07, "log_odds_chosen": 0.27917760610580444, "log_odds_ratio": -0.6562164425849915, "logits/chosen": 0.7365127801895142, "logits/rejected": 0.7910191416740417, "logps/chosen": -2.984713554382324, "logps/rejected": -3.231456756591797, "loss": 0.8241, "nll_loss": 0.7584973573684692, "rewards/accuracies": 0.625, "rewards/chosen": -0.2984713613986969, "rewards/margins": 0.02467433176934719, "rewards/rejected": -0.32314568758010864, "step": 1338 }, { "epoch": 3.6659822039698837, "grad_norm": 3.5640859603881836, "learning_rate": 8.167123287671232e-07, "log_odds_chosen": 0.4697306156158447, "log_odds_ratio": -0.5549132227897644, "logits/chosen": 0.7787978649139404, "logits/rejected": 0.7123960256576538, "logps/chosen": -2.5853779315948486, "logps/rejected": -3.049332618713379, "loss": 0.9625, "nll_loss": 0.9069802761077881, "rewards/accuracies": 0.625, "rewards/chosen": -0.25853782892227173, "rewards/margins": 0.046395476907491684, "rewards/rejected": -0.3049332797527313, "step": 1339 }, { "epoch": 3.668720054757016, "grad_norm": 3.5636916160583496, "learning_rate": 8.165753424657534e-07, "log_odds_chosen": 1.0224493741989136, "log_odds_ratio": -0.4519922733306885, "logits/chosen": 0.6773597598075867, "logits/rejected": 0.6683724522590637, "logps/chosen": -2.623884677886963, "logps/rejected": -3.567416191101074, "loss": 0.95, "nll_loss": 0.9048057794570923, "rewards/accuracies": 0.75, "rewards/chosen": -0.2623884677886963, "rewards/margins": 0.09435318410396576, "rewards/rejected": -0.35674163699150085, "step": 1340 }, { "epoch": 3.671457905544148, "grad_norm": 4.28817081451416, "learning_rate": 8.164383561643835e-07, "log_odds_chosen": 1.145846962928772, "log_odds_ratio": -0.4751555323600769, "logits/chosen": 0.8026172518730164, "logits/rejected": 0.7072950601577759, "logps/chosen": -2.0218193531036377, "logps/rejected": -3.119260787963867, "loss": 1.0569, "nll_loss": 1.009408712387085, "rewards/accuracies": 0.875, "rewards/chosen": -0.20218193531036377, "rewards/margins": 0.10974416136741638, "rewards/rejected": -0.31192609667778015, "step": 1341 }, { "epoch": 3.67419575633128, "grad_norm": 5.020732402801514, "learning_rate": 8.163013698630137e-07, "log_odds_chosen": 0.8540628552436829, "log_odds_ratio": -0.4248970150947571, "logits/chosen": 0.4400661289691925, "logits/rejected": 0.3635171949863434, "logps/chosen": -2.0902862548828125, "logps/rejected": -2.8536689281463623, "loss": 0.9857, "nll_loss": 0.9431990385055542, "rewards/accuracies": 0.875, "rewards/chosen": -0.20902863144874573, "rewards/margins": 0.0763382613658905, "rewards/rejected": -0.28536689281463623, "step": 1342 }, { "epoch": 3.676933607118412, "grad_norm": 5.519561290740967, "learning_rate": 8.161643835616437e-07, "log_odds_chosen": 0.5689506530761719, "log_odds_ratio": -0.8536412119865417, "logits/chosen": 0.5935736894607544, "logits/rejected": 0.6057441830635071, "logps/chosen": -2.784919500350952, "logps/rejected": -3.341867685317993, "loss": 1.0158, "nll_loss": 0.9304218292236328, "rewards/accuracies": 0.75, "rewards/chosen": -0.27849194407463074, "rewards/margins": 0.055694788694381714, "rewards/rejected": -0.33418673276901245, "step": 1343 }, { "epoch": 3.679671457905544, "grad_norm": 4.160822868347168, "learning_rate": 8.160273972602739e-07, "log_odds_chosen": 0.4411349594593048, "log_odds_ratio": -0.5227549076080322, "logits/chosen": 0.6808274388313293, "logits/rejected": 0.5373795628547668, "logps/chosen": -1.7278553247451782, "logps/rejected": -2.0902059078216553, "loss": 1.0828, "nll_loss": 1.0305365324020386, "rewards/accuracies": 0.875, "rewards/chosen": -0.17278552055358887, "rewards/margins": 0.03623506426811218, "rewards/rejected": -0.20902058482170105, "step": 1344 }, { "epoch": 3.682409308692676, "grad_norm": 3.5615904331207275, "learning_rate": 8.158904109589041e-07, "log_odds_chosen": 1.3714549541473389, "log_odds_ratio": -0.34710562229156494, "logits/chosen": 0.5295740365982056, "logits/rejected": 0.5215874314308167, "logps/chosen": -1.9238874912261963, "logps/rejected": -3.1752231121063232, "loss": 1.0055, "nll_loss": 0.9708272218704224, "rewards/accuracies": 0.875, "rewards/chosen": -0.19238874316215515, "rewards/margins": 0.12513355910778046, "rewards/rejected": -0.3175222873687744, "step": 1345 }, { "epoch": 3.685147159479808, "grad_norm": 3.922863245010376, "learning_rate": 8.157534246575341e-07, "log_odds_chosen": 0.9110641479492188, "log_odds_ratio": -0.42437589168548584, "logits/chosen": 0.8095932006835938, "logits/rejected": 0.8537606596946716, "logps/chosen": -2.946382999420166, "logps/rejected": -3.8104655742645264, "loss": 0.8341, "nll_loss": 0.7917081117630005, "rewards/accuracies": 0.875, "rewards/chosen": -0.29463833570480347, "rewards/margins": 0.08640825748443604, "rewards/rejected": -0.3810465931892395, "step": 1346 }, { "epoch": 3.6878850102669407, "grad_norm": 3.8700950145721436, "learning_rate": 8.156164383561643e-07, "log_odds_chosen": 0.386790931224823, "log_odds_ratio": -0.6385929584503174, "logits/chosen": 0.5604274272918701, "logits/rejected": 0.5854998826980591, "logps/chosen": -2.6056883335113525, "logps/rejected": -2.9354944229125977, "loss": 0.9327, "nll_loss": 0.8688246011734009, "rewards/accuracies": 0.75, "rewards/chosen": -0.2605688273906708, "rewards/margins": 0.03298061341047287, "rewards/rejected": -0.29354944825172424, "step": 1347 }, { "epoch": 3.690622861054073, "grad_norm": 4.059251308441162, "learning_rate": 8.154794520547945e-07, "log_odds_chosen": 0.1339743435382843, "log_odds_ratio": -0.6614440679550171, "logits/chosen": 0.5732693672180176, "logits/rejected": 0.5959218740463257, "logps/chosen": -2.9144339561462402, "logps/rejected": -3.020491600036621, "loss": 1.0268, "nll_loss": 0.9606174230575562, "rewards/accuracies": 0.75, "rewards/chosen": -0.291443407535553, "rewards/margins": 0.010605755262076855, "rewards/rejected": -0.3020491600036621, "step": 1348 }, { "epoch": 3.693360711841205, "grad_norm": 3.428009271621704, "learning_rate": 8.153424657534246e-07, "log_odds_chosen": 0.8345834612846375, "log_odds_ratio": -0.6053569316864014, "logits/chosen": 0.760662317276001, "logits/rejected": 0.8241348266601562, "logps/chosen": -2.594259738922119, "logps/rejected": -3.421581745147705, "loss": 0.9601, "nll_loss": 0.899516761302948, "rewards/accuracies": 0.5, "rewards/chosen": -0.25942596793174744, "rewards/margins": 0.08273221552371979, "rewards/rejected": -0.3421581983566284, "step": 1349 }, { "epoch": 3.696098562628337, "grad_norm": 3.6156210899353027, "learning_rate": 8.152054794520547e-07, "log_odds_chosen": 0.35510820150375366, "log_odds_ratio": -0.5544217824935913, "logits/chosen": 0.751835286617279, "logits/rejected": 0.7459603548049927, "logps/chosen": -1.893449306488037, "logps/rejected": -2.195549488067627, "loss": 0.97, "nll_loss": 0.9145599603652954, "rewards/accuracies": 0.75, "rewards/chosen": -0.18934494256973267, "rewards/margins": 0.03021004982292652, "rewards/rejected": -0.21955497562885284, "step": 1350 }, { "epoch": 3.698836413415469, "grad_norm": 3.5411770343780518, "learning_rate": 8.150684931506849e-07, "log_odds_chosen": 0.9030571579933167, "log_odds_ratio": -0.47077834606170654, "logits/chosen": 0.7112451791763306, "logits/rejected": 0.7102921009063721, "logps/chosen": -2.5416154861450195, "logps/rejected": -3.403609275817871, "loss": 1.0302, "nll_loss": 0.9830840826034546, "rewards/accuracies": 0.875, "rewards/chosen": -0.2541615664958954, "rewards/margins": 0.08619936555624008, "rewards/rejected": -0.34036093950271606, "step": 1351 }, { "epoch": 3.701574264202601, "grad_norm": 4.020418643951416, "learning_rate": 8.14931506849315e-07, "log_odds_chosen": 0.7296028137207031, "log_odds_ratio": -0.44025248289108276, "logits/chosen": 0.4111994504928589, "logits/rejected": 0.39632734656333923, "logps/chosen": -1.9229674339294434, "logps/rejected": -2.583174228668213, "loss": 0.9632, "nll_loss": 0.9191846251487732, "rewards/accuracies": 1.0, "rewards/chosen": -0.19229675829410553, "rewards/margins": 0.06602068245410919, "rewards/rejected": -0.2583174407482147, "step": 1352 }, { "epoch": 3.704312114989733, "grad_norm": 3.3665521144866943, "learning_rate": 8.147945205479451e-07, "log_odds_chosen": 0.4582163095474243, "log_odds_ratio": -0.5157799124717712, "logits/chosen": 0.6832720637321472, "logits/rejected": 0.5838736295700073, "logps/chosen": -1.7480648756027222, "logps/rejected": -2.1339268684387207, "loss": 1.0224, "nll_loss": 0.9708393216133118, "rewards/accuracies": 0.75, "rewards/chosen": -0.17480647563934326, "rewards/margins": 0.038586221635341644, "rewards/rejected": -0.2133927047252655, "step": 1353 }, { "epoch": 3.707049965776865, "grad_norm": 3.864938974380493, "learning_rate": 8.146575342465753e-07, "log_odds_chosen": 0.6168314218521118, "log_odds_ratio": -0.45924270153045654, "logits/chosen": 0.7665712833404541, "logits/rejected": 0.6726899147033691, "logps/chosen": -2.6497135162353516, "logps/rejected": -3.2364070415496826, "loss": 1.0143, "nll_loss": 0.9683485627174377, "rewards/accuracies": 1.0, "rewards/chosen": -0.2649713456630707, "rewards/margins": 0.058669377118349075, "rewards/rejected": -0.32364073395729065, "step": 1354 }, { "epoch": 3.7097878165639973, "grad_norm": 3.285194158554077, "learning_rate": 8.145205479452054e-07, "log_odds_chosen": 0.7504916787147522, "log_odds_ratio": -0.40978536009788513, "logits/chosen": 0.8067352771759033, "logits/rejected": 0.8034638166427612, "logps/chosen": -2.0482375621795654, "logps/rejected": -2.7365355491638184, "loss": 0.87, "nll_loss": 0.8290383815765381, "rewards/accuracies": 1.0, "rewards/chosen": -0.20482377707958221, "rewards/margins": 0.06882979720830917, "rewards/rejected": -0.2736535966396332, "step": 1355 }, { "epoch": 3.7125256673511293, "grad_norm": 3.0379161834716797, "learning_rate": 8.143835616438356e-07, "log_odds_chosen": 1.394805908203125, "log_odds_ratio": -0.31665754318237305, "logits/chosen": 0.7488517761230469, "logits/rejected": 0.803709864616394, "logps/chosen": -1.8531734943389893, "logps/rejected": -3.08499813079834, "loss": 0.9164, "nll_loss": 0.8847386837005615, "rewards/accuracies": 0.875, "rewards/chosen": -0.18531735241413116, "rewards/margins": 0.12318246066570282, "rewards/rejected": -0.308499813079834, "step": 1356 }, { "epoch": 3.7152635181382614, "grad_norm": 3.372692584991455, "learning_rate": 8.142465753424657e-07, "log_odds_chosen": 1.2307718992233276, "log_odds_ratio": -0.40599897503852844, "logits/chosen": 0.7286021709442139, "logits/rejected": 0.7692389488220215, "logps/chosen": -2.7050867080688477, "logps/rejected": -3.8459742069244385, "loss": 0.9404, "nll_loss": 0.8997576236724854, "rewards/accuracies": 0.875, "rewards/chosen": -0.27050870656967163, "rewards/margins": 0.11408872902393341, "rewards/rejected": -0.38459742069244385, "step": 1357 }, { "epoch": 3.7180013689253935, "grad_norm": 4.320101261138916, "learning_rate": 8.141095890410958e-07, "log_odds_chosen": 0.13176803290843964, "log_odds_ratio": -0.8870912790298462, "logits/chosen": 0.7416905164718628, "logits/rejected": 0.8573802709579468, "logps/chosen": -3.663196563720703, "logps/rejected": -3.806645393371582, "loss": 0.9065, "nll_loss": 0.8178175687789917, "rewards/accuracies": 0.5, "rewards/chosen": -0.3663196861743927, "rewards/margins": 0.014344897121191025, "rewards/rejected": -0.38066455721855164, "step": 1358 }, { "epoch": 3.7207392197125255, "grad_norm": 3.4186692237854004, "learning_rate": 8.13972602739726e-07, "log_odds_chosen": 0.776115357875824, "log_odds_ratio": -0.41758885979652405, "logits/chosen": 0.6731921434402466, "logits/rejected": 0.5867243409156799, "logps/chosen": -1.6998050212860107, "logps/rejected": -2.372680425643921, "loss": 1.06, "nll_loss": 1.0182874202728271, "rewards/accuracies": 1.0, "rewards/chosen": -0.1699804961681366, "rewards/margins": 0.06728755682706833, "rewards/rejected": -0.23726806044578552, "step": 1359 }, { "epoch": 3.7234770704996576, "grad_norm": 3.782928705215454, "learning_rate": 8.13835616438356e-07, "log_odds_chosen": 0.7898333072662354, "log_odds_ratio": -0.40940529108047485, "logits/chosen": 0.6643876433372498, "logits/rejected": 0.646062970161438, "logps/chosen": -2.099691867828369, "logps/rejected": -2.803133249282837, "loss": 1.0016, "nll_loss": 0.9606776833534241, "rewards/accuracies": 1.0, "rewards/chosen": -0.2099691927433014, "rewards/margins": 0.07034414261579514, "rewards/rejected": -0.2803133428096771, "step": 1360 }, { "epoch": 3.7262149212867897, "grad_norm": 3.965150833129883, "learning_rate": 8.136986301369862e-07, "log_odds_chosen": 0.7889818549156189, "log_odds_ratio": -0.48239171504974365, "logits/chosen": 0.6506338715553284, "logits/rejected": 0.6153116822242737, "logps/chosen": -2.4283385276794434, "logps/rejected": -3.1659481525421143, "loss": 0.9903, "nll_loss": 0.9420191645622253, "rewards/accuracies": 0.875, "rewards/chosen": -0.24283385276794434, "rewards/margins": 0.0737609714269638, "rewards/rejected": -0.31659480929374695, "step": 1361 }, { "epoch": 3.7289527720739217, "grad_norm": 4.310882568359375, "learning_rate": 8.135616438356164e-07, "log_odds_chosen": -0.15538030862808228, "log_odds_ratio": -0.9870253801345825, "logits/chosen": 0.5704155564308167, "logits/rejected": 0.6808750033378601, "logps/chosen": -3.1504287719726562, "logps/rejected": -3.0385751724243164, "loss": 1.0699, "nll_loss": 0.9711922407150269, "rewards/accuracies": 0.375, "rewards/chosen": -0.3150428533554077, "rewards/margins": -0.011185349896550179, "rewards/rejected": -0.3038575053215027, "step": 1362 }, { "epoch": 3.731690622861054, "grad_norm": 4.072822093963623, "learning_rate": 8.134246575342465e-07, "log_odds_chosen": 1.0780088901519775, "log_odds_ratio": -0.583249568939209, "logits/chosen": 0.793632984161377, "logits/rejected": 0.8953855633735657, "logps/chosen": -2.4038467407226562, "logps/rejected": -3.391728401184082, "loss": 0.9895, "nll_loss": 0.9311366677284241, "rewards/accuracies": 0.75, "rewards/chosen": -0.24038466811180115, "rewards/margins": 0.09878819435834885, "rewards/rejected": -0.3391728699207306, "step": 1363 }, { "epoch": 3.7344284736481863, "grad_norm": 3.7527613639831543, "learning_rate": 8.132876712328766e-07, "log_odds_chosen": 1.1337374448776245, "log_odds_ratio": -0.4569339454174042, "logits/chosen": 0.8450170755386353, "logits/rejected": 0.85155189037323, "logps/chosen": -2.374448776245117, "logps/rejected": -3.3980307579040527, "loss": 0.8928, "nll_loss": 0.8470805883407593, "rewards/accuracies": 0.875, "rewards/chosen": -0.2374449074268341, "rewards/margins": 0.10235817730426788, "rewards/rejected": -0.3398030996322632, "step": 1364 }, { "epoch": 3.7371663244353184, "grad_norm": 4.453368186950684, "learning_rate": 8.131506849315068e-07, "log_odds_chosen": -0.21749234199523926, "log_odds_ratio": -1.1475045680999756, "logits/chosen": 0.5558062195777893, "logits/rejected": 0.6425765752792358, "logps/chosen": -3.176530361175537, "logps/rejected": -2.9741969108581543, "loss": 1.0025, "nll_loss": 0.8877245187759399, "rewards/accuracies": 0.125, "rewards/chosen": -0.3176530599594116, "rewards/margins": -0.02023334987461567, "rewards/rejected": -0.2974196970462799, "step": 1365 }, { "epoch": 3.7399041752224504, "grad_norm": 3.443995237350464, "learning_rate": 8.130136986301369e-07, "log_odds_chosen": 0.4166409373283386, "log_odds_ratio": -0.5640596747398376, "logits/chosen": 0.6216824650764465, "logits/rejected": 0.5579919815063477, "logps/chosen": -2.6024396419525146, "logps/rejected": -2.993159294128418, "loss": 0.9575, "nll_loss": 0.9011101722717285, "rewards/accuracies": 0.625, "rewards/chosen": -0.2602439522743225, "rewards/margins": 0.03907196223735809, "rewards/rejected": -0.2993159294128418, "step": 1366 }, { "epoch": 3.7426420260095825, "grad_norm": 4.447597980499268, "learning_rate": 8.12876712328767e-07, "log_odds_chosen": 0.3460107445716858, "log_odds_ratio": -0.5926209092140198, "logits/chosen": 0.7112935781478882, "logits/rejected": 0.6330914497375488, "logps/chosen": -1.9741158485412598, "logps/rejected": -2.246135711669922, "loss": 1.0441, "nll_loss": 0.9847930073738098, "rewards/accuracies": 0.75, "rewards/chosen": -0.19741159677505493, "rewards/margins": 0.027201974764466286, "rewards/rejected": -0.22461357712745667, "step": 1367 }, { "epoch": 3.7453798767967146, "grad_norm": 3.3630082607269287, "learning_rate": 8.127397260273973e-07, "log_odds_chosen": 0.45823198556900024, "log_odds_ratio": -0.5958119034767151, "logits/chosen": 0.534736156463623, "logits/rejected": 0.5444300770759583, "logps/chosen": -2.248356580734253, "logps/rejected": -2.6897459030151367, "loss": 1.0347, "nll_loss": 0.975092887878418, "rewards/accuracies": 0.625, "rewards/chosen": -0.22483566403388977, "rewards/margins": 0.04413892701268196, "rewards/rejected": -0.2689746022224426, "step": 1368 }, { "epoch": 3.7481177275838466, "grad_norm": 3.5785396099090576, "learning_rate": 8.126027397260273e-07, "log_odds_chosen": 0.6376357078552246, "log_odds_ratio": -0.4525497555732727, "logits/chosen": 0.7109383940696716, "logits/rejected": 0.6194592714309692, "logps/chosen": -2.1331872940063477, "logps/rejected": -2.699510097503662, "loss": 1.0024, "nll_loss": 0.9571493864059448, "rewards/accuracies": 0.875, "rewards/chosen": -0.21331873536109924, "rewards/margins": 0.056632257997989655, "rewards/rejected": -0.2699510157108307, "step": 1369 }, { "epoch": 3.7508555783709787, "grad_norm": 2.971644878387451, "learning_rate": 8.124657534246576e-07, "log_odds_chosen": 1.0971364974975586, "log_odds_ratio": -0.3940570652484894, "logits/chosen": 0.7258211970329285, "logits/rejected": 0.7506985664367676, "logps/chosen": -2.0394163131713867, "logps/rejected": -3.0086774826049805, "loss": 0.9069, "nll_loss": 0.8674601316452026, "rewards/accuracies": 0.75, "rewards/chosen": -0.20394162833690643, "rewards/margins": 0.09692611545324326, "rewards/rejected": -0.3008677363395691, "step": 1370 }, { "epoch": 3.753593429158111, "grad_norm": 3.6982898712158203, "learning_rate": 8.123287671232877e-07, "log_odds_chosen": 0.2921714782714844, "log_odds_ratio": -0.6028988361358643, "logits/chosen": 0.6472097635269165, "logits/rejected": 0.6603588461875916, "logps/chosen": -2.1817305088043213, "logps/rejected": -2.429891586303711, "loss": 1.0714, "nll_loss": 1.0110911130905151, "rewards/accuracies": 0.75, "rewards/chosen": -0.2181730568408966, "rewards/margins": 0.02481609396636486, "rewards/rejected": -0.24298915266990662, "step": 1371 }, { "epoch": 3.756331279945243, "grad_norm": 3.5276665687561035, "learning_rate": 8.121917808219178e-07, "log_odds_chosen": 0.4273951053619385, "log_odds_ratio": -0.6009371280670166, "logits/chosen": 0.599759042263031, "logits/rejected": 0.6176583170890808, "logps/chosen": -2.3123087882995605, "logps/rejected": -2.7249550819396973, "loss": 0.9648, "nll_loss": 0.904690146446228, "rewards/accuracies": 0.625, "rewards/chosen": -0.23123086988925934, "rewards/margins": 0.04126464203000069, "rewards/rejected": -0.2724955379962921, "step": 1372 }, { "epoch": 3.759069130732375, "grad_norm": 3.5379278659820557, "learning_rate": 8.12054794520548e-07, "log_odds_chosen": 1.3450207710266113, "log_odds_ratio": -0.42923417687416077, "logits/chosen": 0.7177609205245972, "logits/rejected": 0.6742579936981201, "logps/chosen": -2.5255990028381348, "logps/rejected": -3.8393006324768066, "loss": 1.0201, "nll_loss": 0.977135419845581, "rewards/accuracies": 0.625, "rewards/chosen": -0.25255993008613586, "rewards/margins": 0.1313701719045639, "rewards/rejected": -0.3839300870895386, "step": 1373 }, { "epoch": 3.7618069815195074, "grad_norm": 3.716108560562134, "learning_rate": 8.11917808219178e-07, "log_odds_chosen": 1.2151474952697754, "log_odds_ratio": -0.3672826588153839, "logits/chosen": 0.626852810382843, "logits/rejected": 0.6013097763061523, "logps/chosen": -2.7175211906433105, "logps/rejected": -3.8842875957489014, "loss": 1.0011, "nll_loss": 0.9644158482551575, "rewards/accuracies": 0.875, "rewards/chosen": -0.27175214886665344, "rewards/margins": 0.11667661368846893, "rewards/rejected": -0.38842877745628357, "step": 1374 }, { "epoch": 3.7645448323066395, "grad_norm": 3.4880638122558594, "learning_rate": 8.117808219178082e-07, "log_odds_chosen": 1.5530372858047485, "log_odds_ratio": -0.42720887064933777, "logits/chosen": 0.6254385709762573, "logits/rejected": 0.6356598734855652, "logps/chosen": -2.3984832763671875, "logps/rejected": -3.865446090698242, "loss": 0.9634, "nll_loss": 0.9206798076629639, "rewards/accuracies": 0.75, "rewards/chosen": -0.2398483157157898, "rewards/margins": 0.1466962993144989, "rewards/rejected": -0.3865445852279663, "step": 1375 }, { "epoch": 3.7672826830937716, "grad_norm": 4.003617763519287, "learning_rate": 8.116438356164384e-07, "log_odds_chosen": 0.3743789494037628, "log_odds_ratio": -0.8049268126487732, "logits/chosen": 0.5058538913726807, "logits/rejected": 0.44733738899230957, "logps/chosen": -3.3019227981567383, "logps/rejected": -3.644705295562744, "loss": 1.0897, "nll_loss": 1.0092332363128662, "rewards/accuracies": 0.625, "rewards/chosen": -0.3301922678947449, "rewards/margins": 0.034278228878974915, "rewards/rejected": -0.364470511674881, "step": 1376 }, { "epoch": 3.7700205338809036, "grad_norm": 3.6065213680267334, "learning_rate": 8.115068493150685e-07, "log_odds_chosen": 0.09145858883857727, "log_odds_ratio": -0.7490970492362976, "logits/chosen": 0.5711719393730164, "logits/rejected": 0.5182392597198486, "logps/chosen": -2.441650867462158, "logps/rejected": -2.4989840984344482, "loss": 1.0407, "nll_loss": 0.965810239315033, "rewards/accuracies": 0.625, "rewards/chosen": -0.24416512250900269, "rewards/margins": 0.005733293481171131, "rewards/rejected": -0.24989840388298035, "step": 1377 }, { "epoch": 3.7727583846680357, "grad_norm": 3.8553781509399414, "learning_rate": 8.113698630136986e-07, "log_odds_chosen": 1.5165250301361084, "log_odds_ratio": -0.3539520800113678, "logits/chosen": 0.8883504271507263, "logits/rejected": 0.8985981941223145, "logps/chosen": -2.113424301147461, "logps/rejected": -3.516432046890259, "loss": 0.8557, "nll_loss": 0.8203484416007996, "rewards/accuracies": 0.75, "rewards/chosen": -0.21134242415428162, "rewards/margins": 0.14030078053474426, "rewards/rejected": -0.3516432046890259, "step": 1378 }, { "epoch": 3.7754962354551678, "grad_norm": 3.200601100921631, "learning_rate": 8.112328767123288e-07, "log_odds_chosen": 0.8802817463874817, "log_odds_ratio": -0.4088595509529114, "logits/chosen": 0.71368807554245, "logits/rejected": 0.7381575107574463, "logps/chosen": -2.758944511413574, "logps/rejected": -3.5620038509368896, "loss": 0.9534, "nll_loss": 0.9124966263771057, "rewards/accuracies": 0.875, "rewards/chosen": -0.275894433259964, "rewards/margins": 0.08030594885349274, "rewards/rejected": -0.3562003970146179, "step": 1379 }, { "epoch": 3.7782340862423, "grad_norm": 3.2011921405792236, "learning_rate": 8.110958904109589e-07, "log_odds_chosen": 0.634834885597229, "log_odds_ratio": -0.48456206917762756, "logits/chosen": 0.7742505669593811, "logits/rejected": 0.778153121471405, "logps/chosen": -2.2578818798065186, "logps/rejected": -2.8503541946411133, "loss": 1.022, "nll_loss": 0.9735834002494812, "rewards/accuracies": 0.875, "rewards/chosen": -0.2257882058620453, "rewards/margins": 0.0592472180724144, "rewards/rejected": -0.2850354313850403, "step": 1380 }, { "epoch": 3.780971937029432, "grad_norm": 4.368916034698486, "learning_rate": 8.10958904109589e-07, "log_odds_chosen": 1.235804796218872, "log_odds_ratio": -0.3283538818359375, "logits/chosen": 0.6267543435096741, "logits/rejected": 0.4744487404823303, "logps/chosen": -2.124828815460205, "logps/rejected": -3.2619831562042236, "loss": 1.0442, "nll_loss": 1.0114061832427979, "rewards/accuracies": 0.875, "rewards/chosen": -0.21248286962509155, "rewards/margins": 0.11371542513370514, "rewards/rejected": -0.3261983394622803, "step": 1381 }, { "epoch": 3.783709787816564, "grad_norm": 3.3071815967559814, "learning_rate": 8.108219178082192e-07, "log_odds_chosen": 1.318922519683838, "log_odds_ratio": -0.3259699046611786, "logits/chosen": 0.45987051725387573, "logits/rejected": 0.4379548132419586, "logps/chosen": -2.032627582550049, "logps/rejected": -3.207387685775757, "loss": 0.9626, "nll_loss": 0.929972767829895, "rewards/accuracies": 0.875, "rewards/chosen": -0.20326277613639832, "rewards/margins": 0.11747598648071289, "rewards/rejected": -0.3207387626171112, "step": 1382 }, { "epoch": 3.786447638603696, "grad_norm": 3.69368839263916, "learning_rate": 8.106849315068493e-07, "log_odds_chosen": 0.36977672576904297, "log_odds_ratio": -0.7022991180419922, "logits/chosen": 0.6956658363342285, "logits/rejected": 0.6520190238952637, "logps/chosen": -2.0673182010650635, "logps/rejected": -2.4183406829833984, "loss": 1.0392, "nll_loss": 0.9689890146255493, "rewards/accuracies": 0.625, "rewards/chosen": -0.20673182606697083, "rewards/margins": 0.03510225564241409, "rewards/rejected": -0.2418341040611267, "step": 1383 }, { "epoch": 3.789185489390828, "grad_norm": 3.09800124168396, "learning_rate": 8.105479452054795e-07, "log_odds_chosen": 1.1708154678344727, "log_odds_ratio": -0.4203755855560303, "logits/chosen": 0.6711137890815735, "logits/rejected": 0.6108097434043884, "logps/chosen": -2.326610803604126, "logps/rejected": -3.4325966835021973, "loss": 1.0753, "nll_loss": 1.0333061218261719, "rewards/accuracies": 0.75, "rewards/chosen": -0.23266109824180603, "rewards/margins": 0.11059857159852982, "rewards/rejected": -0.34325969219207764, "step": 1384 }, { "epoch": 3.79192334017796, "grad_norm": 3.5822646617889404, "learning_rate": 8.104109589041096e-07, "log_odds_chosen": 0.3055344223976135, "log_odds_ratio": -0.5724581480026245, "logits/chosen": 0.8427508473396301, "logits/rejected": 0.8266393542289734, "logps/chosen": -2.1917741298675537, "logps/rejected": -2.4619274139404297, "loss": 1.0192, "nll_loss": 0.961955189704895, "rewards/accuracies": 0.875, "rewards/chosen": -0.21917739510536194, "rewards/margins": 0.027015352621674538, "rewards/rejected": -0.24619275331497192, "step": 1385 }, { "epoch": 3.7946611909650922, "grad_norm": 3.4845335483551025, "learning_rate": 8.102739726027397e-07, "log_odds_chosen": 0.3779759705066681, "log_odds_ratio": -0.5414408445358276, "logits/chosen": 0.7387728095054626, "logits/rejected": 0.7086608409881592, "logps/chosen": -2.2804758548736572, "logps/rejected": -2.609386682510376, "loss": 0.9713, "nll_loss": 0.917181670665741, "rewards/accuracies": 0.875, "rewards/chosen": -0.22804757952690125, "rewards/margins": 0.03289108723402023, "rewards/rejected": -0.26093870401382446, "step": 1386 }, { "epoch": 3.7973990417522243, "grad_norm": 4.854753017425537, "learning_rate": 8.101369863013699e-07, "log_odds_chosen": -0.16209764778614044, "log_odds_ratio": -0.9946374297142029, "logits/chosen": 0.5560129880905151, "logits/rejected": 0.6569509506225586, "logps/chosen": -3.3269271850585938, "logps/rejected": -3.1622440814971924, "loss": 1.0904, "nll_loss": 0.9909743666648865, "rewards/accuracies": 0.5, "rewards/chosen": -0.3326927423477173, "rewards/margins": -0.016468320041894913, "rewards/rejected": -0.31622442603111267, "step": 1387 }, { "epoch": 3.8001368925393564, "grad_norm": 2.947458267211914, "learning_rate": 8.1e-07, "log_odds_chosen": 0.9808245897293091, "log_odds_ratio": -0.5554169416427612, "logits/chosen": 0.6975716352462769, "logits/rejected": 0.6399662494659424, "logps/chosen": -2.087094306945801, "logps/rejected": -3.0304625034332275, "loss": 1.0829, "nll_loss": 1.0273163318634033, "rewards/accuracies": 0.625, "rewards/chosen": -0.20870941877365112, "rewards/margins": 0.09433683753013611, "rewards/rejected": -0.30304625630378723, "step": 1388 }, { "epoch": 3.8028747433264884, "grad_norm": 4.438432693481445, "learning_rate": 8.098630136986301e-07, "log_odds_chosen": 0.5724123120307922, "log_odds_ratio": -0.4929730296134949, "logits/chosen": 0.7925843000411987, "logits/rejected": 0.777428388595581, "logps/chosen": -2.7121620178222656, "logps/rejected": -3.230865001678467, "loss": 1.0594, "nll_loss": 1.0100719928741455, "rewards/accuracies": 0.75, "rewards/chosen": -0.2712162137031555, "rewards/margins": 0.05187029018998146, "rewards/rejected": -0.3230865001678467, "step": 1389 }, { "epoch": 3.805612594113621, "grad_norm": 4.456493854522705, "learning_rate": 8.097260273972603e-07, "log_odds_chosen": 0.5617539882659912, "log_odds_ratio": -0.5467108488082886, "logits/chosen": 0.571738600730896, "logits/rejected": 0.5286568999290466, "logps/chosen": -2.234001398086548, "logps/rejected": -2.7124147415161133, "loss": 0.9534, "nll_loss": 0.8987537622451782, "rewards/accuracies": 0.875, "rewards/chosen": -0.22340014576911926, "rewards/margins": 0.04784136265516281, "rewards/rejected": -0.2712414860725403, "step": 1390 }, { "epoch": 3.808350444900753, "grad_norm": 3.115229368209839, "learning_rate": 8.095890410958903e-07, "log_odds_chosen": 1.2181422710418701, "log_odds_ratio": -0.30014023184776306, "logits/chosen": 0.8008313179016113, "logits/rejected": 0.8108860850334167, "logps/chosen": -2.551910161972046, "logps/rejected": -3.688283920288086, "loss": 0.8529, "nll_loss": 0.8228782415390015, "rewards/accuracies": 1.0, "rewards/chosen": -0.25519102811813354, "rewards/margins": 0.11363737285137177, "rewards/rejected": -0.3688283860683441, "step": 1391 }, { "epoch": 3.811088295687885, "grad_norm": 4.695737838745117, "learning_rate": 8.094520547945205e-07, "log_odds_chosen": 0.42719602584838867, "log_odds_ratio": -0.5720871686935425, "logits/chosen": 0.783393383026123, "logits/rejected": 0.8005187511444092, "logps/chosen": -2.215425729751587, "logps/rejected": -2.6042137145996094, "loss": 0.9401, "nll_loss": 0.8828651905059814, "rewards/accuracies": 0.75, "rewards/chosen": -0.2215425819158554, "rewards/margins": 0.03887880593538284, "rewards/rejected": -0.26042139530181885, "step": 1392 }, { "epoch": 3.813826146475017, "grad_norm": 3.078606367111206, "learning_rate": 8.093150684931507e-07, "log_odds_chosen": 0.9147830009460449, "log_odds_ratio": -0.39194437861442566, "logits/chosen": 0.897485613822937, "logits/rejected": 0.9378990530967712, "logps/chosen": -2.695436716079712, "logps/rejected": -3.557852268218994, "loss": 0.8183, "nll_loss": 0.7791111469268799, "rewards/accuracies": 0.875, "rewards/chosen": -0.2695436477661133, "rewards/margins": 0.08624157309532166, "rewards/rejected": -0.3557852506637573, "step": 1393 }, { "epoch": 3.8165639972621492, "grad_norm": 4.813843250274658, "learning_rate": 8.091780821917808e-07, "log_odds_chosen": 0.6169770359992981, "log_odds_ratio": -0.5453486442565918, "logits/chosen": 0.7525307536125183, "logits/rejected": 0.6518049240112305, "logps/chosen": -3.1299633979797363, "logps/rejected": -3.704987049102783, "loss": 1.0803, "nll_loss": 1.025768518447876, "rewards/accuracies": 0.625, "rewards/chosen": -0.3129963278770447, "rewards/margins": 0.05750235170125961, "rewards/rejected": -0.3704986870288849, "step": 1394 }, { "epoch": 3.8193018480492813, "grad_norm": 3.949636697769165, "learning_rate": 8.090410958904109e-07, "log_odds_chosen": 0.18944504857063293, "log_odds_ratio": -0.723545253276825, "logits/chosen": 0.6979697346687317, "logits/rejected": 0.8058412671089172, "logps/chosen": -2.7182066440582275, "logps/rejected": -2.883786678314209, "loss": 0.8985, "nll_loss": 0.8261498212814331, "rewards/accuracies": 0.75, "rewards/chosen": -0.27182066440582275, "rewards/margins": 0.016558002680540085, "rewards/rejected": -0.28837865591049194, "step": 1395 }, { "epoch": 3.8220396988364134, "grad_norm": 3.5368378162384033, "learning_rate": 8.089041095890411e-07, "log_odds_chosen": 1.4006907939910889, "log_odds_ratio": -0.3606662154197693, "logits/chosen": 0.5519152879714966, "logits/rejected": 0.4830673336982727, "logps/chosen": -2.238229274749756, "logps/rejected": -3.540764808654785, "loss": 0.9937, "nll_loss": 0.9576790928840637, "rewards/accuracies": 1.0, "rewards/chosen": -0.2238229513168335, "rewards/margins": 0.13025355339050293, "rewards/rejected": -0.3540765047073364, "step": 1396 }, { "epoch": 3.8247775496235454, "grad_norm": 5.008337020874023, "learning_rate": 8.087671232876712e-07, "log_odds_chosen": 0.29912739992141724, "log_odds_ratio": -1.0834892988204956, "logits/chosen": 0.898766279220581, "logits/rejected": 0.9454230666160583, "logps/chosen": -3.277374744415283, "logps/rejected": -3.515404462814331, "loss": 0.8418, "nll_loss": 0.7334812879562378, "rewards/accuracies": 0.875, "rewards/chosen": -0.3277374804019928, "rewards/margins": 0.0238029807806015, "rewards/rejected": -0.3515404462814331, "step": 1397 }, { "epoch": 3.8275154004106775, "grad_norm": 4.919182300567627, "learning_rate": 8.086301369863014e-07, "log_odds_chosen": 0.6452552080154419, "log_odds_ratio": -0.6924461126327515, "logits/chosen": 0.5988410711288452, "logits/rejected": 0.5717077255249023, "logps/chosen": -3.037078857421875, "logps/rejected": -3.6393871307373047, "loss": 0.9343, "nll_loss": 0.8650902509689331, "rewards/accuracies": 0.625, "rewards/chosen": -0.30370789766311646, "rewards/margins": 0.060230813920497894, "rewards/rejected": -0.36393868923187256, "step": 1398 }, { "epoch": 3.8302532511978096, "grad_norm": 4.023815155029297, "learning_rate": 8.084931506849315e-07, "log_odds_chosen": 0.4229002594947815, "log_odds_ratio": -0.6559785008430481, "logits/chosen": 0.7414459586143494, "logits/rejected": 0.7710795998573303, "logps/chosen": -2.6110715866088867, "logps/rejected": -2.987856149673462, "loss": 0.8979, "nll_loss": 0.8322556614875793, "rewards/accuracies": 0.625, "rewards/chosen": -0.2611071467399597, "rewards/margins": 0.03767848014831543, "rewards/rejected": -0.29878562688827515, "step": 1399 }, { "epoch": 3.832991101984942, "grad_norm": 3.052105665206909, "learning_rate": 8.083561643835616e-07, "log_odds_chosen": 0.6080502271652222, "log_odds_ratio": -0.500104546546936, "logits/chosen": 0.5133901834487915, "logits/rejected": 0.4583958089351654, "logps/chosen": -2.3326008319854736, "logps/rejected": -2.8869428634643555, "loss": 0.9945, "nll_loss": 0.9445154666900635, "rewards/accuracies": 0.625, "rewards/chosen": -0.23326008021831512, "rewards/margins": 0.055434197187423706, "rewards/rejected": -0.2886942923069, "step": 1400 }, { "epoch": 3.835728952772074, "grad_norm": 4.512258529663086, "learning_rate": 8.082191780821918e-07, "log_odds_chosen": 0.5017881393432617, "log_odds_ratio": -0.617681622505188, "logits/chosen": 0.573030412197113, "logits/rejected": 0.6000957489013672, "logps/chosen": -2.4187841415405273, "logps/rejected": -2.8764450550079346, "loss": 1.046, "nll_loss": 0.9842698574066162, "rewards/accuracies": 0.875, "rewards/chosen": -0.2418784499168396, "rewards/margins": 0.04576607793569565, "rewards/rejected": -0.28764450550079346, "step": 1401 }, { "epoch": 3.838466803559206, "grad_norm": 4.308182239532471, "learning_rate": 8.080821917808219e-07, "log_odds_chosen": 0.6838828325271606, "log_odds_ratio": -0.5438241362571716, "logits/chosen": 0.8612861633300781, "logits/rejected": 0.8197077512741089, "logps/chosen": -2.797370433807373, "logps/rejected": -3.4374260902404785, "loss": 0.9827, "nll_loss": 0.9283102750778198, "rewards/accuracies": 0.75, "rewards/chosen": -0.27973705530166626, "rewards/margins": 0.06400555372238159, "rewards/rejected": -0.34374260902404785, "step": 1402 }, { "epoch": 3.8412046543463383, "grad_norm": 3.744757890701294, "learning_rate": 8.07945205479452e-07, "log_odds_chosen": 0.3599705398082733, "log_odds_ratio": -0.5722852349281311, "logits/chosen": 0.616401731967926, "logits/rejected": 0.6323594450950623, "logps/chosen": -2.3612279891967773, "logps/rejected": -2.7091150283813477, "loss": 1.0102, "nll_loss": 0.9529463648796082, "rewards/accuracies": 0.75, "rewards/chosen": -0.23612278699874878, "rewards/margins": 0.03478871285915375, "rewards/rejected": -0.2709115147590637, "step": 1403 }, { "epoch": 3.8439425051334704, "grad_norm": 4.092041969299316, "learning_rate": 8.078082191780822e-07, "log_odds_chosen": 0.8266807198524475, "log_odds_ratio": -0.5096763372421265, "logits/chosen": 0.6537390947341919, "logits/rejected": 0.6230395436286926, "logps/chosen": -2.0913901329040527, "logps/rejected": -2.884073257446289, "loss": 0.9332, "nll_loss": 0.8821898698806763, "rewards/accuracies": 0.625, "rewards/chosen": -0.20913901925086975, "rewards/margins": 0.07926830649375916, "rewards/rejected": -0.2884073257446289, "step": 1404 }, { "epoch": 3.8466803559206024, "grad_norm": 3.168174982070923, "learning_rate": 8.076712328767122e-07, "log_odds_chosen": 1.1306366920471191, "log_odds_ratio": -0.4769604802131653, "logits/chosen": 0.7243525981903076, "logits/rejected": 0.7014602422714233, "logps/chosen": -2.9033007621765137, "logps/rejected": -3.984708786010742, "loss": 0.8606, "nll_loss": 0.8129390478134155, "rewards/accuracies": 0.875, "rewards/chosen": -0.29033005237579346, "rewards/margins": 0.10814078152179718, "rewards/rejected": -0.39847084879875183, "step": 1405 }, { "epoch": 3.8494182067077345, "grad_norm": 2.7859044075012207, "learning_rate": 8.075342465753424e-07, "log_odds_chosen": 0.3790879249572754, "log_odds_ratio": -0.5490322709083557, "logits/chosen": 0.6711956262588501, "logits/rejected": 0.6651970744132996, "logps/chosen": -2.243353843688965, "logps/rejected": -2.6013588905334473, "loss": 0.9181, "nll_loss": 0.8631535768508911, "rewards/accuracies": 0.875, "rewards/chosen": -0.22433540225028992, "rewards/margins": 0.03580049052834511, "rewards/rejected": -0.2601358890533447, "step": 1406 }, { "epoch": 3.8521560574948666, "grad_norm": 4.140926361083984, "learning_rate": 8.073972602739726e-07, "log_odds_chosen": -0.025395452976226807, "log_odds_ratio": -0.8163197040557861, "logits/chosen": 0.8009735345840454, "logits/rejected": 0.8080007433891296, "logps/chosen": -2.978886127471924, "logps/rejected": -2.915304660797119, "loss": 0.9119, "nll_loss": 0.8302979469299316, "rewards/accuracies": 0.625, "rewards/chosen": -0.2978886365890503, "rewards/margins": -0.006358157843351364, "rewards/rejected": -0.2915304899215698, "step": 1407 }, { "epoch": 3.8548939082819986, "grad_norm": 5.810903072357178, "learning_rate": 8.072602739726027e-07, "log_odds_chosen": 0.18610642850399017, "log_odds_ratio": -0.8031367659568787, "logits/chosen": 0.594571590423584, "logits/rejected": 0.7479098439216614, "logps/chosen": -2.999680519104004, "logps/rejected": -3.1575963497161865, "loss": 0.9097, "nll_loss": 0.8293927311897278, "rewards/accuracies": 0.5, "rewards/chosen": -0.29996806383132935, "rewards/margins": 0.01579158753156662, "rewards/rejected": -0.31575965881347656, "step": 1408 }, { "epoch": 3.8576317590691307, "grad_norm": 3.9828670024871826, "learning_rate": 8.071232876712328e-07, "log_odds_chosen": 0.9444906115531921, "log_odds_ratio": -0.43412381410598755, "logits/chosen": 0.7055738568305969, "logits/rejected": 0.7550300359725952, "logps/chosen": -2.623002529144287, "logps/rejected": -3.5009796619415283, "loss": 0.8788, "nll_loss": 0.8353954553604126, "rewards/accuracies": 0.75, "rewards/chosen": -0.2623002529144287, "rewards/margins": 0.08779772371053696, "rewards/rejected": -0.3500979542732239, "step": 1409 }, { "epoch": 3.8603696098562628, "grad_norm": 3.4866533279418945, "learning_rate": 8.06986301369863e-07, "log_odds_chosen": 0.8510944247245789, "log_odds_ratio": -0.43026548624038696, "logits/chosen": 0.667637288570404, "logits/rejected": 0.6740913987159729, "logps/chosen": -1.7748217582702637, "logps/rejected": -2.4853262901306152, "loss": 0.993, "nll_loss": 0.9499267339706421, "rewards/accuracies": 0.875, "rewards/chosen": -0.17748218774795532, "rewards/margins": 0.07105046510696411, "rewards/rejected": -0.24853263795375824, "step": 1410 }, { "epoch": 3.863107460643395, "grad_norm": 3.9165539741516113, "learning_rate": 8.068493150684931e-07, "log_odds_chosen": 0.5651013851165771, "log_odds_ratio": -0.5689436197280884, "logits/chosen": 0.5925832986831665, "logits/rejected": 0.49228692054748535, "logps/chosen": -2.7890048027038574, "logps/rejected": -3.2885332107543945, "loss": 0.9428, "nll_loss": 0.8859475255012512, "rewards/accuracies": 0.625, "rewards/chosen": -0.27890050411224365, "rewards/margins": 0.04995281249284744, "rewards/rejected": -0.3288532793521881, "step": 1411 }, { "epoch": 3.865845311430527, "grad_norm": 4.773486614227295, "learning_rate": 8.067123287671232e-07, "log_odds_chosen": 0.3789004683494568, "log_odds_ratio": -0.6368840336799622, "logits/chosen": 0.7148979306221008, "logits/rejected": 0.7999202013015747, "logps/chosen": -2.6261942386627197, "logps/rejected": -2.968843460083008, "loss": 0.9006, "nll_loss": 0.8368974328041077, "rewards/accuracies": 0.75, "rewards/chosen": -0.2626194357872009, "rewards/margins": 0.034264929592609406, "rewards/rejected": -0.29688435792922974, "step": 1412 }, { "epoch": 3.868583162217659, "grad_norm": 3.6233766078948975, "learning_rate": 8.065753424657534e-07, "log_odds_chosen": 0.5569961071014404, "log_odds_ratio": -0.7373638153076172, "logits/chosen": 0.5001424551010132, "logits/rejected": 0.4438002109527588, "logps/chosen": -2.286823272705078, "logps/rejected": -2.757551670074463, "loss": 1.0731, "nll_loss": 0.9993728399276733, "rewards/accuracies": 0.75, "rewards/chosen": -0.22868233919143677, "rewards/margins": 0.04707285761833191, "rewards/rejected": -0.2757551670074463, "step": 1413 }, { "epoch": 3.871321013004791, "grad_norm": 4.100952625274658, "learning_rate": 8.064383561643835e-07, "log_odds_chosen": 1.105322241783142, "log_odds_ratio": -0.43603572249412537, "logits/chosen": 0.7159153819084167, "logits/rejected": 0.7738534212112427, "logps/chosen": -2.87180757522583, "logps/rejected": -3.9282376766204834, "loss": 0.9977, "nll_loss": 0.954046905040741, "rewards/accuracies": 0.875, "rewards/chosen": -0.2871807813644409, "rewards/margins": 0.10564301162958145, "rewards/rejected": -0.3928237557411194, "step": 1414 }, { "epoch": 3.874058863791923, "grad_norm": 4.240873336791992, "learning_rate": 8.063013698630137e-07, "log_odds_chosen": 1.0206825733184814, "log_odds_ratio": -0.34771451354026794, "logits/chosen": 0.8111824989318848, "logits/rejected": 0.7650566101074219, "logps/chosen": -1.8342105150222778, "logps/rejected": -2.755962610244751, "loss": 0.997, "nll_loss": 0.9621833562850952, "rewards/accuracies": 1.0, "rewards/chosen": -0.1834210455417633, "rewards/margins": 0.09217519313097, "rewards/rejected": -0.2755962610244751, "step": 1415 }, { "epoch": 3.876796714579055, "grad_norm": 3.560568332672119, "learning_rate": 8.061643835616438e-07, "log_odds_chosen": 0.20367062091827393, "log_odds_ratio": -0.6753062605857849, "logits/chosen": 0.5320851802825928, "logits/rejected": 0.4802970290184021, "logps/chosen": -2.4353885650634766, "logps/rejected": -2.6456832885742188, "loss": 1.1009, "nll_loss": 1.033326506614685, "rewards/accuracies": 0.625, "rewards/chosen": -0.24353885650634766, "rewards/margins": 0.021029507741332054, "rewards/rejected": -0.26456835865974426, "step": 1416 }, { "epoch": 3.8795345653661877, "grad_norm": 3.7645044326782227, "learning_rate": 8.060273972602739e-07, "log_odds_chosen": 0.7063920497894287, "log_odds_ratio": -0.628340482711792, "logits/chosen": 0.880449116230011, "logits/rejected": 0.9164990186691284, "logps/chosen": -2.299912691116333, "logps/rejected": -2.9105331897735596, "loss": 1.0018, "nll_loss": 0.9389663338661194, "rewards/accuracies": 0.5, "rewards/chosen": -0.22999128699302673, "rewards/margins": 0.0610620453953743, "rewards/rejected": -0.29105332493782043, "step": 1417 }, { "epoch": 3.8822724161533197, "grad_norm": 3.108510971069336, "learning_rate": 8.058904109589041e-07, "log_odds_chosen": 1.0957131385803223, "log_odds_ratio": -0.4382888376712799, "logits/chosen": 0.7573057413101196, "logits/rejected": 0.7550466060638428, "logps/chosen": -2.6236302852630615, "logps/rejected": -3.664424419403076, "loss": 0.9128, "nll_loss": 0.8689969182014465, "rewards/accuracies": 0.875, "rewards/chosen": -0.2623630464076996, "rewards/margins": 0.10407941043376923, "rewards/rejected": -0.3664424419403076, "step": 1418 }, { "epoch": 3.885010266940452, "grad_norm": 3.307934522628784, "learning_rate": 8.057534246575342e-07, "log_odds_chosen": 0.47046732902526855, "log_odds_ratio": -0.5692605972290039, "logits/chosen": 0.7066230773925781, "logits/rejected": 0.7416475415229797, "logps/chosen": -2.8140344619750977, "logps/rejected": -3.209479331970215, "loss": 0.9975, "nll_loss": 0.9406014680862427, "rewards/accuracies": 0.75, "rewards/chosen": -0.28140342235565186, "rewards/margins": 0.0395444817841053, "rewards/rejected": -0.32094791531562805, "step": 1419 }, { "epoch": 3.887748117727584, "grad_norm": 3.872326374053955, "learning_rate": 8.056164383561643e-07, "log_odds_chosen": -0.2948818802833557, "log_odds_ratio": -0.9510344862937927, "logits/chosen": 0.7943522930145264, "logits/rejected": 0.8044573068618774, "logps/chosen": -3.365917682647705, "logps/rejected": -3.0652294158935547, "loss": 1.0882, "nll_loss": 0.9931361079216003, "rewards/accuracies": 0.5, "rewards/chosen": -0.33659178018569946, "rewards/margins": -0.03006882034242153, "rewards/rejected": -0.3065229058265686, "step": 1420 }, { "epoch": 3.890485968514716, "grad_norm": 4.744958877563477, "learning_rate": 8.054794520547945e-07, "log_odds_chosen": 0.7914373874664307, "log_odds_ratio": -0.46756279468536377, "logits/chosen": 0.5409578084945679, "logits/rejected": 0.6723613142967224, "logps/chosen": -2.68977689743042, "logps/rejected": -3.387089729309082, "loss": 0.907, "nll_loss": 0.8602068424224854, "rewards/accuracies": 0.875, "rewards/chosen": -0.26897770166397095, "rewards/margins": 0.06973127275705338, "rewards/rejected": -0.3387089967727661, "step": 1421 }, { "epoch": 3.893223819301848, "grad_norm": 4.9113264083862305, "learning_rate": 8.053424657534246e-07, "log_odds_chosen": 1.0618473291397095, "log_odds_ratio": -0.593915581703186, "logits/chosen": 0.5895571708679199, "logits/rejected": 0.5301369428634644, "logps/chosen": -2.1242282390594482, "logps/rejected": -3.0946946144104004, "loss": 1.0246, "nll_loss": 0.9651679396629333, "rewards/accuracies": 0.75, "rewards/chosen": -0.21242281794548035, "rewards/margins": 0.09704665839672089, "rewards/rejected": -0.30946946144104004, "step": 1422 }, { "epoch": 3.89596167008898, "grad_norm": 3.7531349658966064, "learning_rate": 8.052054794520547e-07, "log_odds_chosen": 0.2355736494064331, "log_odds_ratio": -0.6798952221870422, "logits/chosen": 0.6963751316070557, "logits/rejected": 0.7399628758430481, "logps/chosen": -3.1093873977661133, "logps/rejected": -3.3348288536071777, "loss": 0.9671, "nll_loss": 0.899115264415741, "rewards/accuracies": 0.625, "rewards/chosen": -0.3109387755393982, "rewards/margins": 0.0225441325455904, "rewards/rejected": -0.33348292112350464, "step": 1423 }, { "epoch": 3.898699520876112, "grad_norm": 4.320424556732178, "learning_rate": 8.050684931506849e-07, "log_odds_chosen": 0.5535539388656616, "log_odds_ratio": -0.9281789660453796, "logits/chosen": 0.6590589284896851, "logits/rejected": 0.5707938075065613, "logps/chosen": -3.2406582832336426, "logps/rejected": -3.7668964862823486, "loss": 1.0792, "nll_loss": 0.9863866567611694, "rewards/accuracies": 0.375, "rewards/chosen": -0.32406583428382874, "rewards/margins": 0.05262380093336105, "rewards/rejected": -0.3766896426677704, "step": 1424 }, { "epoch": 3.9014373716632442, "grad_norm": 3.362360715866089, "learning_rate": 8.04931506849315e-07, "log_odds_chosen": 1.5790143013000488, "log_odds_ratio": -0.38880056142807007, "logits/chosen": 0.47479331493377686, "logits/rejected": 0.32471901178359985, "logps/chosen": -2.1110970973968506, "logps/rejected": -3.616868495941162, "loss": 0.9982, "nll_loss": 0.9593676328659058, "rewards/accuracies": 0.75, "rewards/chosen": -0.2111097127199173, "rewards/margins": 0.150577113032341, "rewards/rejected": -0.3616868257522583, "step": 1425 }, { "epoch": 3.9041752224503763, "grad_norm": 3.728468418121338, "learning_rate": 8.047945205479451e-07, "log_odds_chosen": 1.0061633586883545, "log_odds_ratio": -0.4219120442867279, "logits/chosen": 0.7778003811836243, "logits/rejected": 0.8226901292800903, "logps/chosen": -3.0125784873962402, "logps/rejected": -3.985567092895508, "loss": 0.9084, "nll_loss": 0.8662525415420532, "rewards/accuracies": 0.875, "rewards/chosen": -0.301257848739624, "rewards/margins": 0.09729886054992676, "rewards/rejected": -0.3985567092895508, "step": 1426 }, { "epoch": 3.906913073237509, "grad_norm": 3.8034868240356445, "learning_rate": 8.046575342465753e-07, "log_odds_chosen": 0.4028365910053253, "log_odds_ratio": -0.7114971280097961, "logits/chosen": 0.6645587086677551, "logits/rejected": 0.6991997957229614, "logps/chosen": -2.976555824279785, "logps/rejected": -3.3717987537384033, "loss": 0.9576, "nll_loss": 0.8864827752113342, "rewards/accuracies": 0.5, "rewards/chosen": -0.2976555824279785, "rewards/margins": 0.03952428698539734, "rewards/rejected": -0.33717986941337585, "step": 1427 }, { "epoch": 3.909650924024641, "grad_norm": 3.0651965141296387, "learning_rate": 8.045205479452054e-07, "log_odds_chosen": 1.497921347618103, "log_odds_ratio": -0.39913591742515564, "logits/chosen": 0.8654289245605469, "logits/rejected": 0.8764568567276001, "logps/chosen": -2.325537919998169, "logps/rejected": -3.773893356323242, "loss": 0.9709, "nll_loss": 0.9309433698654175, "rewards/accuracies": 0.875, "rewards/chosen": -0.23255378007888794, "rewards/margins": 0.14483554661273956, "rewards/rejected": -0.3773893415927887, "step": 1428 }, { "epoch": 3.912388774811773, "grad_norm": 3.463160753250122, "learning_rate": 8.043835616438356e-07, "log_odds_chosen": 0.3849179744720459, "log_odds_ratio": -0.6590803861618042, "logits/chosen": 0.7477046847343445, "logits/rejected": 0.6993228197097778, "logps/chosen": -3.0434153079986572, "logps/rejected": -3.4076900482177734, "loss": 1.0555, "nll_loss": 0.9895641803741455, "rewards/accuracies": 0.625, "rewards/chosen": -0.30434155464172363, "rewards/margins": 0.03642746061086655, "rewards/rejected": -0.3407689929008484, "step": 1429 }, { "epoch": 3.915126625598905, "grad_norm": 3.064244508743286, "learning_rate": 8.042465753424657e-07, "log_odds_chosen": 1.8892593383789062, "log_odds_ratio": -0.4408167898654938, "logits/chosen": 0.6879736185073853, "logits/rejected": 0.6752023696899414, "logps/chosen": -2.2912538051605225, "logps/rejected": -4.099247932434082, "loss": 0.9618, "nll_loss": 0.9177641272544861, "rewards/accuracies": 0.75, "rewards/chosen": -0.22912538051605225, "rewards/margins": 0.18079939484596252, "rewards/rejected": -0.4099247455596924, "step": 1430 }, { "epoch": 3.917864476386037, "grad_norm": 3.4043612480163574, "learning_rate": 8.041095890410958e-07, "log_odds_chosen": 1.383405327796936, "log_odds_ratio": -0.4147130250930786, "logits/chosen": 0.8390890955924988, "logits/rejected": 0.8234795928001404, "logps/chosen": -3.014450788497925, "logps/rejected": -4.3084917068481445, "loss": 0.9073, "nll_loss": 0.8658229112625122, "rewards/accuracies": 0.75, "rewards/chosen": -0.3014450967311859, "rewards/margins": 0.12940403819084167, "rewards/rejected": -0.4308491349220276, "step": 1431 }, { "epoch": 3.920602327173169, "grad_norm": 3.3458688259124756, "learning_rate": 8.03972602739726e-07, "log_odds_chosen": 1.107831597328186, "log_odds_ratio": -0.3685269355773926, "logits/chosen": 0.5478949546813965, "logits/rejected": 0.5340392589569092, "logps/chosen": -2.1022744178771973, "logps/rejected": -3.100858449935913, "loss": 0.9552, "nll_loss": 0.9183723330497742, "rewards/accuracies": 0.875, "rewards/chosen": -0.21022744476795197, "rewards/margins": 0.09985838830471039, "rewards/rejected": -0.31008583307266235, "step": 1432 }, { "epoch": 3.923340177960301, "grad_norm": 3.925021171569824, "learning_rate": 8.038356164383561e-07, "log_odds_chosen": 1.4414280652999878, "log_odds_ratio": -0.4945131242275238, "logits/chosen": 0.5937796831130981, "logits/rejected": 0.6033748388290405, "logps/chosen": -3.3470094203948975, "logps/rejected": -4.659456253051758, "loss": 1.0004, "nll_loss": 0.9509735703468323, "rewards/accuracies": 0.625, "rewards/chosen": -0.33470094203948975, "rewards/margins": 0.1312447190284729, "rewards/rejected": -0.46594566106796265, "step": 1433 }, { "epoch": 3.9260780287474333, "grad_norm": 3.9661481380462646, "learning_rate": 8.036986301369862e-07, "log_odds_chosen": 0.1659836322069168, "log_odds_ratio": -0.6720148921012878, "logits/chosen": 0.6029772162437439, "logits/rejected": 0.6220480799674988, "logps/chosen": -3.068772792816162, "logps/rejected": -3.2177491188049316, "loss": 1.0533, "nll_loss": 0.9860885143280029, "rewards/accuracies": 0.625, "rewards/chosen": -0.3068772554397583, "rewards/margins": 0.014897629618644714, "rewards/rejected": -0.3217749297618866, "step": 1434 }, { "epoch": 3.9288158795345653, "grad_norm": 3.1951990127563477, "learning_rate": 8.035616438356164e-07, "log_odds_chosen": 1.080688714981079, "log_odds_ratio": -0.3522096574306488, "logits/chosen": 0.7335108518600464, "logits/rejected": 0.7773837447166443, "logps/chosen": -2.2092349529266357, "logps/rejected": -3.2089455127716064, "loss": 0.8339, "nll_loss": 0.7986458539962769, "rewards/accuracies": 0.875, "rewards/chosen": -0.22092348337173462, "rewards/margins": 0.09997106343507767, "rewards/rejected": -0.3208945393562317, "step": 1435 }, { "epoch": 3.9315537303216974, "grad_norm": 3.2955777645111084, "learning_rate": 8.034246575342465e-07, "log_odds_chosen": 0.9666492342948914, "log_odds_ratio": -0.5276352167129517, "logits/chosen": 0.8374930620193481, "logits/rejected": 0.8750940561294556, "logps/chosen": -2.2389016151428223, "logps/rejected": -3.0891330242156982, "loss": 0.7673, "nll_loss": 0.7145802974700928, "rewards/accuracies": 0.875, "rewards/chosen": -0.22389017045497894, "rewards/margins": 0.08502313494682312, "rewards/rejected": -0.30891329050064087, "step": 1436 }, { "epoch": 3.9342915811088295, "grad_norm": 4.28891134262085, "learning_rate": 8.032876712328766e-07, "log_odds_chosen": 0.1717132031917572, "log_odds_ratio": -0.8442561030387878, "logits/chosen": 0.4813307523727417, "logits/rejected": 0.4508548974990845, "logps/chosen": -2.832864999771118, "logps/rejected": -2.947604179382324, "loss": 0.9857, "nll_loss": 0.9013012051582336, "rewards/accuracies": 0.625, "rewards/chosen": -0.2832864820957184, "rewards/margins": 0.011473923921585083, "rewards/rejected": -0.29476040601730347, "step": 1437 }, { "epoch": 3.9370294318959616, "grad_norm": 2.9454345703125, "learning_rate": 8.031506849315068e-07, "log_odds_chosen": 0.4240717589855194, "log_odds_ratio": -0.5607144832611084, "logits/chosen": 0.5921493768692017, "logits/rejected": 0.5723015666007996, "logps/chosen": -2.615631580352783, "logps/rejected": -3.0117673873901367, "loss": 0.9493, "nll_loss": 0.8932574987411499, "rewards/accuracies": 0.875, "rewards/chosen": -0.26156318187713623, "rewards/margins": 0.039613597095012665, "rewards/rejected": -0.3011767566204071, "step": 1438 }, { "epoch": 3.9397672826830936, "grad_norm": 4.27958345413208, "learning_rate": 8.030136986301369e-07, "log_odds_chosen": 0.42218390107154846, "log_odds_ratio": -0.5366510152816772, "logits/chosen": 0.8159553408622742, "logits/rejected": 0.952850341796875, "logps/chosen": -2.95169734954834, "logps/rejected": -3.3433985710144043, "loss": 0.8303, "nll_loss": 0.7766193151473999, "rewards/accuracies": 0.75, "rewards/chosen": -0.29516974091529846, "rewards/margins": 0.03917011618614197, "rewards/rejected": -0.33433985710144043, "step": 1439 }, { "epoch": 3.9425051334702257, "grad_norm": 3.6720755100250244, "learning_rate": 8.02876712328767e-07, "log_odds_chosen": 0.8625189065933228, "log_odds_ratio": -0.48271891474723816, "logits/chosen": 0.5759924650192261, "logits/rejected": 0.513909101486206, "logps/chosen": -1.6878883838653564, "logps/rejected": -2.456747055053711, "loss": 1.0035, "nll_loss": 0.9552450776100159, "rewards/accuracies": 0.625, "rewards/chosen": -0.1687888503074646, "rewards/margins": 0.07688585668802261, "rewards/rejected": -0.24567469954490662, "step": 1440 }, { "epoch": 3.9452429842573578, "grad_norm": 3.264899730682373, "learning_rate": 8.027397260273972e-07, "log_odds_chosen": 0.7715932130813599, "log_odds_ratio": -0.48643577098846436, "logits/chosen": 0.5588740110397339, "logits/rejected": 0.48023685812950134, "logps/chosen": -2.3348090648651123, "logps/rejected": -2.969622850418091, "loss": 1.1398, "nll_loss": 1.0911508798599243, "rewards/accuracies": 0.75, "rewards/chosen": -0.23348090052604675, "rewards/margins": 0.0634813904762268, "rewards/rejected": -0.29696229100227356, "step": 1441 }, { "epoch": 3.94798083504449, "grad_norm": 3.6628379821777344, "learning_rate": 8.026027397260273e-07, "log_odds_chosen": 0.5618023872375488, "log_odds_ratio": -0.49876442551612854, "logits/chosen": 0.6185517907142639, "logits/rejected": 0.6837435960769653, "logps/chosen": -3.265120506286621, "logps/rejected": -3.8112306594848633, "loss": 1.0021, "nll_loss": 0.9522478580474854, "rewards/accuracies": 0.75, "rewards/chosen": -0.32651203870773315, "rewards/margins": 0.05461103469133377, "rewards/rejected": -0.38112306594848633, "step": 1442 }, { "epoch": 3.9507186858316223, "grad_norm": 3.795186758041382, "learning_rate": 8.024657534246575e-07, "log_odds_chosen": -0.13716670870780945, "log_odds_ratio": -0.8965086340904236, "logits/chosen": 0.7113972306251526, "logits/rejected": 0.6703168153762817, "logps/chosen": -2.9574475288391113, "logps/rejected": -2.7907211780548096, "loss": 0.9626, "nll_loss": 0.8729763627052307, "rewards/accuracies": 0.625, "rewards/chosen": -0.29574477672576904, "rewards/margins": -0.01667262613773346, "rewards/rejected": -0.2790721356868744, "step": 1443 }, { "epoch": 3.9534565366187544, "grad_norm": 3.0725011825561523, "learning_rate": 8.023287671232876e-07, "log_odds_chosen": 0.7626704573631287, "log_odds_ratio": -0.501369833946228, "logits/chosen": 0.6741659641265869, "logits/rejected": 0.672117292881012, "logps/chosen": -2.3514516353607178, "logps/rejected": -3.074522018432617, "loss": 1.0266, "nll_loss": 0.9764916896820068, "rewards/accuracies": 0.875, "rewards/chosen": -0.23514515161514282, "rewards/margins": 0.0723070576786995, "rewards/rejected": -0.3074522316455841, "step": 1444 }, { "epoch": 3.9561943874058865, "grad_norm": 2.7498319149017334, "learning_rate": 8.021917808219177e-07, "log_odds_chosen": 1.7968690395355225, "log_odds_ratio": -0.3117227256298065, "logits/chosen": 0.7280870676040649, "logits/rejected": 0.6961776614189148, "logps/chosen": -2.233144760131836, "logps/rejected": -3.963308334350586, "loss": 0.9351, "nll_loss": 0.9039567708969116, "rewards/accuracies": 1.0, "rewards/chosen": -0.22331446409225464, "rewards/margins": 0.17301636934280396, "rewards/rejected": -0.3963308334350586, "step": 1445 }, { "epoch": 3.9589322381930185, "grad_norm": 3.764388084411621, "learning_rate": 8.02054794520548e-07, "log_odds_chosen": -0.22692622244358063, "log_odds_ratio": -1.171630620956421, "logits/chosen": 0.6463775634765625, "logits/rejected": 0.7275747060775757, "logps/chosen": -2.7323269844055176, "logps/rejected": -2.514054775238037, "loss": 0.9534, "nll_loss": 0.8362088203430176, "rewards/accuracies": 0.5, "rewards/chosen": -0.27323272824287415, "rewards/margins": -0.02182723768055439, "rewards/rejected": -0.2514054775238037, "step": 1446 }, { "epoch": 3.9616700889801506, "grad_norm": 5.285740852355957, "learning_rate": 8.01917808219178e-07, "log_odds_chosen": -0.20160368084907532, "log_odds_ratio": -0.9181361794471741, "logits/chosen": 0.682494580745697, "logits/rejected": 0.6896296739578247, "logps/chosen": -3.044691562652588, "logps/rejected": -2.7968509197235107, "loss": 0.8952, "nll_loss": 0.8034062385559082, "rewards/accuracies": 0.375, "rewards/chosen": -0.30446916818618774, "rewards/margins": -0.02478407323360443, "rewards/rejected": -0.2796851098537445, "step": 1447 }, { "epoch": 3.9644079397672827, "grad_norm": 2.9833531379699707, "learning_rate": 8.017808219178081e-07, "log_odds_chosen": 1.4717249870300293, "log_odds_ratio": -0.3131014108657837, "logits/chosen": 0.6965380907058716, "logits/rejected": 0.7039650678634644, "logps/chosen": -2.3212058544158936, "logps/rejected": -3.7072672843933105, "loss": 0.9622, "nll_loss": 0.9308491945266724, "rewards/accuracies": 0.875, "rewards/chosen": -0.2321206033229828, "rewards/margins": 0.13860613107681274, "rewards/rejected": -0.37072673439979553, "step": 1448 }, { "epoch": 3.9671457905544147, "grad_norm": 3.25376558303833, "learning_rate": 8.016438356164384e-07, "log_odds_chosen": 0.7731133103370667, "log_odds_ratio": -0.452695369720459, "logits/chosen": 0.6354248523712158, "logits/rejected": 0.6425421237945557, "logps/chosen": -2.4590463638305664, "logps/rejected": -3.19681978225708, "loss": 0.9804, "nll_loss": 0.9350989460945129, "rewards/accuracies": 0.875, "rewards/chosen": -0.24590462446212769, "rewards/margins": 0.07377734035253525, "rewards/rejected": -0.31968197226524353, "step": 1449 }, { "epoch": 3.969883641341547, "grad_norm": 4.4431562423706055, "learning_rate": 8.015068493150686e-07, "log_odds_chosen": 0.4926524758338928, "log_odds_ratio": -0.6136754751205444, "logits/chosen": 0.4848977327346802, "logits/rejected": 0.49365365505218506, "logps/chosen": -2.4229819774627686, "logps/rejected": -2.871734619140625, "loss": 0.9317, "nll_loss": 0.8703646063804626, "rewards/accuracies": 0.75, "rewards/chosen": -0.2422982156276703, "rewards/margins": 0.044875264167785645, "rewards/rejected": -0.28717347979545593, "step": 1450 }, { "epoch": 3.972621492128679, "grad_norm": 3.599586009979248, "learning_rate": 8.013698630136985e-07, "log_odds_chosen": 0.6915438175201416, "log_odds_ratio": -0.5456221699714661, "logits/chosen": 0.7622355222702026, "logits/rejected": 0.7355145215988159, "logps/chosen": -2.749497652053833, "logps/rejected": -3.3868918418884277, "loss": 1.0382, "nll_loss": 0.9836305379867554, "rewards/accuracies": 0.625, "rewards/chosen": -0.2749497592449188, "rewards/margins": 0.06373943388462067, "rewards/rejected": -0.3386892080307007, "step": 1451 }, { "epoch": 3.975359342915811, "grad_norm": 3.591353416442871, "learning_rate": 8.012328767123288e-07, "log_odds_chosen": 1.543163776397705, "log_odds_ratio": -0.41622835397720337, "logits/chosen": 0.8725830912590027, "logits/rejected": 0.7903748154640198, "logps/chosen": -2.675818681716919, "logps/rejected": -4.159696578979492, "loss": 0.8701, "nll_loss": 0.828481912612915, "rewards/accuracies": 0.875, "rewards/chosen": -0.26758188009262085, "rewards/margins": 0.14838775992393494, "rewards/rejected": -0.4159696400165558, "step": 1452 }, { "epoch": 3.9780971937029435, "grad_norm": 5.025571823120117, "learning_rate": 8.010958904109589e-07, "log_odds_chosen": -0.8840537071228027, "log_odds_ratio": -1.4364581108093262, "logits/chosen": 0.8473480939865112, "logits/rejected": 0.9324146509170532, "logps/chosen": -4.074522972106934, "logps/rejected": -3.200204372406006, "loss": 1.032, "nll_loss": 0.8883044123649597, "rewards/accuracies": 0.375, "rewards/chosen": -0.4074522852897644, "rewards/margins": -0.08743180334568024, "rewards/rejected": -0.320020467042923, "step": 1453 }, { "epoch": 3.9808350444900755, "grad_norm": 3.8867275714874268, "learning_rate": 8.00958904109589e-07, "log_odds_chosen": 0.8480393290519714, "log_odds_ratio": -0.4373481273651123, "logits/chosen": 0.7008827924728394, "logits/rejected": 0.7227182388305664, "logps/chosen": -2.819762945175171, "logps/rejected": -3.6014020442962646, "loss": 0.8974, "nll_loss": 0.8536980152130127, "rewards/accuracies": 0.875, "rewards/chosen": -0.2819763123989105, "rewards/margins": 0.07816389203071594, "rewards/rejected": -0.36014020442962646, "step": 1454 }, { "epoch": 3.9835728952772076, "grad_norm": 4.285060405731201, "learning_rate": 8.008219178082192e-07, "log_odds_chosen": 0.7765735387802124, "log_odds_ratio": -0.4202253520488739, "logits/chosen": 0.7363726496696472, "logits/rejected": 0.7715606093406677, "logps/chosen": -2.2823479175567627, "logps/rejected": -2.9933760166168213, "loss": 0.9627, "nll_loss": 0.9206857681274414, "rewards/accuracies": 0.875, "rewards/chosen": -0.22823478281497955, "rewards/margins": 0.07110282778739929, "rewards/rejected": -0.29933759570121765, "step": 1455 }, { "epoch": 3.9863107460643397, "grad_norm": 3.5453948974609375, "learning_rate": 8.006849315068493e-07, "log_odds_chosen": 1.0119765996932983, "log_odds_ratio": -0.5037404894828796, "logits/chosen": 0.8178818225860596, "logits/rejected": 0.8690330982208252, "logps/chosen": -2.5197863578796387, "logps/rejected": -3.4708001613616943, "loss": 0.9277, "nll_loss": 0.8772792220115662, "rewards/accuracies": 0.75, "rewards/chosen": -0.25197866559028625, "rewards/margins": 0.09510137885808945, "rewards/rejected": -0.3470800518989563, "step": 1456 }, { "epoch": 3.9890485968514717, "grad_norm": 3.2881054878234863, "learning_rate": 8.005479452054795e-07, "log_odds_chosen": 0.962549090385437, "log_odds_ratio": -0.5108094811439514, "logits/chosen": 0.5379760265350342, "logits/rejected": 0.5476595163345337, "logps/chosen": -2.545315980911255, "logps/rejected": -3.4676411151885986, "loss": 1.0416, "nll_loss": 0.9905602335929871, "rewards/accuracies": 0.625, "rewards/chosen": -0.2545316219329834, "rewards/margins": 0.09223254024982452, "rewards/rejected": -0.34676414728164673, "step": 1457 }, { "epoch": 3.991786447638604, "grad_norm": 4.095959663391113, "learning_rate": 8.004109589041096e-07, "log_odds_chosen": 1.0200644731521606, "log_odds_ratio": -0.4305698275566101, "logits/chosen": 0.7165117263793945, "logits/rejected": 0.7295722961425781, "logps/chosen": -2.204540729522705, "logps/rejected": -3.1890439987182617, "loss": 0.8986, "nll_loss": 0.8555465340614319, "rewards/accuracies": 0.875, "rewards/chosen": -0.22045406699180603, "rewards/margins": 0.09845034778118134, "rewards/rejected": -0.31890439987182617, "step": 1458 }, { "epoch": 3.994524298425736, "grad_norm": 2.839022159576416, "learning_rate": 8.002739726027397e-07, "log_odds_chosen": 0.9045186638832092, "log_odds_ratio": -0.3884297013282776, "logits/chosen": 0.8774826526641846, "logits/rejected": 0.8911441564559937, "logps/chosen": -2.489793300628662, "logps/rejected": -3.3441786766052246, "loss": 0.8664, "nll_loss": 0.8275862336158752, "rewards/accuracies": 1.0, "rewards/chosen": -0.24897931516170502, "rewards/margins": 0.08543851971626282, "rewards/rejected": -0.33441784977912903, "step": 1459 }, { "epoch": 3.997262149212868, "grad_norm": 4.575218677520752, "learning_rate": 8.001369863013699e-07, "log_odds_chosen": 0.48437416553497314, "log_odds_ratio": -0.5339792370796204, "logits/chosen": 0.7798891067504883, "logits/rejected": 0.7776315212249756, "logps/chosen": -2.1840062141418457, "logps/rejected": -2.6022024154663086, "loss": 0.9766, "nll_loss": 0.9232208132743835, "rewards/accuracies": 0.75, "rewards/chosen": -0.21840061247348785, "rewards/margins": 0.041819650679826736, "rewards/rejected": -0.2602202892303467, "step": 1460 }, { "epoch": 4.0, "grad_norm": 4.121119022369385, "learning_rate": 8e-07, "log_odds_chosen": 0.9974149465560913, "log_odds_ratio": -0.36119544506073, "logits/chosen": 0.8545277714729309, "logits/rejected": 0.8406997323036194, "logps/chosen": -2.3728556632995605, "logps/rejected": -3.2902886867523193, "loss": 0.9756, "nll_loss": 0.9394829273223877, "rewards/accuracies": 0.875, "rewards/chosen": -0.2372855544090271, "rewards/margins": 0.09174332767724991, "rewards/rejected": -0.3290289044380188, "step": 1461 }, { "epoch": 4.002737850787132, "grad_norm": 3.825782060623169, "learning_rate": 7.998630136986301e-07, "log_odds_chosen": 0.739504337310791, "log_odds_ratio": -0.653935432434082, "logits/chosen": 0.7938482761383057, "logits/rejected": 0.8784675598144531, "logps/chosen": -2.804856300354004, "logps/rejected": -3.488924026489258, "loss": 0.8684, "nll_loss": 0.803006112575531, "rewards/accuracies": 0.75, "rewards/chosen": -0.2804856300354004, "rewards/margins": 0.06840677559375763, "rewards/rejected": -0.3488923907279968, "step": 1462 }, { "epoch": 4.005475701574264, "grad_norm": 3.1189653873443604, "learning_rate": 7.997260273972603e-07, "log_odds_chosen": 1.6851292848587036, "log_odds_ratio": -0.22440674901008606, "logits/chosen": 0.9053229093551636, "logits/rejected": 0.9561588764190674, "logps/chosen": -2.3827872276306152, "logps/rejected": -3.971168279647827, "loss": 0.7708, "nll_loss": 0.7484070658683777, "rewards/accuracies": 1.0, "rewards/chosen": -0.23827871680259705, "rewards/margins": 0.15883812308311462, "rewards/rejected": -0.39711683988571167, "step": 1463 }, { "epoch": 4.008213552361396, "grad_norm": 4.360894680023193, "learning_rate": 7.995890410958905e-07, "log_odds_chosen": 1.0244691371917725, "log_odds_ratio": -0.4424365758895874, "logits/chosen": 0.9203972816467285, "logits/rejected": 0.9521256685256958, "logps/chosen": -2.473752975463867, "logps/rejected": -3.422816276550293, "loss": 0.789, "nll_loss": 0.7447559833526611, "rewards/accuracies": 0.875, "rewards/chosen": -0.24737530946731567, "rewards/margins": 0.09490631520748138, "rewards/rejected": -0.34228163957595825, "step": 1464 }, { "epoch": 4.010951403148528, "grad_norm": 4.152568817138672, "learning_rate": 7.994520547945205e-07, "log_odds_chosen": 0.9869076013565063, "log_odds_ratio": -0.45890527963638306, "logits/chosen": 0.8416750431060791, "logits/rejected": 0.8891620635986328, "logps/chosen": -2.6139581203460693, "logps/rejected": -3.5417094230651855, "loss": 0.8547, "nll_loss": 0.8087698221206665, "rewards/accuracies": 0.625, "rewards/chosen": -0.26139581203460693, "rewards/margins": 0.09277515858411789, "rewards/rejected": -0.3541709780693054, "step": 1465 }, { "epoch": 4.01368925393566, "grad_norm": 2.892974853515625, "learning_rate": 7.993150684931507e-07, "log_odds_chosen": 0.40117496252059937, "log_odds_ratio": -0.6106773614883423, "logits/chosen": 0.7655291557312012, "logits/rejected": 0.7455272674560547, "logps/chosen": -2.262115955352783, "logps/rejected": -2.645498752593994, "loss": 0.9768, "nll_loss": 0.9157750606536865, "rewards/accuracies": 0.625, "rewards/chosen": -0.22621160745620728, "rewards/margins": 0.038338277488946915, "rewards/rejected": -0.2645498514175415, "step": 1466 }, { "epoch": 4.016427104722792, "grad_norm": 3.153240203857422, "learning_rate": 7.991780821917808e-07, "log_odds_chosen": 0.5614305734634399, "log_odds_ratio": -0.5908326506614685, "logits/chosen": 0.7384104132652283, "logits/rejected": 0.697790265083313, "logps/chosen": -2.244227170944214, "logps/rejected": -2.7264137268066406, "loss": 0.937, "nll_loss": 0.8779290318489075, "rewards/accuracies": 0.75, "rewards/chosen": -0.22442270815372467, "rewards/margins": 0.048218682408332825, "rewards/rejected": -0.2726413905620575, "step": 1467 }, { "epoch": 4.0191649555099245, "grad_norm": 5.158880710601807, "learning_rate": 7.990410958904109e-07, "log_odds_chosen": 0.3772961497306824, "log_odds_ratio": -0.6145477890968323, "logits/chosen": 0.7655783891677856, "logits/rejected": 0.6579319834709167, "logps/chosen": -2.129930257797241, "logps/rejected": -2.4227166175842285, "loss": 1.0631, "nll_loss": 1.0016385316848755, "rewards/accuracies": 0.625, "rewards/chosen": -0.21299302577972412, "rewards/margins": 0.029278628528118134, "rewards/rejected": -0.24227166175842285, "step": 1468 }, { "epoch": 4.0219028062970565, "grad_norm": 4.163314342498779, "learning_rate": 7.989041095890411e-07, "log_odds_chosen": 0.21334150433540344, "log_odds_ratio": -0.7247161865234375, "logits/chosen": 0.6888706684112549, "logits/rejected": 0.670161247253418, "logps/chosen": -3.032853364944458, "logps/rejected": -3.2125983238220215, "loss": 0.8926, "nll_loss": 0.820122241973877, "rewards/accuracies": 0.75, "rewards/chosen": -0.3032853305339813, "rewards/margins": 0.017974531278014183, "rewards/rejected": -0.32125985622406006, "step": 1469 }, { "epoch": 4.024640657084189, "grad_norm": 3.4229977130889893, "learning_rate": 7.987671232876712e-07, "log_odds_chosen": 1.3992562294006348, "log_odds_ratio": -0.3800960183143616, "logits/chosen": 0.6901214718818665, "logits/rejected": 0.6295444965362549, "logps/chosen": -2.275573253631592, "logps/rejected": -3.5417518615722656, "loss": 0.948, "nll_loss": 0.9099848866462708, "rewards/accuracies": 0.875, "rewards/chosen": -0.22755730152130127, "rewards/margins": 0.12661787867546082, "rewards/rejected": -0.3541752099990845, "step": 1470 }, { "epoch": 4.027378507871321, "grad_norm": 4.9829206466674805, "learning_rate": 7.986301369863014e-07, "log_odds_chosen": 0.5618995428085327, "log_odds_ratio": -0.5254189372062683, "logits/chosen": 0.7811931371688843, "logits/rejected": 0.7339761853218079, "logps/chosen": -2.2384345531463623, "logps/rejected": -2.768000602722168, "loss": 1.0369, "nll_loss": 0.9843485951423645, "rewards/accuracies": 0.75, "rewards/chosen": -0.22384347021579742, "rewards/margins": 0.052956610918045044, "rewards/rejected": -0.2768000662326813, "step": 1471 }, { "epoch": 4.030116358658453, "grad_norm": 3.513641357421875, "learning_rate": 7.984931506849315e-07, "log_odds_chosen": 1.1240395307540894, "log_odds_ratio": -0.3934083878993988, "logits/chosen": 0.5960907340049744, "logits/rejected": 0.4877232313156128, "logps/chosen": -2.1708109378814697, "logps/rejected": -3.124752998352051, "loss": 0.9453, "nll_loss": 0.90596604347229, "rewards/accuracies": 0.875, "rewards/chosen": -0.21708109974861145, "rewards/margins": 0.09539420157670975, "rewards/rejected": -0.312475323677063, "step": 1472 }, { "epoch": 4.032854209445585, "grad_norm": 3.1683318614959717, "learning_rate": 7.983561643835616e-07, "log_odds_chosen": 0.9062322378158569, "log_odds_ratio": -0.41683536767959595, "logits/chosen": 0.7144200801849365, "logits/rejected": 0.7084946036338806, "logps/chosen": -2.5539135932922363, "logps/rejected": -3.411799669265747, "loss": 0.8955, "nll_loss": 0.853816032409668, "rewards/accuracies": 0.875, "rewards/chosen": -0.255391389131546, "rewards/margins": 0.08578857779502869, "rewards/rejected": -0.3411799669265747, "step": 1473 }, { "epoch": 4.035592060232717, "grad_norm": 4.293901443481445, "learning_rate": 7.982191780821918e-07, "log_odds_chosen": 0.9060757756233215, "log_odds_ratio": -0.4440506100654602, "logits/chosen": 0.7722381353378296, "logits/rejected": 0.7664749622344971, "logps/chosen": -2.262728691101074, "logps/rejected": -3.0805723667144775, "loss": 0.8888, "nll_loss": 0.844434916973114, "rewards/accuracies": 0.75, "rewards/chosen": -0.22627288103103638, "rewards/margins": 0.08178436756134033, "rewards/rejected": -0.3080572187900543, "step": 1474 }, { "epoch": 4.03832991101985, "grad_norm": 3.080756902694702, "learning_rate": 7.980821917808219e-07, "log_odds_chosen": 0.5393637418746948, "log_odds_ratio": -0.4908229112625122, "logits/chosen": 0.6106570959091187, "logits/rejected": 0.5957752466201782, "logps/chosen": -1.9573400020599365, "logps/rejected": -2.4096784591674805, "loss": 0.9546, "nll_loss": 0.9054983854293823, "rewards/accuracies": 0.875, "rewards/chosen": -0.19573399424552917, "rewards/margins": 0.04523385688662529, "rewards/rejected": -0.24096785485744476, "step": 1475 }, { "epoch": 4.041067761806982, "grad_norm": 3.5399692058563232, "learning_rate": 7.97945205479452e-07, "log_odds_chosen": 0.6170349717140198, "log_odds_ratio": -0.5298970341682434, "logits/chosen": 0.5689641237258911, "logits/rejected": 0.5805208683013916, "logps/chosen": -2.1511924266815186, "logps/rejected": -2.734321117401123, "loss": 0.9457, "nll_loss": 0.8927025198936462, "rewards/accuracies": 0.75, "rewards/chosen": -0.21511924266815186, "rewards/margins": 0.05831286311149597, "rewards/rejected": -0.2734321355819702, "step": 1476 }, { "epoch": 4.043805612594114, "grad_norm": 3.690732717514038, "learning_rate": 7.978082191780822e-07, "log_odds_chosen": 0.6432784795761108, "log_odds_ratio": -0.4931795597076416, "logits/chosen": 0.8273173570632935, "logits/rejected": 0.8356133699417114, "logps/chosen": -2.540621280670166, "logps/rejected": -3.136164426803589, "loss": 0.939, "nll_loss": 0.8896390199661255, "rewards/accuracies": 0.625, "rewards/chosen": -0.25406214594841003, "rewards/margins": 0.059554312378168106, "rewards/rejected": -0.31361645460128784, "step": 1477 }, { "epoch": 4.046543463381246, "grad_norm": 4.121282577514648, "learning_rate": 7.976712328767124e-07, "log_odds_chosen": 0.6751211285591125, "log_odds_ratio": -0.6615248918533325, "logits/chosen": 0.6077248454093933, "logits/rejected": 0.7102689743041992, "logps/chosen": -2.888101100921631, "logps/rejected": -3.5426034927368164, "loss": 1.0958, "nll_loss": 1.029691219329834, "rewards/accuracies": 0.625, "rewards/chosen": -0.288810133934021, "rewards/margins": 0.06545022130012512, "rewards/rejected": -0.35426032543182373, "step": 1478 }, { "epoch": 4.049281314168378, "grad_norm": 3.3140082359313965, "learning_rate": 7.975342465753424e-07, "log_odds_chosen": 1.6431176662445068, "log_odds_ratio": -0.2914375066757202, "logits/chosen": 0.8032982349395752, "logits/rejected": 0.7542567849159241, "logps/chosen": -2.153536319732666, "logps/rejected": -3.6942005157470703, "loss": 0.8914, "nll_loss": 0.86222904920578, "rewards/accuracies": 0.875, "rewards/chosen": -0.21535363793373108, "rewards/margins": 0.15406641364097595, "rewards/rejected": -0.3694200813770294, "step": 1479 }, { "epoch": 4.05201916495551, "grad_norm": 5.113152980804443, "learning_rate": 7.973972602739726e-07, "log_odds_chosen": 1.0497496128082275, "log_odds_ratio": -0.39101752638816833, "logits/chosen": 0.7495927810668945, "logits/rejected": 0.7698240876197815, "logps/chosen": -2.366424083709717, "logps/rejected": -3.3090834617614746, "loss": 0.8894, "nll_loss": 0.8503377437591553, "rewards/accuracies": 0.875, "rewards/chosen": -0.23664240539073944, "rewards/margins": 0.09426593035459518, "rewards/rejected": -0.3309083580970764, "step": 1480 }, { "epoch": 4.054757015742642, "grad_norm": 3.678692579269409, "learning_rate": 7.972602739726027e-07, "log_odds_chosen": 0.7379775643348694, "log_odds_ratio": -0.48690786957740784, "logits/chosen": 0.5410807728767395, "logits/rejected": 0.4748448133468628, "logps/chosen": -2.147329092025757, "logps/rejected": -2.831348180770874, "loss": 1.0041, "nll_loss": 0.95542311668396, "rewards/accuracies": 0.75, "rewards/chosen": -0.21473291516304016, "rewards/margins": 0.06840189546346664, "rewards/rejected": -0.2831348180770874, "step": 1481 }, { "epoch": 4.057494866529774, "grad_norm": 6.068539619445801, "learning_rate": 7.971232876712328e-07, "log_odds_chosen": 0.6765276193618774, "log_odds_ratio": -0.6706891655921936, "logits/chosen": 0.8772631883621216, "logits/rejected": 0.9750088453292847, "logps/chosen": -2.7641923427581787, "logps/rejected": -3.372734546661377, "loss": 0.9538, "nll_loss": 0.8867717385292053, "rewards/accuracies": 0.625, "rewards/chosen": -0.2764192223548889, "rewards/margins": 0.060854244977235794, "rewards/rejected": -0.3372734785079956, "step": 1482 }, { "epoch": 4.060232717316906, "grad_norm": 3.609659194946289, "learning_rate": 7.96986301369863e-07, "log_odds_chosen": 0.28117984533309937, "log_odds_ratio": -0.6128912568092346, "logits/chosen": 0.5067835450172424, "logits/rejected": 0.5334859490394592, "logps/chosen": -2.488706588745117, "logps/rejected": -2.7391951084136963, "loss": 0.9615, "nll_loss": 0.9002298712730408, "rewards/accuracies": 0.625, "rewards/chosen": -0.24887068569660187, "rewards/margins": 0.02504885010421276, "rewards/rejected": -0.2739195227622986, "step": 1483 }, { "epoch": 4.0629705681040384, "grad_norm": 4.313037872314453, "learning_rate": 7.968493150684931e-07, "log_odds_chosen": 0.4821073114871979, "log_odds_ratio": -0.5057063102722168, "logits/chosen": 0.9153050184249878, "logits/rejected": 0.847930371761322, "logps/chosen": -2.243609666824341, "logps/rejected": -2.6670572757720947, "loss": 0.9113, "nll_loss": 0.8607372045516968, "rewards/accuracies": 0.875, "rewards/chosen": -0.22436095774173737, "rewards/margins": 0.042344775050878525, "rewards/rejected": -0.266705721616745, "step": 1484 }, { "epoch": 4.0657084188911705, "grad_norm": 4.235687255859375, "learning_rate": 7.967123287671232e-07, "log_odds_chosen": 0.7623522877693176, "log_odds_ratio": -0.5055643320083618, "logits/chosen": 0.9466936588287354, "logits/rejected": 0.9276096224784851, "logps/chosen": -2.5636651515960693, "logps/rejected": -3.281890869140625, "loss": 0.8698, "nll_loss": 0.8192349672317505, "rewards/accuracies": 0.625, "rewards/chosen": -0.2563665211200714, "rewards/margins": 0.07182256877422333, "rewards/rejected": -0.32818910479545593, "step": 1485 }, { "epoch": 4.068446269678303, "grad_norm": 3.3261044025421143, "learning_rate": 7.965753424657534e-07, "log_odds_chosen": 1.1566038131713867, "log_odds_ratio": -0.3428274691104889, "logits/chosen": 0.47403398156166077, "logits/rejected": 0.3947862386703491, "logps/chosen": -2.3815836906433105, "logps/rejected": -3.4824416637420654, "loss": 1.008, "nll_loss": 0.9736683368682861, "rewards/accuracies": 0.875, "rewards/chosen": -0.23815837502479553, "rewards/margins": 0.11008580029010773, "rewards/rejected": -0.34824419021606445, "step": 1486 }, { "epoch": 4.071184120465435, "grad_norm": 4.403548240661621, "learning_rate": 7.964383561643835e-07, "log_odds_chosen": 1.0206079483032227, "log_odds_ratio": -0.4613681435585022, "logits/chosen": 0.6244887709617615, "logits/rejected": 0.6712954640388489, "logps/chosen": -2.654297351837158, "logps/rejected": -3.618478536605835, "loss": 0.943, "nll_loss": 0.8968486189842224, "rewards/accuracies": 0.625, "rewards/chosen": -0.2654297351837158, "rewards/margins": 0.09641812741756439, "rewards/rejected": -0.3618478775024414, "step": 1487 }, { "epoch": 4.073921971252567, "grad_norm": 3.1108522415161133, "learning_rate": 7.963013698630137e-07, "log_odds_chosen": 1.0530035495758057, "log_odds_ratio": -0.471805602312088, "logits/chosen": 0.5474306344985962, "logits/rejected": 0.5355609655380249, "logps/chosen": -2.833076000213623, "logps/rejected": -3.8380208015441895, "loss": 0.9517, "nll_loss": 0.9044952392578125, "rewards/accuracies": 0.75, "rewards/chosen": -0.28330761194229126, "rewards/margins": 0.10049448907375336, "rewards/rejected": -0.38380205631256104, "step": 1488 }, { "epoch": 4.076659822039699, "grad_norm": 4.1274189949035645, "learning_rate": 7.961643835616438e-07, "log_odds_chosen": 0.5232993960380554, "log_odds_ratio": -0.6028059720993042, "logits/chosen": 0.7347623705863953, "logits/rejected": 0.764922022819519, "logps/chosen": -2.3541994094848633, "logps/rejected": -2.77608585357666, "loss": 0.9651, "nll_loss": 0.904858410358429, "rewards/accuracies": 0.625, "rewards/chosen": -0.23541994392871857, "rewards/margins": 0.042188651859760284, "rewards/rejected": -0.27760857343673706, "step": 1489 }, { "epoch": 4.079397672826831, "grad_norm": 3.2126212120056152, "learning_rate": 7.960273972602739e-07, "log_odds_chosen": 0.6996967792510986, "log_odds_ratio": -0.4519183337688446, "logits/chosen": 0.6554805040359497, "logits/rejected": 0.6489414572715759, "logps/chosen": -2.0252203941345215, "logps/rejected": -2.6640892028808594, "loss": 0.8988, "nll_loss": 0.8536463379859924, "rewards/accuracies": 0.875, "rewards/chosen": -0.20252203941345215, "rewards/margins": 0.06388688832521439, "rewards/rejected": -0.26640892028808594, "step": 1490 }, { "epoch": 4.082135523613963, "grad_norm": 4.597692012786865, "learning_rate": 7.958904109589041e-07, "log_odds_chosen": 1.1911826133728027, "log_odds_ratio": -0.35815301537513733, "logits/chosen": 0.7664431929588318, "logits/rejected": 0.7576038241386414, "logps/chosen": -2.6082923412323, "logps/rejected": -3.7265915870666504, "loss": 0.8268, "nll_loss": 0.7909439206123352, "rewards/accuracies": 0.875, "rewards/chosen": -0.26082926988601685, "rewards/margins": 0.11182989180088043, "rewards/rejected": -0.3726591467857361, "step": 1491 }, { "epoch": 4.084873374401095, "grad_norm": 6.163938045501709, "learning_rate": 7.957534246575343e-07, "log_odds_chosen": 0.4637121260166168, "log_odds_ratio": -0.6440063714981079, "logits/chosen": 0.8251703977584839, "logits/rejected": 0.8055500984191895, "logps/chosen": -2.6456470489501953, "logps/rejected": -3.1473793983459473, "loss": 0.9958, "nll_loss": 0.9313946962356567, "rewards/accuracies": 0.625, "rewards/chosen": -0.2645646929740906, "rewards/margins": 0.05017325282096863, "rewards/rejected": -0.3147379457950592, "step": 1492 }, { "epoch": 4.087611225188227, "grad_norm": 2.6730916500091553, "learning_rate": 7.956164383561643e-07, "log_odds_chosen": 1.1804561614990234, "log_odds_ratio": -0.348706990480423, "logits/chosen": 0.8252025842666626, "logits/rejected": 0.81894451379776, "logps/chosen": -2.3227176666259766, "logps/rejected": -3.4167752265930176, "loss": 0.9077, "nll_loss": 0.8727809190750122, "rewards/accuracies": 0.875, "rewards/chosen": -0.23227177560329437, "rewards/margins": 0.1094057559967041, "rewards/rejected": -0.3416775166988373, "step": 1493 }, { "epoch": 4.090349075975359, "grad_norm": 3.6757800579071045, "learning_rate": 7.954794520547945e-07, "log_odds_chosen": 1.085442304611206, "log_odds_ratio": -0.49112290143966675, "logits/chosen": 0.4554579555988312, "logits/rejected": 0.3650892674922943, "logps/chosen": -2.1326913833618164, "logps/rejected": -3.1106717586517334, "loss": 1.027, "nll_loss": 0.9779289960861206, "rewards/accuracies": 0.75, "rewards/chosen": -0.2132691591978073, "rewards/margins": 0.09779803454875946, "rewards/rejected": -0.3110671937465668, "step": 1494 }, { "epoch": 4.093086926762491, "grad_norm": 3.934062957763672, "learning_rate": 7.953424657534247e-07, "log_odds_chosen": 0.40066394209861755, "log_odds_ratio": -0.6235830783843994, "logits/chosen": 0.5169851779937744, "logits/rejected": 0.5293499231338501, "logps/chosen": -2.3211584091186523, "logps/rejected": -2.6774415969848633, "loss": 0.9949, "nll_loss": 0.932503342628479, "rewards/accuracies": 0.875, "rewards/chosen": -0.23211583495140076, "rewards/margins": 0.03562832623720169, "rewards/rejected": -0.26774418354034424, "step": 1495 }, { "epoch": 4.095824777549623, "grad_norm": 8.072938919067383, "learning_rate": 7.952054794520547e-07, "log_odds_chosen": 1.36052668094635, "log_odds_ratio": -0.3579981327056885, "logits/chosen": 0.7009947896003723, "logits/rejected": 0.7091648578643799, "logps/chosen": -2.2176167964935303, "logps/rejected": -3.427673816680908, "loss": 0.9065, "nll_loss": 0.8706897497177124, "rewards/accuracies": 0.875, "rewards/chosen": -0.22176168859004974, "rewards/margins": 0.12100570648908615, "rewards/rejected": -0.3427673876285553, "step": 1496 }, { "epoch": 4.098562628336755, "grad_norm": 3.3979790210723877, "learning_rate": 7.950684931506849e-07, "log_odds_chosen": 1.1316059827804565, "log_odds_ratio": -0.36398744583129883, "logits/chosen": 0.7464513182640076, "logits/rejected": 0.7401725649833679, "logps/chosen": -1.9503017663955688, "logps/rejected": -2.976322650909424, "loss": 0.863, "nll_loss": 0.8266046047210693, "rewards/accuracies": 0.875, "rewards/chosen": -0.19503018260002136, "rewards/margins": 0.10260208696126938, "rewards/rejected": -0.29763227701187134, "step": 1497 }, { "epoch": 4.101300479123887, "grad_norm": 4.170137882232666, "learning_rate": 7.94931506849315e-07, "log_odds_chosen": 1.0971622467041016, "log_odds_ratio": -0.35405778884887695, "logits/chosen": 0.8025248646736145, "logits/rejected": 0.805302083492279, "logps/chosen": -2.33286714553833, "logps/rejected": -3.3091914653778076, "loss": 0.943, "nll_loss": 0.9075642228126526, "rewards/accuracies": 0.875, "rewards/chosen": -0.23328670859336853, "rewards/margins": 0.09763243794441223, "rewards/rejected": -0.33091914653778076, "step": 1498 }, { "epoch": 4.1040383299110195, "grad_norm": 4.1038498878479, "learning_rate": 7.947945205479451e-07, "log_odds_chosen": 0.2772607207298279, "log_odds_ratio": -0.7268432378768921, "logits/chosen": 0.7659550905227661, "logits/rejected": 0.7872487306594849, "logps/chosen": -2.4379100799560547, "logps/rejected": -2.672456741333008, "loss": 0.976, "nll_loss": 0.9033474922180176, "rewards/accuracies": 0.75, "rewards/chosen": -0.24379102885723114, "rewards/margins": 0.023454658687114716, "rewards/rejected": -0.26724568009376526, "step": 1499 }, { "epoch": 4.1067761806981515, "grad_norm": 3.097618579864502, "learning_rate": 7.946575342465753e-07, "log_odds_chosen": 0.8591875433921814, "log_odds_ratio": -0.43714267015457153, "logits/chosen": 0.673882782459259, "logits/rejected": 0.6369699835777283, "logps/chosen": -2.177596092224121, "logps/rejected": -2.9804880619049072, "loss": 1.031, "nll_loss": 0.9873263835906982, "rewards/accuracies": 0.75, "rewards/chosen": -0.2177596390247345, "rewards/margins": 0.08028918504714966, "rewards/rejected": -0.29804879426956177, "step": 1500 }, { "epoch": 4.1095140314852845, "grad_norm": 3.0913455486297607, "learning_rate": 7.945205479452054e-07, "log_odds_chosen": 0.45543360710144043, "log_odds_ratio": -0.5156201720237732, "logits/chosen": 0.5588839054107666, "logits/rejected": 0.5054178237915039, "logps/chosen": -2.2296524047851562, "logps/rejected": -2.6336920261383057, "loss": 1.0891, "nll_loss": 1.0375373363494873, "rewards/accuracies": 0.875, "rewards/chosen": -0.22296524047851562, "rewards/margins": 0.040403954684734344, "rewards/rejected": -0.26336920261383057, "step": 1501 }, { "epoch": 4.112251882272417, "grad_norm": 6.9249372482299805, "learning_rate": 7.943835616438356e-07, "log_odds_chosen": 0.7954980134963989, "log_odds_ratio": -0.462430477142334, "logits/chosen": 0.659619927406311, "logits/rejected": 0.6791195273399353, "logps/chosen": -2.6981594562530518, "logps/rejected": -3.45052433013916, "loss": 0.9798, "nll_loss": 0.9335082173347473, "rewards/accuracies": 0.875, "rewards/chosen": -0.26981595158576965, "rewards/margins": 0.07523651421070099, "rewards/rejected": -0.34505245089530945, "step": 1502 }, { "epoch": 4.114989733059549, "grad_norm": 3.2511987686157227, "learning_rate": 7.942465753424657e-07, "log_odds_chosen": 1.2754807472229004, "log_odds_ratio": -0.5648447275161743, "logits/chosen": 0.7576271295547485, "logits/rejected": 0.6519725322723389, "logps/chosen": -2.3232672214508057, "logps/rejected": -3.5593669414520264, "loss": 0.9135, "nll_loss": 0.8570553064346313, "rewards/accuracies": 0.625, "rewards/chosen": -0.2323267161846161, "rewards/margins": 0.1236099824309349, "rewards/rejected": -0.3559367060661316, "step": 1503 }, { "epoch": 4.117727583846681, "grad_norm": 3.3603861331939697, "learning_rate": 7.941095890410958e-07, "log_odds_chosen": 1.0419268608093262, "log_odds_ratio": -0.34677940607070923, "logits/chosen": 0.42454150319099426, "logits/rejected": 0.2765578627586365, "logps/chosen": -1.8915252685546875, "logps/rejected": -2.7912468910217285, "loss": 0.9992, "nll_loss": 0.9644755125045776, "rewards/accuracies": 1.0, "rewards/chosen": -0.1891525387763977, "rewards/margins": 0.08997215330600739, "rewards/rejected": -0.2791246771812439, "step": 1504 }, { "epoch": 4.120465434633813, "grad_norm": 5.316195964813232, "learning_rate": 7.93972602739726e-07, "log_odds_chosen": -0.019968658685684204, "log_odds_ratio": -0.8268449306488037, "logits/chosen": 0.7651610374450684, "logits/rejected": 0.7946181893348694, "logps/chosen": -3.4791464805603027, "logps/rejected": -3.444498062133789, "loss": 1.1347, "nll_loss": 1.0520110130310059, "rewards/accuracies": 0.375, "rewards/chosen": -0.3479146659374237, "rewards/margins": -0.0034648478031158447, "rewards/rejected": -0.34444981813430786, "step": 1505 }, { "epoch": 4.123203285420945, "grad_norm": 4.511693477630615, "learning_rate": 7.938356164383561e-07, "log_odds_chosen": 0.2590194642543793, "log_odds_ratio": -0.724452018737793, "logits/chosen": 0.7374666333198547, "logits/rejected": 0.7561339735984802, "logps/chosen": -3.031367301940918, "logps/rejected": -3.2510995864868164, "loss": 0.9222, "nll_loss": 0.8498034477233887, "rewards/accuracies": 0.625, "rewards/chosen": -0.3031367063522339, "rewards/margins": 0.021973248571157455, "rewards/rejected": -0.32510998845100403, "step": 1506 }, { "epoch": 4.125941136208077, "grad_norm": 4.37054967880249, "learning_rate": 7.936986301369862e-07, "log_odds_chosen": 0.7045361399650574, "log_odds_ratio": -0.5198538899421692, "logits/chosen": 0.8016207218170166, "logits/rejected": 0.8781337738037109, "logps/chosen": -2.8477015495300293, "logps/rejected": -3.4941091537475586, "loss": 0.8735, "nll_loss": 0.821506142616272, "rewards/accuracies": 0.875, "rewards/chosen": -0.2847701609134674, "rewards/margins": 0.06464077532291412, "rewards/rejected": -0.3494109511375427, "step": 1507 }, { "epoch": 4.128678986995209, "grad_norm": 3.4747297763824463, "learning_rate": 7.935616438356164e-07, "log_odds_chosen": 0.6213258504867554, "log_odds_ratio": -0.6433111429214478, "logits/chosen": 0.5277794599533081, "logits/rejected": 0.5349166393280029, "logps/chosen": -2.570560932159424, "logps/rejected": -3.149014472961426, "loss": 0.9811, "nll_loss": 0.916724443435669, "rewards/accuracies": 0.625, "rewards/chosen": -0.2570560872554779, "rewards/margins": 0.0578453429043293, "rewards/rejected": -0.3149014115333557, "step": 1508 }, { "epoch": 4.131416837782341, "grad_norm": 3.6536309719085693, "learning_rate": 7.934246575342466e-07, "log_odds_chosen": 0.8864995837211609, "log_odds_ratio": -0.39724111557006836, "logits/chosen": 0.7800241112709045, "logits/rejected": 0.7322590947151184, "logps/chosen": -2.4131457805633545, "logps/rejected": -3.206617593765259, "loss": 0.9292, "nll_loss": 0.8895087242126465, "rewards/accuracies": 0.875, "rewards/chosen": -0.2413145899772644, "rewards/margins": 0.07934718579053879, "rewards/rejected": -0.3206617832183838, "step": 1509 }, { "epoch": 4.134154688569473, "grad_norm": 5.436214447021484, "learning_rate": 7.932876712328766e-07, "log_odds_chosen": -0.009404659271240234, "log_odds_ratio": -0.9352257251739502, "logits/chosen": 0.7386740446090698, "logits/rejected": 0.6765735149383545, "logps/chosen": -2.6236720085144043, "logps/rejected": -2.5998363494873047, "loss": 1.0205, "nll_loss": 0.9270272254943848, "rewards/accuracies": 0.5, "rewards/chosen": -0.2623671889305115, "rewards/margins": -0.0023835450410842896, "rewards/rejected": -0.259983628988266, "step": 1510 }, { "epoch": 4.136892539356605, "grad_norm": 3.603590488433838, "learning_rate": 7.931506849315068e-07, "log_odds_chosen": 0.4658225178718567, "log_odds_ratio": -0.6184900403022766, "logits/chosen": 0.7456148266792297, "logits/rejected": 0.760746955871582, "logps/chosen": -2.302142381668091, "logps/rejected": -2.7257585525512695, "loss": 0.9647, "nll_loss": 0.9028594493865967, "rewards/accuracies": 0.625, "rewards/chosen": -0.23021423816680908, "rewards/margins": 0.04236162453889847, "rewards/rejected": -0.27257585525512695, "step": 1511 }, { "epoch": 4.139630390143737, "grad_norm": 3.9869444370269775, "learning_rate": 7.930136986301369e-07, "log_odds_chosen": 0.5282222032546997, "log_odds_ratio": -0.5124365091323853, "logits/chosen": 0.6144015192985535, "logits/rejected": 0.6287015676498413, "logps/chosen": -2.295121908187866, "logps/rejected": -2.7760956287384033, "loss": 0.9556, "nll_loss": 0.9043577909469604, "rewards/accuracies": 0.875, "rewards/chosen": -0.22951218485832214, "rewards/margins": 0.0480973981320858, "rewards/rejected": -0.27760958671569824, "step": 1512 }, { "epoch": 4.142368240930869, "grad_norm": 3.6079533100128174, "learning_rate": 7.92876712328767e-07, "log_odds_chosen": 0.4406159818172455, "log_odds_ratio": -0.5961212515830994, "logits/chosen": 0.7392797470092773, "logits/rejected": 0.7051593065261841, "logps/chosen": -3.092137336730957, "logps/rejected": -3.5320096015930176, "loss": 1.1138, "nll_loss": 1.0541563034057617, "rewards/accuracies": 0.625, "rewards/chosen": -0.3092137575149536, "rewards/margins": 0.043987251818180084, "rewards/rejected": -0.3532009720802307, "step": 1513 }, { "epoch": 4.145106091718001, "grad_norm": 5.659738540649414, "learning_rate": 7.927397260273972e-07, "log_odds_chosen": 0.43235692381858826, "log_odds_ratio": -0.6661815047264099, "logits/chosen": 0.8588250279426575, "logits/rejected": 0.9205089211463928, "logps/chosen": -2.972271680831909, "logps/rejected": -3.395632743835449, "loss": 0.9408, "nll_loss": 0.8741400241851807, "rewards/accuracies": 0.75, "rewards/chosen": -0.2972271740436554, "rewards/margins": 0.04233609884977341, "rewards/rejected": -0.3395632803440094, "step": 1514 }, { "epoch": 4.147843942505133, "grad_norm": 4.343108654022217, "learning_rate": 7.926027397260273e-07, "log_odds_chosen": 0.1528429090976715, "log_odds_ratio": -0.8345335721969604, "logits/chosen": 0.5219550728797913, "logits/rejected": 0.4720573425292969, "logps/chosen": -3.3230693340301514, "logps/rejected": -3.4152770042419434, "loss": 0.954, "nll_loss": 0.8705735802650452, "rewards/accuracies": 0.625, "rewards/chosen": -0.33230695128440857, "rewards/margins": 0.0092207882553339, "rewards/rejected": -0.34152770042419434, "step": 1515 }, { "epoch": 4.1505817932922655, "grad_norm": 3.956660270690918, "learning_rate": 7.924657534246575e-07, "log_odds_chosen": 0.5123228430747986, "log_odds_ratio": -0.48341313004493713, "logits/chosen": 0.8291383981704712, "logits/rejected": 0.694145143032074, "logps/chosen": -2.016176223754883, "logps/rejected": -2.47240948677063, "loss": 0.9919, "nll_loss": 0.9435163736343384, "rewards/accuracies": 1.0, "rewards/chosen": -0.20161762833595276, "rewards/margins": 0.04562334343791008, "rewards/rejected": -0.24724096059799194, "step": 1516 }, { "epoch": 4.153319644079398, "grad_norm": 4.354767799377441, "learning_rate": 7.923287671232876e-07, "log_odds_chosen": -0.10136042535305023, "log_odds_ratio": -0.9573251605033875, "logits/chosen": 0.712739109992981, "logits/rejected": 0.6963028311729431, "logps/chosen": -2.6973891258239746, "logps/rejected": -2.593266010284424, "loss": 0.9874, "nll_loss": 0.8916440606117249, "rewards/accuracies": 0.5, "rewards/chosen": -0.26973891258239746, "rewards/margins": -0.010412333533167839, "rewards/rejected": -0.2593265771865845, "step": 1517 }, { "epoch": 4.15605749486653, "grad_norm": 4.204684257507324, "learning_rate": 7.921917808219177e-07, "log_odds_chosen": 0.2418523132801056, "log_odds_ratio": -0.7382171154022217, "logits/chosen": 0.7146784067153931, "logits/rejected": 0.8341843485832214, "logps/chosen": -3.0641088485717773, "logps/rejected": -3.317166805267334, "loss": 0.9335, "nll_loss": 0.8597205877304077, "rewards/accuracies": 0.625, "rewards/chosen": -0.30641087889671326, "rewards/margins": 0.025305796414613724, "rewards/rejected": -0.3317166864871979, "step": 1518 }, { "epoch": 4.158795345653662, "grad_norm": 3.593360424041748, "learning_rate": 7.920547945205479e-07, "log_odds_chosen": 0.655232310295105, "log_odds_ratio": -0.44496050477027893, "logits/chosen": 0.7238872051239014, "logits/rejected": 0.7006445527076721, "logps/chosen": -2.0242950916290283, "logps/rejected": -2.6091301441192627, "loss": 0.8925, "nll_loss": 0.8480271100997925, "rewards/accuracies": 1.0, "rewards/chosen": -0.20242951810359955, "rewards/margins": 0.05848350003361702, "rewards/rejected": -0.26091301441192627, "step": 1519 }, { "epoch": 4.161533196440794, "grad_norm": 2.9282336235046387, "learning_rate": 7.91917808219178e-07, "log_odds_chosen": 1.2981312274932861, "log_odds_ratio": -0.2806971073150635, "logits/chosen": 0.8929909467697144, "logits/rejected": 0.9569032192230225, "logps/chosen": -1.977181315422058, "logps/rejected": -3.1529555320739746, "loss": 0.8421, "nll_loss": 0.814038872718811, "rewards/accuracies": 1.0, "rewards/chosen": -0.19771814346313477, "rewards/margins": 0.1175774410367012, "rewards/rejected": -0.315295547246933, "step": 1520 }, { "epoch": 4.164271047227926, "grad_norm": 4.943557262420654, "learning_rate": 7.917808219178081e-07, "log_odds_chosen": 0.6352320313453674, "log_odds_ratio": -0.4774228632450104, "logits/chosen": 0.6761741638183594, "logits/rejected": 0.7211760878562927, "logps/chosen": -2.331096649169922, "logps/rejected": -2.9105911254882812, "loss": 0.8948, "nll_loss": 0.8470690846443176, "rewards/accuracies": 0.875, "rewards/chosen": -0.23310966789722443, "rewards/margins": 0.05794944614171982, "rewards/rejected": -0.29105910658836365, "step": 1521 }, { "epoch": 4.167008898015058, "grad_norm": 3.324253559112549, "learning_rate": 7.916438356164383e-07, "log_odds_chosen": 0.29025089740753174, "log_odds_ratio": -0.5639134049415588, "logits/chosen": 0.6898479461669922, "logits/rejected": 0.7555029392242432, "logps/chosen": -2.4325804710388184, "logps/rejected": -2.682365894317627, "loss": 0.8741, "nll_loss": 0.8177571892738342, "rewards/accuracies": 0.875, "rewards/chosen": -0.2432580590248108, "rewards/margins": 0.02497853711247444, "rewards/rejected": -0.26823657751083374, "step": 1522 }, { "epoch": 4.16974674880219, "grad_norm": 3.245455741882324, "learning_rate": 7.915068493150685e-07, "log_odds_chosen": 0.2760591506958008, "log_odds_ratio": -0.6033843755722046, "logits/chosen": 0.747305154800415, "logits/rejected": 0.7889195680618286, "logps/chosen": -2.4411094188690186, "logps/rejected": -2.693669557571411, "loss": 0.9626, "nll_loss": 0.9022700190544128, "rewards/accuracies": 0.75, "rewards/chosen": -0.24411094188690186, "rewards/margins": 0.02525600790977478, "rewards/rejected": -0.269366979598999, "step": 1523 }, { "epoch": 4.172484599589322, "grad_norm": 3.93412184715271, "learning_rate": 7.913698630136985e-07, "log_odds_chosen": 0.8174442052841187, "log_odds_ratio": -0.41705322265625, "logits/chosen": 0.6173219680786133, "logits/rejected": 0.6711563467979431, "logps/chosen": -2.261270523071289, "logps/rejected": -3.011478900909424, "loss": 0.8859, "nll_loss": 0.8441941142082214, "rewards/accuracies": 0.875, "rewards/chosen": -0.22612708806991577, "rewards/margins": 0.07502081990242004, "rewards/rejected": -0.3011479079723358, "step": 1524 }, { "epoch": 4.175222450376454, "grad_norm": 2.9755680561065674, "learning_rate": 7.912328767123287e-07, "log_odds_chosen": 0.8628939390182495, "log_odds_ratio": -0.4753216505050659, "logits/chosen": 0.8411963582038879, "logits/rejected": 0.839428722858429, "logps/chosen": -2.4424221515655518, "logps/rejected": -3.2913923263549805, "loss": 0.9902, "nll_loss": 0.942643404006958, "rewards/accuracies": 0.75, "rewards/chosen": -0.24424222111701965, "rewards/margins": 0.0848969966173172, "rewards/rejected": -0.32913923263549805, "step": 1525 }, { "epoch": 4.177960301163586, "grad_norm": 4.114634990692139, "learning_rate": 7.91095890410959e-07, "log_odds_chosen": 0.8981218338012695, "log_odds_ratio": -0.4080505967140198, "logits/chosen": 0.5632628202438354, "logits/rejected": 0.5476459264755249, "logps/chosen": -2.076315402984619, "logps/rejected": -2.88191556930542, "loss": 0.9019, "nll_loss": 0.8610855340957642, "rewards/accuracies": 0.875, "rewards/chosen": -0.20763155817985535, "rewards/margins": 0.08056002855300903, "rewards/rejected": -0.288191556930542, "step": 1526 }, { "epoch": 4.180698151950718, "grad_norm": 3.5013997554779053, "learning_rate": 7.909589041095889e-07, "log_odds_chosen": 0.16522055864334106, "log_odds_ratio": -0.7647370100021362, "logits/chosen": 0.5097296237945557, "logits/rejected": 0.5720421671867371, "logps/chosen": -2.6468300819396973, "logps/rejected": -2.800875663757324, "loss": 0.9515, "nll_loss": 0.8750362396240234, "rewards/accuracies": 0.625, "rewards/chosen": -0.2646830081939697, "rewards/margins": 0.015404573641717434, "rewards/rejected": -0.28008759021759033, "step": 1527 }, { "epoch": 4.183436002737851, "grad_norm": 4.67475700378418, "learning_rate": 7.908219178082191e-07, "log_odds_chosen": 1.0206745862960815, "log_odds_ratio": -0.5917382836341858, "logits/chosen": 0.7469934225082397, "logits/rejected": 0.7049381136894226, "logps/chosen": -2.957792043685913, "logps/rejected": -3.8955166339874268, "loss": 0.8811, "nll_loss": 0.8219351768493652, "rewards/accuracies": 0.75, "rewards/chosen": -0.2957792282104492, "rewards/margins": 0.09377245604991913, "rewards/rejected": -0.38955166935920715, "step": 1528 }, { "epoch": 4.186173853524983, "grad_norm": 4.059802532196045, "learning_rate": 7.906849315068492e-07, "log_odds_chosen": 0.13614334166049957, "log_odds_ratio": -0.7434127330780029, "logits/chosen": 0.7825887799263, "logits/rejected": 0.7181641459465027, "logps/chosen": -2.379014015197754, "logps/rejected": -2.4569931030273438, "loss": 1.0885, "nll_loss": 1.0141111612319946, "rewards/accuracies": 0.75, "rewards/chosen": -0.23790140450000763, "rewards/margins": 0.007797923870384693, "rewards/rejected": -0.24569930136203766, "step": 1529 }, { "epoch": 4.188911704312115, "grad_norm": 2.964488983154297, "learning_rate": 7.905479452054795e-07, "log_odds_chosen": 1.465057611465454, "log_odds_ratio": -0.3675556778907776, "logits/chosen": 0.80229651927948, "logits/rejected": 0.7331221699714661, "logps/chosen": -2.307466745376587, "logps/rejected": -3.683915138244629, "loss": 0.9227, "nll_loss": 0.8859511017799377, "rewards/accuracies": 0.875, "rewards/chosen": -0.23074667155742645, "rewards/margins": 0.13764485716819763, "rewards/rejected": -0.3683915138244629, "step": 1530 }, { "epoch": 4.191649555099247, "grad_norm": 3.8072001934051514, "learning_rate": 7.904109589041096e-07, "log_odds_chosen": -0.14682774245738983, "log_odds_ratio": -0.9857562780380249, "logits/chosen": 0.6227777600288391, "logits/rejected": 0.725510835647583, "logps/chosen": -2.9166669845581055, "logps/rejected": -2.756063938140869, "loss": 0.9972, "nll_loss": 0.8985762000083923, "rewards/accuracies": 0.625, "rewards/chosen": -0.291666716337204, "rewards/margins": -0.016060302034020424, "rewards/rejected": -0.2756063938140869, "step": 1531 }, { "epoch": 4.1943874058863795, "grad_norm": 3.8489928245544434, "learning_rate": 7.902739726027396e-07, "log_odds_chosen": 0.49959903955459595, "log_odds_ratio": -0.522186279296875, "logits/chosen": 0.5972130298614502, "logits/rejected": 0.573287844657898, "logps/chosen": -2.1525371074676514, "logps/rejected": -2.5910983085632324, "loss": 0.9388, "nll_loss": 0.8865934610366821, "rewards/accuracies": 0.75, "rewards/chosen": -0.21525369584560394, "rewards/margins": 0.04385613650083542, "rewards/rejected": -0.25910985469818115, "step": 1532 }, { "epoch": 4.1971252566735116, "grad_norm": 3.317007541656494, "learning_rate": 7.901369863013699e-07, "log_odds_chosen": 0.9768993258476257, "log_odds_ratio": -0.4895155429840088, "logits/chosen": 0.6376338005065918, "logits/rejected": 0.5226297378540039, "logps/chosen": -2.505474090576172, "logps/rejected": -3.45089054107666, "loss": 0.9784, "nll_loss": 0.9294077754020691, "rewards/accuracies": 0.875, "rewards/chosen": -0.2505474090576172, "rewards/margins": 0.09454167634248734, "rewards/rejected": -0.3450890779495239, "step": 1533 }, { "epoch": 4.199863107460644, "grad_norm": 5.404240608215332, "learning_rate": 7.9e-07, "log_odds_chosen": -0.18562033772468567, "log_odds_ratio": -0.8373990058898926, "logits/chosen": 0.580956220626831, "logits/rejected": 0.6317687630653381, "logps/chosen": -2.9811782836914062, "logps/rejected": -2.791017532348633, "loss": 0.9968, "nll_loss": 0.9130405187606812, "rewards/accuracies": 0.5, "rewards/chosen": -0.29811781644821167, "rewards/margins": -0.019016040489077568, "rewards/rejected": -0.27910178899765015, "step": 1534 }, { "epoch": 4.202600958247776, "grad_norm": 3.5491812229156494, "learning_rate": 7.8986301369863e-07, "log_odds_chosen": 0.9422707557678223, "log_odds_ratio": -0.40599119663238525, "logits/chosen": 0.6026661992073059, "logits/rejected": 0.6322275996208191, "logps/chosen": -2.126657009124756, "logps/rejected": -3.0036191940307617, "loss": 1.0022, "nll_loss": 0.9615997076034546, "rewards/accuracies": 0.875, "rewards/chosen": -0.21266570687294006, "rewards/margins": 0.08769620954990387, "rewards/rejected": -0.3003619313240051, "step": 1535 }, { "epoch": 4.205338809034908, "grad_norm": 6.268450736999512, "learning_rate": 7.897260273972603e-07, "log_odds_chosen": 1.0492515563964844, "log_odds_ratio": -0.6135640740394592, "logits/chosen": 0.7641240954399109, "logits/rejected": 0.8301600217819214, "logps/chosen": -2.7621688842773438, "logps/rejected": -3.7952120304107666, "loss": 0.9726, "nll_loss": 0.9112067818641663, "rewards/accuracies": 0.75, "rewards/chosen": -0.27621689438819885, "rewards/margins": 0.10330427438020706, "rewards/rejected": -0.3795211911201477, "step": 1536 }, { "epoch": 4.20807665982204, "grad_norm": 3.455479383468628, "learning_rate": 7.895890410958905e-07, "log_odds_chosen": 0.8360489010810852, "log_odds_ratio": -0.578838586807251, "logits/chosen": 0.6405847072601318, "logits/rejected": 0.6705969572067261, "logps/chosen": -3.1412224769592285, "logps/rejected": -3.9676594734191895, "loss": 0.9224, "nll_loss": 0.8645098805427551, "rewards/accuracies": 0.625, "rewards/chosen": -0.3141222596168518, "rewards/margins": 0.08264371752738953, "rewards/rejected": -0.39676594734191895, "step": 1537 }, { "epoch": 4.210814510609172, "grad_norm": 3.6503233909606934, "learning_rate": 7.894520547945205e-07, "log_odds_chosen": 1.3550312519073486, "log_odds_ratio": -0.3130640983581543, "logits/chosen": 0.6966237425804138, "logits/rejected": 0.6597357988357544, "logps/chosen": -2.076714515686035, "logps/rejected": -3.324099063873291, "loss": 0.8797, "nll_loss": 0.8484426736831665, "rewards/accuracies": 0.875, "rewards/chosen": -0.20767146348953247, "rewards/margins": 0.12473843991756439, "rewards/rejected": -0.33240991830825806, "step": 1538 }, { "epoch": 4.213552361396304, "grad_norm": 4.631302356719971, "learning_rate": 7.893150684931507e-07, "log_odds_chosen": 0.1634371429681778, "log_odds_ratio": -0.6773240566253662, "logits/chosen": 0.7598272562026978, "logits/rejected": 0.7194445133209229, "logps/chosen": -1.8906311988830566, "logps/rejected": -1.9891515970230103, "loss": 0.9574, "nll_loss": 0.889657199382782, "rewards/accuracies": 0.75, "rewards/chosen": -0.18906313180923462, "rewards/margins": 0.009852040559053421, "rewards/rejected": -0.19891515374183655, "step": 1539 }, { "epoch": 4.216290212183436, "grad_norm": 3.5916435718536377, "learning_rate": 7.891780821917809e-07, "log_odds_chosen": 1.5576107501983643, "log_odds_ratio": -0.41062629222869873, "logits/chosen": 0.6818726062774658, "logits/rejected": 0.6614676117897034, "logps/chosen": -2.536909341812134, "logps/rejected": -4.022372722625732, "loss": 0.9543, "nll_loss": 0.9132366180419922, "rewards/accuracies": 0.75, "rewards/chosen": -0.2536909580230713, "rewards/margins": 0.14854633808135986, "rewards/rejected": -0.40223729610443115, "step": 1540 }, { "epoch": 4.219028062970568, "grad_norm": 4.0435895919799805, "learning_rate": 7.890410958904109e-07, "log_odds_chosen": 0.8810003995895386, "log_odds_ratio": -0.40404629707336426, "logits/chosen": 0.7016382217407227, "logits/rejected": 0.6375601291656494, "logps/chosen": -2.0126805305480957, "logps/rejected": -2.8147666454315186, "loss": 0.9817, "nll_loss": 0.9412962198257446, "rewards/accuracies": 1.0, "rewards/chosen": -0.2012680470943451, "rewards/margins": 0.08020861446857452, "rewards/rejected": -0.2814766764640808, "step": 1541 }, { "epoch": 4.2217659137577, "grad_norm": 3.083214521408081, "learning_rate": 7.889041095890411e-07, "log_odds_chosen": 0.5959733724594116, "log_odds_ratio": -0.5229565501213074, "logits/chosen": 0.7683016061782837, "logits/rejected": 0.7664608955383301, "logps/chosen": -2.4688398838043213, "logps/rejected": -3.043274402618408, "loss": 0.9386, "nll_loss": 0.8863204121589661, "rewards/accuracies": 0.75, "rewards/chosen": -0.24688397347927094, "rewards/margins": 0.05744347721338272, "rewards/rejected": -0.30432745814323425, "step": 1542 }, { "epoch": 4.224503764544832, "grad_norm": 3.5937185287475586, "learning_rate": 7.887671232876712e-07, "log_odds_chosen": 0.8604163527488708, "log_odds_ratio": -0.4944753348827362, "logits/chosen": 0.7909685373306274, "logits/rejected": 0.8582906723022461, "logps/chosen": -2.1964197158813477, "logps/rejected": -2.9668471813201904, "loss": 0.8601, "nll_loss": 0.8106812238693237, "rewards/accuracies": 0.625, "rewards/chosen": -0.21964198350906372, "rewards/margins": 0.07704272866249084, "rewards/rejected": -0.29668471217155457, "step": 1543 }, { "epoch": 4.227241615331964, "grad_norm": 3.0759494304656982, "learning_rate": 7.886301369863014e-07, "log_odds_chosen": 1.1632230281829834, "log_odds_ratio": -0.47054168581962585, "logits/chosen": 0.6913059949874878, "logits/rejected": 0.7667514681816101, "logps/chosen": -2.2582015991210938, "logps/rejected": -3.305342674255371, "loss": 0.8425, "nll_loss": 0.7954713106155396, "rewards/accuracies": 0.875, "rewards/chosen": -0.2258201539516449, "rewards/margins": 0.10471410304307938, "rewards/rejected": -0.33053427934646606, "step": 1544 }, { "epoch": 4.229979466119096, "grad_norm": 3.8111844062805176, "learning_rate": 7.884931506849315e-07, "log_odds_chosen": 0.8937123417854309, "log_odds_ratio": -0.47793909907341003, "logits/chosen": 0.7797191143035889, "logits/rejected": 0.8615871071815491, "logps/chosen": -2.664989709854126, "logps/rejected": -3.4434456825256348, "loss": 0.867, "nll_loss": 0.8191567659378052, "rewards/accuracies": 0.625, "rewards/chosen": -0.26649898290634155, "rewards/margins": 0.07784560322761536, "rewards/rejected": -0.3443445861339569, "step": 1545 }, { "epoch": 4.232717316906228, "grad_norm": 4.811980247497559, "learning_rate": 7.883561643835616e-07, "log_odds_chosen": 0.054845452308654785, "log_odds_ratio": -0.7483885288238525, "logits/chosen": 0.5452327728271484, "logits/rejected": 0.5995104908943176, "logps/chosen": -2.5902509689331055, "logps/rejected": -2.600130558013916, "loss": 0.8935, "nll_loss": 0.8186834454536438, "rewards/accuracies": 0.75, "rewards/chosen": -0.25902509689331055, "rewards/margins": 0.0009879730641841888, "rewards/rejected": -0.26001307368278503, "step": 1546 }, { "epoch": 4.2354551676933605, "grad_norm": 6.081528663635254, "learning_rate": 7.882191780821918e-07, "log_odds_chosen": 0.28869321942329407, "log_odds_ratio": -0.7424399852752686, "logits/chosen": 0.6491830348968506, "logits/rejected": 0.6782680749893188, "logps/chosen": -3.1204519271850586, "logps/rejected": -3.3167312145233154, "loss": 1.0267, "nll_loss": 0.9524584412574768, "rewards/accuracies": 0.5, "rewards/chosen": -0.31204521656036377, "rewards/margins": 0.019627928733825684, "rewards/rejected": -0.33167314529418945, "step": 1547 }, { "epoch": 4.238193018480493, "grad_norm": 3.65816068649292, "learning_rate": 7.880821917808219e-07, "log_odds_chosen": 1.277848720550537, "log_odds_ratio": -0.384492427110672, "logits/chosen": 0.6674575209617615, "logits/rejected": 0.4342350959777832, "logps/chosen": -2.6730258464813232, "logps/rejected": -3.878740072250366, "loss": 0.9493, "nll_loss": 0.9108359217643738, "rewards/accuracies": 0.875, "rewards/chosen": -0.26730257272720337, "rewards/margins": 0.12057138979434967, "rewards/rejected": -0.3878740072250366, "step": 1548 }, { "epoch": 4.240930869267625, "grad_norm": 3.435469150543213, "learning_rate": 7.87945205479452e-07, "log_odds_chosen": 0.7418572902679443, "log_odds_ratio": -0.5340616106987, "logits/chosen": 0.6586719155311584, "logits/rejected": 0.552029550075531, "logps/chosen": -2.171372413635254, "logps/rejected": -2.887363910675049, "loss": 1.0327, "nll_loss": 0.9793227910995483, "rewards/accuracies": 0.625, "rewards/chosen": -0.21713723242282867, "rewards/margins": 0.07159917056560516, "rewards/rejected": -0.28873640298843384, "step": 1549 }, { "epoch": 4.243668720054757, "grad_norm": 4.543424129486084, "learning_rate": 7.878082191780822e-07, "log_odds_chosen": 1.2869396209716797, "log_odds_ratio": -0.3408781886100769, "logits/chosen": 0.6835975646972656, "logits/rejected": 0.7305874824523926, "logps/chosen": -2.6810853481292725, "logps/rejected": -3.898085594177246, "loss": 0.9094, "nll_loss": 0.87530916929245, "rewards/accuracies": 0.875, "rewards/chosen": -0.2681085467338562, "rewards/margins": 0.12170004099607468, "rewards/rejected": -0.3898085951805115, "step": 1550 }, { "epoch": 4.246406570841889, "grad_norm": 4.130685806274414, "learning_rate": 7.876712328767124e-07, "log_odds_chosen": 1.3638086318969727, "log_odds_ratio": -0.4410460293292999, "logits/chosen": 0.5874851942062378, "logits/rejected": 0.5945380926132202, "logps/chosen": -2.018345832824707, "logps/rejected": -3.291588544845581, "loss": 0.9028, "nll_loss": 0.8586816191673279, "rewards/accuracies": 0.875, "rewards/chosen": -0.20183458924293518, "rewards/margins": 0.12732428312301636, "rewards/rejected": -0.32915887236595154, "step": 1551 }, { "epoch": 4.249144421629021, "grad_norm": 3.6067984104156494, "learning_rate": 7.875342465753424e-07, "log_odds_chosen": 0.7629947662353516, "log_odds_ratio": -0.4775852560997009, "logits/chosen": 0.781082034111023, "logits/rejected": 0.7896685004234314, "logps/chosen": -2.2814183235168457, "logps/rejected": -2.9813575744628906, "loss": 0.8718, "nll_loss": 0.8239930272102356, "rewards/accuracies": 1.0, "rewards/chosen": -0.22814181447029114, "rewards/margins": 0.06999392807483673, "rewards/rejected": -0.29813575744628906, "step": 1552 }, { "epoch": 4.251882272416153, "grad_norm": 3.610133171081543, "learning_rate": 7.873972602739726e-07, "log_odds_chosen": 0.5500118732452393, "log_odds_ratio": -0.5477051138877869, "logits/chosen": 0.7197920680046082, "logits/rejected": 0.7178085446357727, "logps/chosen": -2.58963942527771, "logps/rejected": -3.0992870330810547, "loss": 0.9263, "nll_loss": 0.8715463280677795, "rewards/accuracies": 0.75, "rewards/chosen": -0.258963942527771, "rewards/margins": 0.05096477270126343, "rewards/rejected": -0.3099287152290344, "step": 1553 }, { "epoch": 4.254620123203285, "grad_norm": 3.6674017906188965, "learning_rate": 7.872602739726028e-07, "log_odds_chosen": 0.481281578540802, "log_odds_ratio": -0.6518063545227051, "logits/chosen": 0.7254831790924072, "logits/rejected": 0.7395837306976318, "logps/chosen": -2.5894219875335693, "logps/rejected": -3.053389549255371, "loss": 0.9384, "nll_loss": 0.8732517957687378, "rewards/accuracies": 0.75, "rewards/chosen": -0.258942186832428, "rewards/margins": 0.04639677330851555, "rewards/rejected": -0.305338978767395, "step": 1554 }, { "epoch": 4.257357973990418, "grad_norm": 4.8893046379089355, "learning_rate": 7.871232876712328e-07, "log_odds_chosen": 0.26937806606292725, "log_odds_ratio": -1.111130952835083, "logits/chosen": 0.643538773059845, "logits/rejected": 0.7390888929367065, "logps/chosen": -2.984654188156128, "logps/rejected": -3.180103063583374, "loss": 0.9768, "nll_loss": 0.8657195568084717, "rewards/accuracies": 0.625, "rewards/chosen": -0.29846543073654175, "rewards/margins": 0.019544873386621475, "rewards/rejected": -0.3180103302001953, "step": 1555 }, { "epoch": 4.26009582477755, "grad_norm": 4.149663925170898, "learning_rate": 7.86986301369863e-07, "log_odds_chosen": 0.7606792449951172, "log_odds_ratio": -0.452725350856781, "logits/chosen": 0.7383323311805725, "logits/rejected": 0.6332508325576782, "logps/chosen": -2.4022836685180664, "logps/rejected": -3.1157259941101074, "loss": 1.0118, "nll_loss": 0.9665595293045044, "rewards/accuracies": 0.875, "rewards/chosen": -0.24022838473320007, "rewards/margins": 0.07134422659873962, "rewards/rejected": -0.3115726113319397, "step": 1556 }, { "epoch": 4.262833675564682, "grad_norm": 4.079877853393555, "learning_rate": 7.868493150684932e-07, "log_odds_chosen": 0.23827052116394043, "log_odds_ratio": -0.6099948883056641, "logits/chosen": 0.5523581504821777, "logits/rejected": 0.47847980260849, "logps/chosen": -2.429168462753296, "logps/rejected": -2.605490207672119, "loss": 0.9585, "nll_loss": 0.8975040912628174, "rewards/accuracies": 0.75, "rewards/chosen": -0.24291685223579407, "rewards/margins": 0.017632178962230682, "rewards/rejected": -0.26054900884628296, "step": 1557 }, { "epoch": 4.265571526351814, "grad_norm": 4.64361047744751, "learning_rate": 7.867123287671233e-07, "log_odds_chosen": 1.1704254150390625, "log_odds_ratio": -0.40242424607276917, "logits/chosen": 0.6911131143569946, "logits/rejected": 0.6188755035400391, "logps/chosen": -1.7640432119369507, "logps/rejected": -2.842559576034546, "loss": 1.0067, "nll_loss": 0.9664381742477417, "rewards/accuracies": 0.875, "rewards/chosen": -0.17640432715415955, "rewards/margins": 0.10785162448883057, "rewards/rejected": -0.2842559516429901, "step": 1558 }, { "epoch": 4.268309377138946, "grad_norm": 3.504971981048584, "learning_rate": 7.865753424657534e-07, "log_odds_chosen": 2.0159482955932617, "log_odds_ratio": -0.18420137465000153, "logits/chosen": 0.8210135698318481, "logits/rejected": 0.7930713295936584, "logps/chosen": -2.664970874786377, "logps/rejected": -4.604594707489014, "loss": 0.9195, "nll_loss": 0.9011234045028687, "rewards/accuracies": 1.0, "rewards/chosen": -0.26649707555770874, "rewards/margins": 0.19396241009235382, "rewards/rejected": -0.46045947074890137, "step": 1559 }, { "epoch": 4.271047227926078, "grad_norm": 4.573904514312744, "learning_rate": 7.864383561643835e-07, "log_odds_chosen": 0.5034921169281006, "log_odds_ratio": -0.5284725427627563, "logits/chosen": 0.8146307468414307, "logits/rejected": 0.7443838715553284, "logps/chosen": -2.282226324081421, "logps/rejected": -2.7120494842529297, "loss": 0.9975, "nll_loss": 0.9446343183517456, "rewards/accuracies": 0.875, "rewards/chosen": -0.22822263836860657, "rewards/margins": 0.042982317507267, "rewards/rejected": -0.27120494842529297, "step": 1560 }, { "epoch": 4.27378507871321, "grad_norm": 5.804325580596924, "learning_rate": 7.863013698630137e-07, "log_odds_chosen": 1.0134968757629395, "log_odds_ratio": -0.4276745319366455, "logits/chosen": 0.7274497747421265, "logits/rejected": 0.6716181039810181, "logps/chosen": -2.0718562602996826, "logps/rejected": -3.0212972164154053, "loss": 0.923, "nll_loss": 0.880190372467041, "rewards/accuracies": 0.875, "rewards/chosen": -0.20718562602996826, "rewards/margins": 0.09494408965110779, "rewards/rejected": -0.30212971568107605, "step": 1561 }, { "epoch": 4.276522929500342, "grad_norm": 4.672845363616943, "learning_rate": 7.861643835616438e-07, "log_odds_chosen": 0.50885009765625, "log_odds_ratio": -0.5768609046936035, "logits/chosen": 0.5521126389503479, "logits/rejected": 0.5302629470825195, "logps/chosen": -2.432011365890503, "logps/rejected": -2.903256416320801, "loss": 1.0403, "nll_loss": 0.9826527833938599, "rewards/accuracies": 0.75, "rewards/chosen": -0.2432011216878891, "rewards/margins": 0.04712452366948128, "rewards/rejected": -0.2903256416320801, "step": 1562 }, { "epoch": 4.2792607802874745, "grad_norm": 3.9207637310028076, "learning_rate": 7.860273972602739e-07, "log_odds_chosen": 0.37866055965423584, "log_odds_ratio": -0.6675618886947632, "logits/chosen": 0.7059080004692078, "logits/rejected": 0.8419374823570251, "logps/chosen": -2.666327714920044, "logps/rejected": -2.9720542430877686, "loss": 0.8479, "nll_loss": 0.7811229228973389, "rewards/accuracies": 0.625, "rewards/chosen": -0.2666327655315399, "rewards/margins": 0.030572645366191864, "rewards/rejected": -0.2972054183483124, "step": 1563 }, { "epoch": 4.2819986310746065, "grad_norm": 4.324160099029541, "learning_rate": 7.858904109589041e-07, "log_odds_chosen": 0.7148131132125854, "log_odds_ratio": -0.4592351019382477, "logits/chosen": 0.8762757182121277, "logits/rejected": 0.8842211365699768, "logps/chosen": -2.318183183670044, "logps/rejected": -2.9534473419189453, "loss": 0.9141, "nll_loss": 0.8682060837745667, "rewards/accuracies": 0.75, "rewards/chosen": -0.2318183183670044, "rewards/margins": 0.06352640688419342, "rewards/rejected": -0.2953447699546814, "step": 1564 }, { "epoch": 4.284736481861739, "grad_norm": 4.519576549530029, "learning_rate": 7.857534246575343e-07, "log_odds_chosen": 0.8271350264549255, "log_odds_ratio": -0.44220173358917236, "logits/chosen": 0.8103972673416138, "logits/rejected": 0.7859259843826294, "logps/chosen": -2.412320137023926, "logps/rejected": -3.1918110847473145, "loss": 0.979, "nll_loss": 0.934760570526123, "rewards/accuracies": 0.875, "rewards/chosen": -0.2412319928407669, "rewards/margins": 0.07794912159442902, "rewards/rejected": -0.3191811442375183, "step": 1565 }, { "epoch": 4.287474332648871, "grad_norm": 4.643884658813477, "learning_rate": 7.856164383561643e-07, "log_odds_chosen": 0.7166553139686584, "log_odds_ratio": -0.45641469955444336, "logits/chosen": 0.6130117177963257, "logits/rejected": 0.6335631608963013, "logps/chosen": -2.3218741416931152, "logps/rejected": -2.9664978981018066, "loss": 1.0052, "nll_loss": 0.9595927000045776, "rewards/accuracies": 0.875, "rewards/chosen": -0.2321874350309372, "rewards/margins": 0.06446239352226257, "rewards/rejected": -0.2966498136520386, "step": 1566 }, { "epoch": 4.290212183436003, "grad_norm": 3.701565980911255, "learning_rate": 7.854794520547945e-07, "log_odds_chosen": 0.7827457189559937, "log_odds_ratio": -0.4749622344970703, "logits/chosen": 0.7801159620285034, "logits/rejected": 0.8492332100868225, "logps/chosen": -2.3526620864868164, "logps/rejected": -3.053844928741455, "loss": 0.8726, "nll_loss": 0.8250890970230103, "rewards/accuracies": 0.75, "rewards/chosen": -0.23526617884635925, "rewards/margins": 0.07011827081441879, "rewards/rejected": -0.30538445711135864, "step": 1567 }, { "epoch": 4.292950034223135, "grad_norm": 3.480072498321533, "learning_rate": 7.853424657534247e-07, "log_odds_chosen": 0.8806757926940918, "log_odds_ratio": -0.4644646644592285, "logits/chosen": 0.7879258990287781, "logits/rejected": 0.7824064493179321, "logps/chosen": -2.1018383502960205, "logps/rejected": -2.8839545249938965, "loss": 0.9631, "nll_loss": 0.9166072607040405, "rewards/accuracies": 0.75, "rewards/chosen": -0.21018384397029877, "rewards/margins": 0.07821161299943924, "rewards/rejected": -0.2883954644203186, "step": 1568 }, { "epoch": 4.295687885010267, "grad_norm": 5.495726585388184, "learning_rate": 7.852054794520547e-07, "log_odds_chosen": 0.8787267804145813, "log_odds_ratio": -0.42705100774765015, "logits/chosen": 0.7564647197723389, "logits/rejected": 0.7797651290893555, "logps/chosen": -1.9061914682388306, "logps/rejected": -2.6887450218200684, "loss": 0.9173, "nll_loss": 0.8746154308319092, "rewards/accuracies": 0.875, "rewards/chosen": -0.19061914086341858, "rewards/margins": 0.07825534045696259, "rewards/rejected": -0.26887446641921997, "step": 1569 }, { "epoch": 4.298425735797399, "grad_norm": 3.0363705158233643, "learning_rate": 7.850684931506849e-07, "log_odds_chosen": 0.3217749297618866, "log_odds_ratio": -0.699760913848877, "logits/chosen": 0.5235697031021118, "logits/rejected": 0.5539817214012146, "logps/chosen": -2.2002949714660645, "logps/rejected": -2.5392909049987793, "loss": 0.9926, "nll_loss": 0.9226652979850769, "rewards/accuracies": 0.5, "rewards/chosen": -0.22002950310707092, "rewards/margins": 0.03389959782361984, "rewards/rejected": -0.25392910838127136, "step": 1570 }, { "epoch": 4.301163586584531, "grad_norm": 3.315173387527466, "learning_rate": 7.849315068493151e-07, "log_odds_chosen": 0.6079428791999817, "log_odds_ratio": -0.5850723385810852, "logits/chosen": 0.6469629406929016, "logits/rejected": 0.6545752286911011, "logps/chosen": -2.8348708152770996, "logps/rejected": -3.385042190551758, "loss": 0.9573, "nll_loss": 0.898833692073822, "rewards/accuracies": 0.75, "rewards/chosen": -0.28348711133003235, "rewards/margins": 0.05501712113618851, "rewards/rejected": -0.33850422501564026, "step": 1571 }, { "epoch": 4.303901437371663, "grad_norm": 5.609804153442383, "learning_rate": 7.847945205479451e-07, "log_odds_chosen": 0.04149147868156433, "log_odds_ratio": -0.9264802932739258, "logits/chosen": 0.59123694896698, "logits/rejected": 0.6239026188850403, "logps/chosen": -3.088425397872925, "logps/rejected": -3.114445686340332, "loss": 1.0414, "nll_loss": 0.9487376809120178, "rewards/accuracies": 0.75, "rewards/chosen": -0.3088425397872925, "rewards/margins": 0.0026020295917987823, "rewards/rejected": -0.31144458055496216, "step": 1572 }, { "epoch": 4.306639288158795, "grad_norm": 3.4427645206451416, "learning_rate": 7.846575342465753e-07, "log_odds_chosen": 0.8099486827850342, "log_odds_ratio": -0.4306694269180298, "logits/chosen": 0.805092990398407, "logits/rejected": 0.7753257155418396, "logps/chosen": -1.654046893119812, "logps/rejected": -2.3432140350341797, "loss": 0.9622, "nll_loss": 0.9190880060195923, "rewards/accuracies": 0.875, "rewards/chosen": -0.16540469229221344, "rewards/margins": 0.06891672313213348, "rewards/rejected": -0.2343214452266693, "step": 1573 }, { "epoch": 4.309377138945927, "grad_norm": 3.208543539047241, "learning_rate": 7.845205479452054e-07, "log_odds_chosen": 0.9775794744491577, "log_odds_ratio": -0.4357045888900757, "logits/chosen": 0.6466971635818481, "logits/rejected": 0.5641926527023315, "logps/chosen": -2.487877607345581, "logps/rejected": -3.407254219055176, "loss": 0.919, "nll_loss": 0.8754006624221802, "rewards/accuracies": 0.75, "rewards/chosen": -0.2487877607345581, "rewards/margins": 0.09193766117095947, "rewards/rejected": -0.3407254219055176, "step": 1574 }, { "epoch": 4.312114989733059, "grad_norm": 3.90440034866333, "learning_rate": 7.843835616438356e-07, "log_odds_chosen": 1.5995405912399292, "log_odds_ratio": -0.2649933397769928, "logits/chosen": 0.6661335229873657, "logits/rejected": 0.6922093033790588, "logps/chosen": -2.1980676651000977, "logps/rejected": -3.7010278701782227, "loss": 0.9676, "nll_loss": 0.9410763382911682, "rewards/accuracies": 1.0, "rewards/chosen": -0.2198067605495453, "rewards/margins": 0.15029603242874146, "rewards/rejected": -0.37010276317596436, "step": 1575 }, { "epoch": 4.314852840520191, "grad_norm": 3.6251602172851562, "learning_rate": 7.842465753424657e-07, "log_odds_chosen": 1.5484422445297241, "log_odds_ratio": -0.2775309383869171, "logits/chosen": 0.756589949131012, "logits/rejected": 0.7791057825088501, "logps/chosen": -1.9243180751800537, "logps/rejected": -3.3588011264801025, "loss": 0.7889, "nll_loss": 0.761189341545105, "rewards/accuracies": 1.0, "rewards/chosen": -0.19243179261684418, "rewards/margins": 0.14344832301139832, "rewards/rejected": -0.3358801305294037, "step": 1576 }, { "epoch": 4.317590691307323, "grad_norm": 3.456855297088623, "learning_rate": 7.841095890410958e-07, "log_odds_chosen": 1.021153450012207, "log_odds_ratio": -0.49293264746665955, "logits/chosen": 0.5756655931472778, "logits/rejected": 0.5726636648178101, "logps/chosen": -1.8257007598876953, "logps/rejected": -2.7503015995025635, "loss": 0.9592, "nll_loss": 0.9098832607269287, "rewards/accuracies": 0.875, "rewards/chosen": -0.18257008492946625, "rewards/margins": 0.09246004372835159, "rewards/rejected": -0.27503013610839844, "step": 1577 }, { "epoch": 4.3203285420944555, "grad_norm": 3.991661310195923, "learning_rate": 7.83972602739726e-07, "log_odds_chosen": 0.6951818466186523, "log_odds_ratio": -0.4936068654060364, "logits/chosen": 0.7793101668357849, "logits/rejected": 0.7550063133239746, "logps/chosen": -2.780679225921631, "logps/rejected": -3.4082727432250977, "loss": 0.892, "nll_loss": 0.8426587581634521, "rewards/accuracies": 0.75, "rewards/chosen": -0.2780679166316986, "rewards/margins": 0.06275935471057892, "rewards/rejected": -0.34082722663879395, "step": 1578 }, { "epoch": 4.323066392881588, "grad_norm": 4.582961082458496, "learning_rate": 7.838356164383562e-07, "log_odds_chosen": 0.5572117567062378, "log_odds_ratio": -0.7136965990066528, "logits/chosen": 0.6931900978088379, "logits/rejected": 0.7562943696975708, "logps/chosen": -2.4389138221740723, "logps/rejected": -2.969449520111084, "loss": 0.9872, "nll_loss": 0.9158256649971008, "rewards/accuracies": 0.5, "rewards/chosen": -0.2438913881778717, "rewards/margins": 0.05305355042219162, "rewards/rejected": -0.2969449460506439, "step": 1579 }, { "epoch": 4.3258042436687205, "grad_norm": 4.287536144256592, "learning_rate": 7.836986301369862e-07, "log_odds_chosen": -0.1347418576478958, "log_odds_ratio": -0.8684090971946716, "logits/chosen": 0.7568503022193909, "logits/rejected": 0.8275409936904907, "logps/chosen": -3.118044137954712, "logps/rejected": -2.9960756301879883, "loss": 0.9175, "nll_loss": 0.8306922912597656, "rewards/accuracies": 0.375, "rewards/chosen": -0.3118044137954712, "rewards/margins": -0.012196846306324005, "rewards/rejected": -0.29960760474205017, "step": 1580 }, { "epoch": 4.328542094455852, "grad_norm": 3.8753061294555664, "learning_rate": 7.835616438356164e-07, "log_odds_chosen": 0.8876977562904358, "log_odds_ratio": -0.5518156886100769, "logits/chosen": 0.6950224041938782, "logits/rejected": 0.6383562088012695, "logps/chosen": -2.615493059158325, "logps/rejected": -3.435730457305908, "loss": 0.9356, "nll_loss": 0.8804100751876831, "rewards/accuracies": 0.875, "rewards/chosen": -0.26154932379722595, "rewards/margins": 0.08202371746301651, "rewards/rejected": -0.34357306361198425, "step": 1581 }, { "epoch": 4.331279945242985, "grad_norm": 3.1053361892700195, "learning_rate": 7.834246575342466e-07, "log_odds_chosen": 1.172013759613037, "log_odds_ratio": -0.479514479637146, "logits/chosen": 0.8861754536628723, "logits/rejected": 0.9080917835235596, "logps/chosen": -2.3860130310058594, "logps/rejected": -3.545592784881592, "loss": 0.8473, "nll_loss": 0.7993319630622864, "rewards/accuracies": 0.5, "rewards/chosen": -0.23860129714012146, "rewards/margins": 0.11595799773931503, "rewards/rejected": -0.3545593023300171, "step": 1582 }, { "epoch": 4.334017796030117, "grad_norm": 3.9661448001861572, "learning_rate": 7.832876712328766e-07, "log_odds_chosen": 0.5617843866348267, "log_odds_ratio": -0.5965914726257324, "logits/chosen": 0.8875877857208252, "logits/rejected": 0.901943564414978, "logps/chosen": -2.5317957401275635, "logps/rejected": -3.0765786170959473, "loss": 0.9273, "nll_loss": 0.867647647857666, "rewards/accuracies": 0.625, "rewards/chosen": -0.25317955017089844, "rewards/margins": 0.054478272795677185, "rewards/rejected": -0.3076578378677368, "step": 1583 }, { "epoch": 4.336755646817249, "grad_norm": 4.555983066558838, "learning_rate": 7.831506849315068e-07, "log_odds_chosen": 1.263006567955017, "log_odds_ratio": -0.32692950963974, "logits/chosen": 0.8496972918510437, "logits/rejected": 0.8692829012870789, "logps/chosen": -2.5765814781188965, "logps/rejected": -3.777249336242676, "loss": 0.7864, "nll_loss": 0.7536936402320862, "rewards/accuracies": 0.875, "rewards/chosen": -0.25765812397003174, "rewards/margins": 0.120066799223423, "rewards/rejected": -0.37772494554519653, "step": 1584 }, { "epoch": 4.339493497604381, "grad_norm": 3.8972034454345703, "learning_rate": 7.83013698630137e-07, "log_odds_chosen": 0.4741981327533722, "log_odds_ratio": -0.6348955035209656, "logits/chosen": 0.7130677700042725, "logits/rejected": 0.7550867199897766, "logps/chosen": -2.719475030899048, "logps/rejected": -3.1978650093078613, "loss": 0.9302, "nll_loss": 0.8667413592338562, "rewards/accuracies": 0.625, "rewards/chosen": -0.2719475030899048, "rewards/margins": 0.04783899337053299, "rewards/rejected": -0.3197864890098572, "step": 1585 }, { "epoch": 4.342231348391513, "grad_norm": 3.37121319770813, "learning_rate": 7.82876712328767e-07, "log_odds_chosen": 0.8621220588684082, "log_odds_ratio": -0.5036801695823669, "logits/chosen": 0.6147445440292358, "logits/rejected": 0.5330621600151062, "logps/chosen": -2.3660993576049805, "logps/rejected": -3.2048182487487793, "loss": 0.9996, "nll_loss": 0.9492076635360718, "rewards/accuracies": 0.75, "rewards/chosen": -0.23660995066165924, "rewards/margins": 0.08387190103530884, "rewards/rejected": -0.3204818665981293, "step": 1586 }, { "epoch": 4.344969199178645, "grad_norm": 3.4254331588745117, "learning_rate": 7.827397260273972e-07, "log_odds_chosen": 1.1559944152832031, "log_odds_ratio": -0.4291498064994812, "logits/chosen": 0.7655431032180786, "logits/rejected": 0.7479026913642883, "logps/chosen": -1.646528720855713, "logps/rejected": -2.6912713050842285, "loss": 0.9766, "nll_loss": 0.9336655735969543, "rewards/accuracies": 0.875, "rewards/chosen": -0.16465285420417786, "rewards/margins": 0.1044742614030838, "rewards/rejected": -0.26912713050842285, "step": 1587 }, { "epoch": 4.347707049965777, "grad_norm": 3.3630666732788086, "learning_rate": 7.826027397260274e-07, "log_odds_chosen": 1.8913114070892334, "log_odds_ratio": -0.40434324741363525, "logits/chosen": 0.7134048342704773, "logits/rejected": 0.7102363109588623, "logps/chosen": -2.256695032119751, "logps/rejected": -4.070395469665527, "loss": 0.9578, "nll_loss": 0.9174038767814636, "rewards/accuracies": 0.75, "rewards/chosen": -0.2256695032119751, "rewards/margins": 0.1813700795173645, "rewards/rejected": -0.4070395827293396, "step": 1588 }, { "epoch": 4.350444900752909, "grad_norm": 3.1214439868927, "learning_rate": 7.824657534246575e-07, "log_odds_chosen": 1.6777602434158325, "log_odds_ratio": -0.3275459408760071, "logits/chosen": 0.9094247221946716, "logits/rejected": 0.8474329710006714, "logps/chosen": -2.214423656463623, "logps/rejected": -3.802196502685547, "loss": 0.9398, "nll_loss": 0.9070538282394409, "rewards/accuracies": 1.0, "rewards/chosen": -0.22144237160682678, "rewards/margins": 0.15877728164196014, "rewards/rejected": -0.3802196979522705, "step": 1589 }, { "epoch": 4.353182751540041, "grad_norm": 4.370790004730225, "learning_rate": 7.823287671232876e-07, "log_odds_chosen": 1.1484489440917969, "log_odds_ratio": -0.3070274591445923, "logits/chosen": 0.8006406426429749, "logits/rejected": 0.8361479043960571, "logps/chosen": -2.4784975051879883, "logps/rejected": -3.5583748817443848, "loss": 0.8675, "nll_loss": 0.836768627166748, "rewards/accuracies": 1.0, "rewards/chosen": -0.24784976243972778, "rewards/margins": 0.10798772424459457, "rewards/rejected": -0.35583749413490295, "step": 1590 }, { "epoch": 4.355920602327173, "grad_norm": 4.669953346252441, "learning_rate": 7.821917808219177e-07, "log_odds_chosen": 0.415711909532547, "log_odds_ratio": -0.6276904344558716, "logits/chosen": 0.5564207434654236, "logits/rejected": 0.5958680510520935, "logps/chosen": -2.48842453956604, "logps/rejected": -2.850963830947876, "loss": 0.9745, "nll_loss": 0.911689043045044, "rewards/accuracies": 0.75, "rewards/chosen": -0.24884247779846191, "rewards/margins": 0.036253921687603, "rewards/rejected": -0.2850964069366455, "step": 1591 }, { "epoch": 4.358658453114305, "grad_norm": 3.6542792320251465, "learning_rate": 7.820547945205479e-07, "log_odds_chosen": 1.2148312330245972, "log_odds_ratio": -0.28659501671791077, "logits/chosen": 0.9433349967002869, "logits/rejected": 0.9349196553230286, "logps/chosen": -2.684133291244507, "logps/rejected": -3.798964262008667, "loss": 0.8461, "nll_loss": 0.8174514770507812, "rewards/accuracies": 1.0, "rewards/chosen": -0.26841336488723755, "rewards/margins": 0.11148308217525482, "rewards/rejected": -0.3798964023590088, "step": 1592 }, { "epoch": 4.361396303901437, "grad_norm": 3.8424243927001953, "learning_rate": 7.81917808219178e-07, "log_odds_chosen": 0.726416289806366, "log_odds_ratio": -0.463650643825531, "logits/chosen": 0.6817972660064697, "logits/rejected": 0.705669641494751, "logps/chosen": -2.1754865646362305, "logps/rejected": -2.760350227355957, "loss": 0.9491, "nll_loss": 0.9026897549629211, "rewards/accuracies": 0.875, "rewards/chosen": -0.2175486832857132, "rewards/margins": 0.05848635733127594, "rewards/rejected": -0.27603501081466675, "step": 1593 }, { "epoch": 4.3641341546885695, "grad_norm": 3.0372097492218018, "learning_rate": 7.817808219178081e-07, "log_odds_chosen": 1.0283758640289307, "log_odds_ratio": -0.3478427827358246, "logits/chosen": 0.7871896624565125, "logits/rejected": 0.7861422300338745, "logps/chosen": -2.055056095123291, "logps/rejected": -2.9776597023010254, "loss": 0.9535, "nll_loss": 0.9186850190162659, "rewards/accuracies": 1.0, "rewards/chosen": -0.2055056393146515, "rewards/margins": 0.09226033836603165, "rewards/rejected": -0.29776597023010254, "step": 1594 }, { "epoch": 4.3668720054757015, "grad_norm": 5.088440895080566, "learning_rate": 7.816438356164383e-07, "log_odds_chosen": 0.9211657047271729, "log_odds_ratio": -0.4254305064678192, "logits/chosen": 0.7522115707397461, "logits/rejected": 0.797187864780426, "logps/chosen": -3.0613410472869873, "logps/rejected": -3.9483823776245117, "loss": 0.9207, "nll_loss": 0.8781404495239258, "rewards/accuracies": 0.75, "rewards/chosen": -0.30613410472869873, "rewards/margins": 0.08870411664247513, "rewards/rejected": -0.39483824372291565, "step": 1595 }, { "epoch": 4.369609856262834, "grad_norm": 3.7813503742218018, "learning_rate": 7.815068493150685e-07, "log_odds_chosen": 1.687391996383667, "log_odds_ratio": -0.2729947566986084, "logits/chosen": 0.7885995507240295, "logits/rejected": 0.6692842245101929, "logps/chosen": -2.4988582134246826, "logps/rejected": -4.103375434875488, "loss": 1.0762, "nll_loss": 1.0489338636398315, "rewards/accuracies": 1.0, "rewards/chosen": -0.24988581240177155, "rewards/margins": 0.1604517102241516, "rewards/rejected": -0.41033753752708435, "step": 1596 }, { "epoch": 4.372347707049966, "grad_norm": 3.0460457801818848, "learning_rate": 7.813698630136985e-07, "log_odds_chosen": 0.9045277833938599, "log_odds_ratio": -0.40656352043151855, "logits/chosen": 0.8862203359603882, "logits/rejected": 0.9016405940055847, "logps/chosen": -2.3323512077331543, "logps/rejected": -3.1705503463745117, "loss": 0.8149, "nll_loss": 0.7742235660552979, "rewards/accuracies": 0.875, "rewards/chosen": -0.23323512077331543, "rewards/margins": 0.0838199108839035, "rewards/rejected": -0.3170550465583801, "step": 1597 }, { "epoch": 4.375085557837098, "grad_norm": 2.8968968391418457, "learning_rate": 7.812328767123287e-07, "log_odds_chosen": 1.9811615943908691, "log_odds_ratio": -0.21930980682373047, "logits/chosen": 0.9503648281097412, "logits/rejected": 0.9677780270576477, "logps/chosen": -2.3222150802612305, "logps/rejected": -4.2036638259887695, "loss": 0.7678, "nll_loss": 0.7458787560462952, "rewards/accuracies": 1.0, "rewards/chosen": -0.23222151398658752, "rewards/margins": 0.18814492225646973, "rewards/rejected": -0.42036640644073486, "step": 1598 }, { "epoch": 4.37782340862423, "grad_norm": 3.148963212966919, "learning_rate": 7.810958904109589e-07, "log_odds_chosen": 0.7581884860992432, "log_odds_ratio": -0.5058289766311646, "logits/chosen": 0.7877739071846008, "logits/rejected": 0.9067152142524719, "logps/chosen": -2.9404006004333496, "logps/rejected": -3.707357168197632, "loss": 0.9111, "nll_loss": 0.8605664968490601, "rewards/accuracies": 0.875, "rewards/chosen": -0.2940400540828705, "rewards/margins": 0.07669564336538315, "rewards/rejected": -0.37073570489883423, "step": 1599 }, { "epoch": 4.380561259411362, "grad_norm": 4.42466926574707, "learning_rate": 7.809589041095889e-07, "log_odds_chosen": 0.8507678508758545, "log_odds_ratio": -0.7468404769897461, "logits/chosen": 0.9077187776565552, "logits/rejected": 0.8533214926719666, "logps/chosen": -2.6070408821105957, "logps/rejected": -3.3790042400360107, "loss": 1.0302, "nll_loss": 0.9555248022079468, "rewards/accuracies": 0.875, "rewards/chosen": -0.26070407032966614, "rewards/margins": 0.07719635218381882, "rewards/rejected": -0.33790042996406555, "step": 1600 }, { "epoch": 4.383299110198494, "grad_norm": 3.3767364025115967, "learning_rate": 7.808219178082191e-07, "log_odds_chosen": 0.5238050222396851, "log_odds_ratio": -0.538593590259552, "logits/chosen": 0.985458254814148, "logits/rejected": 0.9429982304573059, "logps/chosen": -2.0339603424072266, "logps/rejected": -2.51178240776062, "loss": 0.933, "nll_loss": 0.8790932893753052, "rewards/accuracies": 0.75, "rewards/chosen": -0.2033960521221161, "rewards/margins": 0.04778219386935234, "rewards/rejected": -0.2511782646179199, "step": 1601 }, { "epoch": 4.386036960985626, "grad_norm": 4.409695625305176, "learning_rate": 7.806849315068493e-07, "log_odds_chosen": 0.9376051425933838, "log_odds_ratio": -0.38237547874450684, "logits/chosen": 0.7011888027191162, "logits/rejected": 0.6754817962646484, "logps/chosen": -2.329169273376465, "logps/rejected": -3.1825273036956787, "loss": 0.8853, "nll_loss": 0.8470855951309204, "rewards/accuracies": 1.0, "rewards/chosen": -0.232916921377182, "rewards/margins": 0.08533579111099243, "rewards/rejected": -0.31825271248817444, "step": 1602 }, { "epoch": 4.388774811772758, "grad_norm": 4.326523780822754, "learning_rate": 7.805479452054794e-07, "log_odds_chosen": 1.0798053741455078, "log_odds_ratio": -0.3881245255470276, "logits/chosen": 0.8805677890777588, "logits/rejected": 0.950000524520874, "logps/chosen": -2.809915065765381, "logps/rejected": -3.8489346504211426, "loss": 0.8566, "nll_loss": 0.8177495002746582, "rewards/accuracies": 0.875, "rewards/chosen": -0.28099149465560913, "rewards/margins": 0.10390196740627289, "rewards/rejected": -0.3848934769630432, "step": 1603 }, { "epoch": 4.39151266255989, "grad_norm": 3.867772102355957, "learning_rate": 7.804109589041095e-07, "log_odds_chosen": 1.0092991590499878, "log_odds_ratio": -0.44439923763275146, "logits/chosen": 0.6948018074035645, "logits/rejected": 0.6880254745483398, "logps/chosen": -1.8717639446258545, "logps/rejected": -2.783806324005127, "loss": 0.9765, "nll_loss": 0.9320344924926758, "rewards/accuracies": 0.875, "rewards/chosen": -0.1871763914823532, "rewards/margins": 0.09120424836874008, "rewards/rejected": -0.2783806622028351, "step": 1604 }, { "epoch": 4.394250513347022, "grad_norm": 3.311251163482666, "learning_rate": 7.802739726027396e-07, "log_odds_chosen": 1.2071614265441895, "log_odds_ratio": -0.4075366258621216, "logits/chosen": 0.6538287401199341, "logits/rejected": 0.6680185198783875, "logps/chosen": -2.3086864948272705, "logps/rejected": -3.4308080673217773, "loss": 0.8719, "nll_loss": 0.8311395645141602, "rewards/accuracies": 0.75, "rewards/chosen": -0.2308686524629593, "rewards/margins": 0.1122121512889862, "rewards/rejected": -0.3430808186531067, "step": 1605 }, { "epoch": 4.396988364134154, "grad_norm": 4.180685520172119, "learning_rate": 7.801369863013698e-07, "log_odds_chosen": 0.3039425015449524, "log_odds_ratio": -0.7307676076889038, "logits/chosen": 0.7339370250701904, "logits/rejected": 0.771695613861084, "logps/chosen": -2.9321346282958984, "logps/rejected": -3.2455625534057617, "loss": 0.8979, "nll_loss": 0.8247905969619751, "rewards/accuracies": 0.5, "rewards/chosen": -0.29321348667144775, "rewards/margins": 0.0313427671790123, "rewards/rejected": -0.32455626130104065, "step": 1606 }, { "epoch": 4.399726214921287, "grad_norm": 2.840827465057373, "learning_rate": 7.799999999999999e-07, "log_odds_chosen": 1.270688772201538, "log_odds_ratio": -0.3477898836135864, "logits/chosen": 0.6006147861480713, "logits/rejected": 0.6140552759170532, "logps/chosen": -2.573720932006836, "logps/rejected": -3.76760196685791, "loss": 0.9836, "nll_loss": 0.9488388895988464, "rewards/accuracies": 0.75, "rewards/chosen": -0.25737208127975464, "rewards/margins": 0.11938809603452682, "rewards/rejected": -0.37676021456718445, "step": 1607 }, { "epoch": 4.402464065708419, "grad_norm": 4.512158393859863, "learning_rate": 7.7986301369863e-07, "log_odds_chosen": 0.7318058609962463, "log_odds_ratio": -0.5984637141227722, "logits/chosen": 0.7971152067184448, "logits/rejected": 0.8383475542068481, "logps/chosen": -2.8278024196624756, "logps/rejected": -3.528430700302124, "loss": 1.0121, "nll_loss": 0.9522097110748291, "rewards/accuracies": 0.75, "rewards/chosen": -0.282780259847641, "rewards/margins": 0.07006281614303589, "rewards/rejected": -0.3528430759906769, "step": 1608 }, { "epoch": 4.405201916495551, "grad_norm": 3.441344738006592, "learning_rate": 7.797260273972602e-07, "log_odds_chosen": 1.2383034229278564, "log_odds_ratio": -0.44914186000823975, "logits/chosen": 0.6199436187744141, "logits/rejected": 0.5164437294006348, "logps/chosen": -2.0601754188537598, "logps/rejected": -3.236067056655884, "loss": 1.0383, "nll_loss": 0.993408203125, "rewards/accuracies": 0.75, "rewards/chosen": -0.20601756870746613, "rewards/margins": 0.11758914589881897, "rewards/rejected": -0.3236067295074463, "step": 1609 }, { "epoch": 4.407939767282683, "grad_norm": 4.640893459320068, "learning_rate": 7.795890410958905e-07, "log_odds_chosen": 1.587892770767212, "log_odds_ratio": -0.2824093997478485, "logits/chosen": 0.8985760807991028, "logits/rejected": 0.9377247095108032, "logps/chosen": -2.195085048675537, "logps/rejected": -3.655076026916504, "loss": 0.8245, "nll_loss": 0.7962597608566284, "rewards/accuracies": 0.875, "rewards/chosen": -0.21950852870941162, "rewards/margins": 0.14599908888339996, "rewards/rejected": -0.3655076026916504, "step": 1610 }, { "epoch": 4.4106776180698155, "grad_norm": 4.503554821014404, "learning_rate": 7.794520547945204e-07, "log_odds_chosen": 0.6216427087783813, "log_odds_ratio": -0.6569284200668335, "logits/chosen": 0.8311880826950073, "logits/rejected": 0.8338381052017212, "logps/chosen": -2.483619213104248, "logps/rejected": -2.9549522399902344, "loss": 0.9247, "nll_loss": 0.8590146899223328, "rewards/accuracies": 0.875, "rewards/chosen": -0.24836193025112152, "rewards/margins": 0.047133300453424454, "rewards/rejected": -0.2954952120780945, "step": 1611 }, { "epoch": 4.413415468856948, "grad_norm": 6.2685346603393555, "learning_rate": 7.793150684931507e-07, "log_odds_chosen": 1.4267317056655884, "log_odds_ratio": -0.49121952056884766, "logits/chosen": 0.8327964544296265, "logits/rejected": 0.8072109222412109, "logps/chosen": -2.5656352043151855, "logps/rejected": -3.848754405975342, "loss": 0.92, "nll_loss": 0.8709133267402649, "rewards/accuracies": 0.75, "rewards/chosen": -0.25656354427337646, "rewards/margins": 0.1283119022846222, "rewards/rejected": -0.38487547636032104, "step": 1612 }, { "epoch": 4.41615331964408, "grad_norm": 5.711933135986328, "learning_rate": 7.791780821917809e-07, "log_odds_chosen": 1.3677289485931396, "log_odds_ratio": -0.3370918035507202, "logits/chosen": 0.7460190057754517, "logits/rejected": 0.750104546546936, "logps/chosen": -2.405379295349121, "logps/rejected": -3.681752920150757, "loss": 0.8396, "nll_loss": 0.8058463931083679, "rewards/accuracies": 0.875, "rewards/chosen": -0.24053792655467987, "rewards/margins": 0.1276373714208603, "rewards/rejected": -0.36817529797554016, "step": 1613 }, { "epoch": 4.418891170431212, "grad_norm": 4.092763423919678, "learning_rate": 7.790410958904108e-07, "log_odds_chosen": 0.7703603506088257, "log_odds_ratio": -0.6671574115753174, "logits/chosen": 0.863642692565918, "logits/rejected": 0.8381328582763672, "logps/chosen": -2.91988468170166, "logps/rejected": -3.639411449432373, "loss": 1.0791, "nll_loss": 1.0123810768127441, "rewards/accuracies": 0.75, "rewards/chosen": -0.2919884920120239, "rewards/margins": 0.071952685713768, "rewards/rejected": -0.36394116282463074, "step": 1614 }, { "epoch": 4.421629021218344, "grad_norm": 3.1365020275115967, "learning_rate": 7.789041095890411e-07, "log_odds_chosen": 0.9695006012916565, "log_odds_ratio": -0.475528746843338, "logits/chosen": 0.5721112489700317, "logits/rejected": 0.5595476627349854, "logps/chosen": -2.082397937774658, "logps/rejected": -2.9402129650115967, "loss": 0.988, "nll_loss": 0.9404128193855286, "rewards/accuracies": 0.75, "rewards/chosen": -0.2082398235797882, "rewards/margins": 0.08578146249055862, "rewards/rejected": -0.29402127861976624, "step": 1615 }, { "epoch": 4.424366872005476, "grad_norm": 3.2590415477752686, "learning_rate": 7.787671232876713e-07, "log_odds_chosen": 0.7361079454421997, "log_odds_ratio": -0.4421690106391907, "logits/chosen": 0.7364275455474854, "logits/rejected": 0.7772361040115356, "logps/chosen": -1.9540162086486816, "logps/rejected": -2.6188793182373047, "loss": 0.9, "nll_loss": 0.8557637333869934, "rewards/accuracies": 0.875, "rewards/chosen": -0.1954016238451004, "rewards/margins": 0.06648631393909454, "rewards/rejected": -0.26188793778419495, "step": 1616 }, { "epoch": 4.427104722792608, "grad_norm": 5.975121021270752, "learning_rate": 7.786301369863014e-07, "log_odds_chosen": 0.07679417729377747, "log_odds_ratio": -1.1215717792510986, "logits/chosen": 0.5605231523513794, "logits/rejected": 0.5828892588615417, "logps/chosen": -3.2323567867279053, "logps/rejected": -3.2542483806610107, "loss": 1.1097, "nll_loss": 0.9975690841674805, "rewards/accuracies": 0.875, "rewards/chosen": -0.3232356905937195, "rewards/margins": 0.0021891221404075623, "rewards/rejected": -0.32542482018470764, "step": 1617 }, { "epoch": 4.42984257357974, "grad_norm": 4.478911399841309, "learning_rate": 7.784931506849315e-07, "log_odds_chosen": 1.0031039714813232, "log_odds_ratio": -0.41339144110679626, "logits/chosen": 0.8272202014923096, "logits/rejected": 0.8884847164154053, "logps/chosen": -2.6873741149902344, "logps/rejected": -3.6597819328308105, "loss": 0.8534, "nll_loss": 0.8120695352554321, "rewards/accuracies": 0.75, "rewards/chosen": -0.26873743534088135, "rewards/margins": 0.09724078327417374, "rewards/rejected": -0.3659782111644745, "step": 1618 }, { "epoch": 4.432580424366872, "grad_norm": 3.886428117752075, "learning_rate": 7.783561643835616e-07, "log_odds_chosen": 1.3169212341308594, "log_odds_ratio": -0.5761473178863525, "logits/chosen": 0.7813037037849426, "logits/rejected": 0.8208408355712891, "logps/chosen": -2.7442941665649414, "logps/rejected": -4.021036148071289, "loss": 0.8957, "nll_loss": 0.8380451202392578, "rewards/accuracies": 0.75, "rewards/chosen": -0.27442944049835205, "rewards/margins": 0.1276741474866867, "rewards/rejected": -0.40210357308387756, "step": 1619 }, { "epoch": 4.435318275154004, "grad_norm": 3.478968858718872, "learning_rate": 7.782191780821918e-07, "log_odds_chosen": 0.5799281597137451, "log_odds_ratio": -0.4945068061351776, "logits/chosen": 0.6756480932235718, "logits/rejected": 0.5736755728721619, "logps/chosen": -2.5765697956085205, "logps/rejected": -3.104287624359131, "loss": 0.9574, "nll_loss": 0.9079139232635498, "rewards/accuracies": 0.875, "rewards/chosen": -0.257656991481781, "rewards/margins": 0.05277179554104805, "rewards/rejected": -0.31042882800102234, "step": 1620 }, { "epoch": 4.438056125941136, "grad_norm": 4.551887035369873, "learning_rate": 7.780821917808219e-07, "log_odds_chosen": 0.44013991951942444, "log_odds_ratio": -0.6074150204658508, "logits/chosen": 0.6833037734031677, "logits/rejected": 0.6689595580101013, "logps/chosen": -3.011565685272217, "logps/rejected": -3.4258100986480713, "loss": 0.9744, "nll_loss": 0.9136443734169006, "rewards/accuracies": 0.5, "rewards/chosen": -0.30115658044815063, "rewards/margins": 0.041424427181482315, "rewards/rejected": -0.34258103370666504, "step": 1621 }, { "epoch": 4.440793976728268, "grad_norm": 3.2199387550354004, "learning_rate": 7.77945205479452e-07, "log_odds_chosen": 0.9778425097465515, "log_odds_ratio": -0.5039576888084412, "logits/chosen": 0.6814858913421631, "logits/rejected": 0.8043230175971985, "logps/chosen": -2.478381872177124, "logps/rejected": -3.3937599658966064, "loss": 0.954, "nll_loss": 0.9036375284194946, "rewards/accuracies": 0.625, "rewards/chosen": -0.24783821403980255, "rewards/margins": 0.09153779596090317, "rewards/rejected": -0.3393760323524475, "step": 1622 }, { "epoch": 4.4435318275154, "grad_norm": 3.915191411972046, "learning_rate": 7.778082191780822e-07, "log_odds_chosen": 0.49579161405563354, "log_odds_ratio": -0.5712748169898987, "logits/chosen": 0.746985912322998, "logits/rejected": 0.6297442317008972, "logps/chosen": -1.891969084739685, "logps/rejected": -2.325103759765625, "loss": 1.0141, "nll_loss": 0.9569544792175293, "rewards/accuracies": 0.5, "rewards/chosen": -0.18919691443443298, "rewards/margins": 0.043313466012477875, "rewards/rejected": -0.23251038789749146, "step": 1623 }, { "epoch": 4.446269678302532, "grad_norm": 4.524031162261963, "learning_rate": 7.776712328767124e-07, "log_odds_chosen": 0.8445243835449219, "log_odds_ratio": -0.49061205983161926, "logits/chosen": 0.7597872018814087, "logits/rejected": 0.7224072813987732, "logps/chosen": -2.1679534912109375, "logps/rejected": -2.8776745796203613, "loss": 0.8976, "nll_loss": 0.8485429286956787, "rewards/accuracies": 0.875, "rewards/chosen": -0.21679535508155823, "rewards/margins": 0.07097209990024567, "rewards/rejected": -0.2877674400806427, "step": 1624 }, { "epoch": 4.4490075290896645, "grad_norm": 4.780958652496338, "learning_rate": 7.775342465753424e-07, "log_odds_chosen": 0.8929477334022522, "log_odds_ratio": -0.4704977869987488, "logits/chosen": 0.6418429613113403, "logits/rejected": 0.5733301043510437, "logps/chosen": -2.5833494663238525, "logps/rejected": -3.4203805923461914, "loss": 0.9202, "nll_loss": 0.8731947541236877, "rewards/accuracies": 0.75, "rewards/chosen": -0.2583349347114563, "rewards/margins": 0.08370313048362732, "rewards/rejected": -0.3420380651950836, "step": 1625 }, { "epoch": 4.4517453798767965, "grad_norm": 3.840092420578003, "learning_rate": 7.773972602739726e-07, "log_odds_chosen": 0.5678438544273376, "log_odds_ratio": -0.5578598976135254, "logits/chosen": 0.8058162927627563, "logits/rejected": 0.8172036409378052, "logps/chosen": -2.416416645050049, "logps/rejected": -2.942134141921997, "loss": 0.905, "nll_loss": 0.8492152094841003, "rewards/accuracies": 0.75, "rewards/chosen": -0.24164164066314697, "rewards/margins": 0.05257175490260124, "rewards/rejected": -0.2942134141921997, "step": 1626 }, { "epoch": 4.454483230663929, "grad_norm": 4.750912189483643, "learning_rate": 7.772602739726028e-07, "log_odds_chosen": 0.6047136783599854, "log_odds_ratio": -0.6332147121429443, "logits/chosen": 0.903472363948822, "logits/rejected": 0.9080290794372559, "logps/chosen": -2.236402988433838, "logps/rejected": -2.7177202701568604, "loss": 1.019, "nll_loss": 0.9556524753570557, "rewards/accuracies": 0.625, "rewards/chosen": -0.2236403077840805, "rewards/margins": 0.04813171923160553, "rewards/rejected": -0.27177202701568604, "step": 1627 }, { "epoch": 4.457221081451061, "grad_norm": 3.1905713081359863, "learning_rate": 7.771232876712328e-07, "log_odds_chosen": 1.5807912349700928, "log_odds_ratio": -0.23028595745563507, "logits/chosen": 0.7744483947753906, "logits/rejected": 0.7786500453948975, "logps/chosen": -2.1265671253204346, "logps/rejected": -3.60443377494812, "loss": 0.8371, "nll_loss": 0.8141133785247803, "rewards/accuracies": 1.0, "rewards/chosen": -0.21265673637390137, "rewards/margins": 0.14778664708137512, "rewards/rejected": -0.3604433536529541, "step": 1628 }, { "epoch": 4.459958932238193, "grad_norm": 4.9535040855407715, "learning_rate": 7.76986301369863e-07, "log_odds_chosen": 1.4107521772384644, "log_odds_ratio": -0.410281240940094, "logits/chosen": 0.7521370649337769, "logits/rejected": 0.6808820366859436, "logps/chosen": -2.44997239112854, "logps/rejected": -3.8431384563446045, "loss": 0.9567, "nll_loss": 0.9156680107116699, "rewards/accuracies": 0.625, "rewards/chosen": -0.24499723315238953, "rewards/margins": 0.1393166035413742, "rewards/rejected": -0.38431382179260254, "step": 1629 }, { "epoch": 4.462696783025325, "grad_norm": 4.102067947387695, "learning_rate": 7.768493150684932e-07, "log_odds_chosen": 0.6875134706497192, "log_odds_ratio": -0.6075909733772278, "logits/chosen": 0.7033995389938354, "logits/rejected": 0.6646476984024048, "logps/chosen": -2.3523218631744385, "logps/rejected": -2.981823444366455, "loss": 1.1131, "nll_loss": 1.0522997379302979, "rewards/accuracies": 0.75, "rewards/chosen": -0.23523220419883728, "rewards/margins": 0.06295014917850494, "rewards/rejected": -0.2981823682785034, "step": 1630 }, { "epoch": 4.465434633812457, "grad_norm": 4.173486232757568, "learning_rate": 7.767123287671233e-07, "log_odds_chosen": 0.4594375789165497, "log_odds_ratio": -0.5866440534591675, "logits/chosen": 0.755161702632904, "logits/rejected": 0.7939471006393433, "logps/chosen": -2.644404172897339, "logps/rejected": -3.048692464828491, "loss": 0.9388, "nll_loss": 0.8800934553146362, "rewards/accuracies": 0.5, "rewards/chosen": -0.2644404172897339, "rewards/margins": 0.04042883217334747, "rewards/rejected": -0.30486923456192017, "step": 1631 }, { "epoch": 4.468172484599589, "grad_norm": 4.033939361572266, "learning_rate": 7.765753424657534e-07, "log_odds_chosen": 0.8197754621505737, "log_odds_ratio": -0.4878811240196228, "logits/chosen": 0.9209249019622803, "logits/rejected": 0.8952819108963013, "logps/chosen": -2.5772643089294434, "logps/rejected": -3.332308292388916, "loss": 0.9422, "nll_loss": 0.8934575319290161, "rewards/accuracies": 0.75, "rewards/chosen": -0.25772643089294434, "rewards/margins": 0.07550442218780518, "rewards/rejected": -0.3332308530807495, "step": 1632 }, { "epoch": 4.470910335386721, "grad_norm": 4.139954090118408, "learning_rate": 7.764383561643836e-07, "log_odds_chosen": 0.7266502380371094, "log_odds_ratio": -0.5002234578132629, "logits/chosen": 0.7397711277008057, "logits/rejected": 0.7287105917930603, "logps/chosen": -2.8098840713500977, "logps/rejected": -3.475759744644165, "loss": 0.9813, "nll_loss": 0.9312691688537598, "rewards/accuracies": 0.75, "rewards/chosen": -0.2809883952140808, "rewards/margins": 0.06658758968114853, "rewards/rejected": -0.3475760221481323, "step": 1633 }, { "epoch": 4.473648186173854, "grad_norm": 3.453763246536255, "learning_rate": 7.763013698630137e-07, "log_odds_chosen": 1.3309416770935059, "log_odds_ratio": -0.5611050724983215, "logits/chosen": 0.8000545501708984, "logits/rejected": 0.8600144386291504, "logps/chosen": -2.3780875205993652, "logps/rejected": -3.6757731437683105, "loss": 0.817, "nll_loss": 0.7608636617660522, "rewards/accuracies": 0.5, "rewards/chosen": -0.23780876398086548, "rewards/margins": 0.12976858019828796, "rewards/rejected": -0.36757731437683105, "step": 1634 }, { "epoch": 4.476386036960986, "grad_norm": 2.9627625942230225, "learning_rate": 7.761643835616438e-07, "log_odds_chosen": 1.2359323501586914, "log_odds_ratio": -0.3844611346721649, "logits/chosen": 0.445965975522995, "logits/rejected": 0.5169926285743713, "logps/chosen": -2.144001007080078, "logps/rejected": -3.2982096672058105, "loss": 0.9043, "nll_loss": 0.865899920463562, "rewards/accuracies": 0.875, "rewards/chosen": -0.21440014243125916, "rewards/margins": 0.11542084813117981, "rewards/rejected": -0.3298209607601166, "step": 1635 }, { "epoch": 4.479123887748118, "grad_norm": 3.2391929626464844, "learning_rate": 7.760273972602739e-07, "log_odds_chosen": 1.181025505065918, "log_odds_ratio": -0.34431377053260803, "logits/chosen": 0.7138997316360474, "logits/rejected": 0.701167106628418, "logps/chosen": -2.220407009124756, "logps/rejected": -3.28682017326355, "loss": 0.8957, "nll_loss": 0.8613103628158569, "rewards/accuracies": 0.875, "rewards/chosen": -0.22204069793224335, "rewards/margins": 0.10664132982492447, "rewards/rejected": -0.328682005405426, "step": 1636 }, { "epoch": 4.48186173853525, "grad_norm": 4.369832992553711, "learning_rate": 7.758904109589041e-07, "log_odds_chosen": 0.8570309281349182, "log_odds_ratio": -0.4665386974811554, "logits/chosen": 0.6056416034698486, "logits/rejected": 0.5125225782394409, "logps/chosen": -1.9135441780090332, "logps/rejected": -2.690260648727417, "loss": 1.0264, "nll_loss": 0.979770302772522, "rewards/accuracies": 0.875, "rewards/chosen": -0.1913544237613678, "rewards/margins": 0.07767165452241898, "rewards/rejected": -0.26902610063552856, "step": 1637 }, { "epoch": 4.484599589322382, "grad_norm": 4.5204362869262695, "learning_rate": 7.757534246575343e-07, "log_odds_chosen": 0.4326495826244354, "log_odds_ratio": -0.6641900539398193, "logits/chosen": 0.8759639263153076, "logits/rejected": 0.9662647843360901, "logps/chosen": -3.121058225631714, "logps/rejected": -3.5117573738098145, "loss": 0.9756, "nll_loss": 0.9091882705688477, "rewards/accuracies": 0.75, "rewards/chosen": -0.31210583448410034, "rewards/margins": 0.039069920778274536, "rewards/rejected": -0.3511757552623749, "step": 1638 }, { "epoch": 4.487337440109514, "grad_norm": 3.0774481296539307, "learning_rate": 7.756164383561643e-07, "log_odds_chosen": 1.2659144401550293, "log_odds_ratio": -0.43256163597106934, "logits/chosen": 0.688341498374939, "logits/rejected": 0.689907431602478, "logps/chosen": -2.3713345527648926, "logps/rejected": -3.586566925048828, "loss": 0.9218, "nll_loss": 0.8785097002983093, "rewards/accuracies": 0.75, "rewards/chosen": -0.2371334433555603, "rewards/margins": 0.12152329087257385, "rewards/rejected": -0.35865673422813416, "step": 1639 }, { "epoch": 4.490075290896646, "grad_norm": 3.408118963241577, "learning_rate": 7.754794520547945e-07, "log_odds_chosen": 0.2928088903427124, "log_odds_ratio": -0.7938425540924072, "logits/chosen": 0.75068199634552, "logits/rejected": 0.784442663192749, "logps/chosen": -2.305326461791992, "logps/rejected": -2.5648977756500244, "loss": 1.0225, "nll_loss": 0.9430798888206482, "rewards/accuracies": 0.625, "rewards/chosen": -0.23053264617919922, "rewards/margins": 0.025957122445106506, "rewards/rejected": -0.25648975372314453, "step": 1640 }, { "epoch": 4.492813141683778, "grad_norm": 3.0385007858276367, "learning_rate": 7.753424657534247e-07, "log_odds_chosen": 0.9139847159385681, "log_odds_ratio": -0.4206928312778473, "logits/chosen": 0.5412771701812744, "logits/rejected": 0.48164427280426025, "logps/chosen": -1.9749112129211426, "logps/rejected": -2.811516284942627, "loss": 0.8964, "nll_loss": 0.8543627262115479, "rewards/accuracies": 1.0, "rewards/chosen": -0.1974911391735077, "rewards/margins": 0.08366049826145172, "rewards/rejected": -0.2811516225337982, "step": 1641 }, { "epoch": 4.4955509924709105, "grad_norm": 4.837944030761719, "learning_rate": 7.752054794520547e-07, "log_odds_chosen": 0.20731133222579956, "log_odds_ratio": -0.6707765460014343, "logits/chosen": 0.8623675107955933, "logits/rejected": 0.9256997108459473, "logps/chosen": -2.4033570289611816, "logps/rejected": -2.57145357131958, "loss": 0.9297, "nll_loss": 0.8626383543014526, "rewards/accuracies": 0.5, "rewards/chosen": -0.24033570289611816, "rewards/margins": 0.016809670254588127, "rewards/rejected": -0.25714537501335144, "step": 1642 }, { "epoch": 4.498288843258043, "grad_norm": 4.2096757888793945, "learning_rate": 7.750684931506849e-07, "log_odds_chosen": 0.6828080415725708, "log_odds_ratio": -0.4463844299316406, "logits/chosen": 0.7218438982963562, "logits/rejected": 0.8943244814872742, "logps/chosen": -2.752448558807373, "logps/rejected": -3.384654998779297, "loss": 0.9426, "nll_loss": 0.8980051279067993, "rewards/accuracies": 0.875, "rewards/chosen": -0.2752448618412018, "rewards/margins": 0.06322062760591507, "rewards/rejected": -0.33846548199653625, "step": 1643 }, { "epoch": 4.501026694045175, "grad_norm": 3.9316322803497314, "learning_rate": 7.749315068493151e-07, "log_odds_chosen": 1.1116666793823242, "log_odds_ratio": -0.4053725302219391, "logits/chosen": 0.7914208769798279, "logits/rejected": 0.8292163610458374, "logps/chosen": -2.7562992572784424, "logps/rejected": -3.8110530376434326, "loss": 0.8973, "nll_loss": 0.8567832708358765, "rewards/accuracies": 0.75, "rewards/chosen": -0.2756299376487732, "rewards/margins": 0.10547539591789246, "rewards/rejected": -0.38110530376434326, "step": 1644 }, { "epoch": 4.503764544832307, "grad_norm": 2.8596832752227783, "learning_rate": 7.747945205479452e-07, "log_odds_chosen": 1.3629918098449707, "log_odds_ratio": -0.3071531653404236, "logits/chosen": 0.7913491725921631, "logits/rejected": 0.7985032796859741, "logps/chosen": -2.103243827819824, "logps/rejected": -3.371245861053467, "loss": 0.9451, "nll_loss": 0.9143598675727844, "rewards/accuracies": 1.0, "rewards/chosen": -0.21032439172267914, "rewards/margins": 0.12680017948150635, "rewards/rejected": -0.3371245861053467, "step": 1645 }, { "epoch": 4.506502395619439, "grad_norm": 3.222573757171631, "learning_rate": 7.746575342465753e-07, "log_odds_chosen": 1.4564423561096191, "log_odds_ratio": -0.35510796308517456, "logits/chosen": 0.6205135583877563, "logits/rejected": 0.5720145106315613, "logps/chosen": -2.41556978225708, "logps/rejected": -3.806549549102783, "loss": 0.919, "nll_loss": 0.8834829330444336, "rewards/accuracies": 0.875, "rewards/chosen": -0.24155700206756592, "rewards/margins": 0.13909797370433807, "rewards/rejected": -0.3806549310684204, "step": 1646 }, { "epoch": 4.509240246406571, "grad_norm": 3.3899471759796143, "learning_rate": 7.745205479452055e-07, "log_odds_chosen": 0.8241578340530396, "log_odds_ratio": -0.4184107184410095, "logits/chosen": 0.5696998834609985, "logits/rejected": 0.49163901805877686, "logps/chosen": -2.157486915588379, "logps/rejected": -2.904590368270874, "loss": 1.0131, "nll_loss": 0.9712704420089722, "rewards/accuracies": 0.875, "rewards/chosen": -0.21574871242046356, "rewards/margins": 0.07471032440662384, "rewards/rejected": -0.2904590368270874, "step": 1647 }, { "epoch": 4.511978097193703, "grad_norm": 3.386586904525757, "learning_rate": 7.743835616438356e-07, "log_odds_chosen": 0.5524235367774963, "log_odds_ratio": -0.5564102530479431, "logits/chosen": 0.5495302677154541, "logits/rejected": 0.5130254626274109, "logps/chosen": -2.258676528930664, "logps/rejected": -2.761789321899414, "loss": 1.0371, "nll_loss": 0.9814151525497437, "rewards/accuracies": 0.625, "rewards/chosen": -0.2258676439523697, "rewards/margins": 0.05031127482652664, "rewards/rejected": -0.2761789560317993, "step": 1648 }, { "epoch": 4.514715947980835, "grad_norm": 3.0005180835723877, "learning_rate": 7.742465753424657e-07, "log_odds_chosen": 1.523669958114624, "log_odds_ratio": -0.29418084025382996, "logits/chosen": 0.7633117437362671, "logits/rejected": 0.754919171333313, "logps/chosen": -2.4393205642700195, "logps/rejected": -3.8719489574432373, "loss": 0.8662, "nll_loss": 0.8367898464202881, "rewards/accuracies": 0.875, "rewards/chosen": -0.2439320683479309, "rewards/margins": 0.1432628333568573, "rewards/rejected": -0.3871949315071106, "step": 1649 }, { "epoch": 4.517453798767967, "grad_norm": 3.983795404434204, "learning_rate": 7.741095890410958e-07, "log_odds_chosen": 0.8431698083877563, "log_odds_ratio": -0.4660862684249878, "logits/chosen": 0.6455524563789368, "logits/rejected": 0.6576250791549683, "logps/chosen": -2.012667417526245, "logps/rejected": -2.758383274078369, "loss": 0.9216, "nll_loss": 0.8750366568565369, "rewards/accuracies": 0.75, "rewards/chosen": -0.20126675069332123, "rewards/margins": 0.07457158714532852, "rewards/rejected": -0.27583834528923035, "step": 1650 }, { "epoch": 4.520191649555099, "grad_norm": 3.9228248596191406, "learning_rate": 7.73972602739726e-07, "log_odds_chosen": 0.7235591411590576, "log_odds_ratio": -0.4988009035587311, "logits/chosen": 0.8937593698501587, "logits/rejected": 0.9508259296417236, "logps/chosen": -3.1462807655334473, "logps/rejected": -3.84706711769104, "loss": 0.851, "nll_loss": 0.8011046051979065, "rewards/accuracies": 0.75, "rewards/chosen": -0.31462806463241577, "rewards/margins": 0.07007865607738495, "rewards/rejected": -0.3847067356109619, "step": 1651 }, { "epoch": 4.522929500342231, "grad_norm": 3.480114698410034, "learning_rate": 7.738356164383562e-07, "log_odds_chosen": 0.8256916999816895, "log_odds_ratio": -0.4050199091434479, "logits/chosen": 0.5242728590965271, "logits/rejected": 0.4165072739124298, "logps/chosen": -2.1323742866516113, "logps/rejected": -2.8636112213134766, "loss": 0.9884, "nll_loss": 0.947937548160553, "rewards/accuracies": 1.0, "rewards/chosen": -0.2132374346256256, "rewards/margins": 0.07312372326850891, "rewards/rejected": -0.2863611578941345, "step": 1652 }, { "epoch": 4.525667351129363, "grad_norm": 3.9692742824554443, "learning_rate": 7.736986301369862e-07, "log_odds_chosen": 0.5406097769737244, "log_odds_ratio": -0.5332521200180054, "logits/chosen": 0.6456783413887024, "logits/rejected": 0.5516390204429626, "logps/chosen": -1.826785683631897, "logps/rejected": -2.3313093185424805, "loss": 0.9578, "nll_loss": 0.9044901132583618, "rewards/accuracies": 0.875, "rewards/chosen": -0.18267858028411865, "rewards/margins": 0.05045235902070999, "rewards/rejected": -0.23313093185424805, "step": 1653 }, { "epoch": 4.528405201916495, "grad_norm": 5.577231407165527, "learning_rate": 7.735616438356164e-07, "log_odds_chosen": 0.7912991642951965, "log_odds_ratio": -0.5323917269706726, "logits/chosen": 0.7494853138923645, "logits/rejected": 0.6967143416404724, "logps/chosen": -3.043705940246582, "logps/rejected": -3.7890706062316895, "loss": 0.8846, "nll_loss": 0.8313517570495605, "rewards/accuracies": 0.875, "rewards/chosen": -0.30437058210372925, "rewards/margins": 0.07453650236129761, "rewards/rejected": -0.37890708446502686, "step": 1654 }, { "epoch": 4.531143052703627, "grad_norm": 4.4931960105896, "learning_rate": 7.734246575342466e-07, "log_odds_chosen": 0.9723684191703796, "log_odds_ratio": -0.4799794852733612, "logits/chosen": 0.8163438439369202, "logits/rejected": 0.8707237243652344, "logps/chosen": -2.824418067932129, "logps/rejected": -3.7545509338378906, "loss": 0.9078, "nll_loss": 0.8598189353942871, "rewards/accuracies": 0.75, "rewards/chosen": -0.2824418246746063, "rewards/margins": 0.09301325678825378, "rewards/rejected": -0.3754551112651825, "step": 1655 }, { "epoch": 4.5338809034907595, "grad_norm": 3.8081133365631104, "learning_rate": 7.732876712328766e-07, "log_odds_chosen": 0.9656883478164673, "log_odds_ratio": -0.4349391758441925, "logits/chosen": 0.9007478952407837, "logits/rejected": 0.863099217414856, "logps/chosen": -2.2515430450439453, "logps/rejected": -3.1634440422058105, "loss": 0.947, "nll_loss": 0.9035160541534424, "rewards/accuracies": 1.0, "rewards/chosen": -0.22515429556369781, "rewards/margins": 0.09119009971618652, "rewards/rejected": -0.31634441018104553, "step": 1656 }, { "epoch": 4.5366187542778915, "grad_norm": 3.83400821685791, "learning_rate": 7.731506849315068e-07, "log_odds_chosen": 1.375794768333435, "log_odds_ratio": -0.369005411863327, "logits/chosen": 0.8732824921607971, "logits/rejected": 0.836966872215271, "logps/chosen": -2.4586610794067383, "logps/rejected": -3.772725820541382, "loss": 0.8926, "nll_loss": 0.8557138442993164, "rewards/accuracies": 0.75, "rewards/chosen": -0.2458660900592804, "rewards/margins": 0.1314064860343933, "rewards/rejected": -0.3772726058959961, "step": 1657 }, { "epoch": 4.539356605065024, "grad_norm": 3.1244256496429443, "learning_rate": 7.73013698630137e-07, "log_odds_chosen": 1.1091856956481934, "log_odds_ratio": -0.3157420754432678, "logits/chosen": 0.8062410950660706, "logits/rejected": 0.7743732929229736, "logps/chosen": -1.6013355255126953, "logps/rejected": -2.547720193862915, "loss": 0.8688, "nll_loss": 0.8372492790222168, "rewards/accuracies": 1.0, "rewards/chosen": -0.16013354063034058, "rewards/margins": 0.09463849663734436, "rewards/rejected": -0.25477203726768494, "step": 1658 }, { "epoch": 4.5420944558521565, "grad_norm": 4.346395492553711, "learning_rate": 7.72876712328767e-07, "log_odds_chosen": 0.4124210774898529, "log_odds_ratio": -0.6674686670303345, "logits/chosen": 0.7614771127700806, "logits/rejected": 0.6901968717575073, "logps/chosen": -2.508838176727295, "logps/rejected": -2.90919828414917, "loss": 0.9971, "nll_loss": 0.9303865432739258, "rewards/accuracies": 0.5, "rewards/chosen": -0.2508838176727295, "rewards/margins": 0.04003600776195526, "rewards/rejected": -0.29091984033584595, "step": 1659 }, { "epoch": 4.544832306639288, "grad_norm": 3.5462236404418945, "learning_rate": 7.727397260273972e-07, "log_odds_chosen": 1.020226001739502, "log_odds_ratio": -0.3903977870941162, "logits/chosen": 0.7505258321762085, "logits/rejected": 0.7715291380882263, "logps/chosen": -2.1665868759155273, "logps/rejected": -3.1208598613739014, "loss": 0.9989, "nll_loss": 0.9598349332809448, "rewards/accuracies": 1.0, "rewards/chosen": -0.21665868163108826, "rewards/margins": 0.09542731940746307, "rewards/rejected": -0.31208598613739014, "step": 1660 }, { "epoch": 4.547570157426421, "grad_norm": 3.3096506595611572, "learning_rate": 7.726027397260274e-07, "log_odds_chosen": 1.6965363025665283, "log_odds_ratio": -0.2599971294403076, "logits/chosen": 0.766507625579834, "logits/rejected": 0.7793304920196533, "logps/chosen": -2.7614269256591797, "logps/rejected": -4.368224143981934, "loss": 0.9382, "nll_loss": 0.9121507406234741, "rewards/accuracies": 1.0, "rewards/chosen": -0.2761426866054535, "rewards/margins": 0.16067971289157867, "rewards/rejected": -0.43682241439819336, "step": 1661 }, { "epoch": 4.550308008213553, "grad_norm": 3.3301665782928467, "learning_rate": 7.724657534246575e-07, "log_odds_chosen": 1.403971791267395, "log_odds_ratio": -0.35019466280937195, "logits/chosen": 0.9716835618019104, "logits/rejected": 1.0070804357528687, "logps/chosen": -2.356515884399414, "logps/rejected": -3.6999692916870117, "loss": 0.7658, "nll_loss": 0.7307447195053101, "rewards/accuracies": 0.875, "rewards/chosen": -0.23565159738063812, "rewards/margins": 0.13434535264968872, "rewards/rejected": -0.36999696493148804, "step": 1662 }, { "epoch": 4.553045859000685, "grad_norm": 6.5665106773376465, "learning_rate": 7.723287671232876e-07, "log_odds_chosen": 0.8897133469581604, "log_odds_ratio": -0.47660571336746216, "logits/chosen": 0.9287809133529663, "logits/rejected": 0.951850175857544, "logps/chosen": -2.6953089237213135, "logps/rejected": -3.51047420501709, "loss": 0.87, "nll_loss": 0.8223459720611572, "rewards/accuracies": 0.75, "rewards/chosen": -0.26953089237213135, "rewards/margins": 0.0815165564417839, "rewards/rejected": -0.35104745626449585, "step": 1663 }, { "epoch": 4.555783709787817, "grad_norm": 5.940553665161133, "learning_rate": 7.721917808219178e-07, "log_odds_chosen": 1.221510648727417, "log_odds_ratio": -0.3639324903488159, "logits/chosen": 0.6394762396812439, "logits/rejected": 0.649526059627533, "logps/chosen": -2.050907611846924, "logps/rejected": -3.1917171478271484, "loss": 0.9496, "nll_loss": 0.9131940603256226, "rewards/accuracies": 0.875, "rewards/chosen": -0.20509076118469238, "rewards/margins": 0.11408096551895142, "rewards/rejected": -0.3191717267036438, "step": 1664 }, { "epoch": 4.558521560574949, "grad_norm": 4.098317623138428, "learning_rate": 7.720547945205479e-07, "log_odds_chosen": 0.8112627267837524, "log_odds_ratio": -0.43232661485671997, "logits/chosen": 0.9313194751739502, "logits/rejected": 0.828352689743042, "logps/chosen": -2.2079904079437256, "logps/rejected": -2.9300501346588135, "loss": 1.0407, "nll_loss": 0.9974656105041504, "rewards/accuracies": 0.75, "rewards/chosen": -0.220799058675766, "rewards/margins": 0.07220596820116043, "rewards/rejected": -0.2930050194263458, "step": 1665 }, { "epoch": 4.561259411362081, "grad_norm": 8.506355285644531, "learning_rate": 7.71917808219178e-07, "log_odds_chosen": 0.4622751474380493, "log_odds_ratio": -0.7193381786346436, "logits/chosen": 0.8281799554824829, "logits/rejected": 0.9546495676040649, "logps/chosen": -2.961402654647827, "logps/rejected": -3.3531875610351562, "loss": 0.774, "nll_loss": 0.70206618309021, "rewards/accuracies": 0.625, "rewards/chosen": -0.29614025354385376, "rewards/margins": 0.03917847201228142, "rewards/rejected": -0.33531874418258667, "step": 1666 }, { "epoch": 4.563997262149213, "grad_norm": 6.283511638641357, "learning_rate": 7.717808219178081e-07, "log_odds_chosen": 0.6272593140602112, "log_odds_ratio": -0.44292211532592773, "logits/chosen": 0.640062689781189, "logits/rejected": 0.6010522246360779, "logps/chosen": -2.0354421138763428, "logps/rejected": -2.5853288173675537, "loss": 0.9434, "nll_loss": 0.899153470993042, "rewards/accuracies": 0.875, "rewards/chosen": -0.2035442292690277, "rewards/margins": 0.054988667368888855, "rewards/rejected": -0.25853288173675537, "step": 1667 }, { "epoch": 4.566735112936345, "grad_norm": 5.99172306060791, "learning_rate": 7.716438356164383e-07, "log_odds_chosen": 0.7163265347480774, "log_odds_ratio": -0.5230672359466553, "logits/chosen": 0.5690004229545593, "logits/rejected": 0.5802839398384094, "logps/chosen": -2.414943218231201, "logps/rejected": -3.0682270526885986, "loss": 1.0264, "nll_loss": 0.9740617275238037, "rewards/accuracies": 0.875, "rewards/chosen": -0.2414943277835846, "rewards/margins": 0.06532840430736542, "rewards/rejected": -0.3068227171897888, "step": 1668 }, { "epoch": 4.569472963723477, "grad_norm": 7.75568962097168, "learning_rate": 7.715068493150685e-07, "log_odds_chosen": 0.5219684839248657, "log_odds_ratio": -0.6580981016159058, "logits/chosen": 0.96014404296875, "logits/rejected": 0.9787317514419556, "logps/chosen": -3.2546300888061523, "logps/rejected": -3.7265820503234863, "loss": 0.863, "nll_loss": 0.7972242832183838, "rewards/accuracies": 0.75, "rewards/chosen": -0.32546302676200867, "rewards/margins": 0.0471951849758625, "rewards/rejected": -0.37265825271606445, "step": 1669 }, { "epoch": 4.572210814510609, "grad_norm": 4.477523326873779, "learning_rate": 7.713698630136985e-07, "log_odds_chosen": 0.6893320083618164, "log_odds_ratio": -0.5045205354690552, "logits/chosen": 0.8644095659255981, "logits/rejected": 0.8859264850616455, "logps/chosen": -2.4153246879577637, "logps/rejected": -3.0358967781066895, "loss": 0.8252, "nll_loss": 0.7747703194618225, "rewards/accuracies": 0.625, "rewards/chosen": -0.24153245985507965, "rewards/margins": 0.062057215720415115, "rewards/rejected": -0.30358967185020447, "step": 1670 }, { "epoch": 4.574948665297741, "grad_norm": 4.9904069900512695, "learning_rate": 7.712328767123287e-07, "log_odds_chosen": 1.3243987560272217, "log_odds_ratio": -0.44181033968925476, "logits/chosen": 0.9286434650421143, "logits/rejected": 1.0202072858810425, "logps/chosen": -2.7772202491760254, "logps/rejected": -4.020157814025879, "loss": 0.7887, "nll_loss": 0.7445244789123535, "rewards/accuracies": 0.75, "rewards/chosen": -0.277722030878067, "rewards/margins": 0.12429375946521759, "rewards/rejected": -0.402015745639801, "step": 1671 }, { "epoch": 4.577686516084873, "grad_norm": 5.648156642913818, "learning_rate": 7.710958904109589e-07, "log_odds_chosen": 1.267720103263855, "log_odds_ratio": -0.3142184019088745, "logits/chosen": 0.6731997728347778, "logits/rejected": 0.7135300636291504, "logps/chosen": -2.025731086730957, "logps/rejected": -3.1846323013305664, "loss": 0.8905, "nll_loss": 0.8590836524963379, "rewards/accuracies": 1.0, "rewards/chosen": -0.20257310569286346, "rewards/margins": 0.11589011549949646, "rewards/rejected": -0.3184632360935211, "step": 1672 }, { "epoch": 4.5804243668720055, "grad_norm": 4.556488037109375, "learning_rate": 7.709589041095889e-07, "log_odds_chosen": 0.6278705596923828, "log_odds_ratio": -0.48042190074920654, "logits/chosen": 0.7454975843429565, "logits/rejected": 0.7564305067062378, "logps/chosen": -2.5969841480255127, "logps/rejected": -3.1760122776031494, "loss": 0.9165, "nll_loss": 0.8684360980987549, "rewards/accuracies": 0.75, "rewards/chosen": -0.25969842076301575, "rewards/margins": 0.05790280923247337, "rewards/rejected": -0.3176012337207794, "step": 1673 }, { "epoch": 4.583162217659138, "grad_norm": 3.2954771518707275, "learning_rate": 7.708219178082191e-07, "log_odds_chosen": 1.7738826274871826, "log_odds_ratio": -0.37796658277511597, "logits/chosen": 0.7547244429588318, "logits/rejected": 0.6890966892242432, "logps/chosen": -2.67457914352417, "logps/rejected": -4.377737522125244, "loss": 0.9691, "nll_loss": 0.9313358068466187, "rewards/accuracies": 0.875, "rewards/chosen": -0.2674579322338104, "rewards/margins": 0.17031580209732056, "rewards/rejected": -0.437773734331131, "step": 1674 }, { "epoch": 4.58590006844627, "grad_norm": 4.516196250915527, "learning_rate": 7.706849315068493e-07, "log_odds_chosen": 0.20662453770637512, "log_odds_ratio": -0.6154681444168091, "logits/chosen": 0.6921029686927795, "logits/rejected": 0.6883806586265564, "logps/chosen": -1.9818007946014404, "logps/rejected": -2.164332866668701, "loss": 0.9802, "nll_loss": 0.9186672568321228, "rewards/accuracies": 0.625, "rewards/chosen": -0.19818010926246643, "rewards/margins": 0.018253182992339134, "rewards/rejected": -0.21643328666687012, "step": 1675 }, { "epoch": 4.588637919233402, "grad_norm": 7.681436061859131, "learning_rate": 7.705479452054794e-07, "log_odds_chosen": 1.6146427392959595, "log_odds_ratio": -0.41952621936798096, "logits/chosen": 0.90596604347229, "logits/rejected": 0.9770693182945251, "logps/chosen": -2.821309804916382, "logps/rejected": -4.406894683837891, "loss": 0.9336, "nll_loss": 0.8916463851928711, "rewards/accuracies": 0.75, "rewards/chosen": -0.28213098645210266, "rewards/margins": 0.15855848789215088, "rewards/rejected": -0.44068944454193115, "step": 1676 }, { "epoch": 4.591375770020534, "grad_norm": 4.011017322540283, "learning_rate": 7.704109589041095e-07, "log_odds_chosen": 0.5016287565231323, "log_odds_ratio": -0.7138967514038086, "logits/chosen": 0.864770770072937, "logits/rejected": 0.9114396572113037, "logps/chosen": -2.9038727283477783, "logps/rejected": -3.375877857208252, "loss": 0.9988, "nll_loss": 0.9274592995643616, "rewards/accuracies": 0.625, "rewards/chosen": -0.2903873026371002, "rewards/margins": 0.047200486063957214, "rewards/rejected": -0.33758777379989624, "step": 1677 }, { "epoch": 4.594113620807666, "grad_norm": 3.1102375984191895, "learning_rate": 7.702739726027397e-07, "log_odds_chosen": 1.2867369651794434, "log_odds_ratio": -0.44451791048049927, "logits/chosen": 0.9173973798751831, "logits/rejected": 0.8187459707260132, "logps/chosen": -2.4037368297576904, "logps/rejected": -3.6326682567596436, "loss": 0.9527, "nll_loss": 0.9082765579223633, "rewards/accuracies": 0.75, "rewards/chosen": -0.24037371575832367, "rewards/margins": 0.12289311736822128, "rewards/rejected": -0.36326682567596436, "step": 1678 }, { "epoch": 4.596851471594798, "grad_norm": 4.742572784423828, "learning_rate": 7.701369863013698e-07, "log_odds_chosen": 0.5577890872955322, "log_odds_ratio": -0.5610609650611877, "logits/chosen": 0.729332447052002, "logits/rejected": 0.7301599979400635, "logps/chosen": -2.408106803894043, "logps/rejected": -2.904873847961426, "loss": 0.9773, "nll_loss": 0.9212068915367126, "rewards/accuracies": 0.625, "rewards/chosen": -0.24081066250801086, "rewards/margins": 0.04967670887708664, "rewards/rejected": -0.2904873788356781, "step": 1679 }, { "epoch": 4.59958932238193, "grad_norm": 5.409361362457275, "learning_rate": 7.699999999999999e-07, "log_odds_chosen": 1.3532203435897827, "log_odds_ratio": -0.5674344301223755, "logits/chosen": 0.5909346342086792, "logits/rejected": 0.5830028653144836, "logps/chosen": -2.615025281906128, "logps/rejected": -3.7934887409210205, "loss": 0.9331, "nll_loss": 0.8763473033905029, "rewards/accuracies": 0.875, "rewards/chosen": -0.2615025043487549, "rewards/margins": 0.11784635484218597, "rewards/rejected": -0.37934887409210205, "step": 1680 }, { "epoch": 4.602327173169062, "grad_norm": 4.443489074707031, "learning_rate": 7.6986301369863e-07, "log_odds_chosen": 1.2076679468154907, "log_odds_ratio": -0.5306063890457153, "logits/chosen": 0.8880628943443298, "logits/rejected": 0.8936516046524048, "logps/chosen": -2.480482816696167, "logps/rejected": -3.6184473037719727, "loss": 0.8785, "nll_loss": 0.8254053592681885, "rewards/accuracies": 0.75, "rewards/chosen": -0.24804827570915222, "rewards/margins": 0.11379643529653549, "rewards/rejected": -0.3618447184562683, "step": 1681 }, { "epoch": 4.605065023956194, "grad_norm": 3.3420896530151367, "learning_rate": 7.697260273972602e-07, "log_odds_chosen": 0.7492853403091431, "log_odds_ratio": -0.46550220251083374, "logits/chosen": 0.616010308265686, "logits/rejected": 0.6240342259407043, "logps/chosen": -2.05700945854187, "logps/rejected": -2.7591123580932617, "loss": 1.0181, "nll_loss": 0.9715126156806946, "rewards/accuracies": 0.875, "rewards/chosen": -0.20570096373558044, "rewards/margins": 0.0702102929353714, "rewards/rejected": -0.27591124176979065, "step": 1682 }, { "epoch": 4.607802874743326, "grad_norm": 3.945556879043579, "learning_rate": 7.695890410958904e-07, "log_odds_chosen": 0.22895167768001556, "log_odds_ratio": -0.6994763016700745, "logits/chosen": 0.7729923129081726, "logits/rejected": 0.7658935785293579, "logps/chosen": -2.358222484588623, "logps/rejected": -2.6066982746124268, "loss": 1.0289, "nll_loss": 0.9589440226554871, "rewards/accuracies": 0.375, "rewards/chosen": -0.23582223057746887, "rewards/margins": 0.024847596883773804, "rewards/rejected": -0.2606698274612427, "step": 1683 }, { "epoch": 4.610540725530458, "grad_norm": 4.239226341247559, "learning_rate": 7.694520547945204e-07, "log_odds_chosen": 0.6298692226409912, "log_odds_ratio": -0.48536741733551025, "logits/chosen": 0.7436583638191223, "logits/rejected": 0.6995030641555786, "logps/chosen": -2.527181386947632, "logps/rejected": -3.114506483078003, "loss": 0.9301, "nll_loss": 0.881573498249054, "rewards/accuracies": 0.875, "rewards/chosen": -0.25271812081336975, "rewards/margins": 0.05873250961303711, "rewards/rejected": -0.31145066022872925, "step": 1684 }, { "epoch": 4.61327857631759, "grad_norm": 6.287684917449951, "learning_rate": 7.693150684931506e-07, "log_odds_chosen": 1.517587661743164, "log_odds_ratio": -0.36532720923423767, "logits/chosen": 0.8269808888435364, "logits/rejected": 0.8181080222129822, "logps/chosen": -2.318955421447754, "logps/rejected": -3.755917549133301, "loss": 0.9079, "nll_loss": 0.8713676333427429, "rewards/accuracies": 0.75, "rewards/chosen": -0.2318955361843109, "rewards/margins": 0.14369621872901917, "rewards/rejected": -0.3755917549133301, "step": 1685 }, { "epoch": 4.616016427104723, "grad_norm": 3.0197439193725586, "learning_rate": 7.691780821917808e-07, "log_odds_chosen": 1.2349040508270264, "log_odds_ratio": -0.3540348410606384, "logits/chosen": 0.704303503036499, "logits/rejected": 0.6894232630729675, "logps/chosen": -2.025731086730957, "logps/rejected": -3.1563682556152344, "loss": 0.9749, "nll_loss": 0.9394678473472595, "rewards/accuracies": 0.875, "rewards/chosen": -0.20257312059402466, "rewards/margins": 0.11306371539831161, "rewards/rejected": -0.31563684344291687, "step": 1686 }, { "epoch": 4.618754277891854, "grad_norm": 4.0043721199035645, "learning_rate": 7.690410958904108e-07, "log_odds_chosen": 0.89864182472229, "log_odds_ratio": -0.41099241375923157, "logits/chosen": 0.7545548677444458, "logits/rejected": 0.6636583805084229, "logps/chosen": -1.7280248403549194, "logps/rejected": -2.5243911743164062, "loss": 1.0305, "nll_loss": 0.9893532395362854, "rewards/accuracies": 0.875, "rewards/chosen": -0.17280250787734985, "rewards/margins": 0.07963661849498749, "rewards/rejected": -0.25243911147117615, "step": 1687 }, { "epoch": 4.621492128678987, "grad_norm": 3.5509068965911865, "learning_rate": 7.68904109589041e-07, "log_odds_chosen": 0.6760924458503723, "log_odds_ratio": -0.4628193974494934, "logits/chosen": 0.7215121388435364, "logits/rejected": 0.6294549703598022, "logps/chosen": -2.5059454441070557, "logps/rejected": -3.1310207843780518, "loss": 0.9821, "nll_loss": 0.9358340501785278, "rewards/accuracies": 0.75, "rewards/chosen": -0.2505945563316345, "rewards/margins": 0.0625075250864029, "rewards/rejected": -0.3131020665168762, "step": 1688 }, { "epoch": 4.6242299794661195, "grad_norm": 4.476627349853516, "learning_rate": 7.687671232876712e-07, "log_odds_chosen": 0.2506285309791565, "log_odds_ratio": -0.6933495998382568, "logits/chosen": 0.7063231468200684, "logits/rejected": 0.6834542751312256, "logps/chosen": -2.9162964820861816, "logps/rejected": -3.116392135620117, "loss": 0.883, "nll_loss": 0.8137096762657166, "rewards/accuracies": 0.625, "rewards/chosen": -0.2916296720504761, "rewards/margins": 0.020009569823741913, "rewards/rejected": -0.3116392195224762, "step": 1689 }, { "epoch": 4.6269678302532515, "grad_norm": 3.2814886569976807, "learning_rate": 7.686301369863013e-07, "log_odds_chosen": 1.4624292850494385, "log_odds_ratio": -0.3145343065261841, "logits/chosen": 0.8888359665870667, "logits/rejected": 0.8669731616973877, "logps/chosen": -2.36906361579895, "logps/rejected": -3.7365026473999023, "loss": 0.8815, "nll_loss": 0.8500895500183105, "rewards/accuracies": 1.0, "rewards/chosen": -0.23690634965896606, "rewards/margins": 0.13674390316009521, "rewards/rejected": -0.37365028262138367, "step": 1690 }, { "epoch": 4.629705681040384, "grad_norm": 3.4586799144744873, "learning_rate": 7.684931506849314e-07, "log_odds_chosen": 1.55389404296875, "log_odds_ratio": -0.3376729488372803, "logits/chosen": 0.7564997673034668, "logits/rejected": 0.7740894556045532, "logps/chosen": -1.8666648864746094, "logps/rejected": -3.3167929649353027, "loss": 0.9346, "nll_loss": 0.9008233547210693, "rewards/accuracies": 0.75, "rewards/chosen": -0.18666648864746094, "rewards/margins": 0.14501282572746277, "rewards/rejected": -0.3316793143749237, "step": 1691 }, { "epoch": 4.632443531827516, "grad_norm": 4.5178022384643555, "learning_rate": 7.683561643835617e-07, "log_odds_chosen": 1.2090981006622314, "log_odds_ratio": -0.459034264087677, "logits/chosen": 0.6055235266685486, "logits/rejected": 0.5312715768814087, "logps/chosen": -1.7869324684143066, "logps/rejected": -2.934998035430908, "loss": 0.9411, "nll_loss": 0.8951973915100098, "rewards/accuracies": 0.875, "rewards/chosen": -0.1786932349205017, "rewards/margins": 0.11480656266212463, "rewards/rejected": -0.29349982738494873, "step": 1692 }, { "epoch": 4.635181382614648, "grad_norm": 4.008362293243408, "learning_rate": 7.682191780821918e-07, "log_odds_chosen": 1.0880804061889648, "log_odds_ratio": -0.3865668773651123, "logits/chosen": 0.8209072947502136, "logits/rejected": 0.7495207786560059, "logps/chosen": -2.056471109390259, "logps/rejected": -3.0443763732910156, "loss": 0.9686, "nll_loss": 0.9299436807632446, "rewards/accuracies": 1.0, "rewards/chosen": -0.20564711093902588, "rewards/margins": 0.0987904891371727, "rewards/rejected": -0.3044376075267792, "step": 1693 }, { "epoch": 4.63791923340178, "grad_norm": 4.312529563903809, "learning_rate": 7.680821917808219e-07, "log_odds_chosen": 0.5041626691818237, "log_odds_ratio": -0.5728904604911804, "logits/chosen": 0.7438157796859741, "logits/rejected": 0.7604458928108215, "logps/chosen": -2.5927724838256836, "logps/rejected": -3.0705533027648926, "loss": 0.9982, "nll_loss": 0.9409000873565674, "rewards/accuracies": 0.75, "rewards/chosen": -0.25927725434303284, "rewards/margins": 0.04777810722589493, "rewards/rejected": -0.30705535411834717, "step": 1694 }, { "epoch": 4.640657084188912, "grad_norm": 5.14853048324585, "learning_rate": 7.679452054794521e-07, "log_odds_chosen": 0.7249008417129517, "log_odds_ratio": -0.47855135798454285, "logits/chosen": 0.7695740461349487, "logits/rejected": 0.7123119831085205, "logps/chosen": -1.8404371738433838, "logps/rejected": -2.4665610790252686, "loss": 0.961, "nll_loss": 0.9131920337677002, "rewards/accuracies": 0.875, "rewards/chosen": -0.18404372036457062, "rewards/margins": 0.062612384557724, "rewards/rejected": -0.2466561198234558, "step": 1695 }, { "epoch": 4.643394934976044, "grad_norm": 4.495363235473633, "learning_rate": 7.678082191780822e-07, "log_odds_chosen": 0.8191568851470947, "log_odds_ratio": -0.5435891151428223, "logits/chosen": 0.5111280679702759, "logits/rejected": 0.4488452672958374, "logps/chosen": -2.253767251968384, "logps/rejected": -3.0048153400421143, "loss": 1.0261, "nll_loss": 0.971765398979187, "rewards/accuracies": 0.625, "rewards/chosen": -0.22537671029567719, "rewards/margins": 0.07510481774806976, "rewards/rejected": -0.30048155784606934, "step": 1696 }, { "epoch": 4.646132785763176, "grad_norm": 4.895321846008301, "learning_rate": 7.676712328767124e-07, "log_odds_chosen": 0.29076871275901794, "log_odds_ratio": -0.8595006465911865, "logits/chosen": 0.8668895959854126, "logits/rejected": 0.8480525612831116, "logps/chosen": -3.1330454349517822, "logps/rejected": -3.414386510848999, "loss": 1.009, "nll_loss": 0.9230819940567017, "rewards/accuracies": 0.5, "rewards/chosen": -0.3133045434951782, "rewards/margins": 0.028134113177657127, "rewards/rejected": -0.3414386510848999, "step": 1697 }, { "epoch": 4.648870636550308, "grad_norm": 3.2953062057495117, "learning_rate": 7.675342465753424e-07, "log_odds_chosen": 0.9173750877380371, "log_odds_ratio": -0.43378472328186035, "logits/chosen": 0.8110495805740356, "logits/rejected": 0.7393219470977783, "logps/chosen": -1.8603448867797852, "logps/rejected": -2.6893973350524902, "loss": 0.913, "nll_loss": 0.8696061372756958, "rewards/accuracies": 0.75, "rewards/chosen": -0.18603448569774628, "rewards/margins": 0.08290523290634155, "rewards/rejected": -0.268939733505249, "step": 1698 }, { "epoch": 4.65160848733744, "grad_norm": 4.0687150955200195, "learning_rate": 7.673972602739726e-07, "log_odds_chosen": 0.5939721465110779, "log_odds_ratio": -0.47507616877555847, "logits/chosen": 0.8951050639152527, "logits/rejected": 0.8729395866394043, "logps/chosen": -2.587559700012207, "logps/rejected": -3.1397814750671387, "loss": 0.9013, "nll_loss": 0.8537819385528564, "rewards/accuracies": 1.0, "rewards/chosen": -0.25875598192214966, "rewards/margins": 0.05522218346595764, "rewards/rejected": -0.3139781355857849, "step": 1699 }, { "epoch": 4.654346338124572, "grad_norm": 3.809436082839966, "learning_rate": 7.672602739726028e-07, "log_odds_chosen": 1.304983139038086, "log_odds_ratio": -0.2920287549495697, "logits/chosen": 0.6994419097900391, "logits/rejected": 0.8163821697235107, "logps/chosen": -2.1098122596740723, "logps/rejected": -3.279507637023926, "loss": 0.9267, "nll_loss": 0.8974587917327881, "rewards/accuracies": 1.0, "rewards/chosen": -0.21098122000694275, "rewards/margins": 0.11696955561637878, "rewards/rejected": -0.32795077562332153, "step": 1700 }, { "epoch": 4.657084188911704, "grad_norm": 3.1972336769104004, "learning_rate": 7.671232876712328e-07, "log_odds_chosen": 0.9442986845970154, "log_odds_ratio": -0.43938180804252625, "logits/chosen": 0.887283444404602, "logits/rejected": 0.8978208303451538, "logps/chosen": -2.749014377593994, "logps/rejected": -3.637746810913086, "loss": 0.8567, "nll_loss": 0.8127409219741821, "rewards/accuracies": 0.75, "rewards/chosen": -0.27490144968032837, "rewards/margins": 0.0888732299208641, "rewards/rejected": -0.36377468705177307, "step": 1701 }, { "epoch": 4.659822039698836, "grad_norm": 4.522469520568848, "learning_rate": 7.66986301369863e-07, "log_odds_chosen": 0.7634501457214355, "log_odds_ratio": -0.5830487608909607, "logits/chosen": 0.9068140387535095, "logits/rejected": 0.9517740607261658, "logps/chosen": -2.7810356616973877, "logps/rejected": -3.4305548667907715, "loss": 0.8028, "nll_loss": 0.7444491386413574, "rewards/accuracies": 0.75, "rewards/chosen": -0.2781035602092743, "rewards/margins": 0.06495194137096405, "rewards/rejected": -0.34305551648139954, "step": 1702 }, { "epoch": 4.662559890485968, "grad_norm": 5.771106719970703, "learning_rate": 7.668493150684932e-07, "log_odds_chosen": 0.577526867389679, "log_odds_ratio": -0.8205021619796753, "logits/chosen": 0.6006172895431519, "logits/rejected": 0.6563271880149841, "logps/chosen": -4.022008895874023, "logps/rejected": -4.596140384674072, "loss": 0.9848, "nll_loss": 0.9027410745620728, "rewards/accuracies": 0.75, "rewards/chosen": -0.40220093727111816, "rewards/margins": 0.05741315707564354, "rewards/rejected": -0.4596140384674072, "step": 1703 }, { "epoch": 4.6652977412731005, "grad_norm": 4.73722505569458, "learning_rate": 7.667123287671233e-07, "log_odds_chosen": 0.665454089641571, "log_odds_ratio": -0.7189902663230896, "logits/chosen": 0.7291338443756104, "logits/rejected": 0.6854996681213379, "logps/chosen": -2.496197462081909, "logps/rejected": -3.1024179458618164, "loss": 1.0295, "nll_loss": 0.9575577974319458, "rewards/accuracies": 0.5, "rewards/chosen": -0.249619722366333, "rewards/margins": 0.060622088611125946, "rewards/rejected": -0.31024181842803955, "step": 1704 }, { "epoch": 4.6680355920602326, "grad_norm": 3.1222286224365234, "learning_rate": 7.665753424657534e-07, "log_odds_chosen": 1.658360242843628, "log_odds_ratio": -0.2741978168487549, "logits/chosen": 0.8834236264228821, "logits/rejected": 0.9018427729606628, "logps/chosen": -2.135256052017212, "logps/rejected": -3.7139813899993896, "loss": 0.8737, "nll_loss": 0.8463025093078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.21352560818195343, "rewards/margins": 0.15787255764007568, "rewards/rejected": -0.3713981807231903, "step": 1705 }, { "epoch": 4.670773442847365, "grad_norm": 4.114634990692139, "learning_rate": 7.664383561643836e-07, "log_odds_chosen": 0.8550378084182739, "log_odds_ratio": -0.3691977560520172, "logits/chosen": 0.6698800325393677, "logits/rejected": 0.6318904161453247, "logps/chosen": -1.9863169193267822, "logps/rejected": -2.7372682094573975, "loss": 0.9085, "nll_loss": 0.8715514540672302, "rewards/accuracies": 1.0, "rewards/chosen": -0.19863170385360718, "rewards/margins": 0.07509513199329376, "rewards/rejected": -0.27372682094573975, "step": 1706 }, { "epoch": 4.673511293634497, "grad_norm": 4.496303558349609, "learning_rate": 7.663013698630137e-07, "log_odds_chosen": 0.6460461616516113, "log_odds_ratio": -0.5136876702308655, "logits/chosen": 0.621036946773529, "logits/rejected": 0.6439138054847717, "logps/chosen": -2.427462339401245, "logps/rejected": -3.009744644165039, "loss": 0.8922, "nll_loss": 0.840829610824585, "rewards/accuracies": 0.875, "rewards/chosen": -0.2427462339401245, "rewards/margins": 0.058228228241205215, "rewards/rejected": -0.3009744882583618, "step": 1707 }, { "epoch": 4.676249144421629, "grad_norm": 3.774322032928467, "learning_rate": 7.661643835616438e-07, "log_odds_chosen": 1.1930408477783203, "log_odds_ratio": -0.29556047916412354, "logits/chosen": 0.8778069019317627, "logits/rejected": 0.8924928903579712, "logps/chosen": -2.196061849594116, "logps/rejected": -3.2680587768554688, "loss": 0.8112, "nll_loss": 0.7816592454910278, "rewards/accuracies": 1.0, "rewards/chosen": -0.2196061909198761, "rewards/margins": 0.10719969123601913, "rewards/rejected": -0.32680588960647583, "step": 1708 }, { "epoch": 4.678986995208761, "grad_norm": 3.4391908645629883, "learning_rate": 7.66027397260274e-07, "log_odds_chosen": 0.9691177606582642, "log_odds_ratio": -0.40666770935058594, "logits/chosen": 0.8025896549224854, "logits/rejected": 0.7835311889648438, "logps/chosen": -2.3136656284332275, "logps/rejected": -3.2015466690063477, "loss": 0.9611, "nll_loss": 0.920413613319397, "rewards/accuracies": 0.875, "rewards/chosen": -0.2313665747642517, "rewards/margins": 0.08878809958696365, "rewards/rejected": -0.32015466690063477, "step": 1709 }, { "epoch": 4.681724845995893, "grad_norm": 5.131062984466553, "learning_rate": 7.658904109589041e-07, "log_odds_chosen": 0.2722271680831909, "log_odds_ratio": -0.8222774267196655, "logits/chosen": 0.6626228094100952, "logits/rejected": 0.6126418113708496, "logps/chosen": -2.282803535461426, "logps/rejected": -2.513624906539917, "loss": 1.1167, "nll_loss": 1.0344345569610596, "rewards/accuracies": 0.625, "rewards/chosen": -0.22828035056591034, "rewards/margins": 0.023082122206687927, "rewards/rejected": -0.25136247277259827, "step": 1710 }, { "epoch": 4.684462696783025, "grad_norm": 4.469659328460693, "learning_rate": 7.657534246575343e-07, "log_odds_chosen": 0.2567877173423767, "log_odds_ratio": -0.6952383518218994, "logits/chosen": 0.6925586462020874, "logits/rejected": 0.7202988862991333, "logps/chosen": -2.8170948028564453, "logps/rejected": -3.0474495887756348, "loss": 0.9624, "nll_loss": 0.8929018378257751, "rewards/accuracies": 0.75, "rewards/chosen": -0.2817094922065735, "rewards/margins": 0.023035474121570587, "rewards/rejected": -0.3047449588775635, "step": 1711 }, { "epoch": 4.687200547570157, "grad_norm": 3.841237783432007, "learning_rate": 7.656164383561643e-07, "log_odds_chosen": 0.8603720664978027, "log_odds_ratio": -0.5653271675109863, "logits/chosen": 0.9193627834320068, "logits/rejected": 0.9705667495727539, "logps/chosen": -2.6214818954467773, "logps/rejected": -3.402773380279541, "loss": 0.8216, "nll_loss": 0.7650647759437561, "rewards/accuracies": 0.75, "rewards/chosen": -0.2621482014656067, "rewards/margins": 0.07812915742397308, "rewards/rejected": -0.34027737379074097, "step": 1712 }, { "epoch": 4.68993839835729, "grad_norm": 3.102849245071411, "learning_rate": 7.654794520547945e-07, "log_odds_chosen": 1.1758626699447632, "log_odds_ratio": -0.45762643218040466, "logits/chosen": 0.7518326044082642, "logits/rejected": 0.63863205909729, "logps/chosen": -2.0580382347106934, "logps/rejected": -3.133481979370117, "loss": 0.9825, "nll_loss": 0.9367231726646423, "rewards/accuracies": 0.75, "rewards/chosen": -0.20580382645130157, "rewards/margins": 0.10754439234733582, "rewards/rejected": -0.3133482336997986, "step": 1713 }, { "epoch": 4.692676249144421, "grad_norm": 4.405048847198486, "learning_rate": 7.653424657534247e-07, "log_odds_chosen": 0.9051504135131836, "log_odds_ratio": -0.5278728008270264, "logits/chosen": 0.8133947253227234, "logits/rejected": 1.0166075229644775, "logps/chosen": -2.573190927505493, "logps/rejected": -3.443237781524658, "loss": 0.8641, "nll_loss": 0.8112658262252808, "rewards/accuracies": 0.875, "rewards/chosen": -0.2573190927505493, "rewards/margins": 0.08700470626354218, "rewards/rejected": -0.3443237841129303, "step": 1714 }, { "epoch": 4.695414099931554, "grad_norm": 3.6990721225738525, "learning_rate": 7.652054794520547e-07, "log_odds_chosen": 0.9165540933609009, "log_odds_ratio": -0.36266928911209106, "logits/chosen": 0.6887162923812866, "logits/rejected": 0.6871876120567322, "logps/chosen": -2.4841129779815674, "logps/rejected": -3.331173896789551, "loss": 0.8234, "nll_loss": 0.7871809005737305, "rewards/accuracies": 0.875, "rewards/chosen": -0.24841131269931793, "rewards/margins": 0.08470610529184341, "rewards/rejected": -0.33311739563941956, "step": 1715 }, { "epoch": 4.698151950718686, "grad_norm": 3.0040507316589355, "learning_rate": 7.650684931506849e-07, "log_odds_chosen": 1.8020033836364746, "log_odds_ratio": -0.21032899618148804, "logits/chosen": 0.796699047088623, "logits/rejected": 0.7206264734268188, "logps/chosen": -1.6178392171859741, "logps/rejected": -3.2522597312927246, "loss": 0.8406, "nll_loss": 0.8195740580558777, "rewards/accuracies": 1.0, "rewards/chosen": -0.16178391873836517, "rewards/margins": 0.16344209015369415, "rewards/rejected": -0.3252260088920593, "step": 1716 }, { "epoch": 4.700889801505818, "grad_norm": 3.8041634559631348, "learning_rate": 7.649315068493151e-07, "log_odds_chosen": 0.46317434310913086, "log_odds_ratio": -0.54388427734375, "logits/chosen": 0.5187257528305054, "logits/rejected": 0.4849863350391388, "logps/chosen": -3.4508860111236572, "logps/rejected": -3.8782434463500977, "loss": 1.028, "nll_loss": 0.9735836386680603, "rewards/accuracies": 0.625, "rewards/chosen": -0.3450886011123657, "rewards/margins": 0.04273572564125061, "rewards/rejected": -0.38782432675361633, "step": 1717 }, { "epoch": 4.70362765229295, "grad_norm": 5.578505516052246, "learning_rate": 7.647945205479452e-07, "log_odds_chosen": 0.8214690089225769, "log_odds_ratio": -0.39972081780433655, "logits/chosen": 0.824063777923584, "logits/rejected": 0.865924596786499, "logps/chosen": -2.63916015625, "logps/rejected": -3.3968758583068848, "loss": 0.8699, "nll_loss": 0.8299490213394165, "rewards/accuracies": 0.875, "rewards/chosen": -0.263916015625, "rewards/margins": 0.07577159255743027, "rewards/rejected": -0.3396875858306885, "step": 1718 }, { "epoch": 4.706365503080082, "grad_norm": 5.214090347290039, "learning_rate": 7.646575342465753e-07, "log_odds_chosen": 0.7146185040473938, "log_odds_ratio": -0.565027117729187, "logits/chosen": 0.7887179851531982, "logits/rejected": 0.7139463424682617, "logps/chosen": -2.211397171020508, "logps/rejected": -2.82269024848938, "loss": 1.0708, "nll_loss": 1.0142807960510254, "rewards/accuracies": 0.75, "rewards/chosen": -0.22113972902297974, "rewards/margins": 0.061129309237003326, "rewards/rejected": -0.28226903080940247, "step": 1719 }, { "epoch": 4.7091033538672145, "grad_norm": 5.234871864318848, "learning_rate": 7.645205479452055e-07, "log_odds_chosen": 0.6885001063346863, "log_odds_ratio": -0.4940498173236847, "logits/chosen": 0.7468010783195496, "logits/rejected": 0.7385311126708984, "logps/chosen": -2.3444814682006836, "logps/rejected": -2.9185519218444824, "loss": 0.8391, "nll_loss": 0.7896561026573181, "rewards/accuracies": 0.625, "rewards/chosen": -0.2344481498003006, "rewards/margins": 0.05740702897310257, "rewards/rejected": -0.29185518622398376, "step": 1720 }, { "epoch": 4.7118412046543465, "grad_norm": 4.736484050750732, "learning_rate": 7.643835616438356e-07, "log_odds_chosen": 1.3635355234146118, "log_odds_ratio": -0.39802244305610657, "logits/chosen": 0.8276734352111816, "logits/rejected": 0.8864924907684326, "logps/chosen": -2.606240749359131, "logps/rejected": -3.839108467102051, "loss": 0.9159, "nll_loss": 0.8760708570480347, "rewards/accuracies": 0.75, "rewards/chosen": -0.2606240510940552, "rewards/margins": 0.12328678369522095, "rewards/rejected": -0.3839108347892761, "step": 1721 }, { "epoch": 4.714579055441479, "grad_norm": 5.055168628692627, "learning_rate": 7.642465753424657e-07, "log_odds_chosen": 0.1611642837524414, "log_odds_ratio": -0.7600630521774292, "logits/chosen": 0.7673760056495667, "logits/rejected": 0.830213725566864, "logps/chosen": -3.6735446453094482, "logps/rejected": -3.801825761795044, "loss": 0.8904, "nll_loss": 0.8144429922103882, "rewards/accuracies": 0.625, "rewards/chosen": -0.36735445261001587, "rewards/margins": 0.012828126549720764, "rewards/rejected": -0.3801825940608978, "step": 1722 }, { "epoch": 4.717316906228611, "grad_norm": 3.5713038444519043, "learning_rate": 7.641095890410959e-07, "log_odds_chosen": 0.34574732184410095, "log_odds_ratio": -0.6578816175460815, "logits/chosen": 0.7697939872741699, "logits/rejected": 0.8703112602233887, "logps/chosen": -2.6742312908172607, "logps/rejected": -2.967989206314087, "loss": 1.0861, "nll_loss": 1.0203391313552856, "rewards/accuracies": 0.375, "rewards/chosen": -0.267423152923584, "rewards/margins": 0.029375797137618065, "rewards/rejected": -0.2967989146709442, "step": 1723 }, { "epoch": 4.720054757015743, "grad_norm": 3.414510488510132, "learning_rate": 7.63972602739726e-07, "log_odds_chosen": 0.4856795370578766, "log_odds_ratio": -0.5068976283073425, "logits/chosen": 0.765015721321106, "logits/rejected": 0.7787909507751465, "logps/chosen": -2.495548725128174, "logps/rejected": -2.919910192489624, "loss": 0.951, "nll_loss": 0.9003593325614929, "rewards/accuracies": 0.75, "rewards/chosen": -0.24955488741397858, "rewards/margins": 0.04243611916899681, "rewards/rejected": -0.2919909954071045, "step": 1724 }, { "epoch": 4.722792607802875, "grad_norm": 3.6517856121063232, "learning_rate": 7.638356164383562e-07, "log_odds_chosen": 0.554053008556366, "log_odds_ratio": -0.5172698497772217, "logits/chosen": 0.6056700348854065, "logits/rejected": 0.5628620982170105, "logps/chosen": -2.2313125133514404, "logps/rejected": -2.7360920906066895, "loss": 0.9677, "nll_loss": 0.9159753322601318, "rewards/accuracies": 0.75, "rewards/chosen": -0.2231312394142151, "rewards/margins": 0.05047798901796341, "rewards/rejected": -0.2736092507839203, "step": 1725 }, { "epoch": 4.725530458590007, "grad_norm": 4.918362617492676, "learning_rate": 7.636986301369863e-07, "log_odds_chosen": 0.898222804069519, "log_odds_ratio": -0.5816007256507874, "logits/chosen": 0.6451114416122437, "logits/rejected": 0.6791189908981323, "logps/chosen": -2.781719207763672, "logps/rejected": -3.606736898422241, "loss": 0.9835, "nll_loss": 0.9253731369972229, "rewards/accuracies": 0.875, "rewards/chosen": -0.27817192673683167, "rewards/margins": 0.08250178396701813, "rewards/rejected": -0.3606736660003662, "step": 1726 }, { "epoch": 4.728268309377139, "grad_norm": 3.6236937046051025, "learning_rate": 7.635616438356164e-07, "log_odds_chosen": 0.8546690344810486, "log_odds_ratio": -0.5630707740783691, "logits/chosen": 0.8357712626457214, "logits/rejected": 0.9219276309013367, "logps/chosen": -2.3625717163085938, "logps/rejected": -3.148566246032715, "loss": 0.8458, "nll_loss": 0.7895160913467407, "rewards/accuracies": 0.75, "rewards/chosen": -0.2362571656703949, "rewards/margins": 0.07859943807125092, "rewards/rejected": -0.314856618642807, "step": 1727 }, { "epoch": 4.731006160164271, "grad_norm": 3.2132461071014404, "learning_rate": 7.634246575342466e-07, "log_odds_chosen": 0.7266519069671631, "log_odds_ratio": -0.42898476123809814, "logits/chosen": 0.7590120434761047, "logits/rejected": 0.7338575124740601, "logps/chosen": -1.9119150638580322, "logps/rejected": -2.546339511871338, "loss": 0.9088, "nll_loss": 0.8659430146217346, "rewards/accuracies": 0.875, "rewards/chosen": -0.19119149446487427, "rewards/margins": 0.06344246119260788, "rewards/rejected": -0.25463396310806274, "step": 1728 }, { "epoch": 4.733744010951403, "grad_norm": 4.617879867553711, "learning_rate": 7.632876712328766e-07, "log_odds_chosen": 0.7605057954788208, "log_odds_ratio": -0.4765546917915344, "logits/chosen": 0.833655595779419, "logits/rejected": 0.8744411468505859, "logps/chosen": -2.4654500484466553, "logps/rejected": -3.1613736152648926, "loss": 0.7971, "nll_loss": 0.7494522929191589, "rewards/accuracies": 0.75, "rewards/chosen": -0.24654501676559448, "rewards/margins": 0.06959234178066254, "rewards/rejected": -0.3161373734474182, "step": 1729 }, { "epoch": 4.736481861738535, "grad_norm": 3.6124660968780518, "learning_rate": 7.631506849315068e-07, "log_odds_chosen": 1.461129069328308, "log_odds_ratio": -0.3516300618648529, "logits/chosen": 0.7016892433166504, "logits/rejected": 0.7065690159797668, "logps/chosen": -2.129790782928467, "logps/rejected": -3.519639492034912, "loss": 0.9077, "nll_loss": 0.8725051283836365, "rewards/accuracies": 0.75, "rewards/chosen": -0.21297909319400787, "rewards/margins": 0.13898488879203796, "rewards/rejected": -0.35196399688720703, "step": 1730 }, { "epoch": 4.739219712525667, "grad_norm": 3.7590906620025635, "learning_rate": 7.63013698630137e-07, "log_odds_chosen": 0.7656876444816589, "log_odds_ratio": -0.5298392176628113, "logits/chosen": 0.7888148427009583, "logits/rejected": 0.8290532827377319, "logps/chosen": -2.405282974243164, "logps/rejected": -3.117645740509033, "loss": 0.9114, "nll_loss": 0.8583692312240601, "rewards/accuracies": 0.75, "rewards/chosen": -0.24052828550338745, "rewards/margins": 0.07123629748821259, "rewards/rejected": -0.31176459789276123, "step": 1731 }, { "epoch": 4.741957563312799, "grad_norm": 3.3599166870117188, "learning_rate": 7.62876712328767e-07, "log_odds_chosen": 1.5277189016342163, "log_odds_ratio": -0.3346909284591675, "logits/chosen": 0.7401437163352966, "logits/rejected": 0.756454586982727, "logps/chosen": -2.0663626194000244, "logps/rejected": -3.4924285411834717, "loss": 0.9456, "nll_loss": 0.9120871424674988, "rewards/accuracies": 1.0, "rewards/chosen": -0.20663627982139587, "rewards/margins": 0.14260661602020264, "rewards/rejected": -0.3492428660392761, "step": 1732 }, { "epoch": 4.744695414099931, "grad_norm": 3.5160632133483887, "learning_rate": 7.627397260273972e-07, "log_odds_chosen": 0.5176715850830078, "log_odds_ratio": -0.48491761088371277, "logits/chosen": 0.8256598711013794, "logits/rejected": 0.7942101955413818, "logps/chosen": -2.900484561920166, "logps/rejected": -3.3552379608154297, "loss": 0.9356, "nll_loss": 0.8871021270751953, "rewards/accuracies": 0.75, "rewards/chosen": -0.2900484502315521, "rewards/margins": 0.04547535255551338, "rewards/rejected": -0.3355238139629364, "step": 1733 }, { "epoch": 4.747433264887063, "grad_norm": 5.5446062088012695, "learning_rate": 7.626027397260274e-07, "log_odds_chosen": 0.9495039582252502, "log_odds_ratio": -0.5295351147651672, "logits/chosen": 0.8389713764190674, "logits/rejected": 0.9336550235748291, "logps/chosen": -3.004936695098877, "logps/rejected": -3.9126312732696533, "loss": 0.8796, "nll_loss": 0.8266138434410095, "rewards/accuracies": 0.75, "rewards/chosen": -0.30049365758895874, "rewards/margins": 0.090769462287426, "rewards/rejected": -0.39126312732696533, "step": 1734 }, { "epoch": 4.7501711156741955, "grad_norm": 3.6901934146881104, "learning_rate": 7.624657534246575e-07, "log_odds_chosen": 0.9619263410568237, "log_odds_ratio": -0.5713016986846924, "logits/chosen": 0.6637375354766846, "logits/rejected": 0.7546592354774475, "logps/chosen": -2.377363920211792, "logps/rejected": -3.2363991737365723, "loss": 0.8682, "nll_loss": 0.8110806941986084, "rewards/accuracies": 0.875, "rewards/chosen": -0.23773640394210815, "rewards/margins": 0.08590351790189743, "rewards/rejected": -0.3236399292945862, "step": 1735 }, { "epoch": 4.7529089664613275, "grad_norm": 5.299356460571289, "learning_rate": 7.623287671232876e-07, "log_odds_chosen": 0.9707352519035339, "log_odds_ratio": -0.5132872462272644, "logits/chosen": 0.8772268891334534, "logits/rejected": 0.9410436153411865, "logps/chosen": -3.1548190116882324, "logps/rejected": -4.056970119476318, "loss": 0.8173, "nll_loss": 0.7659978270530701, "rewards/accuracies": 0.625, "rewards/chosen": -0.31548190116882324, "rewards/margins": 0.09021511673927307, "rewards/rejected": -0.4056970179080963, "step": 1736 }, { "epoch": 4.75564681724846, "grad_norm": 3.689667224884033, "learning_rate": 7.621917808219178e-07, "log_odds_chosen": 1.2524243593215942, "log_odds_ratio": -0.3633158802986145, "logits/chosen": 0.8535993099212646, "logits/rejected": 0.8823925256729126, "logps/chosen": -2.197111129760742, "logps/rejected": -3.387636661529541, "loss": 0.8693, "nll_loss": 0.8329921960830688, "rewards/accuracies": 0.75, "rewards/chosen": -0.21971115469932556, "rewards/margins": 0.11905254423618317, "rewards/rejected": -0.33876368403434753, "step": 1737 }, { "epoch": 4.758384668035592, "grad_norm": 4.103437423706055, "learning_rate": 7.620547945205479e-07, "log_odds_chosen": 0.7826310396194458, "log_odds_ratio": -0.5549753904342651, "logits/chosen": 0.7559508681297302, "logits/rejected": 0.7399241328239441, "logps/chosen": -2.6181693077087402, "logps/rejected": -3.3684439659118652, "loss": 0.9104, "nll_loss": 0.854863166809082, "rewards/accuracies": 0.625, "rewards/chosen": -0.26181694865226746, "rewards/margins": 0.07502748817205429, "rewards/rejected": -0.33684441447257996, "step": 1738 }, { "epoch": 4.761122518822724, "grad_norm": 4.183380603790283, "learning_rate": 7.619178082191781e-07, "log_odds_chosen": 0.6567135453224182, "log_odds_ratio": -0.4410019516944885, "logits/chosen": 0.8510483503341675, "logits/rejected": 0.7604172229766846, "logps/chosen": -1.9718878269195557, "logps/rejected": -2.542238712310791, "loss": 0.9201, "nll_loss": 0.8760431408882141, "rewards/accuracies": 1.0, "rewards/chosen": -0.19718879461288452, "rewards/margins": 0.05703509598970413, "rewards/rejected": -0.25422388315200806, "step": 1739 }, { "epoch": 4.763860369609857, "grad_norm": 3.709951639175415, "learning_rate": 7.617808219178082e-07, "log_odds_chosen": 2.7591662406921387, "log_odds_ratio": -0.18572717905044556, "logits/chosen": 0.6330305337905884, "logits/rejected": 0.6422973871231079, "logps/chosen": -2.150167226791382, "logps/rejected": -4.690977096557617, "loss": 1.0035, "nll_loss": 0.9849534034729004, "rewards/accuracies": 1.0, "rewards/chosen": -0.21501672267913818, "rewards/margins": 0.25408104062080383, "rewards/rejected": -0.4690977931022644, "step": 1740 }, { "epoch": 4.766598220396988, "grad_norm": 4.206192970275879, "learning_rate": 7.616438356164383e-07, "log_odds_chosen": 0.7634978890419006, "log_odds_ratio": -0.4180111289024353, "logits/chosen": 0.7515709400177002, "logits/rejected": 0.7320125102996826, "logps/chosen": -2.12168550491333, "logps/rejected": -2.805936336517334, "loss": 0.8538, "nll_loss": 0.8120234608650208, "rewards/accuracies": 1.0, "rewards/chosen": -0.21216854453086853, "rewards/margins": 0.06842507421970367, "rewards/rejected": -0.2805936336517334, "step": 1741 }, { "epoch": 4.769336071184121, "grad_norm": 4.144809722900391, "learning_rate": 7.615068493150685e-07, "log_odds_chosen": 1.050320029258728, "log_odds_ratio": -0.578643798828125, "logits/chosen": 0.9486980438232422, "logits/rejected": 0.9529126286506653, "logps/chosen": -3.522552490234375, "logps/rejected": -4.533509254455566, "loss": 0.9009, "nll_loss": 0.8430490493774414, "rewards/accuracies": 0.875, "rewards/chosen": -0.352255254983902, "rewards/margins": 0.10109564661979675, "rewards/rejected": -0.45335090160369873, "step": 1742 }, { "epoch": 4.772073921971253, "grad_norm": 3.8118717670440674, "learning_rate": 7.613698630136985e-07, "log_odds_chosen": 0.7339224815368652, "log_odds_ratio": -0.6022229790687561, "logits/chosen": 0.6212685108184814, "logits/rejected": 0.6034142971038818, "logps/chosen": -2.514979362487793, "logps/rejected": -3.155658721923828, "loss": 0.9116, "nll_loss": 0.8513404130935669, "rewards/accuracies": 0.75, "rewards/chosen": -0.25149792432785034, "rewards/margins": 0.06406795233488083, "rewards/rejected": -0.3155658543109894, "step": 1743 }, { "epoch": 4.774811772758385, "grad_norm": 5.751413345336914, "learning_rate": 7.612328767123287e-07, "log_odds_chosen": 0.8454164266586304, "log_odds_ratio": -0.5761001110076904, "logits/chosen": 0.9824445843696594, "logits/rejected": 1.0029677152633667, "logps/chosen": -2.9018259048461914, "logps/rejected": -3.6893744468688965, "loss": 0.9282, "nll_loss": 0.8705750107765198, "rewards/accuracies": 0.75, "rewards/chosen": -0.29018259048461914, "rewards/margins": 0.07875485718250275, "rewards/rejected": -0.3689374327659607, "step": 1744 }, { "epoch": 4.777549623545517, "grad_norm": 5.012581825256348, "learning_rate": 7.610958904109589e-07, "log_odds_chosen": 0.366790771484375, "log_odds_ratio": -0.5605089664459229, "logits/chosen": 0.7911971807479858, "logits/rejected": 0.8739049434661865, "logps/chosen": -2.830052137374878, "logps/rejected": -3.1833300590515137, "loss": 0.9235, "nll_loss": 0.8674969673156738, "rewards/accuracies": 0.875, "rewards/chosen": -0.2830052375793457, "rewards/margins": 0.03532778471708298, "rewards/rejected": -0.3183330297470093, "step": 1745 }, { "epoch": 4.780287474332649, "grad_norm": 5.4071550369262695, "learning_rate": 7.609589041095889e-07, "log_odds_chosen": 0.45080098509788513, "log_odds_ratio": -0.6477988958358765, "logits/chosen": 0.9979844093322754, "logits/rejected": 1.072436809539795, "logps/chosen": -3.509570360183716, "logps/rejected": -3.930441379547119, "loss": 0.8626, "nll_loss": 0.7978573441505432, "rewards/accuracies": 0.5, "rewards/chosen": -0.3509570360183716, "rewards/margins": 0.04208708927035332, "rewards/rejected": -0.3930441439151764, "step": 1746 }, { "epoch": 4.783025325119781, "grad_norm": 3.3777294158935547, "learning_rate": 7.608219178082191e-07, "log_odds_chosen": 1.6329152584075928, "log_odds_ratio": -0.3043748140335083, "logits/chosen": 0.7553640604019165, "logits/rejected": 0.6896491646766663, "logps/chosen": -1.9387693405151367, "logps/rejected": -3.4434356689453125, "loss": 0.8335, "nll_loss": 0.8030444383621216, "rewards/accuracies": 0.875, "rewards/chosen": -0.1938769519329071, "rewards/margins": 0.15046662092208862, "rewards/rejected": -0.3443435728549957, "step": 1747 }, { "epoch": 4.785763175906913, "grad_norm": 5.8426103591918945, "learning_rate": 7.606849315068493e-07, "log_odds_chosen": 1.5204641819000244, "log_odds_ratio": -0.418619304895401, "logits/chosen": 0.7556575536727905, "logits/rejected": 0.7832249402999878, "logps/chosen": -2.5186142921447754, "logps/rejected": -3.902834415435791, "loss": 0.9624, "nll_loss": 0.920539379119873, "rewards/accuracies": 0.875, "rewards/chosen": -0.25186145305633545, "rewards/margins": 0.13842199742794037, "rewards/rejected": -0.390283465385437, "step": 1748 }, { "epoch": 4.788501026694045, "grad_norm": 4.400667667388916, "learning_rate": 7.605479452054794e-07, "log_odds_chosen": 1.8102737665176392, "log_odds_ratio": -0.24323046207427979, "logits/chosen": 0.5358114242553711, "logits/rejected": 0.42056357860565186, "logps/chosen": -1.996147632598877, "logps/rejected": -3.704841136932373, "loss": 0.9608, "nll_loss": 0.9364590644836426, "rewards/accuracies": 0.875, "rewards/chosen": -0.19961479306221008, "rewards/margins": 0.17086932063102722, "rewards/rejected": -0.3704840838909149, "step": 1749 }, { "epoch": 4.791238877481177, "grad_norm": 4.588626384735107, "learning_rate": 7.604109589041095e-07, "log_odds_chosen": 0.6934360265731812, "log_odds_ratio": -0.4597075581550598, "logits/chosen": 0.9550234079360962, "logits/rejected": 0.9413894414901733, "logps/chosen": -2.313983678817749, "logps/rejected": -2.9332406520843506, "loss": 0.9874, "nll_loss": 0.9414464831352234, "rewards/accuracies": 0.75, "rewards/chosen": -0.23139838874340057, "rewards/margins": 0.061925675719976425, "rewards/rejected": -0.2933240532875061, "step": 1750 }, { "epoch": 4.7939767282683095, "grad_norm": 3.7991302013397217, "learning_rate": 7.602739726027397e-07, "log_odds_chosen": 1.1565866470336914, "log_odds_ratio": -0.3763726055622101, "logits/chosen": 0.8634361028671265, "logits/rejected": 0.776199460029602, "logps/chosen": -2.1602134704589844, "logps/rejected": -3.220672845840454, "loss": 0.938, "nll_loss": 0.9003457427024841, "rewards/accuracies": 0.875, "rewards/chosen": -0.216021329164505, "rewards/margins": 0.10604596138000488, "rewards/rejected": -0.3220672905445099, "step": 1751 }, { "epoch": 4.7967145790554415, "grad_norm": 4.9309797286987305, "learning_rate": 7.601369863013698e-07, "log_odds_chosen": 0.5596356987953186, "log_odds_ratio": -0.5721007585525513, "logits/chosen": 0.7902297377586365, "logits/rejected": 0.7809349894523621, "logps/chosen": -2.2872252464294434, "logps/rejected": -2.8344552516937256, "loss": 0.9431, "nll_loss": 0.8859003186225891, "rewards/accuracies": 0.75, "rewards/chosen": -0.22872254252433777, "rewards/margins": 0.05472300946712494, "rewards/rejected": -0.2834455370903015, "step": 1752 }, { "epoch": 4.799452429842574, "grad_norm": 5.026459217071533, "learning_rate": 7.599999999999999e-07, "log_odds_chosen": 1.1588172912597656, "log_odds_ratio": -0.6097798347473145, "logits/chosen": 0.9452413320541382, "logits/rejected": 0.8696995973587036, "logps/chosen": -2.2280187606811523, "logps/rejected": -3.2706103324890137, "loss": 0.9163, "nll_loss": 0.8552868962287903, "rewards/accuracies": 0.625, "rewards/chosen": -0.22280186414718628, "rewards/margins": 0.10425914824008942, "rewards/rejected": -0.3270610272884369, "step": 1753 }, { "epoch": 4.802190280629706, "grad_norm": 5.900476932525635, "learning_rate": 7.598630136986301e-07, "log_odds_chosen": 0.7618038654327393, "log_odds_ratio": -0.5057412385940552, "logits/chosen": 0.896091639995575, "logits/rejected": 0.7882298231124878, "logps/chosen": -2.7817976474761963, "logps/rejected": -3.5104901790618896, "loss": 0.9009, "nll_loss": 0.8502963185310364, "rewards/accuracies": 0.625, "rewards/chosen": -0.27817976474761963, "rewards/margins": 0.07286925613880157, "rewards/rejected": -0.3510490357875824, "step": 1754 }, { "epoch": 4.804928131416838, "grad_norm": 3.8831892013549805, "learning_rate": 7.597260273972602e-07, "log_odds_chosen": 1.3639823198318481, "log_odds_ratio": -0.3519449234008789, "logits/chosen": 0.7783852219581604, "logits/rejected": 0.8268899321556091, "logps/chosen": -1.8507204055786133, "logps/rejected": -3.122274875640869, "loss": 0.8737, "nll_loss": 0.8384820222854614, "rewards/accuracies": 1.0, "rewards/chosen": -0.18507203459739685, "rewards/margins": 0.12715545296669006, "rewards/rejected": -0.3122274875640869, "step": 1755 }, { "epoch": 4.80766598220397, "grad_norm": 4.179388523101807, "learning_rate": 7.595890410958904e-07, "log_odds_chosen": 0.4275929927825928, "log_odds_ratio": -0.5499825477600098, "logits/chosen": 1.0247756242752075, "logits/rejected": 1.0502692461013794, "logps/chosen": -2.9848856925964355, "logps/rejected": -3.3861470222473145, "loss": 0.8034, "nll_loss": 0.748438835144043, "rewards/accuracies": 0.75, "rewards/chosen": -0.2984885573387146, "rewards/margins": 0.040126144886016846, "rewards/rejected": -0.33861470222473145, "step": 1756 }, { "epoch": 4.810403832991102, "grad_norm": 5.199780464172363, "learning_rate": 7.594520547945204e-07, "log_odds_chosen": 1.870217204093933, "log_odds_ratio": -0.45060452818870544, "logits/chosen": 0.6677019596099854, "logits/rejected": 0.6240675449371338, "logps/chosen": -2.9416637420654297, "logps/rejected": -4.726999282836914, "loss": 0.9216, "nll_loss": 0.876541793346405, "rewards/accuracies": 0.875, "rewards/chosen": -0.2941663861274719, "rewards/margins": 0.17853356897830963, "rewards/rejected": -0.47269994020462036, "step": 1757 }, { "epoch": 4.813141683778234, "grad_norm": 5.954942226409912, "learning_rate": 7.593150684931506e-07, "log_odds_chosen": 0.9115166664123535, "log_odds_ratio": -0.42979195713996887, "logits/chosen": 0.9658045172691345, "logits/rejected": 1.0118669271469116, "logps/chosen": -3.320791244506836, "logps/rejected": -4.192782878875732, "loss": 0.8012, "nll_loss": 0.7581925392150879, "rewards/accuracies": 0.875, "rewards/chosen": -0.332079142332077, "rewards/margins": 0.08719919621944427, "rewards/rejected": -0.4192783236503601, "step": 1758 }, { "epoch": 4.815879534565366, "grad_norm": 5.087444305419922, "learning_rate": 7.591780821917808e-07, "log_odds_chosen": 0.8666311502456665, "log_odds_ratio": -0.3913012742996216, "logits/chosen": 0.7769392728805542, "logits/rejected": 0.7480965852737427, "logps/chosen": -2.0369672775268555, "logps/rejected": -2.7749199867248535, "loss": 0.9359, "nll_loss": 0.8967256546020508, "rewards/accuracies": 1.0, "rewards/chosen": -0.20369672775268555, "rewards/margins": 0.07379525899887085, "rewards/rejected": -0.2774919867515564, "step": 1759 }, { "epoch": 4.818617385352498, "grad_norm": 3.220886707305908, "learning_rate": 7.590410958904108e-07, "log_odds_chosen": 0.3610741198062897, "log_odds_ratio": -0.6442669630050659, "logits/chosen": 0.7860620617866516, "logits/rejected": 0.8184903860092163, "logps/chosen": -2.1580305099487305, "logps/rejected": -2.444364547729492, "loss": 0.951, "nll_loss": 0.8865370750427246, "rewards/accuracies": 0.75, "rewards/chosen": -0.2158030867576599, "rewards/margins": 0.028633393347263336, "rewards/rejected": -0.24443645775318146, "step": 1760 }, { "epoch": 4.82135523613963, "grad_norm": 3.120281457901001, "learning_rate": 7.58904109589041e-07, "log_odds_chosen": 1.592954397201538, "log_odds_ratio": -0.2679228186607361, "logits/chosen": 0.7724639177322388, "logits/rejected": 0.8404754996299744, "logps/chosen": -2.092072010040283, "logps/rejected": -3.567225694656372, "loss": 0.8107, "nll_loss": 0.7838830947875977, "rewards/accuracies": 1.0, "rewards/chosen": -0.2092072069644928, "rewards/margins": 0.14751538634300232, "rewards/rejected": -0.3567225933074951, "step": 1761 }, { "epoch": 4.824093086926762, "grad_norm": 3.5604171752929688, "learning_rate": 7.587671232876712e-07, "log_odds_chosen": 0.9701237678527832, "log_odds_ratio": -0.49484753608703613, "logits/chosen": 0.5074792504310608, "logits/rejected": 0.4719913601875305, "logps/chosen": -2.1617424488067627, "logps/rejected": -3.009913921356201, "loss": 0.95, "nll_loss": 0.9005116820335388, "rewards/accuracies": 0.875, "rewards/chosen": -0.21617427468299866, "rewards/margins": 0.08481711894273758, "rewards/rejected": -0.30099135637283325, "step": 1762 }, { "epoch": 4.826830937713894, "grad_norm": 3.35660457611084, "learning_rate": 7.586301369863013e-07, "log_odds_chosen": 0.7286208271980286, "log_odds_ratio": -0.46734923124313354, "logits/chosen": 0.6285302639007568, "logits/rejected": 0.6312878131866455, "logps/chosen": -2.531616449356079, "logps/rejected": -3.2197513580322266, "loss": 0.9291, "nll_loss": 0.8823789358139038, "rewards/accuracies": 0.75, "rewards/chosen": -0.25316163897514343, "rewards/margins": 0.06881347298622131, "rewards/rejected": -0.32197511196136475, "step": 1763 }, { "epoch": 4.829568788501026, "grad_norm": 3.211651086807251, "learning_rate": 7.584931506849314e-07, "log_odds_chosen": 1.0051878690719604, "log_odds_ratio": -0.4389594793319702, "logits/chosen": 0.8174294233322144, "logits/rejected": 0.7590107321739197, "logps/chosen": -2.489762783050537, "logps/rejected": -3.4596049785614014, "loss": 1.0174, "nll_loss": 0.9735349416732788, "rewards/accuracies": 0.875, "rewards/chosen": -0.24897629022598267, "rewards/margins": 0.09698420763015747, "rewards/rejected": -0.34596049785614014, "step": 1764 }, { "epoch": 4.832306639288159, "grad_norm": 5.262167930603027, "learning_rate": 7.583561643835616e-07, "log_odds_chosen": 0.2046392560005188, "log_odds_ratio": -0.8141217827796936, "logits/chosen": 0.7072827816009521, "logits/rejected": 0.7333720922470093, "logps/chosen": -2.8605427742004395, "logps/rejected": -3.03835391998291, "loss": 0.9683, "nll_loss": 0.8869318962097168, "rewards/accuracies": 0.625, "rewards/chosen": -0.2860542833805084, "rewards/margins": 0.01778109185397625, "rewards/rejected": -0.30383536219596863, "step": 1765 }, { "epoch": 4.8350444900752905, "grad_norm": 3.9635403156280518, "learning_rate": 7.582191780821917e-07, "log_odds_chosen": 0.8806056976318359, "log_odds_ratio": -0.5539789795875549, "logits/chosen": 0.7908933162689209, "logits/rejected": 0.7254857420921326, "logps/chosen": -3.229522228240967, "logps/rejected": -4.0864057540893555, "loss": 1.0705, "nll_loss": 1.0151307582855225, "rewards/accuracies": 0.625, "rewards/chosen": -0.3229522407054901, "rewards/margins": 0.08568833023309708, "rewards/rejected": -0.4086405336856842, "step": 1766 }, { "epoch": 4.837782340862423, "grad_norm": 3.60837984085083, "learning_rate": 7.580821917808218e-07, "log_odds_chosen": 0.7899218201637268, "log_odds_ratio": -0.5103141069412231, "logits/chosen": 0.6488383412361145, "logits/rejected": 0.6901776790618896, "logps/chosen": -2.239603042602539, "logps/rejected": -2.9925050735473633, "loss": 0.9199, "nll_loss": 0.8689026236534119, "rewards/accuracies": 0.75, "rewards/chosen": -0.2239602953195572, "rewards/margins": 0.07529023289680481, "rewards/rejected": -0.2992505431175232, "step": 1767 }, { "epoch": 4.840520191649555, "grad_norm": 3.986372470855713, "learning_rate": 7.57945205479452e-07, "log_odds_chosen": 1.4928847551345825, "log_odds_ratio": -0.3131367564201355, "logits/chosen": 0.9004016518592834, "logits/rejected": 0.9094754457473755, "logps/chosen": -2.1751601696014404, "logps/rejected": -3.5700125694274902, "loss": 0.9156, "nll_loss": 0.8842991590499878, "rewards/accuracies": 0.875, "rewards/chosen": -0.21751603484153748, "rewards/margins": 0.13948526978492737, "rewards/rejected": -0.35700130462646484, "step": 1768 }, { "epoch": 4.843258042436688, "grad_norm": 3.3598272800445557, "learning_rate": 7.578082191780821e-07, "log_odds_chosen": 0.2786439061164856, "log_odds_ratio": -0.6278733015060425, "logits/chosen": 0.7313024997711182, "logits/rejected": 0.7478454113006592, "logps/chosen": -2.093632221221924, "logps/rejected": -2.3634555339813232, "loss": 0.9663, "nll_loss": 0.9035375118255615, "rewards/accuracies": 0.5, "rewards/chosen": -0.20936323702335358, "rewards/margins": 0.026982324197888374, "rewards/rejected": -0.2363455593585968, "step": 1769 }, { "epoch": 4.84599589322382, "grad_norm": 3.7781195640563965, "learning_rate": 7.576712328767123e-07, "log_odds_chosen": 0.41013938188552856, "log_odds_ratio": -0.6728736162185669, "logits/chosen": 0.8182739019393921, "logits/rejected": 0.7940815687179565, "logps/chosen": -2.0696256160736084, "logps/rejected": -2.46940279006958, "loss": 0.945, "nll_loss": 0.8776806592941284, "rewards/accuracies": 0.5, "rewards/chosen": -0.20696255564689636, "rewards/margins": 0.03997771441936493, "rewards/rejected": -0.2469402700662613, "step": 1770 }, { "epoch": 4.848733744010952, "grad_norm": 4.649789810180664, "learning_rate": 7.575342465753424e-07, "log_odds_chosen": 1.2326184511184692, "log_odds_ratio": -0.4304565191268921, "logits/chosen": 0.7722182273864746, "logits/rejected": 0.7658658027648926, "logps/chosen": -1.9578137397766113, "logps/rejected": -3.097592830657959, "loss": 0.8882, "nll_loss": 0.8451899290084839, "rewards/accuracies": 0.875, "rewards/chosen": -0.1957813799381256, "rewards/margins": 0.11397792398929596, "rewards/rejected": -0.30975931882858276, "step": 1771 }, { "epoch": 4.851471594798084, "grad_norm": 4.660722732543945, "learning_rate": 7.573972602739725e-07, "log_odds_chosen": 1.1772130727767944, "log_odds_ratio": -0.3962666094303131, "logits/chosen": 0.6481201648712158, "logits/rejected": 0.6050413846969604, "logps/chosen": -2.556767225265503, "logps/rejected": -3.604361057281494, "loss": 0.9601, "nll_loss": 0.9204820990562439, "rewards/accuracies": 0.875, "rewards/chosen": -0.2556767463684082, "rewards/margins": 0.10475938022136688, "rewards/rejected": -0.3604361116886139, "step": 1772 }, { "epoch": 4.854209445585216, "grad_norm": 3.5009267330169678, "learning_rate": 7.572602739726028e-07, "log_odds_chosen": 1.4671388864517212, "log_odds_ratio": -0.4578140377998352, "logits/chosen": 0.8115009665489197, "logits/rejected": 0.8454970717430115, "logps/chosen": -2.1125245094299316, "logps/rejected": -3.4828176498413086, "loss": 0.9002, "nll_loss": 0.8543888330459595, "rewards/accuracies": 0.75, "rewards/chosen": -0.21125245094299316, "rewards/margins": 0.13702933490276337, "rewards/rejected": -0.34828177094459534, "step": 1773 }, { "epoch": 4.856947296372348, "grad_norm": 6.259756088256836, "learning_rate": 7.571232876712327e-07, "log_odds_chosen": 1.431046485900879, "log_odds_ratio": -0.32338857650756836, "logits/chosen": 1.0975526571273804, "logits/rejected": 1.154648780822754, "logps/chosen": -2.3322558403015137, "logps/rejected": -3.6750874519348145, "loss": 0.7297, "nll_loss": 0.6973778009414673, "rewards/accuracies": 0.875, "rewards/chosen": -0.23322558403015137, "rewards/margins": 0.134283185005188, "rewards/rejected": -0.36750876903533936, "step": 1774 }, { "epoch": 4.85968514715948, "grad_norm": 4.165180683135986, "learning_rate": 7.56986301369863e-07, "log_odds_chosen": 1.3226457834243774, "log_odds_ratio": -0.5268608331680298, "logits/chosen": 0.9660508632659912, "logits/rejected": 0.9679558873176575, "logps/chosen": -2.5067620277404785, "logps/rejected": -3.7709879875183105, "loss": 0.9622, "nll_loss": 0.9094902276992798, "rewards/accuracies": 0.625, "rewards/chosen": -0.2506762146949768, "rewards/margins": 0.12642256915569305, "rewards/rejected": -0.37709876894950867, "step": 1775 }, { "epoch": 4.862422997946612, "grad_norm": 3.905470848083496, "learning_rate": 7.568493150684932e-07, "log_odds_chosen": 0.6533956527709961, "log_odds_ratio": -0.5383711457252502, "logits/chosen": 0.8251386880874634, "logits/rejected": 0.8767499923706055, "logps/chosen": -2.427276372909546, "logps/rejected": -3.0130774974823, "loss": 0.9443, "nll_loss": 0.8904179930686951, "rewards/accuracies": 0.625, "rewards/chosen": -0.24272765219211578, "rewards/margins": 0.05858011543750763, "rewards/rejected": -0.301307737827301, "step": 1776 }, { "epoch": 4.865160848733744, "grad_norm": 3.2314209938049316, "learning_rate": 7.567123287671233e-07, "log_odds_chosen": 1.7282030582427979, "log_odds_ratio": -0.3334197700023651, "logits/chosen": 0.9874659180641174, "logits/rejected": 1.0719388723373413, "logps/chosen": -2.4159295558929443, "logps/rejected": -4.038224220275879, "loss": 0.7317, "nll_loss": 0.6983240842819214, "rewards/accuracies": 0.75, "rewards/chosen": -0.24159295856952667, "rewards/margins": 0.1622294783592224, "rewards/rejected": -0.4038224518299103, "step": 1777 }, { "epoch": 4.867898699520876, "grad_norm": 3.914181709289551, "learning_rate": 7.565753424657534e-07, "log_odds_chosen": 1.7648284435272217, "log_odds_ratio": -0.4559519290924072, "logits/chosen": 0.890032172203064, "logits/rejected": 0.9121609926223755, "logps/chosen": -2.898531198501587, "logps/rejected": -4.559649467468262, "loss": 0.83, "nll_loss": 0.7843746542930603, "rewards/accuracies": 0.75, "rewards/chosen": -0.28985312581062317, "rewards/margins": 0.16611185669898987, "rewards/rejected": -0.45596498250961304, "step": 1778 }, { "epoch": 4.870636550308008, "grad_norm": 3.454958915710449, "learning_rate": 7.564383561643836e-07, "log_odds_chosen": 0.9905015826225281, "log_odds_ratio": -0.5555562973022461, "logits/chosen": 0.5349529981613159, "logits/rejected": 0.471769779920578, "logps/chosen": -1.956844687461853, "logps/rejected": -2.8836042881011963, "loss": 1.0077, "nll_loss": 0.9521344900131226, "rewards/accuracies": 0.625, "rewards/chosen": -0.19568446278572083, "rewards/margins": 0.09267596900463104, "rewards/rejected": -0.2883604168891907, "step": 1779 }, { "epoch": 4.87337440109514, "grad_norm": 4.263274669647217, "learning_rate": 7.563013698630137e-07, "log_odds_chosen": 0.8281317353248596, "log_odds_ratio": -0.5853127241134644, "logits/chosen": 0.7106363773345947, "logits/rejected": 0.7342968583106995, "logps/chosen": -3.0668373107910156, "logps/rejected": -3.8379781246185303, "loss": 0.9571, "nll_loss": 0.8985552191734314, "rewards/accuracies": 0.625, "rewards/chosen": -0.3066837191581726, "rewards/margins": 0.07711409032344818, "rewards/rejected": -0.383797824382782, "step": 1780 }, { "epoch": 4.876112251882272, "grad_norm": 3.8333029747009277, "learning_rate": 7.561643835616438e-07, "log_odds_chosen": 1.1919814348220825, "log_odds_ratio": -0.4437718391418457, "logits/chosen": 0.819502592086792, "logits/rejected": 0.778333842754364, "logps/chosen": -1.7320764064788818, "logps/rejected": -2.848022937774658, "loss": 0.9498, "nll_loss": 0.9054707288742065, "rewards/accuracies": 0.75, "rewards/chosen": -0.17320765554904938, "rewards/margins": 0.11159464716911316, "rewards/rejected": -0.28480228781700134, "step": 1781 }, { "epoch": 4.878850102669404, "grad_norm": 4.265207290649414, "learning_rate": 7.56027397260274e-07, "log_odds_chosen": 0.2897372841835022, "log_odds_ratio": -0.6022578477859497, "logits/chosen": 0.8531051874160767, "logits/rejected": 0.8736448287963867, "logps/chosen": -2.513188600540161, "logps/rejected": -2.8123531341552734, "loss": 0.8897, "nll_loss": 0.8294273018836975, "rewards/accuracies": 0.75, "rewards/chosen": -0.25131887197494507, "rewards/margins": 0.029916437342762947, "rewards/rejected": -0.28123530745506287, "step": 1782 }, { "epoch": 4.8815879534565365, "grad_norm": 3.668088674545288, "learning_rate": 7.558904109589041e-07, "log_odds_chosen": 1.4479458332061768, "log_odds_ratio": -0.5108656287193298, "logits/chosen": 0.9845129251480103, "logits/rejected": 1.0314483642578125, "logps/chosen": -2.370650053024292, "logps/rejected": -3.67238450050354, "loss": 0.7441, "nll_loss": 0.692974865436554, "rewards/accuracies": 0.75, "rewards/chosen": -0.23706501722335815, "rewards/margins": 0.1301734745502472, "rewards/rejected": -0.36723846197128296, "step": 1783 }, { "epoch": 4.884325804243669, "grad_norm": 3.6208038330078125, "learning_rate": 7.557534246575343e-07, "log_odds_chosen": 0.3991731107234955, "log_odds_ratio": -0.5736833810806274, "logits/chosen": 0.9362095594406128, "logits/rejected": 0.9894609451293945, "logps/chosen": -2.8494629859924316, "logps/rejected": -3.2226932048797607, "loss": 0.9538, "nll_loss": 0.8964481353759766, "rewards/accuracies": 0.875, "rewards/chosen": -0.2849462926387787, "rewards/margins": 0.037323057651519775, "rewards/rejected": -0.32226935029029846, "step": 1784 }, { "epoch": 4.887063655030801, "grad_norm": 4.133472442626953, "learning_rate": 7.556164383561644e-07, "log_odds_chosen": 1.5656158924102783, "log_odds_ratio": -0.31753718852996826, "logits/chosen": 0.6865185499191284, "logits/rejected": 0.6838859915733337, "logps/chosen": -2.5276317596435547, "logps/rejected": -4.030586242675781, "loss": 0.9549, "nll_loss": 0.9231914281845093, "rewards/accuracies": 1.0, "rewards/chosen": -0.25276315212249756, "rewards/margins": 0.1502954661846161, "rewards/rejected": -0.40305864810943604, "step": 1785 }, { "epoch": 4.889801505817933, "grad_norm": 3.4931328296661377, "learning_rate": 7.554794520547945e-07, "log_odds_chosen": 1.937791109085083, "log_odds_ratio": -0.17363207042217255, "logits/chosen": 0.8433494567871094, "logits/rejected": 0.874274492263794, "logps/chosen": -2.494861602783203, "logps/rejected": -4.3150105476379395, "loss": 0.7448, "nll_loss": 0.7274332642555237, "rewards/accuracies": 1.0, "rewards/chosen": -0.24948619306087494, "rewards/margins": 0.1820148527622223, "rewards/rejected": -0.43150103092193604, "step": 1786 }, { "epoch": 4.892539356605065, "grad_norm": 4.470250606536865, "learning_rate": 7.553424657534247e-07, "log_odds_chosen": 0.6917774677276611, "log_odds_ratio": -0.6339850425720215, "logits/chosen": 0.6970484256744385, "logits/rejected": 0.8276495933532715, "logps/chosen": -2.8183374404907227, "logps/rejected": -3.4736664295196533, "loss": 1.0589, "nll_loss": 0.9955502152442932, "rewards/accuracies": 0.5, "rewards/chosen": -0.2818337678909302, "rewards/margins": 0.06553289294242859, "rewards/rejected": -0.34736666083335876, "step": 1787 }, { "epoch": 4.895277207392197, "grad_norm": 4.633336067199707, "learning_rate": 7.552054794520547e-07, "log_odds_chosen": 0.5668013095855713, "log_odds_ratio": -0.646105170249939, "logits/chosen": 0.9614087343215942, "logits/rejected": 0.9574475288391113, "logps/chosen": -3.1394920349121094, "logps/rejected": -3.664031505584717, "loss": 0.8707, "nll_loss": 0.8060613870620728, "rewards/accuracies": 0.875, "rewards/chosen": -0.31394922733306885, "rewards/margins": 0.05245392397046089, "rewards/rejected": -0.36640316247940063, "step": 1788 }, { "epoch": 4.898015058179329, "grad_norm": 3.4686741828918457, "learning_rate": 7.550684931506849e-07, "log_odds_chosen": 2.2895889282226562, "log_odds_ratio": -0.1841270625591278, "logits/chosen": 0.7548472285270691, "logits/rejected": 0.6520177125930786, "logps/chosen": -1.8415157794952393, "logps/rejected": -3.960845470428467, "loss": 0.8724, "nll_loss": 0.8539453148841858, "rewards/accuracies": 1.0, "rewards/chosen": -0.18415158987045288, "rewards/margins": 0.2119329273700714, "rewards/rejected": -0.3960845470428467, "step": 1789 }, { "epoch": 4.900752908966461, "grad_norm": 3.476618528366089, "learning_rate": 7.549315068493151e-07, "log_odds_chosen": 1.0537500381469727, "log_odds_ratio": -0.38131487369537354, "logits/chosen": 1.0277824401855469, "logits/rejected": 1.0586128234863281, "logps/chosen": -2.285369873046875, "logps/rejected": -3.2548370361328125, "loss": 0.7533, "nll_loss": 0.715203046798706, "rewards/accuracies": 0.875, "rewards/chosen": -0.22853699326515198, "rewards/margins": 0.09694670140743256, "rewards/rejected": -0.32548367977142334, "step": 1790 }, { "epoch": 4.903490759753593, "grad_norm": 4.465420722961426, "learning_rate": 7.547945205479452e-07, "log_odds_chosen": 0.41462233662605286, "log_odds_ratio": -0.5776976943016052, "logits/chosen": 0.5467899441719055, "logits/rejected": 0.4970310926437378, "logps/chosen": -2.759066104888916, "logps/rejected": -3.1338729858398438, "loss": 1.0159, "nll_loss": 0.9580878019332886, "rewards/accuracies": 0.625, "rewards/chosen": -0.27590662240982056, "rewards/margins": 0.0374806672334671, "rewards/rejected": -0.31338727474212646, "step": 1791 }, { "epoch": 4.906228610540726, "grad_norm": 4.097670078277588, "learning_rate": 7.546575342465753e-07, "log_odds_chosen": 1.1824204921722412, "log_odds_ratio": -0.3275451362133026, "logits/chosen": 0.6798627972602844, "logits/rejected": 0.6954200267791748, "logps/chosen": -2.0594024658203125, "logps/rejected": -3.0991153717041016, "loss": 0.8985, "nll_loss": 0.8656958937644958, "rewards/accuracies": 0.875, "rewards/chosen": -0.20594024658203125, "rewards/margins": 0.10397130250930786, "rewards/rejected": -0.3099115490913391, "step": 1792 }, { "epoch": 4.908966461327857, "grad_norm": 2.898559808731079, "learning_rate": 7.545205479452055e-07, "log_odds_chosen": 1.901474952697754, "log_odds_ratio": -0.29424262046813965, "logits/chosen": 0.780353844165802, "logits/rejected": 0.7332812547683716, "logps/chosen": -2.0657806396484375, "logps/rejected": -3.8722667694091797, "loss": 1.0011, "nll_loss": 0.9716957807540894, "rewards/accuracies": 0.875, "rewards/chosen": -0.2065780758857727, "rewards/margins": 0.18064860999584198, "rewards/rejected": -0.3872267007827759, "step": 1793 }, { "epoch": 4.91170431211499, "grad_norm": 3.480203866958618, "learning_rate": 7.543835616438356e-07, "log_odds_chosen": 1.7861480712890625, "log_odds_ratio": -0.3070313334465027, "logits/chosen": 0.8044098019599915, "logits/rejected": 0.7256348729133606, "logps/chosen": -2.3793294429779053, "logps/rejected": -4.064549922943115, "loss": 0.8862, "nll_loss": 0.8554733991622925, "rewards/accuracies": 1.0, "rewards/chosen": -0.2379329651594162, "rewards/margins": 0.16852204501628876, "rewards/rejected": -0.40645501017570496, "step": 1794 }, { "epoch": 4.914442162902122, "grad_norm": 3.5765907764434814, "learning_rate": 7.542465753424657e-07, "log_odds_chosen": 1.6464003324508667, "log_odds_ratio": -0.33447128534317017, "logits/chosen": 0.8093116283416748, "logits/rejected": 0.8159542083740234, "logps/chosen": -2.610170841217041, "logps/rejected": -4.176556587219238, "loss": 0.8678, "nll_loss": 0.8343340158462524, "rewards/accuracies": 0.75, "rewards/chosen": -0.2610170841217041, "rewards/margins": 0.15663856267929077, "rewards/rejected": -0.4176556468009949, "step": 1795 }, { "epoch": 4.917180013689254, "grad_norm": 3.367141008377075, "learning_rate": 7.541095890410959e-07, "log_odds_chosen": 1.132559061050415, "log_odds_ratio": -0.38014376163482666, "logits/chosen": 0.7405661344528198, "logits/rejected": 0.6695293188095093, "logps/chosen": -1.9048998355865479, "logps/rejected": -2.9645910263061523, "loss": 0.8823, "nll_loss": 0.844294548034668, "rewards/accuracies": 0.875, "rewards/chosen": -0.1904900074005127, "rewards/margins": 0.1059691309928894, "rewards/rejected": -0.2964591383934021, "step": 1796 }, { "epoch": 4.919917864476386, "grad_norm": 4.721938610076904, "learning_rate": 7.53972602739726e-07, "log_odds_chosen": 0.6683520078659058, "log_odds_ratio": -0.5884090065956116, "logits/chosen": 0.8344002366065979, "logits/rejected": 0.8949122428894043, "logps/chosen": -2.738374710083008, "logps/rejected": -3.3519182205200195, "loss": 0.8989, "nll_loss": 0.8400799036026001, "rewards/accuracies": 0.875, "rewards/chosen": -0.27383747696876526, "rewards/margins": 0.061354368925094604, "rewards/rejected": -0.33519184589385986, "step": 1797 }, { "epoch": 4.922655715263518, "grad_norm": 4.187774181365967, "learning_rate": 7.538356164383562e-07, "log_odds_chosen": 1.6662769317626953, "log_odds_ratio": -0.2534382939338684, "logits/chosen": 0.6480847597122192, "logits/rejected": 0.5941805839538574, "logps/chosen": -2.123356342315674, "logps/rejected": -3.680217981338501, "loss": 0.9433, "nll_loss": 0.9179602861404419, "rewards/accuracies": 1.0, "rewards/chosen": -0.21233563125133514, "rewards/margins": 0.1556861698627472, "rewards/rejected": -0.36802178621292114, "step": 1798 }, { "epoch": 4.9253935660506505, "grad_norm": 5.107511043548584, "learning_rate": 7.536986301369863e-07, "log_odds_chosen": 1.0725858211517334, "log_odds_ratio": -0.7207825183868408, "logits/chosen": 1.0311331748962402, "logits/rejected": 1.062627911567688, "logps/chosen": -3.068458080291748, "logps/rejected": -4.159906387329102, "loss": 0.8748, "nll_loss": 0.802705705165863, "rewards/accuracies": 0.625, "rewards/chosen": -0.3068458139896393, "rewards/margins": 0.10914482921361923, "rewards/rejected": -0.4159906804561615, "step": 1799 }, { "epoch": 4.9281314168377826, "grad_norm": 3.0285773277282715, "learning_rate": 7.535616438356164e-07, "log_odds_chosen": 2.007080316543579, "log_odds_ratio": -0.24291250109672546, "logits/chosen": 0.8970674872398376, "logits/rejected": 0.9226523637771606, "logps/chosen": -2.7869150638580322, "logps/rejected": -4.731878757476807, "loss": 0.84, "nll_loss": 0.8157323002815247, "rewards/accuracies": 1.0, "rewards/chosen": -0.27869153022766113, "rewards/margins": 0.19449636340141296, "rewards/rejected": -0.4731878638267517, "step": 1800 }, { "epoch": 4.930869267624915, "grad_norm": 3.7171928882598877, "learning_rate": 7.534246575342466e-07, "log_odds_chosen": 0.937185525894165, "log_odds_ratio": -0.4508891701698303, "logits/chosen": 0.7668732404708862, "logits/rejected": 0.8541922569274902, "logps/chosen": -2.762993335723877, "logps/rejected": -3.6107962131500244, "loss": 0.7892, "nll_loss": 0.7441409826278687, "rewards/accuracies": 0.75, "rewards/chosen": -0.2762993574142456, "rewards/margins": 0.08478027582168579, "rewards/rejected": -0.3610796332359314, "step": 1801 }, { "epoch": 4.933607118412047, "grad_norm": 3.897247791290283, "learning_rate": 7.532876712328767e-07, "log_odds_chosen": 3.0737547874450684, "log_odds_ratio": -0.11126988381147385, "logits/chosen": 0.702007532119751, "logits/rejected": 0.6860786080360413, "logps/chosen": -2.1968183517456055, "logps/rejected": -5.122331619262695, "loss": 0.867, "nll_loss": 0.8558286428451538, "rewards/accuracies": 1.0, "rewards/chosen": -0.21968184411525726, "rewards/margins": 0.29255133867263794, "rewards/rejected": -0.5122331976890564, "step": 1802 }, { "epoch": 4.936344969199179, "grad_norm": 4.221858024597168, "learning_rate": 7.531506849315068e-07, "log_odds_chosen": 1.1005897521972656, "log_odds_ratio": -0.37658607959747314, "logits/chosen": 0.6371432542800903, "logits/rejected": 0.5979728698730469, "logps/chosen": -2.804023027420044, "logps/rejected": -3.839128017425537, "loss": 0.9373, "nll_loss": 0.8996504545211792, "rewards/accuracies": 0.75, "rewards/chosen": -0.2804023027420044, "rewards/margins": 0.10351049154996872, "rewards/rejected": -0.3839128017425537, "step": 1803 }, { "epoch": 4.939082819986311, "grad_norm": 3.1638123989105225, "learning_rate": 7.53013698630137e-07, "log_odds_chosen": 1.0704460144042969, "log_odds_ratio": -0.3409513235092163, "logits/chosen": 0.9342436790466309, "logits/rejected": 0.9852416515350342, "logps/chosen": -2.011826515197754, "logps/rejected": -2.9966750144958496, "loss": 0.7852, "nll_loss": 0.7511271238327026, "rewards/accuracies": 1.0, "rewards/chosen": -0.20118266344070435, "rewards/margins": 0.0984848290681839, "rewards/rejected": -0.29966747760772705, "step": 1804 }, { "epoch": 4.941820670773443, "grad_norm": 4.0020012855529785, "learning_rate": 7.528767123287671e-07, "log_odds_chosen": 0.6170839667320251, "log_odds_ratio": -0.5544857978820801, "logits/chosen": 0.9743422269821167, "logits/rejected": 0.9782706499099731, "logps/chosen": -2.721747875213623, "logps/rejected": -3.2599563598632812, "loss": 0.8607, "nll_loss": 0.8052851557731628, "rewards/accuracies": 0.625, "rewards/chosen": -0.27217477560043335, "rewards/margins": 0.053820863366127014, "rewards/rejected": -0.32599565386772156, "step": 1805 }, { "epoch": 4.944558521560575, "grad_norm": 3.869910717010498, "learning_rate": 7.527397260273972e-07, "log_odds_chosen": 0.05060502886772156, "log_odds_ratio": -0.8028082251548767, "logits/chosen": 0.6834768056869507, "logits/rejected": 0.7277020215988159, "logps/chosen": -2.694654941558838, "logps/rejected": -2.695678234100342, "loss": 0.8774, "nll_loss": 0.7971323132514954, "rewards/accuracies": 0.625, "rewards/chosen": -0.26946547627449036, "rewards/margins": 0.00010235235095024109, "rewards/rejected": -0.2695677876472473, "step": 1806 }, { "epoch": 4.947296372347707, "grad_norm": 4.263947010040283, "learning_rate": 7.526027397260274e-07, "log_odds_chosen": 1.297691822052002, "log_odds_ratio": -0.33120661973953247, "logits/chosen": 0.8027799129486084, "logits/rejected": 0.652423083782196, "logps/chosen": -2.082242965698242, "logps/rejected": -3.2780675888061523, "loss": 0.8892, "nll_loss": 0.8561168909072876, "rewards/accuracies": 1.0, "rewards/chosen": -0.2082243263721466, "rewards/margins": 0.11958244442939758, "rewards/rejected": -0.3278067708015442, "step": 1807 }, { "epoch": 4.950034223134839, "grad_norm": 4.045578479766846, "learning_rate": 7.524657534246575e-07, "log_odds_chosen": 2.035203695297241, "log_odds_ratio": -0.40643805265426636, "logits/chosen": 0.7633593082427979, "logits/rejected": 0.7557792663574219, "logps/chosen": -2.223992109298706, "logps/rejected": -4.142920017242432, "loss": 0.8616, "nll_loss": 0.8209855556488037, "rewards/accuracies": 0.875, "rewards/chosen": -0.22239920496940613, "rewards/margins": 0.1918928027153015, "rewards/rejected": -0.41429197788238525, "step": 1808 }, { "epoch": 4.952772073921971, "grad_norm": 3.6635096073150635, "learning_rate": 7.523287671232876e-07, "log_odds_chosen": 1.0963332653045654, "log_odds_ratio": -0.3745240271091461, "logits/chosen": 0.7095867395401001, "logits/rejected": 0.7003635764122009, "logps/chosen": -2.1059465408325195, "logps/rejected": -3.1480026245117188, "loss": 0.8716, "nll_loss": 0.8341195583343506, "rewards/accuracies": 1.0, "rewards/chosen": -0.21059466898441315, "rewards/margins": 0.10420557856559753, "rewards/rejected": -0.3148002624511719, "step": 1809 }, { "epoch": 4.955509924709103, "grad_norm": 3.435178756713867, "learning_rate": 7.521917808219178e-07, "log_odds_chosen": 1.6605217456817627, "log_odds_ratio": -0.2655092775821686, "logits/chosen": 0.8340246081352234, "logits/rejected": 0.8241302967071533, "logps/chosen": -2.235471725463867, "logps/rejected": -3.76300048828125, "loss": 1.0046, "nll_loss": 0.9780060052871704, "rewards/accuracies": 1.0, "rewards/chosen": -0.22354716062545776, "rewards/margins": 0.15275290608406067, "rewards/rejected": -0.37630003690719604, "step": 1810 }, { "epoch": 4.958247775496235, "grad_norm": 4.401771545410156, "learning_rate": 7.520547945205479e-07, "log_odds_chosen": 1.587822675704956, "log_odds_ratio": -0.3340175747871399, "logits/chosen": 0.705348014831543, "logits/rejected": 0.6709260940551758, "logps/chosen": -2.091538667678833, "logps/rejected": -3.5569097995758057, "loss": 0.8434, "nll_loss": 0.8100035190582275, "rewards/accuracies": 0.875, "rewards/chosen": -0.20915386080741882, "rewards/margins": 0.14653712511062622, "rewards/rejected": -0.35569098591804504, "step": 1811 }, { "epoch": 4.960985626283367, "grad_norm": 3.584625005722046, "learning_rate": 7.519178082191781e-07, "log_odds_chosen": 1.6609244346618652, "log_odds_ratio": -0.3282415270805359, "logits/chosen": 0.6917320489883423, "logits/rejected": 0.5898131728172302, "logps/chosen": -2.0914299488067627, "logps/rejected": -3.661045551300049, "loss": 0.9315, "nll_loss": 0.8986831903457642, "rewards/accuracies": 0.875, "rewards/chosen": -0.2091429978609085, "rewards/margins": 0.156961590051651, "rewards/rejected": -0.3661045730113983, "step": 1812 }, { "epoch": 4.963723477070499, "grad_norm": 4.06869649887085, "learning_rate": 7.517808219178082e-07, "log_odds_chosen": 0.03543210029602051, "log_odds_ratio": -0.7311275005340576, "logits/chosen": 0.6743083000183105, "logits/rejected": 0.7090296149253845, "logps/chosen": -2.345017910003662, "logps/rejected": -2.354839324951172, "loss": 1.027, "nll_loss": 0.9538521766662598, "rewards/accuracies": 0.5, "rewards/chosen": -0.23450180888175964, "rewards/margins": 0.000982135534286499, "rewards/rejected": -0.23548394441604614, "step": 1813 }, { "epoch": 4.9664613278576315, "grad_norm": 3.7684147357940674, "learning_rate": 7.516438356164383e-07, "log_odds_chosen": 0.6405732035636902, "log_odds_ratio": -0.7027961611747742, "logits/chosen": 0.859606146812439, "logits/rejected": 0.9413800239562988, "logps/chosen": -3.0923900604248047, "logps/rejected": -3.734536647796631, "loss": 0.9046, "nll_loss": 0.8343573212623596, "rewards/accuracies": 0.5, "rewards/chosen": -0.3092389702796936, "rewards/margins": 0.06421466916799545, "rewards/rejected": -0.37345364689826965, "step": 1814 }, { "epoch": 4.969199178644764, "grad_norm": 3.027379274368286, "learning_rate": 7.515068493150685e-07, "log_odds_chosen": 1.1022515296936035, "log_odds_ratio": -0.4385172128677368, "logits/chosen": 0.6846293210983276, "logits/rejected": 0.6185040473937988, "logps/chosen": -2.1425580978393555, "logps/rejected": -3.190089225769043, "loss": 0.907, "nll_loss": 0.8631546497344971, "rewards/accuracies": 0.75, "rewards/chosen": -0.21425582468509674, "rewards/margins": 0.10475309193134308, "rewards/rejected": -0.3190089166164398, "step": 1815 }, { "epoch": 4.971937029431896, "grad_norm": 3.458997964859009, "learning_rate": 7.513698630136986e-07, "log_odds_chosen": 0.8483256697654724, "log_odds_ratio": -0.5053616762161255, "logits/chosen": 0.5512261390686035, "logits/rejected": 0.5582216382026672, "logps/chosen": -1.990983486175537, "logps/rejected": -2.7910234928131104, "loss": 0.9462, "nll_loss": 0.8956645727157593, "rewards/accuracies": 1.0, "rewards/chosen": -0.19909833371639252, "rewards/margins": 0.0800040066242218, "rewards/rejected": -0.2791023254394531, "step": 1816 }, { "epoch": 4.974674880219028, "grad_norm": 3.788529396057129, "learning_rate": 7.512328767123287e-07, "log_odds_chosen": 0.9456387758255005, "log_odds_ratio": -0.5790469646453857, "logits/chosen": 0.6412320137023926, "logits/rejected": 0.646359920501709, "logps/chosen": -2.683436870574951, "logps/rejected": -3.592695713043213, "loss": 0.8746, "nll_loss": 0.8166778087615967, "rewards/accuracies": 0.625, "rewards/chosen": -0.2683436870574951, "rewards/margins": 0.09092588722705841, "rewards/rejected": -0.35926955938339233, "step": 1817 }, { "epoch": 4.97741273100616, "grad_norm": 4.110363960266113, "learning_rate": 7.510958904109589e-07, "log_odds_chosen": 0.9629030823707581, "log_odds_ratio": -0.3996613919734955, "logits/chosen": 0.8699444532394409, "logits/rejected": 0.8411781787872314, "logps/chosen": -2.4854986667633057, "logps/rejected": -3.3958539962768555, "loss": 0.8759, "nll_loss": 0.8359538316726685, "rewards/accuracies": 1.0, "rewards/chosen": -0.24854987859725952, "rewards/margins": 0.09103552252054214, "rewards/rejected": -0.33958542346954346, "step": 1818 }, { "epoch": 4.980150581793293, "grad_norm": 3.654110908508301, "learning_rate": 7.509589041095889e-07, "log_odds_chosen": 0.6925997138023376, "log_odds_ratio": -0.48454394936561584, "logits/chosen": 0.7194195985794067, "logits/rejected": 0.705711841583252, "logps/chosen": -2.312819242477417, "logps/rejected": -2.95163631439209, "loss": 0.9088, "nll_loss": 0.8603849411010742, "rewards/accuracies": 0.625, "rewards/chosen": -0.23128193616867065, "rewards/margins": 0.06388169527053833, "rewards/rejected": -0.295163631439209, "step": 1819 }, { "epoch": 4.982888432580424, "grad_norm": 3.932774782180786, "learning_rate": 7.508219178082191e-07, "log_odds_chosen": 1.0078179836273193, "log_odds_ratio": -0.5228631496429443, "logits/chosen": 0.8745806813240051, "logits/rejected": 0.8226532340049744, "logps/chosen": -2.424220085144043, "logps/rejected": -3.3894295692443848, "loss": 0.8497, "nll_loss": 0.7973883152008057, "rewards/accuracies": 0.75, "rewards/chosen": -0.24242202937602997, "rewards/margins": 0.09652094542980194, "rewards/rejected": -0.3389429748058319, "step": 1820 }, { "epoch": 4.985626283367557, "grad_norm": 2.892859697341919, "learning_rate": 7.506849315068493e-07, "log_odds_chosen": 0.6678557991981506, "log_odds_ratio": -0.4650523066520691, "logits/chosen": 0.799441933631897, "logits/rejected": 0.7884422540664673, "logps/chosen": -2.211261034011841, "logps/rejected": -2.8114535808563232, "loss": 0.8811, "nll_loss": 0.8346296548843384, "rewards/accuracies": 0.75, "rewards/chosen": -0.22112612426280975, "rewards/margins": 0.06001923605799675, "rewards/rejected": -0.2811453342437744, "step": 1821 }, { "epoch": 4.988364134154689, "grad_norm": 4.056301116943359, "learning_rate": 7.505479452054794e-07, "log_odds_chosen": 0.8521459698677063, "log_odds_ratio": -0.4109047055244446, "logits/chosen": 0.8556316494941711, "logits/rejected": 0.8942010402679443, "logps/chosen": -2.3173105716705322, "logps/rejected": -3.0806543827056885, "loss": 0.8198, "nll_loss": 0.7787324786186218, "rewards/accuracies": 1.0, "rewards/chosen": -0.2317310869693756, "rewards/margins": 0.07633436471223831, "rewards/rejected": -0.30806541442871094, "step": 1822 }, { "epoch": 4.991101984941821, "grad_norm": 3.5555496215820312, "learning_rate": 7.504109589041095e-07, "log_odds_chosen": 2.326153516769409, "log_odds_ratio": -0.2958241105079651, "logits/chosen": 0.6780503988265991, "logits/rejected": 0.6485879421234131, "logps/chosen": -2.4039554595947266, "logps/rejected": -4.625240325927734, "loss": 0.8961, "nll_loss": 0.8665100932121277, "rewards/accuracies": 0.875, "rewards/chosen": -0.24039557576179504, "rewards/margins": 0.2221284955739975, "rewards/rejected": -0.46252405643463135, "step": 1823 }, { "epoch": 4.993839835728953, "grad_norm": 4.159448146820068, "learning_rate": 7.502739726027397e-07, "log_odds_chosen": 0.4433228075504303, "log_odds_ratio": -0.5583801865577698, "logits/chosen": 0.7141169309616089, "logits/rejected": 0.673214316368103, "logps/chosen": -2.499542474746704, "logps/rejected": -2.9204587936401367, "loss": 0.9417, "nll_loss": 0.8858166933059692, "rewards/accuracies": 0.75, "rewards/chosen": -0.2499542534351349, "rewards/margins": 0.04209163412451744, "rewards/rejected": -0.29204586148262024, "step": 1824 }, { "epoch": 4.996577686516085, "grad_norm": 3.417490243911743, "learning_rate": 7.501369863013698e-07, "log_odds_chosen": 1.3074599504470825, "log_odds_ratio": -0.32579052448272705, "logits/chosen": 0.8999645113945007, "logits/rejected": 0.9429705739021301, "logps/chosen": -2.0748515129089355, "logps/rejected": -3.25236439704895, "loss": 0.8943, "nll_loss": 0.8617397546768188, "rewards/accuracies": 0.875, "rewards/chosen": -0.2074851542711258, "rewards/margins": 0.11775131523609161, "rewards/rejected": -0.325236439704895, "step": 1825 }, { "epoch": 4.999315537303217, "grad_norm": 5.026827812194824, "learning_rate": 7.5e-07, "log_odds_chosen": 1.4022055864334106, "log_odds_ratio": -0.7546834349632263, "logits/chosen": 1.0087641477584839, "logits/rejected": 0.9035165309906006, "logps/chosen": -2.5229833126068115, "logps/rejected": -3.7895596027374268, "loss": 0.9206, "nll_loss": 0.845173180103302, "rewards/accuracies": 0.875, "rewards/chosen": -0.25229835510253906, "rewards/margins": 0.1266576200723648, "rewards/rejected": -0.3789559602737427, "step": 1826 }, { "epoch": 5.002053388090349, "grad_norm": 3.286306381225586, "learning_rate": 7.498630136986301e-07, "log_odds_chosen": 0.9888540506362915, "log_odds_ratio": -0.4388238191604614, "logits/chosen": 0.6170253753662109, "logits/rejected": 0.577784538269043, "logps/chosen": -2.0777406692504883, "logps/rejected": -2.99537992477417, "loss": 0.943, "nll_loss": 0.8991405367851257, "rewards/accuracies": 0.875, "rewards/chosen": -0.2077740728855133, "rewards/margins": 0.09176395833492279, "rewards/rejected": -0.2995380163192749, "step": 1827 }, { "epoch": 5.004791238877481, "grad_norm": 3.7077929973602295, "learning_rate": 7.497260273972602e-07, "log_odds_chosen": 1.0461677312850952, "log_odds_ratio": -0.5328542590141296, "logits/chosen": 0.9370598793029785, "logits/rejected": 0.9470321536064148, "logps/chosen": -2.0641658306121826, "logps/rejected": -2.996816635131836, "loss": 0.9014, "nll_loss": 0.8481103181838989, "rewards/accuracies": 0.625, "rewards/chosen": -0.20641657710075378, "rewards/margins": 0.09326508641242981, "rewards/rejected": -0.2996816635131836, "step": 1828 }, { "epoch": 5.007529089664613, "grad_norm": 3.8545286655426025, "learning_rate": 7.495890410958904e-07, "log_odds_chosen": 2.2346444129943848, "log_odds_ratio": -0.26730918884277344, "logits/chosen": 0.817743182182312, "logits/rejected": 0.824561357498169, "logps/chosen": -1.5730513334274292, "logps/rejected": -3.6114745140075684, "loss": 0.8625, "nll_loss": 0.8357582688331604, "rewards/accuracies": 1.0, "rewards/chosen": -0.15730513632297516, "rewards/margins": 0.20384234189987183, "rewards/rejected": -0.3611474633216858, "step": 1829 }, { "epoch": 5.0102669404517455, "grad_norm": 3.4061973094940186, "learning_rate": 7.494520547945205e-07, "log_odds_chosen": 1.182953953742981, "log_odds_ratio": -0.36894288659095764, "logits/chosen": 0.502457857131958, "logits/rejected": 0.49392738938331604, "logps/chosen": -1.5616849660873413, "logps/rejected": -2.6211037635803223, "loss": 0.9645, "nll_loss": 0.9276537895202637, "rewards/accuracies": 0.875, "rewards/chosen": -0.15616849064826965, "rewards/margins": 0.10594189167022705, "rewards/rejected": -0.2621103823184967, "step": 1830 }, { "epoch": 5.0130047912388775, "grad_norm": 3.549891710281372, "learning_rate": 7.493150684931506e-07, "log_odds_chosen": 0.769109845161438, "log_odds_ratio": -0.4500429630279541, "logits/chosen": 0.6683530807495117, "logits/rejected": 0.6571800112724304, "logps/chosen": -2.3421854972839355, "logps/rejected": -3.0616838932037354, "loss": 1.0042, "nll_loss": 0.9592219591140747, "rewards/accuracies": 1.0, "rewards/chosen": -0.2342185378074646, "rewards/margins": 0.07194983214139938, "rewards/rejected": -0.3061683773994446, "step": 1831 }, { "epoch": 5.01574264202601, "grad_norm": 4.280017852783203, "learning_rate": 7.491780821917808e-07, "log_odds_chosen": 0.5710996389389038, "log_odds_ratio": -0.6259638667106628, "logits/chosen": 0.6843273639678955, "logits/rejected": 0.6095839738845825, "logps/chosen": -2.0917747020721436, "logps/rejected": -2.612666130065918, "loss": 0.9265, "nll_loss": 0.86390221118927, "rewards/accuracies": 0.75, "rewards/chosen": -0.20917746424674988, "rewards/margins": 0.052089158445596695, "rewards/rejected": -0.26126664876937866, "step": 1832 }, { "epoch": 5.018480492813142, "grad_norm": 4.175896644592285, "learning_rate": 7.49041095890411e-07, "log_odds_chosen": 1.2187671661376953, "log_odds_ratio": -0.45744457840919495, "logits/chosen": 0.7325859665870667, "logits/rejected": 0.7992192506790161, "logps/chosen": -3.0584917068481445, "logps/rejected": -4.232278823852539, "loss": 0.8704, "nll_loss": 0.8246960043907166, "rewards/accuracies": 0.875, "rewards/chosen": -0.30584919452667236, "rewards/margins": 0.11737874150276184, "rewards/rejected": -0.4232279360294342, "step": 1833 }, { "epoch": 5.021218343600274, "grad_norm": 3.893043279647827, "learning_rate": 7.48904109589041e-07, "log_odds_chosen": 0.8697041869163513, "log_odds_ratio": -0.43719640374183655, "logits/chosen": 0.9737329483032227, "logits/rejected": 1.0224589109420776, "logps/chosen": -2.3604962825775146, "logps/rejected": -3.151616334915161, "loss": 0.8107, "nll_loss": 0.7669582962989807, "rewards/accuracies": 0.875, "rewards/chosen": -0.236049622297287, "rewards/margins": 0.07911200821399689, "rewards/rejected": -0.31516164541244507, "step": 1834 }, { "epoch": 5.023956194387406, "grad_norm": 3.788275718688965, "learning_rate": 7.487671232876712e-07, "log_odds_chosen": 0.543465256690979, "log_odds_ratio": -0.5157685875892639, "logits/chosen": 0.7496857643127441, "logits/rejected": 0.7345925569534302, "logps/chosen": -3.028690814971924, "logps/rejected": -3.540958881378174, "loss": 0.9873, "nll_loss": 0.9357599020004272, "rewards/accuracies": 0.875, "rewards/chosen": -0.30286905169487, "rewards/margins": 0.05122684687376022, "rewards/rejected": -0.3540959060192108, "step": 1835 }, { "epoch": 5.026694045174538, "grad_norm": 3.350667953491211, "learning_rate": 7.486301369863013e-07, "log_odds_chosen": 0.5977686643600464, "log_odds_ratio": -0.5262868404388428, "logits/chosen": 0.6909899711608887, "logits/rejected": 0.7546827793121338, "logps/chosen": -2.3289785385131836, "logps/rejected": -2.8782241344451904, "loss": 0.9223, "nll_loss": 0.8696613907814026, "rewards/accuracies": 0.75, "rewards/chosen": -0.23289787769317627, "rewards/margins": 0.05492457002401352, "rewards/rejected": -0.2878224551677704, "step": 1836 }, { "epoch": 5.02943189596167, "grad_norm": 3.636587381362915, "learning_rate": 7.484931506849314e-07, "log_odds_chosen": 1.8510446548461914, "log_odds_ratio": -0.37470316886901855, "logits/chosen": 0.8628621697425842, "logits/rejected": 0.8264335989952087, "logps/chosen": -2.2811131477355957, "logps/rejected": -4.042616844177246, "loss": 0.9393, "nll_loss": 0.9018080830574036, "rewards/accuracies": 0.75, "rewards/chosen": -0.22811131179332733, "rewards/margins": 0.1761503368616104, "rewards/rejected": -0.40426164865493774, "step": 1837 }, { "epoch": 5.032169746748802, "grad_norm": 3.683772563934326, "learning_rate": 7.483561643835616e-07, "log_odds_chosen": 1.7333569526672363, "log_odds_ratio": -0.3096007704734802, "logits/chosen": 0.9246423840522766, "logits/rejected": 0.9294170141220093, "logps/chosen": -2.115769624710083, "logps/rejected": -3.761080265045166, "loss": 0.884, "nll_loss": 0.8530541658401489, "rewards/accuracies": 1.0, "rewards/chosen": -0.21157698333263397, "rewards/margins": 0.16453108191490173, "rewards/rejected": -0.3761080503463745, "step": 1838 }, { "epoch": 5.034907597535934, "grad_norm": 3.33927059173584, "learning_rate": 7.482191780821917e-07, "log_odds_chosen": 0.5763081312179565, "log_odds_ratio": -0.5800684690475464, "logits/chosen": 0.7620559930801392, "logits/rejected": 0.7781093120574951, "logps/chosen": -2.2122015953063965, "logps/rejected": -2.771326780319214, "loss": 0.9007, "nll_loss": 0.842715859413147, "rewards/accuracies": 0.75, "rewards/chosen": -0.22122016549110413, "rewards/margins": 0.055912502110004425, "rewards/rejected": -0.27713268995285034, "step": 1839 }, { "epoch": 5.037645448323066, "grad_norm": 4.279103755950928, "learning_rate": 7.480821917808218e-07, "log_odds_chosen": 0.590589702129364, "log_odds_ratio": -0.5210521221160889, "logits/chosen": 1.0579073429107666, "logits/rejected": 1.0497970581054688, "logps/chosen": -3.2256908416748047, "logps/rejected": -3.793700695037842, "loss": 0.8603, "nll_loss": 0.8081778287887573, "rewards/accuracies": 0.625, "rewards/chosen": -0.3225690722465515, "rewards/margins": 0.05680098384618759, "rewards/rejected": -0.3793700337409973, "step": 1840 }, { "epoch": 5.040383299110198, "grad_norm": 3.6861090660095215, "learning_rate": 7.47945205479452e-07, "log_odds_chosen": 0.9472339153289795, "log_odds_ratio": -0.37848976254463196, "logits/chosen": 0.7791337966918945, "logits/rejected": 0.740258514881134, "logps/chosen": -2.72005295753479, "logps/rejected": -3.610846996307373, "loss": 1.0301, "nll_loss": 0.9922336935997009, "rewards/accuracies": 0.875, "rewards/chosen": -0.2720052897930145, "rewards/margins": 0.08907943218946457, "rewards/rejected": -0.3610846996307373, "step": 1841 }, { "epoch": 5.04312114989733, "grad_norm": 5.156521320343018, "learning_rate": 7.478082191780821e-07, "log_odds_chosen": 0.4946649670600891, "log_odds_ratio": -0.5460736751556396, "logits/chosen": 0.7398709654808044, "logits/rejected": 0.7547463774681091, "logps/chosen": -2.8968002796173096, "logps/rejected": -3.342970132827759, "loss": 0.9094, "nll_loss": 0.8547977209091187, "rewards/accuracies": 0.75, "rewards/chosen": -0.2896800637245178, "rewards/margins": 0.044616978615522385, "rewards/rejected": -0.3342970311641693, "step": 1842 }, { "epoch": 5.045859000684462, "grad_norm": 3.087881565093994, "learning_rate": 7.476712328767123e-07, "log_odds_chosen": 2.8448843955993652, "log_odds_ratio": -0.12977245450019836, "logits/chosen": 0.775065541267395, "logits/rejected": 0.7657438516616821, "logps/chosen": -2.368072748184204, "logps/rejected": -5.085766315460205, "loss": 0.8752, "nll_loss": 0.862255334854126, "rewards/accuracies": 1.0, "rewards/chosen": -0.23680728673934937, "rewards/margins": 0.2717694044113159, "rewards/rejected": -0.5085766315460205, "step": 1843 }, { "epoch": 5.048596851471594, "grad_norm": 4.088196277618408, "learning_rate": 7.475342465753424e-07, "log_odds_chosen": 1.546971082687378, "log_odds_ratio": -0.32097697257995605, "logits/chosen": 0.8535772562026978, "logits/rejected": 0.790101170539856, "logps/chosen": -3.1507511138916016, "logps/rejected": -4.613834381103516, "loss": 0.9513, "nll_loss": 0.9191622734069824, "rewards/accuracies": 0.75, "rewards/chosen": -0.3150750994682312, "rewards/margins": 0.1463083028793335, "rewards/rejected": -0.4613834023475647, "step": 1844 }, { "epoch": 5.0513347022587265, "grad_norm": 4.239866256713867, "learning_rate": 7.473972602739725e-07, "log_odds_chosen": 0.681128740310669, "log_odds_ratio": -0.697472333908081, "logits/chosen": 0.8424339294433594, "logits/rejected": 0.855536699295044, "logps/chosen": -2.9061925411224365, "logps/rejected": -3.5299174785614014, "loss": 0.8941, "nll_loss": 0.8243581652641296, "rewards/accuracies": 0.625, "rewards/chosen": -0.29061925411224365, "rewards/margins": 0.06237248331308365, "rewards/rejected": -0.3529917597770691, "step": 1845 }, { "epoch": 5.054072553045859, "grad_norm": 4.3979268074035645, "learning_rate": 7.472602739726027e-07, "log_odds_chosen": 1.0603141784667969, "log_odds_ratio": -0.3505431115627289, "logits/chosen": 0.9802292585372925, "logits/rejected": 0.9985430240631104, "logps/chosen": -2.335287094116211, "logps/rejected": -3.324540615081787, "loss": 0.8494, "nll_loss": 0.8143138289451599, "rewards/accuracies": 1.0, "rewards/chosen": -0.2335287183523178, "rewards/margins": 0.09892533719539642, "rewards/rejected": -0.33245405554771423, "step": 1846 }, { "epoch": 5.0568104038329915, "grad_norm": 3.1365175247192383, "learning_rate": 7.471232876712328e-07, "log_odds_chosen": 1.8604402542114258, "log_odds_ratio": -0.27037447690963745, "logits/chosen": 0.7504677772521973, "logits/rejected": 0.748060405254364, "logps/chosen": -2.1234264373779297, "logps/rejected": -3.855119228363037, "loss": 0.8722, "nll_loss": 0.8451941609382629, "rewards/accuracies": 0.875, "rewards/chosen": -0.21234264969825745, "rewards/margins": 0.1731692999601364, "rewards/rejected": -0.38551196455955505, "step": 1847 }, { "epoch": 5.059548254620124, "grad_norm": 4.739090442657471, "learning_rate": 7.469863013698629e-07, "log_odds_chosen": 0.20878303050994873, "log_odds_ratio": -0.7145676612854004, "logits/chosen": 0.7673521637916565, "logits/rejected": 0.792685866355896, "logps/chosen": -2.6206042766571045, "logps/rejected": -2.8414347171783447, "loss": 0.9423, "nll_loss": 0.87083899974823, "rewards/accuracies": 0.5, "rewards/chosen": -0.2620604336261749, "rewards/margins": 0.022083068266510963, "rewards/rejected": -0.28414350748062134, "step": 1848 }, { "epoch": 5.062286105407256, "grad_norm": 4.720916748046875, "learning_rate": 7.468493150684931e-07, "log_odds_chosen": 1.463418960571289, "log_odds_ratio": -0.6715173125267029, "logits/chosen": 0.9998354315757751, "logits/rejected": 1.0744907855987549, "logps/chosen": -3.356576919555664, "logps/rejected": -4.757061004638672, "loss": 0.8477, "nll_loss": 0.7805012464523315, "rewards/accuracies": 0.75, "rewards/chosen": -0.3356577157974243, "rewards/margins": 0.14004842936992645, "rewards/rejected": -0.4757061302661896, "step": 1849 }, { "epoch": 5.065023956194388, "grad_norm": 4.258825778961182, "learning_rate": 7.467123287671232e-07, "log_odds_chosen": 1.4215199947357178, "log_odds_ratio": -0.4637448787689209, "logits/chosen": 0.8810431361198425, "logits/rejected": 0.7791560292243958, "logps/chosen": -2.284299373626709, "logps/rejected": -3.6704630851745605, "loss": 0.9051, "nll_loss": 0.8587226271629333, "rewards/accuracies": 0.625, "rewards/chosen": -0.22842994332313538, "rewards/margins": 0.1386163830757141, "rewards/rejected": -0.3670463263988495, "step": 1850 }, { "epoch": 5.06776180698152, "grad_norm": 4.386668682098389, "learning_rate": 7.465753424657533e-07, "log_odds_chosen": 1.0218236446380615, "log_odds_ratio": -0.47830018401145935, "logits/chosen": 0.7921675443649292, "logits/rejected": 0.8078711032867432, "logps/chosen": -2.9035513401031494, "logps/rejected": -3.8847713470458984, "loss": 1.0067, "nll_loss": 0.9588650465011597, "rewards/accuracies": 0.75, "rewards/chosen": -0.2903551459312439, "rewards/margins": 0.0981220006942749, "rewards/rejected": -0.3884771168231964, "step": 1851 }, { "epoch": 5.070499657768652, "grad_norm": 3.4154865741729736, "learning_rate": 7.464383561643835e-07, "log_odds_chosen": 1.0902667045593262, "log_odds_ratio": -0.42970556020736694, "logits/chosen": 0.8711273074150085, "logits/rejected": 0.9682309031486511, "logps/chosen": -2.230877161026001, "logps/rejected": -3.2166571617126465, "loss": 0.7971, "nll_loss": 0.7541553378105164, "rewards/accuracies": 0.75, "rewards/chosen": -0.22308774292469025, "rewards/margins": 0.09857797622680664, "rewards/rejected": -0.3216657340526581, "step": 1852 }, { "epoch": 5.073237508555784, "grad_norm": 4.151980400085449, "learning_rate": 7.463013698630136e-07, "log_odds_chosen": 1.0455763339996338, "log_odds_ratio": -0.3648942708969116, "logits/chosen": 0.7479164600372314, "logits/rejected": 0.733883261680603, "logps/chosen": -2.6459453105926514, "logps/rejected": -3.6198697090148926, "loss": 0.9668, "nll_loss": 0.9303050637245178, "rewards/accuracies": 1.0, "rewards/chosen": -0.26459455490112305, "rewards/margins": 0.09739243984222412, "rewards/rejected": -0.36198699474334717, "step": 1853 }, { "epoch": 5.075975359342916, "grad_norm": 6.501246452331543, "learning_rate": 7.461643835616437e-07, "log_odds_chosen": 0.5066815614700317, "log_odds_ratio": -0.5081884860992432, "logits/chosen": 0.7654409408569336, "logits/rejected": 0.875188946723938, "logps/chosen": -2.2488086223602295, "logps/rejected": -2.7111034393310547, "loss": 0.9178, "nll_loss": 0.866939127445221, "rewards/accuracies": 0.75, "rewards/chosen": -0.2248808741569519, "rewards/margins": 0.04622947424650192, "rewards/rejected": -0.2711103558540344, "step": 1854 }, { "epoch": 5.078713210130048, "grad_norm": 5.922034740447998, "learning_rate": 7.46027397260274e-07, "log_odds_chosen": 1.1100927591323853, "log_odds_ratio": -0.4008225202560425, "logits/chosen": 0.8860208988189697, "logits/rejected": 0.8793433904647827, "logps/chosen": -2.236332654953003, "logps/rejected": -3.2612593173980713, "loss": 0.875, "nll_loss": 0.8349477648735046, "rewards/accuracies": 0.875, "rewards/chosen": -0.22363325953483582, "rewards/margins": 0.10249266028404236, "rewards/rejected": -0.3261259198188782, "step": 1855 }, { "epoch": 5.08145106091718, "grad_norm": 3.719695806503296, "learning_rate": 7.45890410958904e-07, "log_odds_chosen": 0.82389235496521, "log_odds_ratio": -0.5716546177864075, "logits/chosen": 0.6711167693138123, "logits/rejected": 0.7123767137527466, "logps/chosen": -2.1572864055633545, "logps/rejected": -2.8945047855377197, "loss": 0.9988, "nll_loss": 0.9416193962097168, "rewards/accuracies": 0.625, "rewards/chosen": -0.21572864055633545, "rewards/margins": 0.07372181862592697, "rewards/rejected": -0.289450466632843, "step": 1856 }, { "epoch": 5.084188911704312, "grad_norm": 3.772585153579712, "learning_rate": 7.457534246575343e-07, "log_odds_chosen": 0.760338544845581, "log_odds_ratio": -0.622845470905304, "logits/chosen": 0.8891980648040771, "logits/rejected": 0.9339878559112549, "logps/chosen": -2.213146448135376, "logps/rejected": -2.894929885864258, "loss": 0.9185, "nll_loss": 0.8561998009681702, "rewards/accuracies": 0.625, "rewards/chosen": -0.22131465375423431, "rewards/margins": 0.06817835569381714, "rewards/rejected": -0.28949299454689026, "step": 1857 }, { "epoch": 5.086926762491444, "grad_norm": 4.905954360961914, "learning_rate": 7.456164383561644e-07, "log_odds_chosen": 0.42956459522247314, "log_odds_ratio": -0.5968877077102661, "logits/chosen": 0.804877758026123, "logits/rejected": 0.7440481185913086, "logps/chosen": -2.2366204261779785, "logps/rejected": -2.605926036834717, "loss": 0.943, "nll_loss": 0.883267343044281, "rewards/accuracies": 0.625, "rewards/chosen": -0.22366201877593994, "rewards/margins": 0.036930590867996216, "rewards/rejected": -0.26059260964393616, "step": 1858 }, { "epoch": 5.089664613278576, "grad_norm": 6.101099967956543, "learning_rate": 7.454794520547945e-07, "log_odds_chosen": 1.2130324840545654, "log_odds_ratio": -0.3326510787010193, "logits/chosen": 0.9028959274291992, "logits/rejected": 0.9851451516151428, "logps/chosen": -2.649384021759033, "logps/rejected": -3.8098294734954834, "loss": 0.8521, "nll_loss": 0.818838894367218, "rewards/accuracies": 0.875, "rewards/chosen": -0.2649384140968323, "rewards/margins": 0.1160445585846901, "rewards/rejected": -0.3809829354286194, "step": 1859 }, { "epoch": 5.092402464065708, "grad_norm": 2.9148387908935547, "learning_rate": 7.453424657534247e-07, "log_odds_chosen": 2.2667651176452637, "log_odds_ratio": -0.23105663061141968, "logits/chosen": 0.9114664196968079, "logits/rejected": 0.9296927452087402, "logps/chosen": -1.9700640439987183, "logps/rejected": -4.092332363128662, "loss": 0.8725, "nll_loss": 0.8494000434875488, "rewards/accuracies": 1.0, "rewards/chosen": -0.19700641930103302, "rewards/margins": 0.2122267782688141, "rewards/rejected": -0.4092332124710083, "step": 1860 }, { "epoch": 5.0951403148528405, "grad_norm": 4.582930088043213, "learning_rate": 7.452054794520548e-07, "log_odds_chosen": 0.7900948524475098, "log_odds_ratio": -0.5755317807197571, "logits/chosen": 0.6912327408790588, "logits/rejected": 0.6388412714004517, "logps/chosen": -3.261157751083374, "logps/rejected": -4.030668258666992, "loss": 0.9703, "nll_loss": 0.9127347469329834, "rewards/accuracies": 0.625, "rewards/chosen": -0.32611578702926636, "rewards/margins": 0.0769510269165039, "rewards/rejected": -0.40306681394577026, "step": 1861 }, { "epoch": 5.0978781656399725, "grad_norm": 5.1283392906188965, "learning_rate": 7.450684931506849e-07, "log_odds_chosen": 2.350036144256592, "log_odds_ratio": -0.14355936646461487, "logits/chosen": 0.9230741858482361, "logits/rejected": 1.0917820930480957, "logps/chosen": -2.4518508911132812, "logps/rejected": -4.717047691345215, "loss": 0.7769, "nll_loss": 0.7625292539596558, "rewards/accuracies": 1.0, "rewards/chosen": -0.24518507719039917, "rewards/margins": 0.22651967406272888, "rewards/rejected": -0.47170472145080566, "step": 1862 }, { "epoch": 5.100616016427105, "grad_norm": 3.6811306476593018, "learning_rate": 7.449315068493151e-07, "log_odds_chosen": 1.7387312650680542, "log_odds_ratio": -0.3427482545375824, "logits/chosen": 0.6607630252838135, "logits/rejected": 0.669274628162384, "logps/chosen": -2.057173728942871, "logps/rejected": -3.711249351501465, "loss": 0.8308, "nll_loss": 0.7964808344841003, "rewards/accuracies": 0.875, "rewards/chosen": -0.20571738481521606, "rewards/margins": 0.16540758311748505, "rewards/rejected": -0.3711249828338623, "step": 1863 }, { "epoch": 5.103353867214237, "grad_norm": 3.34433650970459, "learning_rate": 7.447945205479453e-07, "log_odds_chosen": 1.344138741493225, "log_odds_ratio": -0.3595386743545532, "logits/chosen": 0.5607814788818359, "logits/rejected": 0.5404466986656189, "logps/chosen": -2.5461790561676025, "logps/rejected": -3.7965707778930664, "loss": 0.9088, "nll_loss": 0.8728646636009216, "rewards/accuracies": 0.875, "rewards/chosen": -0.2546178996562958, "rewards/margins": 0.125039204955101, "rewards/rejected": -0.3796570897102356, "step": 1864 }, { "epoch": 5.106091718001369, "grad_norm": 3.137430429458618, "learning_rate": 7.446575342465753e-07, "log_odds_chosen": 1.6924338340759277, "log_odds_ratio": -0.30654847621917725, "logits/chosen": 0.8791687488555908, "logits/rejected": 0.888282299041748, "logps/chosen": -2.11899995803833, "logps/rejected": -3.7052648067474365, "loss": 0.8648, "nll_loss": 0.8341184258460999, "rewards/accuracies": 1.0, "rewards/chosen": -0.211899995803833, "rewards/margins": 0.1586265116930008, "rewards/rejected": -0.3705264925956726, "step": 1865 }, { "epoch": 5.108829568788501, "grad_norm": 5.399952411651611, "learning_rate": 7.445205479452055e-07, "log_odds_chosen": 0.8163520693778992, "log_odds_ratio": -0.3986194133758545, "logits/chosen": 0.8468095064163208, "logits/rejected": 0.8489436507225037, "logps/chosen": -1.9426958560943604, "logps/rejected": -2.6430695056915283, "loss": 0.8292, "nll_loss": 0.7893693447113037, "rewards/accuracies": 1.0, "rewards/chosen": -0.1942695826292038, "rewards/margins": 0.0700373649597168, "rewards/rejected": -0.2643069624900818, "step": 1866 }, { "epoch": 5.111567419575633, "grad_norm": 3.2237014770507812, "learning_rate": 7.443835616438356e-07, "log_odds_chosen": 1.3059641122817993, "log_odds_ratio": -0.36632323265075684, "logits/chosen": 0.9231827259063721, "logits/rejected": 0.8586257696151733, "logps/chosen": -1.7780324220657349, "logps/rejected": -3.0015087127685547, "loss": 0.855, "nll_loss": 0.8183823227882385, "rewards/accuracies": 0.75, "rewards/chosen": -0.17780324816703796, "rewards/margins": 0.1223476454615593, "rewards/rejected": -0.30015087127685547, "step": 1867 }, { "epoch": 5.114305270362765, "grad_norm": 3.4951717853546143, "learning_rate": 7.442465753424657e-07, "log_odds_chosen": 1.4945778846740723, "log_odds_ratio": -0.33549800515174866, "logits/chosen": 0.7852691411972046, "logits/rejected": 0.8295105695724487, "logps/chosen": -2.0873165130615234, "logps/rejected": -3.471007823944092, "loss": 0.8663, "nll_loss": 0.8327429294586182, "rewards/accuracies": 0.875, "rewards/chosen": -0.20873165130615234, "rewards/margins": 0.1383691132068634, "rewards/rejected": -0.34710076451301575, "step": 1868 }, { "epoch": 5.117043121149897, "grad_norm": 4.169256687164307, "learning_rate": 7.441095890410959e-07, "log_odds_chosen": 0.7148140072822571, "log_odds_ratio": -0.4633287489414215, "logits/chosen": 0.9036002159118652, "logits/rejected": 0.9081563949584961, "logps/chosen": -2.4563815593719482, "logps/rejected": -3.080073595046997, "loss": 0.8596, "nll_loss": 0.8132497072219849, "rewards/accuracies": 0.75, "rewards/chosen": -0.2456381618976593, "rewards/margins": 0.0623692125082016, "rewards/rejected": -0.3080073595046997, "step": 1869 }, { "epoch": 5.119780971937029, "grad_norm": 4.3030524253845215, "learning_rate": 7.43972602739726e-07, "log_odds_chosen": 1.1606605052947998, "log_odds_ratio": -0.4179859459400177, "logits/chosen": 0.6350153684616089, "logits/rejected": 0.5669956207275391, "logps/chosen": -2.395636796951294, "logps/rejected": -3.5111422538757324, "loss": 0.9582, "nll_loss": 0.9164045453071594, "rewards/accuracies": 0.75, "rewards/chosen": -0.2395636886358261, "rewards/margins": 0.11155053973197937, "rewards/rejected": -0.3511142134666443, "step": 1870 }, { "epoch": 5.122518822724161, "grad_norm": 3.8050615787506104, "learning_rate": 7.438356164383562e-07, "log_odds_chosen": 1.0186091661453247, "log_odds_ratio": -0.39652174711227417, "logits/chosen": 0.7324578762054443, "logits/rejected": 0.6442502737045288, "logps/chosen": -2.1239705085754395, "logps/rejected": -3.0270884037017822, "loss": 1.0, "nll_loss": 0.9603236317634583, "rewards/accuracies": 0.875, "rewards/chosen": -0.21239706873893738, "rewards/margins": 0.09031176567077637, "rewards/rejected": -0.30270886421203613, "step": 1871 }, { "epoch": 5.125256673511293, "grad_norm": 3.5309157371520996, "learning_rate": 7.436986301369863e-07, "log_odds_chosen": 1.3574756383895874, "log_odds_ratio": -0.31454530358314514, "logits/chosen": 0.7838166952133179, "logits/rejected": 0.7230889201164246, "logps/chosen": -2.7793869972229004, "logps/rejected": -4.025252819061279, "loss": 0.9459, "nll_loss": 0.9144203662872314, "rewards/accuracies": 0.875, "rewards/chosen": -0.27793869376182556, "rewards/margins": 0.12458658963441849, "rewards/rejected": -0.40252530574798584, "step": 1872 }, { "epoch": 5.127994524298426, "grad_norm": 3.4824001789093018, "learning_rate": 7.435616438356164e-07, "log_odds_chosen": 1.2356678247451782, "log_odds_ratio": -0.3074510395526886, "logits/chosen": 0.6897866129875183, "logits/rejected": 0.6651075482368469, "logps/chosen": -2.392446517944336, "logps/rejected": -3.5565476417541504, "loss": 0.8482, "nll_loss": 0.817409098148346, "rewards/accuracies": 0.875, "rewards/chosen": -0.23924466967582703, "rewards/margins": 0.11641009151935577, "rewards/rejected": -0.355654776096344, "step": 1873 }, { "epoch": 5.130732375085558, "grad_norm": 3.598768711090088, "learning_rate": 7.434246575342466e-07, "log_odds_chosen": 0.37821143865585327, "log_odds_ratio": -0.7135496735572815, "logits/chosen": 0.8219603300094604, "logits/rejected": 0.8630886077880859, "logps/chosen": -2.399667263031006, "logps/rejected": -2.7804789543151855, "loss": 0.9717, "nll_loss": 0.9003511667251587, "rewards/accuracies": 0.5, "rewards/chosen": -0.23996669054031372, "rewards/margins": 0.03808118402957916, "rewards/rejected": -0.2780478894710541, "step": 1874 }, { "epoch": 5.13347022587269, "grad_norm": 4.298923969268799, "learning_rate": 7.432876712328767e-07, "log_odds_chosen": 0.5462584495544434, "log_odds_ratio": -0.511317253112793, "logits/chosen": 0.6233099699020386, "logits/rejected": 0.6124840974807739, "logps/chosen": -2.4045753479003906, "logps/rejected": -2.864997386932373, "loss": 1.0315, "nll_loss": 0.9803769588470459, "rewards/accuracies": 0.875, "rewards/chosen": -0.24045753479003906, "rewards/margins": 0.04604221135377884, "rewards/rejected": -0.2864997386932373, "step": 1875 }, { "epoch": 5.136208076659822, "grad_norm": 3.477574348449707, "learning_rate": 7.431506849315068e-07, "log_odds_chosen": 1.6059554815292358, "log_odds_ratio": -0.254884272813797, "logits/chosen": 1.009016513824463, "logits/rejected": 1.0174816846847534, "logps/chosen": -2.9926323890686035, "logps/rejected": -4.544022560119629, "loss": 0.8495, "nll_loss": 0.8239768743515015, "rewards/accuracies": 1.0, "rewards/chosen": -0.29926326870918274, "rewards/margins": 0.15513905882835388, "rewards/rejected": -0.45440229773521423, "step": 1876 }, { "epoch": 5.138945927446954, "grad_norm": 4.740715503692627, "learning_rate": 7.43013698630137e-07, "log_odds_chosen": 0.21879389882087708, "log_odds_ratio": -0.7571971416473389, "logits/chosen": 0.8624197840690613, "logits/rejected": 0.8069754838943481, "logps/chosen": -2.604264736175537, "logps/rejected": -2.780590057373047, "loss": 0.9719, "nll_loss": 0.8961507678031921, "rewards/accuracies": 0.5, "rewards/chosen": -0.26042646169662476, "rewards/margins": 0.01763255149126053, "rewards/rejected": -0.2780590057373047, "step": 1877 }, { "epoch": 5.1416837782340865, "grad_norm": 4.2012038230896, "learning_rate": 7.428767123287672e-07, "log_odds_chosen": 1.7530109882354736, "log_odds_ratio": -0.250118613243103, "logits/chosen": 0.8654930591583252, "logits/rejected": 0.8537571430206299, "logps/chosen": -2.515122175216675, "logps/rejected": -4.184939384460449, "loss": 0.8799, "nll_loss": 0.8549270033836365, "rewards/accuracies": 0.875, "rewards/chosen": -0.25151222944259644, "rewards/margins": 0.16698172688484192, "rewards/rejected": -0.41849395632743835, "step": 1878 }, { "epoch": 5.144421629021219, "grad_norm": 3.345876932144165, "learning_rate": 7.427397260273972e-07, "log_odds_chosen": 1.0237230062484741, "log_odds_ratio": -0.3910084664821625, "logits/chosen": 0.7461645603179932, "logits/rejected": 0.7278211712837219, "logps/chosen": -2.5525062084198, "logps/rejected": -3.517897129058838, "loss": 0.8821, "nll_loss": 0.8429890871047974, "rewards/accuracies": 1.0, "rewards/chosen": -0.25525063276290894, "rewards/margins": 0.09653910249471664, "rewards/rejected": -0.3517897427082062, "step": 1879 }, { "epoch": 5.147159479808351, "grad_norm": 4.435357570648193, "learning_rate": 7.426027397260274e-07, "log_odds_chosen": 1.3917839527130127, "log_odds_ratio": -0.37485191226005554, "logits/chosen": 0.712013840675354, "logits/rejected": 0.7539286613464355, "logps/chosen": -3.28322434425354, "logps/rejected": -4.611756324768066, "loss": 0.945, "nll_loss": 0.9075450897216797, "rewards/accuracies": 0.875, "rewards/chosen": -0.3283224403858185, "rewards/margins": 0.1328531801700592, "rewards/rejected": -0.4611756205558777, "step": 1880 }, { "epoch": 5.149897330595483, "grad_norm": 3.3522191047668457, "learning_rate": 7.424657534246575e-07, "log_odds_chosen": 0.7950677275657654, "log_odds_ratio": -0.4222133159637451, "logits/chosen": 0.8325833082199097, "logits/rejected": 0.8423531651496887, "logps/chosen": -2.507782220840454, "logps/rejected": -3.261695623397827, "loss": 0.8333, "nll_loss": 0.7910909652709961, "rewards/accuracies": 0.75, "rewards/chosen": -0.2507782280445099, "rewards/margins": 0.07539132982492447, "rewards/rejected": -0.32616955041885376, "step": 1881 }, { "epoch": 5.152635181382615, "grad_norm": 3.951862096786499, "learning_rate": 7.423287671232876e-07, "log_odds_chosen": 1.2839688062667847, "log_odds_ratio": -0.4441432058811188, "logits/chosen": 0.6905137896537781, "logits/rejected": 0.7081762552261353, "logps/chosen": -2.694368839263916, "logps/rejected": -3.9175591468811035, "loss": 1.0127, "nll_loss": 0.9683064222335815, "rewards/accuracies": 0.75, "rewards/chosen": -0.26943689584732056, "rewards/margins": 0.12231907248497009, "rewards/rejected": -0.39175593852996826, "step": 1882 }, { "epoch": 5.155373032169747, "grad_norm": 4.381540298461914, "learning_rate": 7.421917808219178e-07, "log_odds_chosen": 2.1012909412384033, "log_odds_ratio": -0.34409454464912415, "logits/chosen": 0.7638899087905884, "logits/rejected": 0.8032020330429077, "logps/chosen": -2.6847727298736572, "logps/rejected": -4.665116310119629, "loss": 0.8646, "nll_loss": 0.8301698565483093, "rewards/accuracies": 0.75, "rewards/chosen": -0.26847729086875916, "rewards/margins": 0.1980343759059906, "rewards/rejected": -0.46651163697242737, "step": 1883 }, { "epoch": 5.158110882956879, "grad_norm": 3.3714969158172607, "learning_rate": 7.420547945205479e-07, "log_odds_chosen": 1.4108912944793701, "log_odds_ratio": -0.2819488048553467, "logits/chosen": 0.6992390155792236, "logits/rejected": 0.682391881942749, "logps/chosen": -2.0443034172058105, "logps/rejected": -3.3634769916534424, "loss": 0.8211, "nll_loss": 0.7929005026817322, "rewards/accuracies": 1.0, "rewards/chosen": -0.20443031191825867, "rewards/margins": 0.13191735744476318, "rewards/rejected": -0.33634769916534424, "step": 1884 }, { "epoch": 5.160848733744011, "grad_norm": 4.531144142150879, "learning_rate": 7.419178082191781e-07, "log_odds_chosen": 0.45221567153930664, "log_odds_ratio": -0.5322096347808838, "logits/chosen": 0.8108497262001038, "logits/rejected": 0.8071675300598145, "logps/chosen": -2.0529892444610596, "logps/rejected": -2.467957019805908, "loss": 0.902, "nll_loss": 0.84881192445755, "rewards/accuracies": 0.75, "rewards/chosen": -0.20529893040657043, "rewards/margins": 0.04149677976965904, "rewards/rejected": -0.24679571390151978, "step": 1885 }, { "epoch": 5.163586584531143, "grad_norm": 3.549495220184326, "learning_rate": 7.417808219178082e-07, "log_odds_chosen": 1.5529439449310303, "log_odds_ratio": -0.31697604060173035, "logits/chosen": 0.8547791838645935, "logits/rejected": 0.8857994079589844, "logps/chosen": -2.330430746078491, "logps/rejected": -3.8003339767456055, "loss": 0.8636, "nll_loss": 0.8319286704063416, "rewards/accuracies": 0.875, "rewards/chosen": -0.23304308950901031, "rewards/margins": 0.14699029922485352, "rewards/rejected": -0.380033403635025, "step": 1886 }, { "epoch": 5.166324435318275, "grad_norm": 3.1641836166381836, "learning_rate": 7.416438356164383e-07, "log_odds_chosen": 0.9045915603637695, "log_odds_ratio": -0.43441057205200195, "logits/chosen": 0.7796645164489746, "logits/rejected": 0.7947072982788086, "logps/chosen": -2.2727391719818115, "logps/rejected": -3.1216275691986084, "loss": 0.8633, "nll_loss": 0.8199065327644348, "rewards/accuracies": 0.875, "rewards/chosen": -0.22727391123771667, "rewards/margins": 0.08488886058330536, "rewards/rejected": -0.31216275691986084, "step": 1887 }, { "epoch": 5.169062286105407, "grad_norm": 4.559994697570801, "learning_rate": 7.415068493150685e-07, "log_odds_chosen": 0.8870992064476013, "log_odds_ratio": -0.4369964599609375, "logits/chosen": 0.8571276664733887, "logits/rejected": 0.7867959141731262, "logps/chosen": -2.555595874786377, "logps/rejected": -3.397829532623291, "loss": 0.8293, "nll_loss": 0.7856041789054871, "rewards/accuracies": 0.625, "rewards/chosen": -0.2555595934391022, "rewards/margins": 0.08422334492206573, "rewards/rejected": -0.3397829234600067, "step": 1888 }, { "epoch": 5.171800136892539, "grad_norm": 4.45686674118042, "learning_rate": 7.413698630136986e-07, "log_odds_chosen": 0.6555041670799255, "log_odds_ratio": -0.4829455018043518, "logits/chosen": 0.8350278735160828, "logits/rejected": 0.7498090863227844, "logps/chosen": -2.44694185256958, "logps/rejected": -3.0386273860931396, "loss": 0.8699, "nll_loss": 0.8215687870979309, "rewards/accuracies": 0.875, "rewards/chosen": -0.24469420313835144, "rewards/margins": 0.05916855111718178, "rewards/rejected": -0.3038627505302429, "step": 1889 }, { "epoch": 5.174537987679671, "grad_norm": 3.400663137435913, "learning_rate": 7.412328767123287e-07, "log_odds_chosen": 1.4316437244415283, "log_odds_ratio": -0.30924224853515625, "logits/chosen": 0.7537615299224854, "logits/rejected": 0.7486855983734131, "logps/chosen": -2.3552212715148926, "logps/rejected": -3.692234754562378, "loss": 0.883, "nll_loss": 0.8520633578300476, "rewards/accuracies": 1.0, "rewards/chosen": -0.2355221062898636, "rewards/margins": 0.133701354265213, "rewards/rejected": -0.3692234754562378, "step": 1890 }, { "epoch": 5.177275838466803, "grad_norm": 7.695067405700684, "learning_rate": 7.410958904109589e-07, "log_odds_chosen": 1.0457818508148193, "log_odds_ratio": -0.36808890104293823, "logits/chosen": 0.9978534579277039, "logits/rejected": 1.012500524520874, "logps/chosen": -2.2079474925994873, "logps/rejected": -3.136608839035034, "loss": 0.7919, "nll_loss": 0.755119264125824, "rewards/accuracies": 0.875, "rewards/chosen": -0.22079476714134216, "rewards/margins": 0.09286610782146454, "rewards/rejected": -0.3136608600616455, "step": 1891 }, { "epoch": 5.1800136892539355, "grad_norm": 3.8111982345581055, "learning_rate": 7.409589041095891e-07, "log_odds_chosen": 1.0629913806915283, "log_odds_ratio": -0.3912563920021057, "logits/chosen": 0.8339204788208008, "logits/rejected": 0.8481795787811279, "logps/chosen": -2.3773372173309326, "logps/rejected": -3.384793281555176, "loss": 0.8693, "nll_loss": 0.8301544189453125, "rewards/accuracies": 0.875, "rewards/chosen": -0.23773372173309326, "rewards/margins": 0.10074561834335327, "rewards/rejected": -0.33847934007644653, "step": 1892 }, { "epoch": 5.1827515400410675, "grad_norm": 7.179299354553223, "learning_rate": 7.408219178082191e-07, "log_odds_chosen": 0.8956098556518555, "log_odds_ratio": -0.5028533935546875, "logits/chosen": 0.9210556745529175, "logits/rejected": 0.9684033393859863, "logps/chosen": -3.401132583618164, "logps/rejected": -4.24947452545166, "loss": 0.8417, "nll_loss": 0.7913795709609985, "rewards/accuracies": 0.875, "rewards/chosen": -0.3401132822036743, "rewards/margins": 0.08483417332172394, "rewards/rejected": -0.42494744062423706, "step": 1893 }, { "epoch": 5.1854893908282, "grad_norm": 4.275723934173584, "learning_rate": 7.406849315068493e-07, "log_odds_chosen": 0.8069748878479004, "log_odds_ratio": -0.43294602632522583, "logits/chosen": 0.6105954051017761, "logits/rejected": 0.5768371224403381, "logps/chosen": -2.0467159748077393, "logps/rejected": -2.749948024749756, "loss": 0.9451, "nll_loss": 0.901759922504425, "rewards/accuracies": 0.875, "rewards/chosen": -0.20467160642147064, "rewards/margins": 0.07032321393489838, "rewards/rejected": -0.27499479055404663, "step": 1894 }, { "epoch": 5.188227241615332, "grad_norm": 3.8022663593292236, "learning_rate": 7.405479452054795e-07, "log_odds_chosen": 1.3236658573150635, "log_odds_ratio": -0.37139004468917847, "logits/chosen": 0.8737122416496277, "logits/rejected": 0.8635424375534058, "logps/chosen": -2.3196141719818115, "logps/rejected": -3.5769715309143066, "loss": 0.9518, "nll_loss": 0.9146907329559326, "rewards/accuracies": 0.75, "rewards/chosen": -0.2319614291191101, "rewards/margins": 0.12573574483394623, "rewards/rejected": -0.35769715905189514, "step": 1895 }, { "epoch": 5.190965092402464, "grad_norm": 5.553488254547119, "learning_rate": 7.404109589041095e-07, "log_odds_chosen": 1.6199637651443481, "log_odds_ratio": -0.5818431377410889, "logits/chosen": 0.6872266530990601, "logits/rejected": 0.7605873346328735, "logps/chosen": -3.076587200164795, "logps/rejected": -4.643345832824707, "loss": 1.0055, "nll_loss": 0.9473167657852173, "rewards/accuracies": 0.875, "rewards/chosen": -0.30765873193740845, "rewards/margins": 0.15667586028575897, "rewards/rejected": -0.4643346071243286, "step": 1896 }, { "epoch": 5.193702943189596, "grad_norm": 5.9123921394348145, "learning_rate": 7.402739726027397e-07, "log_odds_chosen": 1.5711896419525146, "log_odds_ratio": -0.5312114953994751, "logits/chosen": 0.98002028465271, "logits/rejected": 0.9690324068069458, "logps/chosen": -3.0881142616271973, "logps/rejected": -4.592761993408203, "loss": 0.8852, "nll_loss": 0.8320962190628052, "rewards/accuracies": 0.875, "rewards/chosen": -0.3088114559650421, "rewards/margins": 0.1504647582769394, "rewards/rejected": -0.4592761993408203, "step": 1897 }, { "epoch": 5.196440793976728, "grad_norm": 2.9228527545928955, "learning_rate": 7.401369863013698e-07, "log_odds_chosen": 2.0800046920776367, "log_odds_ratio": -0.36780011653900146, "logits/chosen": 0.7956643104553223, "logits/rejected": 0.7257865071296692, "logps/chosen": -1.8245487213134766, "logps/rejected": -3.8244032859802246, "loss": 0.858, "nll_loss": 0.8212270140647888, "rewards/accuracies": 0.875, "rewards/chosen": -0.18245485424995422, "rewards/margins": 0.19998547434806824, "rewards/rejected": -0.38244035840034485, "step": 1898 }, { "epoch": 5.19917864476386, "grad_norm": 4.027379512786865, "learning_rate": 7.4e-07, "log_odds_chosen": 0.4388468265533447, "log_odds_ratio": -0.5961796045303345, "logits/chosen": 0.7375792264938354, "logits/rejected": 0.7195285558700562, "logps/chosen": -2.7643392086029053, "logps/rejected": -3.140598773956299, "loss": 0.9467, "nll_loss": 0.8870742917060852, "rewards/accuracies": 0.625, "rewards/chosen": -0.27643391489982605, "rewards/margins": 0.037625961005687714, "rewards/rejected": -0.314059853553772, "step": 1899 }, { "epoch": 5.201916495550993, "grad_norm": 4.082040786743164, "learning_rate": 7.398630136986301e-07, "log_odds_chosen": 0.8378744721412659, "log_odds_ratio": -0.5082194805145264, "logits/chosen": 0.8017123341560364, "logits/rejected": 0.858669638633728, "logps/chosen": -2.8488082885742188, "logps/rejected": -3.6409027576446533, "loss": 0.9, "nll_loss": 0.8492245078086853, "rewards/accuracies": 0.625, "rewards/chosen": -0.2848808467388153, "rewards/margins": 0.07920943945646286, "rewards/rejected": -0.3640902638435364, "step": 1900 }, { "epoch": 5.204654346338125, "grad_norm": 3.849260091781616, "learning_rate": 7.397260273972602e-07, "log_odds_chosen": 1.3047678470611572, "log_odds_ratio": -0.39188000559806824, "logits/chosen": 0.7065564393997192, "logits/rejected": 0.6799826622009277, "logps/chosen": -2.084160327911377, "logps/rejected": -3.32315731048584, "loss": 0.9093, "nll_loss": 0.8700881004333496, "rewards/accuracies": 0.875, "rewards/chosen": -0.20841604471206665, "rewards/margins": 0.12389971315860748, "rewards/rejected": -0.33231574296951294, "step": 1901 }, { "epoch": 5.207392197125257, "grad_norm": 5.321142196655273, "learning_rate": 7.395890410958904e-07, "log_odds_chosen": 1.2170172929763794, "log_odds_ratio": -0.4497081935405731, "logits/chosen": 0.6419774293899536, "logits/rejected": 0.5800255537033081, "logps/chosen": -2.087660789489746, "logps/rejected": -3.1200599670410156, "loss": 0.9466, "nll_loss": 0.9016379117965698, "rewards/accuracies": 0.75, "rewards/chosen": -0.20876608788967133, "rewards/margins": 0.10323991626501083, "rewards/rejected": -0.31200599670410156, "step": 1902 }, { "epoch": 5.210130047912389, "grad_norm": 4.786520481109619, "learning_rate": 7.394520547945205e-07, "log_odds_chosen": 1.0123281478881836, "log_odds_ratio": -0.6718934178352356, "logits/chosen": 0.8863180875778198, "logits/rejected": 0.9707203507423401, "logps/chosen": -2.790278911590576, "logps/rejected": -3.7923383712768555, "loss": 0.8762, "nll_loss": 0.8090017437934875, "rewards/accuracies": 0.75, "rewards/chosen": -0.27902790904045105, "rewards/margins": 0.1002059206366539, "rewards/rejected": -0.37923383712768555, "step": 1903 }, { "epoch": 5.212867898699521, "grad_norm": 4.46327543258667, "learning_rate": 7.393150684931506e-07, "log_odds_chosen": 1.30482816696167, "log_odds_ratio": -0.3539749085903168, "logits/chosen": 0.8810119032859802, "logits/rejected": 0.8495978713035583, "logps/chosen": -2.796088695526123, "logps/rejected": -4.031922817230225, "loss": 0.9631, "nll_loss": 0.9276614189147949, "rewards/accuracies": 0.875, "rewards/chosen": -0.2796088457107544, "rewards/margins": 0.12358342111110687, "rewards/rejected": -0.40319228172302246, "step": 1904 }, { "epoch": 5.215605749486653, "grad_norm": 4.059994220733643, "learning_rate": 7.391780821917808e-07, "log_odds_chosen": 1.4230525493621826, "log_odds_ratio": -0.4731067717075348, "logits/chosen": 0.713783323764801, "logits/rejected": 0.6844707727432251, "logps/chosen": -1.683915376663208, "logps/rejected": -3.0190582275390625, "loss": 0.9702, "nll_loss": 0.9228430986404419, "rewards/accuracies": 0.625, "rewards/chosen": -0.16839152574539185, "rewards/margins": 0.13351429998874664, "rewards/rejected": -0.3019058406352997, "step": 1905 }, { "epoch": 5.218343600273785, "grad_norm": 3.893476724624634, "learning_rate": 7.39041095890411e-07, "log_odds_chosen": 0.829035758972168, "log_odds_ratio": -0.401472806930542, "logits/chosen": 0.5219240784645081, "logits/rejected": 0.5124256014823914, "logps/chosen": -2.299389362335205, "logps/rejected": -3.0680437088012695, "loss": 0.8679, "nll_loss": 0.8277342319488525, "rewards/accuracies": 0.875, "rewards/chosen": -0.22993893921375275, "rewards/margins": 0.07686544954776764, "rewards/rejected": -0.3068043887615204, "step": 1906 }, { "epoch": 5.221081451060917, "grad_norm": 3.668447732925415, "learning_rate": 7.38904109589041e-07, "log_odds_chosen": 2.2958621978759766, "log_odds_ratio": -0.16331657767295837, "logits/chosen": 0.8341554999351501, "logits/rejected": 0.8349677920341492, "logps/chosen": -2.25262451171875, "logps/rejected": -4.388485908508301, "loss": 0.7789, "nll_loss": 0.7625210285186768, "rewards/accuracies": 0.875, "rewards/chosen": -0.22526246309280396, "rewards/margins": 0.21358610689640045, "rewards/rejected": -0.4388485550880432, "step": 1907 }, { "epoch": 5.223819301848049, "grad_norm": 4.803360462188721, "learning_rate": 7.387671232876712e-07, "log_odds_chosen": 0.7674797177314758, "log_odds_ratio": -0.5122514367103577, "logits/chosen": 0.7394995093345642, "logits/rejected": 0.8127537965774536, "logps/chosen": -2.596780776977539, "logps/rejected": -3.3274097442626953, "loss": 0.9305, "nll_loss": 0.8792543411254883, "rewards/accuracies": 0.875, "rewards/chosen": -0.25967806577682495, "rewards/margins": 0.07306288182735443, "rewards/rejected": -0.3327409625053406, "step": 1908 }, { "epoch": 5.2265571526351815, "grad_norm": 4.803437232971191, "learning_rate": 7.386301369863014e-07, "log_odds_chosen": 0.542557954788208, "log_odds_ratio": -0.7005336880683899, "logits/chosen": 0.7680934071540833, "logits/rejected": 0.8132243752479553, "logps/chosen": -3.0316736698150635, "logps/rejected": -3.495203971862793, "loss": 0.8828, "nll_loss": 0.8127318620681763, "rewards/accuracies": 0.875, "rewards/chosen": -0.30316734313964844, "rewards/margins": 0.0463530533015728, "rewards/rejected": -0.3495204448699951, "step": 1909 }, { "epoch": 5.229295003422314, "grad_norm": 3.0740084648132324, "learning_rate": 7.384931506849314e-07, "log_odds_chosen": 2.313774585723877, "log_odds_ratio": -0.15049996972084045, "logits/chosen": 0.7863996028900146, "logits/rejected": 0.7865555286407471, "logps/chosen": -2.0410332679748535, "logps/rejected": -4.212024211883545, "loss": 0.7629, "nll_loss": 0.7478252053260803, "rewards/accuracies": 1.0, "rewards/chosen": -0.20410332083702087, "rewards/margins": 0.217099130153656, "rewards/rejected": -0.42120248079299927, "step": 1910 }, { "epoch": 5.232032854209446, "grad_norm": 3.7006959915161133, "learning_rate": 7.383561643835616e-07, "log_odds_chosen": 0.8607624173164368, "log_odds_ratio": -0.4423370957374573, "logits/chosen": 0.6403486728668213, "logits/rejected": 0.6479754447937012, "logps/chosen": -1.5804789066314697, "logps/rejected": -2.349457263946533, "loss": 0.879, "nll_loss": 0.8348153829574585, "rewards/accuracies": 0.75, "rewards/chosen": -0.1580478996038437, "rewards/margins": 0.07689784467220306, "rewards/rejected": -0.23494574427604675, "step": 1911 }, { "epoch": 5.234770704996578, "grad_norm": 4.666694641113281, "learning_rate": 7.382191780821917e-07, "log_odds_chosen": 1.295344352722168, "log_odds_ratio": -0.36000311374664307, "logits/chosen": 1.027946949005127, "logits/rejected": 1.0416507720947266, "logps/chosen": -2.974924087524414, "logps/rejected": -4.19382905960083, "loss": 0.9045, "nll_loss": 0.8684984445571899, "rewards/accuracies": 0.875, "rewards/chosen": -0.2974924147129059, "rewards/margins": 0.12189047783613205, "rewards/rejected": -0.41938287019729614, "step": 1912 }, { "epoch": 5.23750855578371, "grad_norm": 3.871751308441162, "learning_rate": 7.380821917808218e-07, "log_odds_chosen": 1.2317442893981934, "log_odds_ratio": -0.35960012674331665, "logits/chosen": 0.6522946357727051, "logits/rejected": 0.6292124390602112, "logps/chosen": -2.5484161376953125, "logps/rejected": -3.7171854972839355, "loss": 0.9066, "nll_loss": 0.8706103563308716, "rewards/accuracies": 1.0, "rewards/chosen": -0.2548416256904602, "rewards/margins": 0.11687692254781723, "rewards/rejected": -0.37171852588653564, "step": 1913 }, { "epoch": 5.240246406570842, "grad_norm": 6.44773530960083, "learning_rate": 7.37945205479452e-07, "log_odds_chosen": 2.180565118789673, "log_odds_ratio": -0.33015528321266174, "logits/chosen": 0.8671674728393555, "logits/rejected": 0.8445278406143188, "logps/chosen": -2.5263867378234863, "logps/rejected": -4.587450981140137, "loss": 0.8184, "nll_loss": 0.7853949069976807, "rewards/accuracies": 0.875, "rewards/chosen": -0.25263863801956177, "rewards/margins": 0.20610645413398743, "rewards/rejected": -0.4587451219558716, "step": 1914 }, { "epoch": 5.242984257357974, "grad_norm": 3.6754002571105957, "learning_rate": 7.378082191780821e-07, "log_odds_chosen": 0.8641420006752014, "log_odds_ratio": -0.5154005289077759, "logits/chosen": 0.8776358366012573, "logits/rejected": 0.9439936876296997, "logps/chosen": -2.793560266494751, "logps/rejected": -3.5854201316833496, "loss": 0.9068, "nll_loss": 0.8552127480506897, "rewards/accuracies": 0.625, "rewards/chosen": -0.27935606241226196, "rewards/margins": 0.07918596267700195, "rewards/rejected": -0.35854199528694153, "step": 1915 }, { "epoch": 5.245722108145106, "grad_norm": 7.827342987060547, "learning_rate": 7.376712328767123e-07, "log_odds_chosen": -0.1717635989189148, "log_odds_ratio": -1.314073085784912, "logits/chosen": 0.9075102806091309, "logits/rejected": 0.9476664066314697, "logps/chosen": -3.4575915336608887, "logps/rejected": -3.2215075492858887, "loss": 0.9735, "nll_loss": 0.8421015739440918, "rewards/accuracies": 0.75, "rewards/chosen": -0.3457591235637665, "rewards/margins": -0.023608386516571045, "rewards/rejected": -0.3221507668495178, "step": 1916 }, { "epoch": 5.248459958932238, "grad_norm": 3.710662841796875, "learning_rate": 7.375342465753424e-07, "log_odds_chosen": 0.9074225425720215, "log_odds_ratio": -0.3830757737159729, "logits/chosen": 0.8915722370147705, "logits/rejected": 0.8866392374038696, "logps/chosen": -2.122096061706543, "logps/rejected": -2.8883118629455566, "loss": 0.8975, "nll_loss": 0.8591454029083252, "rewards/accuracies": 0.75, "rewards/chosen": -0.21220959722995758, "rewards/margins": 0.07662159204483032, "rewards/rejected": -0.2888311743736267, "step": 1917 }, { "epoch": 5.25119780971937, "grad_norm": 5.63933801651001, "learning_rate": 7.373972602739725e-07, "log_odds_chosen": 1.2865644693374634, "log_odds_ratio": -0.4210028648376465, "logits/chosen": 0.60182124376297, "logits/rejected": 0.5776338577270508, "logps/chosen": -2.1178135871887207, "logps/rejected": -3.3660101890563965, "loss": 0.8778, "nll_loss": 0.8356673121452332, "rewards/accuracies": 0.75, "rewards/chosen": -0.2117813676595688, "rewards/margins": 0.12481965869665146, "rewards/rejected": -0.33660101890563965, "step": 1918 }, { "epoch": 5.253935660506502, "grad_norm": 3.903773307800293, "learning_rate": 7.372602739726027e-07, "log_odds_chosen": 1.3933024406433105, "log_odds_ratio": -0.275701105594635, "logits/chosen": 0.8340885043144226, "logits/rejected": 0.8673006296157837, "logps/chosen": -1.759610652923584, "logps/rejected": -2.9803199768066406, "loss": 0.8255, "nll_loss": 0.7979430556297302, "rewards/accuracies": 1.0, "rewards/chosen": -0.17596107721328735, "rewards/margins": 0.12207093089818954, "rewards/rejected": -0.2980319857597351, "step": 1919 }, { "epoch": 5.256673511293634, "grad_norm": 3.8991332054138184, "learning_rate": 7.371232876712329e-07, "log_odds_chosen": 0.6978787779808044, "log_odds_ratio": -0.6177933216094971, "logits/chosen": 0.641059160232544, "logits/rejected": 0.7740435004234314, "logps/chosen": -2.5156490802764893, "logps/rejected": -3.183371067047119, "loss": 0.8313, "nll_loss": 0.7694742679595947, "rewards/accuracies": 0.625, "rewards/chosen": -0.2515649199485779, "rewards/margins": 0.06677219271659851, "rewards/rejected": -0.318337082862854, "step": 1920 }, { "epoch": 5.259411362080766, "grad_norm": 5.096633434295654, "learning_rate": 7.369863013698629e-07, "log_odds_chosen": 0.7780925035476685, "log_odds_ratio": -0.6174483895301819, "logits/chosen": 0.8285703659057617, "logits/rejected": 0.9121414422988892, "logps/chosen": -2.7256221771240234, "logps/rejected": -3.4958910942077637, "loss": 0.9486, "nll_loss": 0.8868929743766785, "rewards/accuracies": 0.625, "rewards/chosen": -0.2725622057914734, "rewards/margins": 0.07702690362930298, "rewards/rejected": -0.34958910942077637, "step": 1921 }, { "epoch": 5.262149212867898, "grad_norm": 5.221038341522217, "learning_rate": 7.368493150684931e-07, "log_odds_chosen": 0.9575662612915039, "log_odds_ratio": -0.5575223565101624, "logits/chosen": 0.6656145453453064, "logits/rejected": 0.6346858739852905, "logps/chosen": -1.9158735275268555, "logps/rejected": -2.8321330547332764, "loss": 0.9516, "nll_loss": 0.8958939909934998, "rewards/accuracies": 0.75, "rewards/chosen": -0.19158735871315002, "rewards/margins": 0.09162595868110657, "rewards/rejected": -0.2832133173942566, "step": 1922 }, { "epoch": 5.2648870636550305, "grad_norm": 3.9118027687072754, "learning_rate": 7.367123287671233e-07, "log_odds_chosen": 1.8749420642852783, "log_odds_ratio": -0.2340947985649109, "logits/chosen": 0.6032404899597168, "logits/rejected": 0.5691335797309875, "logps/chosen": -2.192410469055176, "logps/rejected": -3.8930411338806152, "loss": 0.8342, "nll_loss": 0.8107996582984924, "rewards/accuracies": 1.0, "rewards/chosen": -0.21924105286598206, "rewards/margins": 0.1700630635023117, "rewards/rejected": -0.38930410146713257, "step": 1923 }, { "epoch": 5.2676249144421625, "grad_norm": 4.452998638153076, "learning_rate": 7.365753424657533e-07, "log_odds_chosen": 0.6836420893669128, "log_odds_ratio": -0.6858587265014648, "logits/chosen": 0.9355310797691345, "logits/rejected": 0.9702037572860718, "logps/chosen": -3.153780698776245, "logps/rejected": -3.7801313400268555, "loss": 1.0334, "nll_loss": 0.9648005962371826, "rewards/accuracies": 0.625, "rewards/chosen": -0.3153780698776245, "rewards/margins": 0.06263507902622223, "rewards/rejected": -0.37801313400268555, "step": 1924 }, { "epoch": 5.270362765229295, "grad_norm": 3.88352632522583, "learning_rate": 7.364383561643835e-07, "log_odds_chosen": 0.6143136620521545, "log_odds_ratio": -0.5030017495155334, "logits/chosen": 0.7847514152526855, "logits/rejected": 0.7982068061828613, "logps/chosen": -2.3346331119537354, "logps/rejected": -2.8982222080230713, "loss": 0.887, "nll_loss": 0.8367400169372559, "rewards/accuracies": 0.75, "rewards/chosen": -0.233463317155838, "rewards/margins": 0.05635892599821091, "rewards/rejected": -0.28982222080230713, "step": 1925 }, { "epoch": 5.273100616016427, "grad_norm": 3.061340093612671, "learning_rate": 7.363013698630136e-07, "log_odds_chosen": 1.4494327306747437, "log_odds_ratio": -0.27896302938461304, "logits/chosen": 0.8811424970626831, "logits/rejected": 0.9400444030761719, "logps/chosen": -2.388115167617798, "logps/rejected": -3.7741591930389404, "loss": 0.7635, "nll_loss": 0.7355787754058838, "rewards/accuracies": 0.875, "rewards/chosen": -0.23881152272224426, "rewards/margins": 0.13860438764095306, "rewards/rejected": -0.3774159252643585, "step": 1926 }, { "epoch": 5.27583846680356, "grad_norm": 5.0740885734558105, "learning_rate": 7.361643835616437e-07, "log_odds_chosen": 0.601385235786438, "log_odds_ratio": -0.5445370078086853, "logits/chosen": 0.736430287361145, "logits/rejected": 0.8145504593849182, "logps/chosen": -2.3884449005126953, "logps/rejected": -2.95568585395813, "loss": 0.8502, "nll_loss": 0.7957779765129089, "rewards/accuracies": 0.5, "rewards/chosen": -0.23884448409080505, "rewards/margins": 0.05672411620616913, "rewards/rejected": -0.2955686151981354, "step": 1927 }, { "epoch": 5.278576317590692, "grad_norm": 3.9388015270233154, "learning_rate": 7.360273972602739e-07, "log_odds_chosen": 0.9420552253723145, "log_odds_ratio": -0.38251227140426636, "logits/chosen": 0.8118784427642822, "logits/rejected": 0.7629886269569397, "logps/chosen": -1.8981897830963135, "logps/rejected": -2.7594878673553467, "loss": 0.9644, "nll_loss": 0.9261226654052734, "rewards/accuracies": 0.875, "rewards/chosen": -0.18981897830963135, "rewards/margins": 0.0861298143863678, "rewards/rejected": -0.27594879269599915, "step": 1928 }, { "epoch": 5.281314168377824, "grad_norm": 3.4572548866271973, "learning_rate": 7.35890410958904e-07, "log_odds_chosen": 0.8266545534133911, "log_odds_ratio": -0.5250853300094604, "logits/chosen": 0.7765524983406067, "logits/rejected": 0.841491162776947, "logps/chosen": -2.753054141998291, "logps/rejected": -3.53320574760437, "loss": 0.8688, "nll_loss": 0.8162742853164673, "rewards/accuracies": 0.875, "rewards/chosen": -0.2753054201602936, "rewards/margins": 0.07801517099142075, "rewards/rejected": -0.3533205986022949, "step": 1929 }, { "epoch": 5.284052019164956, "grad_norm": 4.265413761138916, "learning_rate": 7.357534246575342e-07, "log_odds_chosen": 0.7656949758529663, "log_odds_ratio": -0.527733325958252, "logits/chosen": 0.8586597442626953, "logits/rejected": 0.9290655255317688, "logps/chosen": -2.679628372192383, "logps/rejected": -3.3670663833618164, "loss": 0.892, "nll_loss": 0.8392443656921387, "rewards/accuracies": 0.875, "rewards/chosen": -0.26796284317970276, "rewards/margins": 0.06874381005764008, "rewards/rejected": -0.33670663833618164, "step": 1930 }, { "epoch": 5.286789869952088, "grad_norm": 6.941356658935547, "learning_rate": 7.356164383561643e-07, "log_odds_chosen": 1.5871679782867432, "log_odds_ratio": -0.2686800956726074, "logits/chosen": 0.8841602802276611, "logits/rejected": 0.9841756224632263, "logps/chosen": -2.4407002925872803, "logps/rejected": -3.9353864192962646, "loss": 0.7918, "nll_loss": 0.7649399638175964, "rewards/accuracies": 1.0, "rewards/chosen": -0.24407005310058594, "rewards/margins": 0.1494685858488083, "rewards/rejected": -0.39353862404823303, "step": 1931 }, { "epoch": 5.28952772073922, "grad_norm": 3.525068998336792, "learning_rate": 7.354794520547944e-07, "log_odds_chosen": 0.9278866648674011, "log_odds_ratio": -0.38653069734573364, "logits/chosen": 0.8401673436164856, "logits/rejected": 0.8920740485191345, "logps/chosen": -2.379119396209717, "logps/rejected": -3.2454848289489746, "loss": 0.8374, "nll_loss": 0.7987207174301147, "rewards/accuracies": 0.875, "rewards/chosen": -0.23791193962097168, "rewards/margins": 0.08663655817508698, "rewards/rejected": -0.32454848289489746, "step": 1932 }, { "epoch": 5.292265571526352, "grad_norm": 3.959258556365967, "learning_rate": 7.353424657534247e-07, "log_odds_chosen": 1.7461899518966675, "log_odds_ratio": -0.24562352895736694, "logits/chosen": 0.9715970754623413, "logits/rejected": 1.0854911804199219, "logps/chosen": -2.836581230163574, "logps/rejected": -4.515456676483154, "loss": 0.8053, "nll_loss": 0.7807401418685913, "rewards/accuracies": 0.875, "rewards/chosen": -0.28365811705589294, "rewards/margins": 0.16788752377033234, "rewards/rejected": -0.45154568552970886, "step": 1933 }, { "epoch": 5.295003422313484, "grad_norm": 4.267845153808594, "learning_rate": 7.352054794520547e-07, "log_odds_chosen": 0.8115184903144836, "log_odds_ratio": -0.6976532936096191, "logits/chosen": 0.9797064065933228, "logits/rejected": 1.0346086025238037, "logps/chosen": -2.953765869140625, "logps/rejected": -3.6983652114868164, "loss": 0.9369, "nll_loss": 0.8670937418937683, "rewards/accuracies": 0.5, "rewards/chosen": -0.29537662863731384, "rewards/margins": 0.07445991039276123, "rewards/rejected": -0.3698365092277527, "step": 1934 }, { "epoch": 5.297741273100616, "grad_norm": 3.5061304569244385, "learning_rate": 7.350684931506848e-07, "log_odds_chosen": 2.1125247478485107, "log_odds_ratio": -0.2507088780403137, "logits/chosen": 1.0730313062667847, "logits/rejected": 1.1092772483825684, "logps/chosen": -2.5471386909484863, "logps/rejected": -4.5846686363220215, "loss": 0.7606, "nll_loss": 0.7355656623840332, "rewards/accuracies": 1.0, "rewards/chosen": -0.25471392273902893, "rewards/margins": 0.20375296473503113, "rewards/rejected": -0.45846688747406006, "step": 1935 }, { "epoch": 5.300479123887748, "grad_norm": 4.039211273193359, "learning_rate": 7.349315068493151e-07, "log_odds_chosen": 1.4984562397003174, "log_odds_ratio": -0.423613041639328, "logits/chosen": 0.8837480545043945, "logits/rejected": 0.754184901714325, "logps/chosen": -2.268533706665039, "logps/rejected": -3.6889452934265137, "loss": 0.929, "nll_loss": 0.8866711854934692, "rewards/accuracies": 0.75, "rewards/chosen": -0.2268533855676651, "rewards/margins": 0.1420411318540573, "rewards/rejected": -0.3688945174217224, "step": 1936 }, { "epoch": 5.30321697467488, "grad_norm": 4.042115688323975, "learning_rate": 7.347945205479453e-07, "log_odds_chosen": 1.9894670248031616, "log_odds_ratio": -0.19525980949401855, "logits/chosen": 0.84192955493927, "logits/rejected": 0.7443764209747314, "logps/chosen": -1.9060444831848145, "logps/rejected": -3.698573112487793, "loss": 0.8058, "nll_loss": 0.7863034605979919, "rewards/accuracies": 1.0, "rewards/chosen": -0.19060446321964264, "rewards/margins": 0.17925283312797546, "rewards/rejected": -0.3698573112487793, "step": 1937 }, { "epoch": 5.305954825462012, "grad_norm": 4.416546821594238, "learning_rate": 7.346575342465753e-07, "log_odds_chosen": 0.528765857219696, "log_odds_ratio": -0.5387916564941406, "logits/chosen": 0.9237587451934814, "logits/rejected": 0.9243676662445068, "logps/chosen": -2.148658275604248, "logps/rejected": -2.63315749168396, "loss": 0.8477, "nll_loss": 0.793835461139679, "rewards/accuracies": 0.75, "rewards/chosen": -0.21486584842205048, "rewards/margins": 0.04844992607831955, "rewards/rejected": -0.26331576704978943, "step": 1938 }, { "epoch": 5.308692676249144, "grad_norm": 3.374098539352417, "learning_rate": 7.345205479452055e-07, "log_odds_chosen": 2.758779287338257, "log_odds_ratio": -0.0985785648226738, "logits/chosen": 0.8663560152053833, "logits/rejected": 0.8062804341316223, "logps/chosen": -2.3877646923065186, "logps/rejected": -5.013500213623047, "loss": 0.8164, "nll_loss": 0.8065378069877625, "rewards/accuracies": 1.0, "rewards/chosen": -0.23877647519111633, "rewards/margins": 0.2625735402107239, "rewards/rejected": -0.5013500452041626, "step": 1939 }, { "epoch": 5.3114305270362765, "grad_norm": 4.840961933135986, "learning_rate": 7.343835616438357e-07, "log_odds_chosen": 0.26655206084251404, "log_odds_ratio": -0.8104981184005737, "logits/chosen": 0.9240651726722717, "logits/rejected": 0.96953284740448, "logps/chosen": -2.8603999614715576, "logps/rejected": -3.1020779609680176, "loss": 0.9619, "nll_loss": 0.8808567523956299, "rewards/accuracies": 0.5, "rewards/chosen": -0.2860400080680847, "rewards/margins": 0.024167772382497787, "rewards/rejected": -0.3102077841758728, "step": 1940 }, { "epoch": 5.314168377823409, "grad_norm": 3.7036266326904297, "learning_rate": 7.342465753424657e-07, "log_odds_chosen": 1.0419871807098389, "log_odds_ratio": -0.38667193055152893, "logits/chosen": 0.8485457897186279, "logits/rejected": 0.9145976305007935, "logps/chosen": -2.4860167503356934, "logps/rejected": -3.4564075469970703, "loss": 0.8408, "nll_loss": 0.8021464943885803, "rewards/accuracies": 0.875, "rewards/chosen": -0.24860167503356934, "rewards/margins": 0.09703907370567322, "rewards/rejected": -0.34564074873924255, "step": 1941 }, { "epoch": 5.316906228610541, "grad_norm": 4.8103251457214355, "learning_rate": 7.341095890410959e-07, "log_odds_chosen": 0.24572360515594482, "log_odds_ratio": -0.672146201133728, "logits/chosen": 0.836299479007721, "logits/rejected": 0.7765334844589233, "logps/chosen": -3.0255959033966064, "logps/rejected": -3.2453579902648926, "loss": 0.9878, "nll_loss": 0.9206336736679077, "rewards/accuracies": 0.625, "rewards/chosen": -0.30255958437919617, "rewards/margins": 0.021976208314299583, "rewards/rejected": -0.3245357573032379, "step": 1942 }, { "epoch": 5.319644079397673, "grad_norm": 3.740438461303711, "learning_rate": 7.33972602739726e-07, "log_odds_chosen": 1.4524787664413452, "log_odds_ratio": -0.319815993309021, "logits/chosen": 0.8628191351890564, "logits/rejected": 0.9099334478378296, "logps/chosen": -2.428765058517456, "logps/rejected": -3.762331962585449, "loss": 0.8357, "nll_loss": 0.8037306666374207, "rewards/accuracies": 0.875, "rewards/chosen": -0.24287652969360352, "rewards/margins": 0.13335669040679932, "rewards/rejected": -0.37623322010040283, "step": 1943 }, { "epoch": 5.322381930184805, "grad_norm": 4.055777549743652, "learning_rate": 7.338356164383562e-07, "log_odds_chosen": 1.6021689176559448, "log_odds_ratio": -0.4127402603626251, "logits/chosen": 0.7743362188339233, "logits/rejected": 0.7146800756454468, "logps/chosen": -2.3053505420684814, "logps/rejected": -3.794177532196045, "loss": 0.865, "nll_loss": 0.8237190246582031, "rewards/accuracies": 0.875, "rewards/chosen": -0.23053507506847382, "rewards/margins": 0.14888271689414978, "rewards/rejected": -0.3794177770614624, "step": 1944 }, { "epoch": 5.325119780971937, "grad_norm": 3.28859806060791, "learning_rate": 7.336986301369863e-07, "log_odds_chosen": 0.9121408462524414, "log_odds_ratio": -0.38166090846061707, "logits/chosen": 0.777495265007019, "logits/rejected": 0.7541149854660034, "logps/chosen": -2.0035951137542725, "logps/rejected": -2.8214192390441895, "loss": 0.8819, "nll_loss": 0.8437138795852661, "rewards/accuracies": 0.875, "rewards/chosen": -0.2003595232963562, "rewards/margins": 0.08178241550922394, "rewards/rejected": -0.28214192390441895, "step": 1945 }, { "epoch": 5.327857631759069, "grad_norm": 4.649733543395996, "learning_rate": 7.335616438356164e-07, "log_odds_chosen": 0.5095351338386536, "log_odds_ratio": -0.5504305362701416, "logits/chosen": 0.8216509819030762, "logits/rejected": 0.8573384284973145, "logps/chosen": -2.691107749938965, "logps/rejected": -3.1538453102111816, "loss": 0.9646, "nll_loss": 0.9095293283462524, "rewards/accuracies": 0.625, "rewards/chosen": -0.2691107988357544, "rewards/margins": 0.046273790299892426, "rewards/rejected": -0.3153845965862274, "step": 1946 }, { "epoch": 5.330595482546201, "grad_norm": 4.195348262786865, "learning_rate": 7.334246575342466e-07, "log_odds_chosen": 1.382939100265503, "log_odds_ratio": -0.3299679458141327, "logits/chosen": 0.8320245146751404, "logits/rejected": 0.8465014696121216, "logps/chosen": -2.4982423782348633, "logps/rejected": -3.772152900695801, "loss": 0.8283, "nll_loss": 0.795271635055542, "rewards/accuracies": 0.875, "rewards/chosen": -0.24982424080371857, "rewards/margins": 0.127391055226326, "rewards/rejected": -0.37721529603004456, "step": 1947 }, { "epoch": 5.333333333333333, "grad_norm": 4.413191318511963, "learning_rate": 7.332876712328767e-07, "log_odds_chosen": 1.216055989265442, "log_odds_ratio": -0.4456389546394348, "logits/chosen": 0.7832368612289429, "logits/rejected": 0.7454174757003784, "logps/chosen": -2.314208507537842, "logps/rejected": -3.46243953704834, "loss": 0.9111, "nll_loss": 0.8665156364440918, "rewards/accuracies": 0.625, "rewards/chosen": -0.2314208596944809, "rewards/margins": 0.1148231253027916, "rewards/rejected": -0.3462439775466919, "step": 1948 }, { "epoch": 5.336071184120465, "grad_norm": 5.415940284729004, "learning_rate": 7.331506849315068e-07, "log_odds_chosen": 0.5162230730056763, "log_odds_ratio": -0.6878059506416321, "logits/chosen": 0.9136152267456055, "logits/rejected": 0.9482153058052063, "logps/chosen": -2.9374938011169434, "logps/rejected": -3.4373302459716797, "loss": 0.9347, "nll_loss": 0.8659558892250061, "rewards/accuracies": 0.625, "rewards/chosen": -0.2937493920326233, "rewards/margins": 0.04998364299535751, "rewards/rejected": -0.343733012676239, "step": 1949 }, { "epoch": 5.338809034907597, "grad_norm": 3.60775089263916, "learning_rate": 7.33013698630137e-07, "log_odds_chosen": 0.9957337379455566, "log_odds_ratio": -0.45292091369628906, "logits/chosen": 0.6601681709289551, "logits/rejected": 0.670760452747345, "logps/chosen": -2.4501395225524902, "logps/rejected": -3.374974012374878, "loss": 0.8426, "nll_loss": 0.7972973585128784, "rewards/accuracies": 0.75, "rewards/chosen": -0.24501395225524902, "rewards/margins": 0.09248346090316772, "rewards/rejected": -0.33749741315841675, "step": 1950 }, { "epoch": 5.341546885694729, "grad_norm": 4.051793575286865, "learning_rate": 7.328767123287672e-07, "log_odds_chosen": 1.156118631362915, "log_odds_ratio": -0.3801705241203308, "logits/chosen": 0.7810273170471191, "logits/rejected": 0.688529372215271, "logps/chosen": -2.1275482177734375, "logps/rejected": -3.2059714794158936, "loss": 0.9672, "nll_loss": 0.9291462898254395, "rewards/accuracies": 0.875, "rewards/chosen": -0.21275484561920166, "rewards/margins": 0.1078423261642456, "rewards/rejected": -0.32059717178344727, "step": 1951 }, { "epoch": 5.344284736481862, "grad_norm": 4.1749725341796875, "learning_rate": 7.327397260273972e-07, "log_odds_chosen": 0.5474295616149902, "log_odds_ratio": -0.5554282665252686, "logits/chosen": 0.7680408358573914, "logits/rejected": 0.7373114824295044, "logps/chosen": -1.9016777276992798, "logps/rejected": -2.3690929412841797, "loss": 0.9435, "nll_loss": 0.8879541158676147, "rewards/accuracies": 0.75, "rewards/chosen": -0.19016778469085693, "rewards/margins": 0.04674151539802551, "rewards/rejected": -0.23690930008888245, "step": 1952 }, { "epoch": 5.347022587268993, "grad_norm": 3.5536835193634033, "learning_rate": 7.326027397260274e-07, "log_odds_chosen": 2.6136891841888428, "log_odds_ratio": -0.2164521962404251, "logits/chosen": 0.8866790533065796, "logits/rejected": 0.9061034321784973, "logps/chosen": -2.8354978561401367, "logps/rejected": -5.40023136138916, "loss": 0.909, "nll_loss": 0.8873237371444702, "rewards/accuracies": 1.0, "rewards/chosen": -0.28354978561401367, "rewards/margins": 0.2564733922481537, "rewards/rejected": -0.5400232076644897, "step": 1953 }, { "epoch": 5.349760438056126, "grad_norm": 3.1672494411468506, "learning_rate": 7.324657534246576e-07, "log_odds_chosen": 0.9079355597496033, "log_odds_ratio": -0.38443896174430847, "logits/chosen": 0.95421302318573, "logits/rejected": 1.0306239128112793, "logps/chosen": -2.4917714595794678, "logps/rejected": -3.3407111167907715, "loss": 0.8162, "nll_loss": 0.7777966856956482, "rewards/accuracies": 1.0, "rewards/chosen": -0.24917715787887573, "rewards/margins": 0.08489394187927246, "rewards/rejected": -0.3340711295604706, "step": 1954 }, { "epoch": 5.352498288843258, "grad_norm": 5.5553131103515625, "learning_rate": 7.323287671232876e-07, "log_odds_chosen": 0.5816553235054016, "log_odds_ratio": -0.4926562011241913, "logits/chosen": 0.9922288656234741, "logits/rejected": 0.8951766490936279, "logps/chosen": -2.592151641845703, "logps/rejected": -3.076338291168213, "loss": 0.9285, "nll_loss": 0.8792539238929749, "rewards/accuracies": 0.75, "rewards/chosen": -0.25921517610549927, "rewards/margins": 0.04841864854097366, "rewards/rejected": -0.30763381719589233, "step": 1955 }, { "epoch": 5.3552361396303905, "grad_norm": 5.518121242523193, "learning_rate": 7.321917808219178e-07, "log_odds_chosen": 1.0968011617660522, "log_odds_ratio": -0.6279608011245728, "logits/chosen": 1.1187682151794434, "logits/rejected": 1.1752548217773438, "logps/chosen": -3.503873109817505, "logps/rejected": -4.522566318511963, "loss": 0.8142, "nll_loss": 0.7513796091079712, "rewards/accuracies": 0.625, "rewards/chosen": -0.3503872752189636, "rewards/margins": 0.10186934471130371, "rewards/rejected": -0.4522566497325897, "step": 1956 }, { "epoch": 5.3579739904175225, "grad_norm": 3.7596523761749268, "learning_rate": 7.320547945205479e-07, "log_odds_chosen": 0.7437874674797058, "log_odds_ratio": -0.4615263342857361, "logits/chosen": 0.6189416646957397, "logits/rejected": 0.6629567742347717, "logps/chosen": -2.352571725845337, "logps/rejected": -3.041182518005371, "loss": 0.8833, "nll_loss": 0.8371632099151611, "rewards/accuracies": 0.75, "rewards/chosen": -0.23525719344615936, "rewards/margins": 0.06886108219623566, "rewards/rejected": -0.304118275642395, "step": 1957 }, { "epoch": 5.360711841204655, "grad_norm": 3.264756441116333, "learning_rate": 7.319178082191781e-07, "log_odds_chosen": 1.603873610496521, "log_odds_ratio": -0.2634582817554474, "logits/chosen": 0.778872013092041, "logits/rejected": 0.7411553859710693, "logps/chosen": -1.675433874130249, "logps/rejected": -3.137241840362549, "loss": 0.9063, "nll_loss": 0.8799228668212891, "rewards/accuracies": 1.0, "rewards/chosen": -0.16754339635372162, "rewards/margins": 0.14618080854415894, "rewards/rejected": -0.31372418999671936, "step": 1958 }, { "epoch": 5.363449691991787, "grad_norm": 4.421322345733643, "learning_rate": 7.317808219178082e-07, "log_odds_chosen": 1.1522115468978882, "log_odds_ratio": -0.34075015783309937, "logits/chosen": 0.7687506079673767, "logits/rejected": 0.7740472555160522, "logps/chosen": -2.41451358795166, "logps/rejected": -3.4997758865356445, "loss": 0.8789, "nll_loss": 0.8448257446289062, "rewards/accuracies": 0.875, "rewards/chosen": -0.24145136773586273, "rewards/margins": 0.10852622240781784, "rewards/rejected": -0.34997761249542236, "step": 1959 }, { "epoch": 5.366187542778919, "grad_norm": 4.128481388092041, "learning_rate": 7.316438356164383e-07, "log_odds_chosen": 1.48552668094635, "log_odds_ratio": -0.39554882049560547, "logits/chosen": 0.8422651290893555, "logits/rejected": 0.8463667035102844, "logps/chosen": -2.680253505706787, "logps/rejected": -4.111935138702393, "loss": 0.9332, "nll_loss": 0.8936910033226013, "rewards/accuracies": 0.875, "rewards/chosen": -0.26802533864974976, "rewards/margins": 0.14316818118095398, "rewards/rejected": -0.4111935496330261, "step": 1960 }, { "epoch": 5.368925393566051, "grad_norm": 4.108766555786133, "learning_rate": 7.315068493150685e-07, "log_odds_chosen": 0.17874157428741455, "log_odds_ratio": -0.6973824501037598, "logits/chosen": 0.8728803992271423, "logits/rejected": 0.82215416431427, "logps/chosen": -2.4089155197143555, "logps/rejected": -2.550649881362915, "loss": 0.9074, "nll_loss": 0.8377050161361694, "rewards/accuracies": 0.75, "rewards/chosen": -0.24089154601097107, "rewards/margins": 0.014173446223139763, "rewards/rejected": -0.2550649642944336, "step": 1961 }, { "epoch": 5.371663244353183, "grad_norm": 3.328369617462158, "learning_rate": 7.313698630136986e-07, "log_odds_chosen": 1.7031139135360718, "log_odds_ratio": -0.2956671118736267, "logits/chosen": 1.0134897232055664, "logits/rejected": 1.0525964498519897, "logps/chosen": -1.627772331237793, "logps/rejected": -3.174893856048584, "loss": 0.8578, "nll_loss": 0.8282813429832458, "rewards/accuracies": 1.0, "rewards/chosen": -0.16277724504470825, "rewards/margins": 0.15471217036247253, "rewards/rejected": -0.3174893856048584, "step": 1962 }, { "epoch": 5.374401095140315, "grad_norm": 4.58620023727417, "learning_rate": 7.312328767123287e-07, "log_odds_chosen": 2.1356968879699707, "log_odds_ratio": -0.28359419107437134, "logits/chosen": 0.971128523349762, "logits/rejected": 1.0102379322052002, "logps/chosen": -2.1478679180145264, "logps/rejected": -4.20186710357666, "loss": 0.7883, "nll_loss": 0.7599697113037109, "rewards/accuracies": 1.0, "rewards/chosen": -0.21478679776191711, "rewards/margins": 0.20539993047714233, "rewards/rejected": -0.42018672823905945, "step": 1963 }, { "epoch": 5.377138945927447, "grad_norm": 4.47922420501709, "learning_rate": 7.310958904109589e-07, "log_odds_chosen": 0.8193879127502441, "log_odds_ratio": -0.469540536403656, "logits/chosen": 0.7387632131576538, "logits/rejected": 0.7833269834518433, "logps/chosen": -2.543787956237793, "logps/rejected": -3.3431291580200195, "loss": 0.9363, "nll_loss": 0.8893713355064392, "rewards/accuracies": 0.75, "rewards/chosen": -0.2543787956237793, "rewards/margins": 0.07993413507938385, "rewards/rejected": -0.33431291580200195, "step": 1964 }, { "epoch": 5.379876796714579, "grad_norm": 4.002213001251221, "learning_rate": 7.309589041095891e-07, "log_odds_chosen": 0.904922366142273, "log_odds_ratio": -0.41140782833099365, "logits/chosen": 0.843487560749054, "logits/rejected": 0.7567609548568726, "logps/chosen": -1.6107876300811768, "logps/rejected": -2.388197183609009, "loss": 0.9453, "nll_loss": 0.9042012691497803, "rewards/accuracies": 0.875, "rewards/chosen": -0.16107875108718872, "rewards/margins": 0.07774095982313156, "rewards/rejected": -0.23881971836090088, "step": 1965 }, { "epoch": 5.382614647501711, "grad_norm": 4.6947712898254395, "learning_rate": 7.308219178082191e-07, "log_odds_chosen": 2.2622156143188477, "log_odds_ratio": -0.2186967134475708, "logits/chosen": 0.8726854920387268, "logits/rejected": 0.9240456819534302, "logps/chosen": -2.5396928787231445, "logps/rejected": -4.709283828735352, "loss": 0.8497, "nll_loss": 0.8277918100357056, "rewards/accuracies": 1.0, "rewards/chosen": -0.25396928191185, "rewards/margins": 0.2169591188430786, "rewards/rejected": -0.4709284007549286, "step": 1966 }, { "epoch": 5.385352498288843, "grad_norm": 3.3313117027282715, "learning_rate": 7.306849315068493e-07, "log_odds_chosen": 1.7216969728469849, "log_odds_ratio": -0.21358007192611694, "logits/chosen": 0.831771194934845, "logits/rejected": 0.8255912661552429, "logps/chosen": -2.4895853996276855, "logps/rejected": -4.114887714385986, "loss": 0.9686, "nll_loss": 0.9472916126251221, "rewards/accuracies": 1.0, "rewards/chosen": -0.2489585280418396, "rewards/margins": 0.16253024339675903, "rewards/rejected": -0.41148877143859863, "step": 1967 }, { "epoch": 5.388090349075975, "grad_norm": 4.922396183013916, "learning_rate": 7.305479452054795e-07, "log_odds_chosen": 1.035223364830017, "log_odds_ratio": -0.7699719667434692, "logits/chosen": 0.8738719820976257, "logits/rejected": 0.8847923278808594, "logps/chosen": -3.0085415840148926, "logps/rejected": -3.979464054107666, "loss": 0.9912, "nll_loss": 0.9142130613327026, "rewards/accuracies": 0.75, "rewards/chosen": -0.3008541464805603, "rewards/margins": 0.09709222614765167, "rewards/rejected": -0.39794638752937317, "step": 1968 }, { "epoch": 5.390828199863107, "grad_norm": 3.7195844650268555, "learning_rate": 7.304109589041095e-07, "log_odds_chosen": 1.5438179969787598, "log_odds_ratio": -0.36129510402679443, "logits/chosen": 0.8986122608184814, "logits/rejected": 0.8862336874008179, "logps/chosen": -1.8264126777648926, "logps/rejected": -3.206317186355591, "loss": 0.8731, "nll_loss": 0.8369637131690979, "rewards/accuracies": 1.0, "rewards/chosen": -0.18264128267765045, "rewards/margins": 0.13799041509628296, "rewards/rejected": -0.3206317126750946, "step": 1969 }, { "epoch": 5.393566050650239, "grad_norm": 3.416684865951538, "learning_rate": 7.302739726027397e-07, "log_odds_chosen": 1.8109729290008545, "log_odds_ratio": -0.4269333481788635, "logits/chosen": 0.8487482666969299, "logits/rejected": 0.8642778396606445, "logps/chosen": -2.1782071590423584, "logps/rejected": -3.8004543781280518, "loss": 0.8726, "nll_loss": 0.8298673629760742, "rewards/accuracies": 0.875, "rewards/chosen": -0.21782073378562927, "rewards/margins": 0.16222470998764038, "rewards/rejected": -0.38004544377326965, "step": 1970 }, { "epoch": 5.3963039014373715, "grad_norm": 3.727766513824463, "learning_rate": 7.301369863013699e-07, "log_odds_chosen": 1.181082010269165, "log_odds_ratio": -0.33189305663108826, "logits/chosen": 1.0953243970870972, "logits/rejected": 1.1531654596328735, "logps/chosen": -2.6062159538269043, "logps/rejected": -3.705695629119873, "loss": 0.7639, "nll_loss": 0.7306626439094543, "rewards/accuracies": 0.875, "rewards/chosen": -0.2606216073036194, "rewards/margins": 0.10994794964790344, "rewards/rejected": -0.37056952714920044, "step": 1971 }, { "epoch": 5.3990417522245036, "grad_norm": 3.803447723388672, "learning_rate": 7.3e-07, "log_odds_chosen": 1.516359567642212, "log_odds_ratio": -0.3903282880783081, "logits/chosen": 0.8772855997085571, "logits/rejected": 0.9100658893585205, "logps/chosen": -2.2140514850616455, "logps/rejected": -3.60748291015625, "loss": 0.7546, "nll_loss": 0.7155630588531494, "rewards/accuracies": 0.875, "rewards/chosen": -0.22140514850616455, "rewards/margins": 0.13934314250946045, "rewards/rejected": -0.360748291015625, "step": 1972 }, { "epoch": 5.401779603011636, "grad_norm": 5.146708965301514, "learning_rate": 7.298630136986301e-07, "log_odds_chosen": 0.8850677609443665, "log_odds_ratio": -0.40995538234710693, "logits/chosen": 0.691143810749054, "logits/rejected": 0.7787596583366394, "logps/chosen": -2.2364754676818848, "logps/rejected": -3.012110710144043, "loss": 0.8245, "nll_loss": 0.7835418581962585, "rewards/accuracies": 1.0, "rewards/chosen": -0.2236475646495819, "rewards/margins": 0.07756350934505463, "rewards/rejected": -0.30121105909347534, "step": 1973 }, { "epoch": 5.404517453798768, "grad_norm": 6.426517486572266, "learning_rate": 7.297260273972602e-07, "log_odds_chosen": 0.6493837833404541, "log_odds_ratio": -0.5922800898551941, "logits/chosen": 0.7535052299499512, "logits/rejected": 0.8766577839851379, "logps/chosen": -2.294551372528076, "logps/rejected": -2.8921046257019043, "loss": 0.8376, "nll_loss": 0.7783489227294922, "rewards/accuracies": 0.875, "rewards/chosen": -0.2294551283121109, "rewards/margins": 0.05975531414151192, "rewards/rejected": -0.2892104387283325, "step": 1974 }, { "epoch": 5.4072553045859, "grad_norm": 4.184139728546143, "learning_rate": 7.295890410958904e-07, "log_odds_chosen": 1.4781261682510376, "log_odds_ratio": -0.47167474031448364, "logits/chosen": 0.7750766277313232, "logits/rejected": 0.8123874068260193, "logps/chosen": -2.7716784477233887, "logps/rejected": -4.197580337524414, "loss": 0.8757, "nll_loss": 0.8285353183746338, "rewards/accuracies": 0.875, "rewards/chosen": -0.2771678566932678, "rewards/margins": 0.14259016513824463, "rewards/rejected": -0.41975802183151245, "step": 1975 }, { "epoch": 5.409993155373032, "grad_norm": 5.642507553100586, "learning_rate": 7.294520547945205e-07, "log_odds_chosen": 1.2556407451629639, "log_odds_ratio": -0.46595776081085205, "logits/chosen": 0.6332361102104187, "logits/rejected": 0.6548097133636475, "logps/chosen": -2.240471601486206, "logps/rejected": -3.3956761360168457, "loss": 0.9148, "nll_loss": 0.8682478070259094, "rewards/accuracies": 0.75, "rewards/chosen": -0.22404715418815613, "rewards/margins": 0.11552046239376068, "rewards/rejected": -0.339567631483078, "step": 1976 }, { "epoch": 5.412731006160164, "grad_norm": 4.456853866577148, "learning_rate": 7.293150684931506e-07, "log_odds_chosen": 2.3266634941101074, "log_odds_ratio": -0.3144166171550751, "logits/chosen": 0.7925737500190735, "logits/rejected": 0.7034726142883301, "logps/chosen": -1.9139142036437988, "logps/rejected": -4.069923400878906, "loss": 0.9299, "nll_loss": 0.898462176322937, "rewards/accuracies": 0.875, "rewards/chosen": -0.19139140844345093, "rewards/margins": 0.2156008929014206, "rewards/rejected": -0.4069923162460327, "step": 1977 }, { "epoch": 5.415468856947296, "grad_norm": 3.247774362564087, "learning_rate": 7.291780821917808e-07, "log_odds_chosen": 1.4283350706100464, "log_odds_ratio": -0.29282742738723755, "logits/chosen": 0.820976734161377, "logits/rejected": 0.7806142568588257, "logps/chosen": -1.8917248249053955, "logps/rejected": -3.1791787147521973, "loss": 0.7668, "nll_loss": 0.7375140190124512, "rewards/accuracies": 1.0, "rewards/chosen": -0.18917247653007507, "rewards/margins": 0.12874539196491241, "rewards/rejected": -0.3179178833961487, "step": 1978 }, { "epoch": 5.418206707734429, "grad_norm": 3.265583038330078, "learning_rate": 7.29041095890411e-07, "log_odds_chosen": 1.9812583923339844, "log_odds_ratio": -0.2744806110858917, "logits/chosen": 0.8412241339683533, "logits/rejected": 0.8092355132102966, "logps/chosen": -2.3787550926208496, "logps/rejected": -4.279894828796387, "loss": 0.8931, "nll_loss": 0.8656386733055115, "rewards/accuracies": 1.0, "rewards/chosen": -0.23787552118301392, "rewards/margins": 0.19011399149894714, "rewards/rejected": -0.42798948287963867, "step": 1979 }, { "epoch": 5.420944558521561, "grad_norm": 3.593754768371582, "learning_rate": 7.28904109589041e-07, "log_odds_chosen": 1.0085712671279907, "log_odds_ratio": -0.39118146896362305, "logits/chosen": 0.8852371573448181, "logits/rejected": 0.8862639665603638, "logps/chosen": -2.0810961723327637, "logps/rejected": -2.9982261657714844, "loss": 0.9267, "nll_loss": 0.8876067399978638, "rewards/accuracies": 0.875, "rewards/chosen": -0.20810961723327637, "rewards/margins": 0.09171301126480103, "rewards/rejected": -0.2998226284980774, "step": 1980 }, { "epoch": 5.423682409308693, "grad_norm": 3.814504384994507, "learning_rate": 7.287671232876712e-07, "log_odds_chosen": 1.320548176765442, "log_odds_ratio": -0.30213361978530884, "logits/chosen": 1.0985671281814575, "logits/rejected": 1.0788578987121582, "logps/chosen": -1.9807026386260986, "logps/rejected": -3.1863088607788086, "loss": 0.7623, "nll_loss": 0.73212730884552, "rewards/accuracies": 0.875, "rewards/chosen": -0.19807025790214539, "rewards/margins": 0.12056063115596771, "rewards/rejected": -0.3186309039592743, "step": 1981 }, { "epoch": 5.426420260095825, "grad_norm": 4.285877704620361, "learning_rate": 7.286301369863014e-07, "log_odds_chosen": 0.38987767696380615, "log_odds_ratio": -0.5332584381103516, "logits/chosen": 1.0929534435272217, "logits/rejected": 1.09983229637146, "logps/chosen": -2.4460904598236084, "logps/rejected": -2.7901368141174316, "loss": 0.8483, "nll_loss": 0.7950136661529541, "rewards/accuracies": 1.0, "rewards/chosen": -0.2446090579032898, "rewards/margins": 0.03440462425351143, "rewards/rejected": -0.27901366353034973, "step": 1982 }, { "epoch": 5.429158110882957, "grad_norm": 4.917789936065674, "learning_rate": 7.284931506849314e-07, "log_odds_chosen": 1.9812148809432983, "log_odds_ratio": -0.28170302510261536, "logits/chosen": 0.8517860770225525, "logits/rejected": 0.8982910513877869, "logps/chosen": -2.667588949203491, "logps/rejected": -4.584942817687988, "loss": 0.8429, "nll_loss": 0.8146879076957703, "rewards/accuracies": 0.875, "rewards/chosen": -0.26675888895988464, "rewards/margins": 0.1917353868484497, "rewards/rejected": -0.45849427580833435, "step": 1983 }, { "epoch": 5.431895961670089, "grad_norm": 3.9790616035461426, "learning_rate": 7.283561643835616e-07, "log_odds_chosen": 0.8698751330375671, "log_odds_ratio": -0.5066758394241333, "logits/chosen": 0.8447867631912231, "logits/rejected": 0.8534792065620422, "logps/chosen": -2.4747190475463867, "logps/rejected": -3.2710866928100586, "loss": 0.8698, "nll_loss": 0.8191306591033936, "rewards/accuracies": 0.75, "rewards/chosen": -0.2474718987941742, "rewards/margins": 0.07963676750659943, "rewards/rejected": -0.3271086513996124, "step": 1984 }, { "epoch": 5.434633812457221, "grad_norm": 4.482095241546631, "learning_rate": 7.282191780821918e-07, "log_odds_chosen": 1.8245006799697876, "log_odds_ratio": -0.4384240210056305, "logits/chosen": 1.091034173965454, "logits/rejected": 1.0286152362823486, "logps/chosen": -2.377715587615967, "logps/rejected": -4.115970611572266, "loss": 0.8857, "nll_loss": 0.8418682813644409, "rewards/accuracies": 0.875, "rewards/chosen": -0.23777157068252563, "rewards/margins": 0.1738254725933075, "rewards/rejected": -0.4115970730781555, "step": 1985 }, { "epoch": 5.437371663244353, "grad_norm": 3.8745291233062744, "learning_rate": 7.280821917808219e-07, "log_odds_chosen": 1.0737863779067993, "log_odds_ratio": -0.552229106426239, "logits/chosen": 0.8680609464645386, "logits/rejected": 0.9199395179748535, "logps/chosen": -1.7816529273986816, "logps/rejected": -2.7764787673950195, "loss": 0.9533, "nll_loss": 0.8980703353881836, "rewards/accuracies": 0.625, "rewards/chosen": -0.1781652867794037, "rewards/margins": 0.09948259592056274, "rewards/rejected": -0.27764788269996643, "step": 1986 }, { "epoch": 5.4401095140314855, "grad_norm": 4.903519153594971, "learning_rate": 7.27945205479452e-07, "log_odds_chosen": 1.5418111085891724, "log_odds_ratio": -0.3450123965740204, "logits/chosen": 0.6835159063339233, "logits/rejected": 0.6846967935562134, "logps/chosen": -2.3575072288513184, "logps/rejected": -3.8108701705932617, "loss": 0.8371, "nll_loss": 0.8026149272918701, "rewards/accuracies": 0.75, "rewards/chosen": -0.2357507050037384, "rewards/margins": 0.14533631503582, "rewards/rejected": -0.381087064743042, "step": 1987 }, { "epoch": 5.4428473648186175, "grad_norm": 4.523565769195557, "learning_rate": 7.278082191780821e-07, "log_odds_chosen": -0.08189764618873596, "log_odds_ratio": -0.8530451059341431, "logits/chosen": 0.7935298085212708, "logits/rejected": 0.8035486936569214, "logps/chosen": -3.2122745513916016, "logps/rejected": -3.1272783279418945, "loss": 0.9968, "nll_loss": 0.9114484786987305, "rewards/accuracies": 0.5, "rewards/chosen": -0.32122746109962463, "rewards/margins": -0.008499637246131897, "rewards/rejected": -0.31272780895233154, "step": 1988 }, { "epoch": 5.44558521560575, "grad_norm": 5.989091396331787, "learning_rate": 7.276712328767123e-07, "log_odds_chosen": 1.1692979335784912, "log_odds_ratio": -0.48956698179244995, "logits/chosen": 0.9160494804382324, "logits/rejected": 1.0530545711517334, "logps/chosen": -2.944396495819092, "logps/rejected": -4.076348304748535, "loss": 0.917, "nll_loss": 0.8680720329284668, "rewards/accuracies": 0.75, "rewards/chosen": -0.2944396436214447, "rewards/margins": 0.11319515109062195, "rewards/rejected": -0.40763479471206665, "step": 1989 }, { "epoch": 5.448323066392882, "grad_norm": 4.528019905090332, "learning_rate": 7.275342465753424e-07, "log_odds_chosen": 1.387356162071228, "log_odds_ratio": -0.4353170096874237, "logits/chosen": 1.049917459487915, "logits/rejected": 1.086834192276001, "logps/chosen": -2.9402265548706055, "logps/rejected": -4.263506889343262, "loss": 0.8601, "nll_loss": 0.8165438771247864, "rewards/accuracies": 0.75, "rewards/chosen": -0.29402267932891846, "rewards/margins": 0.13232803344726562, "rewards/rejected": -0.4263507127761841, "step": 1990 }, { "epoch": 5.451060917180014, "grad_norm": 3.369259834289551, "learning_rate": 7.273972602739725e-07, "log_odds_chosen": 0.9154000282287598, "log_odds_ratio": -0.53831547498703, "logits/chosen": 0.7929591536521912, "logits/rejected": 0.8588111400604248, "logps/chosen": -2.2581048011779785, "logps/rejected": -3.093684673309326, "loss": 0.8689, "nll_loss": 0.8150923252105713, "rewards/accuracies": 0.625, "rewards/chosen": -0.2258104681968689, "rewards/margins": 0.08355799317359924, "rewards/rejected": -0.30936846137046814, "step": 1991 }, { "epoch": 5.453798767967146, "grad_norm": 3.6764252185821533, "learning_rate": 7.272602739726027e-07, "log_odds_chosen": 1.0494370460510254, "log_odds_ratio": -0.3366202712059021, "logits/chosen": 0.8040944933891296, "logits/rejected": 0.7918599843978882, "logps/chosen": -2.1436102390289307, "logps/rejected": -3.104220390319824, "loss": 0.903, "nll_loss": 0.8693154454231262, "rewards/accuracies": 1.0, "rewards/chosen": -0.2143610417842865, "rewards/margins": 0.09606100618839264, "rewards/rejected": -0.31042206287384033, "step": 1992 }, { "epoch": 5.456536618754278, "grad_norm": 3.2432477474212646, "learning_rate": 7.271232876712329e-07, "log_odds_chosen": 1.6873552799224854, "log_odds_ratio": -0.3710993230342865, "logits/chosen": 0.7253316640853882, "logits/rejected": 0.6850574612617493, "logps/chosen": -2.156888961791992, "logps/rejected": -3.5803768634796143, "loss": 0.941, "nll_loss": 0.9039220809936523, "rewards/accuracies": 0.75, "rewards/chosen": -0.21568891406059265, "rewards/margins": 0.14234879612922668, "rewards/rejected": -0.35803771018981934, "step": 1993 }, { "epoch": 5.45927446954141, "grad_norm": 3.9203732013702393, "learning_rate": 7.269863013698629e-07, "log_odds_chosen": 0.7363469004631042, "log_odds_ratio": -0.7187460660934448, "logits/chosen": 0.6513941287994385, "logits/rejected": 0.7498157620429993, "logps/chosen": -2.2392940521240234, "logps/rejected": -2.8742730617523193, "loss": 0.9005, "nll_loss": 0.8285996317863464, "rewards/accuracies": 0.625, "rewards/chosen": -0.22392942011356354, "rewards/margins": 0.06349789351224899, "rewards/rejected": -0.28742730617523193, "step": 1994 }, { "epoch": 5.462012320328542, "grad_norm": 4.158783435821533, "learning_rate": 7.268493150684931e-07, "log_odds_chosen": 2.0457801818847656, "log_odds_ratio": -0.25547680258750916, "logits/chosen": 1.0715477466583252, "logits/rejected": 1.1496264934539795, "logps/chosen": -2.229701280593872, "logps/rejected": -4.176133155822754, "loss": 0.7648, "nll_loss": 0.7392302751541138, "rewards/accuracies": 1.0, "rewards/chosen": -0.2229701280593872, "rewards/margins": 0.19464319944381714, "rewards/rejected": -0.41761335730552673, "step": 1995 }, { "epoch": 5.464750171115674, "grad_norm": 5.105866432189941, "learning_rate": 7.267123287671233e-07, "log_odds_chosen": 0.7697292566299438, "log_odds_ratio": -0.7533566951751709, "logits/chosen": 0.7695790529251099, "logits/rejected": 0.7468198537826538, "logps/chosen": -3.45284366607666, "logps/rejected": -4.156911849975586, "loss": 0.9214, "nll_loss": 0.8460733294487, "rewards/accuracies": 0.5, "rewards/chosen": -0.3452843725681305, "rewards/margins": 0.07040679454803467, "rewards/rejected": -0.41569119691848755, "step": 1996 }, { "epoch": 5.467488021902806, "grad_norm": 4.53104829788208, "learning_rate": 7.265753424657533e-07, "log_odds_chosen": 0.7789834141731262, "log_odds_ratio": -0.5139619708061218, "logits/chosen": 0.708529531955719, "logits/rejected": 0.660531759262085, "logps/chosen": -2.394108295440674, "logps/rejected": -3.104071617126465, "loss": 0.898, "nll_loss": 0.8465714454650879, "rewards/accuracies": 0.875, "rewards/chosen": -0.23941081762313843, "rewards/margins": 0.07099632173776627, "rewards/rejected": -0.3104071617126465, "step": 1997 }, { "epoch": 5.470225872689938, "grad_norm": 3.616053342819214, "learning_rate": 7.264383561643835e-07, "log_odds_chosen": 2.423288106918335, "log_odds_ratio": -0.407281756401062, "logits/chosen": 0.8380069732666016, "logits/rejected": 0.8246853947639465, "logps/chosen": -2.2699358463287354, "logps/rejected": -4.5347676277160645, "loss": 0.9039, "nll_loss": 0.8631269931793213, "rewards/accuracies": 0.75, "rewards/chosen": -0.226993590593338, "rewards/margins": 0.22648321092128754, "rewards/rejected": -0.45347678661346436, "step": 1998 }, { "epoch": 5.47296372347707, "grad_norm": 4.228430271148682, "learning_rate": 7.263013698630137e-07, "log_odds_chosen": 1.1545393466949463, "log_odds_ratio": -0.5405477285385132, "logits/chosen": 0.7652679085731506, "logits/rejected": 0.7611689567565918, "logps/chosen": -2.6828866004943848, "logps/rejected": -3.8072311878204346, "loss": 1.0329, "nll_loss": 0.9788244962692261, "rewards/accuracies": 0.625, "rewards/chosen": -0.26828864216804504, "rewards/margins": 0.1124344915151596, "rewards/rejected": -0.38072311878204346, "step": 1999 }, { "epoch": 5.475701574264202, "grad_norm": 3.373894453048706, "learning_rate": 7.261643835616437e-07, "log_odds_chosen": 1.4218592643737793, "log_odds_ratio": -0.38830357789993286, "logits/chosen": 0.6812217831611633, "logits/rejected": 0.5642558336257935, "logps/chosen": -2.2390365600585938, "logps/rejected": -3.5744993686676025, "loss": 0.9063, "nll_loss": 0.8674629926681519, "rewards/accuracies": 0.75, "rewards/chosen": -0.22390364110469818, "rewards/margins": 0.13354630768299103, "rewards/rejected": -0.3574499487876892, "step": 2000 }, { "epoch": 5.478439425051334, "grad_norm": 3.956251859664917, "learning_rate": 7.260273972602739e-07, "log_odds_chosen": 1.263603687286377, "log_odds_ratio": -0.5026751756668091, "logits/chosen": 0.6792773008346558, "logits/rejected": 0.6432930827140808, "logps/chosen": -1.9204316139221191, "logps/rejected": -3.0278003215789795, "loss": 0.8982, "nll_loss": 0.8479254245758057, "rewards/accuracies": 0.625, "rewards/chosen": -0.19204315543174744, "rewards/margins": 0.11073688417673111, "rewards/rejected": -0.30278003215789795, "step": 2001 }, { "epoch": 5.4811772758384665, "grad_norm": 3.7202248573303223, "learning_rate": 7.258904109589041e-07, "log_odds_chosen": 1.0315334796905518, "log_odds_ratio": -0.35340604186058044, "logits/chosen": 0.9293379783630371, "logits/rejected": 0.9382570385932922, "logps/chosen": -1.970313549041748, "logps/rejected": -2.9131438732147217, "loss": 0.8453, "nll_loss": 0.8099292516708374, "rewards/accuracies": 1.0, "rewards/chosen": -0.19703136384487152, "rewards/margins": 0.09428304433822632, "rewards/rejected": -0.29131439328193665, "step": 2002 }, { "epoch": 5.4839151266255985, "grad_norm": 3.580043077468872, "learning_rate": 7.257534246575342e-07, "log_odds_chosen": 2.2178118228912354, "log_odds_ratio": -0.29867228865623474, "logits/chosen": 0.7657875418663025, "logits/rejected": 0.7703835964202881, "logps/chosen": -2.6567277908325195, "logps/rejected": -4.806759834289551, "loss": 0.9242, "nll_loss": 0.8943280577659607, "rewards/accuracies": 0.875, "rewards/chosen": -0.2656727731227875, "rewards/margins": 0.21500322222709656, "rewards/rejected": -0.48067599534988403, "step": 2003 }, { "epoch": 5.486652977412731, "grad_norm": 3.2525405883789062, "learning_rate": 7.256164383561643e-07, "log_odds_chosen": 1.7651987075805664, "log_odds_ratio": -0.3883075714111328, "logits/chosen": 0.9297957420349121, "logits/rejected": 0.9122457504272461, "logps/chosen": -1.9325231313705444, "logps/rejected": -3.615917921066284, "loss": 0.947, "nll_loss": 0.9081653952598572, "rewards/accuracies": 0.875, "rewards/chosen": -0.1932523250579834, "rewards/margins": 0.16833946108818054, "rewards/rejected": -0.36159178614616394, "step": 2004 }, { "epoch": 5.489390828199863, "grad_norm": 3.0384669303894043, "learning_rate": 7.254794520547944e-07, "log_odds_chosen": 1.204024076461792, "log_odds_ratio": -0.3368600606918335, "logits/chosen": 0.7519625425338745, "logits/rejected": 0.7921997308731079, "logps/chosen": -2.0360822677612305, "logps/rejected": -3.1131350994110107, "loss": 0.8271, "nll_loss": 0.7933802008628845, "rewards/accuracies": 0.875, "rewards/chosen": -0.2036082148551941, "rewards/margins": 0.10770530998706818, "rewards/rejected": -0.3113135099411011, "step": 2005 }, { "epoch": 5.492128678986996, "grad_norm": 3.2651195526123047, "learning_rate": 7.253424657534246e-07, "log_odds_chosen": 1.7205482721328735, "log_odds_ratio": -0.3632901608943939, "logits/chosen": 0.7037770748138428, "logits/rejected": 0.699791431427002, "logps/chosen": -2.547241449356079, "logps/rejected": -4.229363441467285, "loss": 0.8453, "nll_loss": 0.8089709877967834, "rewards/accuracies": 0.875, "rewards/chosen": -0.2547241449356079, "rewards/margins": 0.16821222007274628, "rewards/rejected": -0.4229363799095154, "step": 2006 }, { "epoch": 5.494866529774128, "grad_norm": 3.5067994594573975, "learning_rate": 7.252054794520547e-07, "log_odds_chosen": 1.3357844352722168, "log_odds_ratio": -0.3378237187862396, "logits/chosen": 0.9736776351928711, "logits/rejected": 1.0323116779327393, "logps/chosen": -2.0344061851501465, "logps/rejected": -3.276669979095459, "loss": 0.8647, "nll_loss": 0.8309651613235474, "rewards/accuracies": 0.875, "rewards/chosen": -0.2034406065940857, "rewards/margins": 0.1242263987660408, "rewards/rejected": -0.3276669979095459, "step": 2007 }, { "epoch": 5.49760438056126, "grad_norm": 4.081090927124023, "learning_rate": 7.250684931506848e-07, "log_odds_chosen": 1.3722705841064453, "log_odds_ratio": -0.30917662382125854, "logits/chosen": 0.5706096887588501, "logits/rejected": 0.5237744450569153, "logps/chosen": -1.879217267036438, "logps/rejected": -3.1310112476348877, "loss": 0.8797, "nll_loss": 0.848755955696106, "rewards/accuracies": 0.875, "rewards/chosen": -0.18792173266410828, "rewards/margins": 0.12517940998077393, "rewards/rejected": -0.3131011426448822, "step": 2008 }, { "epoch": 5.500342231348392, "grad_norm": 5.058403491973877, "learning_rate": 7.24931506849315e-07, "log_odds_chosen": 1.034052848815918, "log_odds_ratio": -0.5435482859611511, "logits/chosen": 0.6899828314781189, "logits/rejected": 0.7825954556465149, "logps/chosen": -3.0790090560913086, "logps/rejected": -4.076033592224121, "loss": 0.9198, "nll_loss": 0.8654283285140991, "rewards/accuracies": 0.75, "rewards/chosen": -0.30790090560913086, "rewards/margins": 0.09970250725746155, "rewards/rejected": -0.4076034128665924, "step": 2009 }, { "epoch": 5.503080082135524, "grad_norm": 3.417834997177124, "learning_rate": 7.247945205479452e-07, "log_odds_chosen": 0.641048789024353, "log_odds_ratio": -0.5526214241981506, "logits/chosen": 0.6887670755386353, "logits/rejected": 0.6879114508628845, "logps/chosen": -2.6132655143737793, "logps/rejected": -3.223613739013672, "loss": 0.886, "nll_loss": 0.8307750225067139, "rewards/accuracies": 0.625, "rewards/chosen": -0.26132655143737793, "rewards/margins": 0.061034828424453735, "rewards/rejected": -0.32236137986183167, "step": 2010 }, { "epoch": 5.505817932922656, "grad_norm": 3.523390769958496, "learning_rate": 7.246575342465752e-07, "log_odds_chosen": 1.8389177322387695, "log_odds_ratio": -0.26726990938186646, "logits/chosen": 0.7424953579902649, "logits/rejected": 0.7233259081840515, "logps/chosen": -2.3482909202575684, "logps/rejected": -4.090304374694824, "loss": 0.8846, "nll_loss": 0.8578609824180603, "rewards/accuracies": 0.875, "rewards/chosen": -0.2348290979862213, "rewards/margins": 0.1742013394832611, "rewards/rejected": -0.4090304374694824, "step": 2011 }, { "epoch": 5.508555783709788, "grad_norm": 3.691119909286499, "learning_rate": 7.245205479452054e-07, "log_odds_chosen": 2.4005119800567627, "log_odds_ratio": -0.24003227055072784, "logits/chosen": 0.8082783818244934, "logits/rejected": 0.7878011465072632, "logps/chosen": -2.3926339149475098, "logps/rejected": -4.716419696807861, "loss": 0.932, "nll_loss": 0.908024787902832, "rewards/accuracies": 1.0, "rewards/chosen": -0.2392634004354477, "rewards/margins": 0.23237860202789307, "rewards/rejected": -0.47164198756217957, "step": 2012 }, { "epoch": 5.51129363449692, "grad_norm": 5.14789342880249, "learning_rate": 7.243835616438357e-07, "log_odds_chosen": 0.9759700298309326, "log_odds_ratio": -0.5572986602783203, "logits/chosen": 1.1521612405776978, "logits/rejected": 1.1252272129058838, "logps/chosen": -2.6150007247924805, "logps/rejected": -3.509200096130371, "loss": 0.7923, "nll_loss": 0.7365236282348633, "rewards/accuracies": 0.625, "rewards/chosen": -0.2615000605583191, "rewards/margins": 0.08941994607448578, "rewards/rejected": -0.35092002153396606, "step": 2013 }, { "epoch": 5.514031485284052, "grad_norm": 3.4974122047424316, "learning_rate": 7.242465753424656e-07, "log_odds_chosen": 2.424407482147217, "log_odds_ratio": -0.24395816028118134, "logits/chosen": 0.7429052591323853, "logits/rejected": 0.7341365218162537, "logps/chosen": -2.4141979217529297, "logps/rejected": -4.727485656738281, "loss": 0.8004, "nll_loss": 0.7760286331176758, "rewards/accuracies": 0.75, "rewards/chosen": -0.24141979217529297, "rewards/margins": 0.23132875561714172, "rewards/rejected": -0.4727485477924347, "step": 2014 }, { "epoch": 5.516769336071184, "grad_norm": 3.832042932510376, "learning_rate": 7.241095890410958e-07, "log_odds_chosen": 0.7114672660827637, "log_odds_ratio": -0.4694916903972626, "logits/chosen": 0.6057313680648804, "logits/rejected": 0.5498821139335632, "logps/chosen": -2.616107940673828, "logps/rejected": -3.2761244773864746, "loss": 0.9306, "nll_loss": 0.8836460113525391, "rewards/accuracies": 0.75, "rewards/chosen": -0.26161080598831177, "rewards/margins": 0.06600164622068405, "rewards/rejected": -0.3276124596595764, "step": 2015 }, { "epoch": 5.519507186858316, "grad_norm": 6.361324787139893, "learning_rate": 7.239726027397261e-07, "log_odds_chosen": 0.22544047236442566, "log_odds_ratio": -0.9227298498153687, "logits/chosen": 1.0163981914520264, "logits/rejected": 0.9494590759277344, "logps/chosen": -3.360891819000244, "logps/rejected": -3.5322165489196777, "loss": 0.9471, "nll_loss": 0.8548072576522827, "rewards/accuracies": 0.75, "rewards/chosen": -0.33608919382095337, "rewards/margins": 0.017132479697465897, "rewards/rejected": -0.3532216548919678, "step": 2016 }, { "epoch": 5.522245037645448, "grad_norm": 5.946074485778809, "learning_rate": 7.238356164383562e-07, "log_odds_chosen": 0.9514237642288208, "log_odds_ratio": -0.47377365827560425, "logits/chosen": 0.984721302986145, "logits/rejected": 1.0084943771362305, "logps/chosen": -2.987217426300049, "logps/rejected": -3.887722969055176, "loss": 0.8263, "nll_loss": 0.7789686918258667, "rewards/accuracies": 0.75, "rewards/chosen": -0.2987217307090759, "rewards/margins": 0.09005053341388702, "rewards/rejected": -0.38877230882644653, "step": 2017 }, { "epoch": 5.5249828884325805, "grad_norm": 3.7505626678466797, "learning_rate": 7.236986301369863e-07, "log_odds_chosen": 0.8753809928894043, "log_odds_ratio": -0.46496838331222534, "logits/chosen": 0.6650827527046204, "logits/rejected": 0.6018122434616089, "logps/chosen": -2.4707441329956055, "logps/rejected": -3.2504124641418457, "loss": 0.9297, "nll_loss": 0.8832165002822876, "rewards/accuracies": 0.75, "rewards/chosen": -0.2470744103193283, "rewards/margins": 0.07796682417392731, "rewards/rejected": -0.325041264295578, "step": 2018 }, { "epoch": 5.5277207392197125, "grad_norm": 3.1633780002593994, "learning_rate": 7.235616438356164e-07, "log_odds_chosen": 1.5041993856430054, "log_odds_ratio": -0.24705857038497925, "logits/chosen": 0.7946091890335083, "logits/rejected": 0.8062843084335327, "logps/chosen": -1.8486738204956055, "logps/rejected": -3.1613144874572754, "loss": 0.8345, "nll_loss": 0.8097890019416809, "rewards/accuracies": 1.0, "rewards/chosen": -0.18486739695072174, "rewards/margins": 0.1312640905380249, "rewards/rejected": -0.31613147258758545, "step": 2019 }, { "epoch": 5.530458590006845, "grad_norm": 4.60344123840332, "learning_rate": 7.234246575342466e-07, "log_odds_chosen": 1.8211894035339355, "log_odds_ratio": -0.34902286529541016, "logits/chosen": 0.7983927130699158, "logits/rejected": 0.7093175649642944, "logps/chosen": -2.430070161819458, "logps/rejected": -4.145914077758789, "loss": 0.8627, "nll_loss": 0.8278214335441589, "rewards/accuracies": 0.875, "rewards/chosen": -0.24300701916217804, "rewards/margins": 0.17158439755439758, "rewards/rejected": -0.41459140181541443, "step": 2020 }, { "epoch": 5.533196440793977, "grad_norm": 4.05585241317749, "learning_rate": 7.232876712328767e-07, "log_odds_chosen": 1.8150662183761597, "log_odds_ratio": -0.23163443803787231, "logits/chosen": 0.83763587474823, "logits/rejected": 0.8834158778190613, "logps/chosen": -2.200413465499878, "logps/rejected": -3.857471227645874, "loss": 0.7986, "nll_loss": 0.7754233479499817, "rewards/accuracies": 1.0, "rewards/chosen": -0.22004136443138123, "rewards/margins": 0.16570578515529633, "rewards/rejected": -0.38574713468551636, "step": 2021 }, { "epoch": 5.535934291581109, "grad_norm": 4.006087303161621, "learning_rate": 7.231506849315068e-07, "log_odds_chosen": 0.8656949996948242, "log_odds_ratio": -0.5151602029800415, "logits/chosen": 0.8247336149215698, "logits/rejected": 0.8042895197868347, "logps/chosen": -2.6570982933044434, "logps/rejected": -3.4750449657440186, "loss": 0.9467, "nll_loss": 0.8951912522315979, "rewards/accuracies": 0.75, "rewards/chosen": -0.2657098174095154, "rewards/margins": 0.08179469406604767, "rewards/rejected": -0.34750449657440186, "step": 2022 }, { "epoch": 5.538672142368241, "grad_norm": 3.55326509475708, "learning_rate": 7.23013698630137e-07, "log_odds_chosen": 1.6979299783706665, "log_odds_ratio": -0.3700631260871887, "logits/chosen": 1.0136889219284058, "logits/rejected": 1.043592095375061, "logps/chosen": -2.5854005813598633, "logps/rejected": -4.210168838500977, "loss": 0.7752, "nll_loss": 0.7382305860519409, "rewards/accuracies": 0.75, "rewards/chosen": -0.2585400640964508, "rewards/margins": 0.1624768078327179, "rewards/rejected": -0.4210168719291687, "step": 2023 }, { "epoch": 5.541409993155373, "grad_norm": 3.683107852935791, "learning_rate": 7.228767123287672e-07, "log_odds_chosen": 1.2670788764953613, "log_odds_ratio": -0.3664230704307556, "logits/chosen": 0.6991403698921204, "logits/rejected": 0.6523791551589966, "logps/chosen": -2.03682804107666, "logps/rejected": -3.2193658351898193, "loss": 0.86, "nll_loss": 0.8233248591423035, "rewards/accuracies": 1.0, "rewards/chosen": -0.2036828100681305, "rewards/margins": 0.11825381219387054, "rewards/rejected": -0.32193660736083984, "step": 2024 }, { "epoch": 5.544147843942505, "grad_norm": 3.555729866027832, "learning_rate": 7.227397260273972e-07, "log_odds_chosen": 1.2953933477401733, "log_odds_ratio": -0.3716903328895569, "logits/chosen": 0.7730134725570679, "logits/rejected": 0.7906211614608765, "logps/chosen": -1.8961780071258545, "logps/rejected": -3.1049551963806152, "loss": 0.8448, "nll_loss": 0.8076412081718445, "rewards/accuracies": 0.875, "rewards/chosen": -0.1896178126335144, "rewards/margins": 0.12087773531675339, "rewards/rejected": -0.3104955554008484, "step": 2025 }, { "epoch": 5.546885694729637, "grad_norm": 3.897001266479492, "learning_rate": 7.226027397260274e-07, "log_odds_chosen": 1.3230892419815063, "log_odds_ratio": -0.29111963510513306, "logits/chosen": 0.9323704242706299, "logits/rejected": 0.881020188331604, "logps/chosen": -1.5667349100112915, "logps/rejected": -2.7130253314971924, "loss": 0.8134, "nll_loss": 0.784273624420166, "rewards/accuracies": 1.0, "rewards/chosen": -0.15667349100112915, "rewards/margins": 0.11462903022766113, "rewards/rejected": -0.2713025212287903, "step": 2026 }, { "epoch": 5.549623545516769, "grad_norm": 3.911998987197876, "learning_rate": 7.224657534246576e-07, "log_odds_chosen": 1.1048855781555176, "log_odds_ratio": -0.4072086215019226, "logits/chosen": 0.8795908093452454, "logits/rejected": 0.8843063116073608, "logps/chosen": -2.7882823944091797, "logps/rejected": -3.8130831718444824, "loss": 0.8345, "nll_loss": 0.7937788963317871, "rewards/accuracies": 0.75, "rewards/chosen": -0.2788282632827759, "rewards/margins": 0.10248006880283356, "rewards/rejected": -0.38130834698677063, "step": 2027 }, { "epoch": 5.552361396303901, "grad_norm": 3.1501305103302, "learning_rate": 7.223287671232876e-07, "log_odds_chosen": 1.3693132400512695, "log_odds_ratio": -0.3306681513786316, "logits/chosen": 0.9496340155601501, "logits/rejected": 0.9079152345657349, "logps/chosen": -2.10958194732666, "logps/rejected": -3.4136133193969727, "loss": 0.8387, "nll_loss": 0.8056694269180298, "rewards/accuracies": 0.875, "rewards/chosen": -0.21095822751522064, "rewards/margins": 0.13040313124656677, "rewards/rejected": -0.3413613736629486, "step": 2028 }, { "epoch": 5.555099247091033, "grad_norm": 3.576582670211792, "learning_rate": 7.221917808219178e-07, "log_odds_chosen": 0.7397797107696533, "log_odds_ratio": -0.430412232875824, "logits/chosen": 0.6340981721878052, "logits/rejected": 0.5955557823181152, "logps/chosen": -2.3740830421447754, "logps/rejected": -3.0678858757019043, "loss": 0.8851, "nll_loss": 0.8420203924179077, "rewards/accuracies": 1.0, "rewards/chosen": -0.2374083250761032, "rewards/margins": 0.06938029080629349, "rewards/rejected": -0.3067885935306549, "step": 2029 }, { "epoch": 5.557837097878165, "grad_norm": 4.82352876663208, "learning_rate": 7.22054794520548e-07, "log_odds_chosen": 1.0633348226547241, "log_odds_ratio": -0.4357221722602844, "logits/chosen": 0.6223222613334656, "logits/rejected": 0.6552345752716064, "logps/chosen": -2.3852310180664062, "logps/rejected": -3.3680853843688965, "loss": 0.8969, "nll_loss": 0.8533481359481812, "rewards/accuracies": 0.75, "rewards/chosen": -0.23852309584617615, "rewards/margins": 0.09828545153141022, "rewards/rejected": -0.33680856227874756, "step": 2030 }, { "epoch": 5.560574948665298, "grad_norm": 4.251950740814209, "learning_rate": 7.219178082191781e-07, "log_odds_chosen": 2.064845561981201, "log_odds_ratio": -0.20522359013557434, "logits/chosen": 0.7325209379196167, "logits/rejected": 0.6920591592788696, "logps/chosen": -2.36801815032959, "logps/rejected": -4.311601161956787, "loss": 0.9086, "nll_loss": 0.8880653977394104, "rewards/accuracies": 1.0, "rewards/chosen": -0.23680183291435242, "rewards/margins": 0.19435828924179077, "rewards/rejected": -0.4311601221561432, "step": 2031 }, { "epoch": 5.563312799452429, "grad_norm": 3.6439647674560547, "learning_rate": 7.217808219178082e-07, "log_odds_chosen": 0.5553226470947266, "log_odds_ratio": -0.5997999310493469, "logits/chosen": 0.6635274887084961, "logits/rejected": 0.6333750486373901, "logps/chosen": -1.890385627746582, "logps/rejected": -2.408473491668701, "loss": 0.8965, "nll_loss": 0.8364831209182739, "rewards/accuracies": 0.75, "rewards/chosen": -0.18903854489326477, "rewards/margins": 0.05180878937244415, "rewards/rejected": -0.24084734916687012, "step": 2032 }, { "epoch": 5.566050650239562, "grad_norm": 4.1470842361450195, "learning_rate": 7.216438356164384e-07, "log_odds_chosen": 1.233961820602417, "log_odds_ratio": -0.37132060527801514, "logits/chosen": 0.7789977192878723, "logits/rejected": 0.6523470878601074, "logps/chosen": -2.4602150917053223, "logps/rejected": -3.606081008911133, "loss": 0.9058, "nll_loss": 0.8686356544494629, "rewards/accuracies": 0.875, "rewards/chosen": -0.24602152407169342, "rewards/margins": 0.11458659917116165, "rewards/rejected": -0.36060813069343567, "step": 2033 }, { "epoch": 5.568788501026694, "grad_norm": 5.458630561828613, "learning_rate": 7.215068493150685e-07, "log_odds_chosen": 0.8005872964859009, "log_odds_ratio": -0.5000375509262085, "logits/chosen": 0.7894062399864197, "logits/rejected": 0.7571960091590881, "logps/chosen": -2.633314371109009, "logps/rejected": -3.3679981231689453, "loss": 0.8523, "nll_loss": 0.8022733330726624, "rewards/accuracies": 0.875, "rewards/chosen": -0.26333141326904297, "rewards/margins": 0.07346837222576141, "rewards/rejected": -0.3367998003959656, "step": 2034 }, { "epoch": 5.5715263518138265, "grad_norm": 5.2356438636779785, "learning_rate": 7.213698630136986e-07, "log_odds_chosen": 0.8965104818344116, "log_odds_ratio": -0.5377894043922424, "logits/chosen": 0.8874044418334961, "logits/rejected": 0.8853083848953247, "logps/chosen": -2.8540728092193604, "logps/rejected": -3.662653923034668, "loss": 0.8846, "nll_loss": 0.8308291435241699, "rewards/accuracies": 0.75, "rewards/chosen": -0.28540727496147156, "rewards/margins": 0.08085814118385315, "rewards/rejected": -0.3662654161453247, "step": 2035 }, { "epoch": 5.574264202600959, "grad_norm": 4.281710624694824, "learning_rate": 7.212328767123287e-07, "log_odds_chosen": 1.4820889234542847, "log_odds_ratio": -0.3459703326225281, "logits/chosen": 0.9288927316665649, "logits/rejected": 0.9470432996749878, "logps/chosen": -2.6790239810943604, "logps/rejected": -4.074872970581055, "loss": 0.8081, "nll_loss": 0.7734805345535278, "rewards/accuracies": 0.875, "rewards/chosen": -0.2679023742675781, "rewards/margins": 0.13958494365215302, "rewards/rejected": -0.40748730301856995, "step": 2036 }, { "epoch": 5.577002053388091, "grad_norm": 3.3425493240356445, "learning_rate": 7.210958904109589e-07, "log_odds_chosen": 2.3405699729919434, "log_odds_ratio": -0.267839640378952, "logits/chosen": 0.728081464767456, "logits/rejected": 0.6403577327728271, "logps/chosen": -1.7257039546966553, "logps/rejected": -3.9391908645629883, "loss": 0.8784, "nll_loss": 0.8516489267349243, "rewards/accuracies": 0.875, "rewards/chosen": -0.17257040739059448, "rewards/margins": 0.22134873270988464, "rewards/rejected": -0.39391911029815674, "step": 2037 }, { "epoch": 5.579739904175223, "grad_norm": 7.220444202423096, "learning_rate": 7.209589041095891e-07, "log_odds_chosen": 1.410111427307129, "log_odds_ratio": -0.579162061214447, "logits/chosen": 0.9112712740898132, "logits/rejected": 0.9523525834083557, "logps/chosen": -2.664030075073242, "logps/rejected": -3.9764089584350586, "loss": 0.8558, "nll_loss": 0.7979257702827454, "rewards/accuracies": 0.75, "rewards/chosen": -0.2664030194282532, "rewards/margins": 0.13123786449432373, "rewards/rejected": -0.3976408839225769, "step": 2038 }, { "epoch": 5.582477754962355, "grad_norm": 4.945675849914551, "learning_rate": 7.208219178082191e-07, "log_odds_chosen": 1.3407095670700073, "log_odds_ratio": -0.28327715396881104, "logits/chosen": 0.9107164740562439, "logits/rejected": 0.9694104790687561, "logps/chosen": -2.5306999683380127, "logps/rejected": -3.7979984283447266, "loss": 0.8212, "nll_loss": 0.7928237915039062, "rewards/accuracies": 1.0, "rewards/chosen": -0.25306999683380127, "rewards/margins": 0.12672989070415497, "rewards/rejected": -0.37979987263679504, "step": 2039 }, { "epoch": 5.585215605749487, "grad_norm": 4.3602728843688965, "learning_rate": 7.206849315068493e-07, "log_odds_chosen": 1.2760910987854004, "log_odds_ratio": -0.3505569100379944, "logits/chosen": 0.8111850023269653, "logits/rejected": 0.7898353338241577, "logps/chosen": -2.277435541152954, "logps/rejected": -3.455476999282837, "loss": 0.8478, "nll_loss": 0.8127888441085815, "rewards/accuracies": 1.0, "rewards/chosen": -0.22774356603622437, "rewards/margins": 0.117804154753685, "rewards/rejected": -0.34554770588874817, "step": 2040 }, { "epoch": 5.587953456536619, "grad_norm": 3.5091285705566406, "learning_rate": 7.205479452054795e-07, "log_odds_chosen": 0.8824278116226196, "log_odds_ratio": -0.5558764934539795, "logits/chosen": 0.7784149646759033, "logits/rejected": 0.798984169960022, "logps/chosen": -2.38118839263916, "logps/rejected": -3.256962299346924, "loss": 0.9, "nll_loss": 0.8443654775619507, "rewards/accuracies": 0.75, "rewards/chosen": -0.23811885714530945, "rewards/margins": 0.08757738769054413, "rewards/rejected": -0.3256962299346924, "step": 2041 }, { "epoch": 5.590691307323751, "grad_norm": 3.144399642944336, "learning_rate": 7.204109589041095e-07, "log_odds_chosen": 2.519984722137451, "log_odds_ratio": -0.22347815334796906, "logits/chosen": 0.813166618347168, "logits/rejected": 0.8138603568077087, "logps/chosen": -2.1932692527770996, "logps/rejected": -4.636584758758545, "loss": 0.8468, "nll_loss": 0.8245015740394592, "rewards/accuracies": 1.0, "rewards/chosen": -0.2193269431591034, "rewards/margins": 0.24433155357837677, "rewards/rejected": -0.46365851163864136, "step": 2042 }, { "epoch": 5.593429158110883, "grad_norm": 4.172809600830078, "learning_rate": 7.202739726027397e-07, "log_odds_chosen": 2.1027750968933105, "log_odds_ratio": -0.31406688690185547, "logits/chosen": 0.9595000743865967, "logits/rejected": 0.9824888706207275, "logps/chosen": -2.5301899909973145, "logps/rejected": -4.560774326324463, "loss": 0.7465, "nll_loss": 0.715104341506958, "rewards/accuracies": 0.875, "rewards/chosen": -0.25301897525787354, "rewards/margins": 0.20305848121643066, "rewards/rejected": -0.4560774564743042, "step": 2043 }, { "epoch": 5.596167008898015, "grad_norm": 3.440458059310913, "learning_rate": 7.201369863013699e-07, "log_odds_chosen": 1.6908795833587646, "log_odds_ratio": -0.24226249754428864, "logits/chosen": 1.1198129653930664, "logits/rejected": 1.1484066247940063, "logps/chosen": -2.12837290763855, "logps/rejected": -3.677387237548828, "loss": 0.8114, "nll_loss": 0.7872081995010376, "rewards/accuracies": 1.0, "rewards/chosen": -0.2128373086452484, "rewards/margins": 0.1549014151096344, "rewards/rejected": -0.3677387237548828, "step": 2044 }, { "epoch": 5.598904859685147, "grad_norm": 6.024229526519775, "learning_rate": 7.2e-07, "log_odds_chosen": 2.189979314804077, "log_odds_ratio": -0.3048418164253235, "logits/chosen": 0.9817463755607605, "logits/rejected": 0.9309351444244385, "logps/chosen": -2.3663556575775146, "logps/rejected": -4.448092460632324, "loss": 0.8125, "nll_loss": 0.7819969654083252, "rewards/accuracies": 0.75, "rewards/chosen": -0.23663556575775146, "rewards/margins": 0.2081736922264099, "rewards/rejected": -0.4448092579841614, "step": 2045 }, { "epoch": 5.601642710472279, "grad_norm": 4.023950576782227, "learning_rate": 7.198630136986301e-07, "log_odds_chosen": 1.265411376953125, "log_odds_ratio": -0.47705942392349243, "logits/chosen": 0.6129852533340454, "logits/rejected": 0.5764513611793518, "logps/chosen": -1.988723635673523, "logps/rejected": -3.147930860519409, "loss": 0.935, "nll_loss": 0.8873202204704285, "rewards/accuracies": 0.75, "rewards/chosen": -0.198872372508049, "rewards/margins": 0.11592070758342743, "rewards/rejected": -0.31479305028915405, "step": 2046 }, { "epoch": 5.604380561259411, "grad_norm": 4.234433650970459, "learning_rate": 7.197260273972603e-07, "log_odds_chosen": 0.17678537964820862, "log_odds_ratio": -0.6754233837127686, "logits/chosen": 0.7052989602088928, "logits/rejected": 0.6985692381858826, "logps/chosen": -2.1560471057891846, "logps/rejected": -2.2805776596069336, "loss": 0.995, "nll_loss": 0.927448034286499, "rewards/accuracies": 0.625, "rewards/chosen": -0.2156047224998474, "rewards/margins": 0.012453043833374977, "rewards/rejected": -0.22805777192115784, "step": 2047 }, { "epoch": 5.607118412046543, "grad_norm": 2.937614679336548, "learning_rate": 7.195890410958904e-07, "log_odds_chosen": 1.810341477394104, "log_odds_ratio": -0.22602427005767822, "logits/chosen": 0.8394126892089844, "logits/rejected": 0.8605773448944092, "logps/chosen": -1.7832183837890625, "logps/rejected": -3.4348831176757812, "loss": 0.7597, "nll_loss": 0.737069845199585, "rewards/accuracies": 1.0, "rewards/chosen": -0.17832183837890625, "rewards/margins": 0.165166437625885, "rewards/rejected": -0.34348827600479126, "step": 2048 }, { "epoch": 5.6098562628336754, "grad_norm": 4.120212554931641, "learning_rate": 7.194520547945205e-07, "log_odds_chosen": 1.341626763343811, "log_odds_ratio": -0.37578263878822327, "logits/chosen": 0.7375996708869934, "logits/rejected": 0.6372726559638977, "logps/chosen": -1.8907421827316284, "logps/rejected": -3.108063220977783, "loss": 0.8458, "nll_loss": 0.8082544803619385, "rewards/accuracies": 0.875, "rewards/chosen": -0.18907423317432404, "rewards/margins": 0.12173209339380264, "rewards/rejected": -0.3108063340187073, "step": 2049 }, { "epoch": 5.6125941136208075, "grad_norm": 4.2247090339660645, "learning_rate": 7.193150684931506e-07, "log_odds_chosen": 2.501922130584717, "log_odds_ratio": -0.30354559421539307, "logits/chosen": 0.9126617312431335, "logits/rejected": 0.9998130798339844, "logps/chosen": -2.58414626121521, "logps/rejected": -5.0025553703308105, "loss": 0.812, "nll_loss": 0.781668484210968, "rewards/accuracies": 1.0, "rewards/chosen": -0.2584146559238434, "rewards/margins": 0.2418408840894699, "rewards/rejected": -0.5002555251121521, "step": 2050 }, { "epoch": 5.61533196440794, "grad_norm": 3.717571973800659, "learning_rate": 7.191780821917808e-07, "log_odds_chosen": 1.0671441555023193, "log_odds_ratio": -0.35848483443260193, "logits/chosen": 0.8379532694816589, "logits/rejected": 0.8030576705932617, "logps/chosen": -2.016169548034668, "logps/rejected": -2.998788595199585, "loss": 0.8625, "nll_loss": 0.8266245126724243, "rewards/accuracies": 0.875, "rewards/chosen": -0.20161697268486023, "rewards/margins": 0.09826190024614334, "rewards/rejected": -0.299878865480423, "step": 2051 }, { "epoch": 5.618069815195072, "grad_norm": 3.2626304626464844, "learning_rate": 7.19041095890411e-07, "log_odds_chosen": 1.3096084594726562, "log_odds_ratio": -0.29608243703842163, "logits/chosen": 0.9579678773880005, "logits/rejected": 1.088174819946289, "logps/chosen": -2.379617214202881, "logps/rejected": -3.61921763420105, "loss": 0.7598, "nll_loss": 0.7301580905914307, "rewards/accuracies": 1.0, "rewards/chosen": -0.23796173930168152, "rewards/margins": 0.12396003305912018, "rewards/rejected": -0.3619217872619629, "step": 2052 }, { "epoch": 5.620807665982204, "grad_norm": 4.222228527069092, "learning_rate": 7.18904109589041e-07, "log_odds_chosen": 0.9203000068664551, "log_odds_ratio": -0.5238461494445801, "logits/chosen": 0.9787696599960327, "logits/rejected": 1.001123309135437, "logps/chosen": -2.456778049468994, "logps/rejected": -3.3039586544036865, "loss": 0.9065, "nll_loss": 0.8541211485862732, "rewards/accuracies": 0.5, "rewards/chosen": -0.24567779898643494, "rewards/margins": 0.08471804857254028, "rewards/rejected": -0.3303958475589752, "step": 2053 }, { "epoch": 5.623545516769336, "grad_norm": 5.285682678222656, "learning_rate": 7.187671232876712e-07, "log_odds_chosen": 0.7869740724563599, "log_odds_ratio": -0.6624940633773804, "logits/chosen": 0.8883033394813538, "logits/rejected": 0.9175577759742737, "logps/chosen": -2.4628820419311523, "logps/rejected": -3.198045253753662, "loss": 1.0054, "nll_loss": 0.9391530752182007, "rewards/accuracies": 0.5, "rewards/chosen": -0.2462882399559021, "rewards/margins": 0.07351630926132202, "rewards/rejected": -0.31980451941490173, "step": 2054 }, { "epoch": 5.626283367556468, "grad_norm": 4.721287727355957, "learning_rate": 7.186301369863014e-07, "log_odds_chosen": 0.7011339068412781, "log_odds_ratio": -0.742624044418335, "logits/chosen": 0.7991539239883423, "logits/rejected": 0.7179425954818726, "logps/chosen": -2.3837990760803223, "logps/rejected": -3.013594388961792, "loss": 1.0139, "nll_loss": 0.9396007657051086, "rewards/accuracies": 0.75, "rewards/chosen": -0.23837989568710327, "rewards/margins": 0.06297953426837921, "rewards/rejected": -0.3013594448566437, "step": 2055 }, { "epoch": 5.6290212183436, "grad_norm": 6.141429901123047, "learning_rate": 7.184931506849314e-07, "log_odds_chosen": -0.2612302899360657, "log_odds_ratio": -0.9750597476959229, "logits/chosen": 0.9983937740325928, "logits/rejected": 1.0515464544296265, "logps/chosen": -3.3583154678344727, "logps/rejected": -3.1366753578186035, "loss": 1.0112, "nll_loss": 0.9137256145477295, "rewards/accuracies": 0.25, "rewards/chosen": -0.33583158254623413, "rewards/margins": -0.022164033725857735, "rewards/rejected": -0.31366753578186035, "step": 2056 }, { "epoch": 5.631759069130732, "grad_norm": 5.53153657913208, "learning_rate": 7.183561643835616e-07, "log_odds_chosen": 1.2967889308929443, "log_odds_ratio": -0.49383413791656494, "logits/chosen": 0.941646933555603, "logits/rejected": 1.0773096084594727, "logps/chosen": -2.7626144886016846, "logps/rejected": -3.982555389404297, "loss": 0.7709, "nll_loss": 0.7215414047241211, "rewards/accuracies": 0.875, "rewards/chosen": -0.27626147866249084, "rewards/margins": 0.12199412286281586, "rewards/rejected": -0.3982555568218231, "step": 2057 }, { "epoch": 5.634496919917865, "grad_norm": 5.888261795043945, "learning_rate": 7.182191780821918e-07, "log_odds_chosen": 1.3127660751342773, "log_odds_ratio": -0.431307315826416, "logits/chosen": 0.9486016631126404, "logits/rejected": 0.8789909482002258, "logps/chosen": -2.427133083343506, "logps/rejected": -3.65472149848938, "loss": 0.9402, "nll_loss": 0.8970227837562561, "rewards/accuracies": 0.75, "rewards/chosen": -0.2427133321762085, "rewards/margins": 0.12275883555412292, "rewards/rejected": -0.36547213792800903, "step": 2058 }, { "epoch": 5.637234770704996, "grad_norm": 3.9362895488739014, "learning_rate": 7.180821917808219e-07, "log_odds_chosen": 0.8780486583709717, "log_odds_ratio": -0.5396947860717773, "logits/chosen": 0.7774590849876404, "logits/rejected": 0.7135944962501526, "logps/chosen": -2.280479907989502, "logps/rejected": -3.1377692222595215, "loss": 0.9478, "nll_loss": 0.8938743472099304, "rewards/accuracies": 0.75, "rewards/chosen": -0.22804801166057587, "rewards/margins": 0.08572891354560852, "rewards/rejected": -0.3137769103050232, "step": 2059 }, { "epoch": 5.639972621492129, "grad_norm": 5.201436519622803, "learning_rate": 7.17945205479452e-07, "log_odds_chosen": 1.1317079067230225, "log_odds_ratio": -0.4570576846599579, "logits/chosen": 1.0456739664077759, "logits/rejected": 1.1280772686004639, "logps/chosen": -3.0785746574401855, "logps/rejected": -4.155513763427734, "loss": 0.782, "nll_loss": 0.736322283744812, "rewards/accuracies": 0.875, "rewards/chosen": -0.3078574538230896, "rewards/margins": 0.10769394040107727, "rewards/rejected": -0.41555139422416687, "step": 2060 }, { "epoch": 5.642710472279261, "grad_norm": 3.543654203414917, "learning_rate": 7.178082191780822e-07, "log_odds_chosen": 1.5575700998306274, "log_odds_ratio": -0.3264886736869812, "logits/chosen": 0.7590550184249878, "logits/rejected": 0.7297153472900391, "logps/chosen": -2.0764143466949463, "logps/rejected": -3.5022695064544678, "loss": 0.8673, "nll_loss": 0.834650456905365, "rewards/accuracies": 0.875, "rewards/chosen": -0.20764145255088806, "rewards/margins": 0.14258551597595215, "rewards/rejected": -0.3502269685268402, "step": 2061 }, { "epoch": 5.645448323066393, "grad_norm": 4.076430797576904, "learning_rate": 7.176712328767123e-07, "log_odds_chosen": 1.0521893501281738, "log_odds_ratio": -0.34199225902557373, "logits/chosen": 0.9846959114074707, "logits/rejected": 1.0315511226654053, "logps/chosen": -2.645639181137085, "logps/rejected": -3.641587495803833, "loss": 0.7289, "nll_loss": 0.6947179436683655, "rewards/accuracies": 1.0, "rewards/chosen": -0.2645639181137085, "rewards/margins": 0.0995948314666748, "rewards/rejected": -0.3641587495803833, "step": 2062 }, { "epoch": 5.648186173853525, "grad_norm": 3.6291990280151367, "learning_rate": 7.175342465753424e-07, "log_odds_chosen": 1.5524846315383911, "log_odds_ratio": -0.30301275849342346, "logits/chosen": 0.9346733093261719, "logits/rejected": 0.9428286552429199, "logps/chosen": -2.0949161052703857, "logps/rejected": -3.5213427543640137, "loss": 0.8112, "nll_loss": 0.7808640003204346, "rewards/accuracies": 0.875, "rewards/chosen": -0.20949159562587738, "rewards/margins": 0.14264267683029175, "rewards/rejected": -0.3521342873573303, "step": 2063 }, { "epoch": 5.650924024640657, "grad_norm": 4.70349645614624, "learning_rate": 7.173972602739725e-07, "log_odds_chosen": 1.6892576217651367, "log_odds_ratio": -0.36809664964675903, "logits/chosen": 0.8851606845855713, "logits/rejected": 0.8848413228988647, "logps/chosen": -2.828425884246826, "logps/rejected": -4.467957973480225, "loss": 0.9172, "nll_loss": 0.8803917169570923, "rewards/accuracies": 0.875, "rewards/chosen": -0.28284257650375366, "rewards/margins": 0.16395321488380432, "rewards/rejected": -0.44679582118988037, "step": 2064 }, { "epoch": 5.653661875427789, "grad_norm": 4.927251815795898, "learning_rate": 7.172602739726027e-07, "log_odds_chosen": 2.0757181644439697, "log_odds_ratio": -0.34372591972351074, "logits/chosen": 0.8452392220497131, "logits/rejected": 0.851252555847168, "logps/chosen": -2.287761926651001, "logps/rejected": -4.283324718475342, "loss": 0.8132, "nll_loss": 0.7788629531860352, "rewards/accuracies": 1.0, "rewards/chosen": -0.228776216506958, "rewards/margins": 0.19955630600452423, "rewards/rejected": -0.42833250761032104, "step": 2065 }, { "epoch": 5.6563997262149215, "grad_norm": 4.62357759475708, "learning_rate": 7.171232876712329e-07, "log_odds_chosen": 0.6978425979614258, "log_odds_ratio": -0.5178810954093933, "logits/chosen": 0.9125463366508484, "logits/rejected": 0.9131238460540771, "logps/chosen": -2.9201457500457764, "logps/rejected": -3.5994622707366943, "loss": 0.8458, "nll_loss": 0.793975293636322, "rewards/accuracies": 0.625, "rewards/chosen": -0.29201456904411316, "rewards/margins": 0.0679316446185112, "rewards/rejected": -0.35994622111320496, "step": 2066 }, { "epoch": 5.6591375770020536, "grad_norm": 3.576014518737793, "learning_rate": 7.169863013698629e-07, "log_odds_chosen": 1.5494552850723267, "log_odds_ratio": -0.3398236036300659, "logits/chosen": 1.0240851640701294, "logits/rejected": 1.0262799263000488, "logps/chosen": -2.467487096786499, "logps/rejected": -3.8653478622436523, "loss": 0.7535, "nll_loss": 0.7194766402244568, "rewards/accuracies": 0.875, "rewards/chosen": -0.24674871563911438, "rewards/margins": 0.13978607952594757, "rewards/rejected": -0.38653481006622314, "step": 2067 }, { "epoch": 5.661875427789186, "grad_norm": 3.922673463821411, "learning_rate": 7.168493150684931e-07, "log_odds_chosen": 2.2377448081970215, "log_odds_ratio": -0.22156357765197754, "logits/chosen": 1.0230427980422974, "logits/rejected": 1.0857409238815308, "logps/chosen": -2.6959376335144043, "logps/rejected": -4.8644328117370605, "loss": 0.7901, "nll_loss": 0.7679123878479004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2695937752723694, "rewards/margins": 0.21684950590133667, "rewards/rejected": -0.48644328117370605, "step": 2068 }, { "epoch": 5.664613278576318, "grad_norm": 4.172338485717773, "learning_rate": 7.167123287671233e-07, "log_odds_chosen": 2.3194851875305176, "log_odds_ratio": -0.3856879770755768, "logits/chosen": 0.7563320398330688, "logits/rejected": 0.6856327056884766, "logps/chosen": -2.653252601623535, "logps/rejected": -4.894570827484131, "loss": 0.8654, "nll_loss": 0.8268686532974243, "rewards/accuracies": 0.875, "rewards/chosen": -0.26532524824142456, "rewards/margins": 0.22413182258605957, "rewards/rejected": -0.48945707082748413, "step": 2069 }, { "epoch": 5.66735112936345, "grad_norm": 4.469315052032471, "learning_rate": 7.165753424657533e-07, "log_odds_chosen": 1.134147047996521, "log_odds_ratio": -0.4383573830127716, "logits/chosen": 0.7561994194984436, "logits/rejected": 0.7638009786605835, "logps/chosen": -2.8381550312042236, "logps/rejected": -3.8999714851379395, "loss": 0.8795, "nll_loss": 0.8356820344924927, "rewards/accuracies": 0.875, "rewards/chosen": -0.28381550312042236, "rewards/margins": 0.10618165135383606, "rewards/rejected": -0.38999712467193604, "step": 2070 }, { "epoch": 5.670088980150582, "grad_norm": 3.7748420238494873, "learning_rate": 7.164383561643835e-07, "log_odds_chosen": 0.9900401830673218, "log_odds_ratio": -0.43240976333618164, "logits/chosen": 0.7611802816390991, "logits/rejected": 0.7123484015464783, "logps/chosen": -2.0804851055145264, "logps/rejected": -2.9540293216705322, "loss": 0.8896, "nll_loss": 0.8464082479476929, "rewards/accuracies": 0.875, "rewards/chosen": -0.2080485075712204, "rewards/margins": 0.08735442161560059, "rewards/rejected": -0.2954029440879822, "step": 2071 }, { "epoch": 5.672826830937714, "grad_norm": 4.2512311935424805, "learning_rate": 7.163013698630137e-07, "log_odds_chosen": 1.4667901992797852, "log_odds_ratio": -0.42947888374328613, "logits/chosen": 0.7675678730010986, "logits/rejected": 0.8123928308486938, "logps/chosen": -2.6890411376953125, "logps/rejected": -4.1206865310668945, "loss": 0.9645, "nll_loss": 0.9215906262397766, "rewards/accuracies": 0.875, "rewards/chosen": -0.2689041197299957, "rewards/margins": 0.1431645005941391, "rewards/rejected": -0.41206860542297363, "step": 2072 }, { "epoch": 5.675564681724846, "grad_norm": 3.262364625930786, "learning_rate": 7.161643835616437e-07, "log_odds_chosen": 1.6164839267730713, "log_odds_ratio": -0.2926179766654968, "logits/chosen": 0.751286506652832, "logits/rejected": 0.7593253254890442, "logps/chosen": -2.0476999282836914, "logps/rejected": -3.5469954013824463, "loss": 0.9233, "nll_loss": 0.8939961194992065, "rewards/accuracies": 0.875, "rewards/chosen": -0.20476999878883362, "rewards/margins": 0.14992956817150116, "rewards/rejected": -0.35469958186149597, "step": 2073 }, { "epoch": 5.678302532511978, "grad_norm": 3.9112391471862793, "learning_rate": 7.160273972602739e-07, "log_odds_chosen": 2.1234800815582275, "log_odds_ratio": -0.2611963450908661, "logits/chosen": 0.7876752018928528, "logits/rejected": 0.7960370779037476, "logps/chosen": -2.4429943561553955, "logps/rejected": -4.438155651092529, "loss": 0.856, "nll_loss": 0.8298965096473694, "rewards/accuracies": 0.875, "rewards/chosen": -0.24429942667484283, "rewards/margins": 0.1995161473751068, "rewards/rejected": -0.44381555914878845, "step": 2074 }, { "epoch": 5.68104038329911, "grad_norm": 4.092473030090332, "learning_rate": 7.158904109589041e-07, "log_odds_chosen": 1.013694167137146, "log_odds_ratio": -0.41251152753829956, "logits/chosen": 0.79619300365448, "logits/rejected": 0.8217137455940247, "logps/chosen": -2.084404468536377, "logps/rejected": -3.0321028232574463, "loss": 0.8477, "nll_loss": 0.8064224720001221, "rewards/accuracies": 0.875, "rewards/chosen": -0.20844045281410217, "rewards/margins": 0.09476985037326813, "rewards/rejected": -0.3032102882862091, "step": 2075 }, { "epoch": 5.683778234086242, "grad_norm": 3.3887135982513428, "learning_rate": 7.157534246575342e-07, "log_odds_chosen": 1.9016988277435303, "log_odds_ratio": -0.3519347906112671, "logits/chosen": 0.9538121223449707, "logits/rejected": 0.99205482006073, "logps/chosen": -2.6317055225372314, "logps/rejected": -4.495332717895508, "loss": 0.7654, "nll_loss": 0.7301747798919678, "rewards/accuracies": 0.875, "rewards/chosen": -0.2631705701351166, "rewards/margins": 0.18636274337768555, "rewards/rejected": -0.4495333135128021, "step": 2076 }, { "epoch": 5.686516084873374, "grad_norm": 4.123955249786377, "learning_rate": 7.156164383561643e-07, "log_odds_chosen": 0.4791784882545471, "log_odds_ratio": -0.6996276378631592, "logits/chosen": 0.7301512956619263, "logits/rejected": 0.7877774834632874, "logps/chosen": -2.6309800148010254, "logps/rejected": -3.0524327754974365, "loss": 0.8709, "nll_loss": 0.8008979558944702, "rewards/accuracies": 0.625, "rewards/chosen": -0.26309800148010254, "rewards/margins": 0.0421452671289444, "rewards/rejected": -0.30524328351020813, "step": 2077 }, { "epoch": 5.689253935660506, "grad_norm": 3.251232147216797, "learning_rate": 7.154794520547945e-07, "log_odds_chosen": 1.8093217611312866, "log_odds_ratio": -0.2704058289527893, "logits/chosen": 0.8373489379882812, "logits/rejected": 0.8205718994140625, "logps/chosen": -2.442833423614502, "logps/rejected": -4.1581830978393555, "loss": 0.7981, "nll_loss": 0.7710450887680054, "rewards/accuracies": 1.0, "rewards/chosen": -0.24428334832191467, "rewards/margins": 0.1715349704027176, "rewards/rejected": -0.41581833362579346, "step": 2078 }, { "epoch": 5.691991786447638, "grad_norm": 4.1803107261657715, "learning_rate": 7.153424657534246e-07, "log_odds_chosen": 1.441986083984375, "log_odds_ratio": -0.3655427396297455, "logits/chosen": 0.9011040925979614, "logits/rejected": 0.8913301229476929, "logps/chosen": -2.768303155899048, "logps/rejected": -4.155079364776611, "loss": 0.8768, "nll_loss": 0.840238094329834, "rewards/accuracies": 1.0, "rewards/chosen": -0.2768302857875824, "rewards/margins": 0.13867764174938202, "rewards/rejected": -0.415507972240448, "step": 2079 }, { "epoch": 5.69472963723477, "grad_norm": 3.8530004024505615, "learning_rate": 7.152054794520548e-07, "log_odds_chosen": 1.478453278541565, "log_odds_ratio": -0.4258098006248474, "logits/chosen": 0.8842042684555054, "logits/rejected": 0.9142318964004517, "logps/chosen": -2.363095998764038, "logps/rejected": -3.738738775253296, "loss": 0.9105, "nll_loss": 0.8678973317146301, "rewards/accuracies": 0.75, "rewards/chosen": -0.23630960285663605, "rewards/margins": 0.1375642865896225, "rewards/rejected": -0.37387388944625854, "step": 2080 }, { "epoch": 5.6974674880219025, "grad_norm": 4.845600128173828, "learning_rate": 7.150684931506848e-07, "log_odds_chosen": 1.8154287338256836, "log_odds_ratio": -0.3338257074356079, "logits/chosen": 0.7645589709281921, "logits/rejected": 0.6864141225814819, "logps/chosen": -2.278040885925293, "logps/rejected": -4.017232894897461, "loss": 0.8855, "nll_loss": 0.8521288633346558, "rewards/accuracies": 0.875, "rewards/chosen": -0.22780409455299377, "rewards/margins": 0.173919215798378, "rewards/rejected": -0.40172329545021057, "step": 2081 }, { "epoch": 5.700205338809035, "grad_norm": 4.544908046722412, "learning_rate": 7.14931506849315e-07, "log_odds_chosen": 1.1089341640472412, "log_odds_ratio": -0.515739381313324, "logits/chosen": 0.8276443481445312, "logits/rejected": 0.8764014840126038, "logps/chosen": -2.7737839221954346, "logps/rejected": -3.8164591789245605, "loss": 0.933, "nll_loss": 0.8814254999160767, "rewards/accuracies": 0.75, "rewards/chosen": -0.2773783802986145, "rewards/margins": 0.10426750779151917, "rewards/rejected": -0.38164588809013367, "step": 2082 }, { "epoch": 5.702943189596167, "grad_norm": 4.198352813720703, "learning_rate": 7.147945205479452e-07, "log_odds_chosen": 1.1514471769332886, "log_odds_ratio": -0.4239353537559509, "logits/chosen": 0.8077545166015625, "logits/rejected": 0.7438543438911438, "logps/chosen": -2.0162675380706787, "logps/rejected": -3.1139683723449707, "loss": 0.9176, "nll_loss": 0.8752068877220154, "rewards/accuracies": 0.75, "rewards/chosen": -0.20162677764892578, "rewards/margins": 0.10977006703615189, "rewards/rejected": -0.3113968074321747, "step": 2083 }, { "epoch": 5.705681040383299, "grad_norm": 3.763810396194458, "learning_rate": 7.146575342465752e-07, "log_odds_chosen": 0.004359863698482513, "log_odds_ratio": -0.7617507576942444, "logits/chosen": 0.849048376083374, "logits/rejected": 0.8473447561264038, "logps/chosen": -2.4324045181274414, "logps/rejected": -2.389697313308716, "loss": 0.9233, "nll_loss": 0.8471521735191345, "rewards/accuracies": 0.625, "rewards/chosen": -0.24324047565460205, "rewards/margins": -0.004270736128091812, "rewards/rejected": -0.23896974325180054, "step": 2084 }, { "epoch": 5.708418891170432, "grad_norm": 5.483831882476807, "learning_rate": 7.145205479452054e-07, "log_odds_chosen": 1.0335769653320312, "log_odds_ratio": -0.5184134840965271, "logits/chosen": 0.8872522115707397, "logits/rejected": 0.8989430069923401, "logps/chosen": -2.5281426906585693, "logps/rejected": -3.4667856693267822, "loss": 0.838, "nll_loss": 0.7861431837081909, "rewards/accuracies": 0.625, "rewards/chosen": -0.25281429290771484, "rewards/margins": 0.09386427700519562, "rewards/rejected": -0.34667855501174927, "step": 2085 }, { "epoch": 5.711156741957563, "grad_norm": 9.806047439575195, "learning_rate": 7.143835616438356e-07, "log_odds_chosen": 1.3618048429489136, "log_odds_ratio": -0.8256276249885559, "logits/chosen": 1.254894733428955, "logits/rejected": 1.2742141485214233, "logps/chosen": -3.6738274097442627, "logps/rejected": -5.013467788696289, "loss": 0.8906, "nll_loss": 0.80799400806427, "rewards/accuracies": 0.75, "rewards/chosen": -0.3673827648162842, "rewards/margins": 0.13396403193473816, "rewards/rejected": -0.5013467669487, "step": 2086 }, { "epoch": 5.713894592744696, "grad_norm": 3.99969482421875, "learning_rate": 7.142465753424656e-07, "log_odds_chosen": 1.0135492086410522, "log_odds_ratio": -0.3611498177051544, "logits/chosen": 0.7023721933364868, "logits/rejected": 0.6761530637741089, "logps/chosen": -2.20398211479187, "logps/rejected": -3.099871873855591, "loss": 0.8923, "nll_loss": 0.8562248945236206, "rewards/accuracies": 0.875, "rewards/chosen": -0.22039823234081268, "rewards/margins": 0.08958898484706879, "rewards/rejected": -0.3099871873855591, "step": 2087 }, { "epoch": 5.716632443531828, "grad_norm": 4.598671913146973, "learning_rate": 7.141095890410958e-07, "log_odds_chosen": 1.3275716304779053, "log_odds_ratio": -0.5125879645347595, "logits/chosen": 0.8701175451278687, "logits/rejected": 0.8013975024223328, "logps/chosen": -2.6985273361206055, "logps/rejected": -3.9385898113250732, "loss": 0.9393, "nll_loss": 0.8880602717399597, "rewards/accuracies": 0.875, "rewards/chosen": -0.26985272765159607, "rewards/margins": 0.1240062266588211, "rewards/rejected": -0.39385896921157837, "step": 2088 }, { "epoch": 5.71937029431896, "grad_norm": 4.22529411315918, "learning_rate": 7.13972602739726e-07, "log_odds_chosen": 1.2136965990066528, "log_odds_ratio": -0.3323642909526825, "logits/chosen": 0.7759647369384766, "logits/rejected": 0.760650634765625, "logps/chosen": -2.670802354812622, "logps/rejected": -3.825371742248535, "loss": 0.9517, "nll_loss": 0.918482780456543, "rewards/accuracies": 1.0, "rewards/chosen": -0.2670802175998688, "rewards/margins": 0.1154569536447525, "rewards/rejected": -0.38253721594810486, "step": 2089 }, { "epoch": 5.722108145106092, "grad_norm": 4.329247951507568, "learning_rate": 7.138356164383561e-07, "log_odds_chosen": 0.3154796361923218, "log_odds_ratio": -0.7912815809249878, "logits/chosen": 0.6218066811561584, "logits/rejected": 0.49330469965934753, "logps/chosen": -2.6660571098327637, "logps/rejected": -2.9338278770446777, "loss": 1.0143, "nll_loss": 0.9351621270179749, "rewards/accuracies": 0.625, "rewards/chosen": -0.2666057348251343, "rewards/margins": 0.026777036488056183, "rewards/rejected": -0.29338276386260986, "step": 2090 }, { "epoch": 5.724845995893224, "grad_norm": 4.523488521575928, "learning_rate": 7.136986301369862e-07, "log_odds_chosen": 1.3983467817306519, "log_odds_ratio": -0.3627049922943115, "logits/chosen": 0.7177225351333618, "logits/rejected": 0.7982726693153381, "logps/chosen": -2.4796319007873535, "logps/rejected": -3.811706066131592, "loss": 0.8813, "nll_loss": 0.8450480103492737, "rewards/accuracies": 0.75, "rewards/chosen": -0.24796319007873535, "rewards/margins": 0.13320744037628174, "rewards/rejected": -0.3811706304550171, "step": 2091 }, { "epoch": 5.727583846680356, "grad_norm": 4.084915637969971, "learning_rate": 7.135616438356164e-07, "log_odds_chosen": 0.5699089169502258, "log_odds_ratio": -0.5150789022445679, "logits/chosen": 0.8915963768959045, "logits/rejected": 0.8804597854614258, "logps/chosen": -2.760383367538452, "logps/rejected": -3.2747795581817627, "loss": 0.9262, "nll_loss": 0.8747079372406006, "rewards/accuracies": 0.75, "rewards/chosen": -0.27603834867477417, "rewards/margins": 0.05143962800502777, "rewards/rejected": -0.32747799158096313, "step": 2092 }, { "epoch": 5.730321697467488, "grad_norm": 4.127054214477539, "learning_rate": 7.134246575342465e-07, "log_odds_chosen": 1.489039421081543, "log_odds_ratio": -0.319269597530365, "logits/chosen": 0.8993022441864014, "logits/rejected": 0.8495315313339233, "logps/chosen": -2.0338335037231445, "logps/rejected": -3.4322896003723145, "loss": 0.8982, "nll_loss": 0.8662621974945068, "rewards/accuracies": 1.0, "rewards/chosen": -0.20338335633277893, "rewards/margins": 0.13984562456607819, "rewards/rejected": -0.3432289958000183, "step": 2093 }, { "epoch": 5.73305954825462, "grad_norm": 3.637932062149048, "learning_rate": 7.132876712328766e-07, "log_odds_chosen": 1.972418189048767, "log_odds_ratio": -0.22759896516799927, "logits/chosen": 1.1217820644378662, "logits/rejected": 1.1974868774414062, "logps/chosen": -2.3670454025268555, "logps/rejected": -4.223020553588867, "loss": 0.7117, "nll_loss": 0.6889731884002686, "rewards/accuracies": 1.0, "rewards/chosen": -0.23670455813407898, "rewards/margins": 0.1855974942445755, "rewards/rejected": -0.4223020672798157, "step": 2094 }, { "epoch": 5.735797399041752, "grad_norm": 3.509206771850586, "learning_rate": 7.131506849315067e-07, "log_odds_chosen": 2.613765239715576, "log_odds_ratio": -0.16564929485321045, "logits/chosen": 0.8410289287567139, "logits/rejected": 0.8795739412307739, "logps/chosen": -2.717029094696045, "logps/rejected": -5.251378059387207, "loss": 0.8544, "nll_loss": 0.8378071188926697, "rewards/accuracies": 1.0, "rewards/chosen": -0.27170291543006897, "rewards/margins": 0.2534348964691162, "rewards/rejected": -0.5251378417015076, "step": 2095 }, { "epoch": 5.738535249828884, "grad_norm": 4.047300338745117, "learning_rate": 7.13013698630137e-07, "log_odds_chosen": 1.0779812335968018, "log_odds_ratio": -0.4214141368865967, "logits/chosen": 0.7193777561187744, "logits/rejected": 0.778712809085846, "logps/chosen": -2.1777584552764893, "logps/rejected": -3.1810660362243652, "loss": 0.8306, "nll_loss": 0.7884684205055237, "rewards/accuracies": 0.875, "rewards/chosen": -0.2177758365869522, "rewards/margins": 0.10033075511455536, "rewards/rejected": -0.31810659170150757, "step": 2096 }, { "epoch": 5.7412731006160165, "grad_norm": 3.6445653438568115, "learning_rate": 7.128767123287672e-07, "log_odds_chosen": 2.080691337585449, "log_odds_ratio": -0.21776121854782104, "logits/chosen": 1.0077722072601318, "logits/rejected": 1.0251994132995605, "logps/chosen": -2.150592803955078, "logps/rejected": -4.10166072845459, "loss": 0.7594, "nll_loss": 0.7375888824462891, "rewards/accuracies": 1.0, "rewards/chosen": -0.2150592803955078, "rewards/margins": 0.19510683417320251, "rewards/rejected": -0.41016608476638794, "step": 2097 }, { "epoch": 5.7440109514031485, "grad_norm": 3.6761655807495117, "learning_rate": 7.127397260273971e-07, "log_odds_chosen": 1.6271040439605713, "log_odds_ratio": -0.4082929790019989, "logits/chosen": 0.7006874084472656, "logits/rejected": 0.7030235528945923, "logps/chosen": -2.3790106773376465, "logps/rejected": -3.964111566543579, "loss": 0.921, "nll_loss": 0.8801773190498352, "rewards/accuracies": 0.75, "rewards/chosen": -0.23790109157562256, "rewards/margins": 0.15851008892059326, "rewards/rejected": -0.3964111804962158, "step": 2098 }, { "epoch": 5.746748802190281, "grad_norm": 3.5037662982940674, "learning_rate": 7.126027397260274e-07, "log_odds_chosen": 0.8651612997055054, "log_odds_ratio": -0.50124591588974, "logits/chosen": 0.5758140683174133, "logits/rejected": 0.6674965620040894, "logps/chosen": -2.2439615726470947, "logps/rejected": -3.079836130142212, "loss": 0.9493, "nll_loss": 0.8991674184799194, "rewards/accuracies": 0.625, "rewards/chosen": -0.22439616918563843, "rewards/margins": 0.08358748257160187, "rewards/rejected": -0.3079836368560791, "step": 2099 }, { "epoch": 5.749486652977413, "grad_norm": 4.460117340087891, "learning_rate": 7.124657534246576e-07, "log_odds_chosen": 1.2228540182113647, "log_odds_ratio": -0.3726826012134552, "logits/chosen": 0.7732551097869873, "logits/rejected": 0.6960433721542358, "logps/chosen": -1.9903820753097534, "logps/rejected": -3.1532394886016846, "loss": 0.8645, "nll_loss": 0.82722407579422, "rewards/accuracies": 0.875, "rewards/chosen": -0.19903820753097534, "rewards/margins": 0.11628575623035431, "rewards/rejected": -0.31532394886016846, "step": 2100 }, { "epoch": 5.752224503764545, "grad_norm": 3.6789166927337646, "learning_rate": 7.123287671232876e-07, "log_odds_chosen": 1.1163135766983032, "log_odds_ratio": -0.5764954090118408, "logits/chosen": 0.7256208658218384, "logits/rejected": 0.7771137356758118, "logps/chosen": -3.0619781017303467, "logps/rejected": -4.145523548126221, "loss": 0.8832, "nll_loss": 0.8255587220191956, "rewards/accuracies": 0.875, "rewards/chosen": -0.3061978220939636, "rewards/margins": 0.10835455358028412, "rewards/rejected": -0.41455233097076416, "step": 2101 }, { "epoch": 5.754962354551677, "grad_norm": 4.488772869110107, "learning_rate": 7.121917808219178e-07, "log_odds_chosen": 1.0348402261734009, "log_odds_ratio": -0.3961881101131439, "logits/chosen": 0.6023929715156555, "logits/rejected": 0.5918338298797607, "logps/chosen": -2.029078483581543, "logps/rejected": -2.969237804412842, "loss": 0.8468, "nll_loss": 0.807213306427002, "rewards/accuracies": 0.875, "rewards/chosen": -0.20290784537792206, "rewards/margins": 0.09401595592498779, "rewards/rejected": -0.29692381620407104, "step": 2102 }, { "epoch": 5.757700205338809, "grad_norm": 5.818386077880859, "learning_rate": 7.12054794520548e-07, "log_odds_chosen": 0.22670069336891174, "log_odds_ratio": -0.7024669647216797, "logits/chosen": 0.871641218662262, "logits/rejected": 0.8374550938606262, "logps/chosen": -2.9735870361328125, "logps/rejected": -3.148536443710327, "loss": 0.8663, "nll_loss": 0.7960742115974426, "rewards/accuracies": 0.625, "rewards/chosen": -0.2973586916923523, "rewards/margins": 0.017494991421699524, "rewards/rejected": -0.3148536682128906, "step": 2103 }, { "epoch": 5.760438056125941, "grad_norm": 3.9254932403564453, "learning_rate": 7.119178082191781e-07, "log_odds_chosen": 1.2357151508331299, "log_odds_ratio": -0.3526378870010376, "logits/chosen": 0.8189595341682434, "logits/rejected": 0.8100728988647461, "logps/chosen": -1.9398269653320312, "logps/rejected": -3.0945546627044678, "loss": 0.8245, "nll_loss": 0.7892654538154602, "rewards/accuracies": 1.0, "rewards/chosen": -0.19398272037506104, "rewards/margins": 0.11547276377677917, "rewards/rejected": -0.3094554543495178, "step": 2104 }, { "epoch": 5.763175906913073, "grad_norm": 3.252361536026001, "learning_rate": 7.117808219178082e-07, "log_odds_chosen": 1.9722682237625122, "log_odds_ratio": -0.2293066382408142, "logits/chosen": 0.9630966186523438, "logits/rejected": 0.8921149373054504, "logps/chosen": -1.7927769422531128, "logps/rejected": -3.6023736000061035, "loss": 0.82, "nll_loss": 0.7970914244651794, "rewards/accuracies": 1.0, "rewards/chosen": -0.179277703166008, "rewards/margins": 0.18095968663692474, "rewards/rejected": -0.36023736000061035, "step": 2105 }, { "epoch": 5.765913757700205, "grad_norm": 5.4356207847595215, "learning_rate": 7.116438356164384e-07, "log_odds_chosen": 0.8126527070999146, "log_odds_ratio": -0.5094646215438843, "logits/chosen": 0.6618804931640625, "logits/rejected": 0.6540786027908325, "logps/chosen": -2.180044174194336, "logps/rejected": -2.908247947692871, "loss": 0.8559, "nll_loss": 0.8049278855323792, "rewards/accuracies": 0.875, "rewards/chosen": -0.21800442039966583, "rewards/margins": 0.07282038033008575, "rewards/rejected": -0.2908248007297516, "step": 2106 }, { "epoch": 5.768651608487337, "grad_norm": 3.6214849948883057, "learning_rate": 7.115068493150685e-07, "log_odds_chosen": 1.0315345525741577, "log_odds_ratio": -0.3689189553260803, "logits/chosen": 0.8689488172531128, "logits/rejected": 0.8115766048431396, "logps/chosen": -2.2787060737609863, "logps/rejected": -3.2525362968444824, "loss": 0.862, "nll_loss": 0.8250958919525146, "rewards/accuracies": 1.0, "rewards/chosen": -0.2278706133365631, "rewards/margins": 0.0973830372095108, "rewards/rejected": -0.3252536356449127, "step": 2107 }, { "epoch": 5.771389459274469, "grad_norm": 3.731314182281494, "learning_rate": 7.113698630136986e-07, "log_odds_chosen": 1.3127883672714233, "log_odds_ratio": -0.40537533164024353, "logits/chosen": 0.9100121259689331, "logits/rejected": 0.9011168479919434, "logps/chosen": -1.852882981300354, "logps/rejected": -3.0967226028442383, "loss": 0.9316, "nll_loss": 0.8910251259803772, "rewards/accuracies": 0.875, "rewards/chosen": -0.18528828024864197, "rewards/margins": 0.12438398599624634, "rewards/rejected": -0.3096722960472107, "step": 2108 }, { "epoch": 5.774127310061601, "grad_norm": 4.158414363861084, "learning_rate": 7.112328767123288e-07, "log_odds_chosen": 1.2213770151138306, "log_odds_ratio": -0.45160752534866333, "logits/chosen": 0.7675052285194397, "logits/rejected": 0.7208796143531799, "logps/chosen": -2.0876946449279785, "logps/rejected": -3.2372889518737793, "loss": 0.9032, "nll_loss": 0.8580671548843384, "rewards/accuracies": 0.875, "rewards/chosen": -0.20876945555210114, "rewards/margins": 0.11495944112539291, "rewards/rejected": -0.32372888922691345, "step": 2109 }, { "epoch": 5.776865160848733, "grad_norm": 3.7848238945007324, "learning_rate": 7.110958904109589e-07, "log_odds_chosen": 1.1915827989578247, "log_odds_ratio": -0.33413374423980713, "logits/chosen": 0.771293044090271, "logits/rejected": 0.7343790531158447, "logps/chosen": -1.8123775720596313, "logps/rejected": -2.882685422897339, "loss": 0.8301, "nll_loss": 0.7966861724853516, "rewards/accuracies": 1.0, "rewards/chosen": -0.18123777210712433, "rewards/margins": 0.10703080147504807, "rewards/rejected": -0.2882685661315918, "step": 2110 }, { "epoch": 5.779603011635865, "grad_norm": 5.2782697677612305, "learning_rate": 7.109589041095891e-07, "log_odds_chosen": 0.46579286456108093, "log_odds_ratio": -0.6435500383377075, "logits/chosen": 0.9337449073791504, "logits/rejected": 0.8656411170959473, "logps/chosen": -2.7511606216430664, "logps/rejected": -3.165761709213257, "loss": 0.9209, "nll_loss": 0.8565508127212524, "rewards/accuracies": 0.625, "rewards/chosen": -0.27511608600616455, "rewards/margins": 0.04146009311079979, "rewards/rejected": -0.31657618284225464, "step": 2111 }, { "epoch": 5.782340862422998, "grad_norm": 4.677190780639648, "learning_rate": 7.108219178082191e-07, "log_odds_chosen": 0.4260859787464142, "log_odds_ratio": -0.5812420845031738, "logits/chosen": 0.7174800038337708, "logits/rejected": 0.7351709604263306, "logps/chosen": -2.012691020965576, "logps/rejected": -2.4033191204071045, "loss": 0.891, "nll_loss": 0.8328456878662109, "rewards/accuracies": 0.625, "rewards/chosen": -0.20126911997795105, "rewards/margins": 0.03906279057264328, "rewards/rejected": -0.24033191800117493, "step": 2112 }, { "epoch": 5.78507871321013, "grad_norm": 4.322202205657959, "learning_rate": 7.106849315068493e-07, "log_odds_chosen": 1.5898280143737793, "log_odds_ratio": -0.2845095992088318, "logits/chosen": 0.9541665315628052, "logits/rejected": 0.9111018180847168, "logps/chosen": -2.1012487411499023, "logps/rejected": -3.5558595657348633, "loss": 0.8466, "nll_loss": 0.8181844353675842, "rewards/accuracies": 0.875, "rewards/chosen": -0.2101248800754547, "rewards/margins": 0.1454610824584961, "rewards/rejected": -0.3555859625339508, "step": 2113 }, { "epoch": 5.7878165639972625, "grad_norm": 3.5303072929382324, "learning_rate": 7.105479452054795e-07, "log_odds_chosen": 2.2365245819091797, "log_odds_ratio": -0.17541706562042236, "logits/chosen": 1.030125379562378, "logits/rejected": 1.1158138513565063, "logps/chosen": -2.1577186584472656, "logps/rejected": -4.2501420974731445, "loss": 0.6667, "nll_loss": 0.6491898894309998, "rewards/accuracies": 1.0, "rewards/chosen": -0.21577188372612, "rewards/margins": 0.20924237370491028, "rewards/rejected": -0.4250142276287079, "step": 2114 }, { "epoch": 5.790554414784395, "grad_norm": 4.3965864181518555, "learning_rate": 7.104109589041095e-07, "log_odds_chosen": 1.125262975692749, "log_odds_ratio": -0.5143493413925171, "logits/chosen": 0.8122787475585938, "logits/rejected": 0.7697412967681885, "logps/chosen": -2.3425660133361816, "logps/rejected": -3.390127658843994, "loss": 0.926, "nll_loss": 0.874597430229187, "rewards/accuracies": 0.75, "rewards/chosen": -0.23425662517547607, "rewards/margins": 0.10475614666938782, "rewards/rejected": -0.3390127420425415, "step": 2115 }, { "epoch": 5.793292265571527, "grad_norm": 4.136533737182617, "learning_rate": 7.102739726027397e-07, "log_odds_chosen": 0.8296753168106079, "log_odds_ratio": -0.46633994579315186, "logits/chosen": 0.821234941482544, "logits/rejected": 0.8431400060653687, "logps/chosen": -2.0574660301208496, "logps/rejected": -2.7627618312835693, "loss": 0.877, "nll_loss": 0.8303354978561401, "rewards/accuracies": 0.75, "rewards/chosen": -0.2057466208934784, "rewards/margins": 0.07052956521511078, "rewards/rejected": -0.276276171207428, "step": 2116 }, { "epoch": 5.796030116358659, "grad_norm": 3.8254988193511963, "learning_rate": 7.101369863013699e-07, "log_odds_chosen": 0.9867262840270996, "log_odds_ratio": -0.4051947295665741, "logits/chosen": 0.7064159512519836, "logits/rejected": 0.6458627581596375, "logps/chosen": -2.0524425506591797, "logps/rejected": -2.972379446029663, "loss": 0.8956, "nll_loss": 0.8551052212715149, "rewards/accuracies": 0.875, "rewards/chosen": -0.2052442729473114, "rewards/margins": 0.09199370443820953, "rewards/rejected": -0.29723796248435974, "step": 2117 }, { "epoch": 5.798767967145791, "grad_norm": 4.227887153625488, "learning_rate": 7.1e-07, "log_odds_chosen": 0.23842176795005798, "log_odds_ratio": -0.9485628008842468, "logits/chosen": 0.6550724506378174, "logits/rejected": 0.7068533897399902, "logps/chosen": -2.844106435775757, "logps/rejected": -3.0695672035217285, "loss": 0.8309, "nll_loss": 0.7360627055168152, "rewards/accuracies": 0.625, "rewards/chosen": -0.28441065549850464, "rewards/margins": 0.022546056658029556, "rewards/rejected": -0.3069567084312439, "step": 2118 }, { "epoch": 5.801505817932923, "grad_norm": 4.252255916595459, "learning_rate": 7.098630136986301e-07, "log_odds_chosen": 1.2002615928649902, "log_odds_ratio": -0.4000277519226074, "logits/chosen": 0.9568578600883484, "logits/rejected": 1.0205036401748657, "logps/chosen": -2.602893114089966, "logps/rejected": -3.7224817276000977, "loss": 0.7868, "nll_loss": 0.7467600703239441, "rewards/accuracies": 0.75, "rewards/chosen": -0.2602893114089966, "rewards/margins": 0.11195886880159378, "rewards/rejected": -0.37224817276000977, "step": 2119 }, { "epoch": 5.804243668720055, "grad_norm": 3.506147623062134, "learning_rate": 7.097260273972603e-07, "log_odds_chosen": 0.7591586112976074, "log_odds_ratio": -0.4360199570655823, "logits/chosen": 0.981998860836029, "logits/rejected": 0.923758864402771, "logps/chosen": -1.9345884323120117, "logps/rejected": -2.6251211166381836, "loss": 0.8668, "nll_loss": 0.8232090473175049, "rewards/accuracies": 0.875, "rewards/chosen": -0.19345882534980774, "rewards/margins": 0.0690532997250557, "rewards/rejected": -0.2625121474266052, "step": 2120 }, { "epoch": 5.806981519507187, "grad_norm": 4.975525856018066, "learning_rate": 7.095890410958904e-07, "log_odds_chosen": 1.2638344764709473, "log_odds_ratio": -0.4283735156059265, "logits/chosen": 0.8228201866149902, "logits/rejected": 0.8180793523788452, "logps/chosen": -3.020277738571167, "logps/rejected": -4.184894561767578, "loss": 0.8826, "nll_loss": 0.8397266864776611, "rewards/accuracies": 0.75, "rewards/chosen": -0.30202776193618774, "rewards/margins": 0.11646167933940887, "rewards/rejected": -0.4184894561767578, "step": 2121 }, { "epoch": 5.809719370294319, "grad_norm": 4.437989234924316, "learning_rate": 7.094520547945205e-07, "log_odds_chosen": 0.8750766515731812, "log_odds_ratio": -0.6003462672233582, "logits/chosen": 1.0207068920135498, "logits/rejected": 1.0960413217544556, "logps/chosen": -3.1864638328552246, "logps/rejected": -4.032361030578613, "loss": 0.8806, "nll_loss": 0.8205420970916748, "rewards/accuracies": 0.625, "rewards/chosen": -0.3186464011669159, "rewards/margins": 0.08458971232175827, "rewards/rejected": -0.4032360911369324, "step": 2122 }, { "epoch": 5.812457221081451, "grad_norm": 6.179322242736816, "learning_rate": 7.093150684931507e-07, "log_odds_chosen": 0.6401320695877075, "log_odds_ratio": -0.6300467848777771, "logits/chosen": 0.8673621416091919, "logits/rejected": 0.7844154834747314, "logps/chosen": -2.529979705810547, "logps/rejected": -3.128502368927002, "loss": 0.9396, "nll_loss": 0.8765726089477539, "rewards/accuracies": 0.625, "rewards/chosen": -0.2529979348182678, "rewards/margins": 0.05985227972269058, "rewards/rejected": -0.3128502368927002, "step": 2123 }, { "epoch": 5.815195071868583, "grad_norm": 4.854532718658447, "learning_rate": 7.091780821917808e-07, "log_odds_chosen": 0.90938401222229, "log_odds_ratio": -0.5960065722465515, "logits/chosen": 0.8750172853469849, "logits/rejected": 0.921904444694519, "logps/chosen": -3.3903822898864746, "logps/rejected": -4.2545366287231445, "loss": 0.9364, "nll_loss": 0.8768126964569092, "rewards/accuracies": 0.75, "rewards/chosen": -0.33903825283050537, "rewards/margins": 0.08641539514064789, "rewards/rejected": -0.4254536032676697, "step": 2124 }, { "epoch": 5.817932922655715, "grad_norm": 5.102901935577393, "learning_rate": 7.09041095890411e-07, "log_odds_chosen": 1.4537402391433716, "log_odds_ratio": -0.34693509340286255, "logits/chosen": 0.8124681115150452, "logits/rejected": 0.7709354162216187, "logps/chosen": -1.6785807609558105, "logps/rejected": -3.0214617252349854, "loss": 0.831, "nll_loss": 0.7963519096374512, "rewards/accuracies": 0.875, "rewards/chosen": -0.1678580790758133, "rewards/margins": 0.13428813219070435, "rewards/rejected": -0.30214619636535645, "step": 2125 }, { "epoch": 5.820670773442847, "grad_norm": 5.864264011383057, "learning_rate": 7.08904109589041e-07, "log_odds_chosen": 1.347316861152649, "log_odds_ratio": -0.3523027300834656, "logits/chosen": 0.9132966995239258, "logits/rejected": 0.9193814396858215, "logps/chosen": -2.156282901763916, "logps/rejected": -3.3883917331695557, "loss": 0.8505, "nll_loss": 0.8152449727058411, "rewards/accuracies": 0.875, "rewards/chosen": -0.21562829613685608, "rewards/margins": 0.1232108622789383, "rewards/rejected": -0.33883917331695557, "step": 2126 }, { "epoch": 5.823408624229979, "grad_norm": 3.8788585662841797, "learning_rate": 7.087671232876712e-07, "log_odds_chosen": 0.9823254942893982, "log_odds_ratio": -0.40450698137283325, "logits/chosen": 0.7892981171607971, "logits/rejected": 0.6779675483703613, "logps/chosen": -1.7649104595184326, "logps/rejected": -2.64884352684021, "loss": 0.8894, "nll_loss": 0.8489041924476624, "rewards/accuracies": 0.875, "rewards/chosen": -0.17649102210998535, "rewards/margins": 0.08839331567287445, "rewards/rejected": -0.264884352684021, "step": 2127 }, { "epoch": 5.8261464750171115, "grad_norm": 4.64033842086792, "learning_rate": 7.086301369863014e-07, "log_odds_chosen": 1.036165714263916, "log_odds_ratio": -0.36197370290756226, "logits/chosen": 0.7282211184501648, "logits/rejected": 0.7055389285087585, "logps/chosen": -2.4626524448394775, "logps/rejected": -3.399120807647705, "loss": 0.8135, "nll_loss": 0.7773122191429138, "rewards/accuracies": 0.875, "rewards/chosen": -0.2462652325630188, "rewards/margins": 0.09364685416221619, "rewards/rejected": -0.339912086725235, "step": 2128 }, { "epoch": 5.8288843258042435, "grad_norm": 3.456947088241577, "learning_rate": 7.084931506849314e-07, "log_odds_chosen": 1.2303892374038696, "log_odds_ratio": -0.3461686670780182, "logits/chosen": 0.836668848991394, "logits/rejected": 0.8096137642860413, "logps/chosen": -2.052555561065674, "logps/rejected": -3.181471347808838, "loss": 0.9131, "nll_loss": 0.8784988522529602, "rewards/accuracies": 0.875, "rewards/chosen": -0.20525556802749634, "rewards/margins": 0.11289156973361969, "rewards/rejected": -0.3181471526622772, "step": 2129 }, { "epoch": 5.831622176591376, "grad_norm": 5.763822555541992, "learning_rate": 7.083561643835616e-07, "log_odds_chosen": 1.1972157955169678, "log_odds_ratio": -0.5188103318214417, "logits/chosen": 0.6654213666915894, "logits/rejected": 0.7182352542877197, "logps/chosen": -2.9940221309661865, "logps/rejected": -4.094435214996338, "loss": 0.8538, "nll_loss": 0.8019405603408813, "rewards/accuracies": 0.75, "rewards/chosen": -0.29940223693847656, "rewards/margins": 0.11004128307104111, "rewards/rejected": -0.40944352746009827, "step": 2130 }, { "epoch": 5.834360027378508, "grad_norm": 3.641693115234375, "learning_rate": 7.082191780821918e-07, "log_odds_chosen": 1.6107096672058105, "log_odds_ratio": -0.3348388373851776, "logits/chosen": 0.683992326259613, "logits/rejected": 0.7017261385917664, "logps/chosen": -2.5171523094177246, "logps/rejected": -4.028249740600586, "loss": 0.9279, "nll_loss": 0.8944132924079895, "rewards/accuracies": 0.75, "rewards/chosen": -0.2517152428627014, "rewards/margins": 0.1511097401380539, "rewards/rejected": -0.4028249979019165, "step": 2131 }, { "epoch": 5.83709787816564, "grad_norm": 5.258431911468506, "learning_rate": 7.080821917808219e-07, "log_odds_chosen": 0.7217035889625549, "log_odds_ratio": -0.9031982421875, "logits/chosen": 0.7067298293113708, "logits/rejected": 0.6694109439849854, "logps/chosen": -2.1534717082977295, "logps/rejected": -2.835430860519409, "loss": 0.9032, "nll_loss": 0.8128776550292969, "rewards/accuracies": 0.625, "rewards/chosen": -0.21534717082977295, "rewards/margins": 0.0681959018111229, "rewards/rejected": -0.28354310989379883, "step": 2132 }, { "epoch": 5.839835728952772, "grad_norm": 4.10853385925293, "learning_rate": 7.07945205479452e-07, "log_odds_chosen": 2.0383384227752686, "log_odds_ratio": -0.27514663338661194, "logits/chosen": 0.8652549982070923, "logits/rejected": 0.8740891218185425, "logps/chosen": -1.869857907295227, "logps/rejected": -3.7893056869506836, "loss": 0.8179, "nll_loss": 0.7904115915298462, "rewards/accuracies": 1.0, "rewards/chosen": -0.1869857907295227, "rewards/margins": 0.19194477796554565, "rewards/rejected": -0.37893059849739075, "step": 2133 }, { "epoch": 5.842573579739904, "grad_norm": 4.960455417633057, "learning_rate": 7.078082191780822e-07, "log_odds_chosen": 1.2203346490859985, "log_odds_ratio": -0.49759674072265625, "logits/chosen": 0.780744194984436, "logits/rejected": 0.7469713091850281, "logps/chosen": -1.9851609468460083, "logps/rejected": -3.1490113735198975, "loss": 0.9182, "nll_loss": 0.868437647819519, "rewards/accuracies": 0.75, "rewards/chosen": -0.1985161006450653, "rewards/margins": 0.11638502776622772, "rewards/rejected": -0.31490111351013184, "step": 2134 }, { "epoch": 5.845311430527036, "grad_norm": 3.8238677978515625, "learning_rate": 7.076712328767123e-07, "log_odds_chosen": 1.139382243156433, "log_odds_ratio": -0.343158483505249, "logits/chosen": 0.6979845762252808, "logits/rejected": 0.705854058265686, "logps/chosen": -1.9714410305023193, "logps/rejected": -3.019077777862549, "loss": 0.833, "nll_loss": 0.7986635565757751, "rewards/accuracies": 1.0, "rewards/chosen": -0.19714409112930298, "rewards/margins": 0.1047637015581131, "rewards/rejected": -0.30190780758857727, "step": 2135 }, { "epoch": 5.848049281314168, "grad_norm": 5.436899662017822, "learning_rate": 7.075342465753424e-07, "log_odds_chosen": 0.1755010187625885, "log_odds_ratio": -0.7408688068389893, "logits/chosen": 0.623774528503418, "logits/rejected": 0.6014225482940674, "logps/chosen": -2.788245677947998, "logps/rejected": -2.9203031063079834, "loss": 0.9154, "nll_loss": 0.8412797451019287, "rewards/accuracies": 0.625, "rewards/chosen": -0.2788245677947998, "rewards/margins": 0.013205735012888908, "rewards/rejected": -0.29203033447265625, "step": 2136 }, { "epoch": 5.850787132101301, "grad_norm": 3.7010226249694824, "learning_rate": 7.073972602739726e-07, "log_odds_chosen": 1.3088583946228027, "log_odds_ratio": -0.38507014513015747, "logits/chosen": 0.5908743143081665, "logits/rejected": 0.5287674069404602, "logps/chosen": -2.022552013397217, "logps/rejected": -3.254230499267578, "loss": 0.873, "nll_loss": 0.8344716429710388, "rewards/accuracies": 0.875, "rewards/chosen": -0.2022552192211151, "rewards/margins": 0.12316782027482986, "rewards/rejected": -0.32542306184768677, "step": 2137 }, { "epoch": 5.853524982888432, "grad_norm": 4.090450286865234, "learning_rate": 7.072602739726027e-07, "log_odds_chosen": 1.3610230684280396, "log_odds_ratio": -0.32111406326293945, "logits/chosen": 0.8036075234413147, "logits/rejected": 0.7318955659866333, "logps/chosen": -2.092698335647583, "logps/rejected": -3.360910177230835, "loss": 0.841, "nll_loss": 0.808903694152832, "rewards/accuracies": 0.875, "rewards/chosen": -0.20926985144615173, "rewards/margins": 0.12682119011878967, "rewards/rejected": -0.3360910415649414, "step": 2138 }, { "epoch": 5.856262833675565, "grad_norm": 3.138594150543213, "learning_rate": 7.071232876712329e-07, "log_odds_chosen": 1.59165358543396, "log_odds_ratio": -0.2547343373298645, "logits/chosen": 0.8576719760894775, "logits/rejected": 0.7915462851524353, "logps/chosen": -2.187560796737671, "logps/rejected": -3.6755480766296387, "loss": 0.8799, "nll_loss": 0.8543832302093506, "rewards/accuracies": 1.0, "rewards/chosen": -0.21875609457492828, "rewards/margins": 0.14879870414733887, "rewards/rejected": -0.36755478382110596, "step": 2139 }, { "epoch": 5.859000684462696, "grad_norm": 4.036118984222412, "learning_rate": 7.06986301369863e-07, "log_odds_chosen": 2.3829827308654785, "log_odds_ratio": -0.3063771426677704, "logits/chosen": 1.0270228385925293, "logits/rejected": 1.0941109657287598, "logps/chosen": -2.558237075805664, "logps/rejected": -4.849774360656738, "loss": 0.9173, "nll_loss": 0.8866693377494812, "rewards/accuracies": 0.75, "rewards/chosen": -0.2558237314224243, "rewards/margins": 0.22915375232696533, "rewards/rejected": -0.48497745394706726, "step": 2140 }, { "epoch": 5.861738535249829, "grad_norm": 4.236740589141846, "learning_rate": 7.068493150684931e-07, "log_odds_chosen": 3.2679948806762695, "log_odds_ratio": -0.20544034242630005, "logits/chosen": 0.8625129461288452, "logits/rejected": 0.8474350571632385, "logps/chosen": -2.0566539764404297, "logps/rejected": -5.1963348388671875, "loss": 0.8179, "nll_loss": 0.7973909378051758, "rewards/accuracies": 0.875, "rewards/chosen": -0.20566539466381073, "rewards/margins": 0.31396812200546265, "rewards/rejected": -0.5196335315704346, "step": 2141 }, { "epoch": 5.864476386036961, "grad_norm": 3.9560372829437256, "learning_rate": 7.067123287671233e-07, "log_odds_chosen": 1.066933274269104, "log_odds_ratio": -0.37420129776000977, "logits/chosen": 0.952467679977417, "logits/rejected": 0.8786327838897705, "logps/chosen": -2.608576774597168, "logps/rejected": -3.6283702850341797, "loss": 0.8989, "nll_loss": 0.8614913821220398, "rewards/accuracies": 1.0, "rewards/chosen": -0.2608577013015747, "rewards/margins": 0.10197935998439789, "rewards/rejected": -0.362837016582489, "step": 2142 }, { "epoch": 5.867214236824093, "grad_norm": 4.049009799957275, "learning_rate": 7.065753424657533e-07, "log_odds_chosen": 0.7265357971191406, "log_odds_ratio": -0.41100406646728516, "logits/chosen": 0.6365147233009338, "logits/rejected": 0.6238104701042175, "logps/chosen": -2.3668041229248047, "logps/rejected": -3.0190417766571045, "loss": 0.8048, "nll_loss": 0.7637413144111633, "rewards/accuracies": 1.0, "rewards/chosen": -0.23668041825294495, "rewards/margins": 0.06522378325462341, "rewards/rejected": -0.30190420150756836, "step": 2143 }, { "epoch": 5.8699520876112254, "grad_norm": 4.0154900550842285, "learning_rate": 7.064383561643835e-07, "log_odds_chosen": 2.4845848083496094, "log_odds_ratio": -0.18909262120723724, "logits/chosen": 0.7768650054931641, "logits/rejected": 0.7881174087524414, "logps/chosen": -2.5953726768493652, "logps/rejected": -4.99982213973999, "loss": 0.9269, "nll_loss": 0.9079532027244568, "rewards/accuracies": 1.0, "rewards/chosen": -0.2595372498035431, "rewards/margins": 0.24044495820999146, "rewards/rejected": -0.49998220801353455, "step": 2144 }, { "epoch": 5.8726899383983575, "grad_norm": 3.8462891578674316, "learning_rate": 7.063013698630137e-07, "log_odds_chosen": 1.31758713722229, "log_odds_ratio": -0.32561904191970825, "logits/chosen": 0.9051246643066406, "logits/rejected": 0.937340497970581, "logps/chosen": -2.4757189750671387, "logps/rejected": -3.7171120643615723, "loss": 0.8423, "nll_loss": 0.8097381591796875, "rewards/accuracies": 0.875, "rewards/chosen": -0.2475719153881073, "rewards/margins": 0.12413930147886276, "rewards/rejected": -0.37171119451522827, "step": 2145 }, { "epoch": 5.87542778918549, "grad_norm": 4.361909866333008, "learning_rate": 7.061643835616438e-07, "log_odds_chosen": 1.9867109060287476, "log_odds_ratio": -0.34129464626312256, "logits/chosen": 0.6433569192886353, "logits/rejected": 0.6597620248794556, "logps/chosen": -2.2126643657684326, "logps/rejected": -4.113950252532959, "loss": 0.8732, "nll_loss": 0.8390579223632812, "rewards/accuracies": 0.875, "rewards/chosen": -0.22126644849777222, "rewards/margins": 0.19012856483459473, "rewards/rejected": -0.41139501333236694, "step": 2146 }, { "epoch": 5.878165639972622, "grad_norm": 3.7494821548461914, "learning_rate": 7.060273972602739e-07, "log_odds_chosen": 1.677567481994629, "log_odds_ratio": -0.2883490025997162, "logits/chosen": 0.8266757130622864, "logits/rejected": 0.770700216293335, "logps/chosen": -1.9761347770690918, "logps/rejected": -3.530081033706665, "loss": 0.9176, "nll_loss": 0.8888134360313416, "rewards/accuracies": 1.0, "rewards/chosen": -0.19761347770690918, "rewards/margins": 0.15539461374282837, "rewards/rejected": -0.35300809144973755, "step": 2147 }, { "epoch": 5.880903490759754, "grad_norm": 3.504476308822632, "learning_rate": 7.058904109589041e-07, "log_odds_chosen": 2.242610454559326, "log_odds_ratio": -0.2550462782382965, "logits/chosen": 0.8084431290626526, "logits/rejected": 0.7893972396850586, "logps/chosen": -2.0842628479003906, "logps/rejected": -4.2380475997924805, "loss": 0.7851, "nll_loss": 0.7595984935760498, "rewards/accuracies": 1.0, "rewards/chosen": -0.20842629671096802, "rewards/margins": 0.21537846326828003, "rewards/rejected": -0.42380475997924805, "step": 2148 }, { "epoch": 5.883641341546886, "grad_norm": 3.8886935710906982, "learning_rate": 7.057534246575342e-07, "log_odds_chosen": 0.7068463563919067, "log_odds_ratio": -0.5166040062904358, "logits/chosen": 0.6206902265548706, "logits/rejected": 0.5815788507461548, "logps/chosen": -2.649380922317505, "logps/rejected": -3.3236820697784424, "loss": 0.914, "nll_loss": 0.8623201847076416, "rewards/accuracies": 0.5, "rewards/chosen": -0.2649381160736084, "rewards/margins": 0.06743009388446808, "rewards/rejected": -0.3323681950569153, "step": 2149 }, { "epoch": 5.886379192334018, "grad_norm": 5.287639141082764, "learning_rate": 7.056164383561643e-07, "log_odds_chosen": 1.9627859592437744, "log_odds_ratio": -0.27751392126083374, "logits/chosen": 0.8763812780380249, "logits/rejected": 0.9154765605926514, "logps/chosen": -2.46984601020813, "logps/rejected": -4.3307318687438965, "loss": 0.8226, "nll_loss": 0.7948721051216125, "rewards/accuracies": 1.0, "rewards/chosen": -0.24698461592197418, "rewards/margins": 0.18608860671520233, "rewards/rejected": -0.4330732226371765, "step": 2150 }, { "epoch": 5.88911704312115, "grad_norm": 3.6483476161956787, "learning_rate": 7.054794520547945e-07, "log_odds_chosen": 1.2221177816390991, "log_odds_ratio": -0.32787248492240906, "logits/chosen": 0.8657245635986328, "logits/rejected": 0.9247181415557861, "logps/chosen": -2.3207201957702637, "logps/rejected": -3.4693875312805176, "loss": 0.8392, "nll_loss": 0.8064467906951904, "rewards/accuracies": 1.0, "rewards/chosen": -0.23207202553749084, "rewards/margins": 0.11486673355102539, "rewards/rejected": -0.34693875908851624, "step": 2151 }, { "epoch": 5.891854893908282, "grad_norm": 5.333739280700684, "learning_rate": 7.053424657534246e-07, "log_odds_chosen": 2.2308335304260254, "log_odds_ratio": -0.18941999971866608, "logits/chosen": 0.8154875040054321, "logits/rejected": 0.8260698318481445, "logps/chosen": -2.328570604324341, "logps/rejected": -4.398556709289551, "loss": 0.7903, "nll_loss": 0.7713393568992615, "rewards/accuracies": 1.0, "rewards/chosen": -0.23285707831382751, "rewards/margins": 0.20699858665466309, "rewards/rejected": -0.4398556649684906, "step": 2152 }, { "epoch": 5.894592744695414, "grad_norm": 5.292388439178467, "learning_rate": 7.052054794520548e-07, "log_odds_chosen": 0.9848654866218567, "log_odds_ratio": -0.5958693623542786, "logits/chosen": 0.8425066471099854, "logits/rejected": 0.8686119914054871, "logps/chosen": -2.195927619934082, "logps/rejected": -3.107994556427002, "loss": 0.8859, "nll_loss": 0.8262976408004761, "rewards/accuracies": 0.75, "rewards/chosen": -0.21959276497364044, "rewards/margins": 0.09120670706033707, "rewards/rejected": -0.3107994794845581, "step": 2153 }, { "epoch": 5.897330595482546, "grad_norm": 4.49422025680542, "learning_rate": 7.050684931506849e-07, "log_odds_chosen": 1.1081740856170654, "log_odds_ratio": -0.4187989830970764, "logits/chosen": 0.7312840223312378, "logits/rejected": 0.7315140962600708, "logps/chosen": -2.642702341079712, "logps/rejected": -3.6852800846099854, "loss": 0.8341, "nll_loss": 0.7922070026397705, "rewards/accuracies": 0.875, "rewards/chosen": -0.26427024602890015, "rewards/margins": 0.10425776988267899, "rewards/rejected": -0.36852800846099854, "step": 2154 }, { "epoch": 5.900068446269678, "grad_norm": 4.050687313079834, "learning_rate": 7.04931506849315e-07, "log_odds_chosen": 2.274463653564453, "log_odds_ratio": -0.19789539277553558, "logits/chosen": 0.9836364984512329, "logits/rejected": 1.0637309551239014, "logps/chosen": -2.6159956455230713, "logps/rejected": -4.799616813659668, "loss": 0.8231, "nll_loss": 0.8033030033111572, "rewards/accuracies": 1.0, "rewards/chosen": -0.2615995705127716, "rewards/margins": 0.21836207807064056, "rewards/rejected": -0.479961633682251, "step": 2155 }, { "epoch": 5.90280629705681, "grad_norm": 4.199305534362793, "learning_rate": 7.047945205479452e-07, "log_odds_chosen": 0.5884543657302856, "log_odds_ratio": -0.5290365219116211, "logits/chosen": 0.8769703507423401, "logits/rejected": 0.8859136700630188, "logps/chosen": -1.7375760078430176, "logps/rejected": -2.229217052459717, "loss": 0.9007, "nll_loss": 0.8477963805198669, "rewards/accuracies": 0.75, "rewards/chosen": -0.1737576127052307, "rewards/margins": 0.04916407912969589, "rewards/rejected": -0.222921684384346, "step": 2156 }, { "epoch": 5.905544147843942, "grad_norm": 8.453788757324219, "learning_rate": 7.046575342465752e-07, "log_odds_chosen": 1.368654727935791, "log_odds_ratio": -0.44574564695358276, "logits/chosen": 0.866890549659729, "logits/rejected": 0.887962818145752, "logps/chosen": -2.571899890899658, "logps/rejected": -3.8353898525238037, "loss": 0.8031, "nll_loss": 0.7585170865058899, "rewards/accuracies": 0.875, "rewards/chosen": -0.2571900188922882, "rewards/margins": 0.12634901702404022, "rewards/rejected": -0.38353902101516724, "step": 2157 }, { "epoch": 5.908281998631074, "grad_norm": 3.7431986331939697, "learning_rate": 7.045205479452054e-07, "log_odds_chosen": 1.0701647996902466, "log_odds_ratio": -0.3521668016910553, "logits/chosen": 0.7726057767868042, "logits/rejected": 0.7284937500953674, "logps/chosen": -2.1818134784698486, "logps/rejected": -3.1489577293395996, "loss": 0.8915, "nll_loss": 0.8562434911727905, "rewards/accuracies": 1.0, "rewards/chosen": -0.21818134188652039, "rewards/margins": 0.09671442210674286, "rewards/rejected": -0.31489574909210205, "step": 2158 }, { "epoch": 5.9110198494182065, "grad_norm": 4.867283821105957, "learning_rate": 7.043835616438356e-07, "log_odds_chosen": 1.4327417612075806, "log_odds_ratio": -0.3584572672843933, "logits/chosen": 0.9443507790565491, "logits/rejected": 0.9862583875656128, "logps/chosen": -2.698087453842163, "logps/rejected": -4.057658672332764, "loss": 0.7719, "nll_loss": 0.7360178232192993, "rewards/accuracies": 0.875, "rewards/chosen": -0.2698087692260742, "rewards/margins": 0.13595712184906006, "rewards/rejected": -0.4057658910751343, "step": 2159 }, { "epoch": 5.9137577002053385, "grad_norm": 4.054384708404541, "learning_rate": 7.042465753424656e-07, "log_odds_chosen": 2.4545607566833496, "log_odds_ratio": -0.2569684386253357, "logits/chosen": 0.9551814794540405, "logits/rejected": 0.9752001166343689, "logps/chosen": -2.8438892364501953, "logps/rejected": -5.247191429138184, "loss": 0.7532, "nll_loss": 0.7275131940841675, "rewards/accuracies": 0.875, "rewards/chosen": -0.2843889594078064, "rewards/margins": 0.24033023416996002, "rewards/rejected": -0.5247191786766052, "step": 2160 }, { "epoch": 5.916495550992471, "grad_norm": 4.685768127441406, "learning_rate": 7.041095890410958e-07, "log_odds_chosen": 0.9924125671386719, "log_odds_ratio": -0.4831980764865875, "logits/chosen": 0.9213226437568665, "logits/rejected": 0.9628375768661499, "logps/chosen": -2.7212181091308594, "logps/rejected": -3.6672260761260986, "loss": 0.8532, "nll_loss": 0.8048471212387085, "rewards/accuracies": 0.875, "rewards/chosen": -0.2721218168735504, "rewards/margins": 0.09460080415010452, "rewards/rejected": -0.36672264337539673, "step": 2161 }, { "epoch": 5.919233401779603, "grad_norm": 4.153775691986084, "learning_rate": 7.03972602739726e-07, "log_odds_chosen": 1.369207501411438, "log_odds_ratio": -0.3602776527404785, "logits/chosen": 0.8393121957778931, "logits/rejected": 0.8602303266525269, "logps/chosen": -2.257784605026245, "logps/rejected": -3.5386481285095215, "loss": 0.9857, "nll_loss": 0.9497122764587402, "rewards/accuracies": 0.875, "rewards/chosen": -0.2257784605026245, "rewards/margins": 0.1280863732099533, "rewards/rejected": -0.3538648188114166, "step": 2162 }, { "epoch": 5.921971252566735, "grad_norm": 3.5163044929504395, "learning_rate": 7.038356164383561e-07, "log_odds_chosen": 1.3511638641357422, "log_odds_ratio": -0.3298564553260803, "logits/chosen": 0.7405992746353149, "logits/rejected": 0.6535323262214661, "logps/chosen": -1.520025610923767, "logps/rejected": -2.7201156616210938, "loss": 0.858, "nll_loss": 0.8250199556350708, "rewards/accuracies": 1.0, "rewards/chosen": -0.15200257301330566, "rewards/margins": 0.12000902742147446, "rewards/rejected": -0.27201157808303833, "step": 2163 }, { "epoch": 5.924709103353868, "grad_norm": 4.045773983001709, "learning_rate": 7.036986301369862e-07, "log_odds_chosen": 0.8611623048782349, "log_odds_ratio": -0.5346970558166504, "logits/chosen": 0.905745267868042, "logits/rejected": 0.9632011651992798, "logps/chosen": -2.8520865440368652, "logps/rejected": -3.6799683570861816, "loss": 1.0287, "nll_loss": 0.9752503633499146, "rewards/accuracies": 0.5, "rewards/chosen": -0.28520864248275757, "rewards/margins": 0.08278817683458328, "rewards/rejected": -0.36799681186676025, "step": 2164 }, { "epoch": 5.927446954140999, "grad_norm": 3.743007183074951, "learning_rate": 7.035616438356164e-07, "log_odds_chosen": 2.5223352909088135, "log_odds_ratio": -0.3047550320625305, "logits/chosen": 0.8841913938522339, "logits/rejected": 0.8814834356307983, "logps/chosen": -2.722090244293213, "logps/rejected": -5.17271089553833, "loss": 0.8518, "nll_loss": 0.8213179707527161, "rewards/accuracies": 0.875, "rewards/chosen": -0.2722090482711792, "rewards/margins": 0.24506203830242157, "rewards/rejected": -0.517271101474762, "step": 2165 }, { "epoch": 5.930184804928132, "grad_norm": 6.050541877746582, "learning_rate": 7.034246575342465e-07, "log_odds_chosen": 0.4239799976348877, "log_odds_ratio": -0.6925392150878906, "logits/chosen": 0.8008230924606323, "logits/rejected": 0.8098176121711731, "logps/chosen": -2.4768831729888916, "logps/rejected": -2.910562515258789, "loss": 0.9455, "nll_loss": 0.8762896656990051, "rewards/accuracies": 0.625, "rewards/chosen": -0.24768830835819244, "rewards/margins": 0.04336794465780258, "rewards/rejected": -0.2910562753677368, "step": 2166 }, { "epoch": 5.932922655715264, "grad_norm": 3.590691328048706, "learning_rate": 7.032876712328767e-07, "log_odds_chosen": 1.1576874256134033, "log_odds_ratio": -0.4471570551395416, "logits/chosen": 0.8634593486785889, "logits/rejected": 0.8750396966934204, "logps/chosen": -1.822245717048645, "logps/rejected": -2.8892478942871094, "loss": 0.7794, "nll_loss": 0.7347174882888794, "rewards/accuracies": 0.75, "rewards/chosen": -0.1822245717048645, "rewards/margins": 0.10670022666454315, "rewards/rejected": -0.28892481327056885, "step": 2167 }, { "epoch": 5.935660506502396, "grad_norm": 3.8819141387939453, "learning_rate": 7.031506849315068e-07, "log_odds_chosen": 0.6717524528503418, "log_odds_ratio": -0.5016266107559204, "logits/chosen": 0.7505804300308228, "logits/rejected": 0.6758142709732056, "logps/chosen": -2.4944522380828857, "logps/rejected": -3.133389949798584, "loss": 0.9382, "nll_loss": 0.8880769610404968, "rewards/accuracies": 0.625, "rewards/chosen": -0.24944521486759186, "rewards/margins": 0.06389380246400833, "rewards/rejected": -0.3133390247821808, "step": 2168 }, { "epoch": 5.938398357289528, "grad_norm": 3.725263833999634, "learning_rate": 7.030136986301369e-07, "log_odds_chosen": 2.2987120151519775, "log_odds_ratio": -0.1776914894580841, "logits/chosen": 0.9439894556999207, "logits/rejected": 0.8830813765525818, "logps/chosen": -2.8135533332824707, "logps/rejected": -4.999802112579346, "loss": 0.8876, "nll_loss": 0.8697860836982727, "rewards/accuracies": 1.0, "rewards/chosen": -0.2813553214073181, "rewards/margins": 0.21862488985061646, "rewards/rejected": -0.49998021125793457, "step": 2169 }, { "epoch": 5.94113620807666, "grad_norm": 5.534485340118408, "learning_rate": 7.028767123287671e-07, "log_odds_chosen": 1.1951884031295776, "log_odds_ratio": -0.7957066893577576, "logits/chosen": 0.8004990816116333, "logits/rejected": 0.8113968372344971, "logps/chosen": -3.1799895763397217, "logps/rejected": -4.347267150878906, "loss": 0.9627, "nll_loss": 0.8831679821014404, "rewards/accuracies": 0.5, "rewards/chosen": -0.3179989457130432, "rewards/margins": 0.1167277991771698, "rewards/rejected": -0.4347267746925354, "step": 2170 }, { "epoch": 5.943874058863792, "grad_norm": 3.705101728439331, "learning_rate": 7.027397260273972e-07, "log_odds_chosen": 1.300815224647522, "log_odds_ratio": -0.3480064868927002, "logits/chosen": 0.7587565779685974, "logits/rejected": 0.7329273819923401, "logps/chosen": -2.32420015335083, "logps/rejected": -3.5546457767486572, "loss": 0.9125, "nll_loss": 0.8777422308921814, "rewards/accuracies": 0.875, "rewards/chosen": -0.23242001235485077, "rewards/margins": 0.12304458022117615, "rewards/rejected": -0.3554645776748657, "step": 2171 }, { "epoch": 5.946611909650924, "grad_norm": 5.047102451324463, "learning_rate": 7.026027397260273e-07, "log_odds_chosen": 3.1075220108032227, "log_odds_ratio": -0.18715259432792664, "logits/chosen": 0.8416871428489685, "logits/rejected": 0.8315105438232422, "logps/chosen": -2.7846012115478516, "logps/rejected": -5.814130783081055, "loss": 0.9591, "nll_loss": 0.9403883218765259, "rewards/accuracies": 1.0, "rewards/chosen": -0.27846014499664307, "rewards/margins": 0.30295294523239136, "rewards/rejected": -0.5814131498336792, "step": 2172 }, { "epoch": 5.949349760438056, "grad_norm": 5.253146171569824, "learning_rate": 7.024657534246575e-07, "log_odds_chosen": 1.3996682167053223, "log_odds_ratio": -0.2549843192100525, "logits/chosen": 0.7150242328643799, "logits/rejected": 0.5943571329116821, "logps/chosen": -2.437542676925659, "logps/rejected": -3.7578489780426025, "loss": 0.9017, "nll_loss": 0.8761830925941467, "rewards/accuracies": 1.0, "rewards/chosen": -0.24375426769256592, "rewards/margins": 0.13203062117099762, "rewards/rejected": -0.37578490376472473, "step": 2173 }, { "epoch": 5.952087611225188, "grad_norm": 4.5827484130859375, "learning_rate": 7.023287671232875e-07, "log_odds_chosen": 1.1556682586669922, "log_odds_ratio": -0.46027129888534546, "logits/chosen": 0.8926956653594971, "logits/rejected": 0.8895901441574097, "logps/chosen": -2.951747417449951, "logps/rejected": -4.046265602111816, "loss": 0.9334, "nll_loss": 0.8873743414878845, "rewards/accuracies": 0.875, "rewards/chosen": -0.295174777507782, "rewards/margins": 0.10945183038711548, "rewards/rejected": -0.40462660789489746, "step": 2174 }, { "epoch": 5.95482546201232, "grad_norm": 4.494378566741943, "learning_rate": 7.021917808219177e-07, "log_odds_chosen": 0.9724186658859253, "log_odds_ratio": -0.5446802377700806, "logits/chosen": 0.8942375183105469, "logits/rejected": 0.9221733212471008, "logps/chosen": -2.5497257709503174, "logps/rejected": -3.4390275478363037, "loss": 0.8993, "nll_loss": 0.8448533415794373, "rewards/accuracies": 0.625, "rewards/chosen": -0.25497257709503174, "rewards/margins": 0.08893019706010818, "rewards/rejected": -0.3439027965068817, "step": 2175 }, { "epoch": 5.9575633127994525, "grad_norm": 3.1743695735931396, "learning_rate": 7.02054794520548e-07, "log_odds_chosen": 1.3159162998199463, "log_odds_ratio": -0.2682245373725891, "logits/chosen": 0.6568461656570435, "logits/rejected": 0.6407263278961182, "logps/chosen": -1.9203137159347534, "logps/rejected": -3.1133830547332764, "loss": 0.8225, "nll_loss": 0.7957216501235962, "rewards/accuracies": 1.0, "rewards/chosen": -0.1920313835144043, "rewards/margins": 0.11930694431066513, "rewards/rejected": -0.31133833527565, "step": 2176 }, { "epoch": 5.960301163586585, "grad_norm": 5.157439231872559, "learning_rate": 7.01917808219178e-07, "log_odds_chosen": 0.5606966614723206, "log_odds_ratio": -0.6257138252258301, "logits/chosen": 0.9887705445289612, "logits/rejected": 1.0090715885162354, "logps/chosen": -2.5692672729492188, "logps/rejected": -3.071998119354248, "loss": 0.8698, "nll_loss": 0.8072128295898438, "rewards/accuracies": 0.625, "rewards/chosen": -0.2569267153739929, "rewards/margins": 0.0502731017768383, "rewards/rejected": -0.3071998059749603, "step": 2177 }, { "epoch": 5.963039014373717, "grad_norm": 4.190625190734863, "learning_rate": 7.017808219178082e-07, "log_odds_chosen": 2.195848226547241, "log_odds_ratio": -0.18417419493198395, "logits/chosen": 0.9149462580680847, "logits/rejected": 0.9105138182640076, "logps/chosen": -2.452585458755493, "logps/rejected": -4.565380096435547, "loss": 0.8285, "nll_loss": 0.8101134300231934, "rewards/accuracies": 0.875, "rewards/chosen": -0.24525853991508484, "rewards/margins": 0.2112794816493988, "rewards/rejected": -0.45653802156448364, "step": 2178 }, { "epoch": 5.965776865160849, "grad_norm": 3.7904629707336426, "learning_rate": 7.016438356164384e-07, "log_odds_chosen": 1.6054911613464355, "log_odds_ratio": -0.2777712047100067, "logits/chosen": 0.8200309872627258, "logits/rejected": 0.7971721887588501, "logps/chosen": -1.762515664100647, "logps/rejected": -3.185929775238037, "loss": 0.8197, "nll_loss": 0.791961669921875, "rewards/accuracies": 1.0, "rewards/chosen": -0.1762515753507614, "rewards/margins": 0.14234140515327454, "rewards/rejected": -0.31859296560287476, "step": 2179 }, { "epoch": 5.968514715947981, "grad_norm": 3.628904104232788, "learning_rate": 7.015068493150685e-07, "log_odds_chosen": 0.8510100245475769, "log_odds_ratio": -0.4177186191082001, "logits/chosen": 0.5701404809951782, "logits/rejected": 0.4910549223423004, "logps/chosen": -2.5191760063171387, "logps/rejected": -3.2852680683135986, "loss": 0.9207, "nll_loss": 0.8789253234863281, "rewards/accuracies": 0.875, "rewards/chosen": -0.25191760063171387, "rewards/margins": 0.07660921663045883, "rewards/rejected": -0.3285267949104309, "step": 2180 }, { "epoch": 5.971252566735113, "grad_norm": 3.75876784324646, "learning_rate": 7.013698630136986e-07, "log_odds_chosen": 2.1614556312561035, "log_odds_ratio": -0.36528971791267395, "logits/chosen": 0.6157066226005554, "logits/rejected": 0.6093772053718567, "logps/chosen": -1.7825453281402588, "logps/rejected": -3.827910900115967, "loss": 0.887, "nll_loss": 0.8504700064659119, "rewards/accuracies": 0.75, "rewards/chosen": -0.17825452983379364, "rewards/margins": 0.204536572098732, "rewards/rejected": -0.38279110193252563, "step": 2181 }, { "epoch": 5.973990417522245, "grad_norm": 3.5558955669403076, "learning_rate": 7.012328767123288e-07, "log_odds_chosen": 2.470198631286621, "log_odds_ratio": -0.32244181632995605, "logits/chosen": 0.692755401134491, "logits/rejected": 0.6963194012641907, "logps/chosen": -2.0160751342773438, "logps/rejected": -4.3527607917785645, "loss": 0.9237, "nll_loss": 0.8914102911949158, "rewards/accuracies": 0.875, "rewards/chosen": -0.20160752534866333, "rewards/margins": 0.23366858065128326, "rewards/rejected": -0.4352760910987854, "step": 2182 }, { "epoch": 5.976728268309377, "grad_norm": 3.9490668773651123, "learning_rate": 7.010958904109589e-07, "log_odds_chosen": 0.7468468546867371, "log_odds_ratio": -0.7491418123245239, "logits/chosen": 0.5692197680473328, "logits/rejected": 0.5498538017272949, "logps/chosen": -2.493553400039673, "logps/rejected": -3.1754536628723145, "loss": 0.8982, "nll_loss": 0.8232460021972656, "rewards/accuracies": 0.625, "rewards/chosen": -0.24935534596443176, "rewards/margins": 0.06819004565477371, "rewards/rejected": -0.3175453841686249, "step": 2183 }, { "epoch": 5.979466119096509, "grad_norm": 3.602635383605957, "learning_rate": 7.009589041095891e-07, "log_odds_chosen": 1.193864345550537, "log_odds_ratio": -0.39379987120628357, "logits/chosen": 0.7676990628242493, "logits/rejected": 0.7888389825820923, "logps/chosen": -1.9472647905349731, "logps/rejected": -3.0441272258758545, "loss": 0.8998, "nll_loss": 0.8604140281677246, "rewards/accuracies": 0.875, "rewards/chosen": -0.19472646713256836, "rewards/margins": 0.10968625545501709, "rewards/rejected": -0.30441272258758545, "step": 2184 }, { "epoch": 5.982203969883641, "grad_norm": 4.464263439178467, "learning_rate": 7.008219178082192e-07, "log_odds_chosen": 0.7852689027786255, "log_odds_ratio": -0.4370907247066498, "logits/chosen": 0.9292738437652588, "logits/rejected": 0.8931275606155396, "logps/chosen": -2.8791513442993164, "logps/rejected": -3.59083890914917, "loss": 0.8559, "nll_loss": 0.812204897403717, "rewards/accuracies": 0.875, "rewards/chosen": -0.2879151403903961, "rewards/margins": 0.07116878032684326, "rewards/rejected": -0.35908395051956177, "step": 2185 }, { "epoch": 5.984941820670773, "grad_norm": 4.110443115234375, "learning_rate": 7.006849315068493e-07, "log_odds_chosen": 1.6251513957977295, "log_odds_ratio": -0.5131877064704895, "logits/chosen": 0.9253056049346924, "logits/rejected": 0.857939600944519, "logps/chosen": -2.3294873237609863, "logps/rejected": -3.819434881210327, "loss": 0.906, "nll_loss": 0.8546846508979797, "rewards/accuracies": 0.75, "rewards/chosen": -0.23294875025749207, "rewards/margins": 0.14899474382400513, "rewards/rejected": -0.3819435238838196, "step": 2186 }, { "epoch": 5.987679671457905, "grad_norm": 3.860295057296753, "learning_rate": 7.005479452054795e-07, "log_odds_chosen": 1.151902675628662, "log_odds_ratio": -0.42151719331741333, "logits/chosen": 0.8808894753456116, "logits/rejected": 0.8923156261444092, "logps/chosen": -2.4111385345458984, "logps/rejected": -3.5028891563415527, "loss": 0.7849, "nll_loss": 0.7427381873130798, "rewards/accuracies": 0.875, "rewards/chosen": -0.24111387133598328, "rewards/margins": 0.10917502641677856, "rewards/rejected": -0.35028889775276184, "step": 2187 }, { "epoch": 5.990417522245037, "grad_norm": 3.192870616912842, "learning_rate": 7.004109589041095e-07, "log_odds_chosen": 1.3849159479141235, "log_odds_ratio": -0.4011452794075012, "logits/chosen": 0.83454829454422, "logits/rejected": 0.8401579856872559, "logps/chosen": -2.154238700866699, "logps/rejected": -3.4236459732055664, "loss": 0.91, "nll_loss": 0.8699263334274292, "rewards/accuracies": 0.875, "rewards/chosen": -0.2154238522052765, "rewards/margins": 0.1269407421350479, "rewards/rejected": -0.3423646092414856, "step": 2188 }, { "epoch": 5.993155373032169, "grad_norm": 6.586102485656738, "learning_rate": 7.002739726027397e-07, "log_odds_chosen": 1.0909433364868164, "log_odds_ratio": -0.6382740139961243, "logits/chosen": 1.060704231262207, "logits/rejected": 1.1195824146270752, "logps/chosen": -3.766007423400879, "logps/rejected": -4.826547622680664, "loss": 0.8947, "nll_loss": 0.8308961987495422, "rewards/accuracies": 0.75, "rewards/chosen": -0.3766007423400879, "rewards/margins": 0.10605399310588837, "rewards/rejected": -0.48265475034713745, "step": 2189 }, { "epoch": 5.9958932238193015, "grad_norm": 4.380436897277832, "learning_rate": 7.001369863013699e-07, "log_odds_chosen": 1.7555466890335083, "log_odds_ratio": -0.26350632309913635, "logits/chosen": 0.7647128701210022, "logits/rejected": 0.6809556484222412, "logps/chosen": -2.328641176223755, "logps/rejected": -4.009673595428467, "loss": 0.9567, "nll_loss": 0.9303632974624634, "rewards/accuracies": 1.0, "rewards/chosen": -0.2328641265630722, "rewards/margins": 0.16810323297977448, "rewards/rejected": -0.4009673595428467, "step": 2190 }, { "epoch": 5.998631074606434, "grad_norm": 3.98188853263855, "learning_rate": 7e-07, "log_odds_chosen": 0.7237322330474854, "log_odds_ratio": -0.6282773613929749, "logits/chosen": 0.7633990049362183, "logits/rejected": 0.7541327476501465, "logps/chosen": -2.2180118560791016, "logps/rejected": -2.886484146118164, "loss": 0.9292, "nll_loss": 0.8663233518600464, "rewards/accuracies": 0.75, "rewards/chosen": -0.22180119156837463, "rewards/margins": 0.06684722751379013, "rewards/rejected": -0.28864842653274536, "step": 2191 }, { "epoch": 6.0013689253935665, "grad_norm": 4.942773818969727, "learning_rate": 6.998630136986301e-07, "log_odds_chosen": 1.591994285583496, "log_odds_ratio": -0.31714680790901184, "logits/chosen": 0.6987268924713135, "logits/rejected": 0.628285825252533, "logps/chosen": -2.3420298099517822, "logps/rejected": -3.8249847888946533, "loss": 0.9563, "nll_loss": 0.9245927929878235, "rewards/accuracies": 0.875, "rewards/chosen": -0.23420298099517822, "rewards/margins": 0.14829547703266144, "rewards/rejected": -0.38249847292900085, "step": 2192 }, { "epoch": 6.0041067761806985, "grad_norm": 3.3267624378204346, "learning_rate": 6.997260273972603e-07, "log_odds_chosen": 2.0550761222839355, "log_odds_ratio": -0.30426591634750366, "logits/chosen": 0.7031588554382324, "logits/rejected": 0.6031116843223572, "logps/chosen": -2.103497266769409, "logps/rejected": -4.060649871826172, "loss": 0.8822, "nll_loss": 0.8517297506332397, "rewards/accuracies": 0.875, "rewards/chosen": -0.21034973859786987, "rewards/margins": 0.19571523368358612, "rewards/rejected": -0.4060649871826172, "step": 2193 }, { "epoch": 6.006844626967831, "grad_norm": 4.807339668273926, "learning_rate": 6.995890410958904e-07, "log_odds_chosen": 0.8661651611328125, "log_odds_ratio": -0.4541415274143219, "logits/chosen": 0.802284836769104, "logits/rejected": 0.8161437511444092, "logps/chosen": -2.727278709411621, "logps/rejected": -3.5243027210235596, "loss": 0.8118, "nll_loss": 0.7664322257041931, "rewards/accuracies": 0.875, "rewards/chosen": -0.2727278769016266, "rewards/margins": 0.07970239222049713, "rewards/rejected": -0.3524302840232849, "step": 2194 }, { "epoch": 6.009582477754963, "grad_norm": 3.340224504470825, "learning_rate": 6.994520547945205e-07, "log_odds_chosen": 1.3042151927947998, "log_odds_ratio": -0.3083506226539612, "logits/chosen": 0.7307048439979553, "logits/rejected": 0.7565277814865112, "logps/chosen": -3.4293622970581055, "logps/rejected": -4.665971279144287, "loss": 0.8697, "nll_loss": 0.8388725519180298, "rewards/accuracies": 0.875, "rewards/chosen": -0.3429362177848816, "rewards/margins": 0.12366091459989548, "rewards/rejected": -0.46659713983535767, "step": 2195 }, { "epoch": 6.012320328542095, "grad_norm": 4.347811698913574, "learning_rate": 6.993150684931507e-07, "log_odds_chosen": 1.8957233428955078, "log_odds_ratio": -0.2533010244369507, "logits/chosen": 0.9623420834541321, "logits/rejected": 0.9688525795936584, "logps/chosen": -2.148472785949707, "logps/rejected": -3.928281307220459, "loss": 0.8255, "nll_loss": 0.8001834750175476, "rewards/accuracies": 0.875, "rewards/chosen": -0.21484726667404175, "rewards/margins": 0.17798085510730743, "rewards/rejected": -0.3928281366825104, "step": 2196 }, { "epoch": 6.015058179329227, "grad_norm": 3.8945212364196777, "learning_rate": 6.991780821917808e-07, "log_odds_chosen": 0.8019136190414429, "log_odds_ratio": -0.42948096990585327, "logits/chosen": 0.8190593123435974, "logits/rejected": 0.8133777379989624, "logps/chosen": -2.3345863819122314, "logps/rejected": -3.0435683727264404, "loss": 0.7803, "nll_loss": 0.7373585104942322, "rewards/accuracies": 0.75, "rewards/chosen": -0.23345865309238434, "rewards/margins": 0.07089820504188538, "rewards/rejected": -0.3043568432331085, "step": 2197 }, { "epoch": 6.017796030116359, "grad_norm": 4.054002285003662, "learning_rate": 6.99041095890411e-07, "log_odds_chosen": 1.8600289821624756, "log_odds_ratio": -0.2230079472064972, "logits/chosen": 1.0210806131362915, "logits/rejected": 1.0318260192871094, "logps/chosen": -2.6939942836761475, "logps/rejected": -4.493048191070557, "loss": 0.8817, "nll_loss": 0.8593899011611938, "rewards/accuracies": 1.0, "rewards/chosen": -0.2693994641304016, "rewards/margins": 0.17990536987781525, "rewards/rejected": -0.44930481910705566, "step": 2198 }, { "epoch": 6.020533880903491, "grad_norm": 3.5197136402130127, "learning_rate": 6.989041095890411e-07, "log_odds_chosen": 1.4670593738555908, "log_odds_ratio": -0.2843138575553894, "logits/chosen": 0.6628340482711792, "logits/rejected": 0.6272181868553162, "logps/chosen": -2.1112565994262695, "logps/rejected": -3.4706714153289795, "loss": 0.8919, "nll_loss": 0.8634962439537048, "rewards/accuracies": 1.0, "rewards/chosen": -0.21112565696239471, "rewards/margins": 0.13594147562980652, "rewards/rejected": -0.3470671474933624, "step": 2199 }, { "epoch": 6.023271731690623, "grad_norm": 3.901402473449707, "learning_rate": 6.987671232876712e-07, "log_odds_chosen": 1.0440077781677246, "log_odds_ratio": -0.3898162841796875, "logits/chosen": 0.6303147673606873, "logits/rejected": 0.550811767578125, "logps/chosen": -2.4119319915771484, "logps/rejected": -3.384701728820801, "loss": 0.9102, "nll_loss": 0.8712612390518188, "rewards/accuracies": 0.875, "rewards/chosen": -0.24119320511817932, "rewards/margins": 0.09727700799703598, "rewards/rejected": -0.3384702205657959, "step": 2200 }, { "epoch": 6.026009582477755, "grad_norm": 5.6563544273376465, "learning_rate": 6.986301369863014e-07, "log_odds_chosen": 1.5127285718917847, "log_odds_ratio": -0.27922189235687256, "logits/chosen": 0.8481492400169373, "logits/rejected": 0.8423892855644226, "logps/chosen": -2.209122657775879, "logps/rejected": -3.647905111312866, "loss": 0.7248, "nll_loss": 0.6969160437583923, "rewards/accuracies": 1.0, "rewards/chosen": -0.22091227769851685, "rewards/margins": 0.14387823641300201, "rewards/rejected": -0.36479049921035767, "step": 2201 }, { "epoch": 6.028747433264887, "grad_norm": 4.346587657928467, "learning_rate": 6.984931506849314e-07, "log_odds_chosen": 1.0674463510513306, "log_odds_ratio": -0.4744540750980377, "logits/chosen": 0.8552390336990356, "logits/rejected": 0.8750839233398438, "logps/chosen": -2.743603229522705, "logps/rejected": -3.779447555541992, "loss": 0.9484, "nll_loss": 0.9009950757026672, "rewards/accuracies": 0.5, "rewards/chosen": -0.2743602991104126, "rewards/margins": 0.10358445346355438, "rewards/rejected": -0.3779447674751282, "step": 2202 }, { "epoch": 6.031485284052019, "grad_norm": 4.0026774406433105, "learning_rate": 6.983561643835616e-07, "log_odds_chosen": 1.0919419527053833, "log_odds_ratio": -0.4288879930973053, "logits/chosen": 0.5332029461860657, "logits/rejected": 0.46187299489974976, "logps/chosen": -1.9780329465866089, "logps/rejected": -2.969693660736084, "loss": 0.877, "nll_loss": 0.8340897560119629, "rewards/accuracies": 0.875, "rewards/chosen": -0.1978032886981964, "rewards/margins": 0.09916609525680542, "rewards/rejected": -0.2969694137573242, "step": 2203 }, { "epoch": 6.034223134839151, "grad_norm": 3.6363518238067627, "learning_rate": 6.982191780821918e-07, "log_odds_chosen": 1.1784727573394775, "log_odds_ratio": -0.3997099697589874, "logits/chosen": 0.8626974821090698, "logits/rejected": 0.9356781244277954, "logps/chosen": -2.480776071548462, "logps/rejected": -3.587782859802246, "loss": 0.9513, "nll_loss": 0.9113763570785522, "rewards/accuracies": 0.875, "rewards/chosen": -0.2480776011943817, "rewards/margins": 0.11070066690444946, "rewards/rejected": -0.35877829790115356, "step": 2204 }, { "epoch": 6.036960985626283, "grad_norm": 3.90042781829834, "learning_rate": 6.980821917808219e-07, "log_odds_chosen": 1.528184413909912, "log_odds_ratio": -0.3742939233779907, "logits/chosen": 0.9388541579246521, "logits/rejected": 0.9683172106742859, "logps/chosen": -2.226470470428467, "logps/rejected": -3.701842784881592, "loss": 0.7966, "nll_loss": 0.7591925263404846, "rewards/accuracies": 0.75, "rewards/chosen": -0.2226470559835434, "rewards/margins": 0.1475372016429901, "rewards/rejected": -0.3701842725276947, "step": 2205 }, { "epoch": 6.039698836413415, "grad_norm": 3.9160690307617188, "learning_rate": 6.97945205479452e-07, "log_odds_chosen": 2.2624282836914062, "log_odds_ratio": -0.3815331757068634, "logits/chosen": 0.7463081479072571, "logits/rejected": 0.7843008041381836, "logps/chosen": -2.189419746398926, "logps/rejected": -4.361520767211914, "loss": 0.7956, "nll_loss": 0.7574610710144043, "rewards/accuracies": 0.75, "rewards/chosen": -0.21894197165966034, "rewards/margins": 0.21721014380455017, "rewards/rejected": -0.4361521005630493, "step": 2206 }, { "epoch": 6.0424366872005475, "grad_norm": 3.9517486095428467, "learning_rate": 6.978082191780822e-07, "log_odds_chosen": 2.0424587726593018, "log_odds_ratio": -0.2447318136692047, "logits/chosen": 0.9132263660430908, "logits/rejected": 0.8666365146636963, "logps/chosen": -2.4509081840515137, "logps/rejected": -4.4084882736206055, "loss": 0.9223, "nll_loss": 0.8977935910224915, "rewards/accuracies": 1.0, "rewards/chosen": -0.24509084224700928, "rewards/margins": 0.19575801491737366, "rewards/rejected": -0.4408488869667053, "step": 2207 }, { "epoch": 6.04517453798768, "grad_norm": 4.165241718292236, "learning_rate": 6.976712328767123e-07, "log_odds_chosen": 1.1723933219909668, "log_odds_ratio": -0.32114171981811523, "logits/chosen": 0.5964880585670471, "logits/rejected": 0.5197285413742065, "logps/chosen": -1.696029543876648, "logps/rejected": -2.734867572784424, "loss": 0.9501, "nll_loss": 0.9180265665054321, "rewards/accuracies": 1.0, "rewards/chosen": -0.16960296034812927, "rewards/margins": 0.1038837879896164, "rewards/rejected": -0.2734867334365845, "step": 2208 }, { "epoch": 6.047912388774812, "grad_norm": 3.559882879257202, "learning_rate": 6.975342465753424e-07, "log_odds_chosen": 1.8124725818634033, "log_odds_ratio": -0.24230405688285828, "logits/chosen": 0.8019590377807617, "logits/rejected": 0.7900314927101135, "logps/chosen": -1.6525006294250488, "logps/rejected": -3.281008720397949, "loss": 0.9478, "nll_loss": 0.9235579967498779, "rewards/accuracies": 1.0, "rewards/chosen": -0.16525007784366608, "rewards/margins": 0.16285081207752228, "rewards/rejected": -0.32810088992118835, "step": 2209 }, { "epoch": 6.050650239561944, "grad_norm": 4.207676887512207, "learning_rate": 6.973972602739726e-07, "log_odds_chosen": 0.49435797333717346, "log_odds_ratio": -0.5384213328361511, "logits/chosen": 0.729111909866333, "logits/rejected": 0.657314658164978, "logps/chosen": -1.8332717418670654, "logps/rejected": -2.27592396736145, "loss": 0.9081, "nll_loss": 0.8543013334274292, "rewards/accuracies": 0.625, "rewards/chosen": -0.18332718312740326, "rewards/margins": 0.04426521807909012, "rewards/rejected": -0.22759239375591278, "step": 2210 }, { "epoch": 6.053388090349076, "grad_norm": 3.68062162399292, "learning_rate": 6.972602739726027e-07, "log_odds_chosen": 0.7182802557945251, "log_odds_ratio": -0.43132275342941284, "logits/chosen": 1.0070453882217407, "logits/rejected": 1.018693208694458, "logps/chosen": -1.9871222972869873, "logps/rejected": -2.6099438667297363, "loss": 0.8067, "nll_loss": 0.7635944485664368, "rewards/accuracies": 0.875, "rewards/chosen": -0.19871222972869873, "rewards/margins": 0.06228215619921684, "rewards/rejected": -0.26099440455436707, "step": 2211 }, { "epoch": 6.056125941136208, "grad_norm": 3.1937742233276367, "learning_rate": 6.971232876712329e-07, "log_odds_chosen": 2.149845838546753, "log_odds_ratio": -0.27553674578666687, "logits/chosen": 1.0419496297836304, "logits/rejected": 1.0680793523788452, "logps/chosen": -2.2524776458740234, "logps/rejected": -4.306427955627441, "loss": 0.7711, "nll_loss": 0.7435207366943359, "rewards/accuracies": 1.0, "rewards/chosen": -0.22524774074554443, "rewards/margins": 0.20539507269859314, "rewards/rejected": -0.43064284324645996, "step": 2212 }, { "epoch": 6.05886379192334, "grad_norm": 3.5369465351104736, "learning_rate": 6.96986301369863e-07, "log_odds_chosen": 1.87109375, "log_odds_ratio": -0.3616827726364136, "logits/chosen": 0.9730281829833984, "logits/rejected": 0.956216037273407, "logps/chosen": -1.712800145149231, "logps/rejected": -3.458442211151123, "loss": 0.792, "nll_loss": 0.7558159828186035, "rewards/accuracies": 0.875, "rewards/chosen": -0.17128002643585205, "rewards/margins": 0.1745642125606537, "rewards/rejected": -0.34584423899650574, "step": 2213 }, { "epoch": 6.061601642710472, "grad_norm": 3.921088933944702, "learning_rate": 6.968493150684931e-07, "log_odds_chosen": 1.2653553485870361, "log_odds_ratio": -0.3422233462333679, "logits/chosen": 0.8382712602615356, "logits/rejected": 0.842821478843689, "logps/chosen": -2.217651605606079, "logps/rejected": -3.394341230392456, "loss": 0.8725, "nll_loss": 0.8382539749145508, "rewards/accuracies": 0.875, "rewards/chosen": -0.2217651605606079, "rewards/margins": 0.1176689863204956, "rewards/rejected": -0.3394341468811035, "step": 2214 }, { "epoch": 6.064339493497604, "grad_norm": 4.735326290130615, "learning_rate": 6.967123287671233e-07, "log_odds_chosen": 0.8925946354866028, "log_odds_ratio": -0.44991040229797363, "logits/chosen": 0.9042633175849915, "logits/rejected": 0.9252387285232544, "logps/chosen": -2.7055320739746094, "logps/rejected": -3.529696464538574, "loss": 0.8662, "nll_loss": 0.8211740851402283, "rewards/accuracies": 0.75, "rewards/chosen": -0.27055323123931885, "rewards/margins": 0.08241641521453857, "rewards/rejected": -0.3529696464538574, "step": 2215 }, { "epoch": 6.067077344284736, "grad_norm": 3.6275665760040283, "learning_rate": 6.965753424657534e-07, "log_odds_chosen": 1.6180143356323242, "log_odds_ratio": -0.4029625654220581, "logits/chosen": 0.8873319029808044, "logits/rejected": 0.9168996214866638, "logps/chosen": -2.637457847595215, "logps/rejected": -4.181840896606445, "loss": 0.8667, "nll_loss": 0.8264422416687012, "rewards/accuracies": 0.875, "rewards/chosen": -0.26374581456184387, "rewards/margins": 0.15443828701972961, "rewards/rejected": -0.4181841015815735, "step": 2216 }, { "epoch": 6.069815195071868, "grad_norm": 3.565642833709717, "learning_rate": 6.964383561643835e-07, "log_odds_chosen": 2.220553159713745, "log_odds_ratio": -0.18632420897483826, "logits/chosen": 0.9175235033035278, "logits/rejected": 0.9052629470825195, "logps/chosen": -2.430820941925049, "logps/rejected": -4.531700134277344, "loss": 0.8092, "nll_loss": 0.7905983924865723, "rewards/accuracies": 1.0, "rewards/chosen": -0.24308209121227264, "rewards/margins": 0.21008792519569397, "rewards/rejected": -0.4531700015068054, "step": 2217 }, { "epoch": 6.072553045859001, "grad_norm": 5.893045902252197, "learning_rate": 6.963013698630137e-07, "log_odds_chosen": 0.970234751701355, "log_odds_ratio": -0.6191427111625671, "logits/chosen": 0.7192901372909546, "logits/rejected": 0.6307905912399292, "logps/chosen": -2.334317684173584, "logps/rejected": -3.224010705947876, "loss": 0.9775, "nll_loss": 0.9155939817428589, "rewards/accuracies": 0.875, "rewards/chosen": -0.23343177139759064, "rewards/margins": 0.08896930515766144, "rewards/rejected": -0.3224010765552521, "step": 2218 }, { "epoch": 6.075290896646133, "grad_norm": 4.0161542892456055, "learning_rate": 6.961643835616438e-07, "log_odds_chosen": 1.8865612745285034, "log_odds_ratio": -0.231629878282547, "logits/chosen": 0.9476323127746582, "logits/rejected": 1.0220353603363037, "logps/chosen": -2.5184497833251953, "logps/rejected": -4.314554214477539, "loss": 0.8201, "nll_loss": 0.7969238758087158, "rewards/accuracies": 1.0, "rewards/chosen": -0.25184500217437744, "rewards/margins": 0.179610475897789, "rewards/rejected": -0.43145546317100525, "step": 2219 }, { "epoch": 6.078028747433265, "grad_norm": 4.191153049468994, "learning_rate": 6.960273972602739e-07, "log_odds_chosen": 0.6581813097000122, "log_odds_ratio": -0.43006545305252075, "logits/chosen": 0.7256811857223511, "logits/rejected": 0.7914149165153503, "logps/chosen": -2.7571873664855957, "logps/rejected": -3.385960817337036, "loss": 0.9043, "nll_loss": 0.8613409996032715, "rewards/accuracies": 0.875, "rewards/chosen": -0.2757187485694885, "rewards/margins": 0.06287732720375061, "rewards/rejected": -0.33859607577323914, "step": 2220 }, { "epoch": 6.080766598220397, "grad_norm": 3.6904819011688232, "learning_rate": 6.958904109589041e-07, "log_odds_chosen": 1.6508229970932007, "log_odds_ratio": -0.23072150349617004, "logits/chosen": 0.8380321860313416, "logits/rejected": 0.8228611946105957, "logps/chosen": -1.844332218170166, "logps/rejected": -3.34078311920166, "loss": 0.7711, "nll_loss": 0.7479944229125977, "rewards/accuracies": 1.0, "rewards/chosen": -0.1844332218170166, "rewards/margins": 0.1496451050043106, "rewards/rejected": -0.334078311920166, "step": 2221 }, { "epoch": 6.083504449007529, "grad_norm": 4.2335638999938965, "learning_rate": 6.957534246575342e-07, "log_odds_chosen": 1.831403374671936, "log_odds_ratio": -0.38440972566604614, "logits/chosen": 1.008151888847351, "logits/rejected": 1.0167087316513062, "logps/chosen": -2.9535555839538574, "logps/rejected": -4.718329429626465, "loss": 0.8361, "nll_loss": 0.7976388335227966, "rewards/accuracies": 0.875, "rewards/chosen": -0.29535558819770813, "rewards/margins": 0.1764773726463318, "rewards/rejected": -0.4718329906463623, "step": 2222 }, { "epoch": 6.0862422997946615, "grad_norm": 4.1234235763549805, "learning_rate": 6.956164383561643e-07, "log_odds_chosen": 1.5364470481872559, "log_odds_ratio": -0.3601308763027191, "logits/chosen": 0.8523972630500793, "logits/rejected": 0.8493335247039795, "logps/chosen": -2.7931995391845703, "logps/rejected": -4.272722244262695, "loss": 0.7704, "nll_loss": 0.734372615814209, "rewards/accuracies": 0.875, "rewards/chosen": -0.27931997179985046, "rewards/margins": 0.14795225858688354, "rewards/rejected": -0.427272230386734, "step": 2223 }, { "epoch": 6.0889801505817935, "grad_norm": 4.251835823059082, "learning_rate": 6.954794520547945e-07, "log_odds_chosen": 1.2057446241378784, "log_odds_ratio": -0.41441577672958374, "logits/chosen": 0.6157814860343933, "logits/rejected": 0.6165411472320557, "logps/chosen": -2.0750420093536377, "logps/rejected": -2.956353187561035, "loss": 0.8559, "nll_loss": 0.8144427537918091, "rewards/accuracies": 0.875, "rewards/chosen": -0.20750419795513153, "rewards/margins": 0.08813110738992691, "rewards/rejected": -0.29563528299331665, "step": 2224 }, { "epoch": 6.091718001368926, "grad_norm": 3.6510515213012695, "learning_rate": 6.953424657534246e-07, "log_odds_chosen": 2.957170009613037, "log_odds_ratio": -0.12392482161521912, "logits/chosen": 0.8805155754089355, "logits/rejected": 0.8719736337661743, "logps/chosen": -2.4079277515411377, "logps/rejected": -5.265986442565918, "loss": 0.8226, "nll_loss": 0.8101802468299866, "rewards/accuracies": 1.0, "rewards/chosen": -0.24079279601573944, "rewards/margins": 0.2858058214187622, "rewards/rejected": -0.5265986323356628, "step": 2225 }, { "epoch": 6.094455852156058, "grad_norm": 3.535487413406372, "learning_rate": 6.952054794520548e-07, "log_odds_chosen": 2.1799440383911133, "log_odds_ratio": -0.3887295722961426, "logits/chosen": 1.0411133766174316, "logits/rejected": 1.093018651008606, "logps/chosen": -1.8882769346237183, "logps/rejected": -3.965766429901123, "loss": 0.7614, "nll_loss": 0.7224949598312378, "rewards/accuracies": 0.75, "rewards/chosen": -0.18882769346237183, "rewards/margins": 0.2077489197254181, "rewards/rejected": -0.3965766429901123, "step": 2226 }, { "epoch": 6.09719370294319, "grad_norm": 4.495272159576416, "learning_rate": 6.950684931506849e-07, "log_odds_chosen": 1.5252641439437866, "log_odds_ratio": -0.33776673674583435, "logits/chosen": 1.0231813192367554, "logits/rejected": 1.0269449949264526, "logps/chosen": -3.3782835006713867, "logps/rejected": -4.851099967956543, "loss": 0.8814, "nll_loss": 0.8476149439811707, "rewards/accuracies": 0.75, "rewards/chosen": -0.3378283381462097, "rewards/margins": 0.147281676530838, "rewards/rejected": -0.48511001467704773, "step": 2227 }, { "epoch": 6.099931553730322, "grad_norm": 3.3582825660705566, "learning_rate": 6.94931506849315e-07, "log_odds_chosen": 2.182666301727295, "log_odds_ratio": -0.3383302092552185, "logits/chosen": 0.8455628156661987, "logits/rejected": 0.8667168021202087, "logps/chosen": -1.9898957014083862, "logps/rejected": -4.042769908905029, "loss": 0.8301, "nll_loss": 0.7962688207626343, "rewards/accuracies": 0.75, "rewards/chosen": -0.19898955523967743, "rewards/margins": 0.2052873820066452, "rewards/rejected": -0.404276967048645, "step": 2228 }, { "epoch": 6.102669404517454, "grad_norm": 3.558149814605713, "learning_rate": 6.947945205479452e-07, "log_odds_chosen": 1.7185741662979126, "log_odds_ratio": -0.2750890254974365, "logits/chosen": 0.8846777081489563, "logits/rejected": 0.8806802034378052, "logps/chosen": -2.586672067642212, "logps/rejected": -4.246023178100586, "loss": 0.8703, "nll_loss": 0.8428277373313904, "rewards/accuracies": 0.875, "rewards/chosen": -0.2586672008037567, "rewards/margins": 0.16593514382839203, "rewards/rejected": -0.42460232973098755, "step": 2229 }, { "epoch": 6.105407255304586, "grad_norm": 3.5166990756988525, "learning_rate": 6.946575342465753e-07, "log_odds_chosen": 1.4728262424468994, "log_odds_ratio": -0.3112897574901581, "logits/chosen": 0.855313777923584, "logits/rejected": 0.868283748626709, "logps/chosen": -1.9512594938278198, "logps/rejected": -3.295121669769287, "loss": 0.7539, "nll_loss": 0.7227252721786499, "rewards/accuracies": 1.0, "rewards/chosen": -0.19512595236301422, "rewards/margins": 0.13438619673252106, "rewards/rejected": -0.32951217889785767, "step": 2230 }, { "epoch": 6.108145106091718, "grad_norm": 3.885378122329712, "learning_rate": 6.945205479452054e-07, "log_odds_chosen": 2.5973286628723145, "log_odds_ratio": -0.31295865774154663, "logits/chosen": 0.7908821702003479, "logits/rejected": 0.804204523563385, "logps/chosen": -2.779006004333496, "logps/rejected": -5.342448711395264, "loss": 0.9956, "nll_loss": 0.9643443822860718, "rewards/accuracies": 0.75, "rewards/chosen": -0.2779005765914917, "rewards/margins": 0.2563442587852478, "rewards/rejected": -0.5342448949813843, "step": 2231 }, { "epoch": 6.11088295687885, "grad_norm": 5.1331071853637695, "learning_rate": 6.943835616438356e-07, "log_odds_chosen": 2.1861460208892822, "log_odds_ratio": -0.24007365107536316, "logits/chosen": 0.9765841960906982, "logits/rejected": 1.0455960035324097, "logps/chosen": -3.0039703845977783, "logps/rejected": -5.145321846008301, "loss": 0.8522, "nll_loss": 0.82818603515625, "rewards/accuracies": 1.0, "rewards/chosen": -0.3003970682621002, "rewards/margins": 0.21413515508174896, "rewards/rejected": -0.514532208442688, "step": 2232 }, { "epoch": 6.113620807665982, "grad_norm": 3.470418691635132, "learning_rate": 6.942465753424657e-07, "log_odds_chosen": 1.6691875457763672, "log_odds_ratio": -0.26537421345710754, "logits/chosen": 0.8388881087303162, "logits/rejected": 0.8005049228668213, "logps/chosen": -2.077014923095703, "logps/rejected": -3.5950794219970703, "loss": 0.7923, "nll_loss": 0.7657985687255859, "rewards/accuracies": 1.0, "rewards/chosen": -0.20770150423049927, "rewards/margins": 0.15180647373199463, "rewards/rejected": -0.3595079481601715, "step": 2233 }, { "epoch": 6.116358658453114, "grad_norm": 5.660645008087158, "learning_rate": 6.941095890410958e-07, "log_odds_chosen": 1.749151349067688, "log_odds_ratio": -0.3457472324371338, "logits/chosen": 0.9466105699539185, "logits/rejected": 0.9703821539878845, "logps/chosen": -2.7716450691223145, "logps/rejected": -4.3675537109375, "loss": 0.8935, "nll_loss": 0.8589276075363159, "rewards/accuracies": 0.75, "rewards/chosen": -0.2771645188331604, "rewards/margins": 0.15959085524082184, "rewards/rejected": -0.43675538897514343, "step": 2234 }, { "epoch": 6.119096509240246, "grad_norm": 3.635572910308838, "learning_rate": 6.93972602739726e-07, "log_odds_chosen": 1.5343222618103027, "log_odds_ratio": -0.3798495829105377, "logits/chosen": 0.9890363216400146, "logits/rejected": 0.9398321509361267, "logps/chosen": -2.092299461364746, "logps/rejected": -3.4916610717773438, "loss": 0.8294, "nll_loss": 0.7914591431617737, "rewards/accuracies": 0.75, "rewards/chosen": -0.2092299461364746, "rewards/margins": 0.1399361491203308, "rewards/rejected": -0.3491660952568054, "step": 2235 }, { "epoch": 6.121834360027378, "grad_norm": 3.6216161251068115, "learning_rate": 6.938356164383561e-07, "log_odds_chosen": 1.3725143671035767, "log_odds_ratio": -0.3170032203197479, "logits/chosen": 0.7531049251556396, "logits/rejected": 0.7517454028129578, "logps/chosen": -2.0224878787994385, "logps/rejected": -3.302865982055664, "loss": 0.8638, "nll_loss": 0.8320510387420654, "rewards/accuracies": 1.0, "rewards/chosen": -0.20224878191947937, "rewards/margins": 0.12803784012794495, "rewards/rejected": -0.3302866220474243, "step": 2236 }, { "epoch": 6.12457221081451, "grad_norm": 4.40273380279541, "learning_rate": 6.936986301369862e-07, "log_odds_chosen": 0.5856674313545227, "log_odds_ratio": -0.4823339581489563, "logits/chosen": 0.773237407207489, "logits/rejected": 0.7439693808555603, "logps/chosen": -2.349195718765259, "logps/rejected": -2.8690288066864014, "loss": 0.8398, "nll_loss": 0.7915594577789307, "rewards/accuracies": 0.75, "rewards/chosen": -0.23491957783699036, "rewards/margins": 0.0519832968711853, "rewards/rejected": -0.28690287470817566, "step": 2237 }, { "epoch": 6.1273100616016425, "grad_norm": 3.7755472660064697, "learning_rate": 6.935616438356164e-07, "log_odds_chosen": 1.094207763671875, "log_odds_ratio": -0.3965117931365967, "logits/chosen": 0.7723934650421143, "logits/rejected": 0.7597511410713196, "logps/chosen": -1.6871273517608643, "logps/rejected": -2.6245734691619873, "loss": 0.8905, "nll_loss": 0.8508040904998779, "rewards/accuracies": 0.75, "rewards/chosen": -0.16871273517608643, "rewards/margins": 0.09374460577964783, "rewards/rejected": -0.26245737075805664, "step": 2238 }, { "epoch": 6.1300479123887746, "grad_norm": 4.658433437347412, "learning_rate": 6.934246575342465e-07, "log_odds_chosen": 1.2494558095932007, "log_odds_ratio": -0.39740923047065735, "logits/chosen": 0.6891438961029053, "logits/rejected": 0.6655138731002808, "logps/chosen": -2.6347031593322754, "logps/rejected": -3.808305025100708, "loss": 0.8626, "nll_loss": 0.822882354259491, "rewards/accuracies": 0.75, "rewards/chosen": -0.263470321893692, "rewards/margins": 0.1173601895570755, "rewards/rejected": -0.3808305263519287, "step": 2239 }, { "epoch": 6.132785763175907, "grad_norm": 4.345860958099365, "learning_rate": 6.932876712328767e-07, "log_odds_chosen": 2.328258991241455, "log_odds_ratio": -0.28893792629241943, "logits/chosen": 1.05315363407135, "logits/rejected": 0.9991961717605591, "logps/chosen": -1.8638919591903687, "logps/rejected": -4.087165832519531, "loss": 0.901, "nll_loss": 0.872067391872406, "rewards/accuracies": 1.0, "rewards/chosen": -0.18638920783996582, "rewards/margins": 0.22232741117477417, "rewards/rejected": -0.40871661901474, "step": 2240 }, { "epoch": 6.135523613963039, "grad_norm": 3.879045009613037, "learning_rate": 6.931506849315068e-07, "log_odds_chosen": 1.2029750347137451, "log_odds_ratio": -0.33277401328086853, "logits/chosen": 0.6848188042640686, "logits/rejected": 0.667678952217102, "logps/chosen": -2.048628330230713, "logps/rejected": -3.167994737625122, "loss": 0.8518, "nll_loss": 0.8185640573501587, "rewards/accuracies": 1.0, "rewards/chosen": -0.20486286282539368, "rewards/margins": 0.11193662881851196, "rewards/rejected": -0.31679949164390564, "step": 2241 }, { "epoch": 6.138261464750171, "grad_norm": 3.9253005981445312, "learning_rate": 6.930136986301369e-07, "log_odds_chosen": 0.767585813999176, "log_odds_ratio": -0.635474443435669, "logits/chosen": 0.9339286088943481, "logits/rejected": 0.9007396697998047, "logps/chosen": -2.5228216648101807, "logps/rejected": -3.2820658683776855, "loss": 0.9246, "nll_loss": 0.8610259294509888, "rewards/accuracies": 0.75, "rewards/chosen": -0.25228214263916016, "rewards/margins": 0.07592441141605377, "rewards/rejected": -0.3282065689563751, "step": 2242 }, { "epoch": 6.140999315537303, "grad_norm": 4.403207778930664, "learning_rate": 6.928767123287671e-07, "log_odds_chosen": 1.078765630722046, "log_odds_ratio": -0.4452454447746277, "logits/chosen": 0.7067675590515137, "logits/rejected": 0.7022699117660522, "logps/chosen": -2.1828560829162598, "logps/rejected": -3.154700517654419, "loss": 0.9131, "nll_loss": 0.8686185479164124, "rewards/accuracies": 0.875, "rewards/chosen": -0.21828560531139374, "rewards/margins": 0.0971844345331192, "rewards/rejected": -0.31547003984451294, "step": 2243 }, { "epoch": 6.143737166324435, "grad_norm": 4.249197483062744, "learning_rate": 6.927397260273972e-07, "log_odds_chosen": 2.1479601860046387, "log_odds_ratio": -0.18710926175117493, "logits/chosen": 0.8727211356163025, "logits/rejected": 0.8899319171905518, "logps/chosen": -1.960884690284729, "logps/rejected": -3.9733452796936035, "loss": 0.7733, "nll_loss": 0.7545863389968872, "rewards/accuracies": 1.0, "rewards/chosen": -0.19608847796916962, "rewards/margins": 0.20124608278274536, "rewards/rejected": -0.39733457565307617, "step": 2244 }, { "epoch": 6.146475017111568, "grad_norm": 6.819919109344482, "learning_rate": 6.926027397260273e-07, "log_odds_chosen": 0.01899966597557068, "log_odds_ratio": -0.9442055821418762, "logits/chosen": 0.744138777256012, "logits/rejected": 0.7101832032203674, "logps/chosen": -2.4836158752441406, "logps/rejected": -2.454495906829834, "loss": 0.8989, "nll_loss": 0.8045153617858887, "rewards/accuracies": 0.625, "rewards/chosen": -0.24836160242557526, "rewards/margins": -0.0029120035469532013, "rewards/rejected": -0.24544960260391235, "step": 2245 }, { "epoch": 6.1492128678987, "grad_norm": 5.055720806121826, "learning_rate": 6.924657534246575e-07, "log_odds_chosen": 1.2360928058624268, "log_odds_ratio": -0.4112167954444885, "logits/chosen": 0.8355937004089355, "logits/rejected": 0.8520665168762207, "logps/chosen": -2.501293659210205, "logps/rejected": -3.6131627559661865, "loss": 0.7875, "nll_loss": 0.7463779449462891, "rewards/accuracies": 0.75, "rewards/chosen": -0.250129371881485, "rewards/margins": 0.1111869141459465, "rewards/rejected": -0.3613162934780121, "step": 2246 }, { "epoch": 6.151950718685832, "grad_norm": 5.0625104904174805, "learning_rate": 6.923287671232877e-07, "log_odds_chosen": 1.1736160516738892, "log_odds_ratio": -0.4344128370285034, "logits/chosen": 0.698862612247467, "logits/rejected": 0.7371752262115479, "logps/chosen": -2.688347816467285, "logps/rejected": -3.810377597808838, "loss": 1.0731, "nll_loss": 1.0296669006347656, "rewards/accuracies": 0.75, "rewards/chosen": -0.26883479952812195, "rewards/margins": 0.1122029721736908, "rewards/rejected": -0.38103777170181274, "step": 2247 }, { "epoch": 6.154688569472964, "grad_norm": 3.201336145401001, "learning_rate": 6.921917808219177e-07, "log_odds_chosen": 1.1861565113067627, "log_odds_ratio": -0.4109053611755371, "logits/chosen": 0.8312878608703613, "logits/rejected": 0.7776949405670166, "logps/chosen": -2.01633620262146, "logps/rejected": -3.0986521244049072, "loss": 0.8115, "nll_loss": 0.7703900337219238, "rewards/accuracies": 0.75, "rewards/chosen": -0.20163361728191376, "rewards/margins": 0.10823158919811249, "rewards/rejected": -0.30986523628234863, "step": 2248 }, { "epoch": 6.157426420260096, "grad_norm": 4.753237247467041, "learning_rate": 6.920547945205479e-07, "log_odds_chosen": 1.7115652561187744, "log_odds_ratio": -0.3618714213371277, "logits/chosen": 1.0653773546218872, "logits/rejected": 1.115443468093872, "logps/chosen": -3.062234401702881, "logps/rejected": -4.69925594329834, "loss": 0.7866, "nll_loss": 0.7504372596740723, "rewards/accuracies": 0.75, "rewards/chosen": -0.30622345209121704, "rewards/margins": 0.16370216012001038, "rewards/rejected": -0.4699256420135498, "step": 2249 }, { "epoch": 6.160164271047228, "grad_norm": 3.2724781036376953, "learning_rate": 6.91917808219178e-07, "log_odds_chosen": 1.473006010055542, "log_odds_ratio": -0.4080713093280792, "logits/chosen": 0.6042370796203613, "logits/rejected": 0.6953856945037842, "logps/chosen": -1.8859260082244873, "logps/rejected": -3.2754967212677, "loss": 0.856, "nll_loss": 0.8151443004608154, "rewards/accuracies": 0.75, "rewards/chosen": -0.1885925978422165, "rewards/margins": 0.13895708322525024, "rewards/rejected": -0.32754969596862793, "step": 2250 }, { "epoch": 6.16290212183436, "grad_norm": 3.5822818279266357, "learning_rate": 6.917808219178081e-07, "log_odds_chosen": 2.118466854095459, "log_odds_ratio": -0.22463534772396088, "logits/chosen": 1.1524043083190918, "logits/rejected": 1.2078986167907715, "logps/chosen": -2.1980433464050293, "logps/rejected": -4.236055850982666, "loss": 0.7735, "nll_loss": 0.7509987354278564, "rewards/accuracies": 0.875, "rewards/chosen": -0.21980434656143188, "rewards/margins": 0.2038012146949768, "rewards/rejected": -0.4236055612564087, "step": 2251 }, { "epoch": 6.165639972621492, "grad_norm": 3.8475286960601807, "learning_rate": 6.916438356164383e-07, "log_odds_chosen": 1.2979940176010132, "log_odds_ratio": -0.3364922106266022, "logits/chosen": 0.8798621296882629, "logits/rejected": 0.8466659188270569, "logps/chosen": -2.169499397277832, "logps/rejected": -3.3730149269104004, "loss": 0.8297, "nll_loss": 0.7960241436958313, "rewards/accuracies": 1.0, "rewards/chosen": -0.2169499546289444, "rewards/margins": 0.12035153806209564, "rewards/rejected": -0.33730149269104004, "step": 2252 }, { "epoch": 6.168377823408624, "grad_norm": 4.611560344696045, "learning_rate": 6.915068493150684e-07, "log_odds_chosen": 1.7634211778640747, "log_odds_ratio": -0.3238692879676819, "logits/chosen": 1.1094692945480347, "logits/rejected": 1.1888227462768555, "logps/chosen": -3.386113166809082, "logps/rejected": -5.096380710601807, "loss": 0.8386, "nll_loss": 0.8062523007392883, "rewards/accuracies": 0.875, "rewards/chosen": -0.33861133456230164, "rewards/margins": 0.17102673649787903, "rewards/rejected": -0.5096380710601807, "step": 2253 }, { "epoch": 6.1711156741957565, "grad_norm": 3.8814258575439453, "learning_rate": 6.913698630136985e-07, "log_odds_chosen": 2.1486101150512695, "log_odds_ratio": -0.1370927095413208, "logits/chosen": 0.8115788698196411, "logits/rejected": 0.8159303665161133, "logps/chosen": -1.9919885396957397, "logps/rejected": -3.9869942665100098, "loss": 0.8696, "nll_loss": 0.8559256792068481, "rewards/accuracies": 1.0, "rewards/chosen": -0.1991988718509674, "rewards/margins": 0.19950059056282043, "rewards/rejected": -0.39869943261146545, "step": 2254 }, { "epoch": 6.1738535249828885, "grad_norm": 5.963898658752441, "learning_rate": 6.912328767123287e-07, "log_odds_chosen": 0.4342765808105469, "log_odds_ratio": -0.7369987368583679, "logits/chosen": 1.0086053609848022, "logits/rejected": 0.9782348871231079, "logps/chosen": -3.0503334999084473, "logps/rejected": -3.453409433364868, "loss": 0.848, "nll_loss": 0.7742792367935181, "rewards/accuracies": 0.625, "rewards/chosen": -0.3050333261489868, "rewards/margins": 0.04030763357877731, "rewards/rejected": -0.3453409671783447, "step": 2255 }, { "epoch": 6.176591375770021, "grad_norm": 3.576930522918701, "learning_rate": 6.910958904109588e-07, "log_odds_chosen": 1.3373117446899414, "log_odds_ratio": -0.38280317187309265, "logits/chosen": 1.060584545135498, "logits/rejected": 1.1139720678329468, "logps/chosen": -2.3383445739746094, "logps/rejected": -3.593907356262207, "loss": 0.8456, "nll_loss": 0.8073270916938782, "rewards/accuracies": 0.875, "rewards/chosen": -0.23383447527885437, "rewards/margins": 0.12555629014968872, "rewards/rejected": -0.3593907356262207, "step": 2256 }, { "epoch": 6.179329226557153, "grad_norm": 3.8904614448547363, "learning_rate": 6.90958904109589e-07, "log_odds_chosen": 0.9423050880432129, "log_odds_ratio": -0.5068337917327881, "logits/chosen": 1.0351039171218872, "logits/rejected": 1.0499910116195679, "logps/chosen": -2.5520341396331787, "logps/rejected": -3.4268031120300293, "loss": 0.8798, "nll_loss": 0.8291455507278442, "rewards/accuracies": 0.75, "rewards/chosen": -0.2552034258842468, "rewards/margins": 0.08747688680887222, "rewards/rejected": -0.34268030524253845, "step": 2257 }, { "epoch": 6.182067077344285, "grad_norm": 4.89937686920166, "learning_rate": 6.908219178082192e-07, "log_odds_chosen": 1.7145521640777588, "log_odds_ratio": -0.36224156618118286, "logits/chosen": 1.008174180984497, "logits/rejected": 1.0563498735427856, "logps/chosen": -2.6781210899353027, "logps/rejected": -4.303836345672607, "loss": 0.7806, "nll_loss": 0.7443627119064331, "rewards/accuracies": 0.875, "rewards/chosen": -0.2678121030330658, "rewards/margins": 0.16257153451442719, "rewards/rejected": -0.4303836524486542, "step": 2258 }, { "epoch": 6.184804928131417, "grad_norm": 4.997470378875732, "learning_rate": 6.906849315068493e-07, "log_odds_chosen": 0.9732687473297119, "log_odds_ratio": -0.45401620864868164, "logits/chosen": 0.8293923735618591, "logits/rejected": 0.8501470685005188, "logps/chosen": -2.3792247772216797, "logps/rejected": -3.25876522064209, "loss": 0.8036, "nll_loss": 0.7582184076309204, "rewards/accuracies": 0.75, "rewards/chosen": -0.23792250454425812, "rewards/margins": 0.08795402199029922, "rewards/rejected": -0.32587653398513794, "step": 2259 }, { "epoch": 6.187542778918549, "grad_norm": 3.3370964527130127, "learning_rate": 6.905479452054795e-07, "log_odds_chosen": 2.0413875579833984, "log_odds_ratio": -0.20864132046699524, "logits/chosen": 0.8770616054534912, "logits/rejected": 0.8711543083190918, "logps/chosen": -2.794238567352295, "logps/rejected": -4.7771100997924805, "loss": 0.7819, "nll_loss": 0.760993242263794, "rewards/accuracies": 0.875, "rewards/chosen": -0.27942389249801636, "rewards/margins": 0.19828712940216064, "rewards/rejected": -0.477711021900177, "step": 2260 }, { "epoch": 6.190280629705681, "grad_norm": 4.869958877563477, "learning_rate": 6.904109589041097e-07, "log_odds_chosen": 2.5662355422973633, "log_odds_ratio": -0.26509398221969604, "logits/chosen": 1.131049633026123, "logits/rejected": 1.1751903295516968, "logps/chosen": -2.695808172225952, "logps/rejected": -5.198890686035156, "loss": 0.8416, "nll_loss": 0.8150681257247925, "rewards/accuracies": 0.875, "rewards/chosen": -0.26958081126213074, "rewards/margins": 0.25030821561813354, "rewards/rejected": -0.5198890566825867, "step": 2261 }, { "epoch": 6.193018480492813, "grad_norm": 3.41536545753479, "learning_rate": 6.902739726027397e-07, "log_odds_chosen": 1.4545875787734985, "log_odds_ratio": -0.33309364318847656, "logits/chosen": 1.0307480096817017, "logits/rejected": 1.0489797592163086, "logps/chosen": -2.239039659500122, "logps/rejected": -3.582587242126465, "loss": 0.74, "nll_loss": 0.7067123651504517, "rewards/accuracies": 0.875, "rewards/chosen": -0.22390396893024445, "rewards/margins": 0.13435474038124084, "rewards/rejected": -0.3582587242126465, "step": 2262 }, { "epoch": 6.195756331279945, "grad_norm": 4.37467622756958, "learning_rate": 6.901369863013699e-07, "log_odds_chosen": 0.8483939170837402, "log_odds_ratio": -0.44589346647262573, "logits/chosen": 0.56412672996521, "logits/rejected": 0.5874243974685669, "logps/chosen": -2.543267011642456, "logps/rejected": -3.3220791816711426, "loss": 0.8187, "nll_loss": 0.7741308212280273, "rewards/accuracies": 0.75, "rewards/chosen": -0.2543267011642456, "rewards/margins": 0.07788124680519104, "rewards/rejected": -0.33220791816711426, "step": 2263 }, { "epoch": 6.198494182067077, "grad_norm": 5.501218795776367, "learning_rate": 6.9e-07, "log_odds_chosen": 1.3309457302093506, "log_odds_ratio": -0.5533305406570435, "logits/chosen": 0.8748159408569336, "logits/rejected": 0.9011881351470947, "logps/chosen": -2.664907455444336, "logps/rejected": -3.9328532218933105, "loss": 0.8783, "nll_loss": 0.8230164647102356, "rewards/accuracies": 0.625, "rewards/chosen": -0.26649075746536255, "rewards/margins": 0.12679459154605865, "rewards/rejected": -0.39328533411026, "step": 2264 }, { "epoch": 6.201232032854209, "grad_norm": 3.806393623352051, "learning_rate": 6.898630136986301e-07, "log_odds_chosen": 2.73148250579834, "log_odds_ratio": -0.11655160039663315, "logits/chosen": 0.9375331401824951, "logits/rejected": 0.9056628942489624, "logps/chosen": -2.569272041320801, "logps/rejected": -5.203576564788818, "loss": 0.9038, "nll_loss": 0.8921006917953491, "rewards/accuracies": 1.0, "rewards/chosen": -0.2569271922111511, "rewards/margins": 0.26343047618865967, "rewards/rejected": -0.5203576683998108, "step": 2265 }, { "epoch": 6.203969883641341, "grad_norm": 3.6823909282684326, "learning_rate": 6.897260273972603e-07, "log_odds_chosen": 1.200210690498352, "log_odds_ratio": -0.34474509954452515, "logits/chosen": 1.0925172567367554, "logits/rejected": 1.1935093402862549, "logps/chosen": -2.619683265686035, "logps/rejected": -3.7505414485931396, "loss": 0.8257, "nll_loss": 0.791270911693573, "rewards/accuracies": 0.875, "rewards/chosen": -0.26196834444999695, "rewards/margins": 0.1130857989192009, "rewards/rejected": -0.37505412101745605, "step": 2266 }, { "epoch": 6.206707734428473, "grad_norm": 4.148993015289307, "learning_rate": 6.895890410958904e-07, "log_odds_chosen": 2.067631721496582, "log_odds_ratio": -0.32227393984794617, "logits/chosen": 0.8231768608093262, "logits/rejected": 0.876787543296814, "logps/chosen": -2.037106990814209, "logps/rejected": -4.010346412658691, "loss": 0.7583, "nll_loss": 0.7260396480560303, "rewards/accuracies": 0.875, "rewards/chosen": -0.20371073484420776, "rewards/margins": 0.1973239779472351, "rewards/rejected": -0.4010346829891205, "step": 2267 }, { "epoch": 6.209445585215605, "grad_norm": 3.9867801666259766, "learning_rate": 6.894520547945205e-07, "log_odds_chosen": 1.0248510837554932, "log_odds_ratio": -0.4200741648674011, "logits/chosen": 0.7355902194976807, "logits/rejected": 0.7058727145195007, "logps/chosen": -2.495556354522705, "logps/rejected": -3.459836959838867, "loss": 0.9172, "nll_loss": 0.8752309083938599, "rewards/accuracies": 0.875, "rewards/chosen": -0.24955563247203827, "rewards/margins": 0.0964280515909195, "rewards/rejected": -0.34598368406295776, "step": 2268 }, { "epoch": 6.2121834360027375, "grad_norm": 3.3524930477142334, "learning_rate": 6.893150684931507e-07, "log_odds_chosen": 1.6169326305389404, "log_odds_ratio": -0.2809123396873474, "logits/chosen": 0.8729274868965149, "logits/rejected": 0.8624655604362488, "logps/chosen": -2.1111092567443848, "logps/rejected": -3.63083553314209, "loss": 0.8561, "nll_loss": 0.8280460834503174, "rewards/accuracies": 1.0, "rewards/chosen": -0.2111109495162964, "rewards/margins": 0.15197262167930603, "rewards/rejected": -0.36308354139328003, "step": 2269 }, { "epoch": 6.2149212867898695, "grad_norm": 7.805720806121826, "learning_rate": 6.891780821917808e-07, "log_odds_chosen": -0.10929961502552032, "log_odds_ratio": -0.9242318868637085, "logits/chosen": 0.7348588705062866, "logits/rejected": 0.8540155291557312, "logps/chosen": -2.9891955852508545, "logps/rejected": -2.856140375137329, "loss": 0.8848, "nll_loss": 0.7923785448074341, "rewards/accuracies": 0.375, "rewards/chosen": -0.29891955852508545, "rewards/margins": -0.01330552063882351, "rewards/rejected": -0.285614013671875, "step": 2270 }, { "epoch": 6.217659137577002, "grad_norm": 4.734828472137451, "learning_rate": 6.89041095890411e-07, "log_odds_chosen": 2.9276812076568604, "log_odds_ratio": -0.4571479260921478, "logits/chosen": 0.8232468962669373, "logits/rejected": 0.7394137978553772, "logps/chosen": -2.892361640930176, "logps/rejected": -5.726531982421875, "loss": 0.8813, "nll_loss": 0.8355609178543091, "rewards/accuracies": 0.75, "rewards/chosen": -0.2892361581325531, "rewards/margins": 0.28341707587242126, "rewards/rejected": -0.5726532340049744, "step": 2271 }, { "epoch": 6.220396988364135, "grad_norm": 5.042407989501953, "learning_rate": 6.889041095890411e-07, "log_odds_chosen": 1.3309752941131592, "log_odds_ratio": -0.46994486451148987, "logits/chosen": 0.9658488631248474, "logits/rejected": 0.9322131872177124, "logps/chosen": -2.8877205848693848, "logps/rejected": -4.172255516052246, "loss": 0.9245, "nll_loss": 0.8774880766868591, "rewards/accuracies": 0.875, "rewards/chosen": -0.2887720465660095, "rewards/margins": 0.12845349311828613, "rewards/rejected": -0.41722556948661804, "step": 2272 }, { "epoch": 6.223134839151267, "grad_norm": 4.30450439453125, "learning_rate": 6.887671232876712e-07, "log_odds_chosen": 1.4694159030914307, "log_odds_ratio": -0.28978636860847473, "logits/chosen": 0.7637981176376343, "logits/rejected": 0.7831414341926575, "logps/chosen": -2.4038853645324707, "logps/rejected": -3.807678699493408, "loss": 0.8065, "nll_loss": 0.7775152325630188, "rewards/accuracies": 1.0, "rewards/chosen": -0.24038854241371155, "rewards/margins": 0.14037930965423584, "rewards/rejected": -0.3807678818702698, "step": 2273 }, { "epoch": 6.225872689938399, "grad_norm": 6.440709590911865, "learning_rate": 6.886301369863014e-07, "log_odds_chosen": 1.8581712245941162, "log_odds_ratio": -0.25785863399505615, "logits/chosen": 0.9957343935966492, "logits/rejected": 1.0068674087524414, "logps/chosen": -2.0776312351226807, "logps/rejected": -3.834548234939575, "loss": 0.7636, "nll_loss": 0.7377647161483765, "rewards/accuracies": 1.0, "rewards/chosen": -0.20776312053203583, "rewards/margins": 0.17569172382354736, "rewards/rejected": -0.3834548592567444, "step": 2274 }, { "epoch": 6.228610540725531, "grad_norm": 7.121182441711426, "learning_rate": 6.884931506849315e-07, "log_odds_chosen": 1.42301607131958, "log_odds_ratio": -0.36034467816352844, "logits/chosen": 0.9450761079788208, "logits/rejected": 0.9399871230125427, "logps/chosen": -2.884791612625122, "logps/rejected": -4.236036777496338, "loss": 0.8229, "nll_loss": 0.7868709564208984, "rewards/accuracies": 0.875, "rewards/chosen": -0.28847914934158325, "rewards/margins": 0.13512450456619263, "rewards/rejected": -0.42360368371009827, "step": 2275 }, { "epoch": 6.231348391512663, "grad_norm": 5.024622440338135, "learning_rate": 6.883561643835616e-07, "log_odds_chosen": 1.4970693588256836, "log_odds_ratio": -0.2819433808326721, "logits/chosen": 0.9462959170341492, "logits/rejected": 0.9463064670562744, "logps/chosen": -2.0601398944854736, "logps/rejected": -3.4394357204437256, "loss": 0.8461, "nll_loss": 0.8179371953010559, "rewards/accuracies": 1.0, "rewards/chosen": -0.2060139924287796, "rewards/margins": 0.13792958855628967, "rewards/rejected": -0.34394359588623047, "step": 2276 }, { "epoch": 6.234086242299795, "grad_norm": 3.7589805126190186, "learning_rate": 6.882191780821918e-07, "log_odds_chosen": 2.9557912349700928, "log_odds_ratio": -0.13657326996326447, "logits/chosen": 1.1085978746414185, "logits/rejected": 1.161737322807312, "logps/chosen": -2.4513092041015625, "logps/rejected": -5.309012413024902, "loss": 0.804, "nll_loss": 0.7903074026107788, "rewards/accuracies": 1.0, "rewards/chosen": -0.24513092637062073, "rewards/margins": 0.2857702672481537, "rewards/rejected": -0.5309011936187744, "step": 2277 }, { "epoch": 6.236824093086927, "grad_norm": 4.059198379516602, "learning_rate": 6.88082191780822e-07, "log_odds_chosen": 1.5378754138946533, "log_odds_ratio": -0.30997318029403687, "logits/chosen": 0.790117084980011, "logits/rejected": 0.8007580041885376, "logps/chosen": -2.087613344192505, "logps/rejected": -3.499485969543457, "loss": 0.8121, "nll_loss": 0.7810787558555603, "rewards/accuracies": 0.75, "rewards/chosen": -0.2087613344192505, "rewards/margins": 0.14118722081184387, "rewards/rejected": -0.34994855523109436, "step": 2278 }, { "epoch": 6.239561943874059, "grad_norm": 4.074965953826904, "learning_rate": 6.87945205479452e-07, "log_odds_chosen": 1.8907928466796875, "log_odds_ratio": -0.3120615482330322, "logits/chosen": 0.7866204977035522, "logits/rejected": 0.7721105217933655, "logps/chosen": -2.707003116607666, "logps/rejected": -4.559355735778809, "loss": 0.8918, "nll_loss": 0.8606364727020264, "rewards/accuracies": 0.875, "rewards/chosen": -0.27070027589797974, "rewards/margins": 0.18523526191711426, "rewards/rejected": -0.455935537815094, "step": 2279 }, { "epoch": 6.242299794661191, "grad_norm": 3.9593284130096436, "learning_rate": 6.878082191780822e-07, "log_odds_chosen": 1.4568345546722412, "log_odds_ratio": -0.36587390303611755, "logits/chosen": 0.8072938919067383, "logits/rejected": 0.8410295248031616, "logps/chosen": -2.7648041248321533, "logps/rejected": -4.177672386169434, "loss": 0.8639, "nll_loss": 0.8273168802261353, "rewards/accuracies": 1.0, "rewards/chosen": -0.27648043632507324, "rewards/margins": 0.14128684997558594, "rewards/rejected": -0.4177672863006592, "step": 2280 }, { "epoch": 6.245037645448323, "grad_norm": 4.966721057891846, "learning_rate": 6.876712328767123e-07, "log_odds_chosen": 1.2705811262130737, "log_odds_ratio": -0.35290998220443726, "logits/chosen": 0.8597999811172485, "logits/rejected": 0.8473396301269531, "logps/chosen": -2.540341377258301, "logps/rejected": -3.733811378479004, "loss": 0.8712, "nll_loss": 0.8358831405639648, "rewards/accuracies": 0.875, "rewards/chosen": -0.2540341317653656, "rewards/margins": 0.119346983730793, "rewards/rejected": -0.373381108045578, "step": 2281 }, { "epoch": 6.247775496235455, "grad_norm": 3.877535104751587, "learning_rate": 6.875342465753424e-07, "log_odds_chosen": 1.037345051765442, "log_odds_ratio": -0.3871285617351532, "logits/chosen": 0.9494191408157349, "logits/rejected": 1.0141217708587646, "logps/chosen": -2.411468505859375, "logps/rejected": -3.379387855529785, "loss": 0.8083, "nll_loss": 0.7695592641830444, "rewards/accuracies": 0.875, "rewards/chosen": -0.24114684760570526, "rewards/margins": 0.09679196029901505, "rewards/rejected": -0.3379387855529785, "step": 2282 }, { "epoch": 6.250513347022587, "grad_norm": 4.47971773147583, "learning_rate": 6.873972602739726e-07, "log_odds_chosen": 3.060328483581543, "log_odds_ratio": -0.1300024539232254, "logits/chosen": 0.9319097995758057, "logits/rejected": 0.9485640525817871, "logps/chosen": -2.465115547180176, "logps/rejected": -5.429496765136719, "loss": 0.8349, "nll_loss": 0.8218858242034912, "rewards/accuracies": 1.0, "rewards/chosen": -0.2465115487575531, "rewards/margins": 0.29643815755844116, "rewards/rejected": -0.5429496765136719, "step": 2283 }, { "epoch": 6.253251197809719, "grad_norm": 4.686272621154785, "learning_rate": 6.872602739726027e-07, "log_odds_chosen": 0.9142847657203674, "log_odds_ratio": -0.48839181661605835, "logits/chosen": 0.9061346054077148, "logits/rejected": 0.7945770621299744, "logps/chosen": -2.5546560287475586, "logps/rejected": -3.38008975982666, "loss": 1.0049, "nll_loss": 0.9560666084289551, "rewards/accuracies": 0.75, "rewards/chosen": -0.2554655969142914, "rewards/margins": 0.08254338800907135, "rewards/rejected": -0.3380089998245239, "step": 2284 }, { "epoch": 6.2559890485968515, "grad_norm": 3.799313545227051, "learning_rate": 6.871232876712329e-07, "log_odds_chosen": 0.4980059266090393, "log_odds_ratio": -0.5782142877578735, "logits/chosen": 0.9238616228103638, "logits/rejected": 0.895376443862915, "logps/chosen": -2.5671205520629883, "logps/rejected": -3.0400962829589844, "loss": 0.8827, "nll_loss": 0.8248534798622131, "rewards/accuracies": 0.75, "rewards/chosen": -0.25671207904815674, "rewards/margins": 0.047297559678554535, "rewards/rejected": -0.30400967597961426, "step": 2285 }, { "epoch": 6.2587268993839835, "grad_norm": 6.593250751495361, "learning_rate": 6.86986301369863e-07, "log_odds_chosen": 1.2264463901519775, "log_odds_ratio": -0.6446329951286316, "logits/chosen": 1.157000184059143, "logits/rejected": 1.170211911201477, "logps/chosen": -3.599522590637207, "logps/rejected": -4.765375137329102, "loss": 1.0343, "nll_loss": 0.9698704481124878, "rewards/accuracies": 0.875, "rewards/chosen": -0.35995230078697205, "rewards/margins": 0.11658524721860886, "rewards/rejected": -0.4765374958515167, "step": 2286 }, { "epoch": 6.261464750171116, "grad_norm": 4.349644660949707, "learning_rate": 6.868493150684931e-07, "log_odds_chosen": 1.4391084909439087, "log_odds_ratio": -0.4129403531551361, "logits/chosen": 0.953033447265625, "logits/rejected": 0.9197446703910828, "logps/chosen": -2.216421127319336, "logps/rejected": -3.5743935108184814, "loss": 0.8343, "nll_loss": 0.793039858341217, "rewards/accuracies": 0.875, "rewards/chosen": -0.22164210677146912, "rewards/margins": 0.13579724729061127, "rewards/rejected": -0.3574393391609192, "step": 2287 }, { "epoch": 6.264202600958248, "grad_norm": 3.683375358581543, "learning_rate": 6.867123287671233e-07, "log_odds_chosen": 2.432060956954956, "log_odds_ratio": -0.20113912224769592, "logits/chosen": 0.9448153972625732, "logits/rejected": 1.0103061199188232, "logps/chosen": -2.952746629714966, "logps/rejected": -5.3339667320251465, "loss": 0.8034, "nll_loss": 0.7833102345466614, "rewards/accuracies": 1.0, "rewards/chosen": -0.2952747046947479, "rewards/margins": 0.23812198638916016, "rewards/rejected": -0.5333966612815857, "step": 2288 }, { "epoch": 6.26694045174538, "grad_norm": 3.5899288654327393, "learning_rate": 6.865753424657534e-07, "log_odds_chosen": 1.8460716009140015, "log_odds_ratio": -0.32741686701774597, "logits/chosen": 0.8791254758834839, "logits/rejected": 0.8778967261314392, "logps/chosen": -2.089707851409912, "logps/rejected": -3.787930965423584, "loss": 0.8314, "nll_loss": 0.7986737489700317, "rewards/accuracies": 0.875, "rewards/chosen": -0.2089707851409912, "rewards/margins": 0.1698223054409027, "rewards/rejected": -0.3787930905818939, "step": 2289 }, { "epoch": 6.269678302532512, "grad_norm": 3.9634475708007812, "learning_rate": 6.864383561643835e-07, "log_odds_chosen": 1.2632083892822266, "log_odds_ratio": -0.31112220883369446, "logits/chosen": 0.698322057723999, "logits/rejected": 0.7198808193206787, "logps/chosen": -2.10799241065979, "logps/rejected": -3.2647664546966553, "loss": 0.8261, "nll_loss": 0.7950013279914856, "rewards/accuracies": 1.0, "rewards/chosen": -0.21079924702644348, "rewards/margins": 0.11567739397287369, "rewards/rejected": -0.3264766335487366, "step": 2290 }, { "epoch": 6.272416153319644, "grad_norm": 4.868564605712891, "learning_rate": 6.863013698630137e-07, "log_odds_chosen": 1.5534279346466064, "log_odds_ratio": -0.3183296322822571, "logits/chosen": 0.8998157382011414, "logits/rejected": 0.9519096612930298, "logps/chosen": -2.3489456176757812, "logps/rejected": -3.7577595710754395, "loss": 0.8297, "nll_loss": 0.7978970408439636, "rewards/accuracies": 0.875, "rewards/chosen": -0.23489457368850708, "rewards/margins": 0.14088141918182373, "rewards/rejected": -0.3757759630680084, "step": 2291 }, { "epoch": 6.275154004106776, "grad_norm": 4.585319519042969, "learning_rate": 6.861643835616439e-07, "log_odds_chosen": 2.0014455318450928, "log_odds_ratio": -0.2960112988948822, "logits/chosen": 0.9199908375740051, "logits/rejected": 0.9233567714691162, "logps/chosen": -3.0790748596191406, "logps/rejected": -5.036193370819092, "loss": 0.8381, "nll_loss": 0.8085177540779114, "rewards/accuracies": 0.875, "rewards/chosen": -0.3079075217247009, "rewards/margins": 0.19571183621883392, "rewards/rejected": -0.5036193132400513, "step": 2292 }, { "epoch": 6.277891854893908, "grad_norm": 3.5039114952087402, "learning_rate": 6.860273972602739e-07, "log_odds_chosen": 1.3284791707992554, "log_odds_ratio": -0.3645126521587372, "logits/chosen": 0.6593701839447021, "logits/rejected": 0.6675897240638733, "logps/chosen": -1.9856585264205933, "logps/rejected": -3.244568109512329, "loss": 0.8215, "nll_loss": 0.7850130796432495, "rewards/accuracies": 0.75, "rewards/chosen": -0.19856587052345276, "rewards/margins": 0.12589094042778015, "rewards/rejected": -0.3244568109512329, "step": 2293 }, { "epoch": 6.28062970568104, "grad_norm": 3.7791807651519775, "learning_rate": 6.858904109589041e-07, "log_odds_chosen": 1.2457528114318848, "log_odds_ratio": -0.3360831141471863, "logits/chosen": 0.9634940028190613, "logits/rejected": 0.9302489757537842, "logps/chosen": -2.8321480751037598, "logps/rejected": -4.017457008361816, "loss": 0.9174, "nll_loss": 0.8837851881980896, "rewards/accuracies": 1.0, "rewards/chosen": -0.283214807510376, "rewards/margins": 0.11853089928627014, "rewards/rejected": -0.40174567699432373, "step": 2294 }, { "epoch": 6.283367556468172, "grad_norm": 4.689089298248291, "learning_rate": 6.857534246575342e-07, "log_odds_chosen": 1.5336300134658813, "log_odds_ratio": -0.41502514481544495, "logits/chosen": 0.9113548994064331, "logits/rejected": 0.8814186453819275, "logps/chosen": -2.2556440830230713, "logps/rejected": -3.695863723754883, "loss": 0.8855, "nll_loss": 0.8439685106277466, "rewards/accuracies": 0.875, "rewards/chosen": -0.22556443512439728, "rewards/margins": 0.14402194321155548, "rewards/rejected": -0.36958637833595276, "step": 2295 }, { "epoch": 6.286105407255304, "grad_norm": 4.603283405303955, "learning_rate": 6.856164383561643e-07, "log_odds_chosen": 1.5807600021362305, "log_odds_ratio": -0.2958061993122101, "logits/chosen": 1.0455012321472168, "logits/rejected": 1.005216360092163, "logps/chosen": -1.8992496728897095, "logps/rejected": -3.3673930168151855, "loss": 0.8173, "nll_loss": 0.7876899242401123, "rewards/accuracies": 0.875, "rewards/chosen": -0.18992497026920319, "rewards/margins": 0.14681431651115417, "rewards/rejected": -0.33673930168151855, "step": 2296 }, { "epoch": 6.288843258042437, "grad_norm": 4.027651309967041, "learning_rate": 6.854794520547945e-07, "log_odds_chosen": 1.6865606307983398, "log_odds_ratio": -0.2945561408996582, "logits/chosen": 0.7648748755455017, "logits/rejected": 0.7368557453155518, "logps/chosen": -2.613720417022705, "logps/rejected": -4.244649887084961, "loss": 0.9081, "nll_loss": 0.8786163330078125, "rewards/accuracies": 1.0, "rewards/chosen": -0.26137205958366394, "rewards/margins": 0.1630929708480835, "rewards/rejected": -0.42446503043174744, "step": 2297 }, { "epoch": 6.291581108829568, "grad_norm": 4.649476528167725, "learning_rate": 6.853424657534246e-07, "log_odds_chosen": -0.3108190596103668, "log_odds_ratio": -0.9712275862693787, "logits/chosen": 0.6505107283592224, "logits/rejected": 0.6915398240089417, "logps/chosen": -2.6670989990234375, "logps/rejected": -2.377457618713379, "loss": 0.9626, "nll_loss": 0.865497350692749, "rewards/accuracies": 0.375, "rewards/chosen": -0.26670992374420166, "rewards/margins": -0.028964176774024963, "rewards/rejected": -0.2377457618713379, "step": 2298 }, { "epoch": 6.294318959616701, "grad_norm": 4.802613258361816, "learning_rate": 6.852054794520548e-07, "log_odds_chosen": 1.6630198955535889, "log_odds_ratio": -0.44003814458847046, "logits/chosen": 0.7678766250610352, "logits/rejected": 0.7756916284561157, "logps/chosen": -1.841373324394226, "logps/rejected": -3.3099026679992676, "loss": 0.7872, "nll_loss": 0.743194043636322, "rewards/accuracies": 0.875, "rewards/chosen": -0.18413734436035156, "rewards/margins": 0.14685292541980743, "rewards/rejected": -0.3309902548789978, "step": 2299 }, { "epoch": 6.297056810403833, "grad_norm": 4.635749816894531, "learning_rate": 6.850684931506849e-07, "log_odds_chosen": 1.3379660844802856, "log_odds_ratio": -0.4260775148868561, "logits/chosen": 0.7117266058921814, "logits/rejected": 0.6867669224739075, "logps/chosen": -2.2461094856262207, "logps/rejected": -3.5385332107543945, "loss": 0.8799, "nll_loss": 0.837314784526825, "rewards/accuracies": 0.75, "rewards/chosen": -0.22461095452308655, "rewards/margins": 0.12924236059188843, "rewards/rejected": -0.353853315114975, "step": 2300 }, { "epoch": 6.299794661190965, "grad_norm": 4.74873161315918, "learning_rate": 6.84931506849315e-07, "log_odds_chosen": 1.2591146230697632, "log_odds_ratio": -0.33028724789619446, "logits/chosen": 0.6815894246101379, "logits/rejected": 0.6846164464950562, "logps/chosen": -1.9639414548873901, "logps/rejected": -3.0475950241088867, "loss": 0.8107, "nll_loss": 0.7777039408683777, "rewards/accuracies": 0.875, "rewards/chosen": -0.196394145488739, "rewards/margins": 0.10836536437273026, "rewards/rejected": -0.30475950241088867, "step": 2301 }, { "epoch": 6.3025325119780975, "grad_norm": 5.0442938804626465, "learning_rate": 6.847945205479452e-07, "log_odds_chosen": 1.5560216903686523, "log_odds_ratio": -0.7724864482879639, "logits/chosen": 0.7489882707595825, "logits/rejected": 0.8180968165397644, "logps/chosen": -2.510488510131836, "logps/rejected": -3.9763669967651367, "loss": 0.8294, "nll_loss": 0.7521597146987915, "rewards/accuracies": 0.875, "rewards/chosen": -0.25104886293411255, "rewards/margins": 0.14658784866333008, "rewards/rejected": -0.3976367115974426, "step": 2302 }, { "epoch": 6.30527036276523, "grad_norm": 5.023305892944336, "learning_rate": 6.846575342465753e-07, "log_odds_chosen": 3.0358076095581055, "log_odds_ratio": -0.1328417956829071, "logits/chosen": 0.9473656415939331, "logits/rejected": 0.8909502625465393, "logps/chosen": -2.2737534046173096, "logps/rejected": -5.141992568969727, "loss": 0.8059, "nll_loss": 0.7926052808761597, "rewards/accuracies": 1.0, "rewards/chosen": -0.2273753583431244, "rewards/margins": 0.28682392835617065, "rewards/rejected": -0.5141993165016174, "step": 2303 }, { "epoch": 6.308008213552362, "grad_norm": 3.8484625816345215, "learning_rate": 6.845205479452054e-07, "log_odds_chosen": 1.4961544275283813, "log_odds_ratio": -0.2534586787223816, "logits/chosen": 0.8907257318496704, "logits/rejected": 0.8861582279205322, "logps/chosen": -1.8650364875793457, "logps/rejected": -3.249213695526123, "loss": 0.7214, "nll_loss": 0.6960316896438599, "rewards/accuracies": 1.0, "rewards/chosen": -0.18650364875793457, "rewards/margins": 0.13841775059700012, "rewards/rejected": -0.3249213695526123, "step": 2304 }, { "epoch": 6.310746064339494, "grad_norm": 5.017355442047119, "learning_rate": 6.843835616438356e-07, "log_odds_chosen": 1.814910650253296, "log_odds_ratio": -0.22851106524467468, "logits/chosen": 1.0163768529891968, "logits/rejected": 1.0596957206726074, "logps/chosen": -2.814378261566162, "logps/rejected": -4.556800365447998, "loss": 0.8146, "nll_loss": 0.7917623519897461, "rewards/accuracies": 1.0, "rewards/chosen": -0.28143781423568726, "rewards/margins": 0.17424219846725464, "rewards/rejected": -0.4556800425052643, "step": 2305 }, { "epoch": 6.313483915126626, "grad_norm": 3.4827589988708496, "learning_rate": 6.842465753424658e-07, "log_odds_chosen": 1.921349287033081, "log_odds_ratio": -0.24478694796562195, "logits/chosen": 0.7101213335990906, "logits/rejected": 0.7146198749542236, "logps/chosen": -1.9805512428283691, "logps/rejected": -3.7851409912109375, "loss": 0.8445, "nll_loss": 0.8200622200965881, "rewards/accuracies": 1.0, "rewards/chosen": -0.19805514812469482, "rewards/margins": 0.18045896291732788, "rewards/rejected": -0.3785141110420227, "step": 2306 }, { "epoch": 6.316221765913758, "grad_norm": 4.427239894866943, "learning_rate": 6.841095890410958e-07, "log_odds_chosen": 1.2768638134002686, "log_odds_ratio": -0.3906780481338501, "logits/chosen": 0.9566842317581177, "logits/rejected": 0.946241557598114, "logps/chosen": -2.3428220748901367, "logps/rejected": -3.5069990158081055, "loss": 0.7788, "nll_loss": 0.739762008190155, "rewards/accuracies": 0.75, "rewards/chosen": -0.23428219556808472, "rewards/margins": 0.11641772091388702, "rewards/rejected": -0.35069990158081055, "step": 2307 }, { "epoch": 6.31895961670089, "grad_norm": 3.2582061290740967, "learning_rate": 6.83972602739726e-07, "log_odds_chosen": 1.1625418663024902, "log_odds_ratio": -0.5243398547172546, "logits/chosen": 0.7939828038215637, "logits/rejected": 0.8189757466316223, "logps/chosen": -2.4000895023345947, "logps/rejected": -3.5379483699798584, "loss": 0.8749, "nll_loss": 0.8225116729736328, "rewards/accuracies": 0.625, "rewards/chosen": -0.24000895023345947, "rewards/margins": 0.11378589272499084, "rewards/rejected": -0.3537948429584503, "step": 2308 }, { "epoch": 6.321697467488022, "grad_norm": 3.5144472122192383, "learning_rate": 6.838356164383562e-07, "log_odds_chosen": 3.417832851409912, "log_odds_ratio": -0.16357478499412537, "logits/chosen": 1.0559172630310059, "logits/rejected": 1.135918378829956, "logps/chosen": -3.156658172607422, "logps/rejected": -6.523919105529785, "loss": 0.7379, "nll_loss": 0.7215495109558105, "rewards/accuracies": 1.0, "rewards/chosen": -0.3156658411026001, "rewards/margins": 0.3367260694503784, "rewards/rejected": -0.6523919105529785, "step": 2309 }, { "epoch": 6.324435318275154, "grad_norm": 4.403069972991943, "learning_rate": 6.836986301369862e-07, "log_odds_chosen": 0.5817185640335083, "log_odds_ratio": -0.6592182517051697, "logits/chosen": 0.7693735957145691, "logits/rejected": 0.7819682359695435, "logps/chosen": -2.7073895931243896, "logps/rejected": -3.1998679637908936, "loss": 0.9315, "nll_loss": 0.8655818700790405, "rewards/accuracies": 0.625, "rewards/chosen": -0.27073895931243896, "rewards/margins": 0.0492478609085083, "rewards/rejected": -0.31998682022094727, "step": 2310 }, { "epoch": 6.327173169062286, "grad_norm": 3.5274384021759033, "learning_rate": 6.835616438356164e-07, "log_odds_chosen": 2.355759382247925, "log_odds_ratio": -0.36176544427871704, "logits/chosen": 0.852271318435669, "logits/rejected": 0.9286064505577087, "logps/chosen": -2.2625732421875, "logps/rejected": -4.557568550109863, "loss": 0.7535, "nll_loss": 0.717366099357605, "rewards/accuracies": 0.875, "rewards/chosen": -0.2262573093175888, "rewards/margins": 0.22949957847595215, "rewards/rejected": -0.45575690269470215, "step": 2311 }, { "epoch": 6.329911019849418, "grad_norm": 4.733293056488037, "learning_rate": 6.834246575342465e-07, "log_odds_chosen": 1.2798223495483398, "log_odds_ratio": -0.36959201097488403, "logits/chosen": 0.9578882455825806, "logits/rejected": 1.0460654497146606, "logps/chosen": -2.7603166103363037, "logps/rejected": -3.9815661907196045, "loss": 0.7853, "nll_loss": 0.7483570575714111, "rewards/accuracies": 0.875, "rewards/chosen": -0.2760316729545593, "rewards/margins": 0.12212498486042023, "rewards/rejected": -0.39815664291381836, "step": 2312 }, { "epoch": 6.33264887063655, "grad_norm": 3.7609145641326904, "learning_rate": 6.832876712328767e-07, "log_odds_chosen": 2.28609037399292, "log_odds_ratio": -0.2495768517255783, "logits/chosen": 1.1030491590499878, "logits/rejected": 1.0978296995162964, "logps/chosen": -2.7480711936950684, "logps/rejected": -4.983131408691406, "loss": 0.8039, "nll_loss": 0.7789644598960876, "rewards/accuracies": 1.0, "rewards/chosen": -0.2748071253299713, "rewards/margins": 0.22350597381591797, "rewards/rejected": -0.4983130991458893, "step": 2313 }, { "epoch": 6.335386721423682, "grad_norm": 5.954363822937012, "learning_rate": 6.831506849315068e-07, "log_odds_chosen": 1.5173935890197754, "log_odds_ratio": -0.5301764011383057, "logits/chosen": 0.8090344667434692, "logits/rejected": 0.7999714612960815, "logps/chosen": -2.6541407108306885, "logps/rejected": -4.040948867797852, "loss": 0.877, "nll_loss": 0.8239536881446838, "rewards/accuracies": 0.875, "rewards/chosen": -0.2654140591621399, "rewards/margins": 0.13868078589439392, "rewards/rejected": -0.4040948748588562, "step": 2314 }, { "epoch": 6.338124572210814, "grad_norm": 3.8710649013519287, "learning_rate": 6.830136986301369e-07, "log_odds_chosen": 1.7691936492919922, "log_odds_ratio": -0.2770998477935791, "logits/chosen": 0.8004074096679688, "logits/rejected": 0.8029883503913879, "logps/chosen": -1.6203006505966187, "logps/rejected": -3.2478318214416504, "loss": 0.8299, "nll_loss": 0.8021900653839111, "rewards/accuracies": 1.0, "rewards/chosen": -0.16203005611896515, "rewards/margins": 0.16275310516357422, "rewards/rejected": -0.32478317618370056, "step": 2315 }, { "epoch": 6.3408624229979464, "grad_norm": 3.759060859680176, "learning_rate": 6.828767123287671e-07, "log_odds_chosen": 1.5565106868743896, "log_odds_ratio": -0.38443559408187866, "logits/chosen": 0.7317356467247009, "logits/rejected": 0.7092559337615967, "logps/chosen": -1.995708703994751, "logps/rejected": -3.460369348526001, "loss": 0.931, "nll_loss": 0.8925801515579224, "rewards/accuracies": 0.75, "rewards/chosen": -0.19957087934017181, "rewards/margins": 0.14646606147289276, "rewards/rejected": -0.3460369408130646, "step": 2316 }, { "epoch": 6.3436002737850785, "grad_norm": 3.7050745487213135, "learning_rate": 6.827397260273972e-07, "log_odds_chosen": 1.8972162008285522, "log_odds_ratio": -0.24701029062271118, "logits/chosen": 0.9304003119468689, "logits/rejected": 0.8805201649665833, "logps/chosen": -2.3172903060913086, "logps/rejected": -4.120305061340332, "loss": 0.7981, "nll_loss": 0.7733899354934692, "rewards/accuracies": 1.0, "rewards/chosen": -0.23172903060913086, "rewards/margins": 0.1803014874458313, "rewards/rejected": -0.41203051805496216, "step": 2317 }, { "epoch": 6.346338124572211, "grad_norm": 4.16379976272583, "learning_rate": 6.826027397260273e-07, "log_odds_chosen": 1.3810358047485352, "log_odds_ratio": -0.4375740587711334, "logits/chosen": 0.5181472897529602, "logits/rejected": 0.5012142658233643, "logps/chosen": -1.9743587970733643, "logps/rejected": -3.2707784175872803, "loss": 0.9131, "nll_loss": 0.8693749308586121, "rewards/accuracies": 0.875, "rewards/chosen": -0.1974358856678009, "rewards/margins": 0.12964196503162384, "rewards/rejected": -0.32707783579826355, "step": 2318 }, { "epoch": 6.349075975359343, "grad_norm": 4.205564498901367, "learning_rate": 6.824657534246575e-07, "log_odds_chosen": 1.350136637687683, "log_odds_ratio": -0.28545913100242615, "logits/chosen": 0.6512346267700195, "logits/rejected": 0.5924778580665588, "logps/chosen": -2.1026599407196045, "logps/rejected": -3.3176262378692627, "loss": 0.8751, "nll_loss": 0.8466019630432129, "rewards/accuracies": 1.0, "rewards/chosen": -0.21026599407196045, "rewards/margins": 0.12149665504693985, "rewards/rejected": -0.3317626416683197, "step": 2319 }, { "epoch": 6.351813826146475, "grad_norm": 4.328149795532227, "learning_rate": 6.823287671232877e-07, "log_odds_chosen": 1.3659626245498657, "log_odds_ratio": -0.4178416430950165, "logits/chosen": 0.9658083915710449, "logits/rejected": 0.9528973698616028, "logps/chosen": -2.19840145111084, "logps/rejected": -3.45740008354187, "loss": 0.8419, "nll_loss": 0.8001636266708374, "rewards/accuracies": 0.875, "rewards/chosen": -0.2198401391506195, "rewards/margins": 0.12589989602565765, "rewards/rejected": -0.34574002027511597, "step": 2320 }, { "epoch": 6.354551676933607, "grad_norm": 3.832259178161621, "learning_rate": 6.821917808219177e-07, "log_odds_chosen": 2.2512688636779785, "log_odds_ratio": -0.16329902410507202, "logits/chosen": 1.0082902908325195, "logits/rejected": 0.9937283992767334, "logps/chosen": -2.643904685974121, "logps/rejected": -4.814376354217529, "loss": 0.8609, "nll_loss": 0.8445756435394287, "rewards/accuracies": 1.0, "rewards/chosen": -0.2643904685974121, "rewards/margins": 0.21704716980457306, "rewards/rejected": -0.481437623500824, "step": 2321 }, { "epoch": 6.357289527720739, "grad_norm": 4.066016674041748, "learning_rate": 6.820547945205479e-07, "log_odds_chosen": 1.7159264087677002, "log_odds_ratio": -0.23643876612186432, "logits/chosen": 0.6906546354293823, "logits/rejected": 0.6563506126403809, "logps/chosen": -1.694459319114685, "logps/rejected": -3.254488468170166, "loss": 0.8256, "nll_loss": 0.8019914627075195, "rewards/accuracies": 1.0, "rewards/chosen": -0.1694459468126297, "rewards/margins": 0.156002938747406, "rewards/rejected": -0.3254488706588745, "step": 2322 }, { "epoch": 6.360027378507871, "grad_norm": 5.118911266326904, "learning_rate": 6.819178082191781e-07, "log_odds_chosen": 0.481832891702652, "log_odds_ratio": -0.7965012788772583, "logits/chosen": 0.9059967994689941, "logits/rejected": 0.9042553305625916, "logps/chosen": -3.052943229675293, "logps/rejected": -3.5246076583862305, "loss": 0.9419, "nll_loss": 0.862248420715332, "rewards/accuracies": 0.5, "rewards/chosen": -0.30529433488845825, "rewards/margins": 0.04716644436120987, "rewards/rejected": -0.3524608016014099, "step": 2323 }, { "epoch": 6.362765229295004, "grad_norm": 3.8924617767333984, "learning_rate": 6.817808219178081e-07, "log_odds_chosen": 1.4261990785598755, "log_odds_ratio": -0.33466610312461853, "logits/chosen": 0.6283524036407471, "logits/rejected": 0.6021392345428467, "logps/chosen": -2.174670696258545, "logps/rejected": -3.505892753601074, "loss": 0.8979, "nll_loss": 0.8644689917564392, "rewards/accuracies": 0.875, "rewards/chosen": -0.2174670696258545, "rewards/margins": 0.13312219083309174, "rewards/rejected": -0.3505892753601074, "step": 2324 }, { "epoch": 6.365503080082135, "grad_norm": 3.6184241771698, "learning_rate": 6.816438356164383e-07, "log_odds_chosen": 1.0767855644226074, "log_odds_ratio": -0.4147881269454956, "logits/chosen": 0.6786405444145203, "logits/rejected": 0.6193616986274719, "logps/chosen": -1.9286284446716309, "logps/rejected": -2.965590238571167, "loss": 0.8193, "nll_loss": 0.7777881026268005, "rewards/accuracies": 0.75, "rewards/chosen": -0.1928628534078598, "rewards/margins": 0.10369616746902466, "rewards/rejected": -0.29655903577804565, "step": 2325 }, { "epoch": 6.368240930869268, "grad_norm": 6.074556827545166, "learning_rate": 6.815068493150684e-07, "log_odds_chosen": 0.8177000880241394, "log_odds_ratio": -0.5618302226066589, "logits/chosen": 0.8992644548416138, "logits/rejected": 0.9087824821472168, "logps/chosen": -2.2558698654174805, "logps/rejected": -2.9818379878997803, "loss": 0.8014, "nll_loss": 0.7452001571655273, "rewards/accuracies": 0.625, "rewards/chosen": -0.22558695077896118, "rewards/margins": 0.07259683310985565, "rewards/rejected": -0.298183798789978, "step": 2326 }, { "epoch": 6.3709787816564, "grad_norm": 4.019219875335693, "learning_rate": 6.813698630136986e-07, "log_odds_chosen": 1.7993638515472412, "log_odds_ratio": -0.4751010239124298, "logits/chosen": 0.6992226243019104, "logits/rejected": 0.6859598755836487, "logps/chosen": -1.6363970041275024, "logps/rejected": -3.319024085998535, "loss": 0.7872, "nll_loss": 0.739662766456604, "rewards/accuracies": 0.625, "rewards/chosen": -0.16363969445228577, "rewards/margins": 0.16826270520687103, "rewards/rejected": -0.3319023847579956, "step": 2327 }, { "epoch": 6.373716632443532, "grad_norm": 5.01235294342041, "learning_rate": 6.812328767123287e-07, "log_odds_chosen": 0.3404207229614258, "log_odds_ratio": -0.6317264437675476, "logits/chosen": 0.4708740711212158, "logits/rejected": 0.48888465762138367, "logps/chosen": -2.580561399459839, "logps/rejected": -2.879183292388916, "loss": 1.0191, "nll_loss": 0.9558840990066528, "rewards/accuracies": 0.625, "rewards/chosen": -0.2580561339855194, "rewards/margins": 0.029862212017178535, "rewards/rejected": -0.2879183292388916, "step": 2328 }, { "epoch": 6.376454483230664, "grad_norm": 3.9856860637664795, "learning_rate": 6.810958904109588e-07, "log_odds_chosen": 0.361614465713501, "log_odds_ratio": -0.6194946765899658, "logits/chosen": 0.6999746561050415, "logits/rejected": 0.7653181552886963, "logps/chosen": -2.396338701248169, "logps/rejected": -2.7182233333587646, "loss": 0.9189, "nll_loss": 0.8569595813751221, "rewards/accuracies": 0.5, "rewards/chosen": -0.23963387310504913, "rewards/margins": 0.032188449054956436, "rewards/rejected": -0.27182233333587646, "step": 2329 }, { "epoch": 6.379192334017796, "grad_norm": 5.114845275878906, "learning_rate": 6.80958904109589e-07, "log_odds_chosen": 1.3716760873794556, "log_odds_ratio": -0.3632816672325134, "logits/chosen": 1.0343399047851562, "logits/rejected": 1.0227069854736328, "logps/chosen": -1.9417622089385986, "logps/rejected": -3.231839656829834, "loss": 0.8089, "nll_loss": 0.7725492715835571, "rewards/accuracies": 1.0, "rewards/chosen": -0.19417621195316315, "rewards/margins": 0.1290077567100525, "rewards/rejected": -0.32318398356437683, "step": 2330 }, { "epoch": 6.381930184804928, "grad_norm": 4.391404628753662, "learning_rate": 6.808219178082191e-07, "log_odds_chosen": 1.631744623184204, "log_odds_ratio": -0.5054553151130676, "logits/chosen": 0.7683210968971252, "logits/rejected": 0.708257794380188, "logps/chosen": -2.252516746520996, "logps/rejected": -3.7998156547546387, "loss": 0.927, "nll_loss": 0.8764426112174988, "rewards/accuracies": 0.875, "rewards/chosen": -0.2252516746520996, "rewards/margins": 0.1547299176454544, "rewards/rejected": -0.3799815773963928, "step": 2331 }, { "epoch": 6.38466803559206, "grad_norm": 3.705235242843628, "learning_rate": 6.806849315068492e-07, "log_odds_chosen": 1.7903666496276855, "log_odds_ratio": -0.3066660761833191, "logits/chosen": 1.0418951511383057, "logits/rejected": 1.0720107555389404, "logps/chosen": -2.035076141357422, "logps/rejected": -3.717010498046875, "loss": 0.8033, "nll_loss": 0.7726423740386963, "rewards/accuracies": 1.0, "rewards/chosen": -0.20350763201713562, "rewards/margins": 0.16819341480731964, "rewards/rejected": -0.37170106172561646, "step": 2332 }, { "epoch": 6.3874058863791925, "grad_norm": 3.947317361831665, "learning_rate": 6.805479452054794e-07, "log_odds_chosen": 2.4080522060394287, "log_odds_ratio": -0.1627899557352066, "logits/chosen": 0.8237017393112183, "logits/rejected": 0.82303786277771, "logps/chosen": -1.7053301334381104, "logps/rejected": -3.918745994567871, "loss": 0.8574, "nll_loss": 0.8410981893539429, "rewards/accuracies": 1.0, "rewards/chosen": -0.17053300142288208, "rewards/margins": 0.22134160995483398, "rewards/rejected": -0.39187461137771606, "step": 2333 }, { "epoch": 6.3901437371663246, "grad_norm": 3.726174831390381, "learning_rate": 6.804109589041097e-07, "log_odds_chosen": 2.060662031173706, "log_odds_ratio": -0.3476629853248596, "logits/chosen": 0.7618648409843445, "logits/rejected": 0.7053539752960205, "logps/chosen": -2.248591899871826, "logps/rejected": -4.1853132247924805, "loss": 0.9057, "nll_loss": 0.8709416389465332, "rewards/accuracies": 0.875, "rewards/chosen": -0.22485917806625366, "rewards/margins": 0.19367216527462006, "rewards/rejected": -0.4185313284397125, "step": 2334 }, { "epoch": 6.392881587953457, "grad_norm": 4.179347038269043, "learning_rate": 6.802739726027396e-07, "log_odds_chosen": 0.9451184868812561, "log_odds_ratio": -0.3993815779685974, "logits/chosen": 0.9448974132537842, "logits/rejected": 0.9067894816398621, "logps/chosen": -1.670044183731079, "logps/rejected": -2.4237613677978516, "loss": 0.8195, "nll_loss": 0.7795799970626831, "rewards/accuracies": 1.0, "rewards/chosen": -0.16700442135334015, "rewards/margins": 0.07537172734737396, "rewards/rejected": -0.2423761487007141, "step": 2335 }, { "epoch": 6.395619438740589, "grad_norm": 4.116138458251953, "learning_rate": 6.801369863013698e-07, "log_odds_chosen": 0.5555397868156433, "log_odds_ratio": -0.5690284967422485, "logits/chosen": 0.9634593725204468, "logits/rejected": 0.9873858690261841, "logps/chosen": -2.2610902786254883, "logps/rejected": -2.737114191055298, "loss": 0.7416, "nll_loss": 0.6847218871116638, "rewards/accuracies": 0.75, "rewards/chosen": -0.22610901296138763, "rewards/margins": 0.04760240390896797, "rewards/rejected": -0.2737114429473877, "step": 2336 }, { "epoch": 6.398357289527721, "grad_norm": 4.958239555358887, "learning_rate": 6.800000000000001e-07, "log_odds_chosen": 1.5964293479919434, "log_odds_ratio": -0.2993784248828888, "logits/chosen": 0.8519818782806396, "logits/rejected": 0.7936338186264038, "logps/chosen": -2.0523521900177, "logps/rejected": -3.531928539276123, "loss": 0.8022, "nll_loss": 0.772226870059967, "rewards/accuracies": 0.875, "rewards/chosen": -0.20523522794246674, "rewards/margins": 0.14795762300491333, "rewards/rejected": -0.35319283604621887, "step": 2337 }, { "epoch": 6.401095140314853, "grad_norm": 4.8683624267578125, "learning_rate": 6.7986301369863e-07, "log_odds_chosen": 1.676703929901123, "log_odds_ratio": -0.21464839577674866, "logits/chosen": 1.001285195350647, "logits/rejected": 0.9933081865310669, "logps/chosen": -2.2347195148468018, "logps/rejected": -3.7862765789031982, "loss": 0.7846, "nll_loss": 0.7631640434265137, "rewards/accuracies": 1.0, "rewards/chosen": -0.22347195446491241, "rewards/margins": 0.1551557183265686, "rewards/rejected": -0.3786276578903198, "step": 2338 }, { "epoch": 6.403832991101985, "grad_norm": 3.9710795879364014, "learning_rate": 6.797260273972603e-07, "log_odds_chosen": 1.3857756853103638, "log_odds_ratio": -0.36180827021598816, "logits/chosen": 1.0490981340408325, "logits/rejected": 0.9980326294898987, "logps/chosen": -1.8735709190368652, "logps/rejected": -3.145512580871582, "loss": 0.8228, "nll_loss": 0.7865979671478271, "rewards/accuracies": 0.75, "rewards/chosen": -0.187357097864151, "rewards/margins": 0.1271941363811493, "rewards/rejected": -0.3145512640476227, "step": 2339 }, { "epoch": 6.406570841889117, "grad_norm": 3.557924270629883, "learning_rate": 6.795890410958904e-07, "log_odds_chosen": 1.187652587890625, "log_odds_ratio": -0.3923197388648987, "logits/chosen": 0.9377303719520569, "logits/rejected": 0.9009250998497009, "logps/chosen": -2.3615715503692627, "logps/rejected": -3.4979608058929443, "loss": 0.9388, "nll_loss": 0.8995434045791626, "rewards/accuracies": 0.875, "rewards/chosen": -0.23615716397762299, "rewards/margins": 0.11363893747329712, "rewards/rejected": -0.3497960865497589, "step": 2340 }, { "epoch": 6.409308692676249, "grad_norm": 5.286129474639893, "learning_rate": 6.794520547945205e-07, "log_odds_chosen": 1.9515202045440674, "log_odds_ratio": -0.22628360986709595, "logits/chosen": 0.8037640452384949, "logits/rejected": 0.8304203152656555, "logps/chosen": -2.4281694889068604, "logps/rejected": -4.294026851654053, "loss": 0.8022, "nll_loss": 0.779586136341095, "rewards/accuracies": 1.0, "rewards/chosen": -0.2428169548511505, "rewards/margins": 0.18658572435379028, "rewards/rejected": -0.4294026792049408, "step": 2341 }, { "epoch": 6.412046543463381, "grad_norm": 4.114048957824707, "learning_rate": 6.793150684931507e-07, "log_odds_chosen": 1.6050509214401245, "log_odds_ratio": -0.3462393879890442, "logits/chosen": 0.8417168855667114, "logits/rejected": 0.8522452712059021, "logps/chosen": -2.273758888244629, "logps/rejected": -3.836251974105835, "loss": 0.7906, "nll_loss": 0.7559568881988525, "rewards/accuracies": 0.875, "rewards/chosen": -0.22737592458724976, "rewards/margins": 0.1562492996454239, "rewards/rejected": -0.38362520933151245, "step": 2342 }, { "epoch": 6.414784394250513, "grad_norm": 3.5930957794189453, "learning_rate": 6.791780821917808e-07, "log_odds_chosen": 1.759892225265503, "log_odds_ratio": -0.2720491290092468, "logits/chosen": 0.8969790935516357, "logits/rejected": 0.9346513748168945, "logps/chosen": -1.886288046836853, "logps/rejected": -3.5043182373046875, "loss": 0.7516, "nll_loss": 0.7244167923927307, "rewards/accuracies": 0.875, "rewards/chosen": -0.18862882256507874, "rewards/margins": 0.16180302202701569, "rewards/rejected": -0.3504318296909332, "step": 2343 }, { "epoch": 6.417522245037645, "grad_norm": 3.98834228515625, "learning_rate": 6.79041095890411e-07, "log_odds_chosen": 1.1512994766235352, "log_odds_ratio": -0.3737213611602783, "logits/chosen": 0.5566390752792358, "logits/rejected": 0.48612627387046814, "logps/chosen": -2.289649486541748, "logps/rejected": -3.381523847579956, "loss": 0.8631, "nll_loss": 0.8257612586021423, "rewards/accuracies": 1.0, "rewards/chosen": -0.22896495461463928, "rewards/margins": 0.10918746143579483, "rewards/rejected": -0.3381524085998535, "step": 2344 }, { "epoch": 6.420260095824777, "grad_norm": 4.913662910461426, "learning_rate": 6.789041095890411e-07, "log_odds_chosen": 1.2947452068328857, "log_odds_ratio": -0.35238906741142273, "logits/chosen": 0.6854553818702698, "logits/rejected": 0.629655659198761, "logps/chosen": -2.2641494274139404, "logps/rejected": -3.435530185699463, "loss": 0.8513, "nll_loss": 0.8160526752471924, "rewards/accuracies": 0.875, "rewards/chosen": -0.22641496360301971, "rewards/margins": 0.11713807284832001, "rewards/rejected": -0.34355300664901733, "step": 2345 }, { "epoch": 6.422997946611909, "grad_norm": 6.038036346435547, "learning_rate": 6.787671232876712e-07, "log_odds_chosen": 0.2956141233444214, "log_odds_ratio": -0.8559774160385132, "logits/chosen": 0.7720906734466553, "logits/rejected": 0.7582311630249023, "logps/chosen": -2.4107794761657715, "logps/rejected": -2.642183780670166, "loss": 0.8713, "nll_loss": 0.7856923937797546, "rewards/accuracies": 0.625, "rewards/chosen": -0.2410779446363449, "rewards/margins": 0.02314043417572975, "rewards/rejected": -0.26421838998794556, "step": 2346 }, { "epoch": 6.425735797399041, "grad_norm": 3.9746525287628174, "learning_rate": 6.786301369863014e-07, "log_odds_chosen": 1.7223578691482544, "log_odds_ratio": -0.3058655858039856, "logits/chosen": 0.8586314916610718, "logits/rejected": 0.9086061716079712, "logps/chosen": -2.5485241413116455, "logps/rejected": -4.211077690124512, "loss": 0.8234, "nll_loss": 0.7927930355072021, "rewards/accuracies": 0.875, "rewards/chosen": -0.25485241413116455, "rewards/margins": 0.16625535488128662, "rewards/rejected": -0.42110776901245117, "step": 2347 }, { "epoch": 6.4284736481861735, "grad_norm": 3.6457645893096924, "learning_rate": 6.784931506849315e-07, "log_odds_chosen": 1.8971809148788452, "log_odds_ratio": -0.26283398270606995, "logits/chosen": 0.9669507741928101, "logits/rejected": 0.9945523738861084, "logps/chosen": -2.0471107959747314, "logps/rejected": -3.853670358657837, "loss": 0.7717, "nll_loss": 0.745444655418396, "rewards/accuracies": 1.0, "rewards/chosen": -0.20471107959747314, "rewards/margins": 0.18065595626831055, "rewards/rejected": -0.3853670358657837, "step": 2348 }, { "epoch": 6.431211498973306, "grad_norm": 4.781948089599609, "learning_rate": 6.783561643835616e-07, "log_odds_chosen": 2.5611915588378906, "log_odds_ratio": -0.33723679184913635, "logits/chosen": 0.9276075959205627, "logits/rejected": 1.0055838823318481, "logps/chosen": -3.125187873840332, "logps/rejected": -5.639545440673828, "loss": 0.8529, "nll_loss": 0.8191747665405273, "rewards/accuracies": 0.875, "rewards/chosen": -0.31251880526542664, "rewards/margins": 0.2514357566833496, "rewards/rejected": -0.5639545917510986, "step": 2349 }, { "epoch": 6.433949349760438, "grad_norm": 5.392638683319092, "learning_rate": 6.782191780821918e-07, "log_odds_chosen": 1.2482693195343018, "log_odds_ratio": -0.5509960651397705, "logits/chosen": 0.7299665808677673, "logits/rejected": 0.7916548252105713, "logps/chosen": -2.421403646469116, "logps/rejected": -3.592815399169922, "loss": 0.9587, "nll_loss": 0.9036134481430054, "rewards/accuracies": 0.875, "rewards/chosen": -0.24214038252830505, "rewards/margins": 0.11714117974042892, "rewards/rejected": -0.3592815399169922, "step": 2350 }, { "epoch": 6.436687200547571, "grad_norm": 3.858286142349243, "learning_rate": 6.78082191780822e-07, "log_odds_chosen": 1.131824016571045, "log_odds_ratio": -0.3872169852256775, "logits/chosen": 0.8163964748382568, "logits/rejected": 0.8408879041671753, "logps/chosen": -2.2960195541381836, "logps/rejected": -3.3664448261260986, "loss": 0.9003, "nll_loss": 0.8616125583648682, "rewards/accuracies": 0.875, "rewards/chosen": -0.22960197925567627, "rewards/margins": 0.10704252123832703, "rewards/rejected": -0.3366445004940033, "step": 2351 }, { "epoch": 6.439425051334703, "grad_norm": 5.284193992614746, "learning_rate": 6.77945205479452e-07, "log_odds_chosen": 1.8154773712158203, "log_odds_ratio": -0.2964423596858978, "logits/chosen": 0.8535261154174805, "logits/rejected": 0.8562554717063904, "logps/chosen": -2.5280139446258545, "logps/rejected": -4.267308712005615, "loss": 0.8424, "nll_loss": 0.8127149939537048, "rewards/accuracies": 0.875, "rewards/chosen": -0.25280141830444336, "rewards/margins": 0.17392946779727936, "rewards/rejected": -0.4267308712005615, "step": 2352 }, { "epoch": 6.442162902121835, "grad_norm": 3.78067946434021, "learning_rate": 6.778082191780822e-07, "log_odds_chosen": 1.6009958982467651, "log_odds_ratio": -0.309395968914032, "logits/chosen": 0.9008124470710754, "logits/rejected": 0.9104251861572266, "logps/chosen": -1.902448058128357, "logps/rejected": -3.3367514610290527, "loss": 0.7879, "nll_loss": 0.757009744644165, "rewards/accuracies": 0.875, "rewards/chosen": -0.19024479389190674, "rewards/margins": 0.14343035221099854, "rewards/rejected": -0.3336751461029053, "step": 2353 }, { "epoch": 6.444900752908967, "grad_norm": 3.76359224319458, "learning_rate": 6.776712328767124e-07, "log_odds_chosen": 1.6052732467651367, "log_odds_ratio": -0.33866792917251587, "logits/chosen": 0.7271382808685303, "logits/rejected": 0.7369123101234436, "logps/chosen": -2.065225124359131, "logps/rejected": -3.5878777503967285, "loss": 0.8414, "nll_loss": 0.807500958442688, "rewards/accuracies": 0.875, "rewards/chosen": -0.20652252435684204, "rewards/margins": 0.15226523578166962, "rewards/rejected": -0.35878777503967285, "step": 2354 }, { "epoch": 6.447638603696099, "grad_norm": 4.156328201293945, "learning_rate": 6.775342465753424e-07, "log_odds_chosen": 0.9329764246940613, "log_odds_ratio": -0.4057398736476898, "logits/chosen": 0.8586817979812622, "logits/rejected": 0.864391565322876, "logps/chosen": -2.0179505348205566, "logps/rejected": -2.817077875137329, "loss": 0.9334, "nll_loss": 0.8928484916687012, "rewards/accuracies": 0.875, "rewards/chosen": -0.2017950564622879, "rewards/margins": 0.07991272956132889, "rewards/rejected": -0.2817077934741974, "step": 2355 }, { "epoch": 6.450376454483231, "grad_norm": 3.824873924255371, "learning_rate": 6.773972602739726e-07, "log_odds_chosen": 1.0668845176696777, "log_odds_ratio": -0.45955896377563477, "logits/chosen": 0.8219756484031677, "logits/rejected": 0.8181686997413635, "logps/chosen": -1.9635343551635742, "logps/rejected": -2.96051287651062, "loss": 0.8762, "nll_loss": 0.8302932381629944, "rewards/accuracies": 0.875, "rewards/chosen": -0.19635343551635742, "rewards/margins": 0.09969787299633026, "rewards/rejected": -0.2960513234138489, "step": 2356 }, { "epoch": 6.453114305270363, "grad_norm": 6.390560626983643, "learning_rate": 6.772602739726027e-07, "log_odds_chosen": 1.1919726133346558, "log_odds_ratio": -0.4474440813064575, "logits/chosen": 0.7891979217529297, "logits/rejected": 0.6952636241912842, "logps/chosen": -2.3619184494018555, "logps/rejected": -3.4507899284362793, "loss": 0.879, "nll_loss": 0.8342719078063965, "rewards/accuracies": 0.625, "rewards/chosen": -0.23619186878204346, "rewards/margins": 0.10888714343309402, "rewards/rejected": -0.3450790047645569, "step": 2357 }, { "epoch": 6.455852156057495, "grad_norm": 3.245835304260254, "learning_rate": 6.771232876712329e-07, "log_odds_chosen": 3.337186813354492, "log_odds_ratio": -0.09989935159683228, "logits/chosen": 0.8024812936782837, "logits/rejected": 0.7694858908653259, "logps/chosen": -2.4403085708618164, "logps/rejected": -5.665743350982666, "loss": 0.8284, "nll_loss": 0.8183695077896118, "rewards/accuracies": 1.0, "rewards/chosen": -0.24403086304664612, "rewards/margins": 0.32254350185394287, "rewards/rejected": -0.5665743350982666, "step": 2358 }, { "epoch": 6.458590006844627, "grad_norm": 5.077489852905273, "learning_rate": 6.76986301369863e-07, "log_odds_chosen": 0.7105863690376282, "log_odds_ratio": -0.5771607160568237, "logits/chosen": 0.8136397004127502, "logits/rejected": 0.792866587638855, "logps/chosen": -2.50667142868042, "logps/rejected": -3.2219343185424805, "loss": 0.8715, "nll_loss": 0.8138238787651062, "rewards/accuracies": 0.625, "rewards/chosen": -0.25066715478897095, "rewards/margins": 0.07152626663446426, "rewards/rejected": -0.3221934139728546, "step": 2359 }, { "epoch": 6.461327857631759, "grad_norm": 4.239078521728516, "learning_rate": 6.768493150684931e-07, "log_odds_chosen": 1.6533513069152832, "log_odds_ratio": -0.32560279965400696, "logits/chosen": 0.8838316202163696, "logits/rejected": 0.8418965339660645, "logps/chosen": -2.563626527786255, "logps/rejected": -4.160959720611572, "loss": 0.8019, "nll_loss": 0.7693132758140564, "rewards/accuracies": 0.875, "rewards/chosen": -0.256362646818161, "rewards/margins": 0.15973332524299622, "rewards/rejected": -0.4160960018634796, "step": 2360 }, { "epoch": 6.464065708418891, "grad_norm": 4.1891679763793945, "learning_rate": 6.767123287671233e-07, "log_odds_chosen": 1.2681273221969604, "log_odds_ratio": -0.32680439949035645, "logits/chosen": 0.8966093063354492, "logits/rejected": 0.9766024351119995, "logps/chosen": -3.2964630126953125, "logps/rejected": -4.518615245819092, "loss": 0.8293, "nll_loss": 0.7966048121452332, "rewards/accuracies": 0.875, "rewards/chosen": -0.3296462893486023, "rewards/margins": 0.12221522629261017, "rewards/rejected": -0.45186156034469604, "step": 2361 }, { "epoch": 6.466803559206023, "grad_norm": 3.309372663497925, "learning_rate": 6.765753424657534e-07, "log_odds_chosen": 2.1788783073425293, "log_odds_ratio": -0.23346754908561707, "logits/chosen": 0.7843602299690247, "logits/rejected": 0.7785205841064453, "logps/chosen": -1.8273894786834717, "logps/rejected": -3.8840978145599365, "loss": 0.8014, "nll_loss": 0.7780833840370178, "rewards/accuracies": 0.875, "rewards/chosen": -0.18273895978927612, "rewards/margins": 0.2056708037853241, "rewards/rejected": -0.3884097635746002, "step": 2362 }, { "epoch": 6.469541409993155, "grad_norm": 3.643937587738037, "learning_rate": 6.764383561643835e-07, "log_odds_chosen": 1.5750869512557983, "log_odds_ratio": -0.3394261598587036, "logits/chosen": 0.8231241106987, "logits/rejected": 0.7713469862937927, "logps/chosen": -1.9988651275634766, "logps/rejected": -3.4446115493774414, "loss": 0.8185, "nll_loss": 0.7845194339752197, "rewards/accuracies": 0.875, "rewards/chosen": -0.1998865157365799, "rewards/margins": 0.14457464218139648, "rewards/rejected": -0.3444611430168152, "step": 2363 }, { "epoch": 6.4722792607802875, "grad_norm": 4.058936595916748, "learning_rate": 6.763013698630137e-07, "log_odds_chosen": 1.8892614841461182, "log_odds_ratio": -0.26182085275650024, "logits/chosen": 0.7429395914077759, "logits/rejected": 0.7158824801445007, "logps/chosen": -1.9611260890960693, "logps/rejected": -3.728123188018799, "loss": 0.8946, "nll_loss": 0.868446946144104, "rewards/accuracies": 1.0, "rewards/chosen": -0.19611263275146484, "rewards/margins": 0.1766997128725052, "rewards/rejected": -0.37281233072280884, "step": 2364 }, { "epoch": 6.4750171115674195, "grad_norm": 4.111305236816406, "learning_rate": 6.761643835616439e-07, "log_odds_chosen": 0.22474884986877441, "log_odds_ratio": -0.7293645739555359, "logits/chosen": 0.6573073863983154, "logits/rejected": 0.6331650614738464, "logps/chosen": -2.4688069820404053, "logps/rejected": -2.678288698196411, "loss": 0.851, "nll_loss": 0.7780691981315613, "rewards/accuracies": 0.5, "rewards/chosen": -0.24688071012496948, "rewards/margins": 0.020948167890310287, "rewards/rejected": -0.26782888174057007, "step": 2365 }, { "epoch": 6.477754962354552, "grad_norm": 3.7371370792388916, "learning_rate": 6.760273972602739e-07, "log_odds_chosen": 1.79835844039917, "log_odds_ratio": -0.28191664814949036, "logits/chosen": 0.7038167715072632, "logits/rejected": 0.6477853059768677, "logps/chosen": -2.112959146499634, "logps/rejected": -3.809743642807007, "loss": 0.8884, "nll_loss": 0.8601794242858887, "rewards/accuracies": 0.875, "rewards/chosen": -0.21129588782787323, "rewards/margins": 0.1696784645318985, "rewards/rejected": -0.38097435235977173, "step": 2366 }, { "epoch": 6.480492813141684, "grad_norm": 4.599696636199951, "learning_rate": 6.758904109589041e-07, "log_odds_chosen": 1.4267510175704956, "log_odds_ratio": -0.6995049715042114, "logits/chosen": 0.6604429483413696, "logits/rejected": 0.7009690403938293, "logps/chosen": -3.0431652069091797, "logps/rejected": -4.403725624084473, "loss": 0.9243, "nll_loss": 0.8543239831924438, "rewards/accuracies": 0.75, "rewards/chosen": -0.30431652069091797, "rewards/margins": 0.1360560655593872, "rewards/rejected": -0.4403725862503052, "step": 2367 }, { "epoch": 6.483230663928816, "grad_norm": 6.086296558380127, "learning_rate": 6.757534246575343e-07, "log_odds_chosen": 0.18768636882305145, "log_odds_ratio": -0.768550455570221, "logits/chosen": 0.7837249636650085, "logits/rejected": 0.7778993844985962, "logps/chosen": -3.21012544631958, "logps/rejected": -3.3560614585876465, "loss": 0.9866, "nll_loss": 0.9097045660018921, "rewards/accuracies": 0.625, "rewards/chosen": -0.32101255655288696, "rewards/margins": 0.014593586325645447, "rewards/rejected": -0.3356061577796936, "step": 2368 }, { "epoch": 6.485968514715948, "grad_norm": 3.976505994796753, "learning_rate": 6.756164383561643e-07, "log_odds_chosen": 0.8842127323150635, "log_odds_ratio": -0.4670405089855194, "logits/chosen": 1.0105183124542236, "logits/rejected": 0.947573721408844, "logps/chosen": -1.989444613456726, "logps/rejected": -2.82259202003479, "loss": 0.8435, "nll_loss": 0.7968012094497681, "rewards/accuracies": 0.875, "rewards/chosen": -0.19894446432590485, "rewards/margins": 0.08331473171710968, "rewards/rejected": -0.2822592258453369, "step": 2369 }, { "epoch": 6.48870636550308, "grad_norm": 5.067991733551025, "learning_rate": 6.754794520547945e-07, "log_odds_chosen": 1.8748574256896973, "log_odds_ratio": -0.35736700892448425, "logits/chosen": 1.0377049446105957, "logits/rejected": 1.0809707641601562, "logps/chosen": -2.45451021194458, "logps/rejected": -4.255002021789551, "loss": 0.7988, "nll_loss": 0.7631077170372009, "rewards/accuracies": 0.875, "rewards/chosen": -0.24545103311538696, "rewards/margins": 0.18004916608333588, "rewards/rejected": -0.42550021409988403, "step": 2370 }, { "epoch": 6.491444216290212, "grad_norm": 3.7868523597717285, "learning_rate": 6.753424657534246e-07, "log_odds_chosen": 1.1406089067459106, "log_odds_ratio": -0.4905027747154236, "logits/chosen": 0.9838632345199585, "logits/rejected": 1.0357379913330078, "logps/chosen": -2.5832974910736084, "logps/rejected": -3.6903014183044434, "loss": 0.8107, "nll_loss": 0.7616197466850281, "rewards/accuracies": 0.75, "rewards/chosen": -0.25832971930503845, "rewards/margins": 0.11070041358470917, "rewards/rejected": -0.3690301477909088, "step": 2371 }, { "epoch": 6.494182067077344, "grad_norm": 4.007914066314697, "learning_rate": 6.752054794520548e-07, "log_odds_chosen": 0.9714601635932922, "log_odds_ratio": -0.35335439443588257, "logits/chosen": 0.7577988505363464, "logits/rejected": 0.7346183657646179, "logps/chosen": -1.658735752105713, "logps/rejected": -2.4693217277526855, "loss": 0.8128, "nll_loss": 0.7774211764335632, "rewards/accuracies": 1.0, "rewards/chosen": -0.16587358713150024, "rewards/margins": 0.08105858415365219, "rewards/rejected": -0.24693217873573303, "step": 2372 }, { "epoch": 6.496919917864476, "grad_norm": 3.750276803970337, "learning_rate": 6.750684931506849e-07, "log_odds_chosen": 2.6751320362091064, "log_odds_ratio": -0.15681064128875732, "logits/chosen": 0.8455970287322998, "logits/rejected": 0.8594300746917725, "logps/chosen": -2.572394371032715, "logps/rejected": -5.165711402893066, "loss": 0.7704, "nll_loss": 0.754703938961029, "rewards/accuracies": 1.0, "rewards/chosen": -0.2572394609451294, "rewards/margins": 0.25933170318603516, "rewards/rejected": -0.5165711641311646, "step": 2373 }, { "epoch": 6.499657768651608, "grad_norm": 5.116981506347656, "learning_rate": 6.74931506849315e-07, "log_odds_chosen": 0.38549989461898804, "log_odds_ratio": -0.6625637412071228, "logits/chosen": 0.7460868954658508, "logits/rejected": 0.7803071141242981, "logps/chosen": -2.2498247623443604, "logps/rejected": -2.6053647994995117, "loss": 0.9612, "nll_loss": 0.8949175477027893, "rewards/accuracies": 0.5, "rewards/chosen": -0.22498250007629395, "rewards/margins": 0.03555399924516678, "rewards/rejected": -0.2605364918708801, "step": 2374 }, { "epoch": 6.50239561943874, "grad_norm": 4.235517501831055, "learning_rate": 6.747945205479452e-07, "log_odds_chosen": 2.6951217651367188, "log_odds_ratio": -0.2676003873348236, "logits/chosen": 1.0753015279769897, "logits/rejected": 1.119889736175537, "logps/chosen": -3.2304017543792725, "logps/rejected": -5.8581438064575195, "loss": 0.7132, "nll_loss": 0.6864522099494934, "rewards/accuracies": 0.875, "rewards/chosen": -0.3230401873588562, "rewards/margins": 0.26277419924736023, "rewards/rejected": -0.585814356803894, "step": 2375 }, { "epoch": 6.505133470225873, "grad_norm": 4.811735153198242, "learning_rate": 6.746575342465753e-07, "log_odds_chosen": 0.6115168929100037, "log_odds_ratio": -0.5125175714492798, "logits/chosen": 0.8767491579055786, "logits/rejected": 0.8497672080993652, "logps/chosen": -1.6930499076843262, "logps/rejected": -2.124645233154297, "loss": 0.8109, "nll_loss": 0.7596408128738403, "rewards/accuracies": 0.875, "rewards/chosen": -0.1693049967288971, "rewards/margins": 0.04315955191850662, "rewards/rejected": -0.21246454119682312, "step": 2376 }, { "epoch": 6.507871321013004, "grad_norm": 4.211373329162598, "learning_rate": 6.745205479452054e-07, "log_odds_chosen": 2.28464412689209, "log_odds_ratio": -0.2609044015407562, "logits/chosen": 0.8052364587783813, "logits/rejected": 0.820263683795929, "logps/chosen": -2.462432384490967, "logps/rejected": -4.672883033752441, "loss": 0.9987, "nll_loss": 0.9726547002792358, "rewards/accuracies": 1.0, "rewards/chosen": -0.24624323844909668, "rewards/margins": 0.22104504704475403, "rewards/rejected": -0.4672882556915283, "step": 2377 }, { "epoch": 6.510609171800137, "grad_norm": 3.4761602878570557, "learning_rate": 6.743835616438356e-07, "log_odds_chosen": 1.2655287981033325, "log_odds_ratio": -0.3448224365711212, "logits/chosen": 0.7929933071136475, "logits/rejected": 0.8384116888046265, "logps/chosen": -2.0549111366271973, "logps/rejected": -3.2314488887786865, "loss": 0.8076, "nll_loss": 0.7731600999832153, "rewards/accuracies": 1.0, "rewards/chosen": -0.20549112558364868, "rewards/margins": 0.1176537424325943, "rewards/rejected": -0.3231448531150818, "step": 2378 }, { "epoch": 6.5133470225872685, "grad_norm": 4.0714616775512695, "learning_rate": 6.742465753424658e-07, "log_odds_chosen": 2.1197965145111084, "log_odds_ratio": -0.32573047280311584, "logits/chosen": 1.0508623123168945, "logits/rejected": 1.0684173107147217, "logps/chosen": -2.106341600418091, "logps/rejected": -4.145821571350098, "loss": 0.7316, "nll_loss": 0.6989883780479431, "rewards/accuracies": 0.75, "rewards/chosen": -0.21063417196273804, "rewards/margins": 0.20394794642925262, "rewards/rejected": -0.41458213329315186, "step": 2379 }, { "epoch": 6.5160848733744015, "grad_norm": 6.6857781410217285, "learning_rate": 6.741095890410958e-07, "log_odds_chosen": 0.513785719871521, "log_odds_ratio": -0.6714979410171509, "logits/chosen": 0.7200684547424316, "logits/rejected": 0.7326302528381348, "logps/chosen": -2.885698080062866, "logps/rejected": -3.323181629180908, "loss": 0.9115, "nll_loss": 0.844327449798584, "rewards/accuracies": 0.75, "rewards/chosen": -0.288569837808609, "rewards/margins": 0.04374833405017853, "rewards/rejected": -0.33231815695762634, "step": 2380 }, { "epoch": 6.5188227241615335, "grad_norm": 4.320358753204346, "learning_rate": 6.73972602739726e-07, "log_odds_chosen": 1.6228611469268799, "log_odds_ratio": -0.32754743099212646, "logits/chosen": 0.9020963311195374, "logits/rejected": 0.9192915558815002, "logps/chosen": -3.045639753341675, "logps/rejected": -4.557577610015869, "loss": 0.7947, "nll_loss": 0.7619410753250122, "rewards/accuracies": 0.625, "rewards/chosen": -0.304563969373703, "rewards/margins": 0.15119381248950958, "rewards/rejected": -0.4557577967643738, "step": 2381 }, { "epoch": 6.521560574948666, "grad_norm": 4.407508373260498, "learning_rate": 6.738356164383562e-07, "log_odds_chosen": 0.9888286590576172, "log_odds_ratio": -0.36094415187835693, "logits/chosen": 0.8534530997276306, "logits/rejected": 0.8094001412391663, "logps/chosen": -2.0534472465515137, "logps/rejected": -2.944521188735962, "loss": 0.857, "nll_loss": 0.8209368586540222, "rewards/accuracies": 1.0, "rewards/chosen": -0.20534473657608032, "rewards/margins": 0.08910738676786423, "rewards/rejected": -0.29445213079452515, "step": 2382 }, { "epoch": 6.524298425735798, "grad_norm": 4.336147785186768, "learning_rate": 6.736986301369862e-07, "log_odds_chosen": 0.6386339664459229, "log_odds_ratio": -0.6520290374755859, "logits/chosen": 1.0397248268127441, "logits/rejected": 1.1198170185089111, "logps/chosen": -2.758068323135376, "logps/rejected": -3.37860107421875, "loss": 0.865, "nll_loss": 0.7998180985450745, "rewards/accuracies": 0.625, "rewards/chosen": -0.27580684423446655, "rewards/margins": 0.062053292989730835, "rewards/rejected": -0.3378601372241974, "step": 2383 }, { "epoch": 6.52703627652293, "grad_norm": 4.4177565574646, "learning_rate": 6.735616438356164e-07, "log_odds_chosen": 1.5545223951339722, "log_odds_ratio": -0.4416053295135498, "logits/chosen": 0.7203580737113953, "logits/rejected": 0.6221747398376465, "logps/chosen": -1.6956154108047485, "logps/rejected": -3.149956703186035, "loss": 0.9169, "nll_loss": 0.8727675676345825, "rewards/accuracies": 0.75, "rewards/chosen": -0.16956153512001038, "rewards/margins": 0.14543411135673523, "rewards/rejected": -0.3149956464767456, "step": 2384 }, { "epoch": 6.529774127310062, "grad_norm": 8.432318687438965, "learning_rate": 6.734246575342466e-07, "log_odds_chosen": 1.3709399700164795, "log_odds_ratio": -0.4558557868003845, "logits/chosen": 0.7738704681396484, "logits/rejected": 0.6839836239814758, "logps/chosen": -2.039125919342041, "logps/rejected": -3.263906955718994, "loss": 0.9025, "nll_loss": 0.8569250702857971, "rewards/accuracies": 0.875, "rewards/chosen": -0.20391260087490082, "rewards/margins": 0.12247809767723083, "rewards/rejected": -0.32639074325561523, "step": 2385 }, { "epoch": 6.532511978097194, "grad_norm": 4.536214351654053, "learning_rate": 6.732876712328767e-07, "log_odds_chosen": 1.5417344570159912, "log_odds_ratio": -0.5164502263069153, "logits/chosen": 0.7678194046020508, "logits/rejected": 0.7408398985862732, "logps/chosen": -2.0632221698760986, "logps/rejected": -3.4492244720458984, "loss": 0.8272, "nll_loss": 0.7755920886993408, "rewards/accuracies": 0.75, "rewards/chosen": -0.20632222294807434, "rewards/margins": 0.13860023021697998, "rewards/rejected": -0.3449224531650543, "step": 2386 }, { "epoch": 6.535249828884326, "grad_norm": 5.390941143035889, "learning_rate": 6.731506849315068e-07, "log_odds_chosen": 1.3330504894256592, "log_odds_ratio": -0.3257715106010437, "logits/chosen": 0.7922794818878174, "logits/rejected": 0.8312150239944458, "logps/chosen": -2.010176658630371, "logps/rejected": -3.252347946166992, "loss": 0.8088, "nll_loss": 0.7762614488601685, "rewards/accuracies": 1.0, "rewards/chosen": -0.20101767778396606, "rewards/margins": 0.12421712279319763, "rewards/rejected": -0.3252348005771637, "step": 2387 }, { "epoch": 6.537987679671458, "grad_norm": 4.034801959991455, "learning_rate": 6.730136986301369e-07, "log_odds_chosen": 1.7332656383514404, "log_odds_ratio": -0.36275428533554077, "logits/chosen": 0.9478973746299744, "logits/rejected": 0.9829310774803162, "logps/chosen": -1.9059984683990479, "logps/rejected": -3.484480381011963, "loss": 0.8487, "nll_loss": 0.812381386756897, "rewards/accuracies": 0.875, "rewards/chosen": -0.19059984385967255, "rewards/margins": 0.15784819424152374, "rewards/rejected": -0.3484480381011963, "step": 2388 }, { "epoch": 6.54072553045859, "grad_norm": 3.3723154067993164, "learning_rate": 6.728767123287671e-07, "log_odds_chosen": 2.6115856170654297, "log_odds_ratio": -0.19468070566654205, "logits/chosen": 1.1132586002349854, "logits/rejected": 1.1908032894134521, "logps/chosen": -2.077065944671631, "logps/rejected": -4.583427429199219, "loss": 0.6666, "nll_loss": 0.6471657156944275, "rewards/accuracies": 0.875, "rewards/chosen": -0.20770658552646637, "rewards/margins": 0.25063619017601013, "rewards/rejected": -0.4583427906036377, "step": 2389 }, { "epoch": 6.543463381245722, "grad_norm": 3.7716622352600098, "learning_rate": 6.727397260273972e-07, "log_odds_chosen": 2.4036951065063477, "log_odds_ratio": -0.14315202832221985, "logits/chosen": 1.0432026386260986, "logits/rejected": 1.0494880676269531, "logps/chosen": -2.89432954788208, "logps/rejected": -5.225564002990723, "loss": 0.754, "nll_loss": 0.7396709322929382, "rewards/accuracies": 1.0, "rewards/chosen": -0.28943297266960144, "rewards/margins": 0.23312346637248993, "rewards/rejected": -0.5225564241409302, "step": 2390 }, { "epoch": 6.546201232032854, "grad_norm": 4.414161205291748, "learning_rate": 6.726027397260273e-07, "log_odds_chosen": 1.3644368648529053, "log_odds_ratio": -0.3798845410346985, "logits/chosen": 0.9972831010818481, "logits/rejected": 0.9719988703727722, "logps/chosen": -2.5069127082824707, "logps/rejected": -3.776918411254883, "loss": 0.7957, "nll_loss": 0.7577471733093262, "rewards/accuracies": 0.625, "rewards/chosen": -0.250691294670105, "rewards/margins": 0.1270005851984024, "rewards/rejected": -0.3776918649673462, "step": 2391 }, { "epoch": 6.548939082819986, "grad_norm": 4.047641277313232, "learning_rate": 6.724657534246575e-07, "log_odds_chosen": 1.0396842956542969, "log_odds_ratio": -0.3877533972263336, "logits/chosen": 0.7891355752944946, "logits/rejected": 0.766669750213623, "logps/chosen": -2.507401943206787, "logps/rejected": -3.4786131381988525, "loss": 0.8614, "nll_loss": 0.8226605653762817, "rewards/accuracies": 0.75, "rewards/chosen": -0.2507401704788208, "rewards/margins": 0.09712111949920654, "rewards/rejected": -0.34786131978034973, "step": 2392 }, { "epoch": 6.551676933607118, "grad_norm": 6.127111434936523, "learning_rate": 6.723287671232877e-07, "log_odds_chosen": 1.4124988317489624, "log_odds_ratio": -0.7410080432891846, "logits/chosen": 0.8062689304351807, "logits/rejected": 0.8653169870376587, "logps/chosen": -2.7259631156921387, "logps/rejected": -4.098403453826904, "loss": 0.9053, "nll_loss": 0.8312355279922485, "rewards/accuracies": 0.75, "rewards/chosen": -0.2725963294506073, "rewards/margins": 0.13724404573440552, "rewards/rejected": -0.4098403751850128, "step": 2393 }, { "epoch": 6.55441478439425, "grad_norm": 5.351639747619629, "learning_rate": 6.721917808219177e-07, "log_odds_chosen": 2.073164463043213, "log_odds_ratio": -0.286404013633728, "logits/chosen": 0.7687166929244995, "logits/rejected": 0.7910709381103516, "logps/chosen": -2.707548141479492, "logps/rejected": -4.67653751373291, "loss": 0.778, "nll_loss": 0.7493236064910889, "rewards/accuracies": 0.875, "rewards/chosen": -0.2707548439502716, "rewards/margins": 0.1968989372253418, "rewards/rejected": -0.4676537811756134, "step": 2394 }, { "epoch": 6.5571526351813825, "grad_norm": 3.4264204502105713, "learning_rate": 6.720547945205479e-07, "log_odds_chosen": 1.3718433380126953, "log_odds_ratio": -0.3142964243888855, "logits/chosen": 0.655809760093689, "logits/rejected": 0.6167438626289368, "logps/chosen": -1.7832640409469604, "logps/rejected": -2.9836792945861816, "loss": 0.8159, "nll_loss": 0.7844725251197815, "rewards/accuracies": 0.875, "rewards/chosen": -0.17832641303539276, "rewards/margins": 0.12004151195287704, "rewards/rejected": -0.2983679175376892, "step": 2395 }, { "epoch": 6.5598904859685145, "grad_norm": 3.8176798820495605, "learning_rate": 6.719178082191781e-07, "log_odds_chosen": 0.9980384707450867, "log_odds_ratio": -0.3654499053955078, "logits/chosen": 0.6825320720672607, "logits/rejected": 0.7031561732292175, "logps/chosen": -1.8676081895828247, "logps/rejected": -2.740055561065674, "loss": 0.9206, "nll_loss": 0.8840759992599487, "rewards/accuracies": 1.0, "rewards/chosen": -0.186760812997818, "rewards/margins": 0.08724473416805267, "rewards/rejected": -0.27400556206703186, "step": 2396 }, { "epoch": 6.562628336755647, "grad_norm": 4.458269119262695, "learning_rate": 6.717808219178081e-07, "log_odds_chosen": 0.9269805550575256, "log_odds_ratio": -0.6087609529495239, "logits/chosen": 0.6580164432525635, "logits/rejected": 0.5939469337463379, "logps/chosen": -2.093143939971924, "logps/rejected": -2.906879186630249, "loss": 0.9191, "nll_loss": 0.8582171201705933, "rewards/accuracies": 0.625, "rewards/chosen": -0.20931437611579895, "rewards/margins": 0.08137353509664536, "rewards/rejected": -0.2906879186630249, "step": 2397 }, { "epoch": 6.565366187542779, "grad_norm": 4.139200210571289, "learning_rate": 6.716438356164383e-07, "log_odds_chosen": 1.2560033798217773, "log_odds_ratio": -0.31707561016082764, "logits/chosen": 0.5985915660858154, "logits/rejected": 0.5426530838012695, "logps/chosen": -2.117091178894043, "logps/rejected": -3.273714542388916, "loss": 0.8762, "nll_loss": 0.8445274829864502, "rewards/accuracies": 0.875, "rewards/chosen": -0.21170911192893982, "rewards/margins": 0.1156623438000679, "rewards/rejected": -0.3273714482784271, "step": 2398 }, { "epoch": 6.568104038329911, "grad_norm": 5.3297953605651855, "learning_rate": 6.715068493150685e-07, "log_odds_chosen": 1.054570198059082, "log_odds_ratio": -0.6916612982749939, "logits/chosen": 0.8741148710250854, "logits/rejected": 0.9288160800933838, "logps/chosen": -3.3240206241607666, "logps/rejected": -4.368969440460205, "loss": 0.9137, "nll_loss": 0.8445613980293274, "rewards/accuracies": 0.875, "rewards/chosen": -0.3324020504951477, "rewards/margins": 0.10449491441249847, "rewards/rejected": -0.436896950006485, "step": 2399 }, { "epoch": 6.570841889117043, "grad_norm": 3.7009572982788086, "learning_rate": 6.713698630136986e-07, "log_odds_chosen": 0.9752368927001953, "log_odds_ratio": -0.4266175627708435, "logits/chosen": 0.6436668634414673, "logits/rejected": 0.6464307308197021, "logps/chosen": -2.1482746601104736, "logps/rejected": -3.0040392875671387, "loss": 0.8516, "nll_loss": 0.8089253902435303, "rewards/accuracies": 0.875, "rewards/chosen": -0.21482747793197632, "rewards/margins": 0.08557644486427307, "rewards/rejected": -0.3004039227962494, "step": 2400 }, { "epoch": 6.573579739904175, "grad_norm": 3.1967360973358154, "learning_rate": 6.712328767123287e-07, "log_odds_chosen": 2.02264404296875, "log_odds_ratio": -0.24146632850170135, "logits/chosen": 0.9461396336555481, "logits/rejected": 0.9231455326080322, "logps/chosen": -2.6367642879486084, "logps/rejected": -4.578272819519043, "loss": 0.8167, "nll_loss": 0.7925997376441956, "rewards/accuracies": 1.0, "rewards/chosen": -0.2636764347553253, "rewards/margins": 0.1941508650779724, "rewards/rejected": -0.45782727003097534, "step": 2401 }, { "epoch": 6.576317590691307, "grad_norm": 4.117312908172607, "learning_rate": 6.710958904109588e-07, "log_odds_chosen": 0.8671035766601562, "log_odds_ratio": -0.475117564201355, "logits/chosen": 0.7440263032913208, "logits/rejected": 0.6931336522102356, "logps/chosen": -2.072829246520996, "logps/rejected": -2.8718960285186768, "loss": 0.9062, "nll_loss": 0.8586429953575134, "rewards/accuracies": 0.75, "rewards/chosen": -0.2072829306125641, "rewards/margins": 0.07990668714046478, "rewards/rejected": -0.2871896028518677, "step": 2402 }, { "epoch": 6.57905544147844, "grad_norm": 4.874130725860596, "learning_rate": 6.70958904109589e-07, "log_odds_chosen": 1.8149200677871704, "log_odds_ratio": -0.24258804321289062, "logits/chosen": 0.9747137427330017, "logits/rejected": 1.021903395652771, "logps/chosen": -2.4858627319335938, "logps/rejected": -4.231647491455078, "loss": 0.7774, "nll_loss": 0.7531083226203918, "rewards/accuracies": 0.875, "rewards/chosen": -0.24858629703521729, "rewards/margins": 0.17457842826843262, "rewards/rejected": -0.4231647253036499, "step": 2403 }, { "epoch": 6.581793292265571, "grad_norm": 4.655426502227783, "learning_rate": 6.708219178082191e-07, "log_odds_chosen": 0.7432751059532166, "log_odds_ratio": -0.46473467350006104, "logits/chosen": 0.8243083357810974, "logits/rejected": 0.7907841205596924, "logps/chosen": -1.51301908493042, "logps/rejected": -2.116485118865967, "loss": 0.8662, "nll_loss": 0.819713294506073, "rewards/accuracies": 0.875, "rewards/chosen": -0.15130192041397095, "rewards/margins": 0.060346610844135284, "rewards/rejected": -0.21164852380752563, "step": 2404 }, { "epoch": 6.584531143052704, "grad_norm": 5.276735782623291, "learning_rate": 6.706849315068492e-07, "log_odds_chosen": 1.0453989505767822, "log_odds_ratio": -0.5132912993431091, "logits/chosen": 0.7684009075164795, "logits/rejected": 0.7427758574485779, "logps/chosen": -2.7734603881835938, "logps/rejected": -3.739309310913086, "loss": 0.8335, "nll_loss": 0.7821227312088013, "rewards/accuracies": 0.875, "rewards/chosen": -0.27734604477882385, "rewards/margins": 0.09658490121364594, "rewards/rejected": -0.3739309310913086, "step": 2405 }, { "epoch": 6.587268993839836, "grad_norm": 4.483005046844482, "learning_rate": 6.705479452054794e-07, "log_odds_chosen": 1.0666204690933228, "log_odds_ratio": -0.5475897192955017, "logits/chosen": 0.7838762402534485, "logits/rejected": 0.7704315781593323, "logps/chosen": -2.2410976886749268, "logps/rejected": -3.1887948513031006, "loss": 0.9691, "nll_loss": 0.9143171310424805, "rewards/accuracies": 0.75, "rewards/chosen": -0.22410976886749268, "rewards/margins": 0.094769686460495, "rewards/rejected": -0.31887945532798767, "step": 2406 }, { "epoch": 6.590006844626968, "grad_norm": 3.471829652786255, "learning_rate": 6.704109589041096e-07, "log_odds_chosen": 2.544609546661377, "log_odds_ratio": -0.45069706439971924, "logits/chosen": 0.7661542892456055, "logits/rejected": 0.854647159576416, "logps/chosen": -2.5955872535705566, "logps/rejected": -5.058379173278809, "loss": 0.8197, "nll_loss": 0.7746288180351257, "rewards/accuracies": 0.875, "rewards/chosen": -0.2595587372779846, "rewards/margins": 0.24627916514873505, "rewards/rejected": -0.5058379173278809, "step": 2407 }, { "epoch": 6.5927446954141, "grad_norm": 4.228390693664551, "learning_rate": 6.702739726027396e-07, "log_odds_chosen": 1.982186198234558, "log_odds_ratio": -0.2574664354324341, "logits/chosen": 0.7771530151367188, "logits/rejected": 0.6964004039764404, "logps/chosen": -1.7144581079483032, "logps/rejected": -3.503726005554199, "loss": 0.8913, "nll_loss": 0.8655509948730469, "rewards/accuracies": 1.0, "rewards/chosen": -0.1714458167552948, "rewards/margins": 0.17892681062221527, "rewards/rejected": -0.3503726124763489, "step": 2408 }, { "epoch": 6.595482546201232, "grad_norm": 4.59391450881958, "learning_rate": 6.701369863013698e-07, "log_odds_chosen": 0.9436490535736084, "log_odds_ratio": -0.4711417555809021, "logits/chosen": 0.7444230318069458, "logits/rejected": 0.654735803604126, "logps/chosen": -2.190164566040039, "logps/rejected": -3.0361156463623047, "loss": 0.8879, "nll_loss": 0.8408274054527283, "rewards/accuracies": 0.875, "rewards/chosen": -0.21901647746562958, "rewards/margins": 0.08459508419036865, "rewards/rejected": -0.3036115765571594, "step": 2409 }, { "epoch": 6.598220396988364, "grad_norm": 4.025485992431641, "learning_rate": 6.7e-07, "log_odds_chosen": 2.621494770050049, "log_odds_ratio": -0.12599408626556396, "logits/chosen": 0.9532225728034973, "logits/rejected": 1.0378673076629639, "logps/chosen": -2.2860076427459717, "logps/rejected": -4.7567057609558105, "loss": 0.7783, "nll_loss": 0.7656916379928589, "rewards/accuracies": 1.0, "rewards/chosen": -0.22860078513622284, "rewards/margins": 0.2470698058605194, "rewards/rejected": -0.47567057609558105, "step": 2410 }, { "epoch": 6.6009582477754964, "grad_norm": 4.054734706878662, "learning_rate": 6.6986301369863e-07, "log_odds_chosen": 1.639070987701416, "log_odds_ratio": -0.5688203573226929, "logits/chosen": 0.7054183483123779, "logits/rejected": 0.7432386875152588, "logps/chosen": -2.452967882156372, "logps/rejected": -4.071369171142578, "loss": 0.8678, "nll_loss": 0.8109038472175598, "rewards/accuracies": 0.625, "rewards/chosen": -0.24529677629470825, "rewards/margins": 0.16184011101722717, "rewards/rejected": -0.4071369171142578, "step": 2411 }, { "epoch": 6.6036960985626285, "grad_norm": 3.9626026153564453, "learning_rate": 6.697260273972602e-07, "log_odds_chosen": 0.8611394166946411, "log_odds_ratio": -0.539101243019104, "logits/chosen": 0.8502886295318604, "logits/rejected": 0.7900049090385437, "logps/chosen": -2.0706984996795654, "logps/rejected": -2.8121519088745117, "loss": 0.8838, "nll_loss": 0.8298907279968262, "rewards/accuracies": 0.75, "rewards/chosen": -0.20706985890865326, "rewards/margins": 0.07414532452821732, "rewards/rejected": -0.28121519088745117, "step": 2412 }, { "epoch": 6.606433949349761, "grad_norm": 6.116067409515381, "learning_rate": 6.695890410958904e-07, "log_odds_chosen": 0.7597876787185669, "log_odds_ratio": -0.6106767058372498, "logits/chosen": 0.8906958699226379, "logits/rejected": 0.9514769911766052, "logps/chosen": -3.1264379024505615, "logps/rejected": -3.8691277503967285, "loss": 0.9583, "nll_loss": 0.8971843719482422, "rewards/accuracies": 0.75, "rewards/chosen": -0.31264379620552063, "rewards/margins": 0.07426899671554565, "rewards/rejected": -0.3869127929210663, "step": 2413 }, { "epoch": 6.609171800136893, "grad_norm": 4.477991104125977, "learning_rate": 6.694520547945204e-07, "log_odds_chosen": 1.727806568145752, "log_odds_ratio": -0.3192831575870514, "logits/chosen": 0.6222209930419922, "logits/rejected": 0.6341156959533691, "logps/chosen": -2.3518996238708496, "logps/rejected": -3.972784996032715, "loss": 0.8571, "nll_loss": 0.8251473903656006, "rewards/accuracies": 0.875, "rewards/chosen": -0.23518995940685272, "rewards/margins": 0.1620885580778122, "rewards/rejected": -0.39727848768234253, "step": 2414 }, { "epoch": 6.611909650924025, "grad_norm": 4.129980564117432, "learning_rate": 6.693150684931506e-07, "log_odds_chosen": 2.9484755992889404, "log_odds_ratio": -0.24279367923736572, "logits/chosen": 0.8499971628189087, "logits/rejected": 0.8355420231819153, "logps/chosen": -2.1110987663269043, "logps/rejected": -4.9092793464660645, "loss": 0.8429, "nll_loss": 0.8186111450195312, "rewards/accuracies": 0.875, "rewards/chosen": -0.21110987663269043, "rewards/margins": 0.279818058013916, "rewards/rejected": -0.49092793464660645, "step": 2415 }, { "epoch": 6.614647501711157, "grad_norm": 4.41670036315918, "learning_rate": 6.691780821917809e-07, "log_odds_chosen": 1.067096471786499, "log_odds_ratio": -0.7078261375427246, "logits/chosen": 0.910944402217865, "logits/rejected": 0.9438408017158508, "logps/chosen": -2.892331123352051, "logps/rejected": -3.893629312515259, "loss": 0.8864, "nll_loss": 0.8156223297119141, "rewards/accuracies": 0.5, "rewards/chosen": -0.28923311829566956, "rewards/margins": 0.10012981295585632, "rewards/rejected": -0.3893629312515259, "step": 2416 }, { "epoch": 6.617385352498289, "grad_norm": 4.1702728271484375, "learning_rate": 6.69041095890411e-07, "log_odds_chosen": 1.8964946269989014, "log_odds_ratio": -0.3161481022834778, "logits/chosen": 0.9650616645812988, "logits/rejected": 0.9700950980186462, "logps/chosen": -2.417250394821167, "logps/rejected": -4.239140510559082, "loss": 0.8308, "nll_loss": 0.7991704940795898, "rewards/accuracies": 0.875, "rewards/chosen": -0.24172505736351013, "rewards/margins": 0.18218904733657837, "rewards/rejected": -0.4239140450954437, "step": 2417 }, { "epoch": 6.620123203285421, "grad_norm": 4.375664234161377, "learning_rate": 6.68904109589041e-07, "log_odds_chosen": 1.5779368877410889, "log_odds_ratio": -0.3173789978027344, "logits/chosen": 0.8226938247680664, "logits/rejected": 0.8282114863395691, "logps/chosen": -2.3039917945861816, "logps/rejected": -3.810028076171875, "loss": 0.8021, "nll_loss": 0.7703466415405273, "rewards/accuracies": 0.875, "rewards/chosen": -0.23039917647838593, "rewards/margins": 0.15060363709926605, "rewards/rejected": -0.3810027837753296, "step": 2418 }, { "epoch": 6.622861054072553, "grad_norm": 4.5101637840271, "learning_rate": 6.687671232876711e-07, "log_odds_chosen": 0.7199650406837463, "log_odds_ratio": -0.49330246448516846, "logits/chosen": 0.9173007011413574, "logits/rejected": 0.9868651628494263, "logps/chosen": -2.4974865913391113, "logps/rejected": -3.189682960510254, "loss": 0.8781, "nll_loss": 0.828729510307312, "rewards/accuracies": 0.75, "rewards/chosen": -0.24974866211414337, "rewards/margins": 0.06921964883804321, "rewards/rejected": -0.3189682960510254, "step": 2419 }, { "epoch": 6.625598904859685, "grad_norm": 4.117504596710205, "learning_rate": 6.686301369863014e-07, "log_odds_chosen": 1.778176188468933, "log_odds_ratio": -0.39061522483825684, "logits/chosen": 0.9561973810195923, "logits/rejected": 0.9869678020477295, "logps/chosen": -2.4117794036865234, "logps/rejected": -4.081124305725098, "loss": 0.7745, "nll_loss": 0.7354657053947449, "rewards/accuracies": 0.75, "rewards/chosen": -0.2411779761314392, "rewards/margins": 0.16693450510501862, "rewards/rejected": -0.40811246633529663, "step": 2420 }, { "epoch": 6.628336755646817, "grad_norm": 4.5089335441589355, "learning_rate": 6.684931506849316e-07, "log_odds_chosen": 1.6010658740997314, "log_odds_ratio": -0.3172936737537384, "logits/chosen": 0.8501691818237305, "logits/rejected": 0.8629038333892822, "logps/chosen": -2.3685991764068604, "logps/rejected": -3.8739986419677734, "loss": 0.7649, "nll_loss": 0.7331540584564209, "rewards/accuracies": 0.875, "rewards/chosen": -0.23685991764068604, "rewards/margins": 0.15053996443748474, "rewards/rejected": -0.3873998820781708, "step": 2421 }, { "epoch": 6.631074606433949, "grad_norm": 4.066762924194336, "learning_rate": 6.683561643835616e-07, "log_odds_chosen": 2.8791160583496094, "log_odds_ratio": -0.11154183745384216, "logits/chosen": 0.9072757959365845, "logits/rejected": 0.8726394176483154, "logps/chosen": -1.8882298469543457, "logps/rejected": -4.6097283363342285, "loss": 0.7384, "nll_loss": 0.7272030711174011, "rewards/accuracies": 1.0, "rewards/chosen": -0.18882298469543457, "rewards/margins": 0.27214986085891724, "rewards/rejected": -0.4609728455543518, "step": 2422 }, { "epoch": 6.633812457221081, "grad_norm": 4.186716079711914, "learning_rate": 6.682191780821918e-07, "log_odds_chosen": 1.0798406600952148, "log_odds_ratio": -0.437114417552948, "logits/chosen": 0.8956177234649658, "logits/rejected": 0.9119107723236084, "logps/chosen": -2.0817227363586426, "logps/rejected": -3.0827503204345703, "loss": 0.7508, "nll_loss": 0.7070944905281067, "rewards/accuracies": 0.75, "rewards/chosen": -0.2081722915172577, "rewards/margins": 0.1001027524471283, "rewards/rejected": -0.308275043964386, "step": 2423 }, { "epoch": 6.636550308008213, "grad_norm": 4.124065399169922, "learning_rate": 6.68082191780822e-07, "log_odds_chosen": 0.9117804765701294, "log_odds_ratio": -0.45911937952041626, "logits/chosen": 0.8252956867218018, "logits/rejected": 0.7837886810302734, "logps/chosen": -2.867955446243286, "logps/rejected": -3.704298496246338, "loss": 0.9633, "nll_loss": 0.9173852801322937, "rewards/accuracies": 0.875, "rewards/chosen": -0.2867955267429352, "rewards/margins": 0.08363430947065353, "rewards/rejected": -0.3704298436641693, "step": 2424 }, { "epoch": 6.639288158795345, "grad_norm": 4.1103034019470215, "learning_rate": 6.67945205479452e-07, "log_odds_chosen": 1.9230461120605469, "log_odds_ratio": -0.31412309408187866, "logits/chosen": 1.0951309204101562, "logits/rejected": 1.132090449333191, "logps/chosen": -2.433861255645752, "logps/rejected": -4.298676490783691, "loss": 0.7742, "nll_loss": 0.7428258657455444, "rewards/accuracies": 0.875, "rewards/chosen": -0.2433861345052719, "rewards/margins": 0.1864815503358841, "rewards/rejected": -0.429867684841156, "step": 2425 }, { "epoch": 6.6420260095824775, "grad_norm": 4.903200626373291, "learning_rate": 6.678082191780822e-07, "log_odds_chosen": 1.9898697137832642, "log_odds_ratio": -0.3932533264160156, "logits/chosen": 0.8513755798339844, "logits/rejected": 0.8261487483978271, "logps/chosen": -2.41933012008667, "logps/rejected": -4.349387168884277, "loss": 0.9443, "nll_loss": 0.9050021767616272, "rewards/accuracies": 0.75, "rewards/chosen": -0.24193304777145386, "rewards/margins": 0.19300569593906403, "rewards/rejected": -0.4349387586116791, "step": 2426 }, { "epoch": 6.6447638603696095, "grad_norm": 4.718607425689697, "learning_rate": 6.676712328767124e-07, "log_odds_chosen": 1.1925685405731201, "log_odds_ratio": -0.3699589669704437, "logits/chosen": 0.9234040975570679, "logits/rejected": 0.9750798344612122, "logps/chosen": -2.3697474002838135, "logps/rejected": -3.4465503692626953, "loss": 0.8018, "nll_loss": 0.7647746801376343, "rewards/accuracies": 0.875, "rewards/chosen": -0.23697474598884583, "rewards/margins": 0.10768027603626251, "rewards/rejected": -0.34465503692626953, "step": 2427 }, { "epoch": 6.647501711156742, "grad_norm": 4.763894557952881, "learning_rate": 6.675342465753424e-07, "log_odds_chosen": 1.568778157234192, "log_odds_ratio": -0.5466797947883606, "logits/chosen": 0.9509271383285522, "logits/rejected": 0.9777502417564392, "logps/chosen": -2.4618723392486572, "logps/rejected": -3.9011995792388916, "loss": 0.8576, "nll_loss": 0.8029588460922241, "rewards/accuracies": 0.875, "rewards/chosen": -0.246187224984169, "rewards/margins": 0.14393272995948792, "rewards/rejected": -0.3901199698448181, "step": 2428 }, { "epoch": 6.650239561943874, "grad_norm": 4.642488479614258, "learning_rate": 6.673972602739726e-07, "log_odds_chosen": 2.349130392074585, "log_odds_ratio": -0.2564488649368286, "logits/chosen": 0.6797491312026978, "logits/rejected": 0.6049672961235046, "logps/chosen": -2.539299964904785, "logps/rejected": -4.796239852905273, "loss": 0.8578, "nll_loss": 0.8321222066879272, "rewards/accuracies": 0.875, "rewards/chosen": -0.253930002450943, "rewards/margins": 0.22569403052330017, "rewards/rejected": -0.47962403297424316, "step": 2429 }, { "epoch": 6.652977412731007, "grad_norm": 3.864691734313965, "learning_rate": 6.672602739726028e-07, "log_odds_chosen": 1.4618940353393555, "log_odds_ratio": -0.2955315113067627, "logits/chosen": 0.8747936487197876, "logits/rejected": 0.9040893912315369, "logps/chosen": -2.3011245727539062, "logps/rejected": -3.6931495666503906, "loss": 0.7337, "nll_loss": 0.7041912078857422, "rewards/accuracies": 0.875, "rewards/chosen": -0.2301124632358551, "rewards/margins": 0.13920250535011292, "rewards/rejected": -0.3693149983882904, "step": 2430 }, { "epoch": 6.655715263518138, "grad_norm": 3.8246662616729736, "learning_rate": 6.671232876712329e-07, "log_odds_chosen": 2.4130239486694336, "log_odds_ratio": -0.2943927049636841, "logits/chosen": 0.7455927133560181, "logits/rejected": 0.6925345659255981, "logps/chosen": -1.7443867921829224, "logps/rejected": -4.031951904296875, "loss": 0.815, "nll_loss": 0.7855482697486877, "rewards/accuracies": 1.0, "rewards/chosen": -0.1744386851787567, "rewards/margins": 0.22875651717185974, "rewards/rejected": -0.40319520235061646, "step": 2431 }, { "epoch": 6.658453114305271, "grad_norm": 3.7036664485931396, "learning_rate": 6.66986301369863e-07, "log_odds_chosen": 1.7247412204742432, "log_odds_ratio": -0.4142020046710968, "logits/chosen": 0.9413114786148071, "logits/rejected": 0.8859914541244507, "logps/chosen": -1.883116364479065, "logps/rejected": -3.520413875579834, "loss": 0.8996, "nll_loss": 0.8581367135047913, "rewards/accuracies": 0.875, "rewards/chosen": -0.1883116364479065, "rewards/margins": 0.16372975707054138, "rewards/rejected": -0.3520413935184479, "step": 2432 }, { "epoch": 6.661190965092403, "grad_norm": 4.906131744384766, "learning_rate": 6.668493150684931e-07, "log_odds_chosen": 1.3570300340652466, "log_odds_ratio": -0.40452200174331665, "logits/chosen": 0.6659666299819946, "logits/rejected": 0.6559498310089111, "logps/chosen": -2.233241558074951, "logps/rejected": -3.5047121047973633, "loss": 0.811, "nll_loss": 0.7705726027488708, "rewards/accuracies": 0.875, "rewards/chosen": -0.22332414984703064, "rewards/margins": 0.12714706361293793, "rewards/rejected": -0.35047122836112976, "step": 2433 }, { "epoch": 6.663928815879535, "grad_norm": 4.97877311706543, "learning_rate": 6.667123287671233e-07, "log_odds_chosen": 1.050842523574829, "log_odds_ratio": -0.5182271003723145, "logits/chosen": 0.7454066276550293, "logits/rejected": 0.7370687127113342, "logps/chosen": -2.827174186706543, "logps/rejected": -3.8701393604278564, "loss": 0.9233, "nll_loss": 0.8715055584907532, "rewards/accuracies": 0.625, "rewards/chosen": -0.28271743655204773, "rewards/margins": 0.10429652035236359, "rewards/rejected": -0.3870139718055725, "step": 2434 }, { "epoch": 6.666666666666667, "grad_norm": 4.6581711769104, "learning_rate": 6.665753424657534e-07, "log_odds_chosen": 1.1015093326568604, "log_odds_ratio": -0.42088574171066284, "logits/chosen": 0.7619472146034241, "logits/rejected": 0.7037395238876343, "logps/chosen": -2.0952234268188477, "logps/rejected": -3.0508532524108887, "loss": 0.882, "nll_loss": 0.8399301171302795, "rewards/accuracies": 0.625, "rewards/chosen": -0.2095223367214203, "rewards/margins": 0.09556297212839127, "rewards/rejected": -0.30508530139923096, "step": 2435 }, { "epoch": 6.669404517453799, "grad_norm": 4.308501243591309, "learning_rate": 6.664383561643835e-07, "log_odds_chosen": 0.9095195531845093, "log_odds_ratio": -0.38578489422798157, "logits/chosen": 0.9965630173683167, "logits/rejected": 0.9842294454574585, "logps/chosen": -1.7588794231414795, "logps/rejected": -2.5634124279022217, "loss": 0.7651, "nll_loss": 0.7264739871025085, "rewards/accuracies": 1.0, "rewards/chosen": -0.17588794231414795, "rewards/margins": 0.08045332133769989, "rewards/rejected": -0.25634124875068665, "step": 2436 }, { "epoch": 6.672142368240931, "grad_norm": 5.397459506988525, "learning_rate": 6.663013698630137e-07, "log_odds_chosen": 1.9813674688339233, "log_odds_ratio": -0.22868488729000092, "logits/chosen": 0.9269866943359375, "logits/rejected": 0.9860306978225708, "logps/chosen": -2.2922537326812744, "logps/rejected": -4.154971599578857, "loss": 0.8296, "nll_loss": 0.8066993951797485, "rewards/accuracies": 1.0, "rewards/chosen": -0.22922536730766296, "rewards/margins": 0.18627174198627472, "rewards/rejected": -0.4154971241950989, "step": 2437 }, { "epoch": 6.674880219028063, "grad_norm": 4.787875652313232, "learning_rate": 6.661643835616439e-07, "log_odds_chosen": 2.1464524269104004, "log_odds_ratio": -0.34437036514282227, "logits/chosen": 0.8778326511383057, "logits/rejected": 0.9285815954208374, "logps/chosen": -2.5888538360595703, "logps/rejected": -4.6414008140563965, "loss": 0.8618, "nll_loss": 0.827337384223938, "rewards/accuracies": 0.75, "rewards/chosen": -0.2588854134082794, "rewards/margins": 0.20525464415550232, "rewards/rejected": -0.4641400873661041, "step": 2438 }, { "epoch": 6.677618069815195, "grad_norm": 4.603388786315918, "learning_rate": 6.660273972602739e-07, "log_odds_chosen": 3.2469472885131836, "log_odds_ratio": -0.2719153165817261, "logits/chosen": 1.1291100978851318, "logits/rejected": 1.2210184335708618, "logps/chosen": -3.2009871006011963, "logps/rejected": -6.381701469421387, "loss": 0.8102, "nll_loss": 0.7830488085746765, "rewards/accuracies": 0.875, "rewards/chosen": -0.32009872794151306, "rewards/margins": 0.3180714249610901, "rewards/rejected": -0.6381701231002808, "step": 2439 }, { "epoch": 6.680355920602327, "grad_norm": 4.770082473754883, "learning_rate": 6.658904109589041e-07, "log_odds_chosen": 1.394685983657837, "log_odds_ratio": -0.501067042350769, "logits/chosen": 0.8379149436950684, "logits/rejected": 0.8359602689743042, "logps/chosen": -2.8043525218963623, "logps/rejected": -4.149240970611572, "loss": 0.8164, "nll_loss": 0.766262948513031, "rewards/accuracies": 0.75, "rewards/chosen": -0.2804352641105652, "rewards/margins": 0.13448886573314667, "rewards/rejected": -0.41492411494255066, "step": 2440 }, { "epoch": 6.683093771389459, "grad_norm": 4.843624591827393, "learning_rate": 6.657534246575343e-07, "log_odds_chosen": 1.261610746383667, "log_odds_ratio": -0.40219777822494507, "logits/chosen": 0.8335442543029785, "logits/rejected": 0.8427106142044067, "logps/chosen": -3.015252113342285, "logps/rejected": -4.232091426849365, "loss": 0.9794, "nll_loss": 0.9392072558403015, "rewards/accuracies": 0.625, "rewards/chosen": -0.30152520537376404, "rewards/margins": 0.12168392539024353, "rewards/rejected": -0.4232091009616852, "step": 2441 }, { "epoch": 6.685831622176591, "grad_norm": 3.911587715148926, "learning_rate": 6.656164383561643e-07, "log_odds_chosen": 1.4781564474105835, "log_odds_ratio": -0.3966989517211914, "logits/chosen": 0.9035555720329285, "logits/rejected": 0.9199494123458862, "logps/chosen": -2.3864731788635254, "logps/rejected": -3.804314613342285, "loss": 0.8242, "nll_loss": 0.7844853401184082, "rewards/accuracies": 0.875, "rewards/chosen": -0.23864731192588806, "rewards/margins": 0.14178414642810822, "rewards/rejected": -0.38043147325515747, "step": 2442 }, { "epoch": 6.6885694729637235, "grad_norm": 3.9583771228790283, "learning_rate": 6.654794520547945e-07, "log_odds_chosen": 0.738614559173584, "log_odds_ratio": -0.5101492404937744, "logits/chosen": 0.850080132484436, "logits/rejected": 0.8534606695175171, "logps/chosen": -2.158231019973755, "logps/rejected": -2.8535003662109375, "loss": 0.834, "nll_loss": 0.7829714417457581, "rewards/accuracies": 0.625, "rewards/chosen": -0.21582309901714325, "rewards/margins": 0.06952692568302155, "rewards/rejected": -0.2853500247001648, "step": 2443 }, { "epoch": 6.691307323750856, "grad_norm": 3.8501622676849365, "learning_rate": 6.653424657534247e-07, "log_odds_chosen": 1.9395525455474854, "log_odds_ratio": -0.3216167390346527, "logits/chosen": 0.667636513710022, "logits/rejected": 0.621903121471405, "logps/chosen": -1.9440536499023438, "logps/rejected": -3.7407350540161133, "loss": 0.8027, "nll_loss": 0.7705426216125488, "rewards/accuracies": 0.875, "rewards/chosen": -0.19440537691116333, "rewards/margins": 0.179668128490448, "rewards/rejected": -0.37407350540161133, "step": 2444 }, { "epoch": 6.694045174537988, "grad_norm": 3.9874916076660156, "learning_rate": 6.652054794520548e-07, "log_odds_chosen": 1.4712237119674683, "log_odds_ratio": -0.31201815605163574, "logits/chosen": 1.1611840724945068, "logits/rejected": 1.1333303451538086, "logps/chosen": -2.3341569900512695, "logps/rejected": -3.7061262130737305, "loss": 0.7547, "nll_loss": 0.7234919667243958, "rewards/accuracies": 0.875, "rewards/chosen": -0.23341572284698486, "rewards/margins": 0.13719692826271057, "rewards/rejected": -0.37061265110969543, "step": 2445 }, { "epoch": 6.69678302532512, "grad_norm": 5.788606643676758, "learning_rate": 6.650684931506849e-07, "log_odds_chosen": 0.5753893256187439, "log_odds_ratio": -0.5958842039108276, "logits/chosen": 0.9671462774276733, "logits/rejected": 0.9947943687438965, "logps/chosen": -3.464419364929199, "logps/rejected": -4.013241767883301, "loss": 0.8259, "nll_loss": 0.7663554549217224, "rewards/accuracies": 0.625, "rewards/chosen": -0.34644192457199097, "rewards/margins": 0.05488220602273941, "rewards/rejected": -0.40132415294647217, "step": 2446 }, { "epoch": 6.699520876112252, "grad_norm": 4.056350231170654, "learning_rate": 6.649315068493151e-07, "log_odds_chosen": 0.8067210912704468, "log_odds_ratio": -0.4162929356098175, "logits/chosen": 0.856787383556366, "logits/rejected": 0.8480032682418823, "logps/chosen": -1.9576232433319092, "logps/rejected": -2.671865701675415, "loss": 0.8252, "nll_loss": 0.7835768461227417, "rewards/accuracies": 0.875, "rewards/chosen": -0.19576233625411987, "rewards/margins": 0.07142423093318939, "rewards/rejected": -0.26718655228614807, "step": 2447 }, { "epoch": 6.702258726899384, "grad_norm": 4.0370283126831055, "learning_rate": 6.647945205479452e-07, "log_odds_chosen": 1.9843379259109497, "log_odds_ratio": -0.1968226134777069, "logits/chosen": 0.9175811409950256, "logits/rejected": 0.9690164923667908, "logps/chosen": -2.2921574115753174, "logps/rejected": -4.175364971160889, "loss": 0.7167, "nll_loss": 0.6970251798629761, "rewards/accuracies": 1.0, "rewards/chosen": -0.22921574115753174, "rewards/margins": 0.18832078576087952, "rewards/rejected": -0.41753649711608887, "step": 2448 }, { "epoch": 6.704996577686516, "grad_norm": 3.944171667098999, "learning_rate": 6.646575342465753e-07, "log_odds_chosen": 1.0572727918624878, "log_odds_ratio": -0.4875612258911133, "logits/chosen": 0.8141290545463562, "logits/rejected": 0.8228803277015686, "logps/chosen": -1.999311089515686, "logps/rejected": -2.951272487640381, "loss": 0.8957, "nll_loss": 0.8468994498252869, "rewards/accuracies": 0.875, "rewards/chosen": -0.19993111491203308, "rewards/margins": 0.09519614279270172, "rewards/rejected": -0.295127272605896, "step": 2449 }, { "epoch": 6.707734428473648, "grad_norm": 3.8050858974456787, "learning_rate": 6.645205479452054e-07, "log_odds_chosen": 2.9833667278289795, "log_odds_ratio": -0.2696717381477356, "logits/chosen": 0.9254571795463562, "logits/rejected": 0.9157443642616272, "logps/chosen": -2.06992244720459, "logps/rejected": -4.969090938568115, "loss": 0.7819, "nll_loss": 0.7549548745155334, "rewards/accuracies": 0.875, "rewards/chosen": -0.2069922387599945, "rewards/margins": 0.28991687297821045, "rewards/rejected": -0.49690914154052734, "step": 2450 }, { "epoch": 6.71047227926078, "grad_norm": 4.093996524810791, "learning_rate": 6.643835616438356e-07, "log_odds_chosen": 1.1390712261199951, "log_odds_ratio": -0.44139131903648376, "logits/chosen": 0.7722058296203613, "logits/rejected": 0.7463091611862183, "logps/chosen": -2.1593146324157715, "logps/rejected": -3.2396037578582764, "loss": 0.8697, "nll_loss": 0.8255434632301331, "rewards/accuracies": 0.625, "rewards/chosen": -0.2159315049648285, "rewards/margins": 0.10802888870239258, "rewards/rejected": -0.3239603638648987, "step": 2451 }, { "epoch": 6.713210130047912, "grad_norm": 4.390714168548584, "learning_rate": 6.642465753424658e-07, "log_odds_chosen": 2.0085439682006836, "log_odds_ratio": -0.25647950172424316, "logits/chosen": 0.7149264812469482, "logits/rejected": 0.713071882724762, "logps/chosen": -2.218397617340088, "logps/rejected": -4.102934837341309, "loss": 0.8022, "nll_loss": 0.7765295505523682, "rewards/accuracies": 1.0, "rewards/chosen": -0.2218397557735443, "rewards/margins": 0.18845368921756744, "rewards/rejected": -0.41029345989227295, "step": 2452 }, { "epoch": 6.715947980835044, "grad_norm": 4.152828693389893, "learning_rate": 6.641095890410958e-07, "log_odds_chosen": 2.2347991466522217, "log_odds_ratio": -0.30719831585884094, "logits/chosen": 0.913252055644989, "logits/rejected": 0.9934694766998291, "logps/chosen": -2.2361185550689697, "logps/rejected": -4.3881731033325195, "loss": 0.7707, "nll_loss": 0.7399607300758362, "rewards/accuracies": 1.0, "rewards/chosen": -0.22361186146736145, "rewards/margins": 0.21520544588565826, "rewards/rejected": -0.4388173222541809, "step": 2453 }, { "epoch": 6.718685831622176, "grad_norm": 3.974703550338745, "learning_rate": 6.63972602739726e-07, "log_odds_chosen": 1.0003433227539062, "log_odds_ratio": -0.3646232485771179, "logits/chosen": 0.5885642170906067, "logits/rejected": 0.561959981918335, "logps/chosen": -1.8801934719085693, "logps/rejected": -2.7794833183288574, "loss": 0.9, "nll_loss": 0.863493025302887, "rewards/accuracies": 1.0, "rewards/chosen": -0.18801935017108917, "rewards/margins": 0.08992898464202881, "rewards/rejected": -0.2779483199119568, "step": 2454 }, { "epoch": 6.721423682409308, "grad_norm": 4.653148174285889, "learning_rate": 6.638356164383562e-07, "log_odds_chosen": 0.6304877996444702, "log_odds_ratio": -0.6171998977661133, "logits/chosen": 0.8926034569740295, "logits/rejected": 0.859188437461853, "logps/chosen": -2.788235664367676, "logps/rejected": -3.3781769275665283, "loss": 0.9505, "nll_loss": 0.8887315392494202, "rewards/accuracies": 0.5, "rewards/chosen": -0.278823584318161, "rewards/margins": 0.0589941143989563, "rewards/rejected": -0.3378176987171173, "step": 2455 }, { "epoch": 6.72416153319644, "grad_norm": 3.883463144302368, "learning_rate": 6.636986301369862e-07, "log_odds_chosen": 2.205568790435791, "log_odds_ratio": -0.1579524576663971, "logits/chosen": 1.0273607969284058, "logits/rejected": 1.0911877155303955, "logps/chosen": -2.5407962799072266, "logps/rejected": -4.672904014587402, "loss": 0.7055, "nll_loss": 0.6896604895591736, "rewards/accuracies": 1.0, "rewards/chosen": -0.2540796399116516, "rewards/margins": 0.213210791349411, "rewards/rejected": -0.46729040145874023, "step": 2456 }, { "epoch": 6.726899383983573, "grad_norm": 3.8898773193359375, "learning_rate": 6.635616438356164e-07, "log_odds_chosen": 0.7424545884132385, "log_odds_ratio": -0.4164882004261017, "logits/chosen": 0.7676112055778503, "logits/rejected": 0.8097615242004395, "logps/chosen": -2.4801807403564453, "logps/rejected": -3.1759138107299805, "loss": 0.9115, "nll_loss": 0.8698806762695312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2480180859565735, "rewards/margins": 0.06957332789897919, "rewards/rejected": -0.3175913989543915, "step": 2457 }, { "epoch": 6.7296372347707045, "grad_norm": 4.996486186981201, "learning_rate": 6.634246575342466e-07, "log_odds_chosen": 0.5802260637283325, "log_odds_ratio": -0.7075862884521484, "logits/chosen": 0.8749964833259583, "logits/rejected": 0.8148403763771057, "logps/chosen": -2.393352508544922, "logps/rejected": -2.941549777984619, "loss": 0.8602, "nll_loss": 0.7894868850708008, "rewards/accuracies": 0.75, "rewards/chosen": -0.23933523893356323, "rewards/margins": 0.054819729179143906, "rewards/rejected": -0.29415497183799744, "step": 2458 }, { "epoch": 6.7323750855578375, "grad_norm": 4.529028415679932, "learning_rate": 6.632876712328767e-07, "log_odds_chosen": 0.6449259519577026, "log_odds_ratio": -0.5191702246665955, "logits/chosen": 0.818631112575531, "logits/rejected": 0.8575281500816345, "logps/chosen": -2.7925751209259033, "logps/rejected": -3.336271286010742, "loss": 0.8135, "nll_loss": 0.7616099119186401, "rewards/accuracies": 0.75, "rewards/chosen": -0.27925753593444824, "rewards/margins": 0.05436962470412254, "rewards/rejected": -0.3336271643638611, "step": 2459 }, { "epoch": 6.7351129363449695, "grad_norm": 4.31601619720459, "learning_rate": 6.631506849315068e-07, "log_odds_chosen": 1.0613229274749756, "log_odds_ratio": -0.39785000681877136, "logits/chosen": 0.7666045427322388, "logits/rejected": 0.775842547416687, "logps/chosen": -2.841644763946533, "logps/rejected": -3.8501486778259277, "loss": 0.8927, "nll_loss": 0.8529061079025269, "rewards/accuracies": 0.75, "rewards/chosen": -0.2841644883155823, "rewards/margins": 0.10085037350654602, "rewards/rejected": -0.3850148618221283, "step": 2460 }, { "epoch": 6.737850787132102, "grad_norm": 3.9245219230651855, "learning_rate": 6.63013698630137e-07, "log_odds_chosen": 2.070618152618408, "log_odds_ratio": -0.2820979952812195, "logits/chosen": 0.7831589579582214, "logits/rejected": 0.720799446105957, "logps/chosen": -2.2358086109161377, "logps/rejected": -4.2179951667785645, "loss": 0.8672, "nll_loss": 0.8390308618545532, "rewards/accuracies": 0.875, "rewards/chosen": -0.22358085215091705, "rewards/margins": 0.19821864366531372, "rewards/rejected": -0.42179951071739197, "step": 2461 }, { "epoch": 6.740588637919234, "grad_norm": 4.297077178955078, "learning_rate": 6.628767123287671e-07, "log_odds_chosen": 1.1123682260513306, "log_odds_ratio": -0.36004137992858887, "logits/chosen": 0.6969969868659973, "logits/rejected": 0.6491858959197998, "logps/chosen": -1.8079242706298828, "logps/rejected": -2.7778983116149902, "loss": 0.7586, "nll_loss": 0.7225907444953918, "rewards/accuracies": 0.875, "rewards/chosen": -0.180792436003685, "rewards/margins": 0.09699738770723343, "rewards/rejected": -0.277789831161499, "step": 2462 }, { "epoch": 6.743326488706366, "grad_norm": 3.8104915618896484, "learning_rate": 6.627397260273972e-07, "log_odds_chosen": 2.5555076599121094, "log_odds_ratio": -0.21372511982917786, "logits/chosen": 0.890262246131897, "logits/rejected": 0.9148019552230835, "logps/chosen": -2.4099302291870117, "logps/rejected": -4.8379716873168945, "loss": 0.8458, "nll_loss": 0.8244563341140747, "rewards/accuracies": 0.875, "rewards/chosen": -0.24099302291870117, "rewards/margins": 0.242804154753685, "rewards/rejected": -0.48379719257354736, "step": 2463 }, { "epoch": 6.746064339493498, "grad_norm": 5.221512317657471, "learning_rate": 6.626027397260273e-07, "log_odds_chosen": 0.35308998823165894, "log_odds_ratio": -0.6998313665390015, "logits/chosen": 0.7327812314033508, "logits/rejected": 0.7453492879867554, "logps/chosen": -2.807420253753662, "logps/rejected": -3.0769758224487305, "loss": 0.8247, "nll_loss": 0.7547615170478821, "rewards/accuracies": 0.625, "rewards/chosen": -0.28074201941490173, "rewards/margins": 0.026955528184771538, "rewards/rejected": -0.3076975643634796, "step": 2464 }, { "epoch": 6.74880219028063, "grad_norm": 3.6370575428009033, "learning_rate": 6.624657534246575e-07, "log_odds_chosen": 2.957624673843384, "log_odds_ratio": -0.20496954023838043, "logits/chosen": 1.1033223867416382, "logits/rejected": 1.1931475400924683, "logps/chosen": -2.6586074829101562, "logps/rejected": -5.551575183868408, "loss": 0.7683, "nll_loss": 0.7477741241455078, "rewards/accuracies": 1.0, "rewards/chosen": -0.26586073637008667, "rewards/margins": 0.2892967462539673, "rewards/rejected": -0.5551575422286987, "step": 2465 }, { "epoch": 6.751540041067762, "grad_norm": 4.696981906890869, "learning_rate": 6.623287671232877e-07, "log_odds_chosen": 1.3714382648468018, "log_odds_ratio": -0.3748931288719177, "logits/chosen": 0.8100982904434204, "logits/rejected": 0.8421560525894165, "logps/chosen": -2.472867488861084, "logps/rejected": -3.7511956691741943, "loss": 0.7692, "nll_loss": 0.7316743731498718, "rewards/accuracies": 0.75, "rewards/chosen": -0.24728675186634064, "rewards/margins": 0.12783282995224, "rewards/rejected": -0.37511956691741943, "step": 2466 }, { "epoch": 6.754277891854894, "grad_norm": 4.671294689178467, "learning_rate": 6.621917808219177e-07, "log_odds_chosen": 1.053444266319275, "log_odds_ratio": -0.40916743874549866, "logits/chosen": 0.713129997253418, "logits/rejected": 0.7149096131324768, "logps/chosen": -2.1648223400115967, "logps/rejected": -3.0896759033203125, "loss": 0.8142, "nll_loss": 0.7733229994773865, "rewards/accuracies": 0.875, "rewards/chosen": -0.2164822220802307, "rewards/margins": 0.09248538315296173, "rewards/rejected": -0.30896759033203125, "step": 2467 }, { "epoch": 6.757015742642026, "grad_norm": 4.858523368835449, "learning_rate": 6.620547945205479e-07, "log_odds_chosen": 1.9180145263671875, "log_odds_ratio": -0.5028510689735413, "logits/chosen": 0.7199556827545166, "logits/rejected": 0.8681248426437378, "logps/chosen": -2.741248846054077, "logps/rejected": -4.59434700012207, "loss": 0.8252, "nll_loss": 0.7749554514884949, "rewards/accuracies": 0.75, "rewards/chosen": -0.2741248905658722, "rewards/margins": 0.18530979752540588, "rewards/rejected": -0.4594346880912781, "step": 2468 }, { "epoch": 6.759753593429158, "grad_norm": 6.068914413452148, "learning_rate": 6.619178082191781e-07, "log_odds_chosen": 0.7292231917381287, "log_odds_ratio": -0.6604884266853333, "logits/chosen": 0.8194277882575989, "logits/rejected": 0.7771074771881104, "logps/chosen": -3.32675838470459, "logps/rejected": -4.02339506149292, "loss": 0.9237, "nll_loss": 0.8576661348342896, "rewards/accuracies": 0.625, "rewards/chosen": -0.33267584443092346, "rewards/margins": 0.06966366618871689, "rewards/rejected": -0.40233951807022095, "step": 2469 }, { "epoch": 6.76249144421629, "grad_norm": 7.703576564788818, "learning_rate": 6.617808219178081e-07, "log_odds_chosen": 1.18260657787323, "log_odds_ratio": -0.6733590364456177, "logits/chosen": 1.0702385902404785, "logits/rejected": 1.1046366691589355, "logps/chosen": -4.0205841064453125, "logps/rejected": -5.169650077819824, "loss": 0.8494, "nll_loss": 0.7820299863815308, "rewards/accuracies": 0.875, "rewards/chosen": -0.4020583927631378, "rewards/margins": 0.11490660905838013, "rewards/rejected": -0.5169650316238403, "step": 2470 }, { "epoch": 6.765229295003422, "grad_norm": 4.1357102394104, "learning_rate": 6.616438356164383e-07, "log_odds_chosen": 1.8545130491256714, "log_odds_ratio": -0.21258269250392914, "logits/chosen": 0.9587278962135315, "logits/rejected": 0.9887287616729736, "logps/chosen": -2.233567953109741, "logps/rejected": -3.8860011100769043, "loss": 0.7172, "nll_loss": 0.6959425806999207, "rewards/accuracies": 1.0, "rewards/chosen": -0.22335679829120636, "rewards/margins": 0.16524332761764526, "rewards/rejected": -0.38860011100769043, "step": 2471 }, { "epoch": 6.767967145790554, "grad_norm": 4.1938605308532715, "learning_rate": 6.615068493150685e-07, "log_odds_chosen": 1.9247007369995117, "log_odds_ratio": -0.28400492668151855, "logits/chosen": 0.6204110383987427, "logits/rejected": 0.5351154208183289, "logps/chosen": -2.2926130294799805, "logps/rejected": -4.1384172439575195, "loss": 0.9344, "nll_loss": 0.9059803485870361, "rewards/accuracies": 1.0, "rewards/chosen": -0.22926129400730133, "rewards/margins": 0.18458044528961182, "rewards/rejected": -0.41384172439575195, "step": 2472 }, { "epoch": 6.770704996577686, "grad_norm": 4.99950647354126, "learning_rate": 6.613698630136986e-07, "log_odds_chosen": 1.3827598094940186, "log_odds_ratio": -0.5585755705833435, "logits/chosen": 0.8350577354431152, "logits/rejected": 0.8291941285133362, "logps/chosen": -2.6266226768493652, "logps/rejected": -3.8957700729370117, "loss": 0.8446, "nll_loss": 0.7887588143348694, "rewards/accuracies": 0.75, "rewards/chosen": -0.26266226172447205, "rewards/margins": 0.12691473960876465, "rewards/rejected": -0.3895769715309143, "step": 2473 }, { "epoch": 6.7734428473648185, "grad_norm": 4.041412353515625, "learning_rate": 6.612328767123287e-07, "log_odds_chosen": 1.8098167181015015, "log_odds_ratio": -0.3715052008628845, "logits/chosen": 0.6826485991477966, "logits/rejected": 0.6322313547134399, "logps/chosen": -2.214132308959961, "logps/rejected": -3.9493765830993652, "loss": 0.9027, "nll_loss": 0.8655403256416321, "rewards/accuracies": 0.875, "rewards/chosen": -0.22141322493553162, "rewards/margins": 0.1735244244337082, "rewards/rejected": -0.394937664270401, "step": 2474 }, { "epoch": 6.776180698151951, "grad_norm": 3.7291860580444336, "learning_rate": 6.610958904109589e-07, "log_odds_chosen": 2.5493340492248535, "log_odds_ratio": -0.21363654732704163, "logits/chosen": 0.7667940855026245, "logits/rejected": 0.7821319699287415, "logps/chosen": -2.604707717895508, "logps/rejected": -5.060450077056885, "loss": 0.7713, "nll_loss": 0.7499275803565979, "rewards/accuracies": 1.0, "rewards/chosen": -0.26047077775001526, "rewards/margins": 0.2455742359161377, "rewards/rejected": -0.5060449838638306, "step": 2475 }, { "epoch": 6.778918548939083, "grad_norm": 5.5666327476501465, "learning_rate": 6.60958904109589e-07, "log_odds_chosen": 1.03910493850708, "log_odds_ratio": -0.7264944314956665, "logits/chosen": 0.9688702821731567, "logits/rejected": 0.9953526258468628, "logps/chosen": -2.773799419403076, "logps/rejected": -3.6970748901367188, "loss": 0.7686, "nll_loss": 0.695967435836792, "rewards/accuracies": 0.75, "rewards/chosen": -0.27737993001937866, "rewards/margins": 0.0923275500535965, "rewards/rejected": -0.36970749497413635, "step": 2476 }, { "epoch": 6.781656399726215, "grad_norm": 4.517602443695068, "learning_rate": 6.608219178082191e-07, "log_odds_chosen": 0.9898098111152649, "log_odds_ratio": -0.38247039914131165, "logits/chosen": 0.8391013145446777, "logits/rejected": 0.9499697685241699, "logps/chosen": -2.98750638961792, "logps/rejected": -3.919165849685669, "loss": 0.8507, "nll_loss": 0.8124366998672485, "rewards/accuracies": 0.875, "rewards/chosen": -0.298750638961792, "rewards/margins": 0.09316593408584595, "rewards/rejected": -0.3919166028499603, "step": 2477 }, { "epoch": 6.784394250513347, "grad_norm": 4.809262275695801, "learning_rate": 6.606849315068492e-07, "log_odds_chosen": 0.9940364956855774, "log_odds_ratio": -0.5482239723205566, "logits/chosen": 0.8226082921028137, "logits/rejected": 0.8585418462753296, "logps/chosen": -2.4586610794067383, "logps/rejected": -3.344083309173584, "loss": 0.8837, "nll_loss": 0.8289051651954651, "rewards/accuracies": 0.875, "rewards/chosen": -0.24586611986160278, "rewards/margins": 0.08854220807552338, "rewards/rejected": -0.33440831303596497, "step": 2478 }, { "epoch": 6.787132101300479, "grad_norm": 4.224926948547363, "learning_rate": 6.605479452054794e-07, "log_odds_chosen": 2.961930274963379, "log_odds_ratio": -0.3069676160812378, "logits/chosen": 0.8353272676467896, "logits/rejected": 0.7779421806335449, "logps/chosen": -1.8030303716659546, "logps/rejected": -4.6543121337890625, "loss": 0.8945, "nll_loss": 0.863771378993988, "rewards/accuracies": 1.0, "rewards/chosen": -0.18030303716659546, "rewards/margins": 0.285128116607666, "rewards/rejected": -0.46543121337890625, "step": 2479 }, { "epoch": 6.789869952087611, "grad_norm": 4.809876441955566, "learning_rate": 6.604109589041096e-07, "log_odds_chosen": 1.0767065286636353, "log_odds_ratio": -0.6753227710723877, "logits/chosen": 0.7648547291755676, "logits/rejected": 0.7685749530792236, "logps/chosen": -2.998706340789795, "logps/rejected": -4.036405086517334, "loss": 0.8828, "nll_loss": 0.8152585029602051, "rewards/accuracies": 0.75, "rewards/chosen": -0.29987066984176636, "rewards/margins": 0.10376986116170883, "rewards/rejected": -0.4036405086517334, "step": 2480 }, { "epoch": 6.792607802874743, "grad_norm": 4.804977893829346, "learning_rate": 6.602739726027396e-07, "log_odds_chosen": 2.199209213256836, "log_odds_ratio": -0.3074897527694702, "logits/chosen": 0.6892844438552856, "logits/rejected": 0.5883958339691162, "logps/chosen": -1.975380301475525, "logps/rejected": -4.076640605926514, "loss": 0.8393, "nll_loss": 0.8085097074508667, "rewards/accuracies": 0.875, "rewards/chosen": -0.19753803312778473, "rewards/margins": 0.21012604236602783, "rewards/rejected": -0.40766406059265137, "step": 2481 }, { "epoch": 6.795345653661875, "grad_norm": 4.855448246002197, "learning_rate": 6.601369863013698e-07, "log_odds_chosen": 1.170782208442688, "log_odds_ratio": -0.3850441575050354, "logits/chosen": 0.8693689107894897, "logits/rejected": 0.8170987367630005, "logps/chosen": -1.9053199291229248, "logps/rejected": -2.9674127101898193, "loss": 0.7805, "nll_loss": 0.7419803142547607, "rewards/accuracies": 0.875, "rewards/chosen": -0.19053198397159576, "rewards/margins": 0.10620929300785065, "rewards/rejected": -0.2967412769794464, "step": 2482 }, { "epoch": 6.798083504449007, "grad_norm": 4.273154258728027, "learning_rate": 6.6e-07, "log_odds_chosen": 0.6300596594810486, "log_odds_ratio": -0.5219897031784058, "logits/chosen": 0.7320217490196228, "logits/rejected": 0.6360456943511963, "logps/chosen": -2.348090171813965, "logps/rejected": -2.9168860912323, "loss": 0.8698, "nll_loss": 0.8175523281097412, "rewards/accuracies": 0.875, "rewards/chosen": -0.2348090261220932, "rewards/margins": 0.056879591196775436, "rewards/rejected": -0.29168862104415894, "step": 2483 }, { "epoch": 6.80082135523614, "grad_norm": 5.955220699310303, "learning_rate": 6.5986301369863e-07, "log_odds_chosen": 3.1179261207580566, "log_odds_ratio": -0.06438080966472626, "logits/chosen": 1.0562909841537476, "logits/rejected": 1.1044467687606812, "logps/chosen": -2.127695083618164, "logps/rejected": -5.075952053070068, "loss": 0.7376, "nll_loss": 0.7311504483222961, "rewards/accuracies": 1.0, "rewards/chosen": -0.2127695083618164, "rewards/margins": 0.2948257029056549, "rewards/rejected": -0.5075952410697937, "step": 2484 }, { "epoch": 6.803559206023271, "grad_norm": 5.5076189041137695, "learning_rate": 6.597260273972602e-07, "log_odds_chosen": 2.0773372650146484, "log_odds_ratio": -0.4205246567726135, "logits/chosen": 1.1384618282318115, "logits/rejected": 1.1691399812698364, "logps/chosen": -2.9547572135925293, "logps/rejected": -4.969354152679443, "loss": 0.8162, "nll_loss": 0.774150550365448, "rewards/accuracies": 0.75, "rewards/chosen": -0.2954757511615753, "rewards/margins": 0.20145967602729797, "rewards/rejected": -0.4969354271888733, "step": 2485 }, { "epoch": 6.806297056810404, "grad_norm": 4.417515754699707, "learning_rate": 6.595890410958904e-07, "log_odds_chosen": 0.8024990558624268, "log_odds_ratio": -0.4498984217643738, "logits/chosen": 0.8602834939956665, "logits/rejected": 0.8549429774284363, "logps/chosen": -1.9938814640045166, "logps/rejected": -2.6915061473846436, "loss": 0.8637, "nll_loss": 0.8187137246131897, "rewards/accuracies": 0.875, "rewards/chosen": -0.19938816130161285, "rewards/margins": 0.0697624608874321, "rewards/rejected": -0.26915061473846436, "step": 2486 }, { "epoch": 6.809034907597536, "grad_norm": 5.6698150634765625, "learning_rate": 6.594520547945205e-07, "log_odds_chosen": 1.0837093591690063, "log_odds_ratio": -0.37668362259864807, "logits/chosen": 0.8263028860092163, "logits/rejected": 0.8560378551483154, "logps/chosen": -2.9949326515197754, "logps/rejected": -4.031651020050049, "loss": 0.782, "nll_loss": 0.744290828704834, "rewards/accuracies": 0.875, "rewards/chosen": -0.2994932532310486, "rewards/margins": 0.1036718338727951, "rewards/rejected": -0.4031651020050049, "step": 2487 }, { "epoch": 6.811772758384668, "grad_norm": 5.4306721687316895, "learning_rate": 6.593150684931506e-07, "log_odds_chosen": 0.8487313985824585, "log_odds_ratio": -0.5089645385742188, "logits/chosen": 0.6690083742141724, "logits/rejected": 0.6721087098121643, "logps/chosen": -2.5133633613586426, "logps/rejected": -3.298764228820801, "loss": 0.827, "nll_loss": 0.7761143445968628, "rewards/accuracies": 0.875, "rewards/chosen": -0.25133633613586426, "rewards/margins": 0.07854007184505463, "rewards/rejected": -0.3298764228820801, "step": 2488 }, { "epoch": 6.8145106091718, "grad_norm": 5.045227527618408, "learning_rate": 6.591780821917808e-07, "log_odds_chosen": 0.6904260516166687, "log_odds_ratio": -0.5025064945220947, "logits/chosen": 0.6836050152778625, "logits/rejected": 0.7866744995117188, "logps/chosen": -2.813628911972046, "logps/rejected": -3.44586181640625, "loss": 0.8134, "nll_loss": 0.763107180595398, "rewards/accuracies": 0.75, "rewards/chosen": -0.2813628911972046, "rewards/margins": 0.06322328746318817, "rewards/rejected": -0.34458616375923157, "step": 2489 }, { "epoch": 6.8172484599589325, "grad_norm": 4.468054294586182, "learning_rate": 6.590410958904109e-07, "log_odds_chosen": 1.6656148433685303, "log_odds_ratio": -0.25466617941856384, "logits/chosen": 0.7141630053520203, "logits/rejected": 0.6831635236740112, "logps/chosen": -2.0167348384857178, "logps/rejected": -3.5342206954956055, "loss": 0.7876, "nll_loss": 0.7621176838874817, "rewards/accuracies": 1.0, "rewards/chosen": -0.2016734927892685, "rewards/margins": 0.1517486274242401, "rewards/rejected": -0.3534221053123474, "step": 2490 }, { "epoch": 6.8199863107460645, "grad_norm": 4.797975063323975, "learning_rate": 6.58904109589041e-07, "log_odds_chosen": 1.1140272617340088, "log_odds_ratio": -0.47222191095352173, "logits/chosen": 0.8944476246833801, "logits/rejected": 0.8737253546714783, "logps/chosen": -2.4049463272094727, "logps/rejected": -3.413119077682495, "loss": 0.8145, "nll_loss": 0.7672840356826782, "rewards/accuracies": 0.75, "rewards/chosen": -0.24049462378025055, "rewards/margins": 0.10081727802753448, "rewards/rejected": -0.3413119316101074, "step": 2491 }, { "epoch": 6.822724161533197, "grad_norm": 3.670581102371216, "learning_rate": 6.587671232876712e-07, "log_odds_chosen": 2.8974709510803223, "log_odds_ratio": -0.18558964133262634, "logits/chosen": 0.8144307136535645, "logits/rejected": 0.8205431699752808, "logps/chosen": -2.7745046615600586, "logps/rejected": -5.595408916473389, "loss": 0.8811, "nll_loss": 0.8625038862228394, "rewards/accuracies": 1.0, "rewards/chosen": -0.27745044231414795, "rewards/margins": 0.282090425491333, "rewards/rejected": -0.5595409274101257, "step": 2492 }, { "epoch": 6.825462012320329, "grad_norm": 5.4965057373046875, "learning_rate": 6.586301369863013e-07, "log_odds_chosen": 0.587975025177002, "log_odds_ratio": -0.44739657640457153, "logits/chosen": 0.896990180015564, "logits/rejected": 0.8351022601127625, "logps/chosen": -2.4160170555114746, "logps/rejected": -2.9334869384765625, "loss": 0.8746, "nll_loss": 0.8298688530921936, "rewards/accuracies": 1.0, "rewards/chosen": -0.24160173535346985, "rewards/margins": 0.05174696817994118, "rewards/rejected": -0.29334867000579834, "step": 2493 }, { "epoch": 6.828199863107461, "grad_norm": 3.289652109146118, "learning_rate": 6.584931506849315e-07, "log_odds_chosen": 1.365774154663086, "log_odds_ratio": -0.3307379484176636, "logits/chosen": 0.8153320550918579, "logits/rejected": 0.8071494698524475, "logps/chosen": -2.4115166664123535, "logps/rejected": -3.698310136795044, "loss": 0.7497, "nll_loss": 0.7165884971618652, "rewards/accuracies": 0.875, "rewards/chosen": -0.24115169048309326, "rewards/margins": 0.12867936491966248, "rewards/rejected": -0.36983102560043335, "step": 2494 }, { "epoch": 6.830937713894593, "grad_norm": 4.923475742340088, "learning_rate": 6.583561643835615e-07, "log_odds_chosen": 1.272438406944275, "log_odds_ratio": -0.4979935884475708, "logits/chosen": 0.8227623701095581, "logits/rejected": 0.7299723029136658, "logps/chosen": -2.9172141551971436, "logps/rejected": -4.144153118133545, "loss": 0.8133, "nll_loss": 0.763460636138916, "rewards/accuracies": 0.875, "rewards/chosen": -0.2917214334011078, "rewards/margins": 0.12269386649131775, "rewards/rejected": -0.41441529989242554, "step": 2495 }, { "epoch": 6.833675564681725, "grad_norm": 5.44384241104126, "learning_rate": 6.582191780821917e-07, "log_odds_chosen": 0.4959826171398163, "log_odds_ratio": -0.6774877905845642, "logits/chosen": 0.791446328163147, "logits/rejected": 0.7431080341339111, "logps/chosen": -2.2890501022338867, "logps/rejected": -2.6669113636016846, "loss": 0.8504, "nll_loss": 0.7826095223426819, "rewards/accuracies": 0.875, "rewards/chosen": -0.22890503704547882, "rewards/margins": 0.03778610751032829, "rewards/rejected": -0.2666911482810974, "step": 2496 }, { "epoch": 6.836413415468857, "grad_norm": 6.18918514251709, "learning_rate": 6.58082191780822e-07, "log_odds_chosen": 0.7029241919517517, "log_odds_ratio": -0.6909152865409851, "logits/chosen": 1.0086475610733032, "logits/rejected": 1.045417308807373, "logps/chosen": -2.578747510910034, "logps/rejected": -3.1349587440490723, "loss": 0.8455, "nll_loss": 0.7763718962669373, "rewards/accuracies": 0.75, "rewards/chosen": -0.2578747570514679, "rewards/margins": 0.055621154606342316, "rewards/rejected": -0.3134959042072296, "step": 2497 }, { "epoch": 6.839151266255989, "grad_norm": 4.85722541809082, "learning_rate": 6.579452054794519e-07, "log_odds_chosen": 0.46935391426086426, "log_odds_ratio": -0.5274258852005005, "logits/chosen": 0.9240748286247253, "logits/rejected": 0.9318417906761169, "logps/chosen": -3.095698833465576, "logps/rejected": -3.533522605895996, "loss": 0.7989, "nll_loss": 0.7461177706718445, "rewards/accuracies": 0.875, "rewards/chosen": -0.3095698654651642, "rewards/margins": 0.04378240182995796, "rewards/rejected": -0.35335227847099304, "step": 2498 }, { "epoch": 6.841889117043121, "grad_norm": 3.9131174087524414, "learning_rate": 6.578082191780821e-07, "log_odds_chosen": 2.2440710067749023, "log_odds_ratio": -0.22663037478923798, "logits/chosen": 0.7434258460998535, "logits/rejected": 0.7158892154693604, "logps/chosen": -1.8719804286956787, "logps/rejected": -3.981161117553711, "loss": 0.9353, "nll_loss": 0.9126364588737488, "rewards/accuracies": 1.0, "rewards/chosen": -0.18719804286956787, "rewards/margins": 0.21091806888580322, "rewards/rejected": -0.3981161117553711, "step": 2499 }, { "epoch": 6.844626967830253, "grad_norm": 4.618666172027588, "learning_rate": 6.576712328767124e-07, "log_odds_chosen": 1.9251658916473389, "log_odds_ratio": -0.21840199828147888, "logits/chosen": 0.8011512756347656, "logits/rejected": 0.763892650604248, "logps/chosen": -1.9847851991653442, "logps/rejected": -3.780945301055908, "loss": 0.7657, "nll_loss": 0.743893027305603, "rewards/accuracies": 1.0, "rewards/chosen": -0.19847851991653442, "rewards/margins": 0.17961598932743073, "rewards/rejected": -0.37809452414512634, "step": 2500 }, { "epoch": 6.847364818617385, "grad_norm": 4.132411479949951, "learning_rate": 6.575342465753423e-07, "log_odds_chosen": 1.3986165523529053, "log_odds_ratio": -0.38069748878479004, "logits/chosen": 0.7403314113616943, "logits/rejected": 0.6764997839927673, "logps/chosen": -2.307931661605835, "logps/rejected": -3.6074280738830566, "loss": 0.8844, "nll_loss": 0.8463653326034546, "rewards/accuracies": 0.75, "rewards/chosen": -0.23079317808151245, "rewards/margins": 0.1299496442079544, "rewards/rejected": -0.36074280738830566, "step": 2501 }, { "epoch": 6.850102669404517, "grad_norm": 5.561521530151367, "learning_rate": 6.573972602739726e-07, "log_odds_chosen": 1.2303259372711182, "log_odds_ratio": -0.549333930015564, "logits/chosen": 0.6975023746490479, "logits/rejected": 0.6155967116355896, "logps/chosen": -2.741987705230713, "logps/rejected": -3.927180051803589, "loss": 1.0348, "nll_loss": 0.9798829555511475, "rewards/accuracies": 0.75, "rewards/chosen": -0.2741987705230713, "rewards/margins": 0.11851923167705536, "rewards/rejected": -0.39271801710128784, "step": 2502 }, { "epoch": 6.852840520191649, "grad_norm": 4.127110004425049, "learning_rate": 6.572602739726028e-07, "log_odds_chosen": 2.5883636474609375, "log_odds_ratio": -0.12426917254924774, "logits/chosen": 1.1427228450775146, "logits/rejected": 1.1495482921600342, "logps/chosen": -2.638930320739746, "logps/rejected": -5.14409065246582, "loss": 0.7624, "nll_loss": 0.7500080466270447, "rewards/accuracies": 1.0, "rewards/chosen": -0.2638930380344391, "rewards/margins": 0.25051605701446533, "rewards/rejected": -0.514409065246582, "step": 2503 }, { "epoch": 6.855578370978781, "grad_norm": 4.002884387969971, "learning_rate": 6.571232876712329e-07, "log_odds_chosen": 1.1028028726577759, "log_odds_ratio": -0.33980485796928406, "logits/chosen": 0.7622886300086975, "logits/rejected": 0.7200779914855957, "logps/chosen": -1.8486106395721436, "logps/rejected": -2.8205177783966064, "loss": 0.7761, "nll_loss": 0.7421345710754395, "rewards/accuracies": 1.0, "rewards/chosen": -0.18486106395721436, "rewards/margins": 0.097190722823143, "rewards/rejected": -0.28205180168151855, "step": 2504 }, { "epoch": 6.8583162217659135, "grad_norm": 5.643581867218018, "learning_rate": 6.56986301369863e-07, "log_odds_chosen": 2.603677272796631, "log_odds_ratio": -0.24080109596252441, "logits/chosen": 1.1734141111373901, "logits/rejected": 1.1858527660369873, "logps/chosen": -3.1187233924865723, "logps/rejected": -5.662558555603027, "loss": 0.774, "nll_loss": 0.7499182224273682, "rewards/accuracies": 0.875, "rewards/chosen": -0.31187230348587036, "rewards/margins": 0.25438353419303894, "rewards/rejected": -0.5662558674812317, "step": 2505 }, { "epoch": 6.861054072553046, "grad_norm": 5.139036655426025, "learning_rate": 6.568493150684932e-07, "log_odds_chosen": 1.1509522199630737, "log_odds_ratio": -0.4217708110809326, "logits/chosen": 0.6589045524597168, "logits/rejected": 0.6305235624313354, "logps/chosen": -2.113116979598999, "logps/rejected": -3.170565605163574, "loss": 0.813, "nll_loss": 0.7708027362823486, "rewards/accuracies": 0.875, "rewards/chosen": -0.2113116979598999, "rewards/margins": 0.10574483871459961, "rewards/rejected": -0.3170565366744995, "step": 2506 }, { "epoch": 6.863791923340178, "grad_norm": 5.228567600250244, "learning_rate": 6.567123287671233e-07, "log_odds_chosen": 1.875672459602356, "log_odds_ratio": -0.40442630648612976, "logits/chosen": 0.846541702747345, "logits/rejected": 0.7633109092712402, "logps/chosen": -2.607107162475586, "logps/rejected": -4.365127086639404, "loss": 0.8187, "nll_loss": 0.7782130837440491, "rewards/accuracies": 0.75, "rewards/chosen": -0.2607107162475586, "rewards/margins": 0.17580199241638184, "rewards/rejected": -0.4365127384662628, "step": 2507 }, { "epoch": 6.86652977412731, "grad_norm": 4.743661880493164, "learning_rate": 6.565753424657535e-07, "log_odds_chosen": 1.9564926624298096, "log_odds_ratio": -0.3795144259929657, "logits/chosen": 1.1532889604568481, "logits/rejected": 1.1668970584869385, "logps/chosen": -2.8812265396118164, "logps/rejected": -4.806498050689697, "loss": 0.7695, "nll_loss": 0.7315170168876648, "rewards/accuracies": 0.75, "rewards/chosen": -0.28812265396118164, "rewards/margins": 0.1925271451473236, "rewards/rejected": -0.48064976930618286, "step": 2508 }, { "epoch": 6.869267624914443, "grad_norm": 4.274388790130615, "learning_rate": 6.564383561643835e-07, "log_odds_chosen": 1.6392168998718262, "log_odds_ratio": -0.31043845415115356, "logits/chosen": 0.843480110168457, "logits/rejected": 0.8745247721672058, "logps/chosen": -2.5753369331359863, "logps/rejected": -4.1330885887146, "loss": 0.8104, "nll_loss": 0.7793763875961304, "rewards/accuracies": 0.875, "rewards/chosen": -0.2575336694717407, "rewards/margins": 0.15577518939971924, "rewards/rejected": -0.41330885887145996, "step": 2509 }, { "epoch": 6.872005475701574, "grad_norm": 8.240141868591309, "learning_rate": 6.563013698630137e-07, "log_odds_chosen": 1.2858260869979858, "log_odds_ratio": -0.9261360168457031, "logits/chosen": 0.6684293746948242, "logits/rejected": 0.6227039098739624, "logps/chosen": -3.2756569385528564, "logps/rejected": -4.442220687866211, "loss": 0.9621, "nll_loss": 0.8695234060287476, "rewards/accuracies": 0.875, "rewards/chosen": -0.3275657296180725, "rewards/margins": 0.11665638536214828, "rewards/rejected": -0.44422203302383423, "step": 2510 }, { "epoch": 6.874743326488707, "grad_norm": 4.512011528015137, "learning_rate": 6.561643835616439e-07, "log_odds_chosen": 1.5973050594329834, "log_odds_ratio": -0.2964613139629364, "logits/chosen": 0.7891310453414917, "logits/rejected": 0.8054381608963013, "logps/chosen": -2.576132297515869, "logps/rejected": -4.097177982330322, "loss": 0.8083, "nll_loss": 0.7786167860031128, "rewards/accuracies": 1.0, "rewards/chosen": -0.25761324167251587, "rewards/margins": 0.15210457146167755, "rewards/rejected": -0.4097177982330322, "step": 2511 }, { "epoch": 6.877481177275839, "grad_norm": 7.443317413330078, "learning_rate": 6.560273972602739e-07, "log_odds_chosen": 0.9975176453590393, "log_odds_ratio": -0.5752708911895752, "logits/chosen": 0.935086727142334, "logits/rejected": 0.991966962814331, "logps/chosen": -2.5380859375, "logps/rejected": -3.4577131271362305, "loss": 0.7323, "nll_loss": 0.6747353076934814, "rewards/accuracies": 0.75, "rewards/chosen": -0.2538086175918579, "rewards/margins": 0.09196273237466812, "rewards/rejected": -0.34577134251594543, "step": 2512 }, { "epoch": 6.880219028062971, "grad_norm": 4.345098495483398, "learning_rate": 6.558904109589041e-07, "log_odds_chosen": 1.0672426223754883, "log_odds_ratio": -0.38138800859451294, "logits/chosen": 0.5979305505752563, "logits/rejected": 0.5993935465812683, "logps/chosen": -2.095308780670166, "logps/rejected": -3.0713002681732178, "loss": 0.9218, "nll_loss": 0.8836567997932434, "rewards/accuracies": 0.875, "rewards/chosen": -0.20953087508678436, "rewards/margins": 0.09759916365146637, "rewards/rejected": -0.30713003873825073, "step": 2513 }, { "epoch": 6.882956878850103, "grad_norm": 5.562948703765869, "learning_rate": 6.557534246575343e-07, "log_odds_chosen": 0.890097975730896, "log_odds_ratio": -0.4028443396091461, "logits/chosen": 0.775642991065979, "logits/rejected": 0.7704973220825195, "logps/chosen": -2.7366840839385986, "logps/rejected": -3.565580129623413, "loss": 0.8859, "nll_loss": 0.8456467390060425, "rewards/accuracies": 0.75, "rewards/chosen": -0.27366843819618225, "rewards/margins": 0.08288958668708801, "rewards/rejected": -0.35655802488327026, "step": 2514 }, { "epoch": 6.885694729637235, "grad_norm": 4.036162853240967, "learning_rate": 6.556164383561643e-07, "log_odds_chosen": 1.0092213153839111, "log_odds_ratio": -0.37060675024986267, "logits/chosen": 0.96717369556427, "logits/rejected": 0.93170166015625, "logps/chosen": -2.0094003677368164, "logps/rejected": -2.917713165283203, "loss": 0.7803, "nll_loss": 0.7432284355163574, "rewards/accuracies": 0.875, "rewards/chosen": -0.20094004273414612, "rewards/margins": 0.09083129465579987, "rewards/rejected": -0.2917713522911072, "step": 2515 }, { "epoch": 6.888432580424367, "grad_norm": 4.492440700531006, "learning_rate": 6.554794520547945e-07, "log_odds_chosen": 1.1808557510375977, "log_odds_ratio": -0.5299444794654846, "logits/chosen": 0.9756926894187927, "logits/rejected": 0.9226312637329102, "logps/chosen": -1.967315673828125, "logps/rejected": -3.092705249786377, "loss": 0.8383, "nll_loss": 0.7853331565856934, "rewards/accuracies": 0.625, "rewards/chosen": -0.1967315822839737, "rewards/margins": 0.11253894865512848, "rewards/rejected": -0.3092705309391022, "step": 2516 }, { "epoch": 6.891170431211499, "grad_norm": 4.597042560577393, "learning_rate": 6.553424657534247e-07, "log_odds_chosen": 1.5347468852996826, "log_odds_ratio": -0.523699164390564, "logits/chosen": 1.0344352722167969, "logits/rejected": 1.0493180751800537, "logps/chosen": -2.5208091735839844, "logps/rejected": -3.967890739440918, "loss": 0.8072, "nll_loss": 0.7548769116401672, "rewards/accuracies": 0.875, "rewards/chosen": -0.2520809471607208, "rewards/margins": 0.14470815658569336, "rewards/rejected": -0.3967890739440918, "step": 2517 }, { "epoch": 6.893908281998631, "grad_norm": 5.63921594619751, "learning_rate": 6.552054794520548e-07, "log_odds_chosen": 2.1886277198791504, "log_odds_ratio": -0.287844181060791, "logits/chosen": 0.904486894607544, "logits/rejected": 0.872549295425415, "logps/chosen": -2.7451329231262207, "logps/rejected": -4.862059593200684, "loss": 0.7972, "nll_loss": 0.7683830857276917, "rewards/accuracies": 0.875, "rewards/chosen": -0.274513304233551, "rewards/margins": 0.2116926610469818, "rewards/rejected": -0.4862059950828552, "step": 2518 }, { "epoch": 6.896646132785763, "grad_norm": 5.0467400550842285, "learning_rate": 6.550684931506849e-07, "log_odds_chosen": 1.2464816570281982, "log_odds_ratio": -0.4859682023525238, "logits/chosen": 0.8040840029716492, "logits/rejected": 0.6771703362464905, "logps/chosen": -3.1614880561828613, "logps/rejected": -4.354171276092529, "loss": 1.0007, "nll_loss": 0.9521092772483826, "rewards/accuracies": 0.625, "rewards/chosen": -0.3161488175392151, "rewards/margins": 0.11926832795143127, "rewards/rejected": -0.43541714549064636, "step": 2519 }, { "epoch": 6.899383983572895, "grad_norm": 4.2964277267456055, "learning_rate": 6.549315068493151e-07, "log_odds_chosen": 0.754305899143219, "log_odds_ratio": -0.5451955795288086, "logits/chosen": 0.6388114094734192, "logits/rejected": 0.6495261192321777, "logps/chosen": -2.123093366622925, "logps/rejected": -2.781118392944336, "loss": 0.905, "nll_loss": 0.8504483699798584, "rewards/accuracies": 0.75, "rewards/chosen": -0.2123093456029892, "rewards/margins": 0.06580250710248947, "rewards/rejected": -0.27811184525489807, "step": 2520 }, { "epoch": 6.9021218343600275, "grad_norm": 3.645113706588745, "learning_rate": 6.547945205479452e-07, "log_odds_chosen": 4.023116111755371, "log_odds_ratio": -0.16984175145626068, "logits/chosen": 0.8800598978996277, "logits/rejected": 0.8907411694526672, "logps/chosen": -1.8800101280212402, "logps/rejected": -5.719524383544922, "loss": 0.7342, "nll_loss": 0.7171761989593506, "rewards/accuracies": 1.0, "rewards/chosen": -0.18800100684165955, "rewards/margins": 0.38395145535469055, "rewards/rejected": -0.5719524621963501, "step": 2521 }, { "epoch": 6.9048596851471595, "grad_norm": 4.047597408294678, "learning_rate": 6.546575342465753e-07, "log_odds_chosen": 1.3767403364181519, "log_odds_ratio": -0.3552524447441101, "logits/chosen": 0.7460453510284424, "logits/rejected": 0.7888364791870117, "logps/chosen": -2.438276767730713, "logps/rejected": -3.7277016639709473, "loss": 0.7323, "nll_loss": 0.6967258453369141, "rewards/accuracies": 0.875, "rewards/chosen": -0.2438277006149292, "rewards/margins": 0.12894248962402344, "rewards/rejected": -0.37277016043663025, "step": 2522 }, { "epoch": 6.907597535934292, "grad_norm": 4.047223091125488, "learning_rate": 6.545205479452055e-07, "log_odds_chosen": 2.1928091049194336, "log_odds_ratio": -0.1859392672777176, "logits/chosen": 0.8372671604156494, "logits/rejected": 0.8353927135467529, "logps/chosen": -2.3341917991638184, "logps/rejected": -4.419687271118164, "loss": 0.8115, "nll_loss": 0.7929187417030334, "rewards/accuracies": 1.0, "rewards/chosen": -0.23341919481754303, "rewards/margins": 0.20854951441287994, "rewards/rejected": -0.441968709230423, "step": 2523 }, { "epoch": 6.910335386721424, "grad_norm": 4.140045166015625, "learning_rate": 6.543835616438356e-07, "log_odds_chosen": 2.838716983795166, "log_odds_ratio": -0.2785417437553406, "logits/chosen": 1.0430638790130615, "logits/rejected": 1.0496602058410645, "logps/chosen": -2.110348701477051, "logps/rejected": -4.7705254554748535, "loss": 0.7614, "nll_loss": 0.7335307598114014, "rewards/accuracies": 0.875, "rewards/chosen": -0.211034893989563, "rewards/margins": 0.2660176455974579, "rewards/rejected": -0.47705256938934326, "step": 2524 }, { "epoch": 6.913073237508556, "grad_norm": 4.8558855056762695, "learning_rate": 6.542465753424658e-07, "log_odds_chosen": 2.935123920440674, "log_odds_ratio": -0.15114475786685944, "logits/chosen": 1.0220005512237549, "logits/rejected": 0.997738242149353, "logps/chosen": -2.6120851039886475, "logps/rejected": -5.47308349609375, "loss": 0.84, "nll_loss": 0.8248908519744873, "rewards/accuracies": 1.0, "rewards/chosen": -0.26120850443840027, "rewards/margins": 0.2860998511314392, "rewards/rejected": -0.5473083853721619, "step": 2525 }, { "epoch": 6.915811088295688, "grad_norm": 3.9314045906066895, "learning_rate": 6.541095890410958e-07, "log_odds_chosen": 1.3388254642486572, "log_odds_ratio": -0.4598967134952545, "logits/chosen": 0.8369433283805847, "logits/rejected": 0.7970508337020874, "logps/chosen": -2.335308074951172, "logps/rejected": -3.6233890056610107, "loss": 0.9004, "nll_loss": 0.8544552326202393, "rewards/accuracies": 0.75, "rewards/chosen": -0.23353080451488495, "rewards/margins": 0.12880809605121613, "rewards/rejected": -0.3623389005661011, "step": 2526 }, { "epoch": 6.91854893908282, "grad_norm": 3.876365900039673, "learning_rate": 6.53972602739726e-07, "log_odds_chosen": 2.3631038665771484, "log_odds_ratio": -0.2694898545742035, "logits/chosen": 0.992057204246521, "logits/rejected": 1.0291863679885864, "logps/chosen": -2.5778653621673584, "logps/rejected": -4.848588943481445, "loss": 0.7625, "nll_loss": 0.7355774641036987, "rewards/accuracies": 0.875, "rewards/chosen": -0.2577865421772003, "rewards/margins": 0.22707238793373108, "rewards/rejected": -0.4848589301109314, "step": 2527 }, { "epoch": 6.921286789869952, "grad_norm": 3.9715981483459473, "learning_rate": 6.538356164383562e-07, "log_odds_chosen": 1.601675033569336, "log_odds_ratio": -0.32954010367393494, "logits/chosen": 0.8945027589797974, "logits/rejected": 0.8864064812660217, "logps/chosen": -2.5219085216522217, "logps/rejected": -4.0012664794921875, "loss": 0.814, "nll_loss": 0.7810322642326355, "rewards/accuracies": 0.875, "rewards/chosen": -0.25219085812568665, "rewards/margins": 0.14793576300144196, "rewards/rejected": -0.4001266360282898, "step": 2528 }, { "epoch": 6.924024640657084, "grad_norm": 3.935433864593506, "learning_rate": 6.536986301369862e-07, "log_odds_chosen": 1.4895615577697754, "log_odds_ratio": -0.4041960835456848, "logits/chosen": 0.8850881457328796, "logits/rejected": 0.8844864368438721, "logps/chosen": -1.837959885597229, "logps/rejected": -3.109978675842285, "loss": 0.7895, "nll_loss": 0.7491036057472229, "rewards/accuracies": 0.75, "rewards/chosen": -0.1837959885597229, "rewards/margins": 0.1272018700838089, "rewards/rejected": -0.310997873544693, "step": 2529 }, { "epoch": 6.926762491444216, "grad_norm": 3.593790054321289, "learning_rate": 6.535616438356164e-07, "log_odds_chosen": 2.2953338623046875, "log_odds_ratio": -0.1896277517080307, "logits/chosen": 0.6612851619720459, "logits/rejected": 0.6304293870925903, "logps/chosen": -2.318096399307251, "logps/rejected": -4.472945690155029, "loss": 0.7934, "nll_loss": 0.774390459060669, "rewards/accuracies": 1.0, "rewards/chosen": -0.23180963099002838, "rewards/margins": 0.21548491716384888, "rewards/rejected": -0.44729456305503845, "step": 2530 }, { "epoch": 6.929500342231348, "grad_norm": 4.070347785949707, "learning_rate": 6.534246575342466e-07, "log_odds_chosen": 1.3846237659454346, "log_odds_ratio": -0.30490759015083313, "logits/chosen": 0.7749919891357422, "logits/rejected": 0.7856292128562927, "logps/chosen": -2.1547532081604004, "logps/rejected": -3.4397823810577393, "loss": 0.8529, "nll_loss": 0.8224274516105652, "rewards/accuracies": 1.0, "rewards/chosen": -0.21547535061836243, "rewards/margins": 0.12850290536880493, "rewards/rejected": -0.34397825598716736, "step": 2531 }, { "epoch": 6.93223819301848, "grad_norm": 5.593310356140137, "learning_rate": 6.532876712328767e-07, "log_odds_chosen": 0.7942880988121033, "log_odds_ratio": -0.5619962215423584, "logits/chosen": 1.034731388092041, "logits/rejected": 0.9889019727706909, "logps/chosen": -2.5679306983947754, "logps/rejected": -3.308779239654541, "loss": 0.8354, "nll_loss": 0.779232382774353, "rewards/accuracies": 0.875, "rewards/chosen": -0.2567930519580841, "rewards/margins": 0.07408487796783447, "rewards/rejected": -0.33087795972824097, "step": 2532 }, { "epoch": 6.934976043805612, "grad_norm": 4.51149320602417, "learning_rate": 6.531506849315068e-07, "log_odds_chosen": 1.4624745845794678, "log_odds_ratio": -0.4652770459651947, "logits/chosen": 0.8792349696159363, "logits/rejected": 0.9428473711013794, "logps/chosen": -2.7130773067474365, "logps/rejected": -4.107250213623047, "loss": 0.8708, "nll_loss": 0.8242961168289185, "rewards/accuracies": 0.75, "rewards/chosen": -0.27130773663520813, "rewards/margins": 0.13941726088523865, "rewards/rejected": -0.4107249975204468, "step": 2533 }, { "epoch": 6.937713894592744, "grad_norm": 3.7533442974090576, "learning_rate": 6.53013698630137e-07, "log_odds_chosen": 1.3822011947631836, "log_odds_ratio": -0.2857716381549835, "logits/chosen": 0.876693606376648, "logits/rejected": 0.8492966294288635, "logps/chosen": -2.3959879875183105, "logps/rejected": -3.6984524726867676, "loss": 0.777, "nll_loss": 0.7484716176986694, "rewards/accuracies": 1.0, "rewards/chosen": -0.23959878087043762, "rewards/margins": 0.13024644553661346, "rewards/rejected": -0.3698452115058899, "step": 2534 }, { "epoch": 6.940451745379876, "grad_norm": 4.038811683654785, "learning_rate": 6.528767123287671e-07, "log_odds_chosen": 1.5807965993881226, "log_odds_ratio": -0.32362547516822815, "logits/chosen": 0.9358739256858826, "logits/rejected": 0.9913206696510315, "logps/chosen": -2.810574531555176, "logps/rejected": -4.34675407409668, "loss": 0.7542, "nll_loss": 0.7218778133392334, "rewards/accuracies": 1.0, "rewards/chosen": -0.2810574769973755, "rewards/margins": 0.1536179780960083, "rewards/rejected": -0.4346754550933838, "step": 2535 }, { "epoch": 6.943189596167009, "grad_norm": 3.713057279586792, "learning_rate": 6.527397260273972e-07, "log_odds_chosen": 1.7178595066070557, "log_odds_ratio": -0.21082258224487305, "logits/chosen": 0.9660784602165222, "logits/rejected": 0.8892491459846497, "logps/chosen": -1.8929063081741333, "logps/rejected": -3.4521331787109375, "loss": 0.7924, "nll_loss": 0.7712945938110352, "rewards/accuracies": 1.0, "rewards/chosen": -0.1892906278371811, "rewards/margins": 0.15592268109321594, "rewards/rejected": -0.3452133238315582, "step": 2536 }, { "epoch": 6.9459274469541405, "grad_norm": 4.794040679931641, "learning_rate": 6.526027397260274e-07, "log_odds_chosen": 2.297036647796631, "log_odds_ratio": -0.350999116897583, "logits/chosen": 0.7467136383056641, "logits/rejected": 0.7325676679611206, "logps/chosen": -2.674607753753662, "logps/rejected": -4.864848613739014, "loss": 0.8637, "nll_loss": 0.8285525441169739, "rewards/accuracies": 0.875, "rewards/chosen": -0.26746076345443726, "rewards/margins": 0.21902412176132202, "rewards/rejected": -0.4864848852157593, "step": 2537 }, { "epoch": 6.9486652977412735, "grad_norm": 4.178096771240234, "learning_rate": 6.524657534246575e-07, "log_odds_chosen": 1.337687373161316, "log_odds_ratio": -0.2569565176963806, "logits/chosen": 1.090285062789917, "logits/rejected": 1.114540457725525, "logps/chosen": -2.380540132522583, "logps/rejected": -3.626614809036255, "loss": 0.746, "nll_loss": 0.7202981114387512, "rewards/accuracies": 1.0, "rewards/chosen": -0.23805400729179382, "rewards/margins": 0.12460747361183167, "rewards/rejected": -0.3626614809036255, "step": 2538 }, { "epoch": 6.951403148528406, "grad_norm": 3.8281049728393555, "learning_rate": 6.523287671232877e-07, "log_odds_chosen": 2.8937788009643555, "log_odds_ratio": -0.24618341028690338, "logits/chosen": 0.6115357875823975, "logits/rejected": 0.6446127891540527, "logps/chosen": -2.072673797607422, "logps/rejected": -4.858050346374512, "loss": 0.8863, "nll_loss": 0.8617035746574402, "rewards/accuracies": 1.0, "rewards/chosen": -0.2072674036026001, "rewards/margins": 0.27853766083717346, "rewards/rejected": -0.48580503463745117, "step": 2539 }, { "epoch": 6.954140999315538, "grad_norm": 3.572251319885254, "learning_rate": 6.521917808219177e-07, "log_odds_chosen": 1.8565387725830078, "log_odds_ratio": -0.3875287175178528, "logits/chosen": 0.9141873717308044, "logits/rejected": 0.9160372614860535, "logps/chosen": -2.043383836746216, "logps/rejected": -3.824549436569214, "loss": 0.8176, "nll_loss": 0.7788071632385254, "rewards/accuracies": 0.625, "rewards/chosen": -0.20433840155601501, "rewards/margins": 0.178116574883461, "rewards/rejected": -0.3824549615383148, "step": 2540 }, { "epoch": 6.95687885010267, "grad_norm": 4.595612525939941, "learning_rate": 6.520547945205479e-07, "log_odds_chosen": 1.3547991514205933, "log_odds_ratio": -0.330186128616333, "logits/chosen": 0.7332665324211121, "logits/rejected": 0.7080615162849426, "logps/chosen": -1.7844362258911133, "logps/rejected": -2.9697723388671875, "loss": 0.8387, "nll_loss": 0.8057199120521545, "rewards/accuracies": 1.0, "rewards/chosen": -0.17844361066818237, "rewards/margins": 0.11853362619876862, "rewards/rejected": -0.2969772517681122, "step": 2541 }, { "epoch": 6.959616700889802, "grad_norm": 4.661019802093506, "learning_rate": 6.519178082191781e-07, "log_odds_chosen": 1.1264461278915405, "log_odds_ratio": -0.47099998593330383, "logits/chosen": 0.8380691409111023, "logits/rejected": 0.9164516925811768, "logps/chosen": -2.9840989112854004, "logps/rejected": -4.055599212646484, "loss": 0.8897, "nll_loss": 0.8426070213317871, "rewards/accuracies": 0.875, "rewards/chosen": -0.29840990900993347, "rewards/margins": 0.10715004801750183, "rewards/rejected": -0.4055599570274353, "step": 2542 }, { "epoch": 6.962354551676934, "grad_norm": 3.9099714756011963, "learning_rate": 6.517808219178081e-07, "log_odds_chosen": 1.2899582386016846, "log_odds_ratio": -0.34824687242507935, "logits/chosen": 0.9439878463745117, "logits/rejected": 0.9398216009140015, "logps/chosen": -2.104743719100952, "logps/rejected": -3.316655397415161, "loss": 0.7907, "nll_loss": 0.7559162974357605, "rewards/accuracies": 1.0, "rewards/chosen": -0.2104743868112564, "rewards/margins": 0.12119114398956299, "rewards/rejected": -0.3316655158996582, "step": 2543 }, { "epoch": 6.965092402464066, "grad_norm": 4.101217746734619, "learning_rate": 6.516438356164383e-07, "log_odds_chosen": 1.4701015949249268, "log_odds_ratio": -0.376125305891037, "logits/chosen": 0.8666572570800781, "logits/rejected": 0.848271369934082, "logps/chosen": -2.037688732147217, "logps/rejected": -3.3060340881347656, "loss": 0.7096, "nll_loss": 0.6719395518302917, "rewards/accuracies": 0.875, "rewards/chosen": -0.20376887917518616, "rewards/margins": 0.12683454155921936, "rewards/rejected": -0.3306034207344055, "step": 2544 }, { "epoch": 6.967830253251198, "grad_norm": 5.457516193389893, "learning_rate": 6.515068493150685e-07, "log_odds_chosen": 0.6833407878875732, "log_odds_ratio": -0.5159356594085693, "logits/chosen": 1.0368807315826416, "logits/rejected": 1.015284776687622, "logps/chosen": -2.7302706241607666, "logps/rejected": -3.3365159034729004, "loss": 0.8711, "nll_loss": 0.8194948434829712, "rewards/accuracies": 0.875, "rewards/chosen": -0.27302706241607666, "rewards/margins": 0.06062454357743263, "rewards/rejected": -0.333651602268219, "step": 2545 }, { "epoch": 6.97056810403833, "grad_norm": 3.4735798835754395, "learning_rate": 6.513698630136986e-07, "log_odds_chosen": 1.3886826038360596, "log_odds_ratio": -0.3323671221733093, "logits/chosen": 0.7527137398719788, "logits/rejected": 0.8207215070724487, "logps/chosen": -2.0932092666625977, "logps/rejected": -3.379786491394043, "loss": 0.7432, "nll_loss": 0.7100013494491577, "rewards/accuracies": 0.875, "rewards/chosen": -0.20932090282440186, "rewards/margins": 0.128657728433609, "rewards/rejected": -0.33797866106033325, "step": 2546 }, { "epoch": 6.973305954825462, "grad_norm": 3.3019864559173584, "learning_rate": 6.512328767123287e-07, "log_odds_chosen": 3.084291934967041, "log_odds_ratio": -0.24034060537815094, "logits/chosen": 0.6706628799438477, "logits/rejected": 0.6070088148117065, "logps/chosen": -1.952294111251831, "logps/rejected": -4.927083492279053, "loss": 0.8349, "nll_loss": 0.810835063457489, "rewards/accuracies": 0.875, "rewards/chosen": -0.1952294260263443, "rewards/margins": 0.29747894406318665, "rewards/rejected": -0.49270838499069214, "step": 2547 }, { "epoch": 6.976043805612594, "grad_norm": 4.064515113830566, "learning_rate": 6.510958904109589e-07, "log_odds_chosen": 1.108379602432251, "log_odds_ratio": -0.382783442735672, "logits/chosen": 0.8428774476051331, "logits/rejected": 0.8076410293579102, "logps/chosen": -2.012025833129883, "logps/rejected": -3.028195858001709, "loss": 0.8668, "nll_loss": 0.8285428285598755, "rewards/accuracies": 0.875, "rewards/chosen": -0.20120258629322052, "rewards/margins": 0.1016169860959053, "rewards/rejected": -0.3028195798397064, "step": 2548 }, { "epoch": 6.978781656399726, "grad_norm": 4.545489311218262, "learning_rate": 6.50958904109589e-07, "log_odds_chosen": 2.027189254760742, "log_odds_ratio": -0.140217125415802, "logits/chosen": 0.9118945002555847, "logits/rejected": 0.8892290592193604, "logps/chosen": -2.156116008758545, "logps/rejected": -4.036868095397949, "loss": 0.7021, "nll_loss": 0.6880410313606262, "rewards/accuracies": 1.0, "rewards/chosen": -0.21561160683631897, "rewards/margins": 0.18807518482208252, "rewards/rejected": -0.4036867916584015, "step": 2549 }, { "epoch": 6.981519507186858, "grad_norm": 4.198113918304443, "learning_rate": 6.508219178082191e-07, "log_odds_chosen": 1.3697303533554077, "log_odds_ratio": -0.29024577140808105, "logits/chosen": 0.7166308760643005, "logits/rejected": 0.671013355255127, "logps/chosen": -1.779365062713623, "logps/rejected": -2.985711097717285, "loss": 0.7948, "nll_loss": 0.7658088803291321, "rewards/accuracies": 1.0, "rewards/chosen": -0.17793649435043335, "rewards/margins": 0.12063463032245636, "rewards/rejected": -0.2985711395740509, "step": 2550 }, { "epoch": 6.98425735797399, "grad_norm": 5.786930561065674, "learning_rate": 6.506849315068493e-07, "log_odds_chosen": 0.7975656986236572, "log_odds_ratio": -0.6857119798660278, "logits/chosen": 0.7968459725379944, "logits/rejected": 0.8379589915275574, "logps/chosen": -2.847701072692871, "logps/rejected": -3.616896390914917, "loss": 0.8401, "nll_loss": 0.7715723514556885, "rewards/accuracies": 0.625, "rewards/chosen": -0.284770131111145, "rewards/margins": 0.07691952586174011, "rewards/rejected": -0.36168965697288513, "step": 2551 }, { "epoch": 6.9869952087611225, "grad_norm": 4.110084056854248, "learning_rate": 6.505479452054794e-07, "log_odds_chosen": 1.21795654296875, "log_odds_ratio": -0.3303769826889038, "logits/chosen": 0.8032633066177368, "logits/rejected": 0.8198736906051636, "logps/chosen": -2.2050585746765137, "logps/rejected": -3.3183581829071045, "loss": 0.8254, "nll_loss": 0.7923264503479004, "rewards/accuracies": 0.875, "rewards/chosen": -0.22050584852695465, "rewards/margins": 0.11132996529340744, "rewards/rejected": -0.3318358361721039, "step": 2552 }, { "epoch": 6.9897330595482545, "grad_norm": 4.301632881164551, "learning_rate": 6.504109589041096e-07, "log_odds_chosen": 0.971355140209198, "log_odds_ratio": -0.36761674284935, "logits/chosen": 0.7960430979728699, "logits/rejected": 0.7730071544647217, "logps/chosen": -2.1099753379821777, "logps/rejected": -2.9781455993652344, "loss": 0.8191, "nll_loss": 0.7823803424835205, "rewards/accuracies": 0.875, "rewards/chosen": -0.21099752187728882, "rewards/margins": 0.08681704103946686, "rewards/rejected": -0.29781457781791687, "step": 2553 }, { "epoch": 6.992470910335387, "grad_norm": 3.892430305480957, "learning_rate": 6.502739726027397e-07, "log_odds_chosen": 1.3500787019729614, "log_odds_ratio": -0.5737219452857971, "logits/chosen": 0.7980406284332275, "logits/rejected": 0.851539134979248, "logps/chosen": -2.6911752223968506, "logps/rejected": -3.976409912109375, "loss": 0.9367, "nll_loss": 0.8792939186096191, "rewards/accuracies": 0.875, "rewards/chosen": -0.269117534160614, "rewards/margins": 0.12852346897125244, "rewards/rejected": -0.39764100313186646, "step": 2554 }, { "epoch": 6.995208761122519, "grad_norm": 4.46870756149292, "learning_rate": 6.501369863013698e-07, "log_odds_chosen": 2.0310256481170654, "log_odds_ratio": -0.4861001968383789, "logits/chosen": 0.8713802695274353, "logits/rejected": 0.8771003484725952, "logps/chosen": -2.262169599533081, "logps/rejected": -4.254582405090332, "loss": 0.8423, "nll_loss": 0.7937106490135193, "rewards/accuracies": 0.75, "rewards/chosen": -0.22621695697307587, "rewards/margins": 0.1992412954568863, "rewards/rejected": -0.42545825242996216, "step": 2555 }, { "epoch": 6.997946611909651, "grad_norm": 7.6046857833862305, "learning_rate": 6.5e-07, "log_odds_chosen": 0.5810843706130981, "log_odds_ratio": -0.6742562055587769, "logits/chosen": 0.9198712706565857, "logits/rejected": 0.9566673040390015, "logps/chosen": -3.196078062057495, "logps/rejected": -3.711576461791992, "loss": 0.8468, "nll_loss": 0.77940434217453, "rewards/accuracies": 0.625, "rewards/chosen": -0.31960782408714294, "rewards/margins": 0.05154983699321747, "rewards/rejected": -0.371157705783844, "step": 2556 }, { "epoch": 7.000684462696783, "grad_norm": 4.857081413269043, "learning_rate": 6.4986301369863e-07, "log_odds_chosen": 0.6092219352722168, "log_odds_ratio": -0.6086550354957581, "logits/chosen": 0.9545356035232544, "logits/rejected": 0.9170156717300415, "logps/chosen": -2.069772481918335, "logps/rejected": -2.5934934616088867, "loss": 0.8808, "nll_loss": 0.8199167251586914, "rewards/accuracies": 0.75, "rewards/chosen": -0.2069772481918335, "rewards/margins": 0.05237208306789398, "rewards/rejected": -0.25934934616088867, "step": 2557 }, { "epoch": 7.003422313483915, "grad_norm": 4.491929054260254, "learning_rate": 6.497260273972602e-07, "log_odds_chosen": 2.1917521953582764, "log_odds_ratio": -0.14151202142238617, "logits/chosen": 0.9040679931640625, "logits/rejected": 0.9628763198852539, "logps/chosen": -2.0084240436553955, "logps/rejected": -4.049020767211914, "loss": 0.746, "nll_loss": 0.7318004369735718, "rewards/accuracies": 1.0, "rewards/chosen": -0.20084241032600403, "rewards/margins": 0.2040596604347229, "rewards/rejected": -0.4049021005630493, "step": 2558 }, { "epoch": 7.006160164271047, "grad_norm": 3.8719255924224854, "learning_rate": 6.495890410958904e-07, "log_odds_chosen": 2.129253387451172, "log_odds_ratio": -0.2778226137161255, "logits/chosen": 0.7451964616775513, "logits/rejected": 0.7615436315536499, "logps/chosen": -1.9305661916732788, "logps/rejected": -3.906461715698242, "loss": 0.8865, "nll_loss": 0.8587480187416077, "rewards/accuracies": 1.0, "rewards/chosen": -0.1930566132068634, "rewards/margins": 0.19758956134319305, "rewards/rejected": -0.39064618945121765, "step": 2559 }, { "epoch": 7.008898015058179, "grad_norm": 4.089164733886719, "learning_rate": 6.494520547945205e-07, "log_odds_chosen": 1.7798774242401123, "log_odds_ratio": -0.25461286306381226, "logits/chosen": 0.9408090114593506, "logits/rejected": 1.0071396827697754, "logps/chosen": -2.2404837608337402, "logps/rejected": -3.915341854095459, "loss": 0.7021, "nll_loss": 0.6766541600227356, "rewards/accuracies": 1.0, "rewards/chosen": -0.22404837608337402, "rewards/margins": 0.1674857884645462, "rewards/rejected": -0.3915341794490814, "step": 2560 }, { "epoch": 7.011635865845311, "grad_norm": 3.9705400466918945, "learning_rate": 6.493150684931506e-07, "log_odds_chosen": 1.9380818605422974, "log_odds_ratio": -0.17358753085136414, "logits/chosen": 0.8783695697784424, "logits/rejected": 0.8995192050933838, "logps/chosen": -2.3626339435577393, "logps/rejected": -4.185604095458984, "loss": 0.8298, "nll_loss": 0.8124120235443115, "rewards/accuracies": 1.0, "rewards/chosen": -0.23626339435577393, "rewards/margins": 0.1822970062494278, "rewards/rejected": -0.4185603857040405, "step": 2561 }, { "epoch": 7.014373716632443, "grad_norm": 3.789921522140503, "learning_rate": 6.491780821917808e-07, "log_odds_chosen": 1.9367331266403198, "log_odds_ratio": -0.2901167571544647, "logits/chosen": 0.9123053550720215, "logits/rejected": 0.9064948558807373, "logps/chosen": -2.2093541622161865, "logps/rejected": -4.051863670349121, "loss": 0.7458, "nll_loss": 0.7167799472808838, "rewards/accuracies": 0.875, "rewards/chosen": -0.22093543410301208, "rewards/margins": 0.18425095081329346, "rewards/rejected": -0.40518635511398315, "step": 2562 }, { "epoch": 7.017111567419575, "grad_norm": 3.5008411407470703, "learning_rate": 6.490410958904109e-07, "log_odds_chosen": 2.0946240425109863, "log_odds_ratio": -0.17009209096431732, "logits/chosen": 0.7667475938796997, "logits/rejected": 0.7837792038917542, "logps/chosen": -1.6327064037322998, "logps/rejected": -3.546074390411377, "loss": 0.7319, "nll_loss": 0.7148634791374207, "rewards/accuracies": 1.0, "rewards/chosen": -0.16327062249183655, "rewards/margins": 0.19133679568767548, "rewards/rejected": -0.3546074628829956, "step": 2563 }, { "epoch": 7.019849418206708, "grad_norm": 4.3657732009887695, "learning_rate": 6.48904109589041e-07, "log_odds_chosen": 1.4023118019104004, "log_odds_ratio": -0.3582826852798462, "logits/chosen": 0.9883413910865784, "logits/rejected": 1.0559983253479004, "logps/chosen": -2.386425256729126, "logps/rejected": -3.7073020935058594, "loss": 0.7906, "nll_loss": 0.7548021078109741, "rewards/accuracies": 0.875, "rewards/chosen": -0.23864254355430603, "rewards/margins": 0.13208764791488647, "rewards/rejected": -0.3707301914691925, "step": 2564 }, { "epoch": 7.02258726899384, "grad_norm": 5.3341546058654785, "learning_rate": 6.487671232876712e-07, "log_odds_chosen": 0.44718703627586365, "log_odds_ratio": -0.6548894643783569, "logits/chosen": 0.7297837734222412, "logits/rejected": 0.6996122002601624, "logps/chosen": -2.8157076835632324, "logps/rejected": -3.255150556564331, "loss": 0.8949, "nll_loss": 0.8294192552566528, "rewards/accuracies": 0.625, "rewards/chosen": -0.28157076239585876, "rewards/margins": 0.04394429922103882, "rewards/rejected": -0.32551509141921997, "step": 2565 }, { "epoch": 7.025325119780972, "grad_norm": 4.751175403594971, "learning_rate": 6.486301369863013e-07, "log_odds_chosen": 2.068598747253418, "log_odds_ratio": -0.3006991446018219, "logits/chosen": 1.1187500953674316, "logits/rejected": 1.0814028978347778, "logps/chosen": -2.802968740463257, "logps/rejected": -4.7676191329956055, "loss": 0.7954, "nll_loss": 0.7653721570968628, "rewards/accuracies": 0.875, "rewards/chosen": -0.28029686212539673, "rewards/margins": 0.19646508991718292, "rewards/rejected": -0.47676199674606323, "step": 2566 }, { "epoch": 7.028062970568104, "grad_norm": 4.134835243225098, "learning_rate": 6.484931506849315e-07, "log_odds_chosen": 2.350501537322998, "log_odds_ratio": -0.33157533407211304, "logits/chosen": 1.0028369426727295, "logits/rejected": 0.9998931288719177, "logps/chosen": -2.6046905517578125, "logps/rejected": -4.899181842803955, "loss": 0.8326, "nll_loss": 0.7994810938835144, "rewards/accuracies": 0.875, "rewards/chosen": -0.2604690492153168, "rewards/margins": 0.22944916784763336, "rewards/rejected": -0.48991820216178894, "step": 2567 }, { "epoch": 7.030800821355236, "grad_norm": 5.4462103843688965, "learning_rate": 6.483561643835616e-07, "log_odds_chosen": 1.9469404220581055, "log_odds_ratio": -0.31713125109672546, "logits/chosen": 0.8829811811447144, "logits/rejected": 0.8885089159011841, "logps/chosen": -2.5559420585632324, "logps/rejected": -4.37977409362793, "loss": 0.828, "nll_loss": 0.7963336110115051, "rewards/accuracies": 0.75, "rewards/chosen": -0.2555942237377167, "rewards/margins": 0.1823832094669342, "rewards/rejected": -0.4379774034023285, "step": 2568 }, { "epoch": 7.0335386721423685, "grad_norm": 4.253317832946777, "learning_rate": 6.482191780821917e-07, "log_odds_chosen": 1.3405826091766357, "log_odds_ratio": -0.4084756374359131, "logits/chosen": 0.955070972442627, "logits/rejected": 1.0206375122070312, "logps/chosen": -2.527329444885254, "logps/rejected": -3.744638442993164, "loss": 0.7788, "nll_loss": 0.7379603385925293, "rewards/accuracies": 0.75, "rewards/chosen": -0.2527329623699188, "rewards/margins": 0.12173090130090714, "rewards/rejected": -0.37446385622024536, "step": 2569 }, { "epoch": 7.036276522929501, "grad_norm": 3.8500142097473145, "learning_rate": 6.480821917808219e-07, "log_odds_chosen": 1.8831251859664917, "log_odds_ratio": -0.28729283809661865, "logits/chosen": 0.6836993098258972, "logits/rejected": 0.650598406791687, "logps/chosen": -2.2983152866363525, "logps/rejected": -4.088351249694824, "loss": 0.8631, "nll_loss": 0.8344043493270874, "rewards/accuracies": 0.875, "rewards/chosen": -0.2298315316438675, "rewards/margins": 0.17900361120700836, "rewards/rejected": -0.40883514285087585, "step": 2570 }, { "epoch": 7.039014373716633, "grad_norm": 3.829561710357666, "learning_rate": 6.479452054794519e-07, "log_odds_chosen": 1.9307093620300293, "log_odds_ratio": -0.4258105456829071, "logits/chosen": 0.6287227272987366, "logits/rejected": 0.6049835085868835, "logps/chosen": -1.9568127393722534, "logps/rejected": -3.757772922515869, "loss": 0.8067, "nll_loss": 0.7641376256942749, "rewards/accuracies": 0.75, "rewards/chosen": -0.19568127393722534, "rewards/margins": 0.18009603023529053, "rewards/rejected": -0.3757772743701935, "step": 2571 }, { "epoch": 7.041752224503765, "grad_norm": 3.925230026245117, "learning_rate": 6.478082191780821e-07, "log_odds_chosen": 1.123332142829895, "log_odds_ratio": -0.3356963098049164, "logits/chosen": 0.8162740468978882, "logits/rejected": 0.7708468437194824, "logps/chosen": -2.367619514465332, "logps/rejected": -3.4029033184051514, "loss": 0.8594, "nll_loss": 0.8258213996887207, "rewards/accuracies": 1.0, "rewards/chosen": -0.2367619425058365, "rewards/margins": 0.10352836549282074, "rewards/rejected": -0.3402903079986572, "step": 2572 }, { "epoch": 7.044490075290897, "grad_norm": 3.6944591999053955, "learning_rate": 6.476712328767123e-07, "log_odds_chosen": 1.201122760772705, "log_odds_ratio": -0.4961972236633301, "logits/chosen": 0.9000040888786316, "logits/rejected": 0.8793551325798035, "logps/chosen": -2.5263748168945312, "logps/rejected": -3.616598129272461, "loss": 0.9549, "nll_loss": 0.9052331447601318, "rewards/accuracies": 0.875, "rewards/chosen": -0.25263750553131104, "rewards/margins": 0.10902231931686401, "rewards/rejected": -0.36165982484817505, "step": 2573 }, { "epoch": 7.047227926078029, "grad_norm": 5.240984916687012, "learning_rate": 6.475342465753424e-07, "log_odds_chosen": 1.260186791419983, "log_odds_ratio": -0.4229089915752411, "logits/chosen": 0.6486035585403442, "logits/rejected": 0.6296110153198242, "logps/chosen": -2.2293267250061035, "logps/rejected": -3.390047550201416, "loss": 0.8241, "nll_loss": 0.7817820906639099, "rewards/accuracies": 0.875, "rewards/chosen": -0.22293266654014587, "rewards/margins": 0.11607206612825394, "rewards/rejected": -0.3390047252178192, "step": 2574 }, { "epoch": 7.049965776865161, "grad_norm": 3.825594902038574, "learning_rate": 6.473972602739725e-07, "log_odds_chosen": 2.2318272590637207, "log_odds_ratio": -0.3410753905773163, "logits/chosen": 0.8520939350128174, "logits/rejected": 0.9650804996490479, "logps/chosen": -3.4310989379882812, "logps/rejected": -5.613881587982178, "loss": 0.8513, "nll_loss": 0.8171923160552979, "rewards/accuracies": 0.875, "rewards/chosen": -0.3431099057197571, "rewards/margins": 0.21827827394008636, "rewards/rejected": -0.5613881349563599, "step": 2575 }, { "epoch": 7.052703627652293, "grad_norm": 4.912737846374512, "learning_rate": 6.472602739726027e-07, "log_odds_chosen": 2.31443452835083, "log_odds_ratio": -0.3435033857822418, "logits/chosen": 1.059784173965454, "logits/rejected": 1.0381020307540894, "logps/chosen": -2.490020751953125, "logps/rejected": -4.727370262145996, "loss": 0.7366, "nll_loss": 0.7022020816802979, "rewards/accuracies": 0.875, "rewards/chosen": -0.24900208413600922, "rewards/margins": 0.22373494505882263, "rewards/rejected": -0.47273704409599304, "step": 2576 }, { "epoch": 7.055441478439425, "grad_norm": 5.58356237411499, "learning_rate": 6.471232876712328e-07, "log_odds_chosen": 1.9617096185684204, "log_odds_ratio": -0.31224387884140015, "logits/chosen": 0.849748969078064, "logits/rejected": 0.8488729000091553, "logps/chosen": -1.8044850826263428, "logps/rejected": -3.6109201908111572, "loss": 0.7696, "nll_loss": 0.7384212613105774, "rewards/accuracies": 0.875, "rewards/chosen": -0.180448517203331, "rewards/margins": 0.18064351379871368, "rewards/rejected": -0.3610920011997223, "step": 2577 }, { "epoch": 7.058179329226557, "grad_norm": 4.498866081237793, "learning_rate": 6.469863013698629e-07, "log_odds_chosen": 0.824397087097168, "log_odds_ratio": -0.5309015512466431, "logits/chosen": 0.5937693119049072, "logits/rejected": 0.6332973837852478, "logps/chosen": -2.7399611473083496, "logps/rejected": -3.5292277336120605, "loss": 0.8634, "nll_loss": 0.8102691173553467, "rewards/accuracies": 0.625, "rewards/chosen": -0.27399614453315735, "rewards/margins": 0.07892663776874542, "rewards/rejected": -0.3529227674007416, "step": 2578 }, { "epoch": 7.060917180013689, "grad_norm": 3.2726283073425293, "learning_rate": 6.468493150684932e-07, "log_odds_chosen": 2.7349679470062256, "log_odds_ratio": -0.1862833946943283, "logits/chosen": 0.8951510190963745, "logits/rejected": 0.9132684469223022, "logps/chosen": -2.0872316360473633, "logps/rejected": -4.703704357147217, "loss": 0.7301, "nll_loss": 0.7114314436912537, "rewards/accuracies": 1.0, "rewards/chosen": -0.20872318744659424, "rewards/margins": 0.2616473138332367, "rewards/rejected": -0.47037047147750854, "step": 2579 }, { "epoch": 7.063655030800821, "grad_norm": 4.115294456481934, "learning_rate": 6.467123287671232e-07, "log_odds_chosen": 2.0915451049804688, "log_odds_ratio": -0.30199021100997925, "logits/chosen": 0.6460981369018555, "logits/rejected": 0.6009344458580017, "logps/chosen": -2.2200398445129395, "logps/rejected": -4.1685638427734375, "loss": 0.7373, "nll_loss": 0.7070900201797485, "rewards/accuracies": 0.875, "rewards/chosen": -0.2220039963722229, "rewards/margins": 0.19485239684581757, "rewards/rejected": -0.4168563485145569, "step": 2580 }, { "epoch": 7.066392881587953, "grad_norm": 4.784152507781982, "learning_rate": 6.465753424657535e-07, "log_odds_chosen": 2.1070053577423096, "log_odds_ratio": -0.2690913677215576, "logits/chosen": 0.8511301279067993, "logits/rejected": 0.8113762140274048, "logps/chosen": -1.9995825290679932, "logps/rejected": -4.010590553283691, "loss": 0.7873, "nll_loss": 0.7603711485862732, "rewards/accuracies": 1.0, "rewards/chosen": -0.19995826482772827, "rewards/margins": 0.20110078155994415, "rewards/rejected": -0.40105903148651123, "step": 2581 }, { "epoch": 7.069130732375085, "grad_norm": 5.295147895812988, "learning_rate": 6.464383561643836e-07, "log_odds_chosen": 1.151658535003662, "log_odds_ratio": -0.37017446756362915, "logits/chosen": 0.998583197593689, "logits/rejected": 0.9890837669372559, "logps/chosen": -2.5223305225372314, "logps/rejected": -3.585028648376465, "loss": 0.7984, "nll_loss": 0.7613657712936401, "rewards/accuracies": 0.875, "rewards/chosen": -0.25223302841186523, "rewards/margins": 0.10626984387636185, "rewards/rejected": -0.35850289463996887, "step": 2582 }, { "epoch": 7.0718685831622174, "grad_norm": 4.012388229370117, "learning_rate": 6.463013698630137e-07, "log_odds_chosen": 2.780291795730591, "log_odds_ratio": -0.17173582315444946, "logits/chosen": 0.7270768880844116, "logits/rejected": 0.6981472373008728, "logps/chosen": -2.0390453338623047, "logps/rejected": -4.655776500701904, "loss": 0.7811, "nll_loss": 0.763941764831543, "rewards/accuracies": 1.0, "rewards/chosen": -0.20390453934669495, "rewards/margins": 0.26167309284210205, "rewards/rejected": -0.4655776619911194, "step": 2583 }, { "epoch": 7.0746064339493495, "grad_norm": 4.0035271644592285, "learning_rate": 6.461643835616439e-07, "log_odds_chosen": 2.269594669342041, "log_odds_ratio": -0.39006611704826355, "logits/chosen": 0.8161628842353821, "logits/rejected": 0.8727091550827026, "logps/chosen": -2.528921127319336, "logps/rejected": -4.729155540466309, "loss": 0.9072, "nll_loss": 0.8682327270507812, "rewards/accuracies": 0.875, "rewards/chosen": -0.2528921365737915, "rewards/margins": 0.22002346813678741, "rewards/rejected": -0.47291556000709534, "step": 2584 }, { "epoch": 7.077344284736482, "grad_norm": 3.9607656002044678, "learning_rate": 6.46027397260274e-07, "log_odds_chosen": 2.130702495574951, "log_odds_ratio": -0.25492334365844727, "logits/chosen": 0.775562047958374, "logits/rejected": 0.782464861869812, "logps/chosen": -1.8342972993850708, "logps/rejected": -3.798532485961914, "loss": 0.8492, "nll_loss": 0.8236788511276245, "rewards/accuracies": 0.875, "rewards/chosen": -0.1834297478199005, "rewards/margins": 0.1964235007762909, "rewards/rejected": -0.3798532485961914, "step": 2585 }, { "epoch": 7.080082135523614, "grad_norm": 3.889319658279419, "learning_rate": 6.458904109589041e-07, "log_odds_chosen": 3.0414867401123047, "log_odds_ratio": -0.21126598119735718, "logits/chosen": 0.8146693706512451, "logits/rejected": 0.8110063076019287, "logps/chosen": -2.046603202819824, "logps/rejected": -4.946555137634277, "loss": 0.8869, "nll_loss": 0.8657492399215698, "rewards/accuracies": 0.875, "rewards/chosen": -0.2046603411436081, "rewards/margins": 0.2899951934814453, "rewards/rejected": -0.4946555495262146, "step": 2586 }, { "epoch": 7.082819986310746, "grad_norm": 3.9249815940856934, "learning_rate": 6.457534246575343e-07, "log_odds_chosen": 0.5711982250213623, "log_odds_ratio": -0.5224168300628662, "logits/chosen": 0.6356902122497559, "logits/rejected": 0.6280993223190308, "logps/chosen": -1.9903367757797241, "logps/rejected": -2.486146926879883, "loss": 0.851, "nll_loss": 0.7987697124481201, "rewards/accuracies": 0.75, "rewards/chosen": -0.1990336775779724, "rewards/margins": 0.049581028521060944, "rewards/rejected": -0.24861471354961395, "step": 2587 }, { "epoch": 7.085557837097878, "grad_norm": 4.932101726531982, "learning_rate": 6.456164383561643e-07, "log_odds_chosen": 1.0745807886123657, "log_odds_ratio": -0.5952889919281006, "logits/chosen": 0.7935057878494263, "logits/rejected": 0.8715217113494873, "logps/chosen": -2.360424518585205, "logps/rejected": -3.3627548217773438, "loss": 0.7904, "nll_loss": 0.7308764457702637, "rewards/accuracies": 0.875, "rewards/chosen": -0.23604246973991394, "rewards/margins": 0.1002330332994461, "rewards/rejected": -0.33627548813819885, "step": 2588 }, { "epoch": 7.08829568788501, "grad_norm": 3.9348692893981934, "learning_rate": 6.454794520547945e-07, "log_odds_chosen": 2.749293804168701, "log_odds_ratio": -0.1313881129026413, "logits/chosen": 0.770248532295227, "logits/rejected": 0.7447755932807922, "logps/chosen": -1.868591547012329, "logps/rejected": -4.440565586090088, "loss": 0.7945, "nll_loss": 0.7813185453414917, "rewards/accuracies": 1.0, "rewards/chosen": -0.18685917556285858, "rewards/margins": 0.25719740986824036, "rewards/rejected": -0.44405657052993774, "step": 2589 }, { "epoch": 7.091033538672143, "grad_norm": 4.1663031578063965, "learning_rate": 6.453424657534247e-07, "log_odds_chosen": 2.153693914413452, "log_odds_ratio": -0.2692781090736389, "logits/chosen": 1.009937047958374, "logits/rejected": 0.9742677211761475, "logps/chosen": -1.6593974828720093, "logps/rejected": -3.666018486022949, "loss": 0.7872, "nll_loss": 0.7602856159210205, "rewards/accuracies": 1.0, "rewards/chosen": -0.16593976318836212, "rewards/margins": 0.20066207647323608, "rewards/rejected": -0.3666018545627594, "step": 2590 }, { "epoch": 7.093771389459275, "grad_norm": 4.5900044441223145, "learning_rate": 6.452054794520548e-07, "log_odds_chosen": 1.7322343587875366, "log_odds_ratio": -0.5619357824325562, "logits/chosen": 0.7861624956130981, "logits/rejected": 0.779350996017456, "logps/chosen": -2.484865427017212, "logps/rejected": -4.182888031005859, "loss": 0.8598, "nll_loss": 0.8036515712738037, "rewards/accuracies": 0.5, "rewards/chosen": -0.24848654866218567, "rewards/margins": 0.16980227828025818, "rewards/rejected": -0.41828882694244385, "step": 2591 }, { "epoch": 7.096509240246407, "grad_norm": 4.935871601104736, "learning_rate": 6.450684931506849e-07, "log_odds_chosen": 1.5465928316116333, "log_odds_ratio": -0.6070595979690552, "logits/chosen": 0.9262570142745972, "logits/rejected": 1.031847596168518, "logps/chosen": -3.898667335510254, "logps/rejected": -5.432337760925293, "loss": 1.0197, "nll_loss": 0.9589978456497192, "rewards/accuracies": 0.75, "rewards/chosen": -0.3898667097091675, "rewards/margins": 0.1533670723438263, "rewards/rejected": -0.5432337522506714, "step": 2592 }, { "epoch": 7.099247091033539, "grad_norm": 5.427186965942383, "learning_rate": 6.449315068493151e-07, "log_odds_chosen": 2.0846996307373047, "log_odds_ratio": -0.3411158323287964, "logits/chosen": 0.8019957542419434, "logits/rejected": 0.8585927486419678, "logps/chosen": -2.517449378967285, "logps/rejected": -4.550239562988281, "loss": 0.8422, "nll_loss": 0.8081341981887817, "rewards/accuracies": 0.875, "rewards/chosen": -0.25174495577812195, "rewards/margins": 0.20327895879745483, "rewards/rejected": -0.4550238847732544, "step": 2593 }, { "epoch": 7.101984941820671, "grad_norm": 5.02794885635376, "learning_rate": 6.447945205479452e-07, "log_odds_chosen": 1.3589096069335938, "log_odds_ratio": -0.5395946502685547, "logits/chosen": 0.663922905921936, "logits/rejected": 0.5688199996948242, "logps/chosen": -2.2072644233703613, "logps/rejected": -3.456423282623291, "loss": 0.8913, "nll_loss": 0.8372994661331177, "rewards/accuracies": 0.75, "rewards/chosen": -0.22072644531726837, "rewards/margins": 0.12491588294506073, "rewards/rejected": -0.3456423282623291, "step": 2594 }, { "epoch": 7.104722792607803, "grad_norm": 4.058465480804443, "learning_rate": 6.446575342465753e-07, "log_odds_chosen": 1.706721305847168, "log_odds_ratio": -0.2862618863582611, "logits/chosen": 0.9436516761779785, "logits/rejected": 0.9556832313537598, "logps/chosen": -2.2660744190216064, "logps/rejected": -3.786512851715088, "loss": 0.8363, "nll_loss": 0.8076434135437012, "rewards/accuracies": 0.875, "rewards/chosen": -0.22660744190216064, "rewards/margins": 0.15204383432865143, "rewards/rejected": -0.3786512613296509, "step": 2595 }, { "epoch": 7.107460643394935, "grad_norm": 3.690342426300049, "learning_rate": 6.445205479452055e-07, "log_odds_chosen": 1.294111967086792, "log_odds_ratio": -0.5131525993347168, "logits/chosen": 0.6555287837982178, "logits/rejected": 0.6497951745986938, "logps/chosen": -2.2482738494873047, "logps/rejected": -3.382061243057251, "loss": 0.8016, "nll_loss": 0.7502777576446533, "rewards/accuracies": 0.875, "rewards/chosen": -0.22482740879058838, "rewards/margins": 0.11337871849536896, "rewards/rejected": -0.33820614218711853, "step": 2596 }, { "epoch": 7.110198494182067, "grad_norm": 3.743133544921875, "learning_rate": 6.443835616438356e-07, "log_odds_chosen": 2.0156726837158203, "log_odds_ratio": -0.16579490900039673, "logits/chosen": 0.8883389234542847, "logits/rejected": 0.915022611618042, "logps/chosen": -2.3681511878967285, "logps/rejected": -4.256466865539551, "loss": 0.688, "nll_loss": 0.6714446544647217, "rewards/accuracies": 1.0, "rewards/chosen": -0.23681512475013733, "rewards/margins": 0.18883155286312103, "rewards/rejected": -0.42564666271209717, "step": 2597 }, { "epoch": 7.112936344969199, "grad_norm": 3.846219301223755, "learning_rate": 6.442465753424658e-07, "log_odds_chosen": 2.510535955429077, "log_odds_ratio": -0.32919251918792725, "logits/chosen": 0.8441193103790283, "logits/rejected": 0.8658230304718018, "logps/chosen": -2.5214600563049316, "logps/rejected": -4.9172868728637695, "loss": 0.9296, "nll_loss": 0.896649956703186, "rewards/accuracies": 0.75, "rewards/chosen": -0.25214600563049316, "rewards/margins": 0.23958271741867065, "rewards/rejected": -0.4917287230491638, "step": 2598 }, { "epoch": 7.115674195756331, "grad_norm": 5.777045726776123, "learning_rate": 6.441095890410959e-07, "log_odds_chosen": 1.857344150543213, "log_odds_ratio": -0.5826594233512878, "logits/chosen": 1.1015695333480835, "logits/rejected": 1.1807302236557007, "logps/chosen": -2.764341115951538, "logps/rejected": -4.547922134399414, "loss": 0.8345, "nll_loss": 0.7762480974197388, "rewards/accuracies": 0.625, "rewards/chosen": -0.27643412351608276, "rewards/margins": 0.17835815250873566, "rewards/rejected": -0.45479223132133484, "step": 2599 }, { "epoch": 7.1184120465434635, "grad_norm": 4.749007225036621, "learning_rate": 6.43972602739726e-07, "log_odds_chosen": 1.9470572471618652, "log_odds_ratio": -0.27102023363113403, "logits/chosen": 0.8973151445388794, "logits/rejected": 0.9058550596237183, "logps/chosen": -1.783366084098816, "logps/rejected": -3.5833919048309326, "loss": 0.7912, "nll_loss": 0.7641037702560425, "rewards/accuracies": 0.875, "rewards/chosen": -0.17833662033081055, "rewards/margins": 0.1800025850534439, "rewards/rejected": -0.35833919048309326, "step": 2600 }, { "epoch": 7.121149897330596, "grad_norm": 6.196109294891357, "learning_rate": 6.438356164383562e-07, "log_odds_chosen": 2.3780555725097656, "log_odds_ratio": -0.47019022703170776, "logits/chosen": 0.9001681804656982, "logits/rejected": 0.8949295282363892, "logps/chosen": -2.4167027473449707, "logps/rejected": -4.681366920471191, "loss": 0.7919, "nll_loss": 0.7448450922966003, "rewards/accuracies": 0.875, "rewards/chosen": -0.24167028069496155, "rewards/margins": 0.22646641731262207, "rewards/rejected": -0.46813666820526123, "step": 2601 }, { "epoch": 7.123887748117728, "grad_norm": 5.087160587310791, "learning_rate": 6.436986301369862e-07, "log_odds_chosen": 1.8664219379425049, "log_odds_ratio": -0.24386508762836456, "logits/chosen": 1.0575672388076782, "logits/rejected": 1.054522156715393, "logps/chosen": -2.1719777584075928, "logps/rejected": -3.83994722366333, "loss": 0.7597, "nll_loss": 0.7353282570838928, "rewards/accuracies": 1.0, "rewards/chosen": -0.21719779074192047, "rewards/margins": 0.16679693758487701, "rewards/rejected": -0.3839947581291199, "step": 2602 }, { "epoch": 7.12662559890486, "grad_norm": 4.736503601074219, "learning_rate": 6.435616438356164e-07, "log_odds_chosen": 1.351992130279541, "log_odds_ratio": -0.32537102699279785, "logits/chosen": 0.7916924953460693, "logits/rejected": 0.8257593512535095, "logps/chosen": -2.467751979827881, "logps/rejected": -3.706812620162964, "loss": 0.747, "nll_loss": 0.7144465446472168, "rewards/accuracies": 0.875, "rewards/chosen": -0.24677519500255585, "rewards/margins": 0.12390609085559845, "rewards/rejected": -0.3706812858581543, "step": 2603 }, { "epoch": 7.129363449691992, "grad_norm": 3.633753776550293, "learning_rate": 6.434246575342466e-07, "log_odds_chosen": 2.4668703079223633, "log_odds_ratio": -0.19783714413642883, "logits/chosen": 0.7540246844291687, "logits/rejected": 0.698268473148346, "logps/chosen": -1.6838359832763672, "logps/rejected": -3.9843904972076416, "loss": 0.8258, "nll_loss": 0.8059805035591125, "rewards/accuracies": 1.0, "rewards/chosen": -0.16838359832763672, "rewards/margins": 0.23005545139312744, "rewards/rejected": -0.39843904972076416, "step": 2604 }, { "epoch": 7.132101300479124, "grad_norm": 3.8628029823303223, "learning_rate": 6.432876712328767e-07, "log_odds_chosen": 1.7968660593032837, "log_odds_ratio": -0.2979918122291565, "logits/chosen": 1.041698694229126, "logits/rejected": 1.0479240417480469, "logps/chosen": -2.221173048019409, "logps/rejected": -3.9152636528015137, "loss": 0.7543, "nll_loss": 0.7245075702667236, "rewards/accuracies": 0.875, "rewards/chosen": -0.22211730480194092, "rewards/margins": 0.16940903663635254, "rewards/rejected": -0.39152634143829346, "step": 2605 }, { "epoch": 7.134839151266256, "grad_norm": 5.692542552947998, "learning_rate": 6.431506849315068e-07, "log_odds_chosen": 0.43562251329421997, "log_odds_ratio": -0.5810605883598328, "logits/chosen": 0.8027483820915222, "logits/rejected": 0.8175764083862305, "logps/chosen": -3.0255422592163086, "logps/rejected": -3.4236652851104736, "loss": 0.8734, "nll_loss": 0.8153001070022583, "rewards/accuracies": 0.75, "rewards/chosen": -0.3025542199611664, "rewards/margins": 0.039812296628952026, "rewards/rejected": -0.3423665165901184, "step": 2606 }, { "epoch": 7.137577002053388, "grad_norm": 4.077224254608154, "learning_rate": 6.43013698630137e-07, "log_odds_chosen": 0.9954757690429688, "log_odds_ratio": -0.48036855459213257, "logits/chosen": 0.8140568733215332, "logits/rejected": 0.8119263052940369, "logps/chosen": -2.18233585357666, "logps/rejected": -3.131263017654419, "loss": 0.7623, "nll_loss": 0.71428382396698, "rewards/accuracies": 0.75, "rewards/chosen": -0.2182336002588272, "rewards/margins": 0.094892717897892, "rewards/rejected": -0.3131263256072998, "step": 2607 }, { "epoch": 7.14031485284052, "grad_norm": 4.349678039550781, "learning_rate": 6.428767123287671e-07, "log_odds_chosen": 1.9792677164077759, "log_odds_ratio": -0.2219846546649933, "logits/chosen": 0.7647315859794617, "logits/rejected": 0.6788696050643921, "logps/chosen": -1.8469254970550537, "logps/rejected": -3.6850032806396484, "loss": 0.7462, "nll_loss": 0.7239658236503601, "rewards/accuracies": 1.0, "rewards/chosen": -0.18469256162643433, "rewards/margins": 0.18380776047706604, "rewards/rejected": -0.36850032210350037, "step": 2608 }, { "epoch": 7.143052703627652, "grad_norm": 5.167586326599121, "learning_rate": 6.427397260273972e-07, "log_odds_chosen": 1.4734899997711182, "log_odds_ratio": -0.43655925989151, "logits/chosen": 0.8525820374488831, "logits/rejected": 0.8924281597137451, "logps/chosen": -3.5147228240966797, "logps/rejected": -4.954204559326172, "loss": 0.9381, "nll_loss": 0.8944113254547119, "rewards/accuracies": 0.625, "rewards/chosen": -0.35147225856781006, "rewards/margins": 0.14394818246364594, "rewards/rejected": -0.4954204559326172, "step": 2609 }, { "epoch": 7.145790554414784, "grad_norm": 5.001930236816406, "learning_rate": 6.426027397260274e-07, "log_odds_chosen": 0.6617145538330078, "log_odds_ratio": -0.5856870412826538, "logits/chosen": 1.060720443725586, "logits/rejected": 1.0765838623046875, "logps/chosen": -3.0598278045654297, "logps/rejected": -3.6841464042663574, "loss": 0.873, "nll_loss": 0.8144315481185913, "rewards/accuracies": 0.75, "rewards/chosen": -0.305982768535614, "rewards/margins": 0.06243186444044113, "rewards/rejected": -0.36841464042663574, "step": 2610 }, { "epoch": 7.148528405201916, "grad_norm": 4.2007269859313965, "learning_rate": 6.424657534246575e-07, "log_odds_chosen": 1.010202169418335, "log_odds_ratio": -0.43455883860588074, "logits/chosen": 0.8237940073013306, "logits/rejected": 0.8306862711906433, "logps/chosen": -1.944297194480896, "logps/rejected": -2.885697603225708, "loss": 0.856, "nll_loss": 0.8125772476196289, "rewards/accuracies": 0.875, "rewards/chosen": -0.19442972540855408, "rewards/margins": 0.09414003044366837, "rewards/rejected": -0.28856974840164185, "step": 2611 }, { "epoch": 7.151266255989048, "grad_norm": 4.730701446533203, "learning_rate": 6.423287671232877e-07, "log_odds_chosen": 1.490991473197937, "log_odds_ratio": -0.38556480407714844, "logits/chosen": 0.7987930774688721, "logits/rejected": 0.7768022418022156, "logps/chosen": -2.562335729598999, "logps/rejected": -3.982998847961426, "loss": 0.9441, "nll_loss": 0.9054959416389465, "rewards/accuracies": 1.0, "rewards/chosen": -0.2562335729598999, "rewards/margins": 0.14206629991531372, "rewards/rejected": -0.398299902677536, "step": 2612 }, { "epoch": 7.15400410677618, "grad_norm": 4.35567569732666, "learning_rate": 6.421917808219178e-07, "log_odds_chosen": 2.937969207763672, "log_odds_ratio": -0.12463071942329407, "logits/chosen": 1.0506770610809326, "logits/rejected": 1.0875195264816284, "logps/chosen": -2.356328010559082, "logps/rejected": -5.183499336242676, "loss": 0.6703, "nll_loss": 0.6578864455223083, "rewards/accuracies": 1.0, "rewards/chosen": -0.23563280701637268, "rewards/margins": 0.28271710872650146, "rewards/rejected": -0.5183499455451965, "step": 2613 }, { "epoch": 7.156741957563312, "grad_norm": 4.318653583526611, "learning_rate": 6.420547945205479e-07, "log_odds_chosen": 1.1781888008117676, "log_odds_ratio": -0.32102811336517334, "logits/chosen": 0.7893185615539551, "logits/rejected": 0.7462799549102783, "logps/chosen": -1.7169263362884521, "logps/rejected": -2.745932102203369, "loss": 0.7582, "nll_loss": 0.7261451482772827, "rewards/accuracies": 1.0, "rewards/chosen": -0.1716926246881485, "rewards/margins": 0.10290059447288513, "rewards/rejected": -0.2745932340621948, "step": 2614 }, { "epoch": 7.1594798083504445, "grad_norm": 4.345186710357666, "learning_rate": 6.419178082191781e-07, "log_odds_chosen": 1.761235237121582, "log_odds_ratio": -0.37590500712394714, "logits/chosen": 0.9956018924713135, "logits/rejected": 1.012816309928894, "logps/chosen": -2.6988353729248047, "logps/rejected": -4.375731468200684, "loss": 0.8261, "nll_loss": 0.7885138988494873, "rewards/accuracies": 0.875, "rewards/chosen": -0.26988354325294495, "rewards/margins": 0.16768957674503326, "rewards/rejected": -0.437573105096817, "step": 2615 }, { "epoch": 7.162217659137577, "grad_norm": 3.9994847774505615, "learning_rate": 6.417808219178081e-07, "log_odds_chosen": 1.8926492929458618, "log_odds_ratio": -0.31679874658584595, "logits/chosen": 0.8124305009841919, "logits/rejected": 0.8672716021537781, "logps/chosen": -2.0425736904144287, "logps/rejected": -3.8501014709472656, "loss": 0.8243, "nll_loss": 0.792661190032959, "rewards/accuracies": 0.875, "rewards/chosen": -0.20425735414028168, "rewards/margins": 0.18075278401374817, "rewards/rejected": -0.38501015305519104, "step": 2616 }, { "epoch": 7.1649555099247095, "grad_norm": 5.1549882888793945, "learning_rate": 6.416438356164383e-07, "log_odds_chosen": 1.7188727855682373, "log_odds_ratio": -0.29692041873931885, "logits/chosen": 0.8748387694358826, "logits/rejected": 0.8546434640884399, "logps/chosen": -2.5426223278045654, "logps/rejected": -4.186689376831055, "loss": 0.7286, "nll_loss": 0.6988939046859741, "rewards/accuracies": 0.875, "rewards/chosen": -0.254262238740921, "rewards/margins": 0.16440671682357788, "rewards/rejected": -0.4186689555644989, "step": 2617 }, { "epoch": 7.167693360711842, "grad_norm": 6.527858734130859, "learning_rate": 6.415068493150685e-07, "log_odds_chosen": 1.0644429922103882, "log_odds_ratio": -0.7615770101547241, "logits/chosen": 0.6921607851982117, "logits/rejected": 0.6833464503288269, "logps/chosen": -2.5148725509643555, "logps/rejected": -3.5521061420440674, "loss": 0.8187, "nll_loss": 0.7425742149353027, "rewards/accuracies": 0.75, "rewards/chosen": -0.25148725509643555, "rewards/margins": 0.10372336208820343, "rewards/rejected": -0.35521063208580017, "step": 2618 }, { "epoch": 7.170431211498974, "grad_norm": 4.2008376121521, "learning_rate": 6.413698630136986e-07, "log_odds_chosen": 1.013676404953003, "log_odds_ratio": -0.3541945815086365, "logits/chosen": 0.7156941294670105, "logits/rejected": 0.7025423645973206, "logps/chosen": -2.336197853088379, "logps/rejected": -3.2676916122436523, "loss": 0.7887, "nll_loss": 0.7533056735992432, "rewards/accuracies": 1.0, "rewards/chosen": -0.2336197942495346, "rewards/margins": 0.09314940124750137, "rewards/rejected": -0.3267691731452942, "step": 2619 }, { "epoch": 7.173169062286106, "grad_norm": 5.780280113220215, "learning_rate": 6.412328767123287e-07, "log_odds_chosen": 0.7991500496864319, "log_odds_ratio": -0.5023650527000427, "logits/chosen": 0.6749932765960693, "logits/rejected": 0.6100826263427734, "logps/chosen": -2.156590461730957, "logps/rejected": -2.883065700531006, "loss": 0.9247, "nll_loss": 0.8744481801986694, "rewards/accuracies": 0.75, "rewards/chosen": -0.21565905213356018, "rewards/margins": 0.07264751195907593, "rewards/rejected": -0.2883065640926361, "step": 2620 }, { "epoch": 7.175906913073238, "grad_norm": 5.364114284515381, "learning_rate": 6.410958904109589e-07, "log_odds_chosen": 1.267185926437378, "log_odds_ratio": -0.36004534363746643, "logits/chosen": 0.6740819811820984, "logits/rejected": 0.5877931118011475, "logps/chosen": -1.6132341623306274, "logps/rejected": -2.7499563694000244, "loss": 0.8784, "nll_loss": 0.8424438238143921, "rewards/accuracies": 0.875, "rewards/chosen": -0.1613234132528305, "rewards/margins": 0.11367224156856537, "rewards/rejected": -0.2749956548213959, "step": 2621 }, { "epoch": 7.17864476386037, "grad_norm": 4.749582290649414, "learning_rate": 6.40958904109589e-07, "log_odds_chosen": 1.238634467124939, "log_odds_ratio": -0.31767094135284424, "logits/chosen": 0.7953576445579529, "logits/rejected": 0.7740696668624878, "logps/chosen": -1.6257851123809814, "logps/rejected": -2.719606876373291, "loss": 0.7652, "nll_loss": 0.7334372401237488, "rewards/accuracies": 1.0, "rewards/chosen": -0.1625785082578659, "rewards/margins": 0.10938217490911484, "rewards/rejected": -0.27196070551872253, "step": 2622 }, { "epoch": 7.181382614647502, "grad_norm": 4.669775485992432, "learning_rate": 6.408219178082191e-07, "log_odds_chosen": 2.6945719718933105, "log_odds_ratio": -0.27303430438041687, "logits/chosen": 0.8989848494529724, "logits/rejected": 0.8892354965209961, "logps/chosen": -2.0354392528533936, "logps/rejected": -4.61563777923584, "loss": 0.9401, "nll_loss": 0.9127712249755859, "rewards/accuracies": 0.875, "rewards/chosen": -0.20354393124580383, "rewards/margins": 0.2580198049545288, "rewards/rejected": -0.46156373620033264, "step": 2623 }, { "epoch": 7.184120465434634, "grad_norm": 4.123577117919922, "learning_rate": 6.406849315068493e-07, "log_odds_chosen": 1.5123165845870972, "log_odds_ratio": -0.32546305656433105, "logits/chosen": 1.068901777267456, "logits/rejected": 1.1138484477996826, "logps/chosen": -2.057234764099121, "logps/rejected": -3.434903144836426, "loss": 0.7119, "nll_loss": 0.6793496012687683, "rewards/accuracies": 0.875, "rewards/chosen": -0.20572346448898315, "rewards/margins": 0.13776686787605286, "rewards/rejected": -0.343490332365036, "step": 2624 }, { "epoch": 7.186858316221766, "grad_norm": 4.492019176483154, "learning_rate": 6.405479452054794e-07, "log_odds_chosen": 1.8470826148986816, "log_odds_ratio": -0.2889418601989746, "logits/chosen": 0.8429515957832336, "logits/rejected": 0.8032007217407227, "logps/chosen": -2.1780920028686523, "logps/rejected": -3.9372901916503906, "loss": 0.7877, "nll_loss": 0.7588385939598083, "rewards/accuracies": 1.0, "rewards/chosen": -0.21780920028686523, "rewards/margins": 0.17591983079910278, "rewards/rejected": -0.393729031085968, "step": 2625 }, { "epoch": 7.189596167008898, "grad_norm": 3.5546276569366455, "learning_rate": 6.404109589041096e-07, "log_odds_chosen": 3.2355244159698486, "log_odds_ratio": -0.1827867031097412, "logits/chosen": 0.8336159586906433, "logits/rejected": 0.856776773929596, "logps/chosen": -2.547727346420288, "logps/rejected": -5.661041259765625, "loss": 0.8415, "nll_loss": 0.8231973648071289, "rewards/accuracies": 1.0, "rewards/chosen": -0.25477275252342224, "rewards/margins": 0.3113313913345337, "rewards/rejected": -0.5661041140556335, "step": 2626 }, { "epoch": 7.19233401779603, "grad_norm": 4.678942680358887, "learning_rate": 6.402739726027397e-07, "log_odds_chosen": 1.5881750583648682, "log_odds_ratio": -0.30675044655799866, "logits/chosen": 0.8350756168365479, "logits/rejected": 0.8524436354637146, "logps/chosen": -2.3213722705841064, "logps/rejected": -3.800224781036377, "loss": 0.8492, "nll_loss": 0.8185555934906006, "rewards/accuracies": 0.875, "rewards/chosen": -0.23213723301887512, "rewards/margins": 0.1478852778673172, "rewards/rejected": -0.3800225257873535, "step": 2627 }, { "epoch": 7.195071868583162, "grad_norm": 5.960131645202637, "learning_rate": 6.401369863013698e-07, "log_odds_chosen": 0.5423641800880432, "log_odds_ratio": -0.5324482917785645, "logits/chosen": 0.8603353500366211, "logits/rejected": 0.9066253900527954, "logps/chosen": -2.3433263301849365, "logps/rejected": -2.828373432159424, "loss": 0.7938, "nll_loss": 0.7405128479003906, "rewards/accuracies": 0.75, "rewards/chosen": -0.2343326210975647, "rewards/margins": 0.04850468784570694, "rewards/rejected": -0.2828373312950134, "step": 2628 }, { "epoch": 7.197809719370294, "grad_norm": 6.3604841232299805, "learning_rate": 6.4e-07, "log_odds_chosen": 1.773348331451416, "log_odds_ratio": -0.34176957607269287, "logits/chosen": 1.0114445686340332, "logits/rejected": 1.0185532569885254, "logps/chosen": -2.590970993041992, "logps/rejected": -4.309476852416992, "loss": 0.8251, "nll_loss": 0.7909026145935059, "rewards/accuracies": 0.875, "rewards/chosen": -0.2590970993041992, "rewards/margins": 0.17185057699680328, "rewards/rejected": -0.4309476613998413, "step": 2629 }, { "epoch": 7.200547570157426, "grad_norm": 3.616151809692383, "learning_rate": 6.398630136986301e-07, "log_odds_chosen": 1.1614418029785156, "log_odds_ratio": -0.3429044485092163, "logits/chosen": 0.6003164052963257, "logits/rejected": 0.5785868167877197, "logps/chosen": -2.0211594104766846, "logps/rejected": -3.0524981021881104, "loss": 0.7797, "nll_loss": 0.7454386949539185, "rewards/accuracies": 0.875, "rewards/chosen": -0.20211592316627502, "rewards/margins": 0.10313385725021362, "rewards/rejected": -0.30524981021881104, "step": 2630 }, { "epoch": 7.2032854209445585, "grad_norm": 4.986647605895996, "learning_rate": 6.397260273972602e-07, "log_odds_chosen": 1.6443860530853271, "log_odds_ratio": -0.27485188841819763, "logits/chosen": 0.7630744576454163, "logits/rejected": 0.7446209788322449, "logps/chosen": -2.064936637878418, "logps/rejected": -3.607799768447876, "loss": 0.8646, "nll_loss": 0.8371036052703857, "rewards/accuracies": 1.0, "rewards/chosen": -0.20649367570877075, "rewards/margins": 0.15428629517555237, "rewards/rejected": -0.3607800006866455, "step": 2631 }, { "epoch": 7.2060232717316905, "grad_norm": 5.689704895019531, "learning_rate": 6.395890410958904e-07, "log_odds_chosen": 0.2854915261268616, "log_odds_ratio": -0.7252859473228455, "logits/chosen": 0.9336737394332886, "logits/rejected": 0.8430296182632446, "logps/chosen": -2.93326473236084, "logps/rejected": -3.192115068435669, "loss": 0.8869, "nll_loss": 0.8143231272697449, "rewards/accuracies": 0.625, "rewards/chosen": -0.2933264970779419, "rewards/margins": 0.025885052978992462, "rewards/rejected": -0.31921154260635376, "step": 2632 }, { "epoch": 7.208761122518823, "grad_norm": 3.7586331367492676, "learning_rate": 6.394520547945205e-07, "log_odds_chosen": 1.5339107513427734, "log_odds_ratio": -0.348357617855072, "logits/chosen": 0.7185454368591309, "logits/rejected": 0.7121589779853821, "logps/chosen": -1.9518234729766846, "logps/rejected": -3.378554582595825, "loss": 0.8951, "nll_loss": 0.860291600227356, "rewards/accuracies": 0.875, "rewards/chosen": -0.19518233835697174, "rewards/margins": 0.14267311990261078, "rewards/rejected": -0.3378554582595825, "step": 2633 }, { "epoch": 7.211498973305955, "grad_norm": 4.675075531005859, "learning_rate": 6.393150684931506e-07, "log_odds_chosen": 0.7055596709251404, "log_odds_ratio": -0.5852649211883545, "logits/chosen": 0.9343775510787964, "logits/rejected": 0.9483616352081299, "logps/chosen": -2.3534107208251953, "logps/rejected": -3.003451108932495, "loss": 0.7515, "nll_loss": 0.6929712295532227, "rewards/accuracies": 0.625, "rewards/chosen": -0.23534107208251953, "rewards/margins": 0.06500403583049774, "rewards/rejected": -0.30034512281417847, "step": 2634 }, { "epoch": 7.214236824093087, "grad_norm": 3.8452465534210205, "learning_rate": 6.391780821917808e-07, "log_odds_chosen": 2.1499524116516113, "log_odds_ratio": -0.19077306985855103, "logits/chosen": 0.9001689553260803, "logits/rejected": 0.9495924711227417, "logps/chosen": -2.1586878299713135, "logps/rejected": -4.197735786437988, "loss": 0.6544, "nll_loss": 0.6353614330291748, "rewards/accuracies": 1.0, "rewards/chosen": -0.21586880087852478, "rewards/margins": 0.20390477776527405, "rewards/rejected": -0.4197736084461212, "step": 2635 }, { "epoch": 7.216974674880219, "grad_norm": 4.332772254943848, "learning_rate": 6.390410958904109e-07, "log_odds_chosen": 1.669223666191101, "log_odds_ratio": -0.3481634557247162, "logits/chosen": 0.8388354182243347, "logits/rejected": 0.7076834440231323, "logps/chosen": -1.8503172397613525, "logps/rejected": -3.4307918548583984, "loss": 0.8603, "nll_loss": 0.8255001902580261, "rewards/accuracies": 0.875, "rewards/chosen": -0.1850317120552063, "rewards/margins": 0.15804743766784668, "rewards/rejected": -0.343079149723053, "step": 2636 }, { "epoch": 7.219712525667351, "grad_norm": 4.815263748168945, "learning_rate": 6.38904109589041e-07, "log_odds_chosen": 1.1537600755691528, "log_odds_ratio": -0.4588078260421753, "logits/chosen": 0.6999993324279785, "logits/rejected": 0.6867200136184692, "logps/chosen": -1.9903206825256348, "logps/rejected": -3.030445098876953, "loss": 0.8265, "nll_loss": 0.780596137046814, "rewards/accuracies": 0.75, "rewards/chosen": -0.19903206825256348, "rewards/margins": 0.10401243716478348, "rewards/rejected": -0.30304449796676636, "step": 2637 }, { "epoch": 7.222450376454483, "grad_norm": 4.304107189178467, "learning_rate": 6.387671232876712e-07, "log_odds_chosen": 1.796386957168579, "log_odds_ratio": -0.3229462802410126, "logits/chosen": 0.7725259065628052, "logits/rejected": 0.7240935564041138, "logps/chosen": -2.0001049041748047, "logps/rejected": -3.679518938064575, "loss": 0.8538, "nll_loss": 0.8214719891548157, "rewards/accuracies": 0.875, "rewards/chosen": -0.2000104933977127, "rewards/margins": 0.1679413914680481, "rewards/rejected": -0.367951899766922, "step": 2638 }, { "epoch": 7.225188227241615, "grad_norm": 5.206910133361816, "learning_rate": 6.386301369863013e-07, "log_odds_chosen": 0.6646467447280884, "log_odds_ratio": -0.523639976978302, "logits/chosen": 0.784734308719635, "logits/rejected": 0.8137505054473877, "logps/chosen": -2.643333911895752, "logps/rejected": -3.2471859455108643, "loss": 0.7908, "nll_loss": 0.7384296655654907, "rewards/accuracies": 0.75, "rewards/chosen": -0.26433342695236206, "rewards/margins": 0.060385167598724365, "rewards/rejected": -0.3247185945510864, "step": 2639 }, { "epoch": 7.227926078028747, "grad_norm": 4.9016947746276855, "learning_rate": 6.384931506849315e-07, "log_odds_chosen": 1.0162049531936646, "log_odds_ratio": -0.3650910258293152, "logits/chosen": 0.8329653739929199, "logits/rejected": 0.8161476254463196, "logps/chosen": -2.324215888977051, "logps/rejected": -3.252411365509033, "loss": 0.8916, "nll_loss": 0.8551274538040161, "rewards/accuracies": 1.0, "rewards/chosen": -0.23242159187793732, "rewards/margins": 0.09281957149505615, "rewards/rejected": -0.3252411484718323, "step": 2640 }, { "epoch": 7.230663928815879, "grad_norm": 4.987558364868164, "learning_rate": 6.383561643835616e-07, "log_odds_chosen": 1.5563057661056519, "log_odds_ratio": -0.3173413872718811, "logits/chosen": 0.8961478471755981, "logits/rejected": 0.9100357294082642, "logps/chosen": -2.3692078590393066, "logps/rejected": -3.7937231063842773, "loss": 0.7167, "nll_loss": 0.6850033402442932, "rewards/accuracies": 0.875, "rewards/chosen": -0.2369207739830017, "rewards/margins": 0.14245153963565826, "rewards/rejected": -0.3793722987174988, "step": 2641 }, { "epoch": 7.233401779603011, "grad_norm": 3.6976044178009033, "learning_rate": 6.382191780821917e-07, "log_odds_chosen": 2.4461610317230225, "log_odds_ratio": -0.2642771601676941, "logits/chosen": 0.9480171203613281, "logits/rejected": 0.9521628022193909, "logps/chosen": -2.12515926361084, "logps/rejected": -4.460778713226318, "loss": 0.8359, "nll_loss": 0.809479296207428, "rewards/accuracies": 0.875, "rewards/chosen": -0.2125159204006195, "rewards/margins": 0.23356196284294128, "rewards/rejected": -0.4460778832435608, "step": 2642 }, { "epoch": 7.236139630390143, "grad_norm": 4.660993576049805, "learning_rate": 6.380821917808219e-07, "log_odds_chosen": 1.6752456426620483, "log_odds_ratio": -0.40432101488113403, "logits/chosen": 0.7291402816772461, "logits/rejected": 0.6954388618469238, "logps/chosen": -2.268263578414917, "logps/rejected": -3.8485679626464844, "loss": 0.8499, "nll_loss": 0.8094188570976257, "rewards/accuracies": 0.625, "rewards/chosen": -0.22682633996009827, "rewards/margins": 0.1580304503440857, "rewards/rejected": -0.38485682010650635, "step": 2643 }, { "epoch": 7.238877481177276, "grad_norm": 3.729316473007202, "learning_rate": 6.37945205479452e-07, "log_odds_chosen": 2.9414141178131104, "log_odds_ratio": -0.19021004438400269, "logits/chosen": 0.8732112646102905, "logits/rejected": 0.8475368022918701, "logps/chosen": -2.025585412979126, "logps/rejected": -4.836868762969971, "loss": 0.7164, "nll_loss": 0.6974243521690369, "rewards/accuracies": 1.0, "rewards/chosen": -0.20255854725837708, "rewards/margins": 0.28112831711769104, "rewards/rejected": -0.4836868643760681, "step": 2644 }, { "epoch": 7.241615331964408, "grad_norm": 4.691531658172607, "learning_rate": 6.378082191780821e-07, "log_odds_chosen": 2.28025484085083, "log_odds_ratio": -0.32002198696136475, "logits/chosen": 0.8895549774169922, "logits/rejected": 0.8763532638549805, "logps/chosen": -2.2204437255859375, "logps/rejected": -4.438933849334717, "loss": 0.7755, "nll_loss": 0.74348384141922, "rewards/accuracies": 0.875, "rewards/chosen": -0.22204437851905823, "rewards/margins": 0.2218490093946457, "rewards/rejected": -0.4438933730125427, "step": 2645 }, { "epoch": 7.24435318275154, "grad_norm": 4.75288724899292, "learning_rate": 6.376712328767123e-07, "log_odds_chosen": 1.8323261737823486, "log_odds_ratio": -0.3675229549407959, "logits/chosen": 0.7925227880477905, "logits/rejected": 0.788293719291687, "logps/chosen": -2.53025484085083, "logps/rejected": -4.29008150100708, "loss": 0.8827, "nll_loss": 0.8459798097610474, "rewards/accuracies": 0.875, "rewards/chosen": -0.25302547216415405, "rewards/margins": 0.17598269879817963, "rewards/rejected": -0.4290081858634949, "step": 2646 }, { "epoch": 7.2470910335386725, "grad_norm": 5.2827935218811035, "learning_rate": 6.375342465753424e-07, "log_odds_chosen": 1.8973822593688965, "log_odds_ratio": -0.489865243434906, "logits/chosen": 0.8730176687240601, "logits/rejected": 0.8732256889343262, "logps/chosen": -2.9152979850769043, "logps/rejected": -4.713884353637695, "loss": 0.8084, "nll_loss": 0.759397029876709, "rewards/accuracies": 0.75, "rewards/chosen": -0.2915298044681549, "rewards/margins": 0.17985862493515015, "rewards/rejected": -0.47138845920562744, "step": 2647 }, { "epoch": 7.2498288843258045, "grad_norm": 4.119657516479492, "learning_rate": 6.373972602739725e-07, "log_odds_chosen": 1.5683659315109253, "log_odds_ratio": -0.24729344248771667, "logits/chosen": 0.8085891604423523, "logits/rejected": 0.7185418009757996, "logps/chosen": -2.187870502471924, "logps/rejected": -3.637502670288086, "loss": 0.7773, "nll_loss": 0.7525251507759094, "rewards/accuracies": 1.0, "rewards/chosen": -0.2187870740890503, "rewards/margins": 0.14496320486068726, "rewards/rejected": -0.36375027894973755, "step": 2648 }, { "epoch": 7.252566735112937, "grad_norm": 4.857478141784668, "learning_rate": 6.372602739726027e-07, "log_odds_chosen": 1.9197360277175903, "log_odds_ratio": -0.24267365038394928, "logits/chosen": 0.6814298033714294, "logits/rejected": 0.7055261731147766, "logps/chosen": -2.644644260406494, "logps/rejected": -4.390348434448242, "loss": 0.8314, "nll_loss": 0.8071476817131042, "rewards/accuracies": 0.875, "rewards/chosen": -0.26446443796157837, "rewards/margins": 0.17457038164138794, "rewards/rejected": -0.4390348196029663, "step": 2649 }, { "epoch": 7.255304585900069, "grad_norm": 4.296270847320557, "learning_rate": 6.371232876712328e-07, "log_odds_chosen": 2.263303518295288, "log_odds_ratio": -0.23184481263160706, "logits/chosen": 0.609477162361145, "logits/rejected": 0.6167259812355042, "logps/chosen": -2.38582181930542, "logps/rejected": -4.509830474853516, "loss": 0.8878, "nll_loss": 0.8645845651626587, "rewards/accuracies": 1.0, "rewards/chosen": -0.23858219385147095, "rewards/margins": 0.21240077912807465, "rewards/rejected": -0.4509830176830292, "step": 2650 }, { "epoch": 7.258042436687201, "grad_norm": 4.325334072113037, "learning_rate": 6.369863013698629e-07, "log_odds_chosen": 3.0875070095062256, "log_odds_ratio": -0.14029553532600403, "logits/chosen": 0.9495190382003784, "logits/rejected": 0.9927242398262024, "logps/chosen": -2.551985502243042, "logps/rejected": -5.507082939147949, "loss": 0.7317, "nll_loss": 0.7176597118377686, "rewards/accuracies": 0.875, "rewards/chosen": -0.25519853830337524, "rewards/margins": 0.2955097556114197, "rewards/rejected": -0.5507082939147949, "step": 2651 }, { "epoch": 7.260780287474333, "grad_norm": 3.986396312713623, "learning_rate": 6.368493150684931e-07, "log_odds_chosen": 1.9707796573638916, "log_odds_ratio": -0.33241748809814453, "logits/chosen": 0.7347120046615601, "logits/rejected": 0.767963171005249, "logps/chosen": -2.354024887084961, "logps/rejected": -4.2223100662231445, "loss": 0.8715, "nll_loss": 0.8382899761199951, "rewards/accuracies": 0.75, "rewards/chosen": -0.23540246486663818, "rewards/margins": 0.18682855367660522, "rewards/rejected": -0.4222310185432434, "step": 2652 }, { "epoch": 7.263518138261465, "grad_norm": 4.2595038414001465, "learning_rate": 6.367123287671232e-07, "log_odds_chosen": 1.5033305883407593, "log_odds_ratio": -0.4602694511413574, "logits/chosen": 0.8767430782318115, "logits/rejected": 0.7611216902732849, "logps/chosen": -1.8249990940093994, "logps/rejected": -3.2749905586242676, "loss": 0.8289, "nll_loss": 0.7828553915023804, "rewards/accuracies": 0.625, "rewards/chosen": -0.18249990046024323, "rewards/margins": 0.14499914646148682, "rewards/rejected": -0.32749903202056885, "step": 2653 }, { "epoch": 7.266255989048597, "grad_norm": 4.200562953948975, "learning_rate": 6.365753424657534e-07, "log_odds_chosen": 0.6974858641624451, "log_odds_ratio": -0.4178618788719177, "logits/chosen": 0.6472984552383423, "logits/rejected": 0.6602538228034973, "logps/chosen": -2.8130574226379395, "logps/rejected": -3.4608521461486816, "loss": 0.8896, "nll_loss": 0.8478080034255981, "rewards/accuracies": 1.0, "rewards/chosen": -0.281305730342865, "rewards/margins": 0.06477948278188705, "rewards/rejected": -0.34608519077301025, "step": 2654 }, { "epoch": 7.268993839835729, "grad_norm": 4.050461292266846, "learning_rate": 6.364383561643835e-07, "log_odds_chosen": 1.2963576316833496, "log_odds_ratio": -0.4724075496196747, "logits/chosen": 0.7906375527381897, "logits/rejected": 0.6600274443626404, "logps/chosen": -1.6644742488861084, "logps/rejected": -2.864553928375244, "loss": 0.8617, "nll_loss": 0.8144170045852661, "rewards/accuracies": 0.875, "rewards/chosen": -0.16644743084907532, "rewards/margins": 0.12000797688961029, "rewards/rejected": -0.2864553928375244, "step": 2655 }, { "epoch": 7.271731690622861, "grad_norm": 5.062902927398682, "learning_rate": 6.363013698630136e-07, "log_odds_chosen": 2.7326366901397705, "log_odds_ratio": -0.2681933641433716, "logits/chosen": 0.9200147390365601, "logits/rejected": 0.8857675790786743, "logps/chosen": -2.605712413787842, "logps/rejected": -5.1734161376953125, "loss": 0.7133, "nll_loss": 0.6865109801292419, "rewards/accuracies": 0.875, "rewards/chosen": -0.2605712413787842, "rewards/margins": 0.25677037239074707, "rewards/rejected": -0.5173416137695312, "step": 2656 }, { "epoch": 7.274469541409993, "grad_norm": 4.587258338928223, "learning_rate": 6.361643835616438e-07, "log_odds_chosen": 2.420982837677002, "log_odds_ratio": -0.23106229305267334, "logits/chosen": 0.7967817783355713, "logits/rejected": 0.8227527141571045, "logps/chosen": -2.675999164581299, "logps/rejected": -4.974282741546631, "loss": 0.8774, "nll_loss": 0.8542894721031189, "rewards/accuracies": 1.0, "rewards/chosen": -0.2675999402999878, "rewards/margins": 0.229828342795372, "rewards/rejected": -0.4974282681941986, "step": 2657 }, { "epoch": 7.277207392197125, "grad_norm": 3.797391891479492, "learning_rate": 6.36027397260274e-07, "log_odds_chosen": 1.8058750629425049, "log_odds_ratio": -0.2796396017074585, "logits/chosen": 0.5973929166793823, "logits/rejected": 0.5951627492904663, "logps/chosen": -1.9075629711151123, "logps/rejected": -3.606369972229004, "loss": 0.7179, "nll_loss": 0.6899121403694153, "rewards/accuracies": 0.875, "rewards/chosen": -0.19075629115104675, "rewards/margins": 0.169880673289299, "rewards/rejected": -0.36063694953918457, "step": 2658 }, { "epoch": 7.279945242984257, "grad_norm": 3.4648478031158447, "learning_rate": 6.35890410958904e-07, "log_odds_chosen": 2.2232728004455566, "log_odds_ratio": -0.22454559803009033, "logits/chosen": 0.9929342269897461, "logits/rejected": 0.9741212129592896, "logps/chosen": -2.0315308570861816, "logps/rejected": -4.143309593200684, "loss": 0.7726, "nll_loss": 0.7500966191291809, "rewards/accuracies": 1.0, "rewards/chosen": -0.2031531035900116, "rewards/margins": 0.21117788553237915, "rewards/rejected": -0.41433095932006836, "step": 2659 }, { "epoch": 7.282683093771389, "grad_norm": 4.450504779815674, "learning_rate": 6.357534246575343e-07, "log_odds_chosen": 1.569445013999939, "log_odds_ratio": -0.2864174246788025, "logits/chosen": 0.9948694109916687, "logits/rejected": 1.0132704973220825, "logps/chosen": -2.5591437816619873, "logps/rejected": -3.99948787689209, "loss": 0.8174, "nll_loss": 0.7887689471244812, "rewards/accuracies": 0.875, "rewards/chosen": -0.2559143900871277, "rewards/margins": 0.14403438568115234, "rewards/rejected": -0.39994877576828003, "step": 2660 }, { "epoch": 7.285420944558521, "grad_norm": 5.830663204193115, "learning_rate": 6.356164383561645e-07, "log_odds_chosen": 1.4303157329559326, "log_odds_ratio": -0.5077128410339355, "logits/chosen": 0.9471533298492432, "logits/rejected": 0.9561129808425903, "logps/chosen": -2.8490915298461914, "logps/rejected": -4.221749305725098, "loss": 0.8286, "nll_loss": 0.777786135673523, "rewards/accuracies": 0.875, "rewards/chosen": -0.28490912914276123, "rewards/margins": 0.13726577162742615, "rewards/rejected": -0.4221749007701874, "step": 2661 }, { "epoch": 7.2881587953456535, "grad_norm": 6.102592468261719, "learning_rate": 6.354794520547944e-07, "log_odds_chosen": 0.5298815965652466, "log_odds_ratio": -0.579016923904419, "logits/chosen": 1.056031346321106, "logits/rejected": 1.0354273319244385, "logps/chosen": -2.9655160903930664, "logps/rejected": -3.4503860473632812, "loss": 0.8531, "nll_loss": 0.7951564788818359, "rewards/accuracies": 0.75, "rewards/chosen": -0.2965516149997711, "rewards/margins": 0.048486996442079544, "rewards/rejected": -0.34503862261772156, "step": 2662 }, { "epoch": 7.2908966461327855, "grad_norm": 4.123968124389648, "learning_rate": 6.353424657534247e-07, "log_odds_chosen": 1.9431695938110352, "log_odds_ratio": -0.2871924638748169, "logits/chosen": 1.0091993808746338, "logits/rejected": 1.0446157455444336, "logps/chosen": -2.387281656265259, "logps/rejected": -4.233518600463867, "loss": 0.8553, "nll_loss": 0.8265931010246277, "rewards/accuracies": 0.875, "rewards/chosen": -0.23872816562652588, "rewards/margins": 0.18462368845939636, "rewards/rejected": -0.42335188388824463, "step": 2663 }, { "epoch": 7.293634496919918, "grad_norm": 6.565540313720703, "learning_rate": 6.352054794520548e-07, "log_odds_chosen": 0.38597869873046875, "log_odds_ratio": -0.8982707262039185, "logits/chosen": 0.6368855834007263, "logits/rejected": 0.6641284227371216, "logps/chosen": -2.77075457572937, "logps/rejected": -3.0886120796203613, "loss": 0.8962, "nll_loss": 0.8063331246376038, "rewards/accuracies": 0.75, "rewards/chosen": -0.27707546949386597, "rewards/margins": 0.03178572654724121, "rewards/rejected": -0.3088611960411072, "step": 2664 }, { "epoch": 7.29637234770705, "grad_norm": 4.520362854003906, "learning_rate": 6.350684931506849e-07, "log_odds_chosen": 1.5162550210952759, "log_odds_ratio": -0.3014686703681946, "logits/chosen": 0.8434170484542847, "logits/rejected": 0.8175273537635803, "logps/chosen": -2.8099725246429443, "logps/rejected": -4.2751874923706055, "loss": 0.8691, "nll_loss": 0.8389092683792114, "rewards/accuracies": 0.875, "rewards/chosen": -0.28099724650382996, "rewards/margins": 0.14652150869369507, "rewards/rejected": -0.4275187849998474, "step": 2665 }, { "epoch": 7.299110198494182, "grad_norm": 4.6227850914001465, "learning_rate": 6.349315068493151e-07, "log_odds_chosen": 2.348996639251709, "log_odds_ratio": -0.21317893266677856, "logits/chosen": 0.6725765466690063, "logits/rejected": 0.6078412532806396, "logps/chosen": -1.8041703701019287, "logps/rejected": -3.995828151702881, "loss": 0.7973, "nll_loss": 0.7759348154067993, "rewards/accuracies": 1.0, "rewards/chosen": -0.1804170310497284, "rewards/margins": 0.2191658318042755, "rewards/rejected": -0.3995828628540039, "step": 2666 }, { "epoch": 7.301848049281314, "grad_norm": 4.200307369232178, "learning_rate": 6.347945205479452e-07, "log_odds_chosen": 1.426497459411621, "log_odds_ratio": -0.357149213552475, "logits/chosen": 1.0574668645858765, "logits/rejected": 1.0365872383117676, "logps/chosen": -2.696735143661499, "logps/rejected": -4.052423000335693, "loss": 0.7523, "nll_loss": 0.7165897488594055, "rewards/accuracies": 0.875, "rewards/chosen": -0.26967352628707886, "rewards/margins": 0.1355687975883484, "rewards/rejected": -0.40524232387542725, "step": 2667 }, { "epoch": 7.304585900068446, "grad_norm": 4.422375679016113, "learning_rate": 6.346575342465754e-07, "log_odds_chosen": 0.6832389831542969, "log_odds_ratio": -0.4406825304031372, "logits/chosen": 0.8239833116531372, "logits/rejected": 0.791789710521698, "logps/chosen": -1.9541312456130981, "logps/rejected": -2.552736282348633, "loss": 0.7896, "nll_loss": 0.7455068230628967, "rewards/accuracies": 0.875, "rewards/chosen": -0.19541311264038086, "rewards/margins": 0.059860505163669586, "rewards/rejected": -0.25527364015579224, "step": 2668 }, { "epoch": 7.307323750855579, "grad_norm": 4.808262825012207, "learning_rate": 6.345205479452055e-07, "log_odds_chosen": 1.9352514743804932, "log_odds_ratio": -0.2866981327533722, "logits/chosen": 1.0053915977478027, "logits/rejected": 1.0078492164611816, "logps/chosen": -2.4128341674804688, "logps/rejected": -4.269181251525879, "loss": 0.8064, "nll_loss": 0.7776987552642822, "rewards/accuracies": 1.0, "rewards/chosen": -0.24128341674804688, "rewards/margins": 0.18563470244407654, "rewards/rejected": -0.4269181191921234, "step": 2669 }, { "epoch": 7.31006160164271, "grad_norm": 4.547852039337158, "learning_rate": 6.343835616438356e-07, "log_odds_chosen": 1.7149628400802612, "log_odds_ratio": -0.3490750193595886, "logits/chosen": 0.8514676690101624, "logits/rejected": 0.8975948691368103, "logps/chosen": -2.2636303901672363, "logps/rejected": -3.903872489929199, "loss": 0.8016, "nll_loss": 0.7667085528373718, "rewards/accuracies": 0.875, "rewards/chosen": -0.22636303305625916, "rewards/margins": 0.1640242338180542, "rewards/rejected": -0.39038726687431335, "step": 2670 }, { "epoch": 7.312799452429843, "grad_norm": 6.306236743927002, "learning_rate": 6.342465753424658e-07, "log_odds_chosen": 0.7625901699066162, "log_odds_ratio": -0.510204017162323, "logits/chosen": 0.8580019474029541, "logits/rejected": 0.8282712697982788, "logps/chosen": -2.2293026447296143, "logps/rejected": -2.8927855491638184, "loss": 0.7733, "nll_loss": 0.7222645282745361, "rewards/accuracies": 0.875, "rewards/chosen": -0.22293028235435486, "rewards/margins": 0.06634830683469772, "rewards/rejected": -0.2892785668373108, "step": 2671 }, { "epoch": 7.315537303216975, "grad_norm": 4.853222846984863, "learning_rate": 6.341095890410959e-07, "log_odds_chosen": 1.7240961790084839, "log_odds_ratio": -0.29264479875564575, "logits/chosen": 0.7967309951782227, "logits/rejected": 0.7946847677230835, "logps/chosen": -2.0314278602600098, "logps/rejected": -3.597727060317993, "loss": 0.7519, "nll_loss": 0.7226797938346863, "rewards/accuracies": 0.875, "rewards/chosen": -0.20314279198646545, "rewards/margins": 0.15662989020347595, "rewards/rejected": -0.3597726821899414, "step": 2672 }, { "epoch": 7.318275154004107, "grad_norm": 4.334230899810791, "learning_rate": 6.33972602739726e-07, "log_odds_chosen": 1.0307427644729614, "log_odds_ratio": -0.3717058002948761, "logits/chosen": 0.6005128622055054, "logits/rejected": 0.6391627192497253, "logps/chosen": -2.1970129013061523, "logps/rejected": -3.0851991176605225, "loss": 0.9104, "nll_loss": 0.8732649683952332, "rewards/accuracies": 0.875, "rewards/chosen": -0.21970129013061523, "rewards/margins": 0.08881863206624985, "rewards/rejected": -0.30851995944976807, "step": 2673 }, { "epoch": 7.321013004791239, "grad_norm": 5.810070991516113, "learning_rate": 6.338356164383562e-07, "log_odds_chosen": 1.4285815954208374, "log_odds_ratio": -0.6008732318878174, "logits/chosen": 0.8112507462501526, "logits/rejected": 0.9558950662612915, "logps/chosen": -2.5072102546691895, "logps/rejected": -3.887852668762207, "loss": 0.7505, "nll_loss": 0.6903722286224365, "rewards/accuracies": 0.625, "rewards/chosen": -0.2507210373878479, "rewards/margins": 0.1380642056465149, "rewards/rejected": -0.3887852430343628, "step": 2674 }, { "epoch": 7.323750855578371, "grad_norm": 7.279299259185791, "learning_rate": 6.336986301369864e-07, "log_odds_chosen": 2.011960983276367, "log_odds_ratio": -0.5551908016204834, "logits/chosen": 1.237203598022461, "logits/rejected": 1.2334681749343872, "logps/chosen": -3.0026755332946777, "logps/rejected": -4.922576904296875, "loss": 0.8094, "nll_loss": 0.7538847923278809, "rewards/accuracies": 0.875, "rewards/chosen": -0.3002675771713257, "rewards/margins": 0.19199012219905853, "rewards/rejected": -0.4922577142715454, "step": 2675 }, { "epoch": 7.326488706365503, "grad_norm": 4.951963901519775, "learning_rate": 6.335616438356164e-07, "log_odds_chosen": 0.9747958779335022, "log_odds_ratio": -0.517542839050293, "logits/chosen": 0.8342776298522949, "logits/rejected": 0.9003663063049316, "logps/chosen": -2.8188109397888184, "logps/rejected": -3.7797560691833496, "loss": 0.8065, "nll_loss": 0.7547688484191895, "rewards/accuracies": 0.625, "rewards/chosen": -0.28188109397888184, "rewards/margins": 0.0960945188999176, "rewards/rejected": -0.3779756426811218, "step": 2676 }, { "epoch": 7.329226557152635, "grad_norm": 4.354272365570068, "learning_rate": 6.334246575342466e-07, "log_odds_chosen": 1.4341809749603271, "log_odds_ratio": -0.3302532434463501, "logits/chosen": 0.9532480239868164, "logits/rejected": 0.9030503034591675, "logps/chosen": -2.2537717819213867, "logps/rejected": -3.6020302772521973, "loss": 0.8417, "nll_loss": 0.8086791634559631, "rewards/accuracies": 0.875, "rewards/chosen": -0.2253771871328354, "rewards/margins": 0.13482585549354553, "rewards/rejected": -0.3602030277252197, "step": 2677 }, { "epoch": 7.3319644079397674, "grad_norm": 4.885953903198242, "learning_rate": 6.332876712328767e-07, "log_odds_chosen": 1.4277325868606567, "log_odds_ratio": -0.540594220161438, "logits/chosen": 0.8641065955162048, "logits/rejected": 0.8180487155914307, "logps/chosen": -2.256821632385254, "logps/rejected": -3.60280704498291, "loss": 0.7875, "nll_loss": 0.7334147095680237, "rewards/accuracies": 0.875, "rewards/chosen": -0.22568215429782867, "rewards/margins": 0.1345985233783722, "rewards/rejected": -0.36028069257736206, "step": 2678 }, { "epoch": 7.3347022587268995, "grad_norm": 3.8877205848693848, "learning_rate": 6.331506849315068e-07, "log_odds_chosen": 1.3348133563995361, "log_odds_ratio": -0.4902799725532532, "logits/chosen": 0.6012358665466309, "logits/rejected": 0.5759182572364807, "logps/chosen": -2.2558236122131348, "logps/rejected": -3.569978713989258, "loss": 0.8653, "nll_loss": 0.8162623047828674, "rewards/accuracies": 0.875, "rewards/chosen": -0.22558239102363586, "rewards/margins": 0.13141551613807678, "rewards/rejected": -0.35699787735939026, "step": 2679 }, { "epoch": 7.337440109514032, "grad_norm": 3.9437925815582275, "learning_rate": 6.33013698630137e-07, "log_odds_chosen": 2.421889543533325, "log_odds_ratio": -0.26590025424957275, "logits/chosen": 0.9923614859580994, "logits/rejected": 0.9988902807235718, "logps/chosen": -2.2995245456695557, "logps/rejected": -4.63926887512207, "loss": 0.7308, "nll_loss": 0.70418381690979, "rewards/accuracies": 0.875, "rewards/chosen": -0.22995245456695557, "rewards/margins": 0.23397448658943176, "rewards/rejected": -0.46392691135406494, "step": 2680 }, { "epoch": 7.340177960301164, "grad_norm": 4.400112628936768, "learning_rate": 6.328767123287671e-07, "log_odds_chosen": 1.6135131120681763, "log_odds_ratio": -0.3983084261417389, "logits/chosen": 0.7344892024993896, "logits/rejected": 0.6935135126113892, "logps/chosen": -1.902234435081482, "logps/rejected": -3.3834362030029297, "loss": 0.7556, "nll_loss": 0.7157978415489197, "rewards/accuracies": 0.875, "rewards/chosen": -0.19022345542907715, "rewards/margins": 0.14812017977237701, "rewards/rejected": -0.33834362030029297, "step": 2681 }, { "epoch": 7.342915811088296, "grad_norm": 4.3230814933776855, "learning_rate": 6.327397260273972e-07, "log_odds_chosen": 1.7290549278259277, "log_odds_ratio": -0.3579327464103699, "logits/chosen": 0.8451628684997559, "logits/rejected": 0.8079275488853455, "logps/chosen": -2.424368381500244, "logps/rejected": -4.092197418212891, "loss": 0.8216, "nll_loss": 0.785772979259491, "rewards/accuracies": 0.875, "rewards/chosen": -0.24243685603141785, "rewards/margins": 0.16678285598754883, "rewards/rejected": -0.40921974182128906, "step": 2682 }, { "epoch": 7.345653661875428, "grad_norm": 4.381768703460693, "learning_rate": 6.326027397260274e-07, "log_odds_chosen": 2.042180061340332, "log_odds_ratio": -0.31069034337997437, "logits/chosen": 1.0698870420455933, "logits/rejected": 1.1499905586242676, "logps/chosen": -2.3554744720458984, "logps/rejected": -4.299440860748291, "loss": 0.8995, "nll_loss": 0.8684307336807251, "rewards/accuracies": 0.75, "rewards/chosen": -0.23554743826389313, "rewards/margins": 0.19439662992954254, "rewards/rejected": -0.42994409799575806, "step": 2683 }, { "epoch": 7.34839151266256, "grad_norm": 4.480532169342041, "learning_rate": 6.324657534246575e-07, "log_odds_chosen": 0.7075832486152649, "log_odds_ratio": -0.45582664012908936, "logits/chosen": 0.7286124229431152, "logits/rejected": 0.810153067111969, "logps/chosen": -2.664802074432373, "logps/rejected": -3.3161709308624268, "loss": 0.7748, "nll_loss": 0.7292183041572571, "rewards/accuracies": 0.875, "rewards/chosen": -0.2664802074432373, "rewards/margins": 0.06513689458370209, "rewards/rejected": -0.3316171169281006, "step": 2684 }, { "epoch": 7.351129363449692, "grad_norm": 4.677982807159424, "learning_rate": 6.323287671232877e-07, "log_odds_chosen": 2.502483367919922, "log_odds_ratio": -0.1564922332763672, "logits/chosen": 1.0119082927703857, "logits/rejected": 1.0364383459091187, "logps/chosen": -2.8825817108154297, "logps/rejected": -5.302402496337891, "loss": 0.7981, "nll_loss": 0.7824694514274597, "rewards/accuracies": 1.0, "rewards/chosen": -0.2882581949234009, "rewards/margins": 0.241982102394104, "rewards/rejected": -0.5302402973175049, "step": 2685 }, { "epoch": 7.353867214236824, "grad_norm": 4.437484264373779, "learning_rate": 6.321917808219178e-07, "log_odds_chosen": 1.5132912397384644, "log_odds_ratio": -0.33006834983825684, "logits/chosen": 0.8360993266105652, "logits/rejected": 0.8807529211044312, "logps/chosen": -1.8761765956878662, "logps/rejected": -3.287875175476074, "loss": 0.7241, "nll_loss": 0.6910938024520874, "rewards/accuracies": 0.875, "rewards/chosen": -0.18761767446994781, "rewards/margins": 0.14116986095905304, "rewards/rejected": -0.32878753542900085, "step": 2686 }, { "epoch": 7.356605065023956, "grad_norm": 6.4911723136901855, "learning_rate": 6.320547945205479e-07, "log_odds_chosen": 2.375217914581299, "log_odds_ratio": -0.18415962159633636, "logits/chosen": 1.0946359634399414, "logits/rejected": 1.1308718919754028, "logps/chosen": -2.405757427215576, "logps/rejected": -4.663476943969727, "loss": 0.7656, "nll_loss": 0.7472325563430786, "rewards/accuracies": 1.0, "rewards/chosen": -0.24057577550411224, "rewards/margins": 0.225771963596344, "rewards/rejected": -0.46634772419929504, "step": 2687 }, { "epoch": 7.359342915811088, "grad_norm": 5.23626184463501, "learning_rate": 6.319178082191781e-07, "log_odds_chosen": 1.5761256217956543, "log_odds_ratio": -0.5633969306945801, "logits/chosen": 1.0706979036331177, "logits/rejected": 1.0851590633392334, "logps/chosen": -2.3077454566955566, "logps/rejected": -3.698751926422119, "loss": 0.8566, "nll_loss": 0.800304651260376, "rewards/accuracies": 0.75, "rewards/chosen": -0.23077455163002014, "rewards/margins": 0.13910065591335297, "rewards/rejected": -0.3698751926422119, "step": 2688 }, { "epoch": 7.36208076659822, "grad_norm": 8.272695541381836, "learning_rate": 6.317808219178082e-07, "log_odds_chosen": 0.6166085600852966, "log_odds_ratio": -0.6534323692321777, "logits/chosen": 0.9181314706802368, "logits/rejected": 0.8861243724822998, "logps/chosen": -2.6971845626831055, "logps/rejected": -3.2532198429107666, "loss": 0.8382, "nll_loss": 0.7728791236877441, "rewards/accuracies": 0.75, "rewards/chosen": -0.2697184681892395, "rewards/margins": 0.0556035116314888, "rewards/rejected": -0.3253219723701477, "step": 2689 }, { "epoch": 7.364818617385352, "grad_norm": 4.425479888916016, "learning_rate": 6.316438356164383e-07, "log_odds_chosen": 1.6801488399505615, "log_odds_ratio": -0.271969735622406, "logits/chosen": 0.9118146896362305, "logits/rejected": 0.9942123293876648, "logps/chosen": -2.601919412612915, "logps/rejected": -4.197791576385498, "loss": 0.7565, "nll_loss": 0.7292697429656982, "rewards/accuracies": 1.0, "rewards/chosen": -0.260191947221756, "rewards/margins": 0.15958723425865173, "rewards/rejected": -0.4197791814804077, "step": 2690 }, { "epoch": 7.367556468172484, "grad_norm": 4.448155879974365, "learning_rate": 6.315068493150685e-07, "log_odds_chosen": 4.046303749084473, "log_odds_ratio": -0.19543328881263733, "logits/chosen": 1.0094678401947021, "logits/rejected": 1.0971760749816895, "logps/chosen": -2.8927736282348633, "logps/rejected": -6.865090370178223, "loss": 0.749, "nll_loss": 0.7294566631317139, "rewards/accuracies": 1.0, "rewards/chosen": -0.2892773449420929, "rewards/margins": 0.39723172783851624, "rewards/rejected": -0.6865091323852539, "step": 2691 }, { "epoch": 7.370294318959616, "grad_norm": 4.805701732635498, "learning_rate": 6.313698630136987e-07, "log_odds_chosen": 1.7700748443603516, "log_odds_ratio": -0.24359318614006042, "logits/chosen": 0.8337754607200623, "logits/rejected": 0.8902534246444702, "logps/chosen": -2.4903225898742676, "logps/rejected": -4.175999641418457, "loss": 0.7858, "nll_loss": 0.7614158987998962, "rewards/accuracies": 1.0, "rewards/chosen": -0.24903222918510437, "rewards/margins": 0.1685677021741867, "rewards/rejected": -0.41759997606277466, "step": 2692 }, { "epoch": 7.3730321697467485, "grad_norm": 5.5198869705200195, "learning_rate": 6.312328767123287e-07, "log_odds_chosen": 1.478753685951233, "log_odds_ratio": -0.31419092416763306, "logits/chosen": 1.1518529653549194, "logits/rejected": 1.2175434827804565, "logps/chosen": -2.4554107189178467, "logps/rejected": -3.87058162689209, "loss": 0.7378, "nll_loss": 0.7063975930213928, "rewards/accuracies": 0.875, "rewards/chosen": -0.24554108083248138, "rewards/margins": 0.14151708781719208, "rewards/rejected": -0.38705816864967346, "step": 2693 }, { "epoch": 7.3757700205338805, "grad_norm": 4.273677349090576, "learning_rate": 6.310958904109589e-07, "log_odds_chosen": 1.1106013059616089, "log_odds_ratio": -0.40788280963897705, "logits/chosen": 0.8223512768745422, "logits/rejected": 0.7457243204116821, "logps/chosen": -1.78963303565979, "logps/rejected": -2.808884620666504, "loss": 0.7504, "nll_loss": 0.7096418142318726, "rewards/accuracies": 0.75, "rewards/chosen": -0.178963303565979, "rewards/margins": 0.10192517191171646, "rewards/rejected": -0.28088849782943726, "step": 2694 }, { "epoch": 7.378507871321013, "grad_norm": 4.047491550445557, "learning_rate": 6.30958904109589e-07, "log_odds_chosen": 0.9794191718101501, "log_odds_ratio": -0.5022236704826355, "logits/chosen": 0.6880984902381897, "logits/rejected": 0.7049282193183899, "logps/chosen": -1.875662088394165, "logps/rejected": -2.7610676288604736, "loss": 0.8149, "nll_loss": 0.7646486163139343, "rewards/accuracies": 0.75, "rewards/chosen": -0.18756622076034546, "rewards/margins": 0.08854055404663086, "rewards/rejected": -0.2761067748069763, "step": 2695 }, { "epoch": 7.381245722108146, "grad_norm": 3.90452241897583, "learning_rate": 6.308219178082191e-07, "log_odds_chosen": 1.7723495960235596, "log_odds_ratio": -0.2509116530418396, "logits/chosen": 1.0295684337615967, "logits/rejected": 1.0879048109054565, "logps/chosen": -1.9007089138031006, "logps/rejected": -3.5479137897491455, "loss": 0.6984, "nll_loss": 0.6732724905014038, "rewards/accuracies": 1.0, "rewards/chosen": -0.19007088243961334, "rewards/margins": 0.16472050547599792, "rewards/rejected": -0.35479140281677246, "step": 2696 }, { "epoch": 7.383983572895277, "grad_norm": 4.55617094039917, "learning_rate": 6.306849315068493e-07, "log_odds_chosen": 2.6017980575561523, "log_odds_ratio": -0.19945572316646576, "logits/chosen": 0.8341692090034485, "logits/rejected": 0.8174384832382202, "logps/chosen": -2.6659469604492188, "logps/rejected": -5.1886773109436035, "loss": 0.8998, "nll_loss": 0.879869282245636, "rewards/accuracies": 0.875, "rewards/chosen": -0.26659470796585083, "rewards/margins": 0.2522730231285095, "rewards/rejected": -0.5188677310943604, "step": 2697 }, { "epoch": 7.38672142368241, "grad_norm": 3.9466729164123535, "learning_rate": 6.305479452054794e-07, "log_odds_chosen": 1.6010297536849976, "log_odds_ratio": -0.39245226979255676, "logits/chosen": 0.6501043438911438, "logits/rejected": 0.6098666787147522, "logps/chosen": -2.0834789276123047, "logps/rejected": -3.6109707355499268, "loss": 0.7988, "nll_loss": 0.7595703601837158, "rewards/accuracies": 0.875, "rewards/chosen": -0.20834791660308838, "rewards/margins": 0.15274915099143982, "rewards/rejected": -0.3610970675945282, "step": 2698 }, { "epoch": 7.389459274469542, "grad_norm": 5.055842399597168, "learning_rate": 6.304109589041096e-07, "log_odds_chosen": 1.0136182308197021, "log_odds_ratio": -0.4077163338661194, "logits/chosen": 0.8214040398597717, "logits/rejected": 0.8114979863166809, "logps/chosen": -2.649902105331421, "logps/rejected": -3.575246572494507, "loss": 0.8368, "nll_loss": 0.7960318326950073, "rewards/accuracies": 1.0, "rewards/chosen": -0.2649902105331421, "rewards/margins": 0.09253446757793427, "rewards/rejected": -0.35752469301223755, "step": 2699 }, { "epoch": 7.392197125256674, "grad_norm": 4.717654705047607, "learning_rate": 6.302739726027397e-07, "log_odds_chosen": 2.09541392326355, "log_odds_ratio": -0.1846727430820465, "logits/chosen": 0.6787007451057434, "logits/rejected": 0.692462682723999, "logps/chosen": -2.101789951324463, "logps/rejected": -4.049800872802734, "loss": 0.7219, "nll_loss": 0.7034217715263367, "rewards/accuracies": 1.0, "rewards/chosen": -0.21017900109291077, "rewards/margins": 0.19480109214782715, "rewards/rejected": -0.4049801230430603, "step": 2700 }, { "epoch": 7.394934976043806, "grad_norm": 5.9810709953308105, "learning_rate": 6.301369863013698e-07, "log_odds_chosen": 0.4226093888282776, "log_odds_ratio": -0.775624692440033, "logits/chosen": 0.8068957924842834, "logits/rejected": 0.7705445885658264, "logps/chosen": -2.3112974166870117, "logps/rejected": -2.6859123706817627, "loss": 0.9179, "nll_loss": 0.8403093814849854, "rewards/accuracies": 0.75, "rewards/chosen": -0.2311297506093979, "rewards/margins": 0.03746148198843002, "rewards/rejected": -0.2685912251472473, "step": 2701 }, { "epoch": 7.397672826830938, "grad_norm": 6.049590110778809, "learning_rate": 6.3e-07, "log_odds_chosen": 2.339313507080078, "log_odds_ratio": -0.22223451733589172, "logits/chosen": 1.041397213935852, "logits/rejected": 1.1073790788650513, "logps/chosen": -2.3118436336517334, "logps/rejected": -4.5502519607543945, "loss": 0.7902, "nll_loss": 0.7679430246353149, "rewards/accuracies": 0.875, "rewards/chosen": -0.23118436336517334, "rewards/margins": 0.22384081780910492, "rewards/rejected": -0.45502519607543945, "step": 2702 }, { "epoch": 7.40041067761807, "grad_norm": 5.189583778381348, "learning_rate": 6.298630136986301e-07, "log_odds_chosen": 0.7153317332267761, "log_odds_ratio": -0.5427839756011963, "logits/chosen": 0.8327401876449585, "logits/rejected": 0.7622672319412231, "logps/chosen": -2.441169261932373, "logps/rejected": -3.043696403503418, "loss": 0.9096, "nll_loss": 0.855361819267273, "rewards/accuracies": 0.875, "rewards/chosen": -0.2441169023513794, "rewards/margins": 0.06025272235274315, "rewards/rejected": -0.30436962842941284, "step": 2703 }, { "epoch": 7.403148528405202, "grad_norm": 4.782167434692383, "learning_rate": 6.297260273972602e-07, "log_odds_chosen": 2.5715579986572266, "log_odds_ratio": -0.1433297097682953, "logits/chosen": 1.1153675317764282, "logits/rejected": 1.152463674545288, "logps/chosen": -3.086087226867676, "logps/rejected": -5.584924697875977, "loss": 0.8595, "nll_loss": 0.8451314568519592, "rewards/accuracies": 1.0, "rewards/chosen": -0.308608740568161, "rewards/margins": 0.2498837411403656, "rewards/rejected": -0.5584924817085266, "step": 2704 }, { "epoch": 7.405886379192334, "grad_norm": 3.85921049118042, "learning_rate": 6.295890410958904e-07, "log_odds_chosen": 1.7302815914154053, "log_odds_ratio": -0.34123432636260986, "logits/chosen": 0.7659311294555664, "logits/rejected": 0.7587507963180542, "logps/chosen": -1.968292236328125, "logps/rejected": -3.60304594039917, "loss": 0.8132, "nll_loss": 0.7791132926940918, "rewards/accuracies": 0.875, "rewards/chosen": -0.19682922959327698, "rewards/margins": 0.1634753793478012, "rewards/rejected": -0.360304594039917, "step": 2705 }, { "epoch": 7.408624229979466, "grad_norm": 5.9295878410339355, "learning_rate": 6.294520547945206e-07, "log_odds_chosen": 1.2373130321502686, "log_odds_ratio": -0.46120142936706543, "logits/chosen": 0.7585846185684204, "logits/rejected": 0.8289318680763245, "logps/chosen": -2.0815072059631348, "logps/rejected": -3.1954429149627686, "loss": 0.8203, "nll_loss": 0.7741566896438599, "rewards/accuracies": 0.875, "rewards/chosen": -0.208150714635849, "rewards/margins": 0.11139357089996338, "rewards/rejected": -0.3195442855358124, "step": 2706 }, { "epoch": 7.411362080766598, "grad_norm": 4.38479471206665, "learning_rate": 6.293150684931506e-07, "log_odds_chosen": 1.398427128791809, "log_odds_ratio": -0.2707165777683258, "logits/chosen": 1.1915979385375977, "logits/rejected": 1.244700312614441, "logps/chosen": -2.495762825012207, "logps/rejected": -3.843242645263672, "loss": 0.6848, "nll_loss": 0.6577584743499756, "rewards/accuracies": 1.0, "rewards/chosen": -0.24957627058029175, "rewards/margins": 0.1347479671239853, "rewards/rejected": -0.38432425260543823, "step": 2707 }, { "epoch": 7.41409993155373, "grad_norm": 4.468080043792725, "learning_rate": 6.291780821917808e-07, "log_odds_chosen": 2.73545503616333, "log_odds_ratio": -0.2195877581834793, "logits/chosen": 0.7992513179779053, "logits/rejected": 0.7937909364700317, "logps/chosen": -1.880176067352295, "logps/rejected": -4.486312389373779, "loss": 0.731, "nll_loss": 0.7090779542922974, "rewards/accuracies": 1.0, "rewards/chosen": -0.1880176067352295, "rewards/margins": 0.2606136202812195, "rewards/rejected": -0.448631227016449, "step": 2708 }, { "epoch": 7.416837782340862, "grad_norm": 3.8614413738250732, "learning_rate": 6.290410958904109e-07, "log_odds_chosen": 2.5646071434020996, "log_odds_ratio": -0.1899232417345047, "logits/chosen": 0.9573580026626587, "logits/rejected": 0.9713380932807922, "logps/chosen": -2.0410726070404053, "logps/rejected": -4.477877616882324, "loss": 0.7661, "nll_loss": 0.7471128702163696, "rewards/accuracies": 1.0, "rewards/chosen": -0.20410726964473724, "rewards/margins": 0.24368047714233398, "rewards/rejected": -0.4477877616882324, "step": 2709 }, { "epoch": 7.4195756331279945, "grad_norm": 4.799287796020508, "learning_rate": 6.28904109589041e-07, "log_odds_chosen": 1.7001285552978516, "log_odds_ratio": -0.43671414256095886, "logits/chosen": 0.971137523651123, "logits/rejected": 0.9797688126564026, "logps/chosen": -2.6498141288757324, "logps/rejected": -4.274746417999268, "loss": 0.8533, "nll_loss": 0.8096505999565125, "rewards/accuracies": 0.875, "rewards/chosen": -0.26498138904571533, "rewards/margins": 0.16249321401119232, "rewards/rejected": -0.42747464776039124, "step": 2710 }, { "epoch": 7.422313483915127, "grad_norm": 6.09520959854126, "learning_rate": 6.287671232876712e-07, "log_odds_chosen": 1.9024535417556763, "log_odds_ratio": -0.6550492644309998, "logits/chosen": 0.8246282339096069, "logits/rejected": 0.8644859194755554, "logps/chosen": -2.772145986557007, "logps/rejected": -4.554086208343506, "loss": 0.8631, "nll_loss": 0.797640323638916, "rewards/accuracies": 0.875, "rewards/chosen": -0.2772146165370941, "rewards/margins": 0.17819398641586304, "rewards/rejected": -0.45540860295295715, "step": 2711 }, { "epoch": 7.425051334702259, "grad_norm": 4.319340229034424, "learning_rate": 6.286301369863013e-07, "log_odds_chosen": 1.2083654403686523, "log_odds_ratio": -0.37441933155059814, "logits/chosen": 0.7943708300590515, "logits/rejected": 0.7710906863212585, "logps/chosen": -2.116144895553589, "logps/rejected": -3.2392420768737793, "loss": 0.7657, "nll_loss": 0.7282910346984863, "rewards/accuracies": 0.875, "rewards/chosen": -0.2116144895553589, "rewards/margins": 0.11230971664190292, "rewards/rejected": -0.3239242434501648, "step": 2712 }, { "epoch": 7.427789185489391, "grad_norm": 6.708675384521484, "learning_rate": 6.284931506849315e-07, "log_odds_chosen": 0.7611532211303711, "log_odds_ratio": -0.6191455721855164, "logits/chosen": 0.8520712852478027, "logits/rejected": 0.8093839287757874, "logps/chosen": -2.3022842407226562, "logps/rejected": -2.9281954765319824, "loss": 0.9136, "nll_loss": 0.85173499584198, "rewards/accuracies": 0.75, "rewards/chosen": -0.23022842407226562, "rewards/margins": 0.06259115040302277, "rewards/rejected": -0.2928195595741272, "step": 2713 }, { "epoch": 7.430527036276523, "grad_norm": 4.3938493728637695, "learning_rate": 6.283561643835616e-07, "log_odds_chosen": 1.1233420372009277, "log_odds_ratio": -0.351103276014328, "logits/chosen": 0.80732262134552, "logits/rejected": 0.7650240659713745, "logps/chosen": -2.1616992950439453, "logps/rejected": -3.189880609512329, "loss": 0.773, "nll_loss": 0.7379090189933777, "rewards/accuracies": 1.0, "rewards/chosen": -0.21616993844509125, "rewards/margins": 0.10281811654567719, "rewards/rejected": -0.31898805499076843, "step": 2714 }, { "epoch": 7.433264887063655, "grad_norm": 4.752907752990723, "learning_rate": 6.282191780821917e-07, "log_odds_chosen": 2.4068500995635986, "log_odds_ratio": -0.32987749576568604, "logits/chosen": 0.9608676433563232, "logits/rejected": 0.9954603910446167, "logps/chosen": -2.5062897205352783, "logps/rejected": -4.831007957458496, "loss": 0.8526, "nll_loss": 0.8196141123771667, "rewards/accuracies": 0.875, "rewards/chosen": -0.2506289780139923, "rewards/margins": 0.23247182369232178, "rewards/rejected": -0.4831008017063141, "step": 2715 }, { "epoch": 7.436002737850787, "grad_norm": 5.280570983886719, "learning_rate": 6.280821917808219e-07, "log_odds_chosen": 1.2737690210342407, "log_odds_ratio": -0.4547365605831146, "logits/chosen": 1.0466734170913696, "logits/rejected": 1.0859415531158447, "logps/chosen": -2.7406234741210938, "logps/rejected": -3.9899277687072754, "loss": 0.7847, "nll_loss": 0.7392034530639648, "rewards/accuracies": 0.625, "rewards/chosen": -0.2740623354911804, "rewards/margins": 0.12493042647838593, "rewards/rejected": -0.39899277687072754, "step": 2716 }, { "epoch": 7.438740588637919, "grad_norm": 6.367281436920166, "learning_rate": 6.27945205479452e-07, "log_odds_chosen": 2.7937581539154053, "log_odds_ratio": -0.7139416933059692, "logits/chosen": 0.7458867430686951, "logits/rejected": 0.7751380205154419, "logps/chosen": -2.3146605491638184, "logps/rejected": -5.008190631866455, "loss": 1.0494, "nll_loss": 0.9780557155609131, "rewards/accuracies": 0.5, "rewards/chosen": -0.23146605491638184, "rewards/margins": 0.2693530321121216, "rewards/rejected": -0.5008190870285034, "step": 2717 }, { "epoch": 7.441478439425051, "grad_norm": 5.401995658874512, "learning_rate": 6.278082191780821e-07, "log_odds_chosen": 2.4925270080566406, "log_odds_ratio": -0.29451361298561096, "logits/chosen": 0.8857646584510803, "logits/rejected": 0.8690915107727051, "logps/chosen": -2.2953734397888184, "logps/rejected": -4.634585380554199, "loss": 0.7733, "nll_loss": 0.7438588738441467, "rewards/accuracies": 0.875, "rewards/chosen": -0.22953733801841736, "rewards/margins": 0.2339211404323578, "rewards/rejected": -0.4634585380554199, "step": 2718 }, { "epoch": 7.444216290212183, "grad_norm": 4.318441867828369, "learning_rate": 6.276712328767123e-07, "log_odds_chosen": 0.8145653009414673, "log_odds_ratio": -0.47033384442329407, "logits/chosen": 0.8144984245300293, "logits/rejected": 0.8263585567474365, "logps/chosen": -2.730963945388794, "logps/rejected": -3.48057222366333, "loss": 0.7985, "nll_loss": 0.7514978647232056, "rewards/accuracies": 0.75, "rewards/chosen": -0.27309638261795044, "rewards/margins": 0.07496082782745361, "rewards/rejected": -0.34805721044540405, "step": 2719 }, { "epoch": 7.446954140999315, "grad_norm": 4.725257396697998, "learning_rate": 6.275342465753425e-07, "log_odds_chosen": 1.0434077978134155, "log_odds_ratio": -0.4403793215751648, "logits/chosen": 0.6462737917900085, "logits/rejected": 0.6275303959846497, "logps/chosen": -1.934306263923645, "logps/rejected": -2.8955445289611816, "loss": 0.833, "nll_loss": 0.7889608144760132, "rewards/accuracies": 0.75, "rewards/chosen": -0.19343064725399017, "rewards/margins": 0.09612381458282471, "rewards/rejected": -0.2895544469356537, "step": 2720 }, { "epoch": 7.449691991786447, "grad_norm": 4.720539093017578, "learning_rate": 6.273972602739725e-07, "log_odds_chosen": 1.0771149396896362, "log_odds_ratio": -0.7582836151123047, "logits/chosen": 0.5898509621620178, "logits/rejected": 0.68876713514328, "logps/chosen": -2.3459582328796387, "logps/rejected": -3.3518834114074707, "loss": 0.8035, "nll_loss": 0.727668285369873, "rewards/accuracies": 0.75, "rewards/chosen": -0.23459583520889282, "rewards/margins": 0.10059250891208649, "rewards/rejected": -0.3351883590221405, "step": 2721 }, { "epoch": 7.452429842573579, "grad_norm": 8.43017864227295, "learning_rate": 6.272602739726027e-07, "log_odds_chosen": 1.113289713859558, "log_odds_ratio": -0.4218001663684845, "logits/chosen": 1.1007287502288818, "logits/rejected": 1.1108403205871582, "logps/chosen": -3.394550323486328, "logps/rejected": -4.422171592712402, "loss": 0.8424, "nll_loss": 0.8002420663833618, "rewards/accuracies": 0.75, "rewards/chosen": -0.3394550681114197, "rewards/margins": 0.1027621403336525, "rewards/rejected": -0.44221723079681396, "step": 2722 }, { "epoch": 7.455167693360712, "grad_norm": 4.598719596862793, "learning_rate": 6.271232876712329e-07, "log_odds_chosen": 1.8264275789260864, "log_odds_ratio": -0.37907421588897705, "logits/chosen": 1.1437315940856934, "logits/rejected": 1.088181495666504, "logps/chosen": -2.6147713661193848, "logps/rejected": -4.397263526916504, "loss": 0.8593, "nll_loss": 0.8213999271392822, "rewards/accuracies": 0.75, "rewards/chosen": -0.26147714257240295, "rewards/margins": 0.17824919521808624, "rewards/rejected": -0.439726322889328, "step": 2723 }, { "epoch": 7.457905544147844, "grad_norm": 5.8296990394592285, "learning_rate": 6.269863013698629e-07, "log_odds_chosen": 1.367043137550354, "log_odds_ratio": -0.2923935353755951, "logits/chosen": 0.9134030342102051, "logits/rejected": 0.9085699319839478, "logps/chosen": -1.3611750602722168, "logps/rejected": -2.5199198722839355, "loss": 0.8076, "nll_loss": 0.7783173322677612, "rewards/accuracies": 0.875, "rewards/chosen": -0.13611751794815063, "rewards/margins": 0.11587447673082352, "rewards/rejected": -0.25199198722839355, "step": 2724 }, { "epoch": 7.460643394934976, "grad_norm": 6.157370090484619, "learning_rate": 6.268493150684931e-07, "log_odds_chosen": 1.4676284790039062, "log_odds_ratio": -0.4528038799762726, "logits/chosen": 0.9856998920440674, "logits/rejected": 0.9698488712310791, "logps/chosen": -2.327744722366333, "logps/rejected": -3.694044351577759, "loss": 0.7874, "nll_loss": 0.7421436309814453, "rewards/accuracies": 0.875, "rewards/chosen": -0.23277448117733002, "rewards/margins": 0.13662993907928467, "rewards/rejected": -0.3694044351577759, "step": 2725 }, { "epoch": 7.4633812457221085, "grad_norm": 5.1431145668029785, "learning_rate": 6.267123287671232e-07, "log_odds_chosen": 1.5120784044265747, "log_odds_ratio": -0.25612449645996094, "logits/chosen": 1.0263644456863403, "logits/rejected": 0.8792310357093811, "logps/chosen": -1.597306251525879, "logps/rejected": -2.937995672225952, "loss": 0.7571, "nll_loss": 0.7314795255661011, "rewards/accuracies": 1.0, "rewards/chosen": -0.15973064303398132, "rewards/margins": 0.13406893610954285, "rewards/rejected": -0.29379957914352417, "step": 2726 }, { "epoch": 7.4661190965092405, "grad_norm": 5.240860462188721, "learning_rate": 6.265753424657534e-07, "log_odds_chosen": 1.8558270931243896, "log_odds_ratio": -0.2909013032913208, "logits/chosen": 0.9888429045677185, "logits/rejected": 0.9082800149917603, "logps/chosen": -2.3397555351257324, "logps/rejected": -4.106328010559082, "loss": 0.8774, "nll_loss": 0.8483537435531616, "rewards/accuracies": 0.875, "rewards/chosen": -0.23397555947303772, "rewards/margins": 0.17665722966194153, "rewards/rejected": -0.41063278913497925, "step": 2727 }, { "epoch": 7.468856947296373, "grad_norm": 5.037036418914795, "learning_rate": 6.264383561643835e-07, "log_odds_chosen": 1.4416013956069946, "log_odds_ratio": -0.34990447759628296, "logits/chosen": 0.8548415899276733, "logits/rejected": 0.8158855438232422, "logps/chosen": -2.3138012886047363, "logps/rejected": -3.612567663192749, "loss": 0.84, "nll_loss": 0.8050472736358643, "rewards/accuracies": 0.875, "rewards/chosen": -0.2313801348209381, "rewards/margins": 0.12987665832042694, "rewards/rejected": -0.36125680804252625, "step": 2728 }, { "epoch": 7.471594798083505, "grad_norm": 4.921698093414307, "learning_rate": 6.263013698630136e-07, "log_odds_chosen": 1.787447214126587, "log_odds_ratio": -0.3268534243106842, "logits/chosen": 0.6152797937393188, "logits/rejected": 0.6361168622970581, "logps/chosen": -2.0990548133850098, "logps/rejected": -3.756133556365967, "loss": 0.7872, "nll_loss": 0.7544705867767334, "rewards/accuracies": 1.0, "rewards/chosen": -0.2099054604768753, "rewards/margins": 0.16570790112018585, "rewards/rejected": -0.37561336159706116, "step": 2729 }, { "epoch": 7.474332648870637, "grad_norm": 4.201699733734131, "learning_rate": 6.261643835616438e-07, "log_odds_chosen": 2.159379005432129, "log_odds_ratio": -0.21643376350402832, "logits/chosen": 0.8995362520217896, "logits/rejected": 0.9217398762702942, "logps/chosen": -1.6635003089904785, "logps/rejected": -3.6265625953674316, "loss": 0.7162, "nll_loss": 0.6945198774337769, "rewards/accuracies": 1.0, "rewards/chosen": -0.16635005176067352, "rewards/margins": 0.19630621373653412, "rewards/rejected": -0.36265626549720764, "step": 2730 }, { "epoch": 7.477070499657769, "grad_norm": 3.9586517810821533, "learning_rate": 6.260273972602739e-07, "log_odds_chosen": 1.8314483165740967, "log_odds_ratio": -0.21179062128067017, "logits/chosen": 1.0053225755691528, "logits/rejected": 1.0364704132080078, "logps/chosen": -1.7580089569091797, "logps/rejected": -3.3979244232177734, "loss": 0.7096, "nll_loss": 0.6883836984634399, "rewards/accuracies": 1.0, "rewards/chosen": -0.1758008897304535, "rewards/margins": 0.1639915257692337, "rewards/rejected": -0.3397924304008484, "step": 2731 }, { "epoch": 7.479808350444901, "grad_norm": 4.289855003356934, "learning_rate": 6.25890410958904e-07, "log_odds_chosen": 2.3715734481811523, "log_odds_ratio": -0.3090492784976959, "logits/chosen": 0.8404774069786072, "logits/rejected": 0.8669015169143677, "logps/chosen": -3.4688880443573, "logps/rejected": -5.759490013122559, "loss": 0.9051, "nll_loss": 0.8742079734802246, "rewards/accuracies": 0.75, "rewards/chosen": -0.34688878059387207, "rewards/margins": 0.22906020283699036, "rewards/rejected": -0.57594895362854, "step": 2732 }, { "epoch": 7.482546201232033, "grad_norm": 4.622648239135742, "learning_rate": 6.257534246575342e-07, "log_odds_chosen": 0.956469714641571, "log_odds_ratio": -0.45831334590911865, "logits/chosen": 0.8049890995025635, "logits/rejected": 0.775888204574585, "logps/chosen": -1.8722803592681885, "logps/rejected": -2.7126197814941406, "loss": 0.7825, "nll_loss": 0.7366982102394104, "rewards/accuracies": 0.875, "rewards/chosen": -0.1872280389070511, "rewards/margins": 0.08403396606445312, "rewards/rejected": -0.271261990070343, "step": 2733 }, { "epoch": 7.485284052019165, "grad_norm": 4.688697338104248, "learning_rate": 6.256164383561644e-07, "log_odds_chosen": 0.9929569959640503, "log_odds_ratio": -0.4001232981681824, "logits/chosen": 0.7130594253540039, "logits/rejected": 0.705664873123169, "logps/chosen": -1.4706676006317139, "logps/rejected": -2.311861991882324, "loss": 0.7872, "nll_loss": 0.7471699118614197, "rewards/accuracies": 0.875, "rewards/chosen": -0.14706675708293915, "rewards/margins": 0.08411944657564163, "rewards/rejected": -0.23118621110916138, "step": 2734 }, { "epoch": 7.488021902806297, "grad_norm": 4.592208385467529, "learning_rate": 6.254794520547944e-07, "log_odds_chosen": 1.2518198490142822, "log_odds_ratio": -0.4201182723045349, "logits/chosen": 0.7136107087135315, "logits/rejected": 0.6620513200759888, "logps/chosen": -1.7127546072006226, "logps/rejected": -2.8502378463745117, "loss": 0.8043, "nll_loss": 0.7623012065887451, "rewards/accuracies": 0.75, "rewards/chosen": -0.17127546668052673, "rewards/margins": 0.11374832689762115, "rewards/rejected": -0.2850238084793091, "step": 2735 }, { "epoch": 7.490759753593429, "grad_norm": 3.8991940021514893, "learning_rate": 6.253424657534246e-07, "log_odds_chosen": 3.2890737056732178, "log_odds_ratio": -0.17475908994674683, "logits/chosen": 1.06386137008667, "logits/rejected": 1.1009701490402222, "logps/chosen": -2.720876693725586, "logps/rejected": -5.939551830291748, "loss": 0.9297, "nll_loss": 0.9122198820114136, "rewards/accuracies": 1.0, "rewards/chosen": -0.2720876932144165, "rewards/margins": 0.32186752557754517, "rewards/rejected": -0.5939552783966064, "step": 2736 }, { "epoch": 7.493497604380561, "grad_norm": 5.065277576446533, "learning_rate": 6.252054794520548e-07, "log_odds_chosen": 1.3375245332717896, "log_odds_ratio": -0.3094433844089508, "logits/chosen": 0.7830238342285156, "logits/rejected": 0.7955322861671448, "logps/chosen": -2.644379138946533, "logps/rejected": -3.8845863342285156, "loss": 0.921, "nll_loss": 0.8900306224822998, "rewards/accuracies": 1.0, "rewards/chosen": -0.26443788409233093, "rewards/margins": 0.12402074784040451, "rewards/rejected": -0.38845863938331604, "step": 2737 }, { "epoch": 7.496235455167693, "grad_norm": 5.050021648406982, "learning_rate": 6.250684931506848e-07, "log_odds_chosen": 2.6997170448303223, "log_odds_ratio": -0.34313493967056274, "logits/chosen": 0.8818398714065552, "logits/rejected": 0.8871215581893921, "logps/chosen": -2.5922913551330566, "logps/rejected": -5.201254844665527, "loss": 0.7919, "nll_loss": 0.7575691342353821, "rewards/accuracies": 0.875, "rewards/chosen": -0.2592291533946991, "rewards/margins": 0.2608962655067444, "rewards/rejected": -0.5201253890991211, "step": 2738 }, { "epoch": 7.498973305954825, "grad_norm": 3.974864959716797, "learning_rate": 6.24931506849315e-07, "log_odds_chosen": 1.711837887763977, "log_odds_ratio": -0.26049360632896423, "logits/chosen": 0.6997455954551697, "logits/rejected": 0.7212719917297363, "logps/chosen": -2.19486927986145, "logps/rejected": -3.802638530731201, "loss": 0.8098, "nll_loss": 0.7837596535682678, "rewards/accuracies": 1.0, "rewards/chosen": -0.21948692202568054, "rewards/margins": 0.16077691316604614, "rewards/rejected": -0.3802638649940491, "step": 2739 }, { "epoch": 7.501711156741957, "grad_norm": 4.131383895874023, "learning_rate": 6.247945205479451e-07, "log_odds_chosen": 1.1409456729888916, "log_odds_ratio": -0.33179014921188354, "logits/chosen": 0.8932210803031921, "logits/rejected": 0.9346624612808228, "logps/chosen": -3.560593366622925, "logps/rejected": -4.634387016296387, "loss": 0.8051, "nll_loss": 0.7719553709030151, "rewards/accuracies": 0.875, "rewards/chosen": -0.35605934262275696, "rewards/margins": 0.10737939924001694, "rewards/rejected": -0.4634386897087097, "step": 2740 }, { "epoch": 7.5044490075290895, "grad_norm": 5.634385585784912, "learning_rate": 6.246575342465754e-07, "log_odds_chosen": 2.367697238922119, "log_odds_ratio": -0.23217806220054626, "logits/chosen": 0.9826974272727966, "logits/rejected": 1.0925812721252441, "logps/chosen": -3.168490409851074, "logps/rejected": -5.4961934089660645, "loss": 0.7641, "nll_loss": 0.7408789396286011, "rewards/accuracies": 0.875, "rewards/chosen": -0.316849023103714, "rewards/margins": 0.23277035355567932, "rewards/rejected": -0.5496193766593933, "step": 2741 }, { "epoch": 7.507186858316222, "grad_norm": 3.5707318782806396, "learning_rate": 6.245205479452055e-07, "log_odds_chosen": 1.4603607654571533, "log_odds_ratio": -0.3220204710960388, "logits/chosen": 0.7611991763114929, "logits/rejected": 0.7265611886978149, "logps/chosen": -2.0857937335968018, "logps/rejected": -3.4460010528564453, "loss": 0.8213, "nll_loss": 0.7890855669975281, "rewards/accuracies": 1.0, "rewards/chosen": -0.2085793912410736, "rewards/margins": 0.1360207051038742, "rewards/rejected": -0.3446000814437866, "step": 2742 }, { "epoch": 7.509924709103354, "grad_norm": 5.889176845550537, "learning_rate": 6.243835616438356e-07, "log_odds_chosen": 2.219338893890381, "log_odds_ratio": -0.26231446862220764, "logits/chosen": 0.7513135671615601, "logits/rejected": 0.7652570009231567, "logps/chosen": -2.041025400161743, "logps/rejected": -4.107420921325684, "loss": 0.7516, "nll_loss": 0.7253958582878113, "rewards/accuracies": 0.875, "rewards/chosen": -0.2041025459766388, "rewards/margins": 0.2066395878791809, "rewards/rejected": -0.4107421338558197, "step": 2743 }, { "epoch": 7.512662559890486, "grad_norm": 4.489269256591797, "learning_rate": 6.242465753424658e-07, "log_odds_chosen": 3.014522075653076, "log_odds_ratio": -0.13936859369277954, "logits/chosen": 0.849769651889801, "logits/rejected": 0.9000078439712524, "logps/chosen": -2.5012412071228027, "logps/rejected": -5.388523578643799, "loss": 0.8626, "nll_loss": 0.8486183881759644, "rewards/accuracies": 0.875, "rewards/chosen": -0.25012412667274475, "rewards/margins": 0.2887282371520996, "rewards/rejected": -0.538852334022522, "step": 2744 }, { "epoch": 7.515400410677618, "grad_norm": 4.355244159698486, "learning_rate": 6.241095890410959e-07, "log_odds_chosen": 1.717495322227478, "log_odds_ratio": -0.49939292669296265, "logits/chosen": 0.7779065370559692, "logits/rejected": 0.780776858329773, "logps/chosen": -3.0921850204467773, "logps/rejected": -4.75876522064209, "loss": 0.8194, "nll_loss": 0.7694183588027954, "rewards/accuracies": 0.75, "rewards/chosen": -0.30921852588653564, "rewards/margins": 0.16665802896022797, "rewards/rejected": -0.4758765399456024, "step": 2745 }, { "epoch": 7.51813826146475, "grad_norm": 5.628077983856201, "learning_rate": 6.23972602739726e-07, "log_odds_chosen": 0.08349849283695221, "log_odds_ratio": -0.8241614103317261, "logits/chosen": 1.0101299285888672, "logits/rejected": 0.9792394638061523, "logps/chosen": -2.039978504180908, "logps/rejected": -2.082402467727661, "loss": 0.8033, "nll_loss": 0.7209236025810242, "rewards/accuracies": 0.5, "rewards/chosen": -0.2039978802204132, "rewards/margins": 0.004242381080985069, "rewards/rejected": -0.20824025571346283, "step": 2746 }, { "epoch": 7.520876112251882, "grad_norm": 4.397878170013428, "learning_rate": 6.238356164383562e-07, "log_odds_chosen": 1.7867785692214966, "log_odds_ratio": -0.2791198194026947, "logits/chosen": 1.0255900621414185, "logits/rejected": 1.0487360954284668, "logps/chosen": -2.1035733222961426, "logps/rejected": -3.7571306228637695, "loss": 0.7513, "nll_loss": 0.7233985662460327, "rewards/accuracies": 1.0, "rewards/chosen": -0.21035733819007874, "rewards/margins": 0.16535574197769165, "rewards/rejected": -0.375713050365448, "step": 2747 }, { "epoch": 7.523613963039015, "grad_norm": 4.1904706954956055, "learning_rate": 6.236986301369864e-07, "log_odds_chosen": 1.7326812744140625, "log_odds_ratio": -0.3207398056983948, "logits/chosen": 0.7284102439880371, "logits/rejected": 0.7674416899681091, "logps/chosen": -2.236917018890381, "logps/rejected": -3.9036645889282227, "loss": 0.8045, "nll_loss": 0.7724448442459106, "rewards/accuracies": 0.875, "rewards/chosen": -0.22369170188903809, "rewards/margins": 0.16667479276657104, "rewards/rejected": -0.39036649465560913, "step": 2748 }, { "epoch": 7.526351813826146, "grad_norm": 3.4765801429748535, "learning_rate": 6.235616438356164e-07, "log_odds_chosen": 1.699947714805603, "log_odds_ratio": -0.2539259195327759, "logits/chosen": 0.9590154886245728, "logits/rejected": 0.9828765392303467, "logps/chosen": -2.6041505336761475, "logps/rejected": -4.210968971252441, "loss": 0.7645, "nll_loss": 0.7390754818916321, "rewards/accuracies": 1.0, "rewards/chosen": -0.26041507720947266, "rewards/margins": 0.1606818288564682, "rewards/rejected": -0.42109689116477966, "step": 2749 }, { "epoch": 7.529089664613279, "grad_norm": 6.595651149749756, "learning_rate": 6.234246575342466e-07, "log_odds_chosen": 2.4320576190948486, "log_odds_ratio": -0.2792646884918213, "logits/chosen": 0.9053045511245728, "logits/rejected": 0.9147524833679199, "logps/chosen": -2.866762161254883, "logps/rejected": -5.220454216003418, "loss": 0.7412, "nll_loss": 0.7132987976074219, "rewards/accuracies": 0.875, "rewards/chosen": -0.28667622804641724, "rewards/margins": 0.23536919057369232, "rewards/rejected": -0.522045373916626, "step": 2750 }, { "epoch": 7.531827515400411, "grad_norm": 4.710212707519531, "learning_rate": 6.232876712328768e-07, "log_odds_chosen": 1.269863247871399, "log_odds_ratio": -0.3321041762828827, "logits/chosen": 0.8921852707862854, "logits/rejected": 0.934289813041687, "logps/chosen": -2.5902953147888184, "logps/rejected": -3.8103907108306885, "loss": 0.7463, "nll_loss": 0.7130916118621826, "rewards/accuracies": 0.875, "rewards/chosen": -0.2590295374393463, "rewards/margins": 0.12200955301523209, "rewards/rejected": -0.3810390830039978, "step": 2751 }, { "epoch": 7.534565366187543, "grad_norm": 4.8378586769104, "learning_rate": 6.231506849315068e-07, "log_odds_chosen": 0.5082455277442932, "log_odds_ratio": -0.6667741537094116, "logits/chosen": 0.43684273958206177, "logits/rejected": 0.49939942359924316, "logps/chosen": -2.002514362335205, "logps/rejected": -2.5226826667785645, "loss": 0.8547, "nll_loss": 0.7880228757858276, "rewards/accuracies": 0.625, "rewards/chosen": -0.20025144517421722, "rewards/margins": 0.05201681703329086, "rewards/rejected": -0.2522682547569275, "step": 2752 }, { "epoch": 7.537303216974675, "grad_norm": 4.262503623962402, "learning_rate": 6.23013698630137e-07, "log_odds_chosen": 1.4379198551177979, "log_odds_ratio": -0.30674052238464355, "logits/chosen": 0.8765550851821899, "logits/rejected": 0.9239311814308167, "logps/chosen": -2.5213122367858887, "logps/rejected": -3.8868319988250732, "loss": 0.865, "nll_loss": 0.8343511819839478, "rewards/accuracies": 0.875, "rewards/chosen": -0.25213122367858887, "rewards/margins": 0.13655199110507965, "rewards/rejected": -0.3886831998825073, "step": 2753 }, { "epoch": 7.540041067761807, "grad_norm": 5.1136908531188965, "learning_rate": 6.228767123287671e-07, "log_odds_chosen": 1.8843262195587158, "log_odds_ratio": -0.2856638431549072, "logits/chosen": 0.7266325354576111, "logits/rejected": 0.7723913192749023, "logps/chosen": -2.0567526817321777, "logps/rejected": -3.8269920349121094, "loss": 0.8768, "nll_loss": 0.8482239842414856, "rewards/accuracies": 1.0, "rewards/chosen": -0.20567528903484344, "rewards/margins": 0.17702393233776093, "rewards/rejected": -0.382699191570282, "step": 2754 }, { "epoch": 7.542778918548939, "grad_norm": 4.829310894012451, "learning_rate": 6.227397260273973e-07, "log_odds_chosen": 1.0755572319030762, "log_odds_ratio": -0.36613357067108154, "logits/chosen": 0.7603474855422974, "logits/rejected": 0.7653178572654724, "logps/chosen": -2.284576892852783, "logps/rejected": -3.2715139389038086, "loss": 0.7708, "nll_loss": 0.7342185974121094, "rewards/accuracies": 0.875, "rewards/chosen": -0.22845768928527832, "rewards/margins": 0.09869371354579926, "rewards/rejected": -0.3271513879299164, "step": 2755 }, { "epoch": 7.545516769336071, "grad_norm": 6.733376979827881, "learning_rate": 6.226027397260274e-07, "log_odds_chosen": 0.3682299554347992, "log_odds_ratio": -0.8447398543357849, "logits/chosen": 0.8364962935447693, "logits/rejected": 0.9153465628623962, "logps/chosen": -3.462714195251465, "logps/rejected": -3.7783915996551514, "loss": 0.8116, "nll_loss": 0.7271549701690674, "rewards/accuracies": 0.75, "rewards/chosen": -0.34627142548561096, "rewards/margins": 0.03156775236129761, "rewards/rejected": -0.37783920764923096, "step": 2756 }, { "epoch": 7.5482546201232035, "grad_norm": 5.072086811065674, "learning_rate": 6.224657534246575e-07, "log_odds_chosen": 1.6520636081695557, "log_odds_ratio": -0.2550186216831207, "logits/chosen": 0.7479438781738281, "logits/rejected": 0.6982659101486206, "logps/chosen": -1.739848017692566, "logps/rejected": -3.210581064224243, "loss": 0.766, "nll_loss": 0.7404648661613464, "rewards/accuracies": 1.0, "rewards/chosen": -0.17398479580879211, "rewards/margins": 0.14707332849502563, "rewards/rejected": -0.32105809450149536, "step": 2757 }, { "epoch": 7.5509924709103355, "grad_norm": 7.151658535003662, "learning_rate": 6.223287671232877e-07, "log_odds_chosen": 1.6786123514175415, "log_odds_ratio": -0.3299645185470581, "logits/chosen": 0.7269536256790161, "logits/rejected": 0.67931067943573, "logps/chosen": -3.299440383911133, "logps/rejected": -4.934982776641846, "loss": 0.8911, "nll_loss": 0.8580622673034668, "rewards/accuracies": 0.875, "rewards/chosen": -0.32994404435157776, "rewards/margins": 0.16355428099632263, "rewards/rejected": -0.4934982657432556, "step": 2758 }, { "epoch": 7.553730321697468, "grad_norm": 4.688180446624756, "learning_rate": 6.221917808219178e-07, "log_odds_chosen": 0.9262614250183105, "log_odds_ratio": -0.4270474910736084, "logits/chosen": 0.7139730453491211, "logits/rejected": 0.6426286101341248, "logps/chosen": -2.194045305252075, "logps/rejected": -3.0321474075317383, "loss": 0.838, "nll_loss": 0.7953379154205322, "rewards/accuracies": 0.875, "rewards/chosen": -0.21940453350543976, "rewards/margins": 0.08381025493144989, "rewards/rejected": -0.30321475863456726, "step": 2759 }, { "epoch": 7.5564681724846, "grad_norm": 4.587836265563965, "learning_rate": 6.220547945205479e-07, "log_odds_chosen": 1.356006145477295, "log_odds_ratio": -0.3828875422477722, "logits/chosen": 1.0501855611801147, "logits/rejected": 1.105910301208496, "logps/chosen": -2.688835620880127, "logps/rejected": -3.9495625495910645, "loss": 0.7882, "nll_loss": 0.749942421913147, "rewards/accuracies": 0.875, "rewards/chosen": -0.2688835859298706, "rewards/margins": 0.12607264518737793, "rewards/rejected": -0.39495620131492615, "step": 2760 }, { "epoch": 7.559206023271732, "grad_norm": 4.627840518951416, "learning_rate": 6.219178082191781e-07, "log_odds_chosen": 2.4383068084716797, "log_odds_ratio": -0.37993624806404114, "logits/chosen": 0.9802713990211487, "logits/rejected": 0.9805017113685608, "logps/chosen": -1.9706270694732666, "logps/rejected": -4.350724220275879, "loss": 0.7638, "nll_loss": 0.7258319854736328, "rewards/accuracies": 0.875, "rewards/chosen": -0.19706270098686218, "rewards/margins": 0.2380097508430481, "rewards/rejected": -0.43507248163223267, "step": 2761 }, { "epoch": 7.561943874058864, "grad_norm": 5.129662036895752, "learning_rate": 6.217808219178083e-07, "log_odds_chosen": 1.88656485080719, "log_odds_ratio": -0.34597647190093994, "logits/chosen": 0.8761565685272217, "logits/rejected": 0.9218092560768127, "logps/chosen": -2.7351064682006836, "logps/rejected": -4.548470973968506, "loss": 0.8881, "nll_loss": 0.8535414338111877, "rewards/accuracies": 0.75, "rewards/chosen": -0.2735106647014618, "rewards/margins": 0.1813364028930664, "rewards/rejected": -0.4548470973968506, "step": 2762 }, { "epoch": 7.564681724845996, "grad_norm": 4.493592262268066, "learning_rate": 6.216438356164383e-07, "log_odds_chosen": 2.6925506591796875, "log_odds_ratio": -0.23038199543952942, "logits/chosen": 0.8533550500869751, "logits/rejected": 0.8310301899909973, "logps/chosen": -2.272637128829956, "logps/rejected": -4.8301262855529785, "loss": 0.8658, "nll_loss": 0.8427594900131226, "rewards/accuracies": 0.875, "rewards/chosen": -0.22726371884346008, "rewards/margins": 0.2557489275932312, "rewards/rejected": -0.4830126166343689, "step": 2763 }, { "epoch": 7.567419575633128, "grad_norm": 5.282995700836182, "learning_rate": 6.215068493150685e-07, "log_odds_chosen": 1.116155743598938, "log_odds_ratio": -0.40701571106910706, "logits/chosen": 0.8013672828674316, "logits/rejected": 0.7835514545440674, "logps/chosen": -2.094292163848877, "logps/rejected": -3.1170287132263184, "loss": 0.8882, "nll_loss": 0.8475338220596313, "rewards/accuracies": 0.875, "rewards/chosen": -0.20942918956279755, "rewards/margins": 0.10227368772029877, "rewards/rejected": -0.3117028772830963, "step": 2764 }, { "epoch": 7.57015742642026, "grad_norm": 4.140343189239502, "learning_rate": 6.213698630136987e-07, "log_odds_chosen": 1.4764448404312134, "log_odds_ratio": -0.27776235342025757, "logits/chosen": 0.927573561668396, "logits/rejected": 0.9722337126731873, "logps/chosen": -1.9678901433944702, "logps/rejected": -3.314495325088501, "loss": 0.8315, "nll_loss": 0.8037717342376709, "rewards/accuracies": 1.0, "rewards/chosen": -0.1967889964580536, "rewards/margins": 0.134660542011261, "rewards/rejected": -0.3314495384693146, "step": 2765 }, { "epoch": 7.572895277207392, "grad_norm": 4.610848903656006, "learning_rate": 6.212328767123287e-07, "log_odds_chosen": 0.8805593252182007, "log_odds_ratio": -0.4162927269935608, "logits/chosen": 0.8061115145683289, "logits/rejected": 0.8003429174423218, "logps/chosen": -1.836090326309204, "logps/rejected": -2.6230504512786865, "loss": 0.8224, "nll_loss": 0.7807652950286865, "rewards/accuracies": 0.75, "rewards/chosen": -0.1836090236902237, "rewards/margins": 0.07869602739810944, "rewards/rejected": -0.2623050808906555, "step": 2766 }, { "epoch": 7.575633127994524, "grad_norm": 5.472080230712891, "learning_rate": 6.210958904109589e-07, "log_odds_chosen": 1.2168701887130737, "log_odds_ratio": -0.5530476570129395, "logits/chosen": 0.8471169471740723, "logits/rejected": 0.8130689263343811, "logps/chosen": -3.2594289779663086, "logps/rejected": -4.4434494972229, "loss": 0.8362, "nll_loss": 0.7809026837348938, "rewards/accuracies": 0.75, "rewards/chosen": -0.3259429335594177, "rewards/margins": 0.11840206384658813, "rewards/rejected": -0.44434496760368347, "step": 2767 }, { "epoch": 7.578370978781656, "grad_norm": 5.233205318450928, "learning_rate": 6.209589041095891e-07, "log_odds_chosen": 1.3933913707733154, "log_odds_ratio": -0.3512669503688812, "logits/chosen": 0.8016473054885864, "logits/rejected": 0.794565737247467, "logps/chosen": -2.285039186477661, "logps/rejected": -3.582943916320801, "loss": 0.8193, "nll_loss": 0.7842093110084534, "rewards/accuracies": 0.875, "rewards/chosen": -0.22850391268730164, "rewards/margins": 0.12979048490524292, "rewards/rejected": -0.35829442739486694, "step": 2768 }, { "epoch": 7.581108829568788, "grad_norm": 4.070616245269775, "learning_rate": 6.208219178082191e-07, "log_odds_chosen": 0.8697874546051025, "log_odds_ratio": -0.4381752014160156, "logits/chosen": 0.7552865147590637, "logits/rejected": 0.7928590178489685, "logps/chosen": -2.411787509918213, "logps/rejected": -3.213960647583008, "loss": 0.9134, "nll_loss": 0.8695444464683533, "rewards/accuracies": 0.75, "rewards/chosen": -0.24117876589298248, "rewards/margins": 0.08021733164787292, "rewards/rejected": -0.3213960826396942, "step": 2769 }, { "epoch": 7.58384668035592, "grad_norm": 5.8948774337768555, "learning_rate": 6.206849315068493e-07, "log_odds_chosen": 2.450557231903076, "log_odds_ratio": -0.49793559312820435, "logits/chosen": 0.7736369371414185, "logits/rejected": 0.6942301988601685, "logps/chosen": -2.5414516925811768, "logps/rejected": -4.910928249359131, "loss": 0.8557, "nll_loss": 0.8058937788009644, "rewards/accuracies": 0.875, "rewards/chosen": -0.25414517521858215, "rewards/margins": 0.23694762587547302, "rewards/rejected": -0.4910928010940552, "step": 2770 }, { "epoch": 7.586584531143052, "grad_norm": 4.427573204040527, "learning_rate": 6.205479452054794e-07, "log_odds_chosen": 3.1901841163635254, "log_odds_ratio": -0.26710519194602966, "logits/chosen": 0.9200918078422546, "logits/rejected": 0.9836662411689758, "logps/chosen": -3.254628896713257, "logps/rejected": -6.421900749206543, "loss": 0.9505, "nll_loss": 0.9237703680992126, "rewards/accuracies": 0.75, "rewards/chosen": -0.3254629075527191, "rewards/margins": 0.3167271912097931, "rewards/rejected": -0.6421900987625122, "step": 2771 }, { "epoch": 7.5893223819301845, "grad_norm": 4.330256462097168, "learning_rate": 6.204109589041096e-07, "log_odds_chosen": 1.1694097518920898, "log_odds_ratio": -0.48756441473960876, "logits/chosen": 0.9404027462005615, "logits/rejected": 0.9180075526237488, "logps/chosen": -2.051884889602661, "logps/rejected": -3.128571033477783, "loss": 0.8438, "nll_loss": 0.7950510382652283, "rewards/accuracies": 0.875, "rewards/chosen": -0.20518849790096283, "rewards/margins": 0.10766862332820892, "rewards/rejected": -0.31285712122917175, "step": 2772 }, { "epoch": 7.592060232717317, "grad_norm": 4.420226097106934, "learning_rate": 6.202739726027397e-07, "log_odds_chosen": 2.1241188049316406, "log_odds_ratio": -0.29759666323661804, "logits/chosen": 0.6761987209320068, "logits/rejected": 0.5352064371109009, "logps/chosen": -1.9396436214447021, "logps/rejected": -3.9102020263671875, "loss": 0.8676, "nll_loss": 0.8378471732139587, "rewards/accuracies": 0.875, "rewards/chosen": -0.19396436214447021, "rewards/margins": 0.19705581665039062, "rewards/rejected": -0.39102017879486084, "step": 2773 }, { "epoch": 7.594798083504449, "grad_norm": 4.651329517364502, "learning_rate": 6.201369863013698e-07, "log_odds_chosen": 1.868190050125122, "log_odds_ratio": -0.2261984646320343, "logits/chosen": 0.9388377666473389, "logits/rejected": 1.0124577283859253, "logps/chosen": -2.569089889526367, "logps/rejected": -4.360054016113281, "loss": 0.8472, "nll_loss": 0.8245480060577393, "rewards/accuracies": 1.0, "rewards/chosen": -0.25690898299217224, "rewards/margins": 0.17909644544124603, "rewards/rejected": -0.4360054135322571, "step": 2774 }, { "epoch": 7.597535934291582, "grad_norm": 3.9822962284088135, "learning_rate": 6.2e-07, "log_odds_chosen": 1.7858349084854126, "log_odds_ratio": -0.3541256785392761, "logits/chosen": 0.7402668595314026, "logits/rejected": 0.6706032156944275, "logps/chosen": -1.7453606128692627, "logps/rejected": -3.4158530235290527, "loss": 0.843, "nll_loss": 0.8076288104057312, "rewards/accuracies": 1.0, "rewards/chosen": -0.1745360791683197, "rewards/margins": 0.16704925894737244, "rewards/rejected": -0.34158533811569214, "step": 2775 }, { "epoch": 7.600273785078713, "grad_norm": 4.612009525299072, "learning_rate": 6.198630136986301e-07, "log_odds_chosen": 0.9663863182067871, "log_odds_ratio": -0.5061248540878296, "logits/chosen": 0.8261125683784485, "logits/rejected": 0.7771680355072021, "logps/chosen": -3.254556655883789, "logps/rejected": -4.186488628387451, "loss": 0.8317, "nll_loss": 0.7811349034309387, "rewards/accuracies": 0.625, "rewards/chosen": -0.3254556655883789, "rewards/margins": 0.0931931659579277, "rewards/rejected": -0.4186488389968872, "step": 2776 }, { "epoch": 7.603011635865846, "grad_norm": 4.739287853240967, "learning_rate": 6.197260273972602e-07, "log_odds_chosen": 0.9678764939308167, "log_odds_ratio": -0.5960884690284729, "logits/chosen": 0.6823582649230957, "logits/rejected": 0.615324079990387, "logps/chosen": -2.636687755584717, "logps/rejected": -3.524745225906372, "loss": 0.8542, "nll_loss": 0.7945654392242432, "rewards/accuracies": 0.75, "rewards/chosen": -0.2636687755584717, "rewards/margins": 0.08880575001239777, "rewards/rejected": -0.35247451066970825, "step": 2777 }, { "epoch": 7.605749486652978, "grad_norm": 3.9921813011169434, "learning_rate": 6.195890410958904e-07, "log_odds_chosen": 2.6525025367736816, "log_odds_ratio": -0.15781807899475098, "logits/chosen": 0.9063483476638794, "logits/rejected": 0.8625433444976807, "logps/chosen": -1.8369901180267334, "logps/rejected": -4.327398300170898, "loss": 0.7681, "nll_loss": 0.7523664832115173, "rewards/accuracies": 1.0, "rewards/chosen": -0.18369901180267334, "rewards/margins": 0.24904082715511322, "rewards/rejected": -0.43273985385894775, "step": 2778 }, { "epoch": 7.60848733744011, "grad_norm": 3.9770407676696777, "learning_rate": 6.194520547945206e-07, "log_odds_chosen": 2.043804168701172, "log_odds_ratio": -0.23995324969291687, "logits/chosen": 0.719585120677948, "logits/rejected": 0.7356181740760803, "logps/chosen": -2.3656325340270996, "logps/rejected": -4.321310520172119, "loss": 0.7851, "nll_loss": 0.7611337900161743, "rewards/accuracies": 1.0, "rewards/chosen": -0.23656326532363892, "rewards/margins": 0.1955677568912506, "rewards/rejected": -0.4321310222148895, "step": 2779 }, { "epoch": 7.611225188227242, "grad_norm": 5.733536720275879, "learning_rate": 6.193150684931506e-07, "log_odds_chosen": 1.0835508108139038, "log_odds_ratio": -0.47441115975379944, "logits/chosen": 0.9077991247177124, "logits/rejected": 0.9298808574676514, "logps/chosen": -2.5656704902648926, "logps/rejected": -3.57751202583313, "loss": 0.7846, "nll_loss": 0.7371393442153931, "rewards/accuracies": 0.75, "rewards/chosen": -0.2565670609474182, "rewards/margins": 0.1011841744184494, "rewards/rejected": -0.3577512502670288, "step": 2780 }, { "epoch": 7.613963039014374, "grad_norm": 4.886210918426514, "learning_rate": 6.191780821917808e-07, "log_odds_chosen": 1.4439691305160522, "log_odds_ratio": -0.5374559760093689, "logits/chosen": 0.8725387454032898, "logits/rejected": 0.8640993237495422, "logps/chosen": -2.691439151763916, "logps/rejected": -4.102743148803711, "loss": 0.8694, "nll_loss": 0.8156373500823975, "rewards/accuracies": 0.625, "rewards/chosen": -0.2691439390182495, "rewards/margins": 0.14113040268421173, "rewards/rejected": -0.41027432680130005, "step": 2781 }, { "epoch": 7.616700889801506, "grad_norm": 5.1487250328063965, "learning_rate": 6.19041095890411e-07, "log_odds_chosen": 1.051870346069336, "log_odds_ratio": -0.3749958872795105, "logits/chosen": 0.9888098239898682, "logits/rejected": 1.05600905418396, "logps/chosen": -2.5207080841064453, "logps/rejected": -3.4698455333709717, "loss": 0.7374, "nll_loss": 0.6998616456985474, "rewards/accuracies": 0.875, "rewards/chosen": -0.2520708441734314, "rewards/margins": 0.09491371363401413, "rewards/rejected": -0.34698453545570374, "step": 2782 }, { "epoch": 7.619438740588638, "grad_norm": 3.84025502204895, "learning_rate": 6.18904109589041e-07, "log_odds_chosen": 1.287084698677063, "log_odds_ratio": -0.3342560827732086, "logits/chosen": 0.8636121153831482, "logits/rejected": 0.8210275769233704, "logps/chosen": -2.268158435821533, "logps/rejected": -3.483306407928467, "loss": 0.8133, "nll_loss": 0.7799052000045776, "rewards/accuracies": 0.875, "rewards/chosen": -0.2268158495426178, "rewards/margins": 0.12151478230953217, "rewards/rejected": -0.34833067655563354, "step": 2783 }, { "epoch": 7.62217659137577, "grad_norm": 4.047921180725098, "learning_rate": 6.187671232876712e-07, "log_odds_chosen": 1.0408267974853516, "log_odds_ratio": -0.46231019496917725, "logits/chosen": 0.7676665782928467, "logits/rejected": 0.7509658932685852, "logps/chosen": -2.3245458602905273, "logps/rejected": -3.3061363697052, "loss": 0.7745, "nll_loss": 0.7282450199127197, "rewards/accuracies": 0.75, "rewards/chosen": -0.2324545830488205, "rewards/margins": 0.0981590524315834, "rewards/rejected": -0.3306136131286621, "step": 2784 }, { "epoch": 7.624914442162902, "grad_norm": 4.282618045806885, "learning_rate": 6.186301369863013e-07, "log_odds_chosen": 2.1671035289764404, "log_odds_ratio": -0.3156754672527313, "logits/chosen": 0.6988557577133179, "logits/rejected": 0.667108416557312, "logps/chosen": -2.275196075439453, "logps/rejected": -4.374980926513672, "loss": 0.8147, "nll_loss": 0.7831153869628906, "rewards/accuracies": 1.0, "rewards/chosen": -0.22751960158348083, "rewards/margins": 0.20997850596904755, "rewards/rejected": -0.4374980926513672, "step": 2785 }, { "epoch": 7.627652292950034, "grad_norm": 3.9840805530548096, "learning_rate": 6.184931506849315e-07, "log_odds_chosen": 2.411281108856201, "log_odds_ratio": -0.3208238482475281, "logits/chosen": 0.8293410539627075, "logits/rejected": 0.7898433208465576, "logps/chosen": -2.203993320465088, "logps/rejected": -4.552298545837402, "loss": 0.8271, "nll_loss": 0.7949824929237366, "rewards/accuracies": 0.875, "rewards/chosen": -0.22039932012557983, "rewards/margins": 0.23483049869537354, "rewards/rejected": -0.45522981882095337, "step": 2786 }, { "epoch": 7.630390143737166, "grad_norm": 5.134719371795654, "learning_rate": 6.183561643835616e-07, "log_odds_chosen": 1.3136457204818726, "log_odds_ratio": -0.33459582924842834, "logits/chosen": 0.8849079012870789, "logits/rejected": 0.9224915504455566, "logps/chosen": -2.538269281387329, "logps/rejected": -3.779722213745117, "loss": 0.791, "nll_loss": 0.7575591802597046, "rewards/accuracies": 1.0, "rewards/chosen": -0.25382694602012634, "rewards/margins": 0.12414531409740448, "rewards/rejected": -0.37797224521636963, "step": 2787 }, { "epoch": 7.6331279945242985, "grad_norm": 4.572686195373535, "learning_rate": 6.182191780821917e-07, "log_odds_chosen": 2.982205867767334, "log_odds_ratio": -0.26031649112701416, "logits/chosen": 0.9864813089370728, "logits/rejected": 0.9348633289337158, "logps/chosen": -1.617101788520813, "logps/rejected": -4.386014938354492, "loss": 0.7162, "nll_loss": 0.6901508569717407, "rewards/accuracies": 0.875, "rewards/chosen": -0.16171018779277802, "rewards/margins": 0.2768913209438324, "rewards/rejected": -0.4386014938354492, "step": 2788 }, { "epoch": 7.6358658453114305, "grad_norm": 4.0428924560546875, "learning_rate": 6.180821917808219e-07, "log_odds_chosen": 2.124601364135742, "log_odds_ratio": -0.2853853106498718, "logits/chosen": 0.6932823657989502, "logits/rejected": 0.718553900718689, "logps/chosen": -2.169800043106079, "logps/rejected": -4.168576240539551, "loss": 0.8619, "nll_loss": 0.8333147168159485, "rewards/accuracies": 1.0, "rewards/chosen": -0.2169800102710724, "rewards/margins": 0.19987763464450836, "rewards/rejected": -0.41685763001441956, "step": 2789 }, { "epoch": 7.638603696098563, "grad_norm": 4.319789886474609, "learning_rate": 6.17945205479452e-07, "log_odds_chosen": 2.31945538520813, "log_odds_ratio": -0.19797921180725098, "logits/chosen": 0.9339187145233154, "logits/rejected": 0.9668576717376709, "logps/chosen": -2.15523099899292, "logps/rejected": -4.356428623199463, "loss": 0.7541, "nll_loss": 0.7342586517333984, "rewards/accuracies": 1.0, "rewards/chosen": -0.21552309393882751, "rewards/margins": 0.22011978924274445, "rewards/rejected": -0.43564286828041077, "step": 2790 }, { "epoch": 7.641341546885695, "grad_norm": 4.5543389320373535, "learning_rate": 6.178082191780821e-07, "log_odds_chosen": 1.128078579902649, "log_odds_ratio": -0.38267961144447327, "logits/chosen": 0.8980780839920044, "logits/rejected": 0.8732624053955078, "logps/chosen": -1.5817371606826782, "logps/rejected": -2.590853214263916, "loss": 0.7561, "nll_loss": 0.7178139686584473, "rewards/accuracies": 1.0, "rewards/chosen": -0.15817372500896454, "rewards/margins": 0.10091158747673035, "rewards/rejected": -0.2590852975845337, "step": 2791 }, { "epoch": 7.644079397672827, "grad_norm": 5.152072906494141, "learning_rate": 6.176712328767123e-07, "log_odds_chosen": 0.7164111137390137, "log_odds_ratio": -0.46061617136001587, "logits/chosen": 0.8106988072395325, "logits/rejected": 0.6949345469474792, "logps/chosen": -2.0338120460510254, "logps/rejected": -2.667890787124634, "loss": 0.8314, "nll_loss": 0.7852941155433655, "rewards/accuracies": 0.875, "rewards/chosen": -0.20338119566440582, "rewards/margins": 0.06340789794921875, "rewards/rejected": -0.2667890787124634, "step": 2792 }, { "epoch": 7.646817248459959, "grad_norm": 4.274441242218018, "learning_rate": 6.175342465753425e-07, "log_odds_chosen": 1.7750314474105835, "log_odds_ratio": -0.24689753353595734, "logits/chosen": 0.7878592014312744, "logits/rejected": 0.759208619594574, "logps/chosen": -1.7199537754058838, "logps/rejected": -3.3082528114318848, "loss": 0.7831, "nll_loss": 0.7584279179573059, "rewards/accuracies": 1.0, "rewards/chosen": -0.1719953864812851, "rewards/margins": 0.15882991254329681, "rewards/rejected": -0.3308252692222595, "step": 2793 }, { "epoch": 7.649555099247091, "grad_norm": 4.005409240722656, "learning_rate": 6.173972602739725e-07, "log_odds_chosen": 2.9911575317382812, "log_odds_ratio": -0.17065249383449554, "logits/chosen": 0.9752970933914185, "logits/rejected": 0.9953430891036987, "logps/chosen": -2.259298801422119, "logps/rejected": -5.128612518310547, "loss": 0.7582, "nll_loss": 0.7410905957221985, "rewards/accuracies": 0.875, "rewards/chosen": -0.2259298861026764, "rewards/margins": 0.2869313955307007, "rewards/rejected": -0.5128612518310547, "step": 2794 }, { "epoch": 7.652292950034223, "grad_norm": 3.9447009563446045, "learning_rate": 6.172602739726027e-07, "log_odds_chosen": 2.614896774291992, "log_odds_ratio": -0.2816779911518097, "logits/chosen": 0.7105849981307983, "logits/rejected": 0.6459504961967468, "logps/chosen": -1.891321063041687, "logps/rejected": -4.419934272766113, "loss": 0.8426, "nll_loss": 0.8144396543502808, "rewards/accuracies": 0.875, "rewards/chosen": -0.18913210928440094, "rewards/margins": 0.2528613209724426, "rewards/rejected": -0.44199344515800476, "step": 2795 }, { "epoch": 7.655030800821355, "grad_norm": 6.067991256713867, "learning_rate": 6.171232876712329e-07, "log_odds_chosen": 0.26476186513900757, "log_odds_ratio": -0.8997316956520081, "logits/chosen": 0.8671112060546875, "logits/rejected": 0.8425465822219849, "logps/chosen": -2.5741333961486816, "logps/rejected": -2.7157535552978516, "loss": 0.9781, "nll_loss": 0.8881527185440063, "rewards/accuracies": 0.625, "rewards/chosen": -0.2574133276939392, "rewards/margins": 0.014162013307213783, "rewards/rejected": -0.27157536149024963, "step": 2796 }, { "epoch": 7.657768651608487, "grad_norm": 5.7227091789245605, "learning_rate": 6.169863013698629e-07, "log_odds_chosen": 1.0205366611480713, "log_odds_ratio": -0.49452731013298035, "logits/chosen": 0.8316724896430969, "logits/rejected": 0.7553252577781677, "logps/chosen": -2.699887752532959, "logps/rejected": -3.6635286808013916, "loss": 0.809, "nll_loss": 0.7595919370651245, "rewards/accuracies": 0.75, "rewards/chosen": -0.2699887752532959, "rewards/margins": 0.0963641032576561, "rewards/rejected": -0.3663528561592102, "step": 2797 }, { "epoch": 7.660506502395619, "grad_norm": 4.10166072845459, "learning_rate": 6.168493150684931e-07, "log_odds_chosen": 1.9389216899871826, "log_odds_ratio": -0.21894465386867523, "logits/chosen": 0.9262685775756836, "logits/rejected": 0.9579018354415894, "logps/chosen": -2.297335147857666, "logps/rejected": -4.149548530578613, "loss": 0.7453, "nll_loss": 0.7233589887619019, "rewards/accuracies": 1.0, "rewards/chosen": -0.22973352670669556, "rewards/margins": 0.18522131443023682, "rewards/rejected": -0.4149548411369324, "step": 2798 }, { "epoch": 7.663244353182751, "grad_norm": 5.695282459259033, "learning_rate": 6.167123287671233e-07, "log_odds_chosen": 1.4274779558181763, "log_odds_ratio": -0.6858164072036743, "logits/chosen": 0.8590843677520752, "logits/rejected": 0.9112064838409424, "logps/chosen": -2.57025146484375, "logps/rejected": -3.9600605964660645, "loss": 0.8687, "nll_loss": 0.8001148700714111, "rewards/accuracies": 0.625, "rewards/chosen": -0.2570251524448395, "rewards/margins": 0.1389809250831604, "rewards/rejected": -0.3960060477256775, "step": 2799 }, { "epoch": 7.665982203969883, "grad_norm": 3.888355016708374, "learning_rate": 6.165753424657534e-07, "log_odds_chosen": 2.3039324283599854, "log_odds_ratio": -0.2977931797504425, "logits/chosen": 0.9014882445335388, "logits/rejected": 0.8885327577590942, "logps/chosen": -2.3184871673583984, "logps/rejected": -4.574985504150391, "loss": 0.7869, "nll_loss": 0.7570940256118774, "rewards/accuracies": 0.875, "rewards/chosen": -0.23184871673583984, "rewards/margins": 0.22564980387687683, "rewards/rejected": -0.4574985206127167, "step": 2800 }, { "epoch": 7.668720054757015, "grad_norm": 5.198953151702881, "learning_rate": 6.164383561643835e-07, "log_odds_chosen": 0.9696464538574219, "log_odds_ratio": -0.4621877670288086, "logits/chosen": 0.8673554062843323, "logits/rejected": 0.777689516544342, "logps/chosen": -2.0958251953125, "logps/rejected": -3.0182178020477295, "loss": 0.8959, "nll_loss": 0.8496729135513306, "rewards/accuracies": 0.875, "rewards/chosen": -0.20958253741264343, "rewards/margins": 0.09223925322294235, "rewards/rejected": -0.301821768283844, "step": 2801 }, { "epoch": 7.671457905544148, "grad_norm": 4.846626281738281, "learning_rate": 6.163013698630136e-07, "log_odds_chosen": 3.1233534812927246, "log_odds_ratio": -0.2996697425842285, "logits/chosen": 0.7106220722198486, "logits/rejected": 0.5843161940574646, "logps/chosen": -1.9868953227996826, "logps/rejected": -4.967093467712402, "loss": 0.8341, "nll_loss": 0.8041543960571289, "rewards/accuracies": 0.875, "rewards/chosen": -0.1986895501613617, "rewards/margins": 0.2980198264122009, "rewards/rejected": -0.49670934677124023, "step": 2802 }, { "epoch": 7.6741957563312795, "grad_norm": 6.673298358917236, "learning_rate": 6.161643835616438e-07, "log_odds_chosen": 1.6475701332092285, "log_odds_ratio": -0.4829902946949005, "logits/chosen": 1.032421588897705, "logits/rejected": 1.0898295640945435, "logps/chosen": -3.6171927452087402, "logps/rejected": -5.218101978302002, "loss": 0.8192, "nll_loss": 0.7709184885025024, "rewards/accuracies": 0.75, "rewards/chosen": -0.3617193102836609, "rewards/margins": 0.16009089350700378, "rewards/rejected": -0.5218101739883423, "step": 2803 }, { "epoch": 7.676933607118412, "grad_norm": 5.353127479553223, "learning_rate": 6.160273972602739e-07, "log_odds_chosen": 1.2838683128356934, "log_odds_ratio": -0.56963711977005, "logits/chosen": 0.9379467368125916, "logits/rejected": 0.9145979881286621, "logps/chosen": -2.133063316345215, "logps/rejected": -3.362694501876831, "loss": 0.8187, "nll_loss": 0.7617457509040833, "rewards/accuracies": 0.75, "rewards/chosen": -0.21330633759498596, "rewards/margins": 0.12296313047409058, "rewards/rejected": -0.33626943826675415, "step": 2804 }, { "epoch": 7.6796714579055445, "grad_norm": 4.131744384765625, "learning_rate": 6.15890410958904e-07, "log_odds_chosen": 2.5425829887390137, "log_odds_ratio": -0.22335295379161835, "logits/chosen": 0.8905905485153198, "logits/rejected": 0.8140466809272766, "logps/chosen": -1.9145244359970093, "logps/rejected": -4.335183143615723, "loss": 0.7983, "nll_loss": 0.7760094404220581, "rewards/accuracies": 1.0, "rewards/chosen": -0.19145244359970093, "rewards/margins": 0.2420659065246582, "rewards/rejected": -0.43351835012435913, "step": 2805 }, { "epoch": 7.682409308692677, "grad_norm": 4.928638458251953, "learning_rate": 6.157534246575342e-07, "log_odds_chosen": 3.6564278602600098, "log_odds_ratio": -0.1370200365781784, "logits/chosen": 0.7869783639907837, "logits/rejected": 0.8121894598007202, "logps/chosen": -2.219261407852173, "logps/rejected": -5.745034217834473, "loss": 0.8081, "nll_loss": 0.7944372296333313, "rewards/accuracies": 1.0, "rewards/chosen": -0.22192613780498505, "rewards/margins": 0.352577269077301, "rewards/rejected": -0.5745034217834473, "step": 2806 }, { "epoch": 7.685147159479809, "grad_norm": 4.69675874710083, "learning_rate": 6.156164383561644e-07, "log_odds_chosen": 2.091357707977295, "log_odds_ratio": -0.2873784005641937, "logits/chosen": 0.6440969705581665, "logits/rejected": 0.607014000415802, "logps/chosen": -2.1648082733154297, "logps/rejected": -4.17378044128418, "loss": 0.8067, "nll_loss": 0.7780002355575562, "rewards/accuracies": 0.875, "rewards/chosen": -0.21648085117340088, "rewards/margins": 0.2008971869945526, "rewards/rejected": -0.4173780083656311, "step": 2807 }, { "epoch": 7.687885010266941, "grad_norm": 4.594544410705566, "learning_rate": 6.154794520547944e-07, "log_odds_chosen": 0.8781276941299438, "log_odds_ratio": -0.394759863615036, "logits/chosen": 0.8670011758804321, "logits/rejected": 0.8887684941291809, "logps/chosen": -2.1079118251800537, "logps/rejected": -2.904975414276123, "loss": 0.8358, "nll_loss": 0.7963590621948242, "rewards/accuracies": 0.875, "rewards/chosen": -0.21079117059707642, "rewards/margins": 0.0797063410282135, "rewards/rejected": -0.2904975414276123, "step": 2808 }, { "epoch": 7.690622861054073, "grad_norm": 4.170290470123291, "learning_rate": 6.153424657534246e-07, "log_odds_chosen": 2.286801815032959, "log_odds_ratio": -0.2844115197658539, "logits/chosen": 0.7454480528831482, "logits/rejected": 0.7416067719459534, "logps/chosen": -2.4155070781707764, "logps/rejected": -4.62291955947876, "loss": 0.8278, "nll_loss": 0.7993724942207336, "rewards/accuracies": 0.875, "rewards/chosen": -0.24155071377754211, "rewards/margins": 0.22074128687381744, "rewards/rejected": -0.46229201555252075, "step": 2809 }, { "epoch": 7.693360711841205, "grad_norm": 3.9937567710876465, "learning_rate": 6.152054794520548e-07, "log_odds_chosen": 1.5932503938674927, "log_odds_ratio": -0.3286832571029663, "logits/chosen": 0.667375385761261, "logits/rejected": 0.6549804210662842, "logps/chosen": -1.5523221492767334, "logps/rejected": -2.9973199367523193, "loss": 0.7023, "nll_loss": 0.6694096326828003, "rewards/accuracies": 0.875, "rewards/chosen": -0.15523222088813782, "rewards/margins": 0.1444997638463974, "rewards/rejected": -0.2997319996356964, "step": 2810 }, { "epoch": 7.696098562628337, "grad_norm": 4.297767639160156, "learning_rate": 6.150684931506848e-07, "log_odds_chosen": 2.285378932952881, "log_odds_ratio": -0.3008353114128113, "logits/chosen": 0.8504996299743652, "logits/rejected": 0.8803849220275879, "logps/chosen": -1.9414631128311157, "logps/rejected": -4.133278846740723, "loss": 0.7601, "nll_loss": 0.730032205581665, "rewards/accuracies": 1.0, "rewards/chosen": -0.19414633512496948, "rewards/margins": 0.2191815823316574, "rewards/rejected": -0.4133278727531433, "step": 2811 }, { "epoch": 7.698836413415469, "grad_norm": 5.916943073272705, "learning_rate": 6.14931506849315e-07, "log_odds_chosen": 0.894730806350708, "log_odds_ratio": -0.6375091671943665, "logits/chosen": 1.1291130781173706, "logits/rejected": 1.18459951877594, "logps/chosen": -3.2296390533447266, "logps/rejected": -4.082036972045898, "loss": 0.8042, "nll_loss": 0.7404397130012512, "rewards/accuracies": 0.75, "rewards/chosen": -0.3229638934135437, "rewards/margins": 0.08523976802825928, "rewards/rejected": -0.408203661441803, "step": 2812 }, { "epoch": 7.701574264202601, "grad_norm": 4.540668487548828, "learning_rate": 6.147945205479452e-07, "log_odds_chosen": 1.5983519554138184, "log_odds_ratio": -0.26446065306663513, "logits/chosen": 1.0129835605621338, "logits/rejected": 1.086138129234314, "logps/chosen": -2.76961612701416, "logps/rejected": -4.312661170959473, "loss": 0.7496, "nll_loss": 0.7231490015983582, "rewards/accuracies": 1.0, "rewards/chosen": -0.27696162462234497, "rewards/margins": 0.15430450439453125, "rewards/rejected": -0.4312661290168762, "step": 2813 }, { "epoch": 7.704312114989733, "grad_norm": 4.650897979736328, "learning_rate": 6.146575342465753e-07, "log_odds_chosen": 3.412529945373535, "log_odds_ratio": -0.07717035710811615, "logits/chosen": 0.9474657773971558, "logits/rejected": 1.0434365272521973, "logps/chosen": -3.0294482707977295, "logps/rejected": -6.2491912841796875, "loss": 0.8841, "nll_loss": 0.8764129281044006, "rewards/accuracies": 1.0, "rewards/chosen": -0.3029448390007019, "rewards/margins": 0.3219743072986603, "rewards/rejected": -0.6249191164970398, "step": 2814 }, { "epoch": 7.707049965776865, "grad_norm": 4.939421653747559, "learning_rate": 6.145205479452054e-07, "log_odds_chosen": 1.2562177181243896, "log_odds_ratio": -0.4049990475177765, "logits/chosen": 0.6218932271003723, "logits/rejected": 0.5955408215522766, "logps/chosen": -2.371332883834839, "logps/rejected": -3.5761780738830566, "loss": 0.7772, "nll_loss": 0.7367002367973328, "rewards/accuracies": 0.75, "rewards/chosen": -0.23713329434394836, "rewards/margins": 0.12048451602458954, "rewards/rejected": -0.3576178252696991, "step": 2815 }, { "epoch": 7.709787816563997, "grad_norm": 5.214293479919434, "learning_rate": 6.143835616438355e-07, "log_odds_chosen": 2.1295931339263916, "log_odds_ratio": -0.34897440671920776, "logits/chosen": 0.9364003539085388, "logits/rejected": 0.9389030933380127, "logps/chosen": -2.006988048553467, "logps/rejected": -3.9777297973632812, "loss": 0.764, "nll_loss": 0.7291311621665955, "rewards/accuracies": 0.875, "rewards/chosen": -0.2006988227367401, "rewards/margins": 0.19707417488098145, "rewards/rejected": -0.39777299761772156, "step": 2816 }, { "epoch": 7.712525667351129, "grad_norm": 4.447544097900391, "learning_rate": 6.142465753424657e-07, "log_odds_chosen": 0.9631469249725342, "log_odds_ratio": -0.3666352927684784, "logits/chosen": 0.7772557735443115, "logits/rejected": 0.6852717995643616, "logps/chosen": -2.3017311096191406, "logps/rejected": -3.1897668838500977, "loss": 0.9119, "nll_loss": 0.8752147555351257, "rewards/accuracies": 0.875, "rewards/chosen": -0.23017311096191406, "rewards/margins": 0.08880355209112167, "rewards/rejected": -0.31897667050361633, "step": 2817 }, { "epoch": 7.715263518138261, "grad_norm": 5.113322734832764, "learning_rate": 6.141095890410958e-07, "log_odds_chosen": 1.1157151460647583, "log_odds_ratio": -0.6017400026321411, "logits/chosen": 1.0456278324127197, "logits/rejected": 1.1374931335449219, "logps/chosen": -2.689154863357544, "logps/rejected": -3.721611976623535, "loss": 0.8008, "nll_loss": 0.7405843138694763, "rewards/accuracies": 0.625, "rewards/chosen": -0.26891547441482544, "rewards/margins": 0.10324571281671524, "rewards/rejected": -0.37216120958328247, "step": 2818 }, { "epoch": 7.7180013689253935, "grad_norm": 5.441740989685059, "learning_rate": 6.139726027397259e-07, "log_odds_chosen": 1.0351285934448242, "log_odds_ratio": -0.40784531831741333, "logits/chosen": 0.6621654629707336, "logits/rejected": 0.6240841150283813, "logps/chosen": -2.2935264110565186, "logps/rejected": -3.1862270832061768, "loss": 0.7962, "nll_loss": 0.7553767561912537, "rewards/accuracies": 0.875, "rewards/chosen": -0.22935263812541962, "rewards/margins": 0.08927005529403687, "rewards/rejected": -0.3186227083206177, "step": 2819 }, { "epoch": 7.7207392197125255, "grad_norm": 5.297430038452148, "learning_rate": 6.138356164383561e-07, "log_odds_chosen": 1.1324642896652222, "log_odds_ratio": -0.3867957890033722, "logits/chosen": 1.0717005729675293, "logits/rejected": 1.1245253086090088, "logps/chosen": -2.41757869720459, "logps/rejected": -3.4732112884521484, "loss": 0.7534, "nll_loss": 0.7147554159164429, "rewards/accuracies": 0.875, "rewards/chosen": -0.24175786972045898, "rewards/margins": 0.10556328296661377, "rewards/rejected": -0.34732115268707275, "step": 2820 }, { "epoch": 7.723477070499658, "grad_norm": 4.117093086242676, "learning_rate": 6.136986301369864e-07, "log_odds_chosen": 2.0718517303466797, "log_odds_ratio": -0.2657735347747803, "logits/chosen": 0.8520941734313965, "logits/rejected": 0.8802903294563293, "logps/chosen": -2.822355270385742, "logps/rejected": -4.810811519622803, "loss": 0.7946, "nll_loss": 0.7680435180664062, "rewards/accuracies": 1.0, "rewards/chosen": -0.2822355329990387, "rewards/margins": 0.19884561002254486, "rewards/rejected": -0.48108112812042236, "step": 2821 }, { "epoch": 7.72621492128679, "grad_norm": 6.995273590087891, "learning_rate": 6.135616438356163e-07, "log_odds_chosen": 1.1794211864471436, "log_odds_ratio": -0.6065346598625183, "logits/chosen": 0.9261059761047363, "logits/rejected": 0.9109416007995605, "logps/chosen": -2.3058648109436035, "logps/rejected": -3.4109301567077637, "loss": 0.857, "nll_loss": 0.7963692545890808, "rewards/accuracies": 0.75, "rewards/chosen": -0.23058649897575378, "rewards/margins": 0.11050654202699661, "rewards/rejected": -0.3410930037498474, "step": 2822 }, { "epoch": 7.728952772073922, "grad_norm": 4.855928897857666, "learning_rate": 6.134246575342466e-07, "log_odds_chosen": 2.032749652862549, "log_odds_ratio": -0.2423303723335266, "logits/chosen": 1.0501821041107178, "logits/rejected": 1.134711503982544, "logps/chosen": -3.0768284797668457, "logps/rejected": -5.039687156677246, "loss": 0.7567, "nll_loss": 0.7324613928794861, "rewards/accuracies": 0.875, "rewards/chosen": -0.3076828718185425, "rewards/margins": 0.19628585875034332, "rewards/rejected": -0.5039687156677246, "step": 2823 }, { "epoch": 7.731690622861054, "grad_norm": 4.012898921966553, "learning_rate": 6.132876712328768e-07, "log_odds_chosen": 1.2109800577163696, "log_odds_ratio": -0.36001110076904297, "logits/chosen": 0.8351219296455383, "logits/rejected": 0.9000402688980103, "logps/chosen": -2.5911927223205566, "logps/rejected": -3.7312488555908203, "loss": 0.6906, "nll_loss": 0.6545850038528442, "rewards/accuracies": 0.875, "rewards/chosen": -0.25911927223205566, "rewards/margins": 0.11400561779737473, "rewards/rejected": -0.373124897480011, "step": 2824 }, { "epoch": 7.734428473648186, "grad_norm": 5.431341648101807, "learning_rate": 6.131506849315067e-07, "log_odds_chosen": 3.5595877170562744, "log_odds_ratio": -0.1531582772731781, "logits/chosen": 1.0803966522216797, "logits/rejected": 1.1321144104003906, "logps/chosen": -2.200759172439575, "logps/rejected": -5.647738456726074, "loss": 0.7131, "nll_loss": 0.697789192199707, "rewards/accuracies": 1.0, "rewards/chosen": -0.22007592022418976, "rewards/margins": 0.3446979522705078, "rewards/rejected": -0.5647738575935364, "step": 2825 }, { "epoch": 7.737166324435318, "grad_norm": 4.423973083496094, "learning_rate": 6.13013698630137e-07, "log_odds_chosen": 1.6411333084106445, "log_odds_ratio": -0.3075978457927704, "logits/chosen": 1.1922435760498047, "logits/rejected": 1.2096686363220215, "logps/chosen": -2.667759418487549, "logps/rejected": -4.174033164978027, "loss": 0.7224, "nll_loss": 0.6915996670722961, "rewards/accuracies": 1.0, "rewards/chosen": -0.2667759656906128, "rewards/margins": 0.15062734484672546, "rewards/rejected": -0.41740331053733826, "step": 2826 }, { "epoch": 7.73990417522245, "grad_norm": 4.921054363250732, "learning_rate": 6.128767123287672e-07, "log_odds_chosen": 2.2573256492614746, "log_odds_ratio": -0.24894411861896515, "logits/chosen": 0.7913354635238647, "logits/rejected": 0.7036073207855225, "logps/chosen": -2.3533945083618164, "logps/rejected": -4.464736461639404, "loss": 0.8167, "nll_loss": 0.7918457984924316, "rewards/accuracies": 0.875, "rewards/chosen": -0.2353394627571106, "rewards/margins": 0.2111341953277588, "rewards/rejected": -0.4464736580848694, "step": 2827 }, { "epoch": 7.742642026009582, "grad_norm": 4.584079265594482, "learning_rate": 6.127397260273973e-07, "log_odds_chosen": 1.17637038230896, "log_odds_ratio": -0.3085084855556488, "logits/chosen": 0.807937741279602, "logits/rejected": 0.8807550072669983, "logps/chosen": -2.806743621826172, "logps/rejected": -3.914475202560425, "loss": 0.8596, "nll_loss": 0.8287912011146545, "rewards/accuracies": 1.0, "rewards/chosen": -0.28067436814308167, "rewards/margins": 0.11077316850423813, "rewards/rejected": -0.3914475440979004, "step": 2828 }, { "epoch": 7.745379876796715, "grad_norm": 4.681229591369629, "learning_rate": 6.126027397260274e-07, "log_odds_chosen": 1.376821517944336, "log_odds_ratio": -0.29258301854133606, "logits/chosen": 1.0033247470855713, "logits/rejected": 0.9848123788833618, "logps/chosen": -2.3204057216644287, "logps/rejected": -3.617666482925415, "loss": 0.823, "nll_loss": 0.7937177419662476, "rewards/accuracies": 1.0, "rewards/chosen": -0.23204056918621063, "rewards/margins": 0.1297260820865631, "rewards/rejected": -0.36176666617393494, "step": 2829 }, { "epoch": 7.748117727583846, "grad_norm": 5.1324567794799805, "learning_rate": 6.124657534246576e-07, "log_odds_chosen": 0.951788067817688, "log_odds_ratio": -0.5641971230506897, "logits/chosen": 0.7110282182693481, "logits/rejected": 0.7736338973045349, "logps/chosen": -2.6391992568969727, "logps/rejected": -3.5272088050842285, "loss": 0.9323, "nll_loss": 0.8758374452590942, "rewards/accuracies": 0.625, "rewards/chosen": -0.2639199495315552, "rewards/margins": 0.08880092948675156, "rewards/rejected": -0.35272085666656494, "step": 2830 }, { "epoch": 7.750855578370979, "grad_norm": 4.637084007263184, "learning_rate": 6.123287671232877e-07, "log_odds_chosen": 1.6837890148162842, "log_odds_ratio": -0.26847872138023376, "logits/chosen": 0.745012640953064, "logits/rejected": 0.7059056758880615, "logps/chosen": -2.0734710693359375, "logps/rejected": -3.576925277709961, "loss": 0.8228, "nll_loss": 0.7959942817687988, "rewards/accuracies": 0.875, "rewards/chosen": -0.20734713971614838, "rewards/margins": 0.15034539997577667, "rewards/rejected": -0.35769253969192505, "step": 2831 }, { "epoch": 7.753593429158111, "grad_norm": 3.894484519958496, "learning_rate": 6.121917808219178e-07, "log_odds_chosen": 2.26428484916687, "log_odds_ratio": -0.19376175105571747, "logits/chosen": 0.7415880560874939, "logits/rejected": 0.7043582797050476, "logps/chosen": -2.136467218399048, "logps/rejected": -4.2845001220703125, "loss": 0.7873, "nll_loss": 0.7678771018981934, "rewards/accuracies": 1.0, "rewards/chosen": -0.21364670991897583, "rewards/margins": 0.2148033231496811, "rewards/rejected": -0.4284500479698181, "step": 2832 }, { "epoch": 7.756331279945243, "grad_norm": 5.91280460357666, "learning_rate": 6.120547945205479e-07, "log_odds_chosen": 2.6830480098724365, "log_odds_ratio": -0.20121687650680542, "logits/chosen": 0.8557040691375732, "logits/rejected": 0.7970806360244751, "logps/chosen": -2.4144833087921143, "logps/rejected": -5.0117950439453125, "loss": 0.8267, "nll_loss": 0.8065418601036072, "rewards/accuracies": 1.0, "rewards/chosen": -0.24144835770130157, "rewards/margins": 0.2597312033176422, "rewards/rejected": -0.5011795163154602, "step": 2833 }, { "epoch": 7.759069130732375, "grad_norm": 4.136894702911377, "learning_rate": 6.119178082191781e-07, "log_odds_chosen": 1.7815216779708862, "log_odds_ratio": -0.23949775099754333, "logits/chosen": 0.8366971611976624, "logits/rejected": 0.8502545356750488, "logps/chosen": -1.999495267868042, "logps/rejected": -3.6589698791503906, "loss": 0.8602, "nll_loss": 0.8362209796905518, "rewards/accuracies": 1.0, "rewards/chosen": -0.19994953274726868, "rewards/margins": 0.16594745218753815, "rewards/rejected": -0.365896999835968, "step": 2834 }, { "epoch": 7.761806981519507, "grad_norm": 4.782190322875977, "learning_rate": 6.117808219178083e-07, "log_odds_chosen": 2.4888458251953125, "log_odds_ratio": -0.30007970333099365, "logits/chosen": 0.941498875617981, "logits/rejected": 0.9283490777015686, "logps/chosen": -2.1037135124206543, "logps/rejected": -4.493350982666016, "loss": 0.7485, "nll_loss": 0.7185126543045044, "rewards/accuracies": 1.0, "rewards/chosen": -0.21037134528160095, "rewards/margins": 0.23896373808383942, "rewards/rejected": -0.44933509826660156, "step": 2835 }, { "epoch": 7.7645448323066395, "grad_norm": 4.621226787567139, "learning_rate": 6.116438356164383e-07, "log_odds_chosen": 2.132230281829834, "log_odds_ratio": -0.23377037048339844, "logits/chosen": 1.0959949493408203, "logits/rejected": 1.1517267227172852, "logps/chosen": -2.7296600341796875, "logps/rejected": -4.782630920410156, "loss": 0.684, "nll_loss": 0.6606286764144897, "rewards/accuracies": 0.875, "rewards/chosen": -0.2729659974575043, "rewards/margins": 0.2052970826625824, "rewards/rejected": -0.47826310992240906, "step": 2836 }, { "epoch": 7.767282683093772, "grad_norm": 5.048306465148926, "learning_rate": 6.115068493150685e-07, "log_odds_chosen": 2.3211209774017334, "log_odds_ratio": -0.18534067273139954, "logits/chosen": 0.9634687900543213, "logits/rejected": 1.0369209051132202, "logps/chosen": -2.5591816902160645, "logps/rejected": -4.781435012817383, "loss": 0.7581, "nll_loss": 0.7395449280738831, "rewards/accuracies": 1.0, "rewards/chosen": -0.2559181749820709, "rewards/margins": 0.2222253382205963, "rewards/rejected": -0.47814348340034485, "step": 2837 }, { "epoch": 7.770020533880904, "grad_norm": 5.124544143676758, "learning_rate": 6.113698630136987e-07, "log_odds_chosen": 0.5919219255447388, "log_odds_ratio": -0.5637487769126892, "logits/chosen": 0.8949830532073975, "logits/rejected": 0.9736584424972534, "logps/chosen": -2.4825751781463623, "logps/rejected": -3.0130133628845215, "loss": 0.7785, "nll_loss": 0.7220995426177979, "rewards/accuracies": 0.75, "rewards/chosen": -0.24825751781463623, "rewards/margins": 0.05304379016160965, "rewards/rejected": -0.3013013005256653, "step": 2838 }, { "epoch": 7.772758384668036, "grad_norm": 4.069250106811523, "learning_rate": 6.112328767123287e-07, "log_odds_chosen": 2.33294677734375, "log_odds_ratio": -0.28572002053260803, "logits/chosen": 1.0543276071548462, "logits/rejected": 1.105455756187439, "logps/chosen": -2.3289706707000732, "logps/rejected": -4.595416069030762, "loss": 0.7002, "nll_loss": 0.6716779470443726, "rewards/accuracies": 0.875, "rewards/chosen": -0.2328970581293106, "rewards/margins": 0.22664450109004974, "rewards/rejected": -0.45954158902168274, "step": 2839 }, { "epoch": 7.775496235455168, "grad_norm": 4.574056625366211, "learning_rate": 6.110958904109589e-07, "log_odds_chosen": 1.830775260925293, "log_odds_ratio": -0.3646959960460663, "logits/chosen": 0.8418446183204651, "logits/rejected": 0.919958770275116, "logps/chosen": -2.7954611778259277, "logps/rejected": -4.557460308074951, "loss": 0.7797, "nll_loss": 0.7432503700256348, "rewards/accuracies": 0.875, "rewards/chosen": -0.2795461416244507, "rewards/margins": 0.17619992792606354, "rewards/rejected": -0.455746054649353, "step": 2840 }, { "epoch": 7.7782340862423, "grad_norm": 5.216531276702881, "learning_rate": 6.109589041095891e-07, "log_odds_chosen": 1.4656507968902588, "log_odds_ratio": -0.398826003074646, "logits/chosen": 0.7993438243865967, "logits/rejected": 0.8356674313545227, "logps/chosen": -2.5435080528259277, "logps/rejected": -3.989230155944824, "loss": 0.783, "nll_loss": 0.7430686950683594, "rewards/accuracies": 0.875, "rewards/chosen": -0.25435081124305725, "rewards/margins": 0.14457222819328308, "rewards/rejected": -0.39892300963401794, "step": 2841 }, { "epoch": 7.780971937029432, "grad_norm": 4.786051273345947, "learning_rate": 6.108219178082191e-07, "log_odds_chosen": 2.516746759414673, "log_odds_ratio": -0.26978182792663574, "logits/chosen": 0.856474757194519, "logits/rejected": 0.8268816471099854, "logps/chosen": -2.5838518142700195, "logps/rejected": -4.997587203979492, "loss": 0.8377, "nll_loss": 0.810736358165741, "rewards/accuracies": 0.875, "rewards/chosen": -0.25838518142700195, "rewards/margins": 0.24137358367443085, "rewards/rejected": -0.499758780002594, "step": 2842 }, { "epoch": 7.783709787816564, "grad_norm": 4.364648818969727, "learning_rate": 6.106849315068493e-07, "log_odds_chosen": 2.13914155960083, "log_odds_ratio": -0.1743764877319336, "logits/chosen": 0.8495756983757019, "logits/rejected": 0.8428497314453125, "logps/chosen": -2.593123435974121, "logps/rejected": -4.638319969177246, "loss": 0.8278, "nll_loss": 0.8103856444358826, "rewards/accuracies": 1.0, "rewards/chosen": -0.25931239128112793, "rewards/margins": 0.2045195996761322, "rewards/rejected": -0.46383196115493774, "step": 2843 }, { "epoch": 7.786447638603696, "grad_norm": 4.878700256347656, "learning_rate": 6.105479452054795e-07, "log_odds_chosen": 1.3424491882324219, "log_odds_ratio": -0.28913912177085876, "logits/chosen": 0.875512421131134, "logits/rejected": 0.8967875838279724, "logps/chosen": -1.6106798648834229, "logps/rejected": -2.77127742767334, "loss": 0.7332, "nll_loss": 0.7043173313140869, "rewards/accuracies": 1.0, "rewards/chosen": -0.16106799244880676, "rewards/margins": 0.11605977267026901, "rewards/rejected": -0.277127742767334, "step": 2844 }, { "epoch": 7.789185489390828, "grad_norm": 4.8983235359191895, "learning_rate": 6.104109589041096e-07, "log_odds_chosen": 1.664332628250122, "log_odds_ratio": -0.27284035086631775, "logits/chosen": 0.7890284061431885, "logits/rejected": 0.8580011129379272, "logps/chosen": -2.913846969604492, "logps/rejected": -4.4609575271606445, "loss": 0.7647, "nll_loss": 0.7373930215835571, "rewards/accuracies": 1.0, "rewards/chosen": -0.2913846969604492, "rewards/margins": 0.15471112728118896, "rewards/rejected": -0.4460958242416382, "step": 2845 }, { "epoch": 7.79192334017796, "grad_norm": 5.109227657318115, "learning_rate": 6.102739726027397e-07, "log_odds_chosen": 1.2835685014724731, "log_odds_ratio": -0.278840571641922, "logits/chosen": 0.7289568781852722, "logits/rejected": 0.6690050363540649, "logps/chosen": -1.961578130722046, "logps/rejected": -3.108055830001831, "loss": 0.8113, "nll_loss": 0.7834145426750183, "rewards/accuracies": 1.0, "rewards/chosen": -0.1961578130722046, "rewards/margins": 0.114647775888443, "rewards/rejected": -0.3108055889606476, "step": 2846 }, { "epoch": 7.794661190965092, "grad_norm": 4.460216999053955, "learning_rate": 6.101369863013698e-07, "log_odds_chosen": 0.9601150751113892, "log_odds_ratio": -0.491191029548645, "logits/chosen": 0.6450199484825134, "logits/rejected": 0.6750859022140503, "logps/chosen": -2.03545880317688, "logps/rejected": -2.952509880065918, "loss": 0.784, "nll_loss": 0.734882116317749, "rewards/accuracies": 0.875, "rewards/chosen": -0.20354586839675903, "rewards/margins": 0.09170514345169067, "rewards/rejected": -0.2952510118484497, "step": 2847 }, { "epoch": 7.797399041752224, "grad_norm": 6.219033241271973, "learning_rate": 6.1e-07, "log_odds_chosen": 0.9464349746704102, "log_odds_ratio": -0.6038665175437927, "logits/chosen": 0.7369953989982605, "logits/rejected": 0.9122296571731567, "logps/chosen": -3.018645763397217, "logps/rejected": -3.9626646041870117, "loss": 0.8933, "nll_loss": 0.8329377174377441, "rewards/accuracies": 0.75, "rewards/chosen": -0.3018645644187927, "rewards/margins": 0.09440188109874725, "rewards/rejected": -0.39626646041870117, "step": 2848 }, { "epoch": 7.800136892539356, "grad_norm": 5.212369918823242, "learning_rate": 6.098630136986302e-07, "log_odds_chosen": 0.7503568530082703, "log_odds_ratio": -0.4911355674266815, "logits/chosen": 0.8385310173034668, "logits/rejected": 0.81556636095047, "logps/chosen": -1.9897308349609375, "logps/rejected": -2.6972103118896484, "loss": 0.7711, "nll_loss": 0.7220215797424316, "rewards/accuracies": 0.875, "rewards/chosen": -0.19897308945655823, "rewards/margins": 0.07074794918298721, "rewards/rejected": -0.26972103118896484, "step": 2849 }, { "epoch": 7.8028747433264884, "grad_norm": 4.490713119506836, "learning_rate": 6.097260273972602e-07, "log_odds_chosen": 2.3127686977386475, "log_odds_ratio": -0.28798797726631165, "logits/chosen": 0.9360936284065247, "logits/rejected": 0.9738060235977173, "logps/chosen": -2.4701457023620605, "logps/rejected": -4.693244457244873, "loss": 0.8036, "nll_loss": 0.774833083152771, "rewards/accuracies": 0.875, "rewards/chosen": -0.24701455235481262, "rewards/margins": 0.2223098874092102, "rewards/rejected": -0.4693244397640228, "step": 2850 }, { "epoch": 7.8056125941136205, "grad_norm": 5.340813636779785, "learning_rate": 6.095890410958904e-07, "log_odds_chosen": 1.3636059761047363, "log_odds_ratio": -0.28093332052230835, "logits/chosen": 1.1146066188812256, "logits/rejected": 1.1589778661727905, "logps/chosen": -2.0464181900024414, "logps/rejected": -3.2811026573181152, "loss": 0.7297, "nll_loss": 0.7015713453292847, "rewards/accuracies": 1.0, "rewards/chosen": -0.20464181900024414, "rewards/margins": 0.12346842885017395, "rewards/rejected": -0.3281102776527405, "step": 2851 }, { "epoch": 7.808350444900753, "grad_norm": 3.8894009590148926, "learning_rate": 6.094520547945206e-07, "log_odds_chosen": 1.9261764287948608, "log_odds_ratio": -0.21687036752700806, "logits/chosen": 0.7241653800010681, "logits/rejected": 0.7172126770019531, "logps/chosen": -1.9045426845550537, "logps/rejected": -3.6745169162750244, "loss": 0.7572, "nll_loss": 0.7354695200920105, "rewards/accuracies": 1.0, "rewards/chosen": -0.19045427441596985, "rewards/margins": 0.17699743807315826, "rewards/rejected": -0.3674517273902893, "step": 2852 }, { "epoch": 7.811088295687885, "grad_norm": 4.330106735229492, "learning_rate": 6.093150684931506e-07, "log_odds_chosen": 2.067462205886841, "log_odds_ratio": -0.2787858247756958, "logits/chosen": 0.704798698425293, "logits/rejected": 0.6306636929512024, "logps/chosen": -1.899366021156311, "logps/rejected": -3.8385262489318848, "loss": 0.8482, "nll_loss": 0.8202775716781616, "rewards/accuracies": 0.875, "rewards/chosen": -0.18993660807609558, "rewards/margins": 0.19391605257987976, "rewards/rejected": -0.38385266065597534, "step": 2853 }, { "epoch": 7.813826146475018, "grad_norm": 4.609618663787842, "learning_rate": 6.091780821917808e-07, "log_odds_chosen": 2.4170756340026855, "log_odds_ratio": -0.2979110777378082, "logits/chosen": 0.838180422782898, "logits/rejected": 0.8652341961860657, "logps/chosen": -2.4780161380767822, "logps/rejected": -4.806134223937988, "loss": 0.7325, "nll_loss": 0.7027217149734497, "rewards/accuracies": 0.875, "rewards/chosen": -0.24780160188674927, "rewards/margins": 0.23281177878379822, "rewards/rejected": -0.4806134104728699, "step": 2854 }, { "epoch": 7.816563997262149, "grad_norm": 4.467105388641357, "learning_rate": 6.09041095890411e-07, "log_odds_chosen": 2.386715888977051, "log_odds_ratio": -0.20086048543453217, "logits/chosen": 0.808459460735321, "logits/rejected": 0.8035286664962769, "logps/chosen": -1.8274766206741333, "logps/rejected": -4.080080509185791, "loss": 0.7603, "nll_loss": 0.74024498462677, "rewards/accuracies": 0.875, "rewards/chosen": -0.18274764716625214, "rewards/margins": 0.2252604067325592, "rewards/rejected": -0.40800803899765015, "step": 2855 }, { "epoch": 7.819301848049282, "grad_norm": 5.038597583770752, "learning_rate": 6.08904109589041e-07, "log_odds_chosen": 1.3352679014205933, "log_odds_ratio": -0.3560107946395874, "logits/chosen": 0.8735222816467285, "logits/rejected": 0.8684114217758179, "logps/chosen": -3.0226998329162598, "logps/rejected": -4.251689910888672, "loss": 0.8453, "nll_loss": 0.8097066283226013, "rewards/accuracies": 0.875, "rewards/chosen": -0.30226999521255493, "rewards/margins": 0.12289898842573166, "rewards/rejected": -0.4251689612865448, "step": 2856 }, { "epoch": 7.822039698836413, "grad_norm": 4.683393478393555, "learning_rate": 6.087671232876712e-07, "log_odds_chosen": 1.6165919303894043, "log_odds_ratio": -0.4252842664718628, "logits/chosen": 0.8201597929000854, "logits/rejected": 0.7227504253387451, "logps/chosen": -2.5793752670288086, "logps/rejected": -4.132718086242676, "loss": 0.8885, "nll_loss": 0.8459615707397461, "rewards/accuracies": 0.75, "rewards/chosen": -0.25793755054473877, "rewards/margins": 0.1553342640399933, "rewards/rejected": -0.41327181458473206, "step": 2857 }, { "epoch": 7.824777549623546, "grad_norm": 4.317538261413574, "learning_rate": 6.086301369863014e-07, "log_odds_chosen": 1.5405402183532715, "log_odds_ratio": -0.3047441840171814, "logits/chosen": 0.7538793683052063, "logits/rejected": 0.7558404207229614, "logps/chosen": -2.3896472454071045, "logps/rejected": -3.855841875076294, "loss": 0.7889, "nll_loss": 0.7583996057510376, "rewards/accuracies": 1.0, "rewards/chosen": -0.23896470665931702, "rewards/margins": 0.14661946892738342, "rewards/rejected": -0.38558417558670044, "step": 2858 }, { "epoch": 7.827515400410678, "grad_norm": 4.775881767272949, "learning_rate": 6.084931506849315e-07, "log_odds_chosen": 1.0995020866394043, "log_odds_ratio": -0.42639458179473877, "logits/chosen": 0.9394091367721558, "logits/rejected": 0.9817336797714233, "logps/chosen": -3.1835289001464844, "logps/rejected": -4.252137184143066, "loss": 0.9343, "nll_loss": 0.8916926383972168, "rewards/accuracies": 0.625, "rewards/chosen": -0.3183528780937195, "rewards/margins": 0.10686086118221283, "rewards/rejected": -0.4252137541770935, "step": 2859 }, { "epoch": 7.83025325119781, "grad_norm": 3.813978672027588, "learning_rate": 6.083561643835616e-07, "log_odds_chosen": 1.826094627380371, "log_odds_ratio": -0.2813231348991394, "logits/chosen": 0.9558790326118469, "logits/rejected": 0.9647406935691833, "logps/chosen": -2.5886409282684326, "logps/rejected": -4.319216728210449, "loss": 0.7045, "nll_loss": 0.6763248443603516, "rewards/accuracies": 0.875, "rewards/chosen": -0.2588641047477722, "rewards/margins": 0.17305760085582733, "rewards/rejected": -0.43192172050476074, "step": 2860 }, { "epoch": 7.832991101984942, "grad_norm": 6.081592082977295, "learning_rate": 6.082191780821918e-07, "log_odds_chosen": 1.5050618648529053, "log_odds_ratio": -0.5746062994003296, "logits/chosen": 0.692664623260498, "logits/rejected": 0.6667434573173523, "logps/chosen": -2.6959245204925537, "logps/rejected": -4.109553813934326, "loss": 1.0016, "nll_loss": 0.9441425800323486, "rewards/accuracies": 0.75, "rewards/chosen": -0.2695924639701843, "rewards/margins": 0.14136293530464172, "rewards/rejected": -0.41095539927482605, "step": 2861 }, { "epoch": 7.835728952772074, "grad_norm": 4.633415222167969, "learning_rate": 6.080821917808219e-07, "log_odds_chosen": 1.1591631174087524, "log_odds_ratio": -0.5061473846435547, "logits/chosen": 0.8523935079574585, "logits/rejected": 0.8232311010360718, "logps/chosen": -2.0434377193450928, "logps/rejected": -2.951429843902588, "loss": 0.7541, "nll_loss": 0.7035155296325684, "rewards/accuracies": 0.75, "rewards/chosen": -0.2043437659740448, "rewards/margins": 0.0907992273569107, "rewards/rejected": -0.2951430082321167, "step": 2862 }, { "epoch": 7.838466803559206, "grad_norm": 4.201138496398926, "learning_rate": 6.07945205479452e-07, "log_odds_chosen": 1.649890661239624, "log_odds_ratio": -0.39049428701400757, "logits/chosen": 0.8076092004776001, "logits/rejected": 0.7810549736022949, "logps/chosen": -1.6656428575515747, "logps/rejected": -3.2121219635009766, "loss": 0.8085, "nll_loss": 0.7694462537765503, "rewards/accuracies": 1.0, "rewards/chosen": -0.16656427085399628, "rewards/margins": 0.15464791655540466, "rewards/rejected": -0.32121220231056213, "step": 2863 }, { "epoch": 7.841204654346338, "grad_norm": 4.899837017059326, "learning_rate": 6.078082191780821e-07, "log_odds_chosen": 0.8471583724021912, "log_odds_ratio": -0.4328014552593231, "logits/chosen": 0.9225374460220337, "logits/rejected": 0.823357105255127, "logps/chosen": -2.3723104000091553, "logps/rejected": -3.1683621406555176, "loss": 0.8367, "nll_loss": 0.7934110164642334, "rewards/accuracies": 0.875, "rewards/chosen": -0.23723104596138, "rewards/margins": 0.07960517704486847, "rewards/rejected": -0.3168362081050873, "step": 2864 }, { "epoch": 7.84394250513347, "grad_norm": 4.696325302124023, "learning_rate": 6.076712328767123e-07, "log_odds_chosen": 2.6654534339904785, "log_odds_ratio": -0.11289198696613312, "logits/chosen": 1.0650266408920288, "logits/rejected": 1.133657455444336, "logps/chosen": -2.5418200492858887, "logps/rejected": -5.112774848937988, "loss": 0.7071, "nll_loss": 0.6958574652671814, "rewards/accuracies": 1.0, "rewards/chosen": -0.25418201088905334, "rewards/margins": 0.2570955157279968, "rewards/rejected": -0.5112775564193726, "step": 2865 }, { "epoch": 7.846680355920602, "grad_norm": 4.937753200531006, "learning_rate": 6.075342465753425e-07, "log_odds_chosen": 0.9869123697280884, "log_odds_ratio": -0.5693387389183044, "logits/chosen": 0.8223627805709839, "logits/rejected": 0.8436682224273682, "logps/chosen": -2.5279810428619385, "logps/rejected": -3.4337306022644043, "loss": 0.8295, "nll_loss": 0.7725356221199036, "rewards/accuracies": 0.625, "rewards/chosen": -0.2527981102466583, "rewards/margins": 0.0905749648809433, "rewards/rejected": -0.34337306022644043, "step": 2866 }, { "epoch": 7.8494182067077345, "grad_norm": 5.2824788093566895, "learning_rate": 6.073972602739725e-07, "log_odds_chosen": 1.5299615859985352, "log_odds_ratio": -0.3424455523490906, "logits/chosen": 0.9631459712982178, "logits/rejected": 0.9492473602294922, "logps/chosen": -2.0429346561431885, "logps/rejected": -3.3895769119262695, "loss": 0.7284, "nll_loss": 0.694155216217041, "rewards/accuracies": 0.875, "rewards/chosen": -0.20429345965385437, "rewards/margins": 0.13466423749923706, "rewards/rejected": -0.33895769715309143, "step": 2867 }, { "epoch": 7.852156057494867, "grad_norm": 4.5345048904418945, "learning_rate": 6.072602739726027e-07, "log_odds_chosen": 1.3517630100250244, "log_odds_ratio": -0.3153917193412781, "logits/chosen": 0.8386622071266174, "logits/rejected": 0.856711745262146, "logps/chosen": -2.393902540206909, "logps/rejected": -3.6753182411193848, "loss": 0.8148, "nll_loss": 0.7832890748977661, "rewards/accuracies": 0.875, "rewards/chosen": -0.23939025402069092, "rewards/margins": 0.12814156711101532, "rewards/rejected": -0.36753183603286743, "step": 2868 }, { "epoch": 7.854893908281999, "grad_norm": 5.036495208740234, "learning_rate": 6.071232876712329e-07, "log_odds_chosen": 0.6995028257369995, "log_odds_ratio": -0.5816307663917542, "logits/chosen": 0.610328733921051, "logits/rejected": 0.5420975089073181, "logps/chosen": -2.208650827407837, "logps/rejected": -2.810211658477783, "loss": 0.8309, "nll_loss": 0.7727097868919373, "rewards/accuracies": 0.875, "rewards/chosen": -0.22086510062217712, "rewards/margins": 0.060156069695949554, "rewards/rejected": -0.2810211777687073, "step": 2869 }, { "epoch": 7.857631759069131, "grad_norm": 4.720707893371582, "learning_rate": 6.069863013698629e-07, "log_odds_chosen": 1.4958124160766602, "log_odds_ratio": -0.4381570816040039, "logits/chosen": 0.8074000477790833, "logits/rejected": 0.808188796043396, "logps/chosen": -2.8910675048828125, "logps/rejected": -4.335370063781738, "loss": 0.9095, "nll_loss": 0.8657090663909912, "rewards/accuracies": 0.625, "rewards/chosen": -0.2891067862510681, "rewards/margins": 0.1444302499294281, "rewards/rejected": -0.4335370361804962, "step": 2870 }, { "epoch": 7.860369609856263, "grad_norm": 6.353756427764893, "learning_rate": 6.068493150684931e-07, "log_odds_chosen": 1.1474225521087646, "log_odds_ratio": -0.3980563282966614, "logits/chosen": 0.9037567973136902, "logits/rejected": 0.9150673747062683, "logps/chosen": -3.073756456375122, "logps/rejected": -4.162557601928711, "loss": 0.8205, "nll_loss": 0.7806993126869202, "rewards/accuracies": 0.875, "rewards/chosen": -0.3073756694793701, "rewards/margins": 0.10888006538152695, "rewards/rejected": -0.41625574231147766, "step": 2871 }, { "epoch": 7.863107460643395, "grad_norm": 4.2530646324157715, "learning_rate": 6.067123287671233e-07, "log_odds_chosen": 2.8449015617370605, "log_odds_ratio": -0.11407504975795746, "logits/chosen": 0.9209938645362854, "logits/rejected": 0.948249876499176, "logps/chosen": -2.4293572902679443, "logps/rejected": -5.149572372436523, "loss": 0.753, "nll_loss": 0.7416059374809265, "rewards/accuracies": 1.0, "rewards/chosen": -0.24293573200702667, "rewards/margins": 0.2720215320587158, "rewards/rejected": -0.5149572491645813, "step": 2872 }, { "epoch": 7.865845311430527, "grad_norm": 4.7307868003845215, "learning_rate": 6.065753424657534e-07, "log_odds_chosen": 2.0049829483032227, "log_odds_ratio": -0.25100183486938477, "logits/chosen": 0.5110894441604614, "logits/rejected": 0.4904271960258484, "logps/chosen": -2.2428324222564697, "logps/rejected": -4.095457077026367, "loss": 0.8088, "nll_loss": 0.7837333083152771, "rewards/accuracies": 1.0, "rewards/chosen": -0.22428324818611145, "rewards/margins": 0.18526245653629303, "rewards/rejected": -0.4095456898212433, "step": 2873 }, { "epoch": 7.868583162217659, "grad_norm": 4.365447044372559, "learning_rate": 6.064383561643835e-07, "log_odds_chosen": 1.4436291456222534, "log_odds_ratio": -0.25582897663116455, "logits/chosen": 0.9665204882621765, "logits/rejected": 0.9765375852584839, "logps/chosen": -2.413087844848633, "logps/rejected": -3.7664530277252197, "loss": 0.7381, "nll_loss": 0.7125163078308105, "rewards/accuracies": 1.0, "rewards/chosen": -0.2413087785243988, "rewards/margins": 0.1353365182876587, "rewards/rejected": -0.3766452968120575, "step": 2874 }, { "epoch": 7.871321013004791, "grad_norm": 4.0687055587768555, "learning_rate": 6.063013698630137e-07, "log_odds_chosen": 3.0727803707122803, "log_odds_ratio": -0.1435789167881012, "logits/chosen": 0.9611834287643433, "logits/rejected": 0.9474131464958191, "logps/chosen": -2.040891647338867, "logps/rejected": -4.9597578048706055, "loss": 0.6837, "nll_loss": 0.6693275570869446, "rewards/accuracies": 1.0, "rewards/chosen": -0.2040891796350479, "rewards/margins": 0.2918866276741028, "rewards/rejected": -0.4959757924079895, "step": 2875 }, { "epoch": 7.874058863791923, "grad_norm": 7.192904472351074, "learning_rate": 6.061643835616438e-07, "log_odds_chosen": 1.8660271167755127, "log_odds_ratio": -0.511968731880188, "logits/chosen": 0.9176281690597534, "logits/rejected": 0.9632389545440674, "logps/chosen": -3.2391340732574463, "logps/rejected": -5.024542808532715, "loss": 0.8433, "nll_loss": 0.7920583486557007, "rewards/accuracies": 0.875, "rewards/chosen": -0.32391342520713806, "rewards/margins": 0.17854087054729462, "rewards/rejected": -0.5024542808532715, "step": 2876 }, { "epoch": 7.876796714579055, "grad_norm": 4.343300819396973, "learning_rate": 6.060273972602739e-07, "log_odds_chosen": 2.0934815406799316, "log_odds_ratio": -0.2860895097255707, "logits/chosen": 0.8107541799545288, "logits/rejected": 0.8845168352127075, "logps/chosen": -2.629281997680664, "logps/rejected": -4.655288219451904, "loss": 0.6909, "nll_loss": 0.6622546911239624, "rewards/accuracies": 0.875, "rewards/chosen": -0.26292818784713745, "rewards/margins": 0.2026006430387497, "rewards/rejected": -0.4655288755893707, "step": 2877 }, { "epoch": 7.879534565366187, "grad_norm": 3.9044644832611084, "learning_rate": 6.05890410958904e-07, "log_odds_chosen": 2.2148709297180176, "log_odds_ratio": -0.23616620898246765, "logits/chosen": 0.6938968896865845, "logits/rejected": 0.6795737743377686, "logps/chosen": -2.20526123046875, "logps/rejected": -4.316973686218262, "loss": 0.7951, "nll_loss": 0.7714338302612305, "rewards/accuracies": 1.0, "rewards/chosen": -0.22052612900733948, "rewards/margins": 0.21117128431797028, "rewards/rejected": -0.43169739842414856, "step": 2878 }, { "epoch": 7.882272416153319, "grad_norm": 4.434540271759033, "learning_rate": 6.057534246575342e-07, "log_odds_chosen": 1.8832992315292358, "log_odds_ratio": -0.3531695604324341, "logits/chosen": 0.7544299364089966, "logits/rejected": 0.760610818862915, "logps/chosen": -3.0016422271728516, "logps/rejected": -4.815629005432129, "loss": 0.8318, "nll_loss": 0.7964648008346558, "rewards/accuracies": 0.75, "rewards/chosen": -0.30016422271728516, "rewards/margins": 0.1813986599445343, "rewards/rejected": -0.48156291246414185, "step": 2879 }, { "epoch": 7.885010266940451, "grad_norm": 7.102908611297607, "learning_rate": 6.056164383561644e-07, "log_odds_chosen": 1.315777063369751, "log_odds_ratio": -0.424406498670578, "logits/chosen": 1.0528755187988281, "logits/rejected": 1.0443477630615234, "logps/chosen": -2.3314497470855713, "logps/rejected": -3.5499331951141357, "loss": 0.8319, "nll_loss": 0.7894782423973083, "rewards/accuracies": 0.875, "rewards/chosen": -0.23314498364925385, "rewards/margins": 0.12184834480285645, "rewards/rejected": -0.3549933433532715, "step": 2880 }, { "epoch": 7.887748117727584, "grad_norm": 3.8794972896575928, "learning_rate": 6.054794520547944e-07, "log_odds_chosen": 1.712809443473816, "log_odds_ratio": -0.39614391326904297, "logits/chosen": 0.902298092842102, "logits/rejected": 1.0141152143478394, "logps/chosen": -2.3995273113250732, "logps/rejected": -3.77050518989563, "loss": 0.8048, "nll_loss": 0.7651658654212952, "rewards/accuracies": 0.875, "rewards/chosen": -0.23995274305343628, "rewards/margins": 0.1370977759361267, "rewards/rejected": -0.377050518989563, "step": 2881 }, { "epoch": 7.8904859685147155, "grad_norm": 4.261421203613281, "learning_rate": 6.053424657534246e-07, "log_odds_chosen": 2.009812831878662, "log_odds_ratio": -0.2565845251083374, "logits/chosen": 1.0015370845794678, "logits/rejected": 1.0272669792175293, "logps/chosen": -2.0303590297698975, "logps/rejected": -3.948076009750366, "loss": 0.6924, "nll_loss": 0.6667598485946655, "rewards/accuracies": 1.0, "rewards/chosen": -0.2030358910560608, "rewards/margins": 0.1917717158794403, "rewards/rejected": -0.3948076367378235, "step": 2882 }, { "epoch": 7.8932238193018485, "grad_norm": 4.574621200561523, "learning_rate": 6.052054794520548e-07, "log_odds_chosen": 2.3214025497436523, "log_odds_ratio": -0.14399951696395874, "logits/chosen": 0.8275755643844604, "logits/rejected": 0.8933303356170654, "logps/chosen": -1.8710824251174927, "logps/rejected": -3.9465057849884033, "loss": 0.79, "nll_loss": 0.7755554914474487, "rewards/accuracies": 1.0, "rewards/chosen": -0.18710824847221375, "rewards/margins": 0.2075423300266266, "rewards/rejected": -0.3946506083011627, "step": 2883 }, { "epoch": 7.8959616700889805, "grad_norm": 5.028138160705566, "learning_rate": 6.050684931506848e-07, "log_odds_chosen": 1.587707281112671, "log_odds_ratio": -0.33825963735580444, "logits/chosen": 0.878101110458374, "logits/rejected": 0.9228928089141846, "logps/chosen": -1.9761989116668701, "logps/rejected": -3.4181976318359375, "loss": 0.7266, "nll_loss": 0.6927868127822876, "rewards/accuracies": 0.875, "rewards/chosen": -0.19761990010738373, "rewards/margins": 0.14419987797737122, "rewards/rejected": -0.34181976318359375, "step": 2884 }, { "epoch": 7.898699520876113, "grad_norm": 5.417342662811279, "learning_rate": 6.04931506849315e-07, "log_odds_chosen": 1.9211679697036743, "log_odds_ratio": -0.4315783679485321, "logits/chosen": 1.0386675596237183, "logits/rejected": 1.069406270980835, "logps/chosen": -3.566310405731201, "logps/rejected": -5.459900856018066, "loss": 0.7541, "nll_loss": 0.7109482288360596, "rewards/accuracies": 0.75, "rewards/chosen": -0.3566310703754425, "rewards/margins": 0.18935903906822205, "rewards/rejected": -0.5459901094436646, "step": 2885 }, { "epoch": 7.901437371663245, "grad_norm": 4.351256847381592, "learning_rate": 6.047945205479452e-07, "log_odds_chosen": 1.4533610343933105, "log_odds_ratio": -0.33784061670303345, "logits/chosen": 0.6320202350616455, "logits/rejected": 0.6565366983413696, "logps/chosen": -2.469251871109009, "logps/rejected": -3.84263277053833, "loss": 0.7642, "nll_loss": 0.7304168343544006, "rewards/accuracies": 0.75, "rewards/chosen": -0.24692519009113312, "rewards/margins": 0.13733810186386108, "rewards/rejected": -0.3842633068561554, "step": 2886 }, { "epoch": 7.904175222450377, "grad_norm": 5.400022506713867, "learning_rate": 6.046575342465753e-07, "log_odds_chosen": 1.0854110717773438, "log_odds_ratio": -0.483560711145401, "logits/chosen": 0.9016616344451904, "logits/rejected": 0.8384675979614258, "logps/chosen": -2.507286310195923, "logps/rejected": -3.5640757083892822, "loss": 0.8222, "nll_loss": 0.7738513946533203, "rewards/accuracies": 0.75, "rewards/chosen": -0.25072863698005676, "rewards/margins": 0.1056789681315422, "rewards/rejected": -0.35640761256217957, "step": 2887 }, { "epoch": 7.906913073237509, "grad_norm": 4.186129570007324, "learning_rate": 6.045205479452054e-07, "log_odds_chosen": 1.0492546558380127, "log_odds_ratio": -0.36611419916152954, "logits/chosen": 0.9944653511047363, "logits/rejected": 1.0058138370513916, "logps/chosen": -2.4158360958099365, "logps/rejected": -3.3896048069000244, "loss": 0.8423, "nll_loss": 0.8056713938713074, "rewards/accuracies": 1.0, "rewards/chosen": -0.24158360064029694, "rewards/margins": 0.09737688302993774, "rewards/rejected": -0.3389604985713959, "step": 2888 }, { "epoch": 7.909650924024641, "grad_norm": 4.650493621826172, "learning_rate": 6.043835616438356e-07, "log_odds_chosen": 1.7332844734191895, "log_odds_ratio": -0.33551979064941406, "logits/chosen": 0.7823540568351746, "logits/rejected": 0.8058419823646545, "logps/chosen": -2.0445938110351562, "logps/rejected": -3.6816306114196777, "loss": 0.8257, "nll_loss": 0.7921965718269348, "rewards/accuracies": 0.75, "rewards/chosen": -0.20445938408374786, "rewards/margins": 0.16370366513729095, "rewards/rejected": -0.3681630492210388, "step": 2889 }, { "epoch": 7.912388774811773, "grad_norm": 4.8974480628967285, "learning_rate": 6.042465753424657e-07, "log_odds_chosen": 1.048032522201538, "log_odds_ratio": -0.5746893882751465, "logits/chosen": 0.8885942697525024, "logits/rejected": 0.8908936977386475, "logps/chosen": -2.2976796627044678, "logps/rejected": -3.269688606262207, "loss": 0.7994, "nll_loss": 0.7418832778930664, "rewards/accuracies": 0.625, "rewards/chosen": -0.22976796329021454, "rewards/margins": 0.0972009003162384, "rewards/rejected": -0.32696887850761414, "step": 2890 }, { "epoch": 7.915126625598905, "grad_norm": 4.330539703369141, "learning_rate": 6.041095890410958e-07, "log_odds_chosen": 2.355504274368286, "log_odds_ratio": -0.23873966932296753, "logits/chosen": 0.9926621913909912, "logits/rejected": 0.9678224921226501, "logps/chosen": -2.7567036151885986, "logps/rejected": -5.0358123779296875, "loss": 0.8071, "nll_loss": 0.7832081913948059, "rewards/accuracies": 1.0, "rewards/chosen": -0.2756703495979309, "rewards/margins": 0.22791089117527008, "rewards/rejected": -0.5035812258720398, "step": 2891 }, { "epoch": 7.917864476386037, "grad_norm": 4.8042378425598145, "learning_rate": 6.039726027397259e-07, "log_odds_chosen": 1.4060804843902588, "log_odds_ratio": -0.36861956119537354, "logits/chosen": 0.6476001739501953, "logits/rejected": 0.6256027817726135, "logps/chosen": -2.3009469509124756, "logps/rejected": -3.6550474166870117, "loss": 0.7806, "nll_loss": 0.7437646985054016, "rewards/accuracies": 0.875, "rewards/chosen": -0.23009470105171204, "rewards/margins": 0.13541005551815033, "rewards/rejected": -0.36550474166870117, "step": 2892 }, { "epoch": 7.920602327173169, "grad_norm": 3.6692357063293457, "learning_rate": 6.038356164383561e-07, "log_odds_chosen": 1.3616373538970947, "log_odds_ratio": -0.2479897141456604, "logits/chosen": 0.675371527671814, "logits/rejected": 0.6601014137268066, "logps/chosen": -2.027125597000122, "logps/rejected": -3.266828775405884, "loss": 0.8736, "nll_loss": 0.8488050103187561, "rewards/accuracies": 1.0, "rewards/chosen": -0.20271256566047668, "rewards/margins": 0.12397032976150513, "rewards/rejected": -0.3266828954219818, "step": 2893 }, { "epoch": 7.923340177960301, "grad_norm": 5.466732025146484, "learning_rate": 6.036986301369863e-07, "log_odds_chosen": 1.3016047477722168, "log_odds_ratio": -0.3643902838230133, "logits/chosen": 0.7337163090705872, "logits/rejected": 0.7346970438957214, "logps/chosen": -2.4459660053253174, "logps/rejected": -3.6507792472839355, "loss": 0.8574, "nll_loss": 0.8209435939788818, "rewards/accuracies": 0.875, "rewards/chosen": -0.24459661543369293, "rewards/margins": 0.12048129737377167, "rewards/rejected": -0.3650779128074646, "step": 2894 }, { "epoch": 7.926078028747433, "grad_norm": 3.8544578552246094, "learning_rate": 6.035616438356163e-07, "log_odds_chosen": 1.9427058696746826, "log_odds_ratio": -0.26466643810272217, "logits/chosen": 0.6358715891838074, "logits/rejected": 0.5818160772323608, "logps/chosen": -1.9942445755004883, "logps/rejected": -3.834437608718872, "loss": 0.7272, "nll_loss": 0.7006947994232178, "rewards/accuracies": 0.875, "rewards/chosen": -0.19942446053028107, "rewards/margins": 0.1840192675590515, "rewards/rejected": -0.3834437429904938, "step": 2895 }, { "epoch": 7.928815879534565, "grad_norm": 5.105476379394531, "learning_rate": 6.034246575342465e-07, "log_odds_chosen": 2.5920650959014893, "log_odds_ratio": -0.19041535258293152, "logits/chosen": 0.8979402780532837, "logits/rejected": 0.9716569185256958, "logps/chosen": -2.4013705253601074, "logps/rejected": -4.879537582397461, "loss": 0.8725, "nll_loss": 0.8534228801727295, "rewards/accuracies": 1.0, "rewards/chosen": -0.24013705551624298, "rewards/margins": 0.24781668186187744, "rewards/rejected": -0.4879537522792816, "step": 2896 }, { "epoch": 7.931553730321697, "grad_norm": 5.4956746101379395, "learning_rate": 6.032876712328767e-07, "log_odds_chosen": 1.0247517824172974, "log_odds_ratio": -0.6627047657966614, "logits/chosen": 0.9481152892112732, "logits/rejected": 1.0070459842681885, "logps/chosen": -3.755685567855835, "logps/rejected": -4.754705429077148, "loss": 0.8202, "nll_loss": 0.7538962364196777, "rewards/accuracies": 0.75, "rewards/chosen": -0.37556856870651245, "rewards/margins": 0.09990197420120239, "rewards/rejected": -0.47547054290771484, "step": 2897 }, { "epoch": 7.9342915811088295, "grad_norm": 4.851409435272217, "learning_rate": 6.031506849315067e-07, "log_odds_chosen": 1.2556443214416504, "log_odds_ratio": -0.41421830654144287, "logits/chosen": 0.8511514067649841, "logits/rejected": 0.8553758859634399, "logps/chosen": -2.0817034244537354, "logps/rejected": -3.19498348236084, "loss": 0.7818, "nll_loss": 0.7403979301452637, "rewards/accuracies": 0.875, "rewards/chosen": -0.2081703543663025, "rewards/margins": 0.11132797598838806, "rewards/rejected": -0.31949833035469055, "step": 2898 }, { "epoch": 7.9370294318959616, "grad_norm": 4.797452449798584, "learning_rate": 6.030136986301369e-07, "log_odds_chosen": 1.0683000087738037, "log_odds_ratio": -0.4699307680130005, "logits/chosen": 0.9232584238052368, "logits/rejected": 0.8268377184867859, "logps/chosen": -2.9588732719421387, "logps/rejected": -4.000462532043457, "loss": 0.9453, "nll_loss": 0.8983279466629028, "rewards/accuracies": 0.75, "rewards/chosen": -0.2958873510360718, "rewards/margins": 0.10415893793106079, "rewards/rejected": -0.40004631876945496, "step": 2899 }, { "epoch": 7.939767282683094, "grad_norm": 5.271275997161865, "learning_rate": 6.028767123287671e-07, "log_odds_chosen": 2.6589951515197754, "log_odds_ratio": -0.2947472333908081, "logits/chosen": 1.0519222021102905, "logits/rejected": 1.0196070671081543, "logps/chosen": -2.6829090118408203, "logps/rejected": -5.260058879852295, "loss": 0.776, "nll_loss": 0.7464845180511475, "rewards/accuracies": 0.875, "rewards/chosen": -0.2682909369468689, "rewards/margins": 0.25771498680114746, "rewards/rejected": -0.5260059833526611, "step": 2900 }, { "epoch": 7.942505133470226, "grad_norm": 3.981212615966797, "learning_rate": 6.027397260273972e-07, "log_odds_chosen": 1.6144779920578003, "log_odds_ratio": -0.25092101097106934, "logits/chosen": 0.7679004073143005, "logits/rejected": 0.7500624060630798, "logps/chosen": -2.2091588973999023, "logps/rejected": -3.7019362449645996, "loss": 0.7918, "nll_loss": 0.7666810750961304, "rewards/accuracies": 1.0, "rewards/chosen": -0.22091588377952576, "rewards/margins": 0.14927776157855988, "rewards/rejected": -0.3701936602592468, "step": 2901 }, { "epoch": 7.945242984257358, "grad_norm": 4.919211387634277, "learning_rate": 6.026027397260273e-07, "log_odds_chosen": 1.9820411205291748, "log_odds_ratio": -0.374447226524353, "logits/chosen": 0.7183897495269775, "logits/rejected": 0.7336664199829102, "logps/chosen": -2.3967788219451904, "logps/rejected": -4.3044586181640625, "loss": 0.9456, "nll_loss": 0.9081627130508423, "rewards/accuracies": 0.875, "rewards/chosen": -0.23967790603637695, "rewards/margins": 0.19076797366142273, "rewards/rejected": -0.4304458796977997, "step": 2902 }, { "epoch": 7.94798083504449, "grad_norm": 3.9775731563568115, "learning_rate": 6.024657534246576e-07, "log_odds_chosen": 1.9385347366333008, "log_odds_ratio": -0.24223211407661438, "logits/chosen": 0.830222487449646, "logits/rejected": 0.7841341495513916, "logps/chosen": -2.0684683322906494, "logps/rejected": -3.8946683406829834, "loss": 0.7482, "nll_loss": 0.723994791507721, "rewards/accuracies": 1.0, "rewards/chosen": -0.20684683322906494, "rewards/margins": 0.18262001872062683, "rewards/rejected": -0.3894668519496918, "step": 2903 }, { "epoch": 7.950718685831622, "grad_norm": 6.057006359100342, "learning_rate": 6.023287671232877e-07, "log_odds_chosen": 1.787440299987793, "log_odds_ratio": -0.33454060554504395, "logits/chosen": 1.0639674663543701, "logits/rejected": 1.0716105699539185, "logps/chosen": -2.7769293785095215, "logps/rejected": -4.48246955871582, "loss": 0.7634, "nll_loss": 0.7299723625183105, "rewards/accuracies": 0.875, "rewards/chosen": -0.27769291400909424, "rewards/margins": 0.1705540120601654, "rewards/rejected": -0.44824695587158203, "step": 2904 }, { "epoch": 7.953456536618754, "grad_norm": 6.310235023498535, "learning_rate": 6.021917808219178e-07, "log_odds_chosen": 0.9921488761901855, "log_odds_ratio": -0.7772706747055054, "logits/chosen": 0.8735777139663696, "logits/rejected": 0.8979647159576416, "logps/chosen": -3.3573994636535645, "logps/rejected": -4.318999767303467, "loss": 0.8721, "nll_loss": 0.7943877577781677, "rewards/accuracies": 0.75, "rewards/chosen": -0.33573994040489197, "rewards/margins": 0.09616000950336456, "rewards/rejected": -0.43189993500709534, "step": 2905 }, { "epoch": 7.956194387405886, "grad_norm": 5.181453704833984, "learning_rate": 6.02054794520548e-07, "log_odds_chosen": 0.5009822249412537, "log_odds_ratio": -0.5111664533615112, "logits/chosen": 1.0767719745635986, "logits/rejected": 1.0571006536483765, "logps/chosen": -2.173995018005371, "logps/rejected": -2.624009370803833, "loss": 0.7689, "nll_loss": 0.7177587747573853, "rewards/accuracies": 0.75, "rewards/chosen": -0.2173995077610016, "rewards/margins": 0.045001450926065445, "rewards/rejected": -0.26240095496177673, "step": 2906 }, { "epoch": 7.958932238193018, "grad_norm": 7.966811180114746, "learning_rate": 6.019178082191781e-07, "log_odds_chosen": 1.0711331367492676, "log_odds_ratio": -0.796028733253479, "logits/chosen": 0.8451775312423706, "logits/rejected": 0.8916832804679871, "logps/chosen": -3.2554502487182617, "logps/rejected": -4.240009307861328, "loss": 0.8568, "nll_loss": 0.7771876454353333, "rewards/accuracies": 0.875, "rewards/chosen": -0.3255450129508972, "rewards/margins": 0.09845590591430664, "rewards/rejected": -0.42400094866752625, "step": 2907 }, { "epoch": 7.961670088980151, "grad_norm": 4.395799160003662, "learning_rate": 6.017808219178083e-07, "log_odds_chosen": 1.9649300575256348, "log_odds_ratio": -0.1847621500492096, "logits/chosen": 0.9270890951156616, "logits/rejected": 0.9397245645523071, "logps/chosen": -2.4959511756896973, "logps/rejected": -4.371073246002197, "loss": 0.6869, "nll_loss": 0.6684713363647461, "rewards/accuracies": 1.0, "rewards/chosen": -0.24959513545036316, "rewards/margins": 0.18751217424869537, "rewards/rejected": -0.4371073246002197, "step": 2908 }, { "epoch": 7.964407939767282, "grad_norm": 3.8523051738739014, "learning_rate": 6.016438356164383e-07, "log_odds_chosen": 0.8717942833900452, "log_odds_ratio": -0.5109924077987671, "logits/chosen": 0.9152342081069946, "logits/rejected": 0.9806386232376099, "logps/chosen": -2.535222053527832, "logps/rejected": -3.389350414276123, "loss": 0.8137, "nll_loss": 0.7625811100006104, "rewards/accuracies": 0.75, "rewards/chosen": -0.25352221727371216, "rewards/margins": 0.08541283011436462, "rewards/rejected": -0.3389350175857544, "step": 2909 }, { "epoch": 7.967145790554415, "grad_norm": 5.482451438903809, "learning_rate": 6.015068493150685e-07, "log_odds_chosen": 1.3695168495178223, "log_odds_ratio": -0.37998005747795105, "logits/chosen": 0.7974439859390259, "logits/rejected": 0.7459179759025574, "logps/chosen": -2.0844109058380127, "logps/rejected": -3.248807430267334, "loss": 0.8144, "nll_loss": 0.776388943195343, "rewards/accuracies": 0.875, "rewards/chosen": -0.2084411084651947, "rewards/margins": 0.11643967032432556, "rewards/rejected": -0.32488077878952026, "step": 2910 }, { "epoch": 7.969883641341547, "grad_norm": 4.942446231842041, "learning_rate": 6.013698630136987e-07, "log_odds_chosen": 1.2136566638946533, "log_odds_ratio": -0.49330610036849976, "logits/chosen": 1.0927361249923706, "logits/rejected": 1.0650243759155273, "logps/chosen": -2.1578965187072754, "logps/rejected": -3.2525441646575928, "loss": 0.7692, "nll_loss": 0.7199162840843201, "rewards/accuracies": 0.875, "rewards/chosen": -0.21578964591026306, "rewards/margins": 0.10946476459503174, "rewards/rejected": -0.3252544105052948, "step": 2911 }, { "epoch": 7.972621492128679, "grad_norm": 4.415659427642822, "learning_rate": 6.012328767123287e-07, "log_odds_chosen": 1.8842103481292725, "log_odds_ratio": -0.26262611150741577, "logits/chosen": 0.6492589116096497, "logits/rejected": 0.6487101912498474, "logps/chosen": -1.7960968017578125, "logps/rejected": -3.5492026805877686, "loss": 0.7954, "nll_loss": 0.7691579461097717, "rewards/accuracies": 1.0, "rewards/chosen": -0.17960968613624573, "rewards/margins": 0.17531058192253113, "rewards/rejected": -0.35492026805877686, "step": 2912 }, { "epoch": 7.975359342915811, "grad_norm": 4.1860671043396, "learning_rate": 6.010958904109589e-07, "log_odds_chosen": 1.4066998958587646, "log_odds_ratio": -0.3390454053878784, "logits/chosen": 0.7540445327758789, "logits/rejected": 0.736582338809967, "logps/chosen": -1.559694766998291, "logps/rejected": -2.814096450805664, "loss": 0.7651, "nll_loss": 0.7311705350875854, "rewards/accuracies": 1.0, "rewards/chosen": -0.15596947073936462, "rewards/margins": 0.12544016540050507, "rewards/rejected": -0.2814096212387085, "step": 2913 }, { "epoch": 7.9780971937029435, "grad_norm": 3.9269754886627197, "learning_rate": 6.009589041095891e-07, "log_odds_chosen": 2.612731695175171, "log_odds_ratio": -0.2585069537162781, "logits/chosen": 0.8029487133026123, "logits/rejected": 0.861364483833313, "logps/chosen": -2.3099989891052246, "logps/rejected": -4.797659397125244, "loss": 0.802, "nll_loss": 0.7761541604995728, "rewards/accuracies": 0.875, "rewards/chosen": -0.2309999167919159, "rewards/margins": 0.24876604974269867, "rewards/rejected": -0.47976595163345337, "step": 2914 }, { "epoch": 7.9808350444900755, "grad_norm": 3.822603225708008, "learning_rate": 6.008219178082192e-07, "log_odds_chosen": 2.4039323329925537, "log_odds_ratio": -0.1838206797838211, "logits/chosen": 0.8061205744743347, "logits/rejected": 0.7582038640975952, "logps/chosen": -2.2478017807006836, "logps/rejected": -4.5491557121276855, "loss": 0.7986, "nll_loss": 0.7801914811134338, "rewards/accuracies": 1.0, "rewards/chosen": -0.22478017210960388, "rewards/margins": 0.23013544082641602, "rewards/rejected": -0.4549155831336975, "step": 2915 }, { "epoch": 7.983572895277208, "grad_norm": 3.8407795429229736, "learning_rate": 6.006849315068493e-07, "log_odds_chosen": 1.0304216146469116, "log_odds_ratio": -0.38116392493247986, "logits/chosen": 0.6582508087158203, "logits/rejected": 0.7211095690727234, "logps/chosen": -2.178300380706787, "logps/rejected": -3.1378695964813232, "loss": 0.854, "nll_loss": 0.8159198760986328, "rewards/accuracies": 0.875, "rewards/chosen": -0.21783004701137543, "rewards/margins": 0.09595691412687302, "rewards/rejected": -0.31378695368766785, "step": 2916 }, { "epoch": 7.98631074606434, "grad_norm": 5.018369674682617, "learning_rate": 6.005479452054795e-07, "log_odds_chosen": 1.4153656959533691, "log_odds_ratio": -0.30294984579086304, "logits/chosen": 0.9289522171020508, "logits/rejected": 0.8958503007888794, "logps/chosen": -2.1986100673675537, "logps/rejected": -3.4784834384918213, "loss": 0.7709, "nll_loss": 0.7405786514282227, "rewards/accuracies": 0.875, "rewards/chosen": -0.2198610007762909, "rewards/margins": 0.1279873549938202, "rewards/rejected": -0.3478483557701111, "step": 2917 }, { "epoch": 7.989048596851472, "grad_norm": 4.327919960021973, "learning_rate": 6.004109589041096e-07, "log_odds_chosen": 2.249423027038574, "log_odds_ratio": -0.24758684635162354, "logits/chosen": 0.9529192447662354, "logits/rejected": 0.9647213220596313, "logps/chosen": -2.3300535678863525, "logps/rejected": -4.476408004760742, "loss": 0.796, "nll_loss": 0.7712602019309998, "rewards/accuracies": 1.0, "rewards/chosen": -0.2330053448677063, "rewards/margins": 0.2146354615688324, "rewards/rejected": -0.4476408064365387, "step": 2918 }, { "epoch": 7.991786447638604, "grad_norm": 7.10576057434082, "learning_rate": 6.002739726027397e-07, "log_odds_chosen": 1.9304672479629517, "log_odds_ratio": -0.6643715500831604, "logits/chosen": 0.8197765350341797, "logits/rejected": 0.7988494038581848, "logps/chosen": -2.81333065032959, "logps/rejected": -4.633594512939453, "loss": 0.8088, "nll_loss": 0.7423883676528931, "rewards/accuracies": 0.75, "rewards/chosen": -0.2813330590724945, "rewards/margins": 0.18202635645866394, "rewards/rejected": -0.46335941553115845, "step": 2919 }, { "epoch": 7.994524298425736, "grad_norm": 5.057011604309082, "learning_rate": 6.001369863013699e-07, "log_odds_chosen": 1.3452093601226807, "log_odds_ratio": -0.4466277062892914, "logits/chosen": 0.8018374443054199, "logits/rejected": 0.8251030445098877, "logps/chosen": -2.2890100479125977, "logps/rejected": -3.5489399433135986, "loss": 0.8011, "nll_loss": 0.7564581632614136, "rewards/accuracies": 0.625, "rewards/chosen": -0.2289009988307953, "rewards/margins": 0.1259930431842804, "rewards/rejected": -0.3548940420150757, "step": 2920 }, { "epoch": 7.997262149212868, "grad_norm": 4.860567092895508, "learning_rate": 6e-07, "log_odds_chosen": 1.3255746364593506, "log_odds_ratio": -0.4122970700263977, "logits/chosen": 0.659557044506073, "logits/rejected": 0.5834583044052124, "logps/chosen": -1.9823036193847656, "logps/rejected": -3.211703300476074, "loss": 0.7845, "nll_loss": 0.7432244420051575, "rewards/accuracies": 0.75, "rewards/chosen": -0.19823035597801208, "rewards/margins": 0.12293995916843414, "rewards/rejected": -0.3211703300476074, "step": 2921 }, { "epoch": 8.0, "grad_norm": 4.442699909210205, "learning_rate": 5.998630136986302e-07, "log_odds_chosen": 1.3987656831741333, "log_odds_ratio": -0.34369319677352905, "logits/chosen": 0.8632336854934692, "logits/rejected": 0.8112284541130066, "logps/chosen": -1.9685667753219604, "logps/rejected": -3.277383327484131, "loss": 0.7342, "nll_loss": 0.6998435854911804, "rewards/accuracies": 0.875, "rewards/chosen": -0.19685667753219604, "rewards/margins": 0.13088169693946838, "rewards/rejected": -0.32773834466934204, "step": 2922 }, { "epoch": 8.002737850787133, "grad_norm": 5.914403915405273, "learning_rate": 5.997260273972602e-07, "log_odds_chosen": 0.9542650580406189, "log_odds_ratio": -0.6238282322883606, "logits/chosen": 0.8739575147628784, "logits/rejected": 0.9452003836631775, "logps/chosen": -2.290060520172119, "logps/rejected": -3.156792640686035, "loss": 0.7469, "nll_loss": 0.6845430731773376, "rewards/accuracies": 0.875, "rewards/chosen": -0.22900605201721191, "rewards/margins": 0.08667320758104324, "rewards/rejected": -0.31567928194999695, "step": 2923 }, { "epoch": 8.005475701574264, "grad_norm": 4.085738658905029, "learning_rate": 5.995890410958904e-07, "log_odds_chosen": 1.6769338846206665, "log_odds_ratio": -0.23661421239376068, "logits/chosen": 0.8015087246894836, "logits/rejected": 0.8645468354225159, "logps/chosen": -1.6922147274017334, "logps/rejected": -3.1903398036956787, "loss": 0.7312, "nll_loss": 0.707575261592865, "rewards/accuracies": 1.0, "rewards/chosen": -0.16922147572040558, "rewards/margins": 0.1498125046491623, "rewards/rejected": -0.31903398036956787, "step": 2924 }, { "epoch": 8.008213552361397, "grad_norm": 6.704708576202393, "learning_rate": 5.994520547945206e-07, "log_odds_chosen": 0.9124807119369507, "log_odds_ratio": -0.7307615280151367, "logits/chosen": 1.0577020645141602, "logits/rejected": 1.1120622158050537, "logps/chosen": -2.7890214920043945, "logps/rejected": -3.602449893951416, "loss": 0.756, "nll_loss": 0.6829555034637451, "rewards/accuracies": 0.875, "rewards/chosen": -0.278902143239975, "rewards/margins": 0.08134286105632782, "rewards/rejected": -0.360245019197464, "step": 2925 }, { "epoch": 8.010951403148528, "grad_norm": 5.099404811859131, "learning_rate": 5.993150684931506e-07, "log_odds_chosen": 1.6093740463256836, "log_odds_ratio": -0.3673296868801117, "logits/chosen": 1.0499321222305298, "logits/rejected": 1.077498197555542, "logps/chosen": -2.532197952270508, "logps/rejected": -4.0616350173950195, "loss": 0.8359, "nll_loss": 0.7991851568222046, "rewards/accuracies": 0.875, "rewards/chosen": -0.2532197833061218, "rewards/margins": 0.1529437005519867, "rewards/rejected": -0.40616345405578613, "step": 2926 }, { "epoch": 8.013689253935661, "grad_norm": 4.422236442565918, "learning_rate": 5.991780821917808e-07, "log_odds_chosen": 1.529550552368164, "log_odds_ratio": -0.39124763011932373, "logits/chosen": 0.8144899606704712, "logits/rejected": 0.8442711234092712, "logps/chosen": -1.6949563026428223, "logps/rejected": -3.07535982131958, "loss": 0.681, "nll_loss": 0.6419064998626709, "rewards/accuracies": 0.875, "rewards/chosen": -0.16949564218521118, "rewards/margins": 0.13804034888744354, "rewards/rejected": -0.30753597617149353, "step": 2927 }, { "epoch": 8.016427104722792, "grad_norm": 5.51698112487793, "learning_rate": 5.99041095890411e-07, "log_odds_chosen": 1.5691969394683838, "log_odds_ratio": -0.33267199993133545, "logits/chosen": 0.8223077654838562, "logits/rejected": 0.7565518617630005, "logps/chosen": -2.621999979019165, "logps/rejected": -4.126424789428711, "loss": 0.7433, "nll_loss": 0.710005521774292, "rewards/accuracies": 1.0, "rewards/chosen": -0.2622000277042389, "rewards/margins": 0.1504424810409546, "rewards/rejected": -0.41264253854751587, "step": 2928 }, { "epoch": 8.019164955509925, "grad_norm": 4.197620868682861, "learning_rate": 5.98904109589041e-07, "log_odds_chosen": 1.0483126640319824, "log_odds_ratio": -0.47852152585983276, "logits/chosen": 0.7814896106719971, "logits/rejected": 0.8139369487762451, "logps/chosen": -2.524449110031128, "logps/rejected": -3.5314760208129883, "loss": 0.794, "nll_loss": 0.7461037635803223, "rewards/accuracies": 0.75, "rewards/chosen": -0.25244492292404175, "rewards/margins": 0.10070270299911499, "rewards/rejected": -0.35314759612083435, "step": 2929 }, { "epoch": 8.021902806297057, "grad_norm": 4.138354778289795, "learning_rate": 5.987671232876712e-07, "log_odds_chosen": 2.0869197845458984, "log_odds_ratio": -0.4376693665981293, "logits/chosen": 0.7204829454421997, "logits/rejected": 0.7046747207641602, "logps/chosen": -2.639956474304199, "logps/rejected": -4.628755569458008, "loss": 0.86, "nll_loss": 0.816252589225769, "rewards/accuracies": 0.875, "rewards/chosen": -0.2639956474304199, "rewards/margins": 0.1988799273967743, "rewards/rejected": -0.4628755748271942, "step": 2930 }, { "epoch": 8.02464065708419, "grad_norm": 4.3131890296936035, "learning_rate": 5.986301369863014e-07, "log_odds_chosen": 3.092040777206421, "log_odds_ratio": -0.30382731556892395, "logits/chosen": 0.7200795412063599, "logits/rejected": 0.7115208506584167, "logps/chosen": -2.3482508659362793, "logps/rejected": -5.308151721954346, "loss": 0.7727, "nll_loss": 0.742322564125061, "rewards/accuracies": 0.75, "rewards/chosen": -0.23482510447502136, "rewards/margins": 0.29599007964134216, "rewards/rejected": -0.5308151841163635, "step": 2931 }, { "epoch": 8.02737850787132, "grad_norm": 4.071172714233398, "learning_rate": 5.984931506849315e-07, "log_odds_chosen": 1.699796438217163, "log_odds_ratio": -0.22219425439834595, "logits/chosen": 0.9383102655410767, "logits/rejected": 0.9621652960777283, "logps/chosen": -1.7773685455322266, "logps/rejected": -3.3279662132263184, "loss": 0.6807, "nll_loss": 0.6585096120834351, "rewards/accuracies": 1.0, "rewards/chosen": -0.17773686349391937, "rewards/margins": 0.15505975484848022, "rewards/rejected": -0.3327966332435608, "step": 2932 }, { "epoch": 8.030116358658454, "grad_norm": 4.884191036224365, "learning_rate": 5.983561643835616e-07, "log_odds_chosen": 2.392108678817749, "log_odds_ratio": -0.3707689940929413, "logits/chosen": 0.8006250858306885, "logits/rejected": 0.8482601642608643, "logps/chosen": -2.553621292114258, "logps/rejected": -4.856322765350342, "loss": 0.8194, "nll_loss": 0.7823050022125244, "rewards/accuracies": 0.75, "rewards/chosen": -0.2553621232509613, "rewards/margins": 0.2302701473236084, "rewards/rejected": -0.4856322407722473, "step": 2933 }, { "epoch": 8.032854209445585, "grad_norm": 6.166343688964844, "learning_rate": 5.982191780821918e-07, "log_odds_chosen": 1.3478715419769287, "log_odds_ratio": -0.41108232736587524, "logits/chosen": 0.8913283944129944, "logits/rejected": 1.006375789642334, "logps/chosen": -2.5263943672180176, "logps/rejected": -3.8338279724121094, "loss": 0.7787, "nll_loss": 0.7375860810279846, "rewards/accuracies": 0.75, "rewards/chosen": -0.25263944268226624, "rewards/margins": 0.1307433694601059, "rewards/rejected": -0.38338279724121094, "step": 2934 }, { "epoch": 8.035592060232718, "grad_norm": 4.140854835510254, "learning_rate": 5.980821917808219e-07, "log_odds_chosen": 1.9023241996765137, "log_odds_ratio": -0.310481995344162, "logits/chosen": 0.9615771770477295, "logits/rejected": 0.9525325298309326, "logps/chosen": -1.8569921255111694, "logps/rejected": -3.6507320404052734, "loss": 0.7008, "nll_loss": 0.6697924137115479, "rewards/accuracies": 0.875, "rewards/chosen": -0.1856992244720459, "rewards/margins": 0.17937399446964264, "rewards/rejected": -0.36507323384284973, "step": 2935 }, { "epoch": 8.038329911019849, "grad_norm": 3.4969379901885986, "learning_rate": 5.97945205479452e-07, "log_odds_chosen": 2.31657338142395, "log_odds_ratio": -0.30168187618255615, "logits/chosen": 0.845197319984436, "logits/rejected": 0.865816593170166, "logps/chosen": -1.790802240371704, "logps/rejected": -4.003140926361084, "loss": 0.7262, "nll_loss": 0.6960434317588806, "rewards/accuracies": 0.875, "rewards/chosen": -0.17908023297786713, "rewards/margins": 0.22123390436172485, "rewards/rejected": -0.4003141224384308, "step": 2936 }, { "epoch": 8.041067761806982, "grad_norm": 4.092083930969238, "learning_rate": 5.978082191780822e-07, "log_odds_chosen": 2.0921337604522705, "log_odds_ratio": -0.20648962259292603, "logits/chosen": 0.8432410955429077, "logits/rejected": 0.818493127822876, "logps/chosen": -1.961830735206604, "logps/rejected": -3.8604159355163574, "loss": 0.702, "nll_loss": 0.6813541650772095, "rewards/accuracies": 1.0, "rewards/chosen": -0.19618308544158936, "rewards/margins": 0.18985852599143982, "rewards/rejected": -0.3860416114330292, "step": 2937 }, { "epoch": 8.043805612594113, "grad_norm": 5.17478084564209, "learning_rate": 5.976712328767123e-07, "log_odds_chosen": 1.8445483446121216, "log_odds_ratio": -0.30827754735946655, "logits/chosen": 0.9057226777076721, "logits/rejected": 0.9628678560256958, "logps/chosen": -2.6495184898376465, "logps/rejected": -4.42512845993042, "loss": 0.8005, "nll_loss": 0.7696645259857178, "rewards/accuracies": 0.875, "rewards/chosen": -0.26495182514190674, "rewards/margins": 0.17756101489067078, "rewards/rejected": -0.4425128400325775, "step": 2938 }, { "epoch": 8.046543463381246, "grad_norm": 4.703587055206299, "learning_rate": 5.975342465753425e-07, "log_odds_chosen": 2.2140655517578125, "log_odds_ratio": -0.2285168170928955, "logits/chosen": 0.7320386171340942, "logits/rejected": 0.7242206335067749, "logps/chosen": -2.385641098022461, "logps/rejected": -4.445547103881836, "loss": 0.7375, "nll_loss": 0.7146570086479187, "rewards/accuracies": 1.0, "rewards/chosen": -0.23856410384178162, "rewards/margins": 0.20599058270454407, "rewards/rejected": -0.44455471634864807, "step": 2939 }, { "epoch": 8.049281314168377, "grad_norm": 4.837001800537109, "learning_rate": 5.973972602739725e-07, "log_odds_chosen": 3.255896806716919, "log_odds_ratio": -0.30798041820526123, "logits/chosen": 1.0492310523986816, "logits/rejected": 1.1347754001617432, "logps/chosen": -2.066382884979248, "logps/rejected": -5.218316078186035, "loss": 0.6655, "nll_loss": 0.6346978545188904, "rewards/accuracies": 0.75, "rewards/chosen": -0.20663832128047943, "rewards/margins": 0.3151932954788208, "rewards/rejected": -0.5218316316604614, "step": 2940 }, { "epoch": 8.05201916495551, "grad_norm": 4.410618305206299, "learning_rate": 5.972602739726027e-07, "log_odds_chosen": 1.789175033569336, "log_odds_ratio": -0.4272189140319824, "logits/chosen": 0.7723405361175537, "logits/rejected": 0.8108402490615845, "logps/chosen": -2.554586887359619, "logps/rejected": -4.276515007019043, "loss": 0.7354, "nll_loss": 0.6926382780075073, "rewards/accuracies": 0.75, "rewards/chosen": -0.25545868277549744, "rewards/margins": 0.17219284176826477, "rewards/rejected": -0.4276515245437622, "step": 2941 }, { "epoch": 8.054757015742641, "grad_norm": 5.097138404846191, "learning_rate": 5.971232876712329e-07, "log_odds_chosen": 1.4827635288238525, "log_odds_ratio": -0.5308483242988586, "logits/chosen": 0.6602639555931091, "logits/rejected": 0.5864531993865967, "logps/chosen": -2.267902374267578, "logps/rejected": -3.716527223587036, "loss": 0.7957, "nll_loss": 0.7426304817199707, "rewards/accuracies": 0.75, "rewards/chosen": -0.22679021954536438, "rewards/margins": 0.14486253261566162, "rewards/rejected": -0.3716527223587036, "step": 2942 }, { "epoch": 8.057494866529774, "grad_norm": 4.72747802734375, "learning_rate": 5.969863013698629e-07, "log_odds_chosen": 1.6807790994644165, "log_odds_ratio": -0.3359107971191406, "logits/chosen": 0.682711660861969, "logits/rejected": 0.6269720196723938, "logps/chosen": -3.1292738914489746, "logps/rejected": -4.751674652099609, "loss": 0.8988, "nll_loss": 0.8652020692825317, "rewards/accuracies": 0.875, "rewards/chosen": -0.3129274249076843, "rewards/margins": 0.16224008798599243, "rewards/rejected": -0.47516751289367676, "step": 2943 }, { "epoch": 8.060232717316905, "grad_norm": 4.136028289794922, "learning_rate": 5.968493150684931e-07, "log_odds_chosen": 2.379848003387451, "log_odds_ratio": -0.2680636942386627, "logits/chosen": 0.9335212707519531, "logits/rejected": 0.9458290338516235, "logps/chosen": -1.7097735404968262, "logps/rejected": -3.9053733348846436, "loss": 0.8161, "nll_loss": 0.7892626523971558, "rewards/accuracies": 0.875, "rewards/chosen": -0.17097735404968262, "rewards/margins": 0.21955999732017517, "rewards/rejected": -0.3905373513698578, "step": 2944 }, { "epoch": 8.062970568104038, "grad_norm": 4.785904884338379, "learning_rate": 5.967123287671233e-07, "log_odds_chosen": 1.2788825035095215, "log_odds_ratio": -0.37600141763687134, "logits/chosen": 0.8156141638755798, "logits/rejected": 0.8422861695289612, "logps/chosen": -2.2466049194335938, "logps/rejected": -3.438918113708496, "loss": 0.8088, "nll_loss": 0.7711748480796814, "rewards/accuracies": 0.75, "rewards/chosen": -0.22466051578521729, "rewards/margins": 0.11923132091760635, "rewards/rejected": -0.34389182925224304, "step": 2945 }, { "epoch": 8.06570841889117, "grad_norm": 3.9567782878875732, "learning_rate": 5.965753424657534e-07, "log_odds_chosen": 2.3841466903686523, "log_odds_ratio": -0.35594770312309265, "logits/chosen": 0.7834299206733704, "logits/rejected": 0.8155878782272339, "logps/chosen": -2.4022419452667236, "logps/rejected": -4.671010494232178, "loss": 0.7592, "nll_loss": 0.7236365079879761, "rewards/accuracies": 0.75, "rewards/chosen": -0.2402242124080658, "rewards/margins": 0.2268768697977066, "rewards/rejected": -0.4671010375022888, "step": 2946 }, { "epoch": 8.068446269678303, "grad_norm": 6.0865912437438965, "learning_rate": 5.964383561643835e-07, "log_odds_chosen": 1.3167914152145386, "log_odds_ratio": -0.43612873554229736, "logits/chosen": 0.896061897277832, "logits/rejected": 0.945399284362793, "logps/chosen": -2.100804090499878, "logps/rejected": -3.305147171020508, "loss": 0.7838, "nll_loss": 0.7401812076568604, "rewards/accuracies": 0.75, "rewards/chosen": -0.21008040010929108, "rewards/margins": 0.12043432891368866, "rewards/rejected": -0.33051472902297974, "step": 2947 }, { "epoch": 8.071184120465434, "grad_norm": 4.78485631942749, "learning_rate": 5.963013698630137e-07, "log_odds_chosen": 1.1271827220916748, "log_odds_ratio": -0.3821674585342407, "logits/chosen": 0.760338306427002, "logits/rejected": 0.7795171737670898, "logps/chosen": -2.7081236839294434, "logps/rejected": -3.7858519554138184, "loss": 0.9022, "nll_loss": 0.863953709602356, "rewards/accuracies": 0.875, "rewards/chosen": -0.27081239223480225, "rewards/margins": 0.1077728271484375, "rewards/rejected": -0.37858521938323975, "step": 2948 }, { "epoch": 8.073921971252567, "grad_norm": 4.260054588317871, "learning_rate": 5.961643835616438e-07, "log_odds_chosen": 0.705783486366272, "log_odds_ratio": -0.6505692601203918, "logits/chosen": 0.7543607354164124, "logits/rejected": 0.8458055257797241, "logps/chosen": -2.8354763984680176, "logps/rejected": -3.467313289642334, "loss": 0.8313, "nll_loss": 0.766211211681366, "rewards/accuracies": 0.875, "rewards/chosen": -0.28354763984680176, "rewards/margins": 0.06318368017673492, "rewards/rejected": -0.3467313349246979, "step": 2949 }, { "epoch": 8.0766598220397, "grad_norm": 3.711926221847534, "learning_rate": 5.960273972602739e-07, "log_odds_chosen": 2.4849531650543213, "log_odds_ratio": -0.13889215886592865, "logits/chosen": 0.9110336899757385, "logits/rejected": 0.9479974508285522, "logps/chosen": -1.897570013999939, "logps/rejected": -4.204570770263672, "loss": 0.7328, "nll_loss": 0.7189379930496216, "rewards/accuracies": 1.0, "rewards/chosen": -0.18975698947906494, "rewards/margins": 0.2307000756263733, "rewards/rejected": -0.42045706510543823, "step": 2950 }, { "epoch": 8.07939767282683, "grad_norm": 5.112411975860596, "learning_rate": 5.958904109589041e-07, "log_odds_chosen": 2.485954523086548, "log_odds_ratio": -0.38771289587020874, "logits/chosen": 0.7300738096237183, "logits/rejected": 0.8301892876625061, "logps/chosen": -2.6230483055114746, "logps/rejected": -5.014822959899902, "loss": 0.7668, "nll_loss": 0.7280191779136658, "rewards/accuracies": 0.875, "rewards/chosen": -0.2623048424720764, "rewards/margins": 0.23917749524116516, "rewards/rejected": -0.501482367515564, "step": 2951 }, { "epoch": 8.082135523613964, "grad_norm": 5.309886932373047, "learning_rate": 5.957534246575342e-07, "log_odds_chosen": 1.4659940004348755, "log_odds_ratio": -0.43824321031570435, "logits/chosen": 0.9409666061401367, "logits/rejected": 1.0295318365097046, "logps/chosen": -3.501018524169922, "logps/rejected": -4.919734001159668, "loss": 0.8548, "nll_loss": 0.8109873533248901, "rewards/accuracies": 0.625, "rewards/chosen": -0.35010185837745667, "rewards/margins": 0.14187157154083252, "rewards/rejected": -0.4919734001159668, "step": 2952 }, { "epoch": 8.084873374401095, "grad_norm": 6.118600845336914, "learning_rate": 5.956164383561644e-07, "log_odds_chosen": 1.5678590536117554, "log_odds_ratio": -0.3893182575702667, "logits/chosen": 0.8425774574279785, "logits/rejected": 0.8615187406539917, "logps/chosen": -2.891369104385376, "logps/rejected": -4.3773932456970215, "loss": 0.9473, "nll_loss": 0.9083679914474487, "rewards/accuracies": 0.75, "rewards/chosen": -0.2891369163990021, "rewards/margins": 0.14860239624977112, "rewards/rejected": -0.4377393126487732, "step": 2953 }, { "epoch": 8.087611225188228, "grad_norm": 7.206841468811035, "learning_rate": 5.954794520547944e-07, "log_odds_chosen": 1.825620174407959, "log_odds_ratio": -0.44651877880096436, "logits/chosen": 0.893659234046936, "logits/rejected": 0.8522850275039673, "logps/chosen": -3.4049103260040283, "logps/rejected": -5.129127502441406, "loss": 0.8672, "nll_loss": 0.8225085735321045, "rewards/accuracies": 0.75, "rewards/chosen": -0.34049108624458313, "rewards/margins": 0.17242173850536346, "rewards/rejected": -0.5129128098487854, "step": 2954 }, { "epoch": 8.09034907597536, "grad_norm": 4.979612350463867, "learning_rate": 5.953424657534246e-07, "log_odds_chosen": 0.6635697484016418, "log_odds_ratio": -0.5013158321380615, "logits/chosen": 0.7606201767921448, "logits/rejected": 0.7476900815963745, "logps/chosen": -2.698601722717285, "logps/rejected": -3.300136089324951, "loss": 0.8105, "nll_loss": 0.7603607177734375, "rewards/accuracies": 0.75, "rewards/chosen": -0.2698601484298706, "rewards/margins": 0.06015343964099884, "rewards/rejected": -0.33001360297203064, "step": 2955 }, { "epoch": 8.093086926762492, "grad_norm": 4.63942289352417, "learning_rate": 5.952054794520548e-07, "log_odds_chosen": 1.2973753213882446, "log_odds_ratio": -0.5129981637001038, "logits/chosen": 0.6164595484733582, "logits/rejected": 0.6320828199386597, "logps/chosen": -2.9251456260681152, "logps/rejected": -4.202200889587402, "loss": 0.8215, "nll_loss": 0.7702066898345947, "rewards/accuracies": 0.75, "rewards/chosen": -0.2925145626068115, "rewards/margins": 0.12770552933216095, "rewards/rejected": -0.4202200770378113, "step": 2956 }, { "epoch": 8.095824777549623, "grad_norm": 5.455239772796631, "learning_rate": 5.950684931506848e-07, "log_odds_chosen": 0.06357640027999878, "log_odds_ratio": -0.7110149264335632, "logits/chosen": 0.698539137840271, "logits/rejected": 0.6861187219619751, "logps/chosen": -2.0598630905151367, "logps/rejected": -2.1460142135620117, "loss": 0.8282, "nll_loss": 0.7571290731430054, "rewards/accuracies": 0.375, "rewards/chosen": -0.20598632097244263, "rewards/margins": 0.008615084923803806, "rewards/rejected": -0.21460142731666565, "step": 2957 }, { "epoch": 8.098562628336756, "grad_norm": 4.797145366668701, "learning_rate": 5.94931506849315e-07, "log_odds_chosen": 0.8936557769775391, "log_odds_ratio": -0.3863249719142914, "logits/chosen": 0.8425790667533875, "logits/rejected": 0.7984764575958252, "logps/chosen": -1.6616684198379517, "logps/rejected": -2.4352829456329346, "loss": 0.7744, "nll_loss": 0.7358022332191467, "rewards/accuracies": 0.875, "rewards/chosen": -0.16616684198379517, "rewards/margins": 0.07736144959926605, "rewards/rejected": -0.24352827668190002, "step": 2958 }, { "epoch": 8.101300479123887, "grad_norm": 5.2714762687683105, "learning_rate": 5.947945205479452e-07, "log_odds_chosen": 1.1018749475479126, "log_odds_ratio": -0.38855987787246704, "logits/chosen": 1.0400458574295044, "logits/rejected": 1.0062615871429443, "logps/chosen": -2.623019218444824, "logps/rejected": -3.651477813720703, "loss": 0.7819, "nll_loss": 0.7430934309959412, "rewards/accuracies": 0.875, "rewards/chosen": -0.2623019218444824, "rewards/margins": 0.10284584760665894, "rewards/rejected": -0.36514776945114136, "step": 2959 }, { "epoch": 8.10403832991102, "grad_norm": 4.7758989334106445, "learning_rate": 5.946575342465753e-07, "log_odds_chosen": 0.9577959775924683, "log_odds_ratio": -0.4580899178981781, "logits/chosen": 0.7667142152786255, "logits/rejected": 0.7815279960632324, "logps/chosen": -2.272451877593994, "logps/rejected": -3.141831398010254, "loss": 0.7308, "nll_loss": 0.6849702596664429, "rewards/accuracies": 0.875, "rewards/chosen": -0.22724519670009613, "rewards/margins": 0.08693794906139374, "rewards/rejected": -0.31418314576148987, "step": 2960 }, { "epoch": 8.106776180698152, "grad_norm": 5.599181175231934, "learning_rate": 5.945205479452054e-07, "log_odds_chosen": 1.870482325553894, "log_odds_ratio": -0.5739589929580688, "logits/chosen": 0.8594305515289307, "logits/rejected": 0.842178225517273, "logps/chosen": -2.7139463424682617, "logps/rejected": -4.500815391540527, "loss": 0.8218, "nll_loss": 0.7643888592720032, "rewards/accuracies": 0.875, "rewards/chosen": -0.27139464020729065, "rewards/margins": 0.17868690192699432, "rewards/rejected": -0.4500815272331238, "step": 2961 }, { "epoch": 8.109514031485284, "grad_norm": 4.677968978881836, "learning_rate": 5.943835616438356e-07, "log_odds_chosen": 2.0432426929473877, "log_odds_ratio": -0.1850469410419464, "logits/chosen": 0.9391596913337708, "logits/rejected": 0.9683769941329956, "logps/chosen": -2.082007646560669, "logps/rejected": -3.911801338195801, "loss": 0.709, "nll_loss": 0.6904811859130859, "rewards/accuracies": 1.0, "rewards/chosen": -0.20820075273513794, "rewards/margins": 0.18297940492630005, "rewards/rejected": -0.391180157661438, "step": 2962 }, { "epoch": 8.112251882272416, "grad_norm": 4.990067958831787, "learning_rate": 5.942465753424657e-07, "log_odds_chosen": 1.3598830699920654, "log_odds_ratio": -0.5242891907691956, "logits/chosen": 0.892096996307373, "logits/rejected": 0.9596484303474426, "logps/chosen": -2.285627841949463, "logps/rejected": -3.5648300647735596, "loss": 0.8197, "nll_loss": 0.7672949433326721, "rewards/accuracies": 0.75, "rewards/chosen": -0.22856280207633972, "rewards/margins": 0.12792019546031952, "rewards/rejected": -0.3564830422401428, "step": 2963 }, { "epoch": 8.114989733059549, "grad_norm": 4.755975723266602, "learning_rate": 5.941095890410958e-07, "log_odds_chosen": 1.8721895217895508, "log_odds_ratio": -0.39787545800209045, "logits/chosen": 1.1438360214233398, "logits/rejected": 1.1605916023254395, "logps/chosen": -2.2383980751037598, "logps/rejected": -3.992288112640381, "loss": 0.8091, "nll_loss": 0.7693291306495667, "rewards/accuracies": 0.875, "rewards/chosen": -0.22383983433246613, "rewards/margins": 0.17538897693157196, "rewards/rejected": -0.3992288112640381, "step": 2964 }, { "epoch": 8.11772758384668, "grad_norm": 5.483027935028076, "learning_rate": 5.93972602739726e-07, "log_odds_chosen": 1.7942888736724854, "log_odds_ratio": -0.23802773654460907, "logits/chosen": 1.0861036777496338, "logits/rejected": 1.044785737991333, "logps/chosen": -2.9397308826446533, "logps/rejected": -4.64738655090332, "loss": 0.797, "nll_loss": 0.773194432258606, "rewards/accuracies": 1.0, "rewards/chosen": -0.29397308826446533, "rewards/margins": 0.17076556384563446, "rewards/rejected": -0.464738667011261, "step": 2965 }, { "epoch": 8.120465434633813, "grad_norm": 6.6840667724609375, "learning_rate": 5.938356164383561e-07, "log_odds_chosen": 0.9894396066665649, "log_odds_ratio": -0.6325157284736633, "logits/chosen": 0.8785343766212463, "logits/rejected": 0.8788301348686218, "logps/chosen": -3.0452675819396973, "logps/rejected": -3.9411404132843018, "loss": 0.7727, "nll_loss": 0.7094969153404236, "rewards/accuracies": 0.875, "rewards/chosen": -0.30452677607536316, "rewards/margins": 0.08958727866411209, "rewards/rejected": -0.39411407709121704, "step": 2966 }, { "epoch": 8.123203285420944, "grad_norm": 4.417445659637451, "learning_rate": 5.936986301369863e-07, "log_odds_chosen": 1.7370027303695679, "log_odds_ratio": -0.29902711510658264, "logits/chosen": 0.5762490630149841, "logits/rejected": 0.5621262192726135, "logps/chosen": -1.6365450620651245, "logps/rejected": -3.2244908809661865, "loss": 0.8316, "nll_loss": 0.801705002784729, "rewards/accuracies": 0.875, "rewards/chosen": -0.16365450620651245, "rewards/margins": 0.1587945818901062, "rewards/rejected": -0.32244911789894104, "step": 2967 }, { "epoch": 8.125941136208077, "grad_norm": 3.604475736618042, "learning_rate": 5.935616438356164e-07, "log_odds_chosen": 3.0361366271972656, "log_odds_ratio": -0.15353693068027496, "logits/chosen": 1.046553373336792, "logits/rejected": 1.0318394899368286, "logps/chosen": -2.0706474781036377, "logps/rejected": -4.971887111663818, "loss": 0.7471, "nll_loss": 0.7317707538604736, "rewards/accuracies": 1.0, "rewards/chosen": -0.20706476271152496, "rewards/margins": 0.29012396931648254, "rewards/rejected": -0.4971887171268463, "step": 2968 }, { "epoch": 8.128678986995208, "grad_norm": 5.13166618347168, "learning_rate": 5.934246575342465e-07, "log_odds_chosen": 1.0837998390197754, "log_odds_ratio": -0.38087892532348633, "logits/chosen": 0.9445227384567261, "logits/rejected": 0.941460132598877, "logps/chosen": -2.6059212684631348, "logps/rejected": -3.615139961242676, "loss": 0.7981, "nll_loss": 0.7600328922271729, "rewards/accuracies": 0.875, "rewards/chosen": -0.26059216260910034, "rewards/margins": 0.1009218692779541, "rewards/rejected": -0.36151400208473206, "step": 2969 }, { "epoch": 8.131416837782341, "grad_norm": 4.458151817321777, "learning_rate": 5.932876712328767e-07, "log_odds_chosen": 1.3311347961425781, "log_odds_ratio": -0.5606022477149963, "logits/chosen": 0.7912023663520813, "logits/rejected": 0.8767410516738892, "logps/chosen": -2.2108566761016846, "logps/rejected": -3.4696719646453857, "loss": 0.8456, "nll_loss": 0.7895135879516602, "rewards/accuracies": 0.875, "rewards/chosen": -0.22108565270900726, "rewards/margins": 0.12588155269622803, "rewards/rejected": -0.3469672203063965, "step": 2970 }, { "epoch": 8.134154688569472, "grad_norm": 4.570126056671143, "learning_rate": 5.931506849315067e-07, "log_odds_chosen": 1.0070054531097412, "log_odds_ratio": -0.4325868487358093, "logits/chosen": 0.69112229347229, "logits/rejected": 0.6901713013648987, "logps/chosen": -2.50338077545166, "logps/rejected": -3.4247968196868896, "loss": 0.7515, "nll_loss": 0.7081918120384216, "rewards/accuracies": 0.875, "rewards/chosen": -0.250338077545166, "rewards/margins": 0.09214161336421967, "rewards/rejected": -0.3424797058105469, "step": 2971 }, { "epoch": 8.136892539356605, "grad_norm": 4.5724077224731445, "learning_rate": 5.930136986301369e-07, "log_odds_chosen": 1.6888015270233154, "log_odds_ratio": -0.3182962238788605, "logits/chosen": 0.890124499797821, "logits/rejected": 0.8440899848937988, "logps/chosen": -2.5255191326141357, "logps/rejected": -4.074889183044434, "loss": 0.8102, "nll_loss": 0.7783973813056946, "rewards/accuracies": 0.875, "rewards/chosen": -0.2525519132614136, "rewards/margins": 0.1549370288848877, "rewards/rejected": -0.40748894214630127, "step": 2972 }, { "epoch": 8.139630390143736, "grad_norm": 5.920516490936279, "learning_rate": 5.928767123287671e-07, "log_odds_chosen": 2.1150362491607666, "log_odds_ratio": -0.40124061703681946, "logits/chosen": 0.6262638568878174, "logits/rejected": 0.6618683338165283, "logps/chosen": -3.0526046752929688, "logps/rejected": -5.112010478973389, "loss": 0.7839, "nll_loss": 0.7437916994094849, "rewards/accuracies": 0.75, "rewards/chosen": -0.30526044964790344, "rewards/margins": 0.2059405893087387, "rewards/rejected": -0.511201024055481, "step": 2973 }, { "epoch": 8.14236824093087, "grad_norm": 4.644294261932373, "learning_rate": 5.927397260273972e-07, "log_odds_chosen": 1.3266514539718628, "log_odds_ratio": -0.3240585923194885, "logits/chosen": 0.8062141537666321, "logits/rejected": 0.8334541320800781, "logps/chosen": -1.7482569217681885, "logps/rejected": -2.936347007751465, "loss": 0.695, "nll_loss": 0.6626030802726746, "rewards/accuracies": 1.0, "rewards/chosen": -0.17482568323612213, "rewards/margins": 0.11880902945995331, "rewards/rejected": -0.29363471269607544, "step": 2974 }, { "epoch": 8.145106091718002, "grad_norm": 4.53352689743042, "learning_rate": 5.926027397260273e-07, "log_odds_chosen": 1.1310715675354004, "log_odds_ratio": -0.3751733899116516, "logits/chosen": 0.9302143454551697, "logits/rejected": 0.9987466931343079, "logps/chosen": -2.6912293434143066, "logps/rejected": -3.7656774520874023, "loss": 0.815, "nll_loss": 0.7775307893753052, "rewards/accuracies": 1.0, "rewards/chosen": -0.2691229283809662, "rewards/margins": 0.10744481533765793, "rewards/rejected": -0.3765677511692047, "step": 2975 }, { "epoch": 8.147843942505133, "grad_norm": 5.031406402587891, "learning_rate": 5.924657534246575e-07, "log_odds_chosen": 1.513902187347412, "log_odds_ratio": -0.32787004113197327, "logits/chosen": 0.9545236825942993, "logits/rejected": 1.013969898223877, "logps/chosen": -2.9366207122802734, "logps/rejected": -4.3532304763793945, "loss": 0.7197, "nll_loss": 0.6869189739227295, "rewards/accuracies": 1.0, "rewards/chosen": -0.29366207122802734, "rewards/margins": 0.14166100323200226, "rewards/rejected": -0.4353230893611908, "step": 2976 }, { "epoch": 8.150581793292266, "grad_norm": 5.292628765106201, "learning_rate": 5.923287671232876e-07, "log_odds_chosen": 0.864094078540802, "log_odds_ratio": -0.4017091989517212, "logits/chosen": 1.018560528755188, "logits/rejected": 1.0011491775512695, "logps/chosen": -1.800693392753601, "logps/rejected": -2.54355525970459, "loss": 0.7279, "nll_loss": 0.6877539753913879, "rewards/accuracies": 0.875, "rewards/chosen": -0.18006932735443115, "rewards/margins": 0.07428619265556335, "rewards/rejected": -0.2543555200099945, "step": 2977 }, { "epoch": 8.153319644079398, "grad_norm": 7.420287132263184, "learning_rate": 5.921917808219177e-07, "log_odds_chosen": 1.5350072383880615, "log_odds_ratio": -0.6182965040206909, "logits/chosen": 1.0327080488204956, "logits/rejected": 0.966217041015625, "logps/chosen": -3.14416766166687, "logps/rejected": -4.66790771484375, "loss": 0.9159, "nll_loss": 0.8540378212928772, "rewards/accuracies": 0.625, "rewards/chosen": -0.314416766166687, "rewards/margins": 0.1523740142583847, "rewards/rejected": -0.4667907953262329, "step": 2978 }, { "epoch": 8.15605749486653, "grad_norm": 5.706361293792725, "learning_rate": 5.920547945205479e-07, "log_odds_chosen": 1.4111870527267456, "log_odds_ratio": -0.5376802682876587, "logits/chosen": 1.0901970863342285, "logits/rejected": 1.1038920879364014, "logps/chosen": -2.8336806297302246, "logps/rejected": -4.217057228088379, "loss": 0.7828, "nll_loss": 0.729081928730011, "rewards/accuracies": 0.75, "rewards/chosen": -0.2833680808544159, "rewards/margins": 0.13833768665790558, "rewards/rejected": -0.42170578241348267, "step": 2979 }, { "epoch": 8.158795345653662, "grad_norm": 8.2557373046875, "learning_rate": 5.91917808219178e-07, "log_odds_chosen": 1.6632236242294312, "log_odds_ratio": -0.3634495139122009, "logits/chosen": 1.0661617517471313, "logits/rejected": 1.0123951435089111, "logps/chosen": -2.869673252105713, "logps/rejected": -4.470881938934326, "loss": 0.9789, "nll_loss": 0.9425089955329895, "rewards/accuracies": 0.875, "rewards/chosen": -0.28696733713150024, "rewards/margins": 0.16012084484100342, "rewards/rejected": -0.44708818197250366, "step": 2980 }, { "epoch": 8.161533196440795, "grad_norm": 6.319578170776367, "learning_rate": 5.917808219178083e-07, "log_odds_chosen": 1.2462841272354126, "log_odds_ratio": -0.5002633929252625, "logits/chosen": 0.9318146705627441, "logits/rejected": 0.93190598487854, "logps/chosen": -2.576836347579956, "logps/rejected": -3.7528905868530273, "loss": 0.7938, "nll_loss": 0.7437961101531982, "rewards/accuracies": 0.625, "rewards/chosen": -0.2576836347579956, "rewards/margins": 0.11760540306568146, "rewards/rejected": -0.37528905272483826, "step": 2981 }, { "epoch": 8.164271047227926, "grad_norm": 4.731475830078125, "learning_rate": 5.916438356164383e-07, "log_odds_chosen": 1.297238826751709, "log_odds_ratio": -0.4380098581314087, "logits/chosen": 0.7415922284126282, "logits/rejected": 0.7063189744949341, "logps/chosen": -2.6375603675842285, "logps/rejected": -3.8699488639831543, "loss": 0.8557, "nll_loss": 0.8118683695793152, "rewards/accuracies": 0.625, "rewards/chosen": -0.26375603675842285, "rewards/margins": 0.12323885411024094, "rewards/rejected": -0.3869948983192444, "step": 2982 }, { "epoch": 8.167008898015059, "grad_norm": 4.8993401527404785, "learning_rate": 5.915068493150684e-07, "log_odds_chosen": 1.5423436164855957, "log_odds_ratio": -0.30210208892822266, "logits/chosen": 0.9814810752868652, "logits/rejected": 1.06168532371521, "logps/chosen": -2.275740623474121, "logps/rejected": -3.749377489089966, "loss": 0.8005, "nll_loss": 0.770312488079071, "rewards/accuracies": 1.0, "rewards/chosen": -0.22757408022880554, "rewards/margins": 0.14736367762088776, "rewards/rejected": -0.3749377727508545, "step": 2983 }, { "epoch": 8.16974674880219, "grad_norm": 4.369385719299316, "learning_rate": 5.913698630136987e-07, "log_odds_chosen": 1.7880752086639404, "log_odds_ratio": -0.4112493395805359, "logits/chosen": 0.9841498136520386, "logits/rejected": 1.0500377416610718, "logps/chosen": -2.896148681640625, "logps/rejected": -4.639996528625488, "loss": 0.7891, "nll_loss": 0.747940719127655, "rewards/accuracies": 0.75, "rewards/chosen": -0.28961485624313354, "rewards/margins": 0.17438483238220215, "rewards/rejected": -0.4639996886253357, "step": 2984 }, { "epoch": 8.172484599589323, "grad_norm": 3.9464056491851807, "learning_rate": 5.912328767123286e-07, "log_odds_chosen": 0.8627318143844604, "log_odds_ratio": -0.4814281165599823, "logits/chosen": 0.8189317584037781, "logits/rejected": 0.8144104480743408, "logps/chosen": -2.155325174331665, "logps/rejected": -2.9364659786224365, "loss": 0.768, "nll_loss": 0.7198989391326904, "rewards/accuracies": 0.75, "rewards/chosen": -0.21553251147270203, "rewards/margins": 0.07811406999826431, "rewards/rejected": -0.29364657402038574, "step": 2985 }, { "epoch": 8.175222450376454, "grad_norm": 4.49507474899292, "learning_rate": 5.910958904109589e-07, "log_odds_chosen": 1.4141775369644165, "log_odds_ratio": -0.40330541133880615, "logits/chosen": 0.7263683676719666, "logits/rejected": 0.7434173822402954, "logps/chosen": -1.9059925079345703, "logps/rejected": -3.1962053775787354, "loss": 0.8757, "nll_loss": 0.8354079723358154, "rewards/accuracies": 0.875, "rewards/chosen": -0.190599262714386, "rewards/margins": 0.12902125716209412, "rewards/rejected": -0.3196205198764801, "step": 2986 }, { "epoch": 8.177960301163587, "grad_norm": 5.826785087585449, "learning_rate": 5.909589041095891e-07, "log_odds_chosen": 1.760794758796692, "log_odds_ratio": -0.285807728767395, "logits/chosen": 0.7418403625488281, "logits/rejected": 0.6447702050209045, "logps/chosen": -2.19320011138916, "logps/rejected": -3.8418197631835938, "loss": 0.7612, "nll_loss": 0.7325984835624695, "rewards/accuracies": 1.0, "rewards/chosen": -0.21932002902030945, "rewards/margins": 0.16486194729804993, "rewards/rejected": -0.3841819763183594, "step": 2987 }, { "epoch": 8.180698151950718, "grad_norm": 4.357940673828125, "learning_rate": 5.908219178082192e-07, "log_odds_chosen": 1.7292687892913818, "log_odds_ratio": -0.32646647095680237, "logits/chosen": 0.7545416355133057, "logits/rejected": 0.788533091545105, "logps/chosen": -2.265376567840576, "logps/rejected": -3.8902082443237305, "loss": 0.7863, "nll_loss": 0.753629744052887, "rewards/accuracies": 0.875, "rewards/chosen": -0.22653764486312866, "rewards/margins": 0.16248318552970886, "rewards/rejected": -0.3890208601951599, "step": 2988 }, { "epoch": 8.183436002737851, "grad_norm": 5.164045333862305, "learning_rate": 5.906849315068493e-07, "log_odds_chosen": 1.3198115825653076, "log_odds_ratio": -0.3197367787361145, "logits/chosen": 0.8869919180870056, "logits/rejected": 0.9137017130851746, "logps/chosen": -2.2659027576446533, "logps/rejected": -3.4845633506774902, "loss": 0.747, "nll_loss": 0.7150441408157349, "rewards/accuracies": 1.0, "rewards/chosen": -0.22659027576446533, "rewards/margins": 0.12186606973409653, "rewards/rejected": -0.34845635294914246, "step": 2989 }, { "epoch": 8.186173853524982, "grad_norm": 7.172316074371338, "learning_rate": 5.905479452054795e-07, "log_odds_chosen": 1.8215460777282715, "log_odds_ratio": -0.5879685282707214, "logits/chosen": 0.7608284950256348, "logits/rejected": 0.7720062136650085, "logps/chosen": -2.642428159713745, "logps/rejected": -4.357791900634766, "loss": 0.8214, "nll_loss": 0.7625551819801331, "rewards/accuracies": 0.75, "rewards/chosen": -0.2642427980899811, "rewards/margins": 0.171536386013031, "rewards/rejected": -0.4357792139053345, "step": 2990 }, { "epoch": 8.188911704312115, "grad_norm": 5.102231979370117, "learning_rate": 5.904109589041096e-07, "log_odds_chosen": 1.7147729396820068, "log_odds_ratio": -0.34167543053627014, "logits/chosen": 0.7965062260627747, "logits/rejected": 0.6930382251739502, "logps/chosen": -2.375159740447998, "logps/rejected": -4.006289958953857, "loss": 0.922, "nll_loss": 0.887876570224762, "rewards/accuracies": 0.875, "rewards/chosen": -0.23751597106456757, "rewards/margins": 0.1631130427122116, "rewards/rejected": -0.4006290137767792, "step": 2991 }, { "epoch": 8.191649555099247, "grad_norm": 4.0932464599609375, "learning_rate": 5.902739726027397e-07, "log_odds_chosen": 2.321959972381592, "log_odds_ratio": -0.24210338294506073, "logits/chosen": 0.731429934501648, "logits/rejected": 0.7674852609634399, "logps/chosen": -2.3910467624664307, "logps/rejected": -4.6142683029174805, "loss": 0.8662, "nll_loss": 0.8420106172561646, "rewards/accuracies": 1.0, "rewards/chosen": -0.23910468816757202, "rewards/margins": 0.22232216596603394, "rewards/rejected": -0.46142688393592834, "step": 2992 }, { "epoch": 8.19438740588638, "grad_norm": 4.199321746826172, "learning_rate": 5.901369863013699e-07, "log_odds_chosen": 1.8423957824707031, "log_odds_ratio": -0.23562614619731903, "logits/chosen": 0.8815792798995972, "logits/rejected": 0.9505623579025269, "logps/chosen": -2.2944328784942627, "logps/rejected": -3.970102310180664, "loss": 0.8108, "nll_loss": 0.7871913313865662, "rewards/accuracies": 0.875, "rewards/chosen": -0.22944329679012299, "rewards/margins": 0.1675669401884079, "rewards/rejected": -0.3970102071762085, "step": 2993 }, { "epoch": 8.19712525667351, "grad_norm": 4.694896697998047, "learning_rate": 5.9e-07, "log_odds_chosen": 2.533698558807373, "log_odds_ratio": -0.27750325202941895, "logits/chosen": 0.6871851086616516, "logits/rejected": 0.6958956718444824, "logps/chosen": -1.702016830444336, "logps/rejected": -4.068077087402344, "loss": 0.7713, "nll_loss": 0.7435370683670044, "rewards/accuracies": 1.0, "rewards/chosen": -0.17020168900489807, "rewards/margins": 0.23660603165626526, "rewards/rejected": -0.40680772066116333, "step": 2994 }, { "epoch": 8.199863107460644, "grad_norm": 5.942811012268066, "learning_rate": 5.898630136986302e-07, "log_odds_chosen": 0.5974750518798828, "log_odds_ratio": -0.7062177062034607, "logits/chosen": 0.7837628126144409, "logits/rejected": 0.7838997840881348, "logps/chosen": -2.6916327476501465, "logps/rejected": -3.261353015899658, "loss": 0.8024, "nll_loss": 0.7317588329315186, "rewards/accuracies": 0.5, "rewards/chosen": -0.26916325092315674, "rewards/margins": 0.05697202309966087, "rewards/rejected": -0.3261353075504303, "step": 2995 }, { "epoch": 8.202600958247775, "grad_norm": 4.362284183502197, "learning_rate": 5.897260273972603e-07, "log_odds_chosen": 1.0754923820495605, "log_odds_ratio": -0.3279043138027191, "logits/chosen": 0.7888676524162292, "logits/rejected": 0.7329641580581665, "logps/chosen": -2.0876684188842773, "logps/rejected": -3.076524257659912, "loss": 0.7872, "nll_loss": 0.7543886303901672, "rewards/accuracies": 1.0, "rewards/chosen": -0.2087668478488922, "rewards/margins": 0.09888558089733124, "rewards/rejected": -0.30765244364738464, "step": 2996 }, { "epoch": 8.205338809034908, "grad_norm": 5.620078086853027, "learning_rate": 5.895890410958904e-07, "log_odds_chosen": 0.7910903096199036, "log_odds_ratio": -0.5033668279647827, "logits/chosen": 0.6177152395248413, "logits/rejected": 0.5625433921813965, "logps/chosen": -2.443937063217163, "logps/rejected": -3.1773219108581543, "loss": 0.9456, "nll_loss": 0.8952682018280029, "rewards/accuracies": 0.625, "rewards/chosen": -0.2443937063217163, "rewards/margins": 0.07333848625421524, "rewards/rejected": -0.31773218512535095, "step": 2997 }, { "epoch": 8.208076659822039, "grad_norm": 5.760426044464111, "learning_rate": 5.894520547945206e-07, "log_odds_chosen": 0.7604113817214966, "log_odds_ratio": -0.6502221822738647, "logits/chosen": 0.6639947891235352, "logits/rejected": 0.6135058403015137, "logps/chosen": -2.9473109245300293, "logps/rejected": -3.681739330291748, "loss": 0.8261, "nll_loss": 0.7611003518104553, "rewards/accuracies": 0.75, "rewards/chosen": -0.294731080532074, "rewards/margins": 0.07344284653663635, "rewards/rejected": -0.3681739568710327, "step": 2998 }, { "epoch": 8.210814510609172, "grad_norm": 4.542555332183838, "learning_rate": 5.893150684931507e-07, "log_odds_chosen": 1.2944446802139282, "log_odds_ratio": -0.37781697511672974, "logits/chosen": 0.7487121224403381, "logits/rejected": 0.7738856673240662, "logps/chosen": -2.733996868133545, "logps/rejected": -3.979790687561035, "loss": 0.8072, "nll_loss": 0.7694235444068909, "rewards/accuracies": 0.875, "rewards/chosen": -0.27339968085289, "rewards/margins": 0.12457937002182007, "rewards/rejected": -0.39797908067703247, "step": 2999 }, { "epoch": 8.213552361396303, "grad_norm": 3.9616057872772217, "learning_rate": 5.891780821917808e-07, "log_odds_chosen": 1.767175555229187, "log_odds_ratio": -0.24727770686149597, "logits/chosen": 0.9893690943717957, "logits/rejected": 1.029681921005249, "logps/chosen": -1.955714225769043, "logps/rejected": -3.553420066833496, "loss": 0.7005, "nll_loss": 0.6757338047027588, "rewards/accuracies": 1.0, "rewards/chosen": -0.1955714374780655, "rewards/margins": 0.15977056324481964, "rewards/rejected": -0.35534200072288513, "step": 3000 }, { "epoch": 8.216290212183436, "grad_norm": 3.9768924713134766, "learning_rate": 5.89041095890411e-07, "log_odds_chosen": 2.299576759338379, "log_odds_ratio": -0.23521959781646729, "logits/chosen": 0.9464870095252991, "logits/rejected": 0.9976441860198975, "logps/chosen": -2.3861308097839355, "logps/rejected": -4.590494155883789, "loss": 0.661, "nll_loss": 0.6374514698982239, "rewards/accuracies": 0.875, "rewards/chosen": -0.238613098859787, "rewards/margins": 0.22043633460998535, "rewards/rejected": -0.45904940366744995, "step": 3001 }, { "epoch": 8.219028062970569, "grad_norm": 4.495428562164307, "learning_rate": 5.88904109589041e-07, "log_odds_chosen": 0.8954278230667114, "log_odds_ratio": -0.404937207698822, "logits/chosen": 0.6811748147010803, "logits/rejected": 0.7311401963233948, "logps/chosen": -2.984645128250122, "logps/rejected": -3.8260350227355957, "loss": 0.978, "nll_loss": 0.9375238418579102, "rewards/accuracies": 1.0, "rewards/chosen": -0.29846450686454773, "rewards/margins": 0.08413900434970856, "rewards/rejected": -0.3826034963130951, "step": 3002 }, { "epoch": 8.2217659137577, "grad_norm": 4.85382080078125, "learning_rate": 5.887671232876712e-07, "log_odds_chosen": 0.7722988724708557, "log_odds_ratio": -0.6705282926559448, "logits/chosen": 1.0162688493728638, "logits/rejected": 1.1577682495117188, "logps/chosen": -3.041538953781128, "logps/rejected": -3.7943055629730225, "loss": 0.791, "nll_loss": 0.7239870429039001, "rewards/accuracies": 0.625, "rewards/chosen": -0.3041539192199707, "rewards/margins": 0.07527664303779602, "rewards/rejected": -0.3794305622577667, "step": 3003 }, { "epoch": 8.224503764544833, "grad_norm": 3.650797128677368, "learning_rate": 5.886301369863014e-07, "log_odds_chosen": 1.8605129718780518, "log_odds_ratio": -0.18677359819412231, "logits/chosen": 0.9185752868652344, "logits/rejected": 0.9818074107170105, "logps/chosen": -2.4429166316986084, "logps/rejected": -4.19559383392334, "loss": 0.7184, "nll_loss": 0.6997411251068115, "rewards/accuracies": 1.0, "rewards/chosen": -0.24429166316986084, "rewards/margins": 0.17526772618293762, "rewards/rejected": -0.4195593595504761, "step": 3004 }, { "epoch": 8.227241615331964, "grad_norm": 4.763111114501953, "learning_rate": 5.884931506849315e-07, "log_odds_chosen": 1.094557523727417, "log_odds_ratio": -0.46186840534210205, "logits/chosen": 1.0065096616744995, "logits/rejected": 0.9637640714645386, "logps/chosen": -2.1077187061309814, "logps/rejected": -3.0713179111480713, "loss": 0.7923, "nll_loss": 0.7461411952972412, "rewards/accuracies": 0.875, "rewards/chosen": -0.21077187359333038, "rewards/margins": 0.09635992348194122, "rewards/rejected": -0.3071317970752716, "step": 3005 }, { "epoch": 8.229979466119097, "grad_norm": 4.556416034698486, "learning_rate": 5.883561643835616e-07, "log_odds_chosen": 1.6736457347869873, "log_odds_ratio": -0.32871580123901367, "logits/chosen": 0.8901461362838745, "logits/rejected": 0.9741151332855225, "logps/chosen": -2.7978899478912354, "logps/rejected": -4.423384666442871, "loss": 0.7089, "nll_loss": 0.6760426759719849, "rewards/accuracies": 0.75, "rewards/chosen": -0.279789000749588, "rewards/margins": 0.1625494807958603, "rewards/rejected": -0.4423384666442871, "step": 3006 }, { "epoch": 8.232717316906228, "grad_norm": 4.274418830871582, "learning_rate": 5.882191780821918e-07, "log_odds_chosen": 2.930532455444336, "log_odds_ratio": -0.18347401916980743, "logits/chosen": 0.8250740766525269, "logits/rejected": 0.8681354522705078, "logps/chosen": -2.4505434036254883, "logps/rejected": -5.300808906555176, "loss": 0.8055, "nll_loss": 0.7871627807617188, "rewards/accuracies": 1.0, "rewards/chosen": -0.24505434930324554, "rewards/margins": 0.28502655029296875, "rewards/rejected": -0.5300808548927307, "step": 3007 }, { "epoch": 8.235455167693361, "grad_norm": 4.338803291320801, "learning_rate": 5.880821917808219e-07, "log_odds_chosen": 0.7985994815826416, "log_odds_ratio": -0.4788492023944855, "logits/chosen": 0.680381178855896, "logits/rejected": 0.639635443687439, "logps/chosen": -1.9724515676498413, "logps/rejected": -2.6876094341278076, "loss": 0.8424, "nll_loss": 0.7945359945297241, "rewards/accuracies": 0.875, "rewards/chosen": -0.19724515080451965, "rewards/margins": 0.07151578366756439, "rewards/rejected": -0.26876091957092285, "step": 3008 }, { "epoch": 8.238193018480493, "grad_norm": 4.662453651428223, "learning_rate": 5.879452054794521e-07, "log_odds_chosen": 2.493887424468994, "log_odds_ratio": -0.22858627140522003, "logits/chosen": 1.070146083831787, "logits/rejected": 1.091301441192627, "logps/chosen": -1.9125027656555176, "logps/rejected": -4.283426284790039, "loss": 0.6543, "nll_loss": 0.6314557790756226, "rewards/accuracies": 1.0, "rewards/chosen": -0.1912502646446228, "rewards/margins": 0.23709239065647125, "rewards/rejected": -0.42834264039993286, "step": 3009 }, { "epoch": 8.240930869267626, "grad_norm": 4.726446151733398, "learning_rate": 5.878082191780822e-07, "log_odds_chosen": 0.8409188985824585, "log_odds_ratio": -0.41740620136260986, "logits/chosen": 0.7291065454483032, "logits/rejected": 0.695044755935669, "logps/chosen": -2.0888662338256836, "logps/rejected": -2.8653125762939453, "loss": 0.8477, "nll_loss": 0.8059373497962952, "rewards/accuracies": 0.875, "rewards/chosen": -0.20888663828372955, "rewards/margins": 0.07764460891485214, "rewards/rejected": -0.2865312397480011, "step": 3010 }, { "epoch": 8.243668720054757, "grad_norm": 4.560963153839111, "learning_rate": 5.876712328767123e-07, "log_odds_chosen": 1.472666621208191, "log_odds_ratio": -0.36568161845207214, "logits/chosen": 1.027491807937622, "logits/rejected": 1.0623910427093506, "logps/chosen": -1.913996696472168, "logps/rejected": -3.3018155097961426, "loss": 0.6834, "nll_loss": 0.6468778252601624, "rewards/accuracies": 0.75, "rewards/chosen": -0.1913996785879135, "rewards/margins": 0.13878189027309418, "rewards/rejected": -0.3301815688610077, "step": 3011 }, { "epoch": 8.24640657084189, "grad_norm": 6.873880386352539, "learning_rate": 5.875342465753425e-07, "log_odds_chosen": 0.9896604418754578, "log_odds_ratio": -0.48497867584228516, "logits/chosen": 1.0138347148895264, "logits/rejected": 1.081339716911316, "logps/chosen": -3.0466437339782715, "logps/rejected": -3.963310718536377, "loss": 0.6836, "nll_loss": 0.6351206302642822, "rewards/accuracies": 0.75, "rewards/chosen": -0.30466437339782715, "rewards/margins": 0.09166672080755234, "rewards/rejected": -0.3963311016559601, "step": 3012 }, { "epoch": 8.24914442162902, "grad_norm": 6.393824100494385, "learning_rate": 5.873972602739726e-07, "log_odds_chosen": 1.1018787622451782, "log_odds_ratio": -0.602323591709137, "logits/chosen": 0.7756847739219666, "logits/rejected": 0.8012381196022034, "logps/chosen": -2.4627737998962402, "logps/rejected": -3.5009238719940186, "loss": 0.7899, "nll_loss": 0.7296890616416931, "rewards/accuracies": 0.875, "rewards/chosen": -0.2462773621082306, "rewards/margins": 0.1038149893283844, "rewards/rejected": -0.3500923812389374, "step": 3013 }, { "epoch": 8.251882272416154, "grad_norm": 4.9447503089904785, "learning_rate": 5.872602739726027e-07, "log_odds_chosen": 1.9251201152801514, "log_odds_ratio": -0.3391197621822357, "logits/chosen": 0.9553696513175964, "logits/rejected": 0.9595475792884827, "logps/chosen": -2.2656259536743164, "logps/rejected": -4.081357002258301, "loss": 0.828, "nll_loss": 0.7940600514411926, "rewards/accuracies": 0.875, "rewards/chosen": -0.22656257450580597, "rewards/margins": 0.18157310783863068, "rewards/rejected": -0.40813568234443665, "step": 3014 }, { "epoch": 8.254620123203285, "grad_norm": 4.5920515060424805, "learning_rate": 5.871232876712329e-07, "log_odds_chosen": 2.9683732986450195, "log_odds_ratio": -0.13403922319412231, "logits/chosen": 0.9257215261459351, "logits/rejected": 0.9301630854606628, "logps/chosen": -2.056128978729248, "logps/rejected": -4.8812642097473145, "loss": 0.7049, "nll_loss": 0.6914790868759155, "rewards/accuracies": 1.0, "rewards/chosen": -0.205612912774086, "rewards/margins": 0.28251349925994873, "rewards/rejected": -0.4881264269351959, "step": 3015 }, { "epoch": 8.257357973990418, "grad_norm": 4.656325817108154, "learning_rate": 5.869863013698629e-07, "log_odds_chosen": 2.9160146713256836, "log_odds_ratio": -0.23318824172019958, "logits/chosen": 0.8186264634132385, "logits/rejected": 0.8100296854972839, "logps/chosen": -2.2134056091308594, "logps/rejected": -5.022284507751465, "loss": 0.7638, "nll_loss": 0.7404729127883911, "rewards/accuracies": 0.875, "rewards/chosen": -0.22134056687355042, "rewards/margins": 0.2808878719806671, "rewards/rejected": -0.5022284388542175, "step": 3016 }, { "epoch": 8.260095824777549, "grad_norm": 4.6486496925354, "learning_rate": 5.868493150684931e-07, "log_odds_chosen": 2.4427969455718994, "log_odds_ratio": -0.25050902366638184, "logits/chosen": 0.9432145357131958, "logits/rejected": 0.9789023399353027, "logps/chosen": -2.2161221504211426, "logps/rejected": -4.465184211730957, "loss": 0.7485, "nll_loss": 0.7234804630279541, "rewards/accuracies": 0.875, "rewards/chosen": -0.22161221504211426, "rewards/margins": 0.22490623593330383, "rewards/rejected": -0.4465184211730957, "step": 3017 }, { "epoch": 8.262833675564682, "grad_norm": 4.195676803588867, "learning_rate": 5.867123287671233e-07, "log_odds_chosen": 1.6349995136260986, "log_odds_ratio": -0.2758576273918152, "logits/chosen": 0.8339036703109741, "logits/rejected": 0.8709702491760254, "logps/chosen": -2.261837959289551, "logps/rejected": -3.8189969062805176, "loss": 0.7105, "nll_loss": 0.6829460263252258, "rewards/accuracies": 0.875, "rewards/chosen": -0.22618380188941956, "rewards/margins": 0.1557159125804901, "rewards/rejected": -0.38189971446990967, "step": 3018 }, { "epoch": 8.265571526351813, "grad_norm": 6.1106038093566895, "learning_rate": 5.865753424657534e-07, "log_odds_chosen": 0.621134877204895, "log_odds_ratio": -0.5884823799133301, "logits/chosen": 0.8751068711280823, "logits/rejected": 0.8662265539169312, "logps/chosen": -2.3025333881378174, "logps/rejected": -2.880824565887451, "loss": 0.8338, "nll_loss": 0.7749462127685547, "rewards/accuracies": 0.625, "rewards/chosen": -0.23025335371494293, "rewards/margins": 0.057829100638628006, "rewards/rejected": -0.28808245062828064, "step": 3019 }, { "epoch": 8.268309377138946, "grad_norm": 4.471856594085693, "learning_rate": 5.864383561643835e-07, "log_odds_chosen": 2.3641715049743652, "log_odds_ratio": -0.2389611005783081, "logits/chosen": 1.096037745475769, "logits/rejected": 1.047787070274353, "logps/chosen": -2.308328628540039, "logps/rejected": -4.586848258972168, "loss": 0.7722, "nll_loss": 0.7483316659927368, "rewards/accuracies": 1.0, "rewards/chosen": -0.23083284497261047, "rewards/margins": 0.22785192728042603, "rewards/rejected": -0.4586847722530365, "step": 3020 }, { "epoch": 8.271047227926077, "grad_norm": 5.109471321105957, "learning_rate": 5.863013698630137e-07, "log_odds_chosen": 2.0468320846557617, "log_odds_ratio": -0.28275635838508606, "logits/chosen": 0.8406634330749512, "logits/rejected": 0.8689490556716919, "logps/chosen": -2.5796308517456055, "logps/rejected": -4.559586524963379, "loss": 0.8848, "nll_loss": 0.8565630912780762, "rewards/accuracies": 0.875, "rewards/chosen": -0.25796306133270264, "rewards/margins": 0.1979956030845642, "rewards/rejected": -0.45595866441726685, "step": 3021 }, { "epoch": 8.27378507871321, "grad_norm": 3.9539341926574707, "learning_rate": 5.861643835616438e-07, "log_odds_chosen": 1.9100934267044067, "log_odds_ratio": -0.2492361068725586, "logits/chosen": 0.7346903085708618, "logits/rejected": 0.7434706091880798, "logps/chosen": -2.5688095092773438, "logps/rejected": -4.412994384765625, "loss": 0.7822, "nll_loss": 0.7572372555732727, "rewards/accuracies": 1.0, "rewards/chosen": -0.2568809390068054, "rewards/margins": 0.18441849946975708, "rewards/rejected": -0.4412994384765625, "step": 3022 }, { "epoch": 8.276522929500342, "grad_norm": 4.152256011962891, "learning_rate": 5.860273972602739e-07, "log_odds_chosen": 1.9673871994018555, "log_odds_ratio": -0.263393759727478, "logits/chosen": 0.5886716246604919, "logits/rejected": 0.5634686350822449, "logps/chosen": -1.8204021453857422, "logps/rejected": -3.6213316917419434, "loss": 0.8223, "nll_loss": 0.7959113121032715, "rewards/accuracies": 1.0, "rewards/chosen": -0.18204021453857422, "rewards/margins": 0.1800929754972458, "rewards/rejected": -0.3621332049369812, "step": 3023 }, { "epoch": 8.279260780287474, "grad_norm": 4.037786960601807, "learning_rate": 5.858904109589041e-07, "log_odds_chosen": 1.9715502262115479, "log_odds_ratio": -0.3280319571495056, "logits/chosen": 0.6217803955078125, "logits/rejected": 0.6824820637702942, "logps/chosen": -2.2958152294158936, "logps/rejected": -4.157341957092285, "loss": 0.8774, "nll_loss": 0.8445613384246826, "rewards/accuracies": 0.875, "rewards/chosen": -0.2295815348625183, "rewards/margins": 0.1861526370048523, "rewards/rejected": -0.4157341718673706, "step": 3024 }, { "epoch": 8.281998631074606, "grad_norm": 4.743382930755615, "learning_rate": 5.857534246575342e-07, "log_odds_chosen": 1.0433508157730103, "log_odds_ratio": -0.42270317673683167, "logits/chosen": 0.9114940166473389, "logits/rejected": 0.9305267333984375, "logps/chosen": -2.691542863845825, "logps/rejected": -3.682180166244507, "loss": 0.768, "nll_loss": 0.7257170081138611, "rewards/accuracies": 0.875, "rewards/chosen": -0.26915428042411804, "rewards/margins": 0.09906372427940369, "rewards/rejected": -0.36821800470352173, "step": 3025 }, { "epoch": 8.284736481861739, "grad_norm": 5.904750823974609, "learning_rate": 5.856164383561644e-07, "log_odds_chosen": 1.1558599472045898, "log_odds_ratio": -0.462541401386261, "logits/chosen": 1.107001543045044, "logits/rejected": 1.1916707754135132, "logps/chosen": -3.454326629638672, "logps/rejected": -4.587442398071289, "loss": 0.7763, "nll_loss": 0.7300125360488892, "rewards/accuracies": 0.75, "rewards/chosen": -0.34543269872665405, "rewards/margins": 0.11331158876419067, "rewards/rejected": -0.45874425768852234, "step": 3026 }, { "epoch": 8.28747433264887, "grad_norm": 4.336629867553711, "learning_rate": 5.854794520547945e-07, "log_odds_chosen": 2.3053083419799805, "log_odds_ratio": -0.2355966866016388, "logits/chosen": 1.0750069618225098, "logits/rejected": 1.1269841194152832, "logps/chosen": -3.029823064804077, "logps/rejected": -5.288558006286621, "loss": 0.8149, "nll_loss": 0.7913782000541687, "rewards/accuracies": 0.875, "rewards/chosen": -0.30298230051994324, "rewards/margins": 0.22587351500988007, "rewards/rejected": -0.5288558006286621, "step": 3027 }, { "epoch": 8.290212183436003, "grad_norm": 4.657855033874512, "learning_rate": 5.853424657534246e-07, "log_odds_chosen": 2.408268928527832, "log_odds_ratio": -0.20120874047279358, "logits/chosen": 0.8513889908790588, "logits/rejected": 0.8749730587005615, "logps/chosen": -2.4619975090026855, "logps/rejected": -4.741182804107666, "loss": 0.7397, "nll_loss": 0.7196090221405029, "rewards/accuracies": 1.0, "rewards/chosen": -0.24619975686073303, "rewards/margins": 0.22791849076747894, "rewards/rejected": -0.47411826252937317, "step": 3028 }, { "epoch": 8.292950034223136, "grad_norm": 4.54287576675415, "learning_rate": 5.852054794520548e-07, "log_odds_chosen": 2.515726327896118, "log_odds_ratio": -0.2113131284713745, "logits/chosen": 0.9857548475265503, "logits/rejected": 1.096144199371338, "logps/chosen": -2.487200975418091, "logps/rejected": -4.921970367431641, "loss": 0.7535, "nll_loss": 0.7323499917984009, "rewards/accuracies": 1.0, "rewards/chosen": -0.24872010946273804, "rewards/margins": 0.24347692728042603, "rewards/rejected": -0.49219703674316406, "step": 3029 }, { "epoch": 8.295687885010267, "grad_norm": 4.269098281860352, "learning_rate": 5.850684931506848e-07, "log_odds_chosen": 1.842386245727539, "log_odds_ratio": -0.35905277729034424, "logits/chosen": 0.8328631520271301, "logits/rejected": 0.788798451423645, "logps/chosen": -1.6368181705474854, "logps/rejected": -3.3551676273345947, "loss": 0.8334, "nll_loss": 0.7974720597267151, "rewards/accuracies": 1.0, "rewards/chosen": -0.16368183493614197, "rewards/margins": 0.17183491587638855, "rewards/rejected": -0.3355167508125305, "step": 3030 }, { "epoch": 8.2984257357974, "grad_norm": 4.198888301849365, "learning_rate": 5.84931506849315e-07, "log_odds_chosen": 1.7036232948303223, "log_odds_ratio": -0.2596980035305023, "logits/chosen": 1.0605833530426025, "logits/rejected": 1.0846461057662964, "logps/chosen": -2.347107410430908, "logps/rejected": -3.9425368309020996, "loss": 0.7726, "nll_loss": 0.746620237827301, "rewards/accuracies": 1.0, "rewards/chosen": -0.23471075296401978, "rewards/margins": 0.1595429629087448, "rewards/rejected": -0.3942537009716034, "step": 3031 }, { "epoch": 8.301163586584531, "grad_norm": 5.95847749710083, "learning_rate": 5.847945205479452e-07, "log_odds_chosen": 2.1034271717071533, "log_odds_ratio": -0.3468787372112274, "logits/chosen": 0.9591823816299438, "logits/rejected": 0.8945977687835693, "logps/chosen": -2.0993072986602783, "logps/rejected": -3.988369941711426, "loss": 0.7539, "nll_loss": 0.7192310094833374, "rewards/accuracies": 0.875, "rewards/chosen": -0.20993074774742126, "rewards/margins": 0.18890626728534698, "rewards/rejected": -0.39883697032928467, "step": 3032 }, { "epoch": 8.303901437371664, "grad_norm": 4.257450580596924, "learning_rate": 5.846575342465753e-07, "log_odds_chosen": 1.411902904510498, "log_odds_ratio": -0.2988240122795105, "logits/chosen": 0.9684176445007324, "logits/rejected": 0.9524875283241272, "logps/chosen": -2.0583972930908203, "logps/rejected": -3.3424370288848877, "loss": 0.7435, "nll_loss": 0.7136099338531494, "rewards/accuracies": 1.0, "rewards/chosen": -0.20583972334861755, "rewards/margins": 0.12840399146080017, "rewards/rejected": -0.3342437148094177, "step": 3033 }, { "epoch": 8.306639288158795, "grad_norm": 5.4961748123168945, "learning_rate": 5.845205479452054e-07, "log_odds_chosen": 1.274613857269287, "log_odds_ratio": -0.43792590498924255, "logits/chosen": 0.8832208514213562, "logits/rejected": 0.9264684915542603, "logps/chosen": -2.221088171005249, "logps/rejected": -3.394224166870117, "loss": 0.6947, "nll_loss": 0.6509217619895935, "rewards/accuracies": 0.75, "rewards/chosen": -0.22210881114006042, "rewards/margins": 0.11731360852718353, "rewards/rejected": -0.33942240476608276, "step": 3034 }, { "epoch": 8.309377138945928, "grad_norm": 6.556037902832031, "learning_rate": 5.843835616438356e-07, "log_odds_chosen": 0.8137116432189941, "log_odds_ratio": -0.5678200721740723, "logits/chosen": 0.814077615737915, "logits/rejected": 0.8921462893486023, "logps/chosen": -2.6314573287963867, "logps/rejected": -3.3653810024261475, "loss": 0.8756, "nll_loss": 0.8187844157218933, "rewards/accuracies": 0.75, "rewards/chosen": -0.2631457448005676, "rewards/margins": 0.0733923688530922, "rewards/rejected": -0.3365381360054016, "step": 3035 }, { "epoch": 8.31211498973306, "grad_norm": 4.400460243225098, "learning_rate": 5.842465753424657e-07, "log_odds_chosen": 1.2312837839126587, "log_odds_ratio": -0.4032633900642395, "logits/chosen": 0.869225800037384, "logits/rejected": 0.9097456336021423, "logps/chosen": -2.1961755752563477, "logps/rejected": -3.3660082817077637, "loss": 0.7412, "nll_loss": 0.7008766531944275, "rewards/accuracies": 0.875, "rewards/chosen": -0.219617560505867, "rewards/margins": 0.11698327213525772, "rewards/rejected": -0.3366008400917053, "step": 3036 }, { "epoch": 8.314852840520192, "grad_norm": 4.17297887802124, "learning_rate": 5.841095890410958e-07, "log_odds_chosen": 1.3927282094955444, "log_odds_ratio": -0.31889450550079346, "logits/chosen": 0.9066405892372131, "logits/rejected": 0.8937912583351135, "logps/chosen": -2.0477488040924072, "logps/rejected": -3.3153250217437744, "loss": 0.7553, "nll_loss": 0.7234452366828918, "rewards/accuracies": 0.875, "rewards/chosen": -0.2047748863697052, "rewards/margins": 0.12675759196281433, "rewards/rejected": -0.33153247833251953, "step": 3037 }, { "epoch": 8.317590691307323, "grad_norm": 5.539182186126709, "learning_rate": 5.83972602739726e-07, "log_odds_chosen": 1.408984899520874, "log_odds_ratio": -0.26079028844833374, "logits/chosen": 0.7861582040786743, "logits/rejected": 0.7648282051086426, "logps/chosen": -1.7942638397216797, "logps/rejected": -3.056098699569702, "loss": 0.6926, "nll_loss": 0.6665032505989075, "rewards/accuracies": 1.0, "rewards/chosen": -0.1794263869524002, "rewards/margins": 0.12618348002433777, "rewards/rejected": -0.3056098520755768, "step": 3038 }, { "epoch": 8.320328542094456, "grad_norm": 4.906067848205566, "learning_rate": 5.838356164383561e-07, "log_odds_chosen": 0.39023685455322266, "log_odds_ratio": -0.5906424522399902, "logits/chosen": 0.6815754771232605, "logits/rejected": 0.6368780136108398, "logps/chosen": -2.317498207092285, "logps/rejected": -2.6796278953552246, "loss": 0.8377, "nll_loss": 0.7786155343055725, "rewards/accuracies": 0.5, "rewards/chosen": -0.23174983263015747, "rewards/margins": 0.0362129807472229, "rewards/rejected": -0.26796281337738037, "step": 3039 }, { "epoch": 8.323066392881588, "grad_norm": 5.051379680633545, "learning_rate": 5.836986301369863e-07, "log_odds_chosen": 2.2687597274780273, "log_odds_ratio": -0.2714463174343109, "logits/chosen": 1.0868592262268066, "logits/rejected": 1.0150905847549438, "logps/chosen": -2.2072744369506836, "logps/rejected": -4.34730339050293, "loss": 0.7551, "nll_loss": 0.7279469966888428, "rewards/accuracies": 0.875, "rewards/chosen": -0.22072744369506836, "rewards/margins": 0.21400290727615356, "rewards/rejected": -0.4347303509712219, "step": 3040 }, { "epoch": 8.32580424366872, "grad_norm": 4.7118611335754395, "learning_rate": 5.835616438356164e-07, "log_odds_chosen": 1.2511181831359863, "log_odds_ratio": -0.3162423372268677, "logits/chosen": 0.8056313991546631, "logits/rejected": 0.7419646382331848, "logps/chosen": -2.2711150646209717, "logps/rejected": -3.4553143978118896, "loss": 0.7929, "nll_loss": 0.7612869143486023, "rewards/accuracies": 0.875, "rewards/chosen": -0.22711151838302612, "rewards/margins": 0.11841994524002075, "rewards/rejected": -0.3455314636230469, "step": 3041 }, { "epoch": 8.328542094455852, "grad_norm": 4.403916835784912, "learning_rate": 5.834246575342465e-07, "log_odds_chosen": 3.282992362976074, "log_odds_ratio": -0.24550211429595947, "logits/chosen": 0.710757851600647, "logits/rejected": 0.6531474590301514, "logps/chosen": -2.166795492172241, "logps/rejected": -5.342437744140625, "loss": 0.8235, "nll_loss": 0.7989954948425293, "rewards/accuracies": 1.0, "rewards/chosen": -0.21667955815792084, "rewards/margins": 0.3175641894340515, "rewards/rejected": -0.5342437028884888, "step": 3042 }, { "epoch": 8.331279945242985, "grad_norm": 4.235513210296631, "learning_rate": 5.832876712328767e-07, "log_odds_chosen": 2.6628026962280273, "log_odds_ratio": -0.21103951334953308, "logits/chosen": 0.8728872537612915, "logits/rejected": 0.8845118284225464, "logps/chosen": -2.0593314170837402, "logps/rejected": -4.543410301208496, "loss": 0.6894, "nll_loss": 0.6682526469230652, "rewards/accuracies": 1.0, "rewards/chosen": -0.20593313872814178, "rewards/margins": 0.24840790033340454, "rewards/rejected": -0.4543410539627075, "step": 3043 }, { "epoch": 8.334017796030116, "grad_norm": 4.33862829208374, "learning_rate": 5.831506849315068e-07, "log_odds_chosen": 1.9298838376998901, "log_odds_ratio": -0.33462804555892944, "logits/chosen": 0.848950207233429, "logits/rejected": 0.8479541540145874, "logps/chosen": -2.172126293182373, "logps/rejected": -4.022653579711914, "loss": 0.8421, "nll_loss": 0.808602511882782, "rewards/accuracies": 1.0, "rewards/chosen": -0.21721264719963074, "rewards/margins": 0.18505272269248962, "rewards/rejected": -0.40226536989212036, "step": 3044 }, { "epoch": 8.336755646817249, "grad_norm": 6.524210453033447, "learning_rate": 5.830136986301369e-07, "log_odds_chosen": 2.429720401763916, "log_odds_ratio": -0.45875659584999084, "logits/chosen": 1.1317013502120972, "logits/rejected": 1.2131507396697998, "logps/chosen": -3.163181781768799, "logps/rejected": -5.440277576446533, "loss": 0.8188, "nll_loss": 0.7729211449623108, "rewards/accuracies": 0.75, "rewards/chosen": -0.31631818413734436, "rewards/margins": 0.22770956158638, "rewards/rejected": -0.5440277457237244, "step": 3045 }, { "epoch": 8.33949349760438, "grad_norm": 5.240801811218262, "learning_rate": 5.828767123287671e-07, "log_odds_chosen": 4.016049385070801, "log_odds_ratio": -0.04513963311910629, "logits/chosen": 1.2256522178649902, "logits/rejected": 1.2780094146728516, "logps/chosen": -2.4664108753204346, "logps/rejected": -6.358628273010254, "loss": 0.7061, "nll_loss": 0.7015432119369507, "rewards/accuracies": 1.0, "rewards/chosen": -0.2466410994529724, "rewards/margins": 0.389221727848053, "rewards/rejected": -0.6358628273010254, "step": 3046 }, { "epoch": 8.342231348391513, "grad_norm": 4.672215461730957, "learning_rate": 5.827397260273972e-07, "log_odds_chosen": 2.6830594539642334, "log_odds_ratio": -0.15117020905017853, "logits/chosen": 0.772129476070404, "logits/rejected": 0.6740652322769165, "logps/chosen": -1.724134922027588, "logps/rejected": -4.223794460296631, "loss": 0.836, "nll_loss": 0.8208773732185364, "rewards/accuracies": 1.0, "rewards/chosen": -0.17241349816322327, "rewards/margins": 0.24996596574783325, "rewards/rejected": -0.42237943410873413, "step": 3047 }, { "epoch": 8.344969199178644, "grad_norm": 11.1813383102417, "learning_rate": 5.826027397260273e-07, "log_odds_chosen": -0.009729772806167603, "log_odds_ratio": -0.8771126866340637, "logits/chosen": 0.6517458558082581, "logits/rejected": 0.5947548747062683, "logps/chosen": -3.1022448539733887, "logps/rejected": -3.06766414642334, "loss": 0.8912, "nll_loss": 0.8034652471542358, "rewards/accuracies": 0.375, "rewards/chosen": -0.3102244734764099, "rewards/margins": -0.003458067774772644, "rewards/rejected": -0.30676645040512085, "step": 3048 }, { "epoch": 8.347707049965777, "grad_norm": 6.435463905334473, "learning_rate": 5.824657534246575e-07, "log_odds_chosen": 2.8086209297180176, "log_odds_ratio": -0.2978720963001251, "logits/chosen": 1.1204699277877808, "logits/rejected": 1.1058752536773682, "logps/chosen": -2.428220272064209, "logps/rejected": -5.180868625640869, "loss": 0.8214, "nll_loss": 0.791610598564148, "rewards/accuracies": 0.875, "rewards/chosen": -0.2428220510482788, "rewards/margins": 0.27526479959487915, "rewards/rejected": -0.5180869102478027, "step": 3049 }, { "epoch": 8.350444900752908, "grad_norm": 4.501232624053955, "learning_rate": 5.823287671232876e-07, "log_odds_chosen": 1.3036158084869385, "log_odds_ratio": -0.32621991634368896, "logits/chosen": 0.8546558022499084, "logits/rejected": 0.8818753361701965, "logps/chosen": -1.6452603340148926, "logps/rejected": -2.7967097759246826, "loss": 0.7447, "nll_loss": 0.7120678424835205, "rewards/accuracies": 0.875, "rewards/chosen": -0.1645260453224182, "rewards/margins": 0.11514493077993393, "rewards/rejected": -0.27967098355293274, "step": 3050 }, { "epoch": 8.353182751540041, "grad_norm": 4.462782859802246, "learning_rate": 5.821917808219177e-07, "log_odds_chosen": 1.5860811471939087, "log_odds_ratio": -0.34802913665771484, "logits/chosen": 0.8687341213226318, "logits/rejected": 0.8210190534591675, "logps/chosen": -1.7446556091308594, "logps/rejected": -3.222257137298584, "loss": 0.7034, "nll_loss": 0.6685964465141296, "rewards/accuracies": 1.0, "rewards/chosen": -0.17446555197238922, "rewards/margins": 0.14776015281677246, "rewards/rejected": -0.3222257196903229, "step": 3051 }, { "epoch": 8.355920602327172, "grad_norm": 4.401371002197266, "learning_rate": 5.820547945205479e-07, "log_odds_chosen": 2.17283034324646, "log_odds_ratio": -0.2764401137828827, "logits/chosen": 0.7854945659637451, "logits/rejected": 0.7616925835609436, "logps/chosen": -1.842166781425476, "logps/rejected": -3.8782148361206055, "loss": 0.8182, "nll_loss": 0.7905332446098328, "rewards/accuracies": 1.0, "rewards/chosen": -0.1842166781425476, "rewards/margins": 0.2036047875881195, "rewards/rejected": -0.3878214955329895, "step": 3052 }, { "epoch": 8.358658453114305, "grad_norm": 5.203064441680908, "learning_rate": 5.81917808219178e-07, "log_odds_chosen": 1.6748944520950317, "log_odds_ratio": -0.24394255876541138, "logits/chosen": 1.0794354677200317, "logits/rejected": 1.099646806716919, "logps/chosen": -2.180452346801758, "logps/rejected": -3.751279354095459, "loss": 0.7244, "nll_loss": 0.7000223994255066, "rewards/accuracies": 1.0, "rewards/chosen": -0.21804523468017578, "rewards/margins": 0.1570827066898346, "rewards/rejected": -0.375127911567688, "step": 3053 }, { "epoch": 8.361396303901437, "grad_norm": 4.130885601043701, "learning_rate": 5.817808219178082e-07, "log_odds_chosen": 1.4159495830535889, "log_odds_ratio": -0.3923097252845764, "logits/chosen": 0.7713402509689331, "logits/rejected": 0.7592569589614868, "logps/chosen": -2.3077855110168457, "logps/rejected": -3.6762855052948, "loss": 0.8431, "nll_loss": 0.8039188385009766, "rewards/accuracies": 0.875, "rewards/chosen": -0.2307785600423813, "rewards/margins": 0.1368499994277954, "rewards/rejected": -0.3676285445690155, "step": 3054 }, { "epoch": 8.36413415468857, "grad_norm": 4.976696491241455, "learning_rate": 5.816438356164383e-07, "log_odds_chosen": 2.353151559829712, "log_odds_ratio": -0.23801137506961823, "logits/chosen": 0.83809894323349, "logits/rejected": 0.8578913807868958, "logps/chosen": -2.150155782699585, "logps/rejected": -4.381490230560303, "loss": 0.7676, "nll_loss": 0.7438048124313354, "rewards/accuracies": 0.875, "rewards/chosen": -0.21501559019088745, "rewards/margins": 0.22313344478607178, "rewards/rejected": -0.43814903497695923, "step": 3055 }, { "epoch": 8.366872005475702, "grad_norm": 4.5346221923828125, "learning_rate": 5.815068493150684e-07, "log_odds_chosen": 1.9863277673721313, "log_odds_ratio": -0.181919664144516, "logits/chosen": 0.6225253939628601, "logits/rejected": 0.5072379112243652, "logps/chosen": -1.603340744972229, "logps/rejected": -3.3973684310913086, "loss": 0.754, "nll_loss": 0.7358123660087585, "rewards/accuracies": 1.0, "rewards/chosen": -0.16033408045768738, "rewards/margins": 0.17940276861190796, "rewards/rejected": -0.33973684906959534, "step": 3056 }, { "epoch": 8.369609856262834, "grad_norm": 3.7943482398986816, "learning_rate": 5.813698630136986e-07, "log_odds_chosen": 1.1630523204803467, "log_odds_ratio": -0.3312440514564514, "logits/chosen": 0.952649712562561, "logits/rejected": 0.9521088004112244, "logps/chosen": -1.8052990436553955, "logps/rejected": -2.826369285583496, "loss": 0.6973, "nll_loss": 0.6642182469367981, "rewards/accuracies": 1.0, "rewards/chosen": -0.1805299073457718, "rewards/margins": 0.10210705548524857, "rewards/rejected": -0.28263694047927856, "step": 3057 }, { "epoch": 8.372347707049967, "grad_norm": 5.344255447387695, "learning_rate": 5.812328767123287e-07, "log_odds_chosen": 0.6097532510757446, "log_odds_ratio": -0.5210583209991455, "logits/chosen": 0.5335133075714111, "logits/rejected": 0.5373150110244751, "logps/chosen": -2.4434964656829834, "logps/rejected": -3.0257785320281982, "loss": 0.8015, "nll_loss": 0.7494441270828247, "rewards/accuracies": 0.75, "rewards/chosen": -0.2443496733903885, "rewards/margins": 0.05822818726301193, "rewards/rejected": -0.3025778532028198, "step": 3058 }, { "epoch": 8.375085557837098, "grad_norm": 4.815364360809326, "learning_rate": 5.810958904109588e-07, "log_odds_chosen": 0.6080043315887451, "log_odds_ratio": -0.5988330841064453, "logits/chosen": 0.6887775659561157, "logits/rejected": 0.7100066542625427, "logps/chosen": -2.3215699195861816, "logps/rejected": -2.8424720764160156, "loss": 0.7683, "nll_loss": 0.7084310054779053, "rewards/accuracies": 0.875, "rewards/chosen": -0.23215700685977936, "rewards/margins": 0.052090223878622055, "rewards/rejected": -0.2842472195625305, "step": 3059 }, { "epoch": 8.37782340862423, "grad_norm": 3.498544692993164, "learning_rate": 5.80958904109589e-07, "log_odds_chosen": 1.7472155094146729, "log_odds_ratio": -0.3424074947834015, "logits/chosen": 0.9059954881668091, "logits/rejected": 0.920242190361023, "logps/chosen": -2.6443982124328613, "logps/rejected": -4.333413124084473, "loss": 0.7522, "nll_loss": 0.7179312109947205, "rewards/accuracies": 0.875, "rewards/chosen": -0.26443982124328613, "rewards/margins": 0.16890151798725128, "rewards/rejected": -0.4333413243293762, "step": 3060 }, { "epoch": 8.380561259411362, "grad_norm": 6.5099968910217285, "learning_rate": 5.808219178082191e-07, "log_odds_chosen": 1.576136827468872, "log_odds_ratio": -0.5823628902435303, "logits/chosen": 0.9881391525268555, "logits/rejected": 0.9004557132720947, "logps/chosen": -2.7294235229492188, "logps/rejected": -4.236302375793457, "loss": 0.8125, "nll_loss": 0.7542951703071594, "rewards/accuracies": 0.625, "rewards/chosen": -0.27294233441352844, "rewards/margins": 0.15068790316581726, "rewards/rejected": -0.4236302375793457, "step": 3061 }, { "epoch": 8.383299110198495, "grad_norm": 5.145661354064941, "learning_rate": 5.806849315068492e-07, "log_odds_chosen": 1.6535184383392334, "log_odds_ratio": -0.5342362523078918, "logits/chosen": 0.629920482635498, "logits/rejected": 0.6325562000274658, "logps/chosen": -2.6940932273864746, "logps/rejected": -4.296407699584961, "loss": 0.8201, "nll_loss": 0.7666743993759155, "rewards/accuracies": 0.875, "rewards/chosen": -0.26940932869911194, "rewards/margins": 0.16023141145706177, "rewards/rejected": -0.4296407401561737, "step": 3062 }, { "epoch": 8.386036960985626, "grad_norm": 4.343651294708252, "learning_rate": 5.805479452054795e-07, "log_odds_chosen": 1.9139339923858643, "log_odds_ratio": -0.22185635566711426, "logits/chosen": 0.8165141940116882, "logits/rejected": 0.7498209476470947, "logps/chosen": -1.9991188049316406, "logps/rejected": -3.758908748626709, "loss": 0.7688, "nll_loss": 0.7466457486152649, "rewards/accuracies": 1.0, "rewards/chosen": -0.19991189241409302, "rewards/margins": 0.17597900331020355, "rewards/rejected": -0.37589091062545776, "step": 3063 }, { "epoch": 8.388774811772759, "grad_norm": 3.6433401107788086, "learning_rate": 5.804109589041095e-07, "log_odds_chosen": 2.5074706077575684, "log_odds_ratio": -0.27405107021331787, "logits/chosen": 1.035170078277588, "logits/rejected": 1.0146639347076416, "logps/chosen": -2.081414222717285, "logps/rejected": -4.495039463043213, "loss": 0.8399, "nll_loss": 0.8124527931213379, "rewards/accuracies": 1.0, "rewards/chosen": -0.20814141631126404, "rewards/margins": 0.2413625419139862, "rewards/rejected": -0.44950395822525024, "step": 3064 }, { "epoch": 8.39151266255989, "grad_norm": 5.656749248504639, "learning_rate": 5.802739726027396e-07, "log_odds_chosen": 1.021162986755371, "log_odds_ratio": -0.5717406272888184, "logits/chosen": 0.7221797704696655, "logits/rejected": 0.6837244033813477, "logps/chosen": -2.1564440727233887, "logps/rejected": -3.0794448852539062, "loss": 0.7779, "nll_loss": 0.7207458019256592, "rewards/accuracies": 0.875, "rewards/chosen": -0.21564441919326782, "rewards/margins": 0.09230008721351624, "rewards/rejected": -0.30794450640678406, "step": 3065 }, { "epoch": 8.394250513347023, "grad_norm": 6.272488117218018, "learning_rate": 5.801369863013699e-07, "log_odds_chosen": 2.9418141841888428, "log_odds_ratio": -0.3135223090648651, "logits/chosen": 0.9511144161224365, "logits/rejected": 1.021549105644226, "logps/chosen": -3.040109395980835, "logps/rejected": -5.899925708770752, "loss": 0.8576, "nll_loss": 0.8262068033218384, "rewards/accuracies": 0.875, "rewards/chosen": -0.30401095747947693, "rewards/margins": 0.2859816253185272, "rewards/rejected": -0.5899925231933594, "step": 3066 }, { "epoch": 8.396988364134154, "grad_norm": 4.214248180389404, "learning_rate": 5.8e-07, "log_odds_chosen": 1.5373979806900024, "log_odds_ratio": -0.29426535964012146, "logits/chosen": 0.9334751963615417, "logits/rejected": 0.9517378211021423, "logps/chosen": -2.4819090366363525, "logps/rejected": -3.9326391220092773, "loss": 0.8762, "nll_loss": 0.846759021282196, "rewards/accuracies": 0.875, "rewards/chosen": -0.24819090962409973, "rewards/margins": 0.14507299661636353, "rewards/rejected": -0.39326390624046326, "step": 3067 }, { "epoch": 8.399726214921287, "grad_norm": 4.820222854614258, "learning_rate": 5.798630136986302e-07, "log_odds_chosen": 1.5450537204742432, "log_odds_ratio": -0.3457314372062683, "logits/chosen": 0.7733405232429504, "logits/rejected": 0.7106986045837402, "logps/chosen": -1.9505404233932495, "logps/rejected": -3.376166820526123, "loss": 0.7663, "nll_loss": 0.7316834926605225, "rewards/accuracies": 1.0, "rewards/chosen": -0.1950540393590927, "rewards/margins": 0.1425626575946808, "rewards/rejected": -0.3376166820526123, "step": 3068 }, { "epoch": 8.402464065708418, "grad_norm": 4.21793794631958, "learning_rate": 5.797260273972603e-07, "log_odds_chosen": 2.590562582015991, "log_odds_ratio": -0.2427290976047516, "logits/chosen": 0.7420772314071655, "logits/rejected": 0.8320126533508301, "logps/chosen": -1.786938190460205, "logps/rejected": -4.189783573150635, "loss": 0.8001, "nll_loss": 0.7758538722991943, "rewards/accuracies": 1.0, "rewards/chosen": -0.17869383096694946, "rewards/margins": 0.24028456211090088, "rewards/rejected": -0.41897836327552795, "step": 3069 }, { "epoch": 8.405201916495551, "grad_norm": 4.454807758331299, "learning_rate": 5.795890410958904e-07, "log_odds_chosen": 1.126061201095581, "log_odds_ratio": -0.37607425451278687, "logits/chosen": 0.7809118032455444, "logits/rejected": 0.6928842067718506, "logps/chosen": -2.7419071197509766, "logps/rejected": -3.7912352085113525, "loss": 0.8153, "nll_loss": 0.7776972055435181, "rewards/accuracies": 0.875, "rewards/chosen": -0.2741907238960266, "rewards/margins": 0.10493279248476028, "rewards/rejected": -0.3791235089302063, "step": 3070 }, { "epoch": 8.407939767282683, "grad_norm": 4.676081657409668, "learning_rate": 5.794520547945206e-07, "log_odds_chosen": 1.0268938541412354, "log_odds_ratio": -0.3792518973350525, "logits/chosen": 0.7897027730941772, "logits/rejected": 0.7269766330718994, "logps/chosen": -1.9329043626785278, "logps/rejected": -2.878119945526123, "loss": 0.8557, "nll_loss": 0.8177509307861328, "rewards/accuracies": 0.875, "rewards/chosen": -0.19329044222831726, "rewards/margins": 0.09452155232429504, "rewards/rejected": -0.2878119945526123, "step": 3071 }, { "epoch": 8.410677618069816, "grad_norm": 5.935638427734375, "learning_rate": 5.793150684931507e-07, "log_odds_chosen": 1.148911714553833, "log_odds_ratio": -0.4027644693851471, "logits/chosen": 0.8235002160072327, "logits/rejected": 0.7600325345993042, "logps/chosen": -2.0492615699768066, "logps/rejected": -3.0672755241394043, "loss": 0.7591, "nll_loss": 0.7187846302986145, "rewards/accuracies": 0.875, "rewards/chosen": -0.20492614805698395, "rewards/margins": 0.10180141776800156, "rewards/rejected": -0.3067275881767273, "step": 3072 }, { "epoch": 8.413415468856947, "grad_norm": 4.6206278800964355, "learning_rate": 5.791780821917808e-07, "log_odds_chosen": 1.7281770706176758, "log_odds_ratio": -0.35404160618782043, "logits/chosen": 0.8853235244750977, "logits/rejected": 0.8640450239181519, "logps/chosen": -1.7801378965377808, "logps/rejected": -3.367267370223999, "loss": 0.7003, "nll_loss": 0.6649254560470581, "rewards/accuracies": 0.875, "rewards/chosen": -0.17801380157470703, "rewards/margins": 0.15871292352676392, "rewards/rejected": -0.33672672510147095, "step": 3073 }, { "epoch": 8.41615331964408, "grad_norm": 4.71402645111084, "learning_rate": 5.79041095890411e-07, "log_odds_chosen": 2.6356992721557617, "log_odds_ratio": -0.25502753257751465, "logits/chosen": 0.8335990905761719, "logits/rejected": 0.7863656878471375, "logps/chosen": -2.2500782012939453, "logps/rejected": -4.772127628326416, "loss": 0.8368, "nll_loss": 0.8112771511077881, "rewards/accuracies": 1.0, "rewards/chosen": -0.2250078022480011, "rewards/margins": 0.252204954624176, "rewards/rejected": -0.4772128164768219, "step": 3074 }, { "epoch": 8.41889117043121, "grad_norm": 4.890954494476318, "learning_rate": 5.789041095890412e-07, "log_odds_chosen": 1.1439759731292725, "log_odds_ratio": -0.5837419629096985, "logits/chosen": 0.8634021282196045, "logits/rejected": 0.7733899354934692, "logps/chosen": -2.7861344814300537, "logps/rejected": -3.8581478595733643, "loss": 0.8125, "nll_loss": 0.7541127800941467, "rewards/accuracies": 0.625, "rewards/chosen": -0.27861347794532776, "rewards/margins": 0.10720133036375046, "rewards/rejected": -0.3858148157596588, "step": 3075 }, { "epoch": 8.421629021218344, "grad_norm": 3.8835809230804443, "learning_rate": 5.787671232876712e-07, "log_odds_chosen": 2.563626527786255, "log_odds_ratio": -0.2113182693719864, "logits/chosen": 0.886663556098938, "logits/rejected": 0.810573160648346, "logps/chosen": -1.7178703546524048, "logps/rejected": -4.075273036956787, "loss": 0.7463, "nll_loss": 0.7251518368721008, "rewards/accuracies": 0.875, "rewards/chosen": -0.17178703844547272, "rewards/margins": 0.2357402741909027, "rewards/rejected": -0.4075273275375366, "step": 3076 }, { "epoch": 8.424366872005475, "grad_norm": 5.151292324066162, "learning_rate": 5.786301369863014e-07, "log_odds_chosen": 0.9821009039878845, "log_odds_ratio": -0.49139076471328735, "logits/chosen": 0.6903240084648132, "logits/rejected": 0.6597631573677063, "logps/chosen": -2.645211696624756, "logps/rejected": -3.577950954437256, "loss": 0.8004, "nll_loss": 0.7512660026550293, "rewards/accuracies": 0.875, "rewards/chosen": -0.26452118158340454, "rewards/margins": 0.09327390789985657, "rewards/rejected": -0.3577950596809387, "step": 3077 }, { "epoch": 8.427104722792608, "grad_norm": 5.103641033172607, "learning_rate": 5.784931506849315e-07, "log_odds_chosen": 2.2016727924346924, "log_odds_ratio": -0.27304622530937195, "logits/chosen": 0.7252516150474548, "logits/rejected": 0.7149558067321777, "logps/chosen": -2.23837947845459, "logps/rejected": -4.324677467346191, "loss": 0.804, "nll_loss": 0.7766476273536682, "rewards/accuracies": 0.875, "rewards/chosen": -0.2238379567861557, "rewards/margins": 0.2086298167705536, "rewards/rejected": -0.4324677586555481, "step": 3078 }, { "epoch": 8.429842573579739, "grad_norm": 5.081141471862793, "learning_rate": 5.783561643835616e-07, "log_odds_chosen": 2.6435770988464355, "log_odds_ratio": -0.18469861149787903, "logits/chosen": 1.040243148803711, "logits/rejected": 1.076362133026123, "logps/chosen": -2.1356964111328125, "logps/rejected": -4.644984722137451, "loss": 0.6902, "nll_loss": 0.671727180480957, "rewards/accuracies": 1.0, "rewards/chosen": -0.21356964111328125, "rewards/margins": 0.2509288191795349, "rewards/rejected": -0.46449846029281616, "step": 3079 }, { "epoch": 8.432580424366872, "grad_norm": 4.796020984649658, "learning_rate": 5.782191780821918e-07, "log_odds_chosen": 0.06313599646091461, "log_odds_ratio": -0.7297534346580505, "logits/chosen": 0.531795859336853, "logits/rejected": 0.7037611603736877, "logps/chosen": -2.2463529109954834, "logps/rejected": -2.329897880554199, "loss": 0.8465, "nll_loss": 0.7735617756843567, "rewards/accuracies": 0.625, "rewards/chosen": -0.2246353030204773, "rewards/margins": 0.008354507386684418, "rewards/rejected": -0.23298980295658112, "step": 3080 }, { "epoch": 8.435318275154003, "grad_norm": 5.404982566833496, "learning_rate": 5.780821917808219e-07, "log_odds_chosen": 1.1951359510421753, "log_odds_ratio": -0.33601194620132446, "logits/chosen": 0.8842781782150269, "logits/rejected": 0.8358290195465088, "logps/chosen": -1.7141761779785156, "logps/rejected": -2.7955849170684814, "loss": 0.7384, "nll_loss": 0.7047747373580933, "rewards/accuracies": 1.0, "rewards/chosen": -0.17141760885715485, "rewards/margins": 0.10814087092876434, "rewards/rejected": -0.2795584797859192, "step": 3081 }, { "epoch": 8.438056125941136, "grad_norm": 4.468260765075684, "learning_rate": 5.779452054794521e-07, "log_odds_chosen": 1.6971911191940308, "log_odds_ratio": -0.30596813559532166, "logits/chosen": 0.716758668422699, "logits/rejected": 0.6819875240325928, "logps/chosen": -2.6901674270629883, "logps/rejected": -4.335270881652832, "loss": 0.8623, "nll_loss": 0.8316737413406372, "rewards/accuracies": 0.875, "rewards/chosen": -0.26901674270629883, "rewards/margins": 0.1645103543996811, "rewards/rejected": -0.43352705240249634, "step": 3082 }, { "epoch": 8.44079397672827, "grad_norm": 4.5902299880981445, "learning_rate": 5.778082191780822e-07, "log_odds_chosen": 1.9484074115753174, "log_odds_ratio": -0.2863907217979431, "logits/chosen": 1.0742099285125732, "logits/rejected": 1.1129719018936157, "logps/chosen": -1.9671381711959839, "logps/rejected": -3.777219295501709, "loss": 0.7323, "nll_loss": 0.7036890983581543, "rewards/accuracies": 0.875, "rewards/chosen": -0.19671382009983063, "rewards/margins": 0.18100810050964355, "rewards/rejected": -0.3777219355106354, "step": 3083 }, { "epoch": 8.4435318275154, "grad_norm": 4.460525989532471, "learning_rate": 5.776712328767123e-07, "log_odds_chosen": 1.8291066884994507, "log_odds_ratio": -0.3322950005531311, "logits/chosen": 0.8161698579788208, "logits/rejected": 0.8167117238044739, "logps/chosen": -2.284874439239502, "logps/rejected": -4.02825403213501, "loss": 0.6849, "nll_loss": 0.6516847014427185, "rewards/accuracies": 0.875, "rewards/chosen": -0.22848746180534363, "rewards/margins": 0.1743379533290863, "rewards/rejected": -0.40282541513442993, "step": 3084 }, { "epoch": 8.446269678302533, "grad_norm": 4.725026607513428, "learning_rate": 5.775342465753425e-07, "log_odds_chosen": 2.052570104598999, "log_odds_ratio": -0.33268094062805176, "logits/chosen": 0.7226243615150452, "logits/rejected": 0.6577856540679932, "logps/chosen": -2.343801498413086, "logps/rejected": -4.3115153312683105, "loss": 0.7629, "nll_loss": 0.7296246886253357, "rewards/accuracies": 0.875, "rewards/chosen": -0.23438015580177307, "rewards/margins": 0.19677139818668365, "rewards/rejected": -0.43115153908729553, "step": 3085 }, { "epoch": 8.449007529089664, "grad_norm": 5.431869983673096, "learning_rate": 5.773972602739726e-07, "log_odds_chosen": 0.7582245469093323, "log_odds_ratio": -0.5182977318763733, "logits/chosen": 0.7703607082366943, "logits/rejected": 0.7835031151771545, "logps/chosen": -2.7327754497528076, "logps/rejected": -3.442479133605957, "loss": 0.8336, "nll_loss": 0.7817671895027161, "rewards/accuracies": 0.625, "rewards/chosen": -0.2732775807380676, "rewards/margins": 0.07097035646438599, "rewards/rejected": -0.34424787759780884, "step": 3086 }, { "epoch": 8.451745379876797, "grad_norm": 6.014240264892578, "learning_rate": 5.772602739726027e-07, "log_odds_chosen": 1.5814921855926514, "log_odds_ratio": -0.4178650975227356, "logits/chosen": 1.1476616859436035, "logits/rejected": 1.269822120666504, "logps/chosen": -3.0672965049743652, "logps/rejected": -4.523674964904785, "loss": 0.7079, "nll_loss": 0.6661437749862671, "rewards/accuracies": 0.875, "rewards/chosen": -0.30672964453697205, "rewards/margins": 0.1456378549337387, "rewards/rejected": -0.45236748456954956, "step": 3087 }, { "epoch": 8.454483230663929, "grad_norm": 4.927172660827637, "learning_rate": 5.771232876712329e-07, "log_odds_chosen": 2.372342109680176, "log_odds_ratio": -0.3230613172054291, "logits/chosen": 0.8358359336853027, "logits/rejected": 0.7940648794174194, "logps/chosen": -1.7347091436386108, "logps/rejected": -3.9305009841918945, "loss": 0.7817, "nll_loss": 0.7493813633918762, "rewards/accuracies": 0.875, "rewards/chosen": -0.17347091436386108, "rewards/margins": 0.21957919001579285, "rewards/rejected": -0.39305010437965393, "step": 3088 }, { "epoch": 8.457221081451062, "grad_norm": 4.792344093322754, "learning_rate": 5.769863013698631e-07, "log_odds_chosen": 1.836010217666626, "log_odds_ratio": -0.20970219373703003, "logits/chosen": 0.8732766509056091, "logits/rejected": 0.8602777123451233, "logps/chosen": -1.8902065753936768, "logps/rejected": -3.565558910369873, "loss": 0.7446, "nll_loss": 0.7236198782920837, "rewards/accuracies": 1.0, "rewards/chosen": -0.18902066349983215, "rewards/margins": 0.1675352305173874, "rewards/rejected": -0.35655587911605835, "step": 3089 }, { "epoch": 8.459958932238193, "grad_norm": 4.537597179412842, "learning_rate": 5.768493150684931e-07, "log_odds_chosen": 1.019029140472412, "log_odds_ratio": -0.489944189786911, "logits/chosen": 0.8195845484733582, "logits/rejected": 0.8844711780548096, "logps/chosen": -2.1490511894226074, "logps/rejected": -3.053408622741699, "loss": 0.8001, "nll_loss": 0.7511422038078308, "rewards/accuracies": 0.75, "rewards/chosen": -0.21490512788295746, "rewards/margins": 0.09043576568365097, "rewards/rejected": -0.30534088611602783, "step": 3090 }, { "epoch": 8.462696783025326, "grad_norm": 4.851672172546387, "learning_rate": 5.767123287671233e-07, "log_odds_chosen": 2.410224676132202, "log_odds_ratio": -0.16923990845680237, "logits/chosen": 0.9069843292236328, "logits/rejected": 0.9684172868728638, "logps/chosen": -3.0826053619384766, "logps/rejected": -5.448520660400391, "loss": 0.8758, "nll_loss": 0.8588685989379883, "rewards/accuracies": 1.0, "rewards/chosen": -0.3082605302333832, "rewards/margins": 0.23659147322177887, "rewards/rejected": -0.5448520183563232, "step": 3091 }, { "epoch": 8.465434633812457, "grad_norm": 4.4211812019348145, "learning_rate": 5.765753424657534e-07, "log_odds_chosen": 2.0978550910949707, "log_odds_ratio": -0.1997707188129425, "logits/chosen": 0.9048699140548706, "logits/rejected": 0.9317073822021484, "logps/chosen": -2.172297954559326, "logps/rejected": -4.157472610473633, "loss": 0.8065, "nll_loss": 0.7865556478500366, "rewards/accuracies": 1.0, "rewards/chosen": -0.21722978353500366, "rewards/margins": 0.19851748645305634, "rewards/rejected": -0.4157472848892212, "step": 3092 }, { "epoch": 8.46817248459959, "grad_norm": 4.096501350402832, "learning_rate": 5.764383561643835e-07, "log_odds_chosen": 2.285201072692871, "log_odds_ratio": -0.17426156997680664, "logits/chosen": 0.8787335753440857, "logits/rejected": 0.8816288113594055, "logps/chosen": -2.313839912414551, "logps/rejected": -4.470498085021973, "loss": 0.7529, "nll_loss": 0.7354289293289185, "rewards/accuracies": 1.0, "rewards/chosen": -0.23138399422168732, "rewards/margins": 0.21566584706306458, "rewards/rejected": -0.4470498263835907, "step": 3093 }, { "epoch": 8.470910335386721, "grad_norm": 4.75125789642334, "learning_rate": 5.763013698630137e-07, "log_odds_chosen": 1.2308666706085205, "log_odds_ratio": -0.2936252951622009, "logits/chosen": 0.8622744083404541, "logits/rejected": 0.849825382232666, "logps/chosen": -1.6028704643249512, "logps/rejected": -2.6864888668060303, "loss": 0.711, "nll_loss": 0.6816222667694092, "rewards/accuracies": 1.0, "rewards/chosen": -0.1602870523929596, "rewards/margins": 0.10836184769868851, "rewards/rejected": -0.2686488926410675, "step": 3094 }, { "epoch": 8.473648186173854, "grad_norm": 4.631993770599365, "learning_rate": 5.761643835616438e-07, "log_odds_chosen": 2.3635060787200928, "log_odds_ratio": -0.259059876203537, "logits/chosen": 0.9769124984741211, "logits/rejected": 1.0275511741638184, "logps/chosen": -2.904120445251465, "logps/rejected": -5.190817832946777, "loss": 0.7246, "nll_loss": 0.6986563205718994, "rewards/accuracies": 0.875, "rewards/chosen": -0.290412038564682, "rewards/margins": 0.22866979241371155, "rewards/rejected": -0.5190818309783936, "step": 3095 }, { "epoch": 8.476386036960985, "grad_norm": 5.254396438598633, "learning_rate": 5.76027397260274e-07, "log_odds_chosen": 0.9048460721969604, "log_odds_ratio": -0.37923726439476013, "logits/chosen": 0.7778302431106567, "logits/rejected": 0.6615908145904541, "logps/chosen": -1.6334854364395142, "logps/rejected": -2.4067773818969727, "loss": 0.8341, "nll_loss": 0.7961478233337402, "rewards/accuracies": 1.0, "rewards/chosen": -0.16334855556488037, "rewards/margins": 0.07732921093702316, "rewards/rejected": -0.24067777395248413, "step": 3096 }, { "epoch": 8.479123887748118, "grad_norm": 5.429492473602295, "learning_rate": 5.758904109589041e-07, "log_odds_chosen": 1.560714840888977, "log_odds_ratio": -0.2877344787120819, "logits/chosen": 0.9507705569267273, "logits/rejected": 0.8725715279579163, "logps/chosen": -2.672738552093506, "logps/rejected": -4.132936477661133, "loss": 1.0041, "nll_loss": 0.9753455519676208, "rewards/accuracies": 1.0, "rewards/chosen": -0.26727384328842163, "rewards/margins": 0.1460198163986206, "rewards/rejected": -0.41329365968704224, "step": 3097 }, { "epoch": 8.48186173853525, "grad_norm": 5.60365629196167, "learning_rate": 5.757534246575342e-07, "log_odds_chosen": 2.101576328277588, "log_odds_ratio": -0.4022616446018219, "logits/chosen": 0.8995786309242249, "logits/rejected": 0.8872269988059998, "logps/chosen": -2.673950672149658, "logps/rejected": -4.685214996337891, "loss": 0.7773, "nll_loss": 0.7371166944503784, "rewards/accuracies": 0.75, "rewards/chosen": -0.2673950791358948, "rewards/margins": 0.20112642645835876, "rewards/rejected": -0.46852150559425354, "step": 3098 }, { "epoch": 8.484599589322382, "grad_norm": 4.672165393829346, "learning_rate": 5.756164383561644e-07, "log_odds_chosen": 3.4051477909088135, "log_odds_ratio": -0.29388731718063354, "logits/chosen": 0.7622082233428955, "logits/rejected": 0.7968224287033081, "logps/chosen": -2.0751311779022217, "logps/rejected": -5.38594388961792, "loss": 0.7652, "nll_loss": 0.7358541488647461, "rewards/accuracies": 0.75, "rewards/chosen": -0.20751312375068665, "rewards/margins": 0.3310813009738922, "rewards/rejected": -0.5385944247245789, "step": 3099 }, { "epoch": 8.487337440109513, "grad_norm": 5.088054656982422, "learning_rate": 5.754794520547945e-07, "log_odds_chosen": 1.0695610046386719, "log_odds_ratio": -0.545378565788269, "logits/chosen": 0.7329111099243164, "logits/rejected": 0.7076773643493652, "logps/chosen": -2.0350825786590576, "logps/rejected": -2.998422622680664, "loss": 0.7824, "nll_loss": 0.7278921008110046, "rewards/accuracies": 0.75, "rewards/chosen": -0.20350827276706696, "rewards/margins": 0.09633397310972214, "rewards/rejected": -0.2998422682285309, "step": 3100 }, { "epoch": 8.490075290896646, "grad_norm": 4.458194732666016, "learning_rate": 5.753424657534246e-07, "log_odds_chosen": 1.4955365657806396, "log_odds_ratio": -0.3627264201641083, "logits/chosen": 0.848759651184082, "logits/rejected": 0.7941571474075317, "logps/chosen": -2.1235499382019043, "logps/rejected": -3.5525803565979004, "loss": 0.7566, "nll_loss": 0.7203616499900818, "rewards/accuracies": 0.875, "rewards/chosen": -0.21235500276088715, "rewards/margins": 0.14290305972099304, "rewards/rejected": -0.3552580773830414, "step": 3101 }, { "epoch": 8.492813141683778, "grad_norm": 4.454322338104248, "learning_rate": 5.752054794520548e-07, "log_odds_chosen": 3.1487653255462646, "log_odds_ratio": -0.08817112445831299, "logits/chosen": 0.7612372040748596, "logits/rejected": 0.7813382148742676, "logps/chosen": -2.3343443870544434, "logps/rejected": -5.260974407196045, "loss": 0.7185, "nll_loss": 0.7096452713012695, "rewards/accuracies": 1.0, "rewards/chosen": -0.23343443870544434, "rewards/margins": 0.292663037776947, "rewards/rejected": -0.5260974764823914, "step": 3102 }, { "epoch": 8.49555099247091, "grad_norm": 4.938995361328125, "learning_rate": 5.75068493150685e-07, "log_odds_chosen": 2.7960777282714844, "log_odds_ratio": -0.08409950137138367, "logits/chosen": 1.1084297895431519, "logits/rejected": 1.1292362213134766, "logps/chosen": -2.0581727027893066, "logps/rejected": -4.693117618560791, "loss": 0.7105, "nll_loss": 0.7021206021308899, "rewards/accuracies": 1.0, "rewards/chosen": -0.20581728219985962, "rewards/margins": 0.26349449157714844, "rewards/rejected": -0.46931177377700806, "step": 3103 }, { "epoch": 8.498288843258042, "grad_norm": 4.067386150360107, "learning_rate": 5.74931506849315e-07, "log_odds_chosen": 2.0386404991149902, "log_odds_ratio": -0.19785159826278687, "logits/chosen": 0.8532317280769348, "logits/rejected": 0.8433153629302979, "logps/chosen": -2.5840702056884766, "logps/rejected": -4.506465911865234, "loss": 0.758, "nll_loss": 0.7381747364997864, "rewards/accuracies": 1.0, "rewards/chosen": -0.25840702652931213, "rewards/margins": 0.19223959743976593, "rewards/rejected": -0.45064660906791687, "step": 3104 }, { "epoch": 8.501026694045175, "grad_norm": 5.469929218292236, "learning_rate": 5.747945205479452e-07, "log_odds_chosen": 1.7250361442565918, "log_odds_ratio": -0.21559454500675201, "logits/chosen": 0.5937906503677368, "logits/rejected": 0.5238080620765686, "logps/chosen": -1.5760343074798584, "logps/rejected": -3.1095211505889893, "loss": 0.7768, "nll_loss": 0.7551950216293335, "rewards/accuracies": 1.0, "rewards/chosen": -0.1576034426689148, "rewards/margins": 0.15334868431091309, "rewards/rejected": -0.3109521269798279, "step": 3105 }, { "epoch": 8.503764544832306, "grad_norm": 4.071467876434326, "learning_rate": 5.746575342465754e-07, "log_odds_chosen": 1.6453663110733032, "log_odds_ratio": -0.3131943643093109, "logits/chosen": 0.6954328417778015, "logits/rejected": 0.6518878936767578, "logps/chosen": -1.9062941074371338, "logps/rejected": -3.432730197906494, "loss": 0.7902, "nll_loss": 0.758849561214447, "rewards/accuracies": 0.875, "rewards/chosen": -0.19062942266464233, "rewards/margins": 0.1526435762643814, "rewards/rejected": -0.34327298402786255, "step": 3106 }, { "epoch": 8.506502395619439, "grad_norm": 5.745391368865967, "learning_rate": 5.745205479452054e-07, "log_odds_chosen": 2.711883068084717, "log_odds_ratio": -0.1951906681060791, "logits/chosen": 1.0836946964263916, "logits/rejected": 1.1404914855957031, "logps/chosen": -2.728994369506836, "logps/rejected": -5.295392990112305, "loss": 0.7657, "nll_loss": 0.7462277412414551, "rewards/accuracies": 0.875, "rewards/chosen": -0.27289941906929016, "rewards/margins": 0.25663986802101135, "rewards/rejected": -0.5295392870903015, "step": 3107 }, { "epoch": 8.50924024640657, "grad_norm": 4.700713634490967, "learning_rate": 5.743835616438356e-07, "log_odds_chosen": 2.07188081741333, "log_odds_ratio": -0.31040671467781067, "logits/chosen": 0.7681803107261658, "logits/rejected": 0.81475830078125, "logps/chosen": -2.626758337020874, "logps/rejected": -4.642638206481934, "loss": 0.8729, "nll_loss": 0.8418841361999512, "rewards/accuracies": 0.875, "rewards/chosen": -0.26267582178115845, "rewards/margins": 0.20158803462982178, "rewards/rejected": -0.4642638564109802, "step": 3108 }, { "epoch": 8.511978097193703, "grad_norm": 5.349642276763916, "learning_rate": 5.742465753424657e-07, "log_odds_chosen": 1.5861459970474243, "log_odds_ratio": -0.4691304862499237, "logits/chosen": 0.792606770992279, "logits/rejected": 0.9118247628211975, "logps/chosen": -2.741973638534546, "logps/rejected": -4.282981872558594, "loss": 0.6981, "nll_loss": 0.6511573791503906, "rewards/accuracies": 0.75, "rewards/chosen": -0.27419736981391907, "rewards/margins": 0.15410080552101135, "rewards/rejected": -0.4282982051372528, "step": 3109 }, { "epoch": 8.514715947980836, "grad_norm": 3.6260414123535156, "learning_rate": 5.741095890410958e-07, "log_odds_chosen": 4.290562629699707, "log_odds_ratio": -0.0917988270521164, "logits/chosen": 0.91616290807724, "logits/rejected": 0.9880545735359192, "logps/chosen": -2.041886329650879, "logps/rejected": -6.154050827026367, "loss": 0.652, "nll_loss": 0.6427916288375854, "rewards/accuracies": 1.0, "rewards/chosen": -0.20418862998485565, "rewards/margins": 0.4112164080142975, "rewards/rejected": -0.6154050230979919, "step": 3110 }, { "epoch": 8.517453798767967, "grad_norm": 4.339973449707031, "learning_rate": 5.73972602739726e-07, "log_odds_chosen": 3.6070680618286133, "log_odds_ratio": -0.05949769914150238, "logits/chosen": 1.0738977193832397, "logits/rejected": 1.090232014656067, "logps/chosen": -2.640470504760742, "logps/rejected": -6.134572505950928, "loss": 0.7912, "nll_loss": 0.7852006554603577, "rewards/accuracies": 1.0, "rewards/chosen": -0.2640470862388611, "rewards/margins": 0.34941017627716064, "rewards/rejected": -0.6134572625160217, "step": 3111 }, { "epoch": 8.5201916495551, "grad_norm": 3.895003080368042, "learning_rate": 5.738356164383561e-07, "log_odds_chosen": 2.587033987045288, "log_odds_ratio": -0.23283955454826355, "logits/chosen": 0.6523881554603577, "logits/rejected": 0.6233570575714111, "logps/chosen": -2.01749849319458, "logps/rejected": -4.437199592590332, "loss": 0.8218, "nll_loss": 0.7985548973083496, "rewards/accuracies": 0.875, "rewards/chosen": -0.20174984633922577, "rewards/margins": 0.24197015166282654, "rewards/rejected": -0.4437200427055359, "step": 3112 }, { "epoch": 8.522929500342231, "grad_norm": 4.65414571762085, "learning_rate": 5.736986301369863e-07, "log_odds_chosen": 1.4902323484420776, "log_odds_ratio": -0.39093372225761414, "logits/chosen": 0.9355289936065674, "logits/rejected": 0.9563338160514832, "logps/chosen": -2.3653459548950195, "logps/rejected": -3.7713699340820312, "loss": 0.7751, "nll_loss": 0.7360512018203735, "rewards/accuracies": 0.875, "rewards/chosen": -0.23653459548950195, "rewards/margins": 0.14060240983963013, "rewards/rejected": -0.3771370053291321, "step": 3113 }, { "epoch": 8.525667351129364, "grad_norm": 4.042994022369385, "learning_rate": 5.735616438356164e-07, "log_odds_chosen": 2.1894869804382324, "log_odds_ratio": -0.2630392014980316, "logits/chosen": 0.7607858777046204, "logits/rejected": 0.7752927541732788, "logps/chosen": -2.318730115890503, "logps/rejected": -4.3933186531066895, "loss": 0.802, "nll_loss": 0.7757030725479126, "rewards/accuracies": 1.0, "rewards/chosen": -0.23187300562858582, "rewards/margins": 0.20745883882045746, "rewards/rejected": -0.4393318295478821, "step": 3114 }, { "epoch": 8.528405201916495, "grad_norm": 4.513430118560791, "learning_rate": 5.734246575342465e-07, "log_odds_chosen": 3.7906365394592285, "log_odds_ratio": -0.1479833722114563, "logits/chosen": 0.8758970499038696, "logits/rejected": 0.9072407484054565, "logps/chosen": -2.380479574203491, "logps/rejected": -6.049922943115234, "loss": 0.8195, "nll_loss": 0.8047119975090027, "rewards/accuracies": 1.0, "rewards/chosen": -0.23804795742034912, "rewards/margins": 0.3669443428516388, "rewards/rejected": -0.6049923300743103, "step": 3115 }, { "epoch": 8.531143052703628, "grad_norm": 4.460229396820068, "learning_rate": 5.732876712328767e-07, "log_odds_chosen": 3.366126775741577, "log_odds_ratio": -0.11772631108760834, "logits/chosen": 0.9008594155311584, "logits/rejected": 0.9040791988372803, "logps/chosen": -2.1429779529571533, "logps/rejected": -5.3906683921813965, "loss": 0.7212, "nll_loss": 0.7094691395759583, "rewards/accuracies": 1.0, "rewards/chosen": -0.2142978012561798, "rewards/margins": 0.3247690796852112, "rewards/rejected": -0.5390668511390686, "step": 3116 }, { "epoch": 8.53388090349076, "grad_norm": 6.467041969299316, "learning_rate": 5.731506849315068e-07, "log_odds_chosen": 1.2328100204467773, "log_odds_ratio": -0.41578301787376404, "logits/chosen": 1.0287671089172363, "logits/rejected": 1.063359260559082, "logps/chosen": -2.5378661155700684, "logps/rejected": -3.734415054321289, "loss": 0.8294, "nll_loss": 0.7878640294075012, "rewards/accuracies": 0.875, "rewards/chosen": -0.2537866234779358, "rewards/margins": 0.11965487152338028, "rewards/rejected": -0.37344151735305786, "step": 3117 }, { "epoch": 8.536618754277892, "grad_norm": 5.363152027130127, "learning_rate": 5.730136986301369e-07, "log_odds_chosen": 1.0785908699035645, "log_odds_ratio": -0.42392033338546753, "logits/chosen": 0.8737903833389282, "logits/rejected": 0.7866525650024414, "logps/chosen": -1.7821553945541382, "logps/rejected": -2.7684006690979004, "loss": 0.7676, "nll_loss": 0.7252452373504639, "rewards/accuracies": 0.75, "rewards/chosen": -0.17821554839611053, "rewards/margins": 0.09862455725669861, "rewards/rejected": -0.27684009075164795, "step": 3118 }, { "epoch": 8.539356605065024, "grad_norm": 4.463628768920898, "learning_rate": 5.728767123287671e-07, "log_odds_chosen": 1.4709136486053467, "log_odds_ratio": -0.4577338695526123, "logits/chosen": 0.8035610914230347, "logits/rejected": 0.7886677384376526, "logps/chosen": -2.3970141410827637, "logps/rejected": -3.8138017654418945, "loss": 0.789, "nll_loss": 0.7431808710098267, "rewards/accuracies": 0.75, "rewards/chosen": -0.23970142006874084, "rewards/margins": 0.14167878031730652, "rewards/rejected": -0.38138020038604736, "step": 3119 }, { "epoch": 8.542094455852157, "grad_norm": 4.508766174316406, "learning_rate": 5.727397260273973e-07, "log_odds_chosen": 2.3513660430908203, "log_odds_ratio": -0.30401962995529175, "logits/chosen": 0.852414608001709, "logits/rejected": 0.8860446810722351, "logps/chosen": -2.362731456756592, "logps/rejected": -4.638947010040283, "loss": 0.851, "nll_loss": 0.8205565214157104, "rewards/accuracies": 0.875, "rewards/chosen": -0.2362731397151947, "rewards/margins": 0.22762154042720795, "rewards/rejected": -0.46389466524124146, "step": 3120 }, { "epoch": 8.544832306639288, "grad_norm": 4.418780326843262, "learning_rate": 5.726027397260273e-07, "log_odds_chosen": 3.47363543510437, "log_odds_ratio": -0.41975685954093933, "logits/chosen": 0.9383145570755005, "logits/rejected": 0.9967483282089233, "logps/chosen": -2.965280771255493, "logps/rejected": -6.399415016174316, "loss": 0.9325, "nll_loss": 0.8905113339424133, "rewards/accuracies": 0.75, "rewards/chosen": -0.29652807116508484, "rewards/margins": 0.34341341257095337, "rewards/rejected": -0.6399414539337158, "step": 3121 }, { "epoch": 8.54757015742642, "grad_norm": 3.9367024898529053, "learning_rate": 5.724657534246575e-07, "log_odds_chosen": 2.0472707748413086, "log_odds_ratio": -0.2140248417854309, "logits/chosen": 0.7098598480224609, "logits/rejected": 0.7003835439682007, "logps/chosen": -1.556100606918335, "logps/rejected": -3.394608736038208, "loss": 0.7419, "nll_loss": 0.7204856276512146, "rewards/accuracies": 1.0, "rewards/chosen": -0.15561005473136902, "rewards/margins": 0.18385082483291626, "rewards/rejected": -0.3394608795642853, "step": 3122 }, { "epoch": 8.550308008213552, "grad_norm": 5.779995441436768, "learning_rate": 5.723287671232876e-07, "log_odds_chosen": 1.1982226371765137, "log_odds_ratio": -0.42691442370414734, "logits/chosen": 0.9289924502372742, "logits/rejected": 0.928184986114502, "logps/chosen": -2.1884121894836426, "logps/rejected": -3.307772397994995, "loss": 0.7687, "nll_loss": 0.7259692549705505, "rewards/accuracies": 0.75, "rewards/chosen": -0.21884122490882874, "rewards/margins": 0.11193603277206421, "rewards/rejected": -0.33077725768089294, "step": 3123 }, { "epoch": 8.553045859000685, "grad_norm": 4.393331050872803, "learning_rate": 5.721917808219177e-07, "log_odds_chosen": 2.2684884071350098, "log_odds_ratio": -0.271230548620224, "logits/chosen": 0.7499839067459106, "logits/rejected": 0.7574068307876587, "logps/chosen": -2.128777265548706, "logps/rejected": -4.262015342712402, "loss": 0.8034, "nll_loss": 0.7762479782104492, "rewards/accuracies": 0.875, "rewards/chosen": -0.21287773549556732, "rewards/margins": 0.21332377195358276, "rewards/rejected": -0.4262015223503113, "step": 3124 }, { "epoch": 8.555783709787816, "grad_norm": 5.145119667053223, "learning_rate": 5.720547945205479e-07, "log_odds_chosen": 1.361615538597107, "log_odds_ratio": -0.3720586597919464, "logits/chosen": 0.8700869083404541, "logits/rejected": 0.8392838835716248, "logps/chosen": -2.1701014041900635, "logps/rejected": -3.45693302154541, "loss": 0.753, "nll_loss": 0.7158058881759644, "rewards/accuracies": 0.75, "rewards/chosen": -0.21701014041900635, "rewards/margins": 0.1286831796169281, "rewards/rejected": -0.34569332003593445, "step": 3125 }, { "epoch": 8.558521560574949, "grad_norm": 4.568725109100342, "learning_rate": 5.71917808219178e-07, "log_odds_chosen": 0.8715063333511353, "log_odds_ratio": -0.4219699501991272, "logits/chosen": 0.683525025844574, "logits/rejected": 0.6745585203170776, "logps/chosen": -2.4501752853393555, "logps/rejected": -3.2738733291625977, "loss": 0.7929, "nll_loss": 0.7507078647613525, "rewards/accuracies": 0.875, "rewards/chosen": -0.24501752853393555, "rewards/margins": 0.08236981183290482, "rewards/rejected": -0.32738733291625977, "step": 3126 }, { "epoch": 8.56125941136208, "grad_norm": 4.618778228759766, "learning_rate": 5.717808219178082e-07, "log_odds_chosen": 1.8391708135604858, "log_odds_ratio": -0.2653985023498535, "logits/chosen": 0.8712762594223022, "logits/rejected": 0.7970725893974304, "logps/chosen": -2.070474624633789, "logps/rejected": -3.7844719886779785, "loss": 0.8019, "nll_loss": 0.7753894925117493, "rewards/accuracies": 1.0, "rewards/chosen": -0.2070474773645401, "rewards/margins": 0.17139974236488342, "rewards/rejected": -0.37844720482826233, "step": 3127 }, { "epoch": 8.563997262149213, "grad_norm": 4.450758934020996, "learning_rate": 5.716438356164383e-07, "log_odds_chosen": 1.3167616128921509, "log_odds_ratio": -0.3020777404308319, "logits/chosen": 0.6653245687484741, "logits/rejected": 0.6609041690826416, "logps/chosen": -2.145991802215576, "logps/rejected": -3.3696656227111816, "loss": 0.7454, "nll_loss": 0.7151458859443665, "rewards/accuracies": 0.875, "rewards/chosen": -0.21459920704364777, "rewards/margins": 0.12236737459897995, "rewards/rejected": -0.3369665741920471, "step": 3128 }, { "epoch": 8.566735112936344, "grad_norm": 4.384880065917969, "learning_rate": 5.715068493150684e-07, "log_odds_chosen": 2.5364909172058105, "log_odds_ratio": -0.2471984475851059, "logits/chosen": 0.7980791926383972, "logits/rejected": 0.8259397149085999, "logps/chosen": -2.1214373111724854, "logps/rejected": -4.5186614990234375, "loss": 0.6778, "nll_loss": 0.6530919671058655, "rewards/accuracies": 1.0, "rewards/chosen": -0.21214373409748077, "rewards/margins": 0.23972243070602417, "rewards/rejected": -0.45186617970466614, "step": 3129 }, { "epoch": 8.569472963723477, "grad_norm": 6.303730010986328, "learning_rate": 5.713698630136986e-07, "log_odds_chosen": 1.5288338661193848, "log_odds_ratio": -0.33427417278289795, "logits/chosen": 1.186981439590454, "logits/rejected": 1.2652044296264648, "logps/chosen": -2.742391347885132, "logps/rejected": -4.182944297790527, "loss": 0.6989, "nll_loss": 0.6654608249664307, "rewards/accuracies": 0.75, "rewards/chosen": -0.27423912286758423, "rewards/margins": 0.1440552920103073, "rewards/rejected": -0.41829442977905273, "step": 3130 }, { "epoch": 8.572210814510608, "grad_norm": 4.375984191894531, "learning_rate": 5.712328767123287e-07, "log_odds_chosen": 1.4340423345565796, "log_odds_ratio": -0.4201738238334656, "logits/chosen": 1.01792311668396, "logits/rejected": 1.1168229579925537, "logps/chosen": -2.2787909507751465, "logps/rejected": -3.671470880508423, "loss": 0.7432, "nll_loss": 0.7012313604354858, "rewards/accuracies": 0.75, "rewards/chosen": -0.2278791069984436, "rewards/margins": 0.13926799595355988, "rewards/rejected": -0.3671470880508423, "step": 3131 }, { "epoch": 8.574948665297741, "grad_norm": 4.628002166748047, "learning_rate": 5.710958904109588e-07, "log_odds_chosen": 1.4742469787597656, "log_odds_ratio": -0.27548766136169434, "logits/chosen": 0.8986774682998657, "logits/rejected": 0.8708255290985107, "logps/chosen": -1.6699577569961548, "logps/rejected": -2.9595651626586914, "loss": 0.7321, "nll_loss": 0.7045604586601257, "rewards/accuracies": 1.0, "rewards/chosen": -0.16699577867984772, "rewards/margins": 0.1289607286453247, "rewards/rejected": -0.29595649242401123, "step": 3132 }, { "epoch": 8.577686516084874, "grad_norm": 5.334560871124268, "learning_rate": 5.70958904109589e-07, "log_odds_chosen": 1.928312063217163, "log_odds_ratio": -0.2910817861557007, "logits/chosen": 0.7875517010688782, "logits/rejected": 0.730048418045044, "logps/chosen": -2.356248140335083, "logps/rejected": -4.161609172821045, "loss": 0.7369, "nll_loss": 0.7077915668487549, "rewards/accuracies": 0.75, "rewards/chosen": -0.23562481999397278, "rewards/margins": 0.18053609132766724, "rewards/rejected": -0.41616091132164, "step": 3133 }, { "epoch": 8.580424366872005, "grad_norm": 4.1059393882751465, "learning_rate": 5.708219178082192e-07, "log_odds_chosen": 2.2382705211639404, "log_odds_ratio": -0.27952054142951965, "logits/chosen": 0.8465080857276917, "logits/rejected": 0.8135989308357239, "logps/chosen": -2.253530979156494, "logps/rejected": -4.381126403808594, "loss": 0.7177, "nll_loss": 0.6897448301315308, "rewards/accuracies": 0.875, "rewards/chosen": -0.22535310685634613, "rewards/margins": 0.21275953948497772, "rewards/rejected": -0.43811267614364624, "step": 3134 }, { "epoch": 8.583162217659137, "grad_norm": 5.336700916290283, "learning_rate": 5.706849315068492e-07, "log_odds_chosen": 3.0328311920166016, "log_odds_ratio": -0.25319719314575195, "logits/chosen": 0.9180699586868286, "logits/rejected": 0.9665776491165161, "logps/chosen": -2.3135485649108887, "logps/rejected": -5.214707851409912, "loss": 0.8258, "nll_loss": 0.8005262613296509, "rewards/accuracies": 0.875, "rewards/chosen": -0.23135486245155334, "rewards/margins": 0.29011592268943787, "rewards/rejected": -0.5214707851409912, "step": 3135 }, { "epoch": 8.58590006844627, "grad_norm": 6.14333963394165, "learning_rate": 5.705479452054794e-07, "log_odds_chosen": 1.2639644145965576, "log_odds_ratio": -0.3455430865287781, "logits/chosen": 0.8764936327934265, "logits/rejected": 0.8605883717536926, "logps/chosen": -2.709487199783325, "logps/rejected": -3.901698112487793, "loss": 0.7497, "nll_loss": 0.7151421308517456, "rewards/accuracies": 0.875, "rewards/chosen": -0.27094870805740356, "rewards/margins": 0.11922109127044678, "rewards/rejected": -0.39016982913017273, "step": 3136 }, { "epoch": 8.588637919233403, "grad_norm": 4.011596202850342, "learning_rate": 5.704109589041096e-07, "log_odds_chosen": 3.0447909832000732, "log_odds_ratio": -0.20789189636707306, "logits/chosen": 1.0864957571029663, "logits/rejected": 1.097975492477417, "logps/chosen": -2.5528483390808105, "logps/rejected": -5.49732780456543, "loss": 0.7962, "nll_loss": 0.7754282355308533, "rewards/accuracies": 0.875, "rewards/chosen": -0.2552848160266876, "rewards/margins": 0.29444795846939087, "rewards/rejected": -0.5497327446937561, "step": 3137 }, { "epoch": 8.591375770020534, "grad_norm": 4.393767356872559, "learning_rate": 5.702739726027396e-07, "log_odds_chosen": 2.2256672382354736, "log_odds_ratio": -0.19594523310661316, "logits/chosen": 0.9257286787033081, "logits/rejected": 0.9994557499885559, "logps/chosen": -2.259092092514038, "logps/rejected": -4.378138542175293, "loss": 0.7727, "nll_loss": 0.7530746459960938, "rewards/accuracies": 1.0, "rewards/chosen": -0.22590921819210052, "rewards/margins": 0.21190467476844788, "rewards/rejected": -0.4378138780593872, "step": 3138 }, { "epoch": 8.594113620807667, "grad_norm": 5.192179203033447, "learning_rate": 5.701369863013698e-07, "log_odds_chosen": 1.5042104721069336, "log_odds_ratio": -0.38855046033859253, "logits/chosen": 1.0539846420288086, "logits/rejected": 1.0522279739379883, "logps/chosen": -2.2733306884765625, "logps/rejected": -3.693167209625244, "loss": 0.7742, "nll_loss": 0.7353065013885498, "rewards/accuracies": 0.75, "rewards/chosen": -0.22733305394649506, "rewards/margins": 0.14198368787765503, "rewards/rejected": -0.3693167269229889, "step": 3139 }, { "epoch": 8.596851471594798, "grad_norm": 4.793206691741943, "learning_rate": 5.699999999999999e-07, "log_odds_chosen": 2.7351794242858887, "log_odds_ratio": -0.2701480984687805, "logits/chosen": 0.9889040589332581, "logits/rejected": 1.006974697113037, "logps/chosen": -2.6248273849487305, "logps/rejected": -5.3180036544799805, "loss": 0.7034, "nll_loss": 0.6763990521430969, "rewards/accuracies": 0.875, "rewards/chosen": -0.26248276233673096, "rewards/margins": 0.2693175673484802, "rewards/rejected": -0.5318003296852112, "step": 3140 }, { "epoch": 8.59958932238193, "grad_norm": 5.2335052490234375, "learning_rate": 5.698630136986301e-07, "log_odds_chosen": 2.6437487602233887, "log_odds_ratio": -0.44103556871414185, "logits/chosen": 0.7052323818206787, "logits/rejected": 0.7046564221382141, "logps/chosen": -2.1389899253845215, "logps/rejected": -4.676120758056641, "loss": 0.7662, "nll_loss": 0.7221066951751709, "rewards/accuracies": 0.75, "rewards/chosen": -0.21389900147914886, "rewards/margins": 0.25371307134628296, "rewards/rejected": -0.467612087726593, "step": 3141 }, { "epoch": 8.602327173169062, "grad_norm": 4.360132694244385, "learning_rate": 5.697260273972602e-07, "log_odds_chosen": 2.301208972930908, "log_odds_ratio": -0.16912758350372314, "logits/chosen": 0.8316272497177124, "logits/rejected": 0.850767970085144, "logps/chosen": -2.0701520442962646, "logps/rejected": -4.234537124633789, "loss": 0.7609, "nll_loss": 0.7440351247787476, "rewards/accuracies": 1.0, "rewards/chosen": -0.20701521635055542, "rewards/margins": 0.21643848717212677, "rewards/rejected": -0.423453688621521, "step": 3142 }, { "epoch": 8.605065023956195, "grad_norm": 6.249680042266846, "learning_rate": 5.695890410958903e-07, "log_odds_chosen": 1.302727222442627, "log_odds_ratio": -0.5688939690589905, "logits/chosen": 0.6999210119247437, "logits/rejected": 0.6947449445724487, "logps/chosen": -2.456024646759033, "logps/rejected": -3.6173980236053467, "loss": 0.9614, "nll_loss": 0.9044929146766663, "rewards/accuracies": 0.875, "rewards/chosen": -0.24560247361660004, "rewards/margins": 0.11613735556602478, "rewards/rejected": -0.3617398142814636, "step": 3143 }, { "epoch": 8.607802874743326, "grad_norm": 5.4884138107299805, "learning_rate": 5.694520547945206e-07, "log_odds_chosen": 3.0624823570251465, "log_odds_ratio": -0.322151243686676, "logits/chosen": 0.9221019148826599, "logits/rejected": 0.9230422377586365, "logps/chosen": -2.319884777069092, "logps/rejected": -5.270586967468262, "loss": 0.7536, "nll_loss": 0.7214272618293762, "rewards/accuracies": 0.75, "rewards/chosen": -0.23198847472667694, "rewards/margins": 0.2950702905654907, "rewards/rejected": -0.5270587205886841, "step": 3144 }, { "epoch": 8.61054072553046, "grad_norm": 6.246682643890381, "learning_rate": 5.693150684931506e-07, "log_odds_chosen": 1.5324617624282837, "log_odds_ratio": -0.48577994108200073, "logits/chosen": 0.9614147543907166, "logits/rejected": 1.0293817520141602, "logps/chosen": -2.982983112335205, "logps/rejected": -4.469795227050781, "loss": 0.8081, "nll_loss": 0.7595285177230835, "rewards/accuracies": 0.75, "rewards/chosen": -0.29829829931259155, "rewards/margins": 0.14868119359016418, "rewards/rejected": -0.4469795227050781, "step": 3145 }, { "epoch": 8.61327857631759, "grad_norm": 5.919047832489014, "learning_rate": 5.691780821917807e-07, "log_odds_chosen": 1.6505155563354492, "log_odds_ratio": -0.5851107835769653, "logits/chosen": 0.9747102856636047, "logits/rejected": 0.9292081594467163, "logps/chosen": -2.096492290496826, "logps/rejected": -3.6716485023498535, "loss": 0.8073, "nll_loss": 0.748761773109436, "rewards/accuracies": 0.5, "rewards/chosen": -0.2096492201089859, "rewards/margins": 0.15751564502716064, "rewards/rejected": -0.36716485023498535, "step": 3146 }, { "epoch": 8.616016427104723, "grad_norm": 5.157100200653076, "learning_rate": 5.69041095890411e-07, "log_odds_chosen": 2.6753182411193848, "log_odds_ratio": -0.18796639144420624, "logits/chosen": 0.9397343397140503, "logits/rejected": 0.9931174516677856, "logps/chosen": -2.4668099880218506, "logps/rejected": -4.997268199920654, "loss": 0.724, "nll_loss": 0.7052319645881653, "rewards/accuracies": 1.0, "rewards/chosen": -0.24668100476264954, "rewards/margins": 0.25304582715034485, "rewards/rejected": -0.4997268319129944, "step": 3147 }, { "epoch": 8.618754277891854, "grad_norm": 5.345527648925781, "learning_rate": 5.689041095890412e-07, "log_odds_chosen": 2.7057535648345947, "log_odds_ratio": -0.1264197826385498, "logits/chosen": 1.1370350122451782, "logits/rejected": 1.1461433172225952, "logps/chosen": -2.669327974319458, "logps/rejected": -5.273087978363037, "loss": 0.691, "nll_loss": 0.6783446669578552, "rewards/accuracies": 1.0, "rewards/chosen": -0.26693278551101685, "rewards/margins": 0.2603760063648224, "rewards/rejected": -0.5273088216781616, "step": 3148 }, { "epoch": 8.621492128678987, "grad_norm": 4.551733493804932, "learning_rate": 5.687671232876712e-07, "log_odds_chosen": 1.0574817657470703, "log_odds_ratio": -0.3397422134876251, "logits/chosen": 0.8822457194328308, "logits/rejected": 0.9139910936355591, "logps/chosen": -2.165870189666748, "logps/rejected": -3.129940986633301, "loss": 0.6768, "nll_loss": 0.6427822709083557, "rewards/accuracies": 1.0, "rewards/chosen": -0.21658703684806824, "rewards/margins": 0.09640707075595856, "rewards/rejected": -0.312994122505188, "step": 3149 }, { "epoch": 8.624229979466119, "grad_norm": 3.772149085998535, "learning_rate": 5.686301369863014e-07, "log_odds_chosen": 2.8065781593322754, "log_odds_ratio": -0.18347273766994476, "logits/chosen": 0.886394202709198, "logits/rejected": 0.8817527294158936, "logps/chosen": -1.7967804670333862, "logps/rejected": -4.411954879760742, "loss": 0.7344, "nll_loss": 0.7160449028015137, "rewards/accuracies": 1.0, "rewards/chosen": -0.1796780675649643, "rewards/margins": 0.2615174651145935, "rewards/rejected": -0.4411955177783966, "step": 3150 }, { "epoch": 8.626967830253252, "grad_norm": 4.4322052001953125, "learning_rate": 5.684931506849316e-07, "log_odds_chosen": 2.1695337295532227, "log_odds_ratio": -0.24622896313667297, "logits/chosen": 0.8491808176040649, "logits/rejected": 0.8497405052185059, "logps/chosen": -2.6395275592803955, "logps/rejected": -4.635017395019531, "loss": 0.691, "nll_loss": 0.666399359703064, "rewards/accuracies": 1.0, "rewards/chosen": -0.26395276188850403, "rewards/margins": 0.19954901933670044, "rewards/rejected": -0.46350178122520447, "step": 3151 }, { "epoch": 8.629705681040383, "grad_norm": 8.322787284851074, "learning_rate": 5.683561643835616e-07, "log_odds_chosen": 1.566306710243225, "log_odds_ratio": -0.6715565919876099, "logits/chosen": 1.1803388595581055, "logits/rejected": 1.1384303569793701, "logps/chosen": -2.741319179534912, "logps/rejected": -4.166851043701172, "loss": 0.7886, "nll_loss": 0.7214192152023315, "rewards/accuracies": 0.875, "rewards/chosen": -0.2741319239139557, "rewards/margins": 0.1425531506538391, "rewards/rejected": -0.4166851043701172, "step": 3152 }, { "epoch": 8.632443531827516, "grad_norm": 4.061464309692383, "learning_rate": 5.682191780821918e-07, "log_odds_chosen": 2.0789999961853027, "log_odds_ratio": -0.19628466665744781, "logits/chosen": 0.9626235961914062, "logits/rejected": 0.9840219020843506, "logps/chosen": -2.17720890045166, "logps/rejected": -4.143118381500244, "loss": 0.7739, "nll_loss": 0.7542470693588257, "rewards/accuracies": 1.0, "rewards/chosen": -0.2177208960056305, "rewards/margins": 0.19659091532230377, "rewards/rejected": -0.41431182622909546, "step": 3153 }, { "epoch": 8.635181382614647, "grad_norm": 4.508747577667236, "learning_rate": 5.680821917808219e-07, "log_odds_chosen": 1.7404003143310547, "log_odds_ratio": -0.3597261607646942, "logits/chosen": 0.8370068073272705, "logits/rejected": 0.8486089110374451, "logps/chosen": -1.5747653245925903, "logps/rejected": -3.1606600284576416, "loss": 0.8314, "nll_loss": 0.795407235622406, "rewards/accuracies": 0.75, "rewards/chosen": -0.1574765294790268, "rewards/margins": 0.15858948230743408, "rewards/rejected": -0.31606602668762207, "step": 3154 }, { "epoch": 8.63791923340178, "grad_norm": 4.173140048980713, "learning_rate": 5.679452054794521e-07, "log_odds_chosen": 1.8152109384536743, "log_odds_ratio": -0.30954357981681824, "logits/chosen": 0.7911603450775146, "logits/rejected": 0.7528845071792603, "logps/chosen": -2.7829248905181885, "logps/rejected": -4.451512336730957, "loss": 0.7977, "nll_loss": 0.7667262554168701, "rewards/accuracies": 0.875, "rewards/chosen": -0.2782924771308899, "rewards/margins": 0.1668587476015091, "rewards/rejected": -0.4451512396335602, "step": 3155 }, { "epoch": 8.640657084188911, "grad_norm": 5.462137699127197, "learning_rate": 5.678082191780822e-07, "log_odds_chosen": 2.2494800090789795, "log_odds_ratio": -0.34383606910705566, "logits/chosen": 0.8984363079071045, "logits/rejected": 0.9907095432281494, "logps/chosen": -2.0700061321258545, "logps/rejected": -3.9202089309692383, "loss": 0.8474, "nll_loss": 0.8129978179931641, "rewards/accuracies": 0.875, "rewards/chosen": -0.20700061321258545, "rewards/margins": 0.18502026796340942, "rewards/rejected": -0.3920208811759949, "step": 3156 }, { "epoch": 8.643394934976044, "grad_norm": 5.26214599609375, "learning_rate": 5.676712328767123e-07, "log_odds_chosen": 0.7967358231544495, "log_odds_ratio": -0.5064994096755981, "logits/chosen": 0.8844188451766968, "logits/rejected": 0.9812124967575073, "logps/chosen": -2.6510865688323975, "logps/rejected": -3.421151638031006, "loss": 0.7941, "nll_loss": 0.7434934377670288, "rewards/accuracies": 0.75, "rewards/chosen": -0.2651086747646332, "rewards/margins": 0.07700648903846741, "rewards/rejected": -0.3421151638031006, "step": 3157 }, { "epoch": 8.646132785763175, "grad_norm": 4.7882304191589355, "learning_rate": 5.675342465753425e-07, "log_odds_chosen": 1.5932716131210327, "log_odds_ratio": -0.3356991410255432, "logits/chosen": 0.7160950899124146, "logits/rejected": 0.6492670774459839, "logps/chosen": -1.7402424812316895, "logps/rejected": -3.2247672080993652, "loss": 0.7678, "nll_loss": 0.7342293858528137, "rewards/accuracies": 0.875, "rewards/chosen": -0.17402425408363342, "rewards/margins": 0.1484525054693222, "rewards/rejected": -0.32247674465179443, "step": 3158 }, { "epoch": 8.648870636550308, "grad_norm": 5.763742923736572, "learning_rate": 5.673972602739726e-07, "log_odds_chosen": 3.2744390964508057, "log_odds_ratio": -0.08413590490818024, "logits/chosen": 1.1430411338806152, "logits/rejected": 1.2000776529312134, "logps/chosen": -2.331212043762207, "logps/rejected": -5.490687847137451, "loss": 0.7806, "nll_loss": 0.7721666693687439, "rewards/accuracies": 1.0, "rewards/chosen": -0.23312120139598846, "rewards/margins": 0.31594759225845337, "rewards/rejected": -0.549068808555603, "step": 3159 }, { "epoch": 8.651608487337441, "grad_norm": 5.534823417663574, "learning_rate": 5.672602739726027e-07, "log_odds_chosen": 0.9306211471557617, "log_odds_ratio": -0.4548216760158539, "logits/chosen": 0.817301869392395, "logits/rejected": 0.7831727266311646, "logps/chosen": -2.7632508277893066, "logps/rejected": -3.64467716217041, "loss": 0.7651, "nll_loss": 0.7196327447891235, "rewards/accuracies": 0.75, "rewards/chosen": -0.2763250768184662, "rewards/margins": 0.08814262598752975, "rewards/rejected": -0.36446771025657654, "step": 3160 }, { "epoch": 8.654346338124572, "grad_norm": 3.666853904724121, "learning_rate": 5.671232876712329e-07, "log_odds_chosen": 2.6575937271118164, "log_odds_ratio": -0.21314936876296997, "logits/chosen": 0.9032941460609436, "logits/rejected": 0.9219257831573486, "logps/chosen": -3.4261677265167236, "logps/rejected": -5.958488464355469, "loss": 0.7673, "nll_loss": 0.7459452748298645, "rewards/accuracies": 0.875, "rewards/chosen": -0.3426167666912079, "rewards/margins": 0.25323206186294556, "rewards/rejected": -0.5958489179611206, "step": 3161 }, { "epoch": 8.657084188911703, "grad_norm": 5.2257537841796875, "learning_rate": 5.669863013698631e-07, "log_odds_chosen": 2.6940836906433105, "log_odds_ratio": -0.1372661143541336, "logits/chosen": 0.9289214015007019, "logits/rejected": 1.0101655721664429, "logps/chosen": -2.9533157348632812, "logps/rejected": -5.575237274169922, "loss": 0.7633, "nll_loss": 0.7495879530906677, "rewards/accuracies": 1.0, "rewards/chosen": -0.29533159732818604, "rewards/margins": 0.26219213008880615, "rewards/rejected": -0.5575237274169922, "step": 3162 }, { "epoch": 8.659822039698836, "grad_norm": 4.8771491050720215, "learning_rate": 5.668493150684931e-07, "log_odds_chosen": 1.9403263330459595, "log_odds_ratio": -0.2328086495399475, "logits/chosen": 1.000649094581604, "logits/rejected": 1.0183371305465698, "logps/chosen": -1.6074159145355225, "logps/rejected": -3.375659465789795, "loss": 0.7203, "nll_loss": 0.6969950199127197, "rewards/accuracies": 1.0, "rewards/chosen": -0.16074159741401672, "rewards/margins": 0.17682436108589172, "rewards/rejected": -0.33756595849990845, "step": 3163 }, { "epoch": 8.66255989048597, "grad_norm": 5.404991626739502, "learning_rate": 5.667123287671233e-07, "log_odds_chosen": 1.947401762008667, "log_odds_ratio": -0.3908821940422058, "logits/chosen": 0.863358736038208, "logits/rejected": 0.810172438621521, "logps/chosen": -1.9506951570510864, "logps/rejected": -3.7657971382141113, "loss": 0.8045, "nll_loss": 0.7654574513435364, "rewards/accuracies": 0.75, "rewards/chosen": -0.19506950676441193, "rewards/margins": 0.18151019513607025, "rewards/rejected": -0.3765797019004822, "step": 3164 }, { "epoch": 8.6652977412731, "grad_norm": 4.105766773223877, "learning_rate": 5.665753424657535e-07, "log_odds_chosen": 2.359539031982422, "log_odds_ratio": -0.2543385922908783, "logits/chosen": 0.671830952167511, "logits/rejected": 0.6432062387466431, "logps/chosen": -2.5287086963653564, "logps/rejected": -4.805503845214844, "loss": 0.8249, "nll_loss": 0.7994773387908936, "rewards/accuracies": 0.875, "rewards/chosen": -0.2528708875179291, "rewards/margins": 0.2276795506477356, "rewards/rejected": -0.4805504083633423, "step": 3165 }, { "epoch": 8.668035592060233, "grad_norm": 5.6224470138549805, "learning_rate": 5.664383561643835e-07, "log_odds_chosen": 1.017235279083252, "log_odds_ratio": -0.5179005861282349, "logits/chosen": 0.7280778288841248, "logits/rejected": 0.7404860258102417, "logps/chosen": -2.368725061416626, "logps/rejected": -3.3123245239257812, "loss": 0.8529, "nll_loss": 0.8010646104812622, "rewards/accuracies": 0.875, "rewards/chosen": -0.23687249422073364, "rewards/margins": 0.09435995668172836, "rewards/rejected": -0.3312324583530426, "step": 3166 }, { "epoch": 8.670773442847365, "grad_norm": 4.494600296020508, "learning_rate": 5.663013698630137e-07, "log_odds_chosen": 2.251256227493286, "log_odds_ratio": -0.23975184559822083, "logits/chosen": 1.0302197933197021, "logits/rejected": 1.066576600074768, "logps/chosen": -2.1313579082489014, "logps/rejected": -4.2672834396362305, "loss": 0.7284, "nll_loss": 0.7043873071670532, "rewards/accuracies": 1.0, "rewards/chosen": -0.21313580870628357, "rewards/margins": 0.2135925441980362, "rewards/rejected": -0.42672833800315857, "step": 3167 }, { "epoch": 8.673511293634498, "grad_norm": 4.3831634521484375, "learning_rate": 5.661643835616438e-07, "log_odds_chosen": 2.9304285049438477, "log_odds_ratio": -0.1867588460445404, "logits/chosen": 0.7563514709472656, "logits/rejected": 0.7308619618415833, "logps/chosen": -1.906131386756897, "logps/rejected": -4.69562292098999, "loss": 0.8343, "nll_loss": 0.8156337738037109, "rewards/accuracies": 0.875, "rewards/chosen": -0.19061313569545746, "rewards/margins": 0.27894914150238037, "rewards/rejected": -0.469562292098999, "step": 3168 }, { "epoch": 8.676249144421629, "grad_norm": 4.029239177703857, "learning_rate": 5.66027397260274e-07, "log_odds_chosen": 2.1899523735046387, "log_odds_ratio": -0.2053690403699875, "logits/chosen": 1.0827628374099731, "logits/rejected": 1.097324013710022, "logps/chosen": -2.0987401008605957, "logps/rejected": -4.143969535827637, "loss": 0.7206, "nll_loss": 0.7000744342803955, "rewards/accuracies": 1.0, "rewards/chosen": -0.2098740190267563, "rewards/margins": 0.20452295243740082, "rewards/rejected": -0.4143969416618347, "step": 3169 }, { "epoch": 8.678986995208762, "grad_norm": 5.144701957702637, "learning_rate": 5.658904109589041e-07, "log_odds_chosen": 2.0820250511169434, "log_odds_ratio": -0.3383867144584656, "logits/chosen": 0.9407128095626831, "logits/rejected": 0.9134255051612854, "logps/chosen": -2.2096550464630127, "logps/rejected": -4.198116302490234, "loss": 0.8137, "nll_loss": 0.7798810601234436, "rewards/accuracies": 0.875, "rewards/chosen": -0.22096551954746246, "rewards/margins": 0.19884614646434784, "rewards/rejected": -0.4198116660118103, "step": 3170 }, { "epoch": 8.681724845995893, "grad_norm": 5.209510803222656, "learning_rate": 5.657534246575342e-07, "log_odds_chosen": 2.817187547683716, "log_odds_ratio": -0.12477204948663712, "logits/chosen": 1.084001898765564, "logits/rejected": 1.1749824285507202, "logps/chosen": -1.8987114429473877, "logps/rejected": -4.525669574737549, "loss": 0.582, "nll_loss": 0.5695217847824097, "rewards/accuracies": 1.0, "rewards/chosen": -0.189871147274971, "rewards/margins": 0.262695848941803, "rewards/rejected": -0.4525669813156128, "step": 3171 }, { "epoch": 8.684462696783026, "grad_norm": 12.704227447509766, "learning_rate": 5.656164383561644e-07, "log_odds_chosen": 1.094832420349121, "log_odds_ratio": -0.8211379051208496, "logits/chosen": 0.8188120722770691, "logits/rejected": 0.7945900559425354, "logps/chosen": -3.3450825214385986, "logps/rejected": -4.4079270362854, "loss": 0.9221, "nll_loss": 0.8399659395217896, "rewards/accuracies": 0.625, "rewards/chosen": -0.3345082402229309, "rewards/margins": 0.1062844842672348, "rewards/rejected": -0.4407927393913269, "step": 3172 }, { "epoch": 8.687200547570157, "grad_norm": 5.692783355712891, "learning_rate": 5.654794520547945e-07, "log_odds_chosen": 1.8551990985870361, "log_odds_ratio": -0.3655945360660553, "logits/chosen": 1.1273466348648071, "logits/rejected": 1.1123106479644775, "logps/chosen": -2.1480212211608887, "logps/rejected": -3.8053958415985107, "loss": 0.6879, "nll_loss": 0.6513378024101257, "rewards/accuracies": 0.875, "rewards/chosen": -0.2148021161556244, "rewards/margins": 0.16573745012283325, "rewards/rejected": -0.38053956627845764, "step": 3173 }, { "epoch": 8.68993839835729, "grad_norm": 4.480318069458008, "learning_rate": 5.653424657534246e-07, "log_odds_chosen": 2.095979690551758, "log_odds_ratio": -0.19495373964309692, "logits/chosen": 0.8278331756591797, "logits/rejected": 0.8153561949729919, "logps/chosen": -1.960089921951294, "logps/rejected": -3.8945555686950684, "loss": 0.8162, "nll_loss": 0.7967386245727539, "rewards/accuracies": 1.0, "rewards/chosen": -0.19600899517536163, "rewards/margins": 0.1934465765953064, "rewards/rejected": -0.38945555686950684, "step": 3174 }, { "epoch": 8.692676249144421, "grad_norm": 4.427473545074463, "learning_rate": 5.652054794520548e-07, "log_odds_chosen": 1.920871615409851, "log_odds_ratio": -0.2590405344963074, "logits/chosen": 0.7081161737442017, "logits/rejected": 0.6490142941474915, "logps/chosen": -1.6386891603469849, "logps/rejected": -3.3888657093048096, "loss": 0.8404, "nll_loss": 0.8144511580467224, "rewards/accuracies": 1.0, "rewards/chosen": -0.16386890411376953, "rewards/margins": 0.17501766979694366, "rewards/rejected": -0.338886559009552, "step": 3175 }, { "epoch": 8.695414099931554, "grad_norm": 5.394284248352051, "learning_rate": 5.65068493150685e-07, "log_odds_chosen": 1.1088953018188477, "log_odds_ratio": -0.5251665115356445, "logits/chosen": 0.813938558101654, "logits/rejected": 0.7947404980659485, "logps/chosen": -2.6307852268218994, "logps/rejected": -3.6876347064971924, "loss": 0.8384, "nll_loss": 0.7859129905700684, "rewards/accuracies": 0.875, "rewards/chosen": -0.2630785405635834, "rewards/margins": 0.10568492859601974, "rewards/rejected": -0.36876344680786133, "step": 3176 }, { "epoch": 8.698151950718685, "grad_norm": 3.82108998298645, "learning_rate": 5.64931506849315e-07, "log_odds_chosen": 2.0073487758636475, "log_odds_ratio": -0.21806252002716064, "logits/chosen": 0.629867672920227, "logits/rejected": 0.6064099669456482, "logps/chosen": -2.3699393272399902, "logps/rejected": -4.290899276733398, "loss": 0.8281, "nll_loss": 0.8063271641731262, "rewards/accuracies": 1.0, "rewards/chosen": -0.2369939535856247, "rewards/margins": 0.19209598004817963, "rewards/rejected": -0.4290899336338043, "step": 3177 }, { "epoch": 8.700889801505818, "grad_norm": 4.507159233093262, "learning_rate": 5.647945205479452e-07, "log_odds_chosen": 1.7323037385940552, "log_odds_ratio": -0.2815493047237396, "logits/chosen": 0.9085970520973206, "logits/rejected": 0.8691336512565613, "logps/chosen": -2.192866563796997, "logps/rejected": -3.832688808441162, "loss": 0.7566, "nll_loss": 0.7283962965011597, "rewards/accuracies": 1.0, "rewards/chosen": -0.21928668022155762, "rewards/margins": 0.16398224234580994, "rewards/rejected": -0.38326889276504517, "step": 3178 }, { "epoch": 8.70362765229295, "grad_norm": 4.342905521392822, "learning_rate": 5.646575342465754e-07, "log_odds_chosen": 1.2196705341339111, "log_odds_ratio": -0.3354867994785309, "logits/chosen": 0.6709601283073425, "logits/rejected": 0.6676911115646362, "logps/chosen": -2.430497169494629, "logps/rejected": -3.5376155376434326, "loss": 0.7506, "nll_loss": 0.7170538902282715, "rewards/accuracies": 0.875, "rewards/chosen": -0.2430497109889984, "rewards/margins": 0.11071185767650604, "rewards/rejected": -0.35376158356666565, "step": 3179 }, { "epoch": 8.706365503080082, "grad_norm": 4.673288822174072, "learning_rate": 5.645205479452054e-07, "log_odds_chosen": 3.292210102081299, "log_odds_ratio": -0.11980006098747253, "logits/chosen": 0.9984437227249146, "logits/rejected": 1.0762624740600586, "logps/chosen": -2.363563060760498, "logps/rejected": -5.542481422424316, "loss": 0.7172, "nll_loss": 0.7051770687103271, "rewards/accuracies": 1.0, "rewards/chosen": -0.23635631799697876, "rewards/margins": 0.3178918659687042, "rewards/rejected": -0.5542481541633606, "step": 3180 }, { "epoch": 8.709103353867214, "grad_norm": 6.051589488983154, "learning_rate": 5.643835616438356e-07, "log_odds_chosen": 1.6296939849853516, "log_odds_ratio": -0.47479021549224854, "logits/chosen": 0.7688860893249512, "logits/rejected": 0.8433657884597778, "logps/chosen": -2.3753745555877686, "logps/rejected": -3.930392265319824, "loss": 0.8234, "nll_loss": 0.7759667038917542, "rewards/accuracies": 0.625, "rewards/chosen": -0.2375374436378479, "rewards/margins": 0.15550178289413452, "rewards/rejected": -0.3930392265319824, "step": 3181 }, { "epoch": 8.711841204654347, "grad_norm": 5.80877161026001, "learning_rate": 5.642465753424658e-07, "log_odds_chosen": 0.6514397859573364, "log_odds_ratio": -0.6651386618614197, "logits/chosen": 0.9905282258987427, "logits/rejected": 0.9287421703338623, "logps/chosen": -2.4350719451904297, "logps/rejected": -3.084120750427246, "loss": 0.8412, "nll_loss": 0.7747307419776917, "rewards/accuracies": 0.5, "rewards/chosen": -0.24350719153881073, "rewards/margins": 0.06490488350391388, "rewards/rejected": -0.3084120750427246, "step": 3182 }, { "epoch": 8.714579055441478, "grad_norm": 4.083870887756348, "learning_rate": 5.641095890410958e-07, "log_odds_chosen": 2.592806339263916, "log_odds_ratio": -0.11471664160490036, "logits/chosen": 1.1209254264831543, "logits/rejected": 1.1816327571868896, "logps/chosen": -2.4084880352020264, "logps/rejected": -4.885197639465332, "loss": 0.6904, "nll_loss": 0.6789613962173462, "rewards/accuracies": 1.0, "rewards/chosen": -0.24084880948066711, "rewards/margins": 0.2476709634065628, "rewards/rejected": -0.4885197877883911, "step": 3183 }, { "epoch": 8.71731690622861, "grad_norm": 4.503445148468018, "learning_rate": 5.63972602739726e-07, "log_odds_chosen": 1.9908409118652344, "log_odds_ratio": -0.23275049030780792, "logits/chosen": 0.7375736236572266, "logits/rejected": 0.7713444828987122, "logps/chosen": -2.1755785942077637, "logps/rejected": -4.031590461730957, "loss": 0.7608, "nll_loss": 0.7374856472015381, "rewards/accuracies": 1.0, "rewards/chosen": -0.2175578773021698, "rewards/margins": 0.18560120463371277, "rewards/rejected": -0.4031590521335602, "step": 3184 }, { "epoch": 8.720054757015742, "grad_norm": 5.462538242340088, "learning_rate": 5.638356164383561e-07, "log_odds_chosen": 1.7246694564819336, "log_odds_ratio": -0.41398006677627563, "logits/chosen": 1.0103164911270142, "logits/rejected": 1.0059640407562256, "logps/chosen": -3.079887866973877, "logps/rejected": -4.744328498840332, "loss": 0.7964, "nll_loss": 0.7549968361854553, "rewards/accuracies": 0.875, "rewards/chosen": -0.3079887628555298, "rewards/margins": 0.1664441078901291, "rewards/rejected": -0.4744328558444977, "step": 3185 }, { "epoch": 8.722792607802875, "grad_norm": 7.128351211547852, "learning_rate": 5.636986301369863e-07, "log_odds_chosen": -0.17464075982570648, "log_odds_ratio": -1.0208840370178223, "logits/chosen": 0.9022836685180664, "logits/rejected": 0.9460461139678955, "logps/chosen": -3.574702739715576, "logps/rejected": -3.369767665863037, "loss": 0.9382, "nll_loss": 0.8361138701438904, "rewards/accuracies": 0.625, "rewards/chosen": -0.3574702739715576, "rewards/margins": -0.02049349807202816, "rewards/rejected": -0.3369767665863037, "step": 3186 }, { "epoch": 8.725530458590008, "grad_norm": 4.794908046722412, "learning_rate": 5.635616438356164e-07, "log_odds_chosen": 1.7144689559936523, "log_odds_ratio": -0.22833603620529175, "logits/chosen": 0.8598208427429199, "logits/rejected": 0.8145396709442139, "logps/chosen": -2.0171265602111816, "logps/rejected": -3.5988903045654297, "loss": 0.7205, "nll_loss": 0.6976478099822998, "rewards/accuracies": 1.0, "rewards/chosen": -0.20171266794204712, "rewards/margins": 0.15817636251449585, "rewards/rejected": -0.35988903045654297, "step": 3187 }, { "epoch": 8.728268309377139, "grad_norm": 4.504880428314209, "learning_rate": 5.634246575342465e-07, "log_odds_chosen": 1.6546030044555664, "log_odds_ratio": -0.40220242738723755, "logits/chosen": 0.8917074203491211, "logits/rejected": 0.8945238590240479, "logps/chosen": -2.735548973083496, "logps/rejected": -4.350790977478027, "loss": 0.9012, "nll_loss": 0.8609607815742493, "rewards/accuracies": 0.875, "rewards/chosen": -0.27355489134788513, "rewards/margins": 0.1615241914987564, "rewards/rejected": -0.43507906794548035, "step": 3188 }, { "epoch": 8.73100616016427, "grad_norm": 4.513782024383545, "learning_rate": 5.632876712328767e-07, "log_odds_chosen": 2.4067025184631348, "log_odds_ratio": -0.22472041845321655, "logits/chosen": 0.6921341419219971, "logits/rejected": 0.6338180303573608, "logps/chosen": -3.000709056854248, "logps/rejected": -5.3270063400268555, "loss": 0.9959, "nll_loss": 0.9733797907829285, "rewards/accuracies": 1.0, "rewards/chosen": -0.3000709116458893, "rewards/margins": 0.232629656791687, "rewards/rejected": -0.5327005386352539, "step": 3189 }, { "epoch": 8.733744010951403, "grad_norm": 4.392301559448242, "learning_rate": 5.631506849315069e-07, "log_odds_chosen": 1.9442567825317383, "log_odds_ratio": -0.21888577938079834, "logits/chosen": 0.7567768096923828, "logits/rejected": 0.7346163988113403, "logps/chosen": -1.913604974746704, "logps/rejected": -3.725029468536377, "loss": 0.8095, "nll_loss": 0.787602424621582, "rewards/accuracies": 1.0, "rewards/chosen": -0.1913605034351349, "rewards/margins": 0.1811424344778061, "rewards/rejected": -0.3725029528141022, "step": 3190 }, { "epoch": 8.736481861738536, "grad_norm": 4.015016078948975, "learning_rate": 5.630136986301369e-07, "log_odds_chosen": 1.89689302444458, "log_odds_ratio": -0.2669721841812134, "logits/chosen": 1.0120898485183716, "logits/rejected": 1.0602182149887085, "logps/chosen": -2.1194398403167725, "logps/rejected": -3.9104833602905273, "loss": 0.7226, "nll_loss": 0.6958639621734619, "rewards/accuracies": 1.0, "rewards/chosen": -0.21194398403167725, "rewards/margins": 0.17910435795783997, "rewards/rejected": -0.3910483419895172, "step": 3191 }, { "epoch": 8.739219712525667, "grad_norm": 6.834279537200928, "learning_rate": 5.628767123287671e-07, "log_odds_chosen": 1.5683588981628418, "log_odds_ratio": -0.43348604440689087, "logits/chosen": 0.6916748285293579, "logits/rejected": 0.7164816856384277, "logps/chosen": -1.9135334491729736, "logps/rejected": -3.4056129455566406, "loss": 0.7338, "nll_loss": 0.6904148459434509, "rewards/accuracies": 0.75, "rewards/chosen": -0.19135335087776184, "rewards/margins": 0.1492079496383667, "rewards/rejected": -0.34056130051612854, "step": 3192 }, { "epoch": 8.7419575633128, "grad_norm": 4.2824811935424805, "learning_rate": 5.627397260273973e-07, "log_odds_chosen": 1.8339977264404297, "log_odds_ratio": -0.3147408366203308, "logits/chosen": 0.937724769115448, "logits/rejected": 0.9613934755325317, "logps/chosen": -1.9007045030593872, "logps/rejected": -3.49444317817688, "loss": 0.7462, "nll_loss": 0.7147271037101746, "rewards/accuracies": 0.875, "rewards/chosen": -0.19007045030593872, "rewards/margins": 0.15937387943267822, "rewards/rejected": -0.34944432973861694, "step": 3193 }, { "epoch": 8.744695414099931, "grad_norm": 8.054241180419922, "learning_rate": 5.626027397260273e-07, "log_odds_chosen": 1.123852252960205, "log_odds_ratio": -0.7360960245132446, "logits/chosen": 0.9631000757217407, "logits/rejected": 0.9673597812652588, "logps/chosen": -2.9723644256591797, "logps/rejected": -3.9498488903045654, "loss": 0.8201, "nll_loss": 0.7465134859085083, "rewards/accuracies": 0.75, "rewards/chosen": -0.29723644256591797, "rewards/margins": 0.09774847328662872, "rewards/rejected": -0.3949849307537079, "step": 3194 }, { "epoch": 8.747433264887064, "grad_norm": 4.743768215179443, "learning_rate": 5.624657534246575e-07, "log_odds_chosen": 1.1732585430145264, "log_odds_ratio": -0.32228443026542664, "logits/chosen": 0.7469075918197632, "logits/rejected": 0.7274667024612427, "logps/chosen": -2.422255039215088, "logps/rejected": -3.5077221393585205, "loss": 0.8428, "nll_loss": 0.8105811476707458, "rewards/accuracies": 1.0, "rewards/chosen": -0.2422255128622055, "rewards/margins": 0.10854670405387878, "rewards/rejected": -0.3507722020149231, "step": 3195 }, { "epoch": 8.750171115674195, "grad_norm": 6.387363910675049, "learning_rate": 5.623287671232877e-07, "log_odds_chosen": 2.6805195808410645, "log_odds_ratio": -0.3439568281173706, "logits/chosen": 1.210140347480774, "logits/rejected": 1.2030444145202637, "logps/chosen": -2.49851131439209, "logps/rejected": -5.056928634643555, "loss": 0.7525, "nll_loss": 0.718113899230957, "rewards/accuracies": 0.875, "rewards/chosen": -0.24985113739967346, "rewards/margins": 0.25584179162979126, "rewards/rejected": -0.5056929588317871, "step": 3196 }, { "epoch": 8.752908966461328, "grad_norm": 6.027527332305908, "learning_rate": 5.621917808219177e-07, "log_odds_chosen": 1.9107069969177246, "log_odds_ratio": -0.38427841663360596, "logits/chosen": 1.0468316078186035, "logits/rejected": 1.0047988891601562, "logps/chosen": -1.7956184148788452, "logps/rejected": -3.493345260620117, "loss": 0.6992, "nll_loss": 0.6607584357261658, "rewards/accuracies": 0.75, "rewards/chosen": -0.17956185340881348, "rewards/margins": 0.169772669672966, "rewards/rejected": -0.3493345081806183, "step": 3197 }, { "epoch": 8.75564681724846, "grad_norm": 5.230481147766113, "learning_rate": 5.620547945205479e-07, "log_odds_chosen": 1.8938260078430176, "log_odds_ratio": -0.24366356432437897, "logits/chosen": 0.9876629114151001, "logits/rejected": 1.0728380680084229, "logps/chosen": -1.9980175495147705, "logps/rejected": -3.7248013019561768, "loss": 0.6313, "nll_loss": 0.6068897247314453, "rewards/accuracies": 0.875, "rewards/chosen": -0.1998017579317093, "rewards/margins": 0.1726783812046051, "rewards/rejected": -0.3724801540374756, "step": 3198 }, { "epoch": 8.758384668035593, "grad_norm": 5.213837623596191, "learning_rate": 5.61917808219178e-07, "log_odds_chosen": 0.9972579479217529, "log_odds_ratio": -0.3843209743499756, "logits/chosen": 0.8589341044425964, "logits/rejected": 0.812290608882904, "logps/chosen": -1.7960376739501953, "logps/rejected": -2.7007524967193604, "loss": 0.7818, "nll_loss": 0.7433536648750305, "rewards/accuracies": 1.0, "rewards/chosen": -0.17960378527641296, "rewards/margins": 0.09047148376703262, "rewards/rejected": -0.270075261592865, "step": 3199 }, { "epoch": 8.761122518822724, "grad_norm": 3.9501614570617676, "learning_rate": 5.617808219178082e-07, "log_odds_chosen": 2.925640821456909, "log_odds_ratio": -0.12728768587112427, "logits/chosen": 1.1418265104293823, "logits/rejected": 1.1896308660507202, "logps/chosen": -2.3320112228393555, "logps/rejected": -5.135378837585449, "loss": 0.6501, "nll_loss": 0.6373352408409119, "rewards/accuracies": 1.0, "rewards/chosen": -0.23320110142230988, "rewards/margins": 0.28033679723739624, "rewards/rejected": -0.5135378837585449, "step": 3200 }, { "epoch": 8.763860369609857, "grad_norm": 4.686951160430908, "learning_rate": 5.616438356164383e-07, "log_odds_chosen": 1.1413381099700928, "log_odds_ratio": -0.4222452640533447, "logits/chosen": 0.9014778137207031, "logits/rejected": 0.9026811718940735, "logps/chosen": -2.1039276123046875, "logps/rejected": -3.1829187870025635, "loss": 0.8941, "nll_loss": 0.8518924713134766, "rewards/accuracies": 0.75, "rewards/chosen": -0.2103927582502365, "rewards/margins": 0.10789913684129715, "rewards/rejected": -0.31829190254211426, "step": 3201 }, { "epoch": 8.766598220396988, "grad_norm": 6.992869853973389, "learning_rate": 5.615068493150684e-07, "log_odds_chosen": 0.7329485416412354, "log_odds_ratio": -0.46995845437049866, "logits/chosen": 0.9749886989593506, "logits/rejected": 0.9517996907234192, "logps/chosen": -2.6520214080810547, "logps/rejected": -3.362363338470459, "loss": 0.8294, "nll_loss": 0.7824139595031738, "rewards/accuracies": 0.75, "rewards/chosen": -0.2652021646499634, "rewards/margins": 0.07103417068719864, "rewards/rejected": -0.33623629808425903, "step": 3202 }, { "epoch": 8.76933607118412, "grad_norm": 4.499264717102051, "learning_rate": 5.613698630136986e-07, "log_odds_chosen": 2.6495578289031982, "log_odds_ratio": -0.2340293973684311, "logits/chosen": 0.8676972389221191, "logits/rejected": 0.8818379640579224, "logps/chosen": -2.13706111907959, "logps/rejected": -4.6539788246154785, "loss": 0.7597, "nll_loss": 0.7363401055335999, "rewards/accuracies": 0.875, "rewards/chosen": -0.2137061059474945, "rewards/margins": 0.2516917884349823, "rewards/rejected": -0.4653978943824768, "step": 3203 }, { "epoch": 8.772073921971252, "grad_norm": 4.572640419006348, "learning_rate": 5.612328767123287e-07, "log_odds_chosen": 1.3008532524108887, "log_odds_ratio": -0.36485856771469116, "logits/chosen": 0.962843120098114, "logits/rejected": 0.9340711832046509, "logps/chosen": -2.0879921913146973, "logps/rejected": -3.3136677742004395, "loss": 0.7479, "nll_loss": 0.7113912105560303, "rewards/accuracies": 0.75, "rewards/chosen": -0.20879921317100525, "rewards/margins": 0.1225675642490387, "rewards/rejected": -0.33136677742004395, "step": 3204 }, { "epoch": 8.774811772758385, "grad_norm": 4.499452590942383, "learning_rate": 5.610958904109588e-07, "log_odds_chosen": 2.635251045227051, "log_odds_ratio": -0.17137828469276428, "logits/chosen": 0.9181234836578369, "logits/rejected": 0.8566069602966309, "logps/chosen": -2.1297576427459717, "logps/rejected": -4.603320121765137, "loss": 0.8832, "nll_loss": 0.8660852909088135, "rewards/accuracies": 1.0, "rewards/chosen": -0.21297577023506165, "rewards/margins": 0.24735622107982635, "rewards/rejected": -0.4603319764137268, "step": 3205 }, { "epoch": 8.777549623545516, "grad_norm": 5.281980037689209, "learning_rate": 5.60958904109589e-07, "log_odds_chosen": 2.24493670463562, "log_odds_ratio": -0.6997167468070984, "logits/chosen": 0.8243802785873413, "logits/rejected": 0.9126193523406982, "logps/chosen": -3.557374954223633, "logps/rejected": -5.795419692993164, "loss": 0.8568, "nll_loss": 0.7868104577064514, "rewards/accuracies": 0.75, "rewards/chosen": -0.35573750734329224, "rewards/margins": 0.22380447387695312, "rewards/rejected": -0.5795420408248901, "step": 3206 }, { "epoch": 8.780287474332649, "grad_norm": 3.6547563076019287, "learning_rate": 5.608219178082192e-07, "log_odds_chosen": 4.044382572174072, "log_odds_ratio": -0.17817628383636475, "logits/chosen": 1.0882270336151123, "logits/rejected": 1.1180527210235596, "logps/chosen": -2.7305068969726562, "logps/rejected": -6.718343734741211, "loss": 0.7749, "nll_loss": 0.7570701837539673, "rewards/accuracies": 0.875, "rewards/chosen": -0.2730506956577301, "rewards/margins": 0.39878374338150024, "rewards/rejected": -0.671834409236908, "step": 3207 }, { "epoch": 8.78302532511978, "grad_norm": 4.968675136566162, "learning_rate": 5.606849315068492e-07, "log_odds_chosen": 1.5330801010131836, "log_odds_ratio": -0.33858513832092285, "logits/chosen": 0.6683700680732727, "logits/rejected": 0.6367267966270447, "logps/chosen": -2.139131784439087, "logps/rejected": -3.523096799850464, "loss": 0.8067, "nll_loss": 0.7728021144866943, "rewards/accuracies": 0.875, "rewards/chosen": -0.21391315758228302, "rewards/margins": 0.1383965164422989, "rewards/rejected": -0.3523097038269043, "step": 3208 }, { "epoch": 8.785763175906913, "grad_norm": 4.351930141448975, "learning_rate": 5.605479452054794e-07, "log_odds_chosen": 1.9647315740585327, "log_odds_ratio": -0.31715017557144165, "logits/chosen": 0.7949414253234863, "logits/rejected": 0.768452525138855, "logps/chosen": -1.7949628829956055, "logps/rejected": -3.6415629386901855, "loss": 0.7801, "nll_loss": 0.748337984085083, "rewards/accuracies": 1.0, "rewards/chosen": -0.17949631810188293, "rewards/margins": 0.18466001749038696, "rewards/rejected": -0.3641563355922699, "step": 3209 }, { "epoch": 8.788501026694044, "grad_norm": 5.2655181884765625, "learning_rate": 5.604109589041096e-07, "log_odds_chosen": 0.5647388696670532, "log_odds_ratio": -0.510857343673706, "logits/chosen": 0.7809399962425232, "logits/rejected": 0.7774266004562378, "logps/chosen": -2.3512370586395264, "logps/rejected": -2.8127169609069824, "loss": 0.7509, "nll_loss": 0.6998582482337952, "rewards/accuracies": 0.875, "rewards/chosen": -0.23512369394302368, "rewards/margins": 0.046147994697093964, "rewards/rejected": -0.28127169609069824, "step": 3210 }, { "epoch": 8.791238877481177, "grad_norm": 4.708775997161865, "learning_rate": 5.602739726027396e-07, "log_odds_chosen": 2.9609792232513428, "log_odds_ratio": -0.11644896864891052, "logits/chosen": 0.9733568429946899, "logits/rejected": 0.9941757917404175, "logps/chosen": -2.661159038543701, "logps/rejected": -5.510749340057373, "loss": 0.7591, "nll_loss": 0.7474194765090942, "rewards/accuracies": 1.0, "rewards/chosen": -0.2661159038543701, "rewards/margins": 0.284959077835083, "rewards/rejected": -0.5510749816894531, "step": 3211 }, { "epoch": 8.793976728268309, "grad_norm": 5.07305383682251, "learning_rate": 5.601369863013698e-07, "log_odds_chosen": 0.6688322424888611, "log_odds_ratio": -0.545612096786499, "logits/chosen": 0.8584333062171936, "logits/rejected": 0.9372853636741638, "logps/chosen": -2.5183181762695312, "logps/rejected": -3.1316027641296387, "loss": 0.8168, "nll_loss": 0.7622326016426086, "rewards/accuracies": 0.75, "rewards/chosen": -0.2518318295478821, "rewards/margins": 0.061328478157520294, "rewards/rejected": -0.3131603002548218, "step": 3212 }, { "epoch": 8.796714579055442, "grad_norm": 4.7083539962768555, "learning_rate": 5.6e-07, "log_odds_chosen": 2.1178817749023438, "log_odds_ratio": -0.3076797127723694, "logits/chosen": 0.8421908617019653, "logits/rejected": 0.8466405868530273, "logps/chosen": -2.5948994159698486, "logps/rejected": -4.617005825042725, "loss": 0.7369, "nll_loss": 0.7061009407043457, "rewards/accuracies": 0.875, "rewards/chosen": -0.25948992371559143, "rewards/margins": 0.20221063494682312, "rewards/rejected": -0.46170058846473694, "step": 3213 }, { "epoch": 8.799452429842574, "grad_norm": 4.024257659912109, "learning_rate": 5.598630136986301e-07, "log_odds_chosen": 2.9425463676452637, "log_odds_ratio": -0.11031264811754227, "logits/chosen": 0.7655223608016968, "logits/rejected": 0.8053958415985107, "logps/chosen": -2.20648455619812, "logps/rejected": -4.97645902633667, "loss": 0.8354, "nll_loss": 0.8244069814682007, "rewards/accuracies": 1.0, "rewards/chosen": -0.22064845263957977, "rewards/margins": 0.276997447013855, "rewards/rejected": -0.49764591455459595, "step": 3214 }, { "epoch": 8.802190280629706, "grad_norm": 4.411969184875488, "learning_rate": 5.597260273972602e-07, "log_odds_chosen": 1.835208535194397, "log_odds_ratio": -0.39886054396629333, "logits/chosen": 1.129233717918396, "logits/rejected": 1.1655263900756836, "logps/chosen": -2.6903247833251953, "logps/rejected": -4.441286087036133, "loss": 0.8009, "nll_loss": 0.7609927654266357, "rewards/accuracies": 0.875, "rewards/chosen": -0.26903247833251953, "rewards/margins": 0.17509612441062927, "rewards/rejected": -0.4441286325454712, "step": 3215 }, { "epoch": 8.804928131416839, "grad_norm": 4.179412841796875, "learning_rate": 5.595890410958903e-07, "log_odds_chosen": 1.7012145519256592, "log_odds_ratio": -0.2542476952075958, "logits/chosen": 0.9137552976608276, "logits/rejected": 0.9150712490081787, "logps/chosen": -2.0789780616760254, "logps/rejected": -3.604346513748169, "loss": 0.7472, "nll_loss": 0.7218222618103027, "rewards/accuracies": 1.0, "rewards/chosen": -0.20789778232574463, "rewards/margins": 0.15253683924674988, "rewards/rejected": -0.3604346513748169, "step": 3216 }, { "epoch": 8.80766598220397, "grad_norm": 3.9146106243133545, "learning_rate": 5.594520547945205e-07, "log_odds_chosen": 2.3148066997528076, "log_odds_ratio": -0.255005419254303, "logits/chosen": 0.9918591976165771, "logits/rejected": 0.9814391136169434, "logps/chosen": -2.75225830078125, "logps/rejected": -4.948023319244385, "loss": 0.7089, "nll_loss": 0.6834460496902466, "rewards/accuracies": 0.875, "rewards/chosen": -0.27522581815719604, "rewards/margins": 0.21957653760910034, "rewards/rejected": -0.4948023557662964, "step": 3217 }, { "epoch": 8.810403832991103, "grad_norm": 8.781682968139648, "learning_rate": 5.593150684931506e-07, "log_odds_chosen": 0.9234268069267273, "log_odds_ratio": -0.9848883152008057, "logits/chosen": 0.7600916624069214, "logits/rejected": 0.7123551368713379, "logps/chosen": -3.8204755783081055, "logps/rejected": -4.703741550445557, "loss": 1.0043, "nll_loss": 0.9057899713516235, "rewards/accuracies": 0.625, "rewards/chosen": -0.38204753398895264, "rewards/margins": 0.0883266031742096, "rewards/rejected": -0.4703741669654846, "step": 3218 }, { "epoch": 8.813141683778234, "grad_norm": 4.23718786239624, "learning_rate": 5.591780821917807e-07, "log_odds_chosen": 2.2171568870544434, "log_odds_ratio": -0.23135992884635925, "logits/chosen": 1.0219573974609375, "logits/rejected": 1.1119709014892578, "logps/chosen": -2.3603222370147705, "logps/rejected": -4.507875442504883, "loss": 0.6848, "nll_loss": 0.6616901755332947, "rewards/accuracies": 0.875, "rewards/chosen": -0.23603221774101257, "rewards/margins": 0.2147553265094757, "rewards/rejected": -0.4507875144481659, "step": 3219 }, { "epoch": 8.815879534565367, "grad_norm": 5.264628887176514, "learning_rate": 5.590410958904109e-07, "log_odds_chosen": 1.042154312133789, "log_odds_ratio": -0.39201265573501587, "logits/chosen": 0.9267305135726929, "logits/rejected": 0.9866092205047607, "logps/chosen": -2.9056031703948975, "logps/rejected": -3.895899772644043, "loss": 0.7398, "nll_loss": 0.7005935907363892, "rewards/accuracies": 0.875, "rewards/chosen": -0.2905603349208832, "rewards/margins": 0.09902964532375336, "rewards/rejected": -0.38958996534347534, "step": 3220 }, { "epoch": 8.818617385352498, "grad_norm": 4.655205726623535, "learning_rate": 5.589041095890411e-07, "log_odds_chosen": 1.7389070987701416, "log_odds_ratio": -0.3344007432460785, "logits/chosen": 0.9624600410461426, "logits/rejected": 0.9518284797668457, "logps/chosen": -2.7132346630096436, "logps/rejected": -4.370037078857422, "loss": 0.7733, "nll_loss": 0.739851713180542, "rewards/accuracies": 0.75, "rewards/chosen": -0.27132344245910645, "rewards/margins": 0.16568025946617126, "rewards/rejected": -0.4370037317276001, "step": 3221 }, { "epoch": 8.821355236139631, "grad_norm": 4.5153727531433105, "learning_rate": 5.587671232876711e-07, "log_odds_chosen": 1.9952211380004883, "log_odds_ratio": -0.2685009241104126, "logits/chosen": 1.0674771070480347, "logits/rejected": 1.0749698877334595, "logps/chosen": -2.173220634460449, "logps/rejected": -4.067238807678223, "loss": 0.6791, "nll_loss": 0.6522362232208252, "rewards/accuracies": 0.875, "rewards/chosen": -0.21732206642627716, "rewards/margins": 0.1894018054008484, "rewards/rejected": -0.40672385692596436, "step": 3222 }, { "epoch": 8.824093086926762, "grad_norm": 5.80983304977417, "learning_rate": 5.586301369863013e-07, "log_odds_chosen": 0.6258838772773743, "log_odds_ratio": -0.6471261978149414, "logits/chosen": 0.9015397429466248, "logits/rejected": 0.8314236402511597, "logps/chosen": -2.0678892135620117, "logps/rejected": -2.590203285217285, "loss": 0.8027, "nll_loss": 0.7379563450813293, "rewards/accuracies": 0.75, "rewards/chosen": -0.20678892731666565, "rewards/margins": 0.05223139375448227, "rewards/rejected": -0.2590203285217285, "step": 3223 }, { "epoch": 8.826830937713895, "grad_norm": 4.1512250900268555, "learning_rate": 5.584931506849316e-07, "log_odds_chosen": 3.496103048324585, "log_odds_ratio": -0.21489422023296356, "logits/chosen": 1.004254937171936, "logits/rejected": 1.084348440170288, "logps/chosen": -2.82769775390625, "logps/rejected": -6.229119777679443, "loss": 0.7702, "nll_loss": 0.7487540245056152, "rewards/accuracies": 0.875, "rewards/chosen": -0.2827697992324829, "rewards/margins": 0.340142160654068, "rewards/rejected": -0.6229119300842285, "step": 3224 }, { "epoch": 8.829568788501026, "grad_norm": 5.3322625160217285, "learning_rate": 5.583561643835615e-07, "log_odds_chosen": 0.9159747958183289, "log_odds_ratio": -0.4059881567955017, "logits/chosen": 0.7363640666007996, "logits/rejected": 0.6968027949333191, "logps/chosen": -2.078685760498047, "logps/rejected": -2.9158780574798584, "loss": 0.7821, "nll_loss": 0.7415054440498352, "rewards/accuracies": 1.0, "rewards/chosen": -0.2078685760498047, "rewards/margins": 0.08371923118829727, "rewards/rejected": -0.29158779978752136, "step": 3225 }, { "epoch": 8.83230663928816, "grad_norm": 4.624204158782959, "learning_rate": 5.582191780821918e-07, "log_odds_chosen": 2.0094635486602783, "log_odds_ratio": -0.28267812728881836, "logits/chosen": 0.8262420892715454, "logits/rejected": 0.7498396039009094, "logps/chosen": -1.9951869249343872, "logps/rejected": -3.9008736610412598, "loss": 0.79, "nll_loss": 0.761696457862854, "rewards/accuracies": 0.875, "rewards/chosen": -0.19951869547367096, "rewards/margins": 0.19056867063045502, "rewards/rejected": -0.390087366104126, "step": 3226 }, { "epoch": 8.83504449007529, "grad_norm": 4.295771598815918, "learning_rate": 5.58082191780822e-07, "log_odds_chosen": 2.230653762817383, "log_odds_ratio": -0.18949349224567413, "logits/chosen": 0.802514374256134, "logits/rejected": 0.7912802696228027, "logps/chosen": -2.1488101482391357, "logps/rejected": -4.213840007781982, "loss": 0.6951, "nll_loss": 0.676190197467804, "rewards/accuracies": 1.0, "rewards/chosen": -0.21488100290298462, "rewards/margins": 0.20650294423103333, "rewards/rejected": -0.42138397693634033, "step": 3227 }, { "epoch": 8.837782340862423, "grad_norm": 5.239390850067139, "learning_rate": 5.579452054794521e-07, "log_odds_chosen": 1.43021821975708, "log_odds_ratio": -0.3883640170097351, "logits/chosen": 0.7988478541374207, "logits/rejected": 0.7276479005813599, "logps/chosen": -1.8880884647369385, "logps/rejected": -3.2378628253936768, "loss": 0.8292, "nll_loss": 0.7903724908828735, "rewards/accuracies": 0.875, "rewards/chosen": -0.1888088583946228, "rewards/margins": 0.13497743010520935, "rewards/rejected": -0.32378625869750977, "step": 3228 }, { "epoch": 8.840520191649555, "grad_norm": 4.5239057540893555, "learning_rate": 5.578082191780822e-07, "log_odds_chosen": 1.9078516960144043, "log_odds_ratio": -0.35430285334587097, "logits/chosen": 0.7849964499473572, "logits/rejected": 0.7919709086418152, "logps/chosen": -2.5669164657592773, "logps/rejected": -4.384650707244873, "loss": 0.7682, "nll_loss": 0.7327897548675537, "rewards/accuracies": 0.875, "rewards/chosen": -0.2566916346549988, "rewards/margins": 0.18177342414855957, "rewards/rejected": -0.43846505880355835, "step": 3229 }, { "epoch": 8.843258042436688, "grad_norm": 7.384883880615234, "learning_rate": 5.576712328767123e-07, "log_odds_chosen": 1.3205598592758179, "log_odds_ratio": -0.5620909929275513, "logits/chosen": 0.7390771508216858, "logits/rejected": 0.6206227540969849, "logps/chosen": -1.9446263313293457, "logps/rejected": -3.092984199523926, "loss": 0.8025, "nll_loss": 0.7462912797927856, "rewards/accuracies": 0.875, "rewards/chosen": -0.1944626271724701, "rewards/margins": 0.11483578383922577, "rewards/rejected": -0.30929842591285706, "step": 3230 }, { "epoch": 8.845995893223819, "grad_norm": 5.805368423461914, "learning_rate": 5.575342465753425e-07, "log_odds_chosen": 0.8713112473487854, "log_odds_ratio": -0.6019126176834106, "logits/chosen": 0.7587960362434387, "logits/rejected": 0.7292288541793823, "logps/chosen": -2.5417022705078125, "logps/rejected": -3.338252305984497, "loss": 0.8146, "nll_loss": 0.7544544339179993, "rewards/accuracies": 0.75, "rewards/chosen": -0.2541702389717102, "rewards/margins": 0.0796549916267395, "rewards/rejected": -0.3338252305984497, "step": 3231 }, { "epoch": 8.848733744010952, "grad_norm": 4.899600982666016, "learning_rate": 5.573972602739726e-07, "log_odds_chosen": 2.084016799926758, "log_odds_ratio": -0.19926440715789795, "logits/chosen": 0.7790126204490662, "logits/rejected": 0.7847059965133667, "logps/chosen": -2.3174171447753906, "logps/rejected": -4.172214984893799, "loss": 0.7352, "nll_loss": 0.7152519822120667, "rewards/accuracies": 1.0, "rewards/chosen": -0.23174171149730682, "rewards/margins": 0.1854797601699829, "rewards/rejected": -0.41722145676612854, "step": 3232 }, { "epoch": 8.851471594798083, "grad_norm": 3.9101905822753906, "learning_rate": 5.572602739726027e-07, "log_odds_chosen": 3.0854804515838623, "log_odds_ratio": -0.26011601090431213, "logits/chosen": 0.8859594464302063, "logits/rejected": 0.9512896537780762, "logps/chosen": -3.064749240875244, "logps/rejected": -6.095687389373779, "loss": 0.8294, "nll_loss": 0.8034162521362305, "rewards/accuracies": 1.0, "rewards/chosen": -0.3064749240875244, "rewards/margins": 0.3030937910079956, "rewards/rejected": -0.60956871509552, "step": 3233 }, { "epoch": 8.854209445585216, "grad_norm": 4.665815830230713, "learning_rate": 5.571232876712329e-07, "log_odds_chosen": 0.5098200440406799, "log_odds_ratio": -0.5088302493095398, "logits/chosen": 0.7113455533981323, "logits/rejected": 0.6924321055412292, "logps/chosen": -2.086766004562378, "logps/rejected": -2.507202386856079, "loss": 0.7567, "nll_loss": 0.7057880163192749, "rewards/accuracies": 0.75, "rewards/chosen": -0.20867660641670227, "rewards/margins": 0.04204364866018295, "rewards/rejected": -0.25072023272514343, "step": 3234 }, { "epoch": 8.856947296372347, "grad_norm": 4.052916526794434, "learning_rate": 5.569863013698631e-07, "log_odds_chosen": 1.8692659139633179, "log_odds_ratio": -0.22104433178901672, "logits/chosen": 1.0204100608825684, "logits/rejected": 0.9593266844749451, "logps/chosen": -1.8726484775543213, "logps/rejected": -3.6112430095672607, "loss": 0.7052, "nll_loss": 0.683096170425415, "rewards/accuracies": 1.0, "rewards/chosen": -0.18726485967636108, "rewards/margins": 0.17385944724082947, "rewards/rejected": -0.36112433671951294, "step": 3235 }, { "epoch": 8.85968514715948, "grad_norm": 4.2099432945251465, "learning_rate": 5.568493150684931e-07, "log_odds_chosen": 1.5836929082870483, "log_odds_ratio": -0.2970541715621948, "logits/chosen": 0.8598871231079102, "logits/rejected": 0.8243483901023865, "logps/chosen": -1.9153900146484375, "logps/rejected": -3.4023165702819824, "loss": 0.7557, "nll_loss": 0.7260293364524841, "rewards/accuracies": 0.875, "rewards/chosen": -0.191539004445076, "rewards/margins": 0.14869262278079987, "rewards/rejected": -0.34023165702819824, "step": 3236 }, { "epoch": 8.862422997946611, "grad_norm": 4.545956134796143, "learning_rate": 5.567123287671233e-07, "log_odds_chosen": 0.8447298407554626, "log_odds_ratio": -0.4613625407218933, "logits/chosen": 0.7471239566802979, "logits/rejected": 0.6990246176719666, "logps/chosen": -1.952216625213623, "logps/rejected": -2.734347105026245, "loss": 0.7515, "nll_loss": 0.7053359150886536, "rewards/accuracies": 0.75, "rewards/chosen": -0.1952216625213623, "rewards/margins": 0.07821303606033325, "rewards/rejected": -0.27343469858169556, "step": 3237 }, { "epoch": 8.865160848733744, "grad_norm": 5.040693759918213, "learning_rate": 5.565753424657535e-07, "log_odds_chosen": 1.644279956817627, "log_odds_ratio": -0.40279340744018555, "logits/chosen": 0.8249363899230957, "logits/rejected": 0.8521953821182251, "logps/chosen": -2.254234790802002, "logps/rejected": -3.855492353439331, "loss": 0.7854, "nll_loss": 0.745169997215271, "rewards/accuracies": 0.875, "rewards/chosen": -0.22542348504066467, "rewards/margins": 0.1601257622241974, "rewards/rejected": -0.38554924726486206, "step": 3238 }, { "epoch": 8.867898699520875, "grad_norm": 4.816882133483887, "learning_rate": 5.564383561643835e-07, "log_odds_chosen": 1.5236599445343018, "log_odds_ratio": -0.350427508354187, "logits/chosen": 0.7568174600601196, "logits/rejected": 0.7248727083206177, "logps/chosen": -1.5856654644012451, "logps/rejected": -2.9742298126220703, "loss": 0.7355, "nll_loss": 0.7004454135894775, "rewards/accuracies": 0.875, "rewards/chosen": -0.15856654942035675, "rewards/margins": 0.1388564258813858, "rewards/rejected": -0.29742297530174255, "step": 3239 }, { "epoch": 8.870636550308008, "grad_norm": 4.707831859588623, "learning_rate": 5.563013698630137e-07, "log_odds_chosen": 2.5091676712036133, "log_odds_ratio": -0.1590614914894104, "logits/chosen": 1.0382657051086426, "logits/rejected": 0.9284095168113708, "logps/chosen": -2.379533290863037, "logps/rejected": -4.764331340789795, "loss": 0.841, "nll_loss": 0.8250904083251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.2379533350467682, "rewards/margins": 0.2384798228740692, "rewards/rejected": -0.4764331579208374, "step": 3240 }, { "epoch": 8.873374401095141, "grad_norm": 5.354855537414551, "learning_rate": 5.561643835616439e-07, "log_odds_chosen": 2.7311315536499023, "log_odds_ratio": -0.21124202013015747, "logits/chosen": 0.8770017027854919, "logits/rejected": 0.9629079699516296, "logps/chosen": -2.121584415435791, "logps/rejected": -4.723172187805176, "loss": 0.8552, "nll_loss": 0.8341110348701477, "rewards/accuracies": 1.0, "rewards/chosen": -0.2121584266424179, "rewards/margins": 0.26015883684158325, "rewards/rejected": -0.47231727838516235, "step": 3241 }, { "epoch": 8.876112251882272, "grad_norm": 5.581032752990723, "learning_rate": 5.56027397260274e-07, "log_odds_chosen": 1.227057933807373, "log_odds_ratio": -0.5235928893089294, "logits/chosen": 0.9302036762237549, "logits/rejected": 1.034313678741455, "logps/chosen": -2.98641300201416, "logps/rejected": -4.118535041809082, "loss": 0.7685, "nll_loss": 0.7160958647727966, "rewards/accuracies": 0.75, "rewards/chosen": -0.29864129424095154, "rewards/margins": 0.1132122129201889, "rewards/rejected": -0.41185352206230164, "step": 3242 }, { "epoch": 8.878850102669405, "grad_norm": 5.32893705368042, "learning_rate": 5.558904109589041e-07, "log_odds_chosen": 1.4934066534042358, "log_odds_ratio": -0.2563673257827759, "logits/chosen": 0.8049337267875671, "logits/rejected": 0.8446297645568848, "logps/chosen": -2.6048836708068848, "logps/rejected": -4.03045654296875, "loss": 0.6856, "nll_loss": 0.6599831581115723, "rewards/accuracies": 0.875, "rewards/chosen": -0.260488361120224, "rewards/margins": 0.142557293176651, "rewards/rejected": -0.403045654296875, "step": 3243 }, { "epoch": 8.881587953456537, "grad_norm": 5.549989700317383, "learning_rate": 5.557534246575343e-07, "log_odds_chosen": 2.668886661529541, "log_odds_ratio": -0.17108897864818573, "logits/chosen": 0.6265837550163269, "logits/rejected": 0.6723887920379639, "logps/chosen": -2.546980857849121, "logps/rejected": -5.126554489135742, "loss": 0.898, "nll_loss": 0.8809281587600708, "rewards/accuracies": 1.0, "rewards/chosen": -0.2546980679035187, "rewards/margins": 0.2579573690891266, "rewards/rejected": -0.5126554369926453, "step": 3244 }, { "epoch": 8.88432580424367, "grad_norm": 5.133763313293457, "learning_rate": 5.556164383561644e-07, "log_odds_chosen": 1.172147274017334, "log_odds_ratio": -0.3992829918861389, "logits/chosen": 0.7183672785758972, "logits/rejected": 0.6723995804786682, "logps/chosen": -2.491612672805786, "logps/rejected": -3.5564017295837402, "loss": 0.7471, "nll_loss": 0.7071231007575989, "rewards/accuracies": 0.875, "rewards/chosen": -0.2491612732410431, "rewards/margins": 0.10647892951965332, "rewards/rejected": -0.3556402027606964, "step": 3245 }, { "epoch": 8.8870636550308, "grad_norm": 5.064461708068848, "learning_rate": 5.554794520547945e-07, "log_odds_chosen": 2.6245055198669434, "log_odds_ratio": -0.24948975443840027, "logits/chosen": 0.9937117099761963, "logits/rejected": 0.9618929028511047, "logps/chosen": -2.4720771312713623, "logps/rejected": -5.034242153167725, "loss": 0.8633, "nll_loss": 0.838356614112854, "rewards/accuracies": 0.875, "rewards/chosen": -0.24720771610736847, "rewards/margins": 0.25621652603149414, "rewards/rejected": -0.5034242272377014, "step": 3246 }, { "epoch": 8.889801505817934, "grad_norm": 4.788340091705322, "learning_rate": 5.553424657534246e-07, "log_odds_chosen": 2.076796531677246, "log_odds_ratio": -0.31342387199401855, "logits/chosen": 0.9341751337051392, "logits/rejected": 0.9046232104301453, "logps/chosen": -1.9952070713043213, "logps/rejected": -3.841853380203247, "loss": 0.7584, "nll_loss": 0.7270422577857971, "rewards/accuracies": 0.875, "rewards/chosen": -0.19952070713043213, "rewards/margins": 0.18466463685035706, "rewards/rejected": -0.3841853737831116, "step": 3247 }, { "epoch": 8.892539356605065, "grad_norm": 4.725493907928467, "learning_rate": 5.552054794520548e-07, "log_odds_chosen": 1.3271763324737549, "log_odds_ratio": -0.3149168789386749, "logits/chosen": 0.7582113146781921, "logits/rejected": 0.7322472333908081, "logps/chosen": -2.4832019805908203, "logps/rejected": -3.745027780532837, "loss": 0.8435, "nll_loss": 0.8119754791259766, "rewards/accuracies": 0.875, "rewards/chosen": -0.24832019209861755, "rewards/margins": 0.12618263065814972, "rewards/rejected": -0.37450283765792847, "step": 3248 }, { "epoch": 8.895277207392198, "grad_norm": 5.626695156097412, "learning_rate": 5.55068493150685e-07, "log_odds_chosen": 2.313070297241211, "log_odds_ratio": -0.2041286826133728, "logits/chosen": 0.777444064617157, "logits/rejected": 0.7856584787368774, "logps/chosen": -2.0524377822875977, "logps/rejected": -4.233351707458496, "loss": 0.7351, "nll_loss": 0.7146904468536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.2052437961101532, "rewards/margins": 0.2180914282798767, "rewards/rejected": -0.4233352243900299, "step": 3249 }, { "epoch": 8.898015058179329, "grad_norm": 4.764834403991699, "learning_rate": 5.54931506849315e-07, "log_odds_chosen": 1.9123977422714233, "log_odds_ratio": -0.2774544358253479, "logits/chosen": 1.076028823852539, "logits/rejected": 1.026343584060669, "logps/chosen": -2.27174973487854, "logps/rejected": -4.081523418426514, "loss": 0.8008, "nll_loss": 0.7730162143707275, "rewards/accuracies": 0.875, "rewards/chosen": -0.22717496752738953, "rewards/margins": 0.18097737431526184, "rewards/rejected": -0.40815237164497375, "step": 3250 }, { "epoch": 8.900752908966462, "grad_norm": 5.288290977478027, "learning_rate": 5.547945205479452e-07, "log_odds_chosen": 1.6483585834503174, "log_odds_ratio": -0.22819308936595917, "logits/chosen": 0.9606746435165405, "logits/rejected": 0.9860159158706665, "logps/chosen": -2.700418710708618, "logps/rejected": -4.286975860595703, "loss": 0.9009, "nll_loss": 0.8780478239059448, "rewards/accuracies": 1.0, "rewards/chosen": -0.27004188299179077, "rewards/margins": 0.15865570306777954, "rewards/rejected": -0.4286975562572479, "step": 3251 }, { "epoch": 8.903490759753593, "grad_norm": 5.93626594543457, "learning_rate": 5.546575342465754e-07, "log_odds_chosen": 1.8354586362838745, "log_odds_ratio": -0.5055966973304749, "logits/chosen": 0.7878240346908569, "logits/rejected": 0.7648259401321411, "logps/chosen": -2.1549601554870605, "logps/rejected": -3.8567142486572266, "loss": 0.8135, "nll_loss": 0.7629746794700623, "rewards/accuracies": 0.875, "rewards/chosen": -0.2154960185289383, "rewards/margins": 0.1701754331588745, "rewards/rejected": -0.3856714367866516, "step": 3252 }, { "epoch": 8.906228610540726, "grad_norm": 6.666302680969238, "learning_rate": 5.545205479452054e-07, "log_odds_chosen": 2.06660532951355, "log_odds_ratio": -0.36044713854789734, "logits/chosen": 0.8879520893096924, "logits/rejected": 0.9491491317749023, "logps/chosen": -2.8929214477539062, "logps/rejected": -4.878104209899902, "loss": 0.8075, "nll_loss": 0.7714786529541016, "rewards/accuracies": 0.875, "rewards/chosen": -0.2892921566963196, "rewards/margins": 0.1985182762145996, "rewards/rejected": -0.4878104329109192, "step": 3253 }, { "epoch": 8.908966461327857, "grad_norm": 4.113620281219482, "learning_rate": 5.543835616438356e-07, "log_odds_chosen": 2.3839972019195557, "log_odds_ratio": -0.23017571866512299, "logits/chosen": 0.9428685903549194, "logits/rejected": 0.9472067356109619, "logps/chosen": -2.0165772438049316, "logps/rejected": -4.263796329498291, "loss": 0.6818, "nll_loss": 0.6588009595870972, "rewards/accuracies": 0.875, "rewards/chosen": -0.2016577273607254, "rewards/margins": 0.22472189366817474, "rewards/rejected": -0.42637962102890015, "step": 3254 }, { "epoch": 8.91170431211499, "grad_norm": 4.829493045806885, "learning_rate": 5.542465753424658e-07, "log_odds_chosen": 2.8667361736297607, "log_odds_ratio": -0.19873322546482086, "logits/chosen": 0.8119162917137146, "logits/rejected": 0.8305329084396362, "logps/chosen": -2.192915201187134, "logps/rejected": -4.949949264526367, "loss": 0.6988, "nll_loss": 0.6789370179176331, "rewards/accuracies": 1.0, "rewards/chosen": -0.21929152309894562, "rewards/margins": 0.27570343017578125, "rewards/rejected": -0.49499496817588806, "step": 3255 }, { "epoch": 8.914442162902121, "grad_norm": 5.286592960357666, "learning_rate": 5.541095890410959e-07, "log_odds_chosen": 1.7303013801574707, "log_odds_ratio": -0.3989752531051636, "logits/chosen": 0.6453782320022583, "logits/rejected": 0.6815637350082397, "logps/chosen": -2.2052435874938965, "logps/rejected": -3.887213706970215, "loss": 0.7189, "nll_loss": 0.6789544820785522, "rewards/accuracies": 0.75, "rewards/chosen": -0.22052434086799622, "rewards/margins": 0.16819705069065094, "rewards/rejected": -0.38872140645980835, "step": 3256 }, { "epoch": 8.917180013689254, "grad_norm": 4.4376325607299805, "learning_rate": 5.53972602739726e-07, "log_odds_chosen": 1.3668115139007568, "log_odds_ratio": -0.2957606315612793, "logits/chosen": 0.6811426281929016, "logits/rejected": 0.5745126008987427, "logps/chosen": -2.054804801940918, "logps/rejected": -3.3136987686157227, "loss": 0.787, "nll_loss": 0.7574318051338196, "rewards/accuracies": 1.0, "rewards/chosen": -0.20548048615455627, "rewards/margins": 0.125889390707016, "rewards/rejected": -0.33136987686157227, "step": 3257 }, { "epoch": 8.919917864476385, "grad_norm": 4.690773963928223, "learning_rate": 5.538356164383562e-07, "log_odds_chosen": 2.58905291557312, "log_odds_ratio": -0.3068639636039734, "logits/chosen": 0.8412240743637085, "logits/rejected": 0.8580082654953003, "logps/chosen": -1.756019115447998, "logps/rejected": -4.198698043823242, "loss": 0.8311, "nll_loss": 0.800373911857605, "rewards/accuracies": 0.875, "rewards/chosen": -0.17560191452503204, "rewards/margins": 0.24426797032356262, "rewards/rejected": -0.41986986994743347, "step": 3258 }, { "epoch": 8.922655715263518, "grad_norm": 5.780145168304443, "learning_rate": 5.536986301369863e-07, "log_odds_chosen": 1.32469642162323, "log_odds_ratio": -0.47179165482521057, "logits/chosen": 1.045372724533081, "logits/rejected": 1.1033363342285156, "logps/chosen": -2.6196646690368652, "logps/rejected": -3.838338851928711, "loss": 0.7095, "nll_loss": 0.6623106002807617, "rewards/accuracies": 0.75, "rewards/chosen": -0.2619664669036865, "rewards/margins": 0.12186741083860397, "rewards/rejected": -0.3838338553905487, "step": 3259 }, { "epoch": 8.92539356605065, "grad_norm": 4.399590969085693, "learning_rate": 5.535616438356164e-07, "log_odds_chosen": 2.4658331871032715, "log_odds_ratio": -0.22605310380458832, "logits/chosen": 0.9534687995910645, "logits/rejected": 0.9969558715820312, "logps/chosen": -2.430643081665039, "logps/rejected": -4.801997184753418, "loss": 0.6774, "nll_loss": 0.6547622084617615, "rewards/accuracies": 1.0, "rewards/chosen": -0.24306431412696838, "rewards/margins": 0.2371353805065155, "rewards/rejected": -0.4801996946334839, "step": 3260 }, { "epoch": 8.928131416837783, "grad_norm": 4.15421724319458, "learning_rate": 5.534246575342465e-07, "log_odds_chosen": 2.971590042114258, "log_odds_ratio": -0.22508494555950165, "logits/chosen": 0.9536139965057373, "logits/rejected": 0.8775817155838013, "logps/chosen": -2.0118539333343506, "logps/rejected": -4.830021858215332, "loss": 0.7273, "nll_loss": 0.7047777771949768, "rewards/accuracies": 0.875, "rewards/chosen": -0.20118539035320282, "rewards/margins": 0.28181684017181396, "rewards/rejected": -0.4830022156238556, "step": 3261 }, { "epoch": 8.930869267624914, "grad_norm": 5.1324872970581055, "learning_rate": 5.532876712328767e-07, "log_odds_chosen": 3.481438159942627, "log_odds_ratio": -0.1189209371805191, "logits/chosen": 0.8687766790390015, "logits/rejected": 0.8549452424049377, "logps/chosen": -2.4159109592437744, "logps/rejected": -5.793788433074951, "loss": 0.7466, "nll_loss": 0.7347128987312317, "rewards/accuracies": 1.0, "rewards/chosen": -0.24159111082553864, "rewards/margins": 0.3377877473831177, "rewards/rejected": -0.5793788433074951, "step": 3262 }, { "epoch": 8.933607118412047, "grad_norm": 7.325011730194092, "learning_rate": 5.531506849315069e-07, "log_odds_chosen": 1.9986448287963867, "log_odds_ratio": -0.3680683374404907, "logits/chosen": 0.7599320411682129, "logits/rejected": 0.7862791419029236, "logps/chosen": -2.2826695442199707, "logps/rejected": -4.1456298828125, "loss": 0.774, "nll_loss": 0.7371782660484314, "rewards/accuracies": 0.875, "rewards/chosen": -0.22826696932315826, "rewards/margins": 0.1862960159778595, "rewards/rejected": -0.41456300020217896, "step": 3263 }, { "epoch": 8.936344969199178, "grad_norm": 5.238301753997803, "learning_rate": 5.530136986301369e-07, "log_odds_chosen": 1.9484822750091553, "log_odds_ratio": -0.32483476400375366, "logits/chosen": 0.8848699331283569, "logits/rejected": 0.9158705472946167, "logps/chosen": -2.503934860229492, "logps/rejected": -4.351222991943359, "loss": 0.7516, "nll_loss": 0.7190799713134766, "rewards/accuracies": 1.0, "rewards/chosen": -0.25039350986480713, "rewards/margins": 0.18472877144813538, "rewards/rejected": -0.4351222813129425, "step": 3264 }, { "epoch": 8.93908281998631, "grad_norm": 5.08968448638916, "learning_rate": 5.528767123287671e-07, "log_odds_chosen": 1.479067325592041, "log_odds_ratio": -0.2782071828842163, "logits/chosen": 0.9159553050994873, "logits/rejected": 0.90245521068573, "logps/chosen": -1.9131864309310913, "logps/rejected": -3.301504135131836, "loss": 0.7059, "nll_loss": 0.6780522465705872, "rewards/accuracies": 0.875, "rewards/chosen": -0.19131863117218018, "rewards/margins": 0.13883179426193237, "rewards/rejected": -0.33015042543411255, "step": 3265 }, { "epoch": 8.941820670773442, "grad_norm": 4.607022762298584, "learning_rate": 5.527397260273973e-07, "log_odds_chosen": 1.3596307039260864, "log_odds_ratio": -0.49092888832092285, "logits/chosen": 1.0271965265274048, "logits/rejected": 1.0735856294631958, "logps/chosen": -2.028534412384033, "logps/rejected": -3.3000235557556152, "loss": 0.7274, "nll_loss": 0.6783502101898193, "rewards/accuracies": 0.75, "rewards/chosen": -0.20285345613956451, "rewards/margins": 0.12714892625808716, "rewards/rejected": -0.3300023674964905, "step": 3266 }, { "epoch": 8.944558521560575, "grad_norm": 4.442809581756592, "learning_rate": 5.526027397260273e-07, "log_odds_chosen": 2.0789895057678223, "log_odds_ratio": -0.23027044534683228, "logits/chosen": 0.6628499031066895, "logits/rejected": 0.6603561639785767, "logps/chosen": -2.0468392372131348, "logps/rejected": -4.026088237762451, "loss": 0.7476, "nll_loss": 0.7245579361915588, "rewards/accuracies": 1.0, "rewards/chosen": -0.20468392968177795, "rewards/margins": 0.1979249119758606, "rewards/rejected": -0.40260887145996094, "step": 3267 }, { "epoch": 8.947296372347708, "grad_norm": 5.828400135040283, "learning_rate": 5.524657534246575e-07, "log_odds_chosen": 1.6826236248016357, "log_odds_ratio": -0.41078582406044006, "logits/chosen": 0.8781440258026123, "logits/rejected": 0.9200088977813721, "logps/chosen": -2.7809112071990967, "logps/rejected": -4.351430416107178, "loss": 0.8278, "nll_loss": 0.7867653965950012, "rewards/accuracies": 0.875, "rewards/chosen": -0.2780911326408386, "rewards/margins": 0.1570519208908081, "rewards/rejected": -0.43514305353164673, "step": 3268 }, { "epoch": 8.950034223134839, "grad_norm": 5.159860610961914, "learning_rate": 5.523287671232877e-07, "log_odds_chosen": 1.762886643409729, "log_odds_ratio": -0.2716977000236511, "logits/chosen": 0.8801100254058838, "logits/rejected": 0.8576817512512207, "logps/chosen": -2.3683013916015625, "logps/rejected": -4.04960823059082, "loss": 0.7185, "nll_loss": 0.6913283467292786, "rewards/accuracies": 0.875, "rewards/chosen": -0.23683014512062073, "rewards/margins": 0.16813069581985474, "rewards/rejected": -0.40496084094047546, "step": 3269 }, { "epoch": 8.952772073921972, "grad_norm": 4.296100616455078, "learning_rate": 5.521917808219177e-07, "log_odds_chosen": 2.284043073654175, "log_odds_ratio": -0.24403047561645508, "logits/chosen": 0.860587477684021, "logits/rejected": 0.8530827164649963, "logps/chosen": -2.270975112915039, "logps/rejected": -4.460200786590576, "loss": 0.7205, "nll_loss": 0.696113109588623, "rewards/accuracies": 1.0, "rewards/chosen": -0.2270975261926651, "rewards/margins": 0.21892257034778595, "rewards/rejected": -0.44602006673812866, "step": 3270 }, { "epoch": 8.955509924709103, "grad_norm": 5.003279685974121, "learning_rate": 5.520547945205479e-07, "log_odds_chosen": 0.2766870856285095, "log_odds_ratio": -0.903479278087616, "logits/chosen": 0.8973467350006104, "logits/rejected": 0.9166948795318604, "logps/chosen": -2.8513569831848145, "logps/rejected": -3.1041297912597656, "loss": 0.8518, "nll_loss": 0.7614678144454956, "rewards/accuracies": 0.75, "rewards/chosen": -0.2851356863975525, "rewards/margins": 0.02527727745473385, "rewards/rejected": -0.3104130029678345, "step": 3271 }, { "epoch": 8.958247775496236, "grad_norm": 5.641133785247803, "learning_rate": 5.519178082191781e-07, "log_odds_chosen": 2.117170572280884, "log_odds_ratio": -0.28040188550949097, "logits/chosen": 0.8724243640899658, "logits/rejected": 0.805965781211853, "logps/chosen": -2.1595635414123535, "logps/rejected": -4.215612411499023, "loss": 0.7858, "nll_loss": 0.7577977180480957, "rewards/accuracies": 0.875, "rewards/chosen": -0.21595636010169983, "rewards/margins": 0.20560488104820251, "rewards/rejected": -0.42156127095222473, "step": 3272 }, { "epoch": 8.960985626283367, "grad_norm": 4.246100902557373, "learning_rate": 5.517808219178082e-07, "log_odds_chosen": 2.0241641998291016, "log_odds_ratio": -0.20348601043224335, "logits/chosen": 0.8812180757522583, "logits/rejected": 0.8527859449386597, "logps/chosen": -1.8536083698272705, "logps/rejected": -3.732883930206299, "loss": 0.7599, "nll_loss": 0.7395117878913879, "rewards/accuracies": 1.0, "rewards/chosen": -0.185360848903656, "rewards/margins": 0.18792755901813507, "rewards/rejected": -0.37328842282295227, "step": 3273 }, { "epoch": 8.9637234770705, "grad_norm": 4.476574420928955, "learning_rate": 5.516438356164383e-07, "log_odds_chosen": 2.527648687362671, "log_odds_ratio": -0.16674524545669556, "logits/chosen": 0.772266149520874, "logits/rejected": 0.7614490389823914, "logps/chosen": -1.776648998260498, "logps/rejected": -4.133102893829346, "loss": 0.7264, "nll_loss": 0.7096787095069885, "rewards/accuracies": 1.0, "rewards/chosen": -0.1776648908853531, "rewards/margins": 0.23564539849758148, "rewards/rejected": -0.41331028938293457, "step": 3274 }, { "epoch": 8.966461327857632, "grad_norm": 6.339888095855713, "learning_rate": 5.515068493150685e-07, "log_odds_chosen": 2.355367660522461, "log_odds_ratio": -0.3124721944332123, "logits/chosen": 0.950963020324707, "logits/rejected": 0.9863532185554504, "logps/chosen": -2.171536922454834, "logps/rejected": -4.468876838684082, "loss": 0.7878, "nll_loss": 0.7565588355064392, "rewards/accuracies": 0.875, "rewards/chosen": -0.21715369820594788, "rewards/margins": 0.22973404824733734, "rewards/rejected": -0.4468877613544464, "step": 3275 }, { "epoch": 8.969199178644764, "grad_norm": 4.82981538772583, "learning_rate": 5.513698630136986e-07, "log_odds_chosen": 1.790260910987854, "log_odds_ratio": -0.2670162320137024, "logits/chosen": 0.8508726358413696, "logits/rejected": 0.8358268737792969, "logps/chosen": -2.2285544872283936, "logps/rejected": -3.917968511581421, "loss": 0.8324, "nll_loss": 0.805696964263916, "rewards/accuracies": 1.0, "rewards/chosen": -0.22285544872283936, "rewards/margins": 0.1689414381980896, "rewards/rejected": -0.39179688692092896, "step": 3276 }, { "epoch": 8.971937029431896, "grad_norm": 4.636299133300781, "learning_rate": 5.512328767123287e-07, "log_odds_chosen": 1.0646748542785645, "log_odds_ratio": -0.4139821529388428, "logits/chosen": 0.7382442355155945, "logits/rejected": 0.6860922574996948, "logps/chosen": -2.7467100620269775, "logps/rejected": -3.7341108322143555, "loss": 0.7992, "nll_loss": 0.7577798366546631, "rewards/accuracies": 0.75, "rewards/chosen": -0.2746710181236267, "rewards/margins": 0.09874008595943451, "rewards/rejected": -0.3734110891819, "step": 3277 }, { "epoch": 8.974674880219029, "grad_norm": 4.597742557525635, "learning_rate": 5.510958904109588e-07, "log_odds_chosen": 2.943798542022705, "log_odds_ratio": -0.09597966820001602, "logits/chosen": 1.0791398286819458, "logits/rejected": 1.0829391479492188, "logps/chosen": -2.7664241790771484, "logps/rejected": -5.600130081176758, "loss": 0.7397, "nll_loss": 0.7300780415534973, "rewards/accuracies": 1.0, "rewards/chosen": -0.27664241194725037, "rewards/margins": 0.28337058424949646, "rewards/rejected": -0.5600129961967468, "step": 3278 }, { "epoch": 8.97741273100616, "grad_norm": 6.142932415008545, "learning_rate": 5.50958904109589e-07, "log_odds_chosen": 1.955923318862915, "log_odds_ratio": -0.23351389169692993, "logits/chosen": 0.8736933469772339, "logits/rejected": 0.8664239048957825, "logps/chosen": -2.094726085662842, "logps/rejected": -3.894522190093994, "loss": 0.7518, "nll_loss": 0.7284899353981018, "rewards/accuracies": 1.0, "rewards/chosen": -0.20947259664535522, "rewards/margins": 0.1799796223640442, "rewards/rejected": -0.3894522190093994, "step": 3279 }, { "epoch": 8.980150581793293, "grad_norm": 4.79909610748291, "learning_rate": 5.508219178082192e-07, "log_odds_chosen": 1.335893154144287, "log_odds_ratio": -0.33690112829208374, "logits/chosen": 0.7604460716247559, "logits/rejected": 0.7687562704086304, "logps/chosen": -1.7546584606170654, "logps/rejected": -2.97588849067688, "loss": 0.6933, "nll_loss": 0.6595629453659058, "rewards/accuracies": 1.0, "rewards/chosen": -0.17546585202217102, "rewards/margins": 0.12212300300598145, "rewards/rejected": -0.2975888252258301, "step": 3280 }, { "epoch": 8.982888432580424, "grad_norm": 4.908051490783691, "learning_rate": 5.506849315068492e-07, "log_odds_chosen": 1.628919005393982, "log_odds_ratio": -0.2782784104347229, "logits/chosen": 1.0360691547393799, "logits/rejected": 1.0828285217285156, "logps/chosen": -3.0844669342041016, "logps/rejected": -4.630191802978516, "loss": 0.7319, "nll_loss": 0.7040632963180542, "rewards/accuracies": 0.875, "rewards/chosen": -0.3084467053413391, "rewards/margins": 0.1545724719762802, "rewards/rejected": -0.4630191922187805, "step": 3281 }, { "epoch": 8.985626283367557, "grad_norm": 4.405810832977295, "learning_rate": 5.505479452054794e-07, "log_odds_chosen": 1.2009780406951904, "log_odds_ratio": -0.39405614137649536, "logits/chosen": 1.0781859159469604, "logits/rejected": 1.0786240100860596, "logps/chosen": -2.569124221801758, "logps/rejected": -3.702801465988159, "loss": 0.7501, "nll_loss": 0.7106822729110718, "rewards/accuracies": 0.875, "rewards/chosen": -0.2569124400615692, "rewards/margins": 0.1133677288889885, "rewards/rejected": -0.3702801764011383, "step": 3282 }, { "epoch": 8.988364134154688, "grad_norm": 4.523462295532227, "learning_rate": 5.504109589041096e-07, "log_odds_chosen": 2.416626453399658, "log_odds_ratio": -0.1510641872882843, "logits/chosen": 0.8964505195617676, "logits/rejected": 0.9278176426887512, "logps/chosen": -2.6457314491271973, "logps/rejected": -4.984095573425293, "loss": 0.7022, "nll_loss": 0.6870736479759216, "rewards/accuracies": 1.0, "rewards/chosen": -0.2645731270313263, "rewards/margins": 0.23383644223213196, "rewards/rejected": -0.49840956926345825, "step": 3283 }, { "epoch": 8.991101984941821, "grad_norm": 5.209122180938721, "learning_rate": 5.502739726027396e-07, "log_odds_chosen": 2.7815425395965576, "log_odds_ratio": -0.2791670560836792, "logits/chosen": 0.9273828268051147, "logits/rejected": 0.9661015868186951, "logps/chosen": -2.743408441543579, "logps/rejected": -5.455049514770508, "loss": 0.8413, "nll_loss": 0.8133975267410278, "rewards/accuracies": 0.875, "rewards/chosen": -0.2743408679962158, "rewards/margins": 0.27116408944129944, "rewards/rejected": -0.5455049276351929, "step": 3284 }, { "epoch": 8.993839835728952, "grad_norm": 5.706219673156738, "learning_rate": 5.501369863013698e-07, "log_odds_chosen": 1.789673089981079, "log_odds_ratio": -0.4776235520839691, "logits/chosen": 0.9627643823623657, "logits/rejected": 0.939673125743866, "logps/chosen": -2.204512596130371, "logps/rejected": -3.8678102493286133, "loss": 0.7725, "nll_loss": 0.7247357368469238, "rewards/accuracies": 0.75, "rewards/chosen": -0.22045128047466278, "rewards/margins": 0.1663297414779663, "rewards/rejected": -0.3867810368537903, "step": 3285 }, { "epoch": 8.996577686516085, "grad_norm": 4.332769870758057, "learning_rate": 5.5e-07, "log_odds_chosen": 1.348970890045166, "log_odds_ratio": -0.2940608859062195, "logits/chosen": 0.812926173210144, "logits/rejected": 0.9057354927062988, "logps/chosen": -2.171335220336914, "logps/rejected": -3.439594268798828, "loss": 0.7288, "nll_loss": 0.6994159817695618, "rewards/accuracies": 1.0, "rewards/chosen": -0.2171335220336914, "rewards/margins": 0.12682589888572693, "rewards/rejected": -0.34395942091941833, "step": 3286 }, { "epoch": 8.999315537303216, "grad_norm": 4.637881278991699, "learning_rate": 5.498630136986301e-07, "log_odds_chosen": 2.006742477416992, "log_odds_ratio": -0.2377801090478897, "logits/chosen": 0.8503756523132324, "logits/rejected": 0.9703595638275146, "logps/chosen": -2.845747232437134, "logps/rejected": -4.786722183227539, "loss": 0.9118, "nll_loss": 0.8880242109298706, "rewards/accuracies": 1.0, "rewards/chosen": -0.2845747172832489, "rewards/margins": 0.19409748911857605, "rewards/rejected": -0.47867220640182495, "step": 3287 }, { "epoch": 9.00205338809035, "grad_norm": 4.440590858459473, "learning_rate": 5.497260273972602e-07, "log_odds_chosen": 1.559935450553894, "log_odds_ratio": -0.26071083545684814, "logits/chosen": 0.982909083366394, "logits/rejected": 0.9784228801727295, "logps/chosen": -2.7169203758239746, "logps/rejected": -4.218667030334473, "loss": 0.7688, "nll_loss": 0.7427042126655579, "rewards/accuracies": 1.0, "rewards/chosen": -0.27169206738471985, "rewards/margins": 0.15017461776733398, "rewards/rejected": -0.42186668515205383, "step": 3288 }, { "epoch": 9.00479123887748, "grad_norm": 4.189736366271973, "learning_rate": 5.495890410958904e-07, "log_odds_chosen": 1.9535846710205078, "log_odds_ratio": -0.1939634084701538, "logits/chosen": 1.0862452983856201, "logits/rejected": 1.1069514751434326, "logps/chosen": -2.476703643798828, "logps/rejected": -4.342653274536133, "loss": 0.6411, "nll_loss": 0.6217202544212341, "rewards/accuracies": 1.0, "rewards/chosen": -0.24767035245895386, "rewards/margins": 0.18659496307373047, "rewards/rejected": -0.4342653453350067, "step": 3289 }, { "epoch": 9.007529089664613, "grad_norm": 6.855615139007568, "learning_rate": 5.494520547945205e-07, "log_odds_chosen": 2.8192105293273926, "log_odds_ratio": -0.35202500224113464, "logits/chosen": 0.9259628057479858, "logits/rejected": 0.9077780246734619, "logps/chosen": -2.723444938659668, "logps/rejected": -5.460623741149902, "loss": 0.8083, "nll_loss": 0.7730550169944763, "rewards/accuracies": 0.875, "rewards/chosen": -0.2723444998264313, "rewards/margins": 0.2737179398536682, "rewards/rejected": -0.5460624694824219, "step": 3290 }, { "epoch": 9.010266940451745, "grad_norm": 5.857389450073242, "learning_rate": 5.493150684931506e-07, "log_odds_chosen": 0.8673016428947449, "log_odds_ratio": -0.510422945022583, "logits/chosen": 0.9398767948150635, "logits/rejected": 0.9063553214073181, "logps/chosen": -1.9554260969161987, "logps/rejected": -2.7185163497924805, "loss": 0.7442, "nll_loss": 0.6931593418121338, "rewards/accuracies": 0.75, "rewards/chosen": -0.1955426186323166, "rewards/margins": 0.07630901783704758, "rewards/rejected": -0.2718515992164612, "step": 3291 }, { "epoch": 9.013004791238878, "grad_norm": 4.754774570465088, "learning_rate": 5.491780821917807e-07, "log_odds_chosen": 1.1395435333251953, "log_odds_ratio": -0.3740818202495575, "logits/chosen": 0.8968951106071472, "logits/rejected": 0.8255442380905151, "logps/chosen": -1.441152811050415, "logps/rejected": -2.396947145462036, "loss": 0.7407, "nll_loss": 0.7032661437988281, "rewards/accuracies": 0.875, "rewards/chosen": -0.14411526918411255, "rewards/margins": 0.09557943046092987, "rewards/rejected": -0.2396947145462036, "step": 3292 }, { "epoch": 9.015742642026009, "grad_norm": 4.14821195602417, "learning_rate": 5.490410958904109e-07, "log_odds_chosen": 2.974329948425293, "log_odds_ratio": -0.314241886138916, "logits/chosen": 0.7038149833679199, "logits/rejected": 0.7186417579650879, "logps/chosen": -2.1481804847717285, "logps/rejected": -5.006361484527588, "loss": 0.7492, "nll_loss": 0.7177281379699707, "rewards/accuracies": 0.875, "rewards/chosen": -0.21481803059577942, "rewards/margins": 0.2858181297779083, "rewards/rejected": -0.5006362199783325, "step": 3293 }, { "epoch": 9.018480492813142, "grad_norm": 3.9011356830596924, "learning_rate": 5.489041095890411e-07, "log_odds_chosen": 2.9186975955963135, "log_odds_ratio": -0.2764544188976288, "logits/chosen": 0.9550386071205139, "logits/rejected": 1.0235481262207031, "logps/chosen": -2.421956777572632, "logps/rejected": -5.234771728515625, "loss": 0.7664, "nll_loss": 0.7387686371803284, "rewards/accuracies": 0.875, "rewards/chosen": -0.24219568073749542, "rewards/margins": 0.2812814712524414, "rewards/rejected": -0.5234771966934204, "step": 3294 }, { "epoch": 9.021218343600275, "grad_norm": 6.114431381225586, "learning_rate": 5.487671232876711e-07, "log_odds_chosen": 0.5599387288093567, "log_odds_ratio": -0.7182112336158752, "logits/chosen": 0.8180243968963623, "logits/rejected": 0.954418957233429, "logps/chosen": -3.6230688095092773, "logps/rejected": -4.141560077667236, "loss": 0.977, "nll_loss": 0.9051365256309509, "rewards/accuracies": 0.5, "rewards/chosen": -0.3623068630695343, "rewards/margins": 0.05184917151927948, "rewards/rejected": -0.4141560196876526, "step": 3295 }, { "epoch": 9.023956194387406, "grad_norm": 4.808529853820801, "learning_rate": 5.486301369863013e-07, "log_odds_chosen": 1.186375379562378, "log_odds_ratio": -0.3416673541069031, "logits/chosen": 0.890230119228363, "logits/rejected": 0.8558853268623352, "logps/chosen": -1.9883524179458618, "logps/rejected": -3.0714850425720215, "loss": 0.7342, "nll_loss": 0.7000151872634888, "rewards/accuracies": 0.875, "rewards/chosen": -0.19883525371551514, "rewards/margins": 0.10831326991319656, "rewards/rejected": -0.3071485161781311, "step": 3296 }, { "epoch": 9.026694045174539, "grad_norm": 4.731873512268066, "learning_rate": 5.484931506849315e-07, "log_odds_chosen": 1.9573392868041992, "log_odds_ratio": -0.3568669557571411, "logits/chosen": 0.8980554342269897, "logits/rejected": 1.028772234916687, "logps/chosen": -2.792323589324951, "logps/rejected": -4.647512435913086, "loss": 0.7834, "nll_loss": 0.747717022895813, "rewards/accuracies": 0.75, "rewards/chosen": -0.27923235297203064, "rewards/margins": 0.18551889061927795, "rewards/rejected": -0.464751273393631, "step": 3297 }, { "epoch": 9.02943189596167, "grad_norm": 5.092735290527344, "learning_rate": 5.483561643835615e-07, "log_odds_chosen": 0.9875897765159607, "log_odds_ratio": -0.34232136607170105, "logits/chosen": 0.9703736901283264, "logits/rejected": 0.967690110206604, "logps/chosen": -1.8722707033157349, "logps/rejected": -2.752960681915283, "loss": 0.7244, "nll_loss": 0.6901703476905823, "rewards/accuracies": 1.0, "rewards/chosen": -0.1872270703315735, "rewards/margins": 0.08806898444890976, "rewards/rejected": -0.27529606223106384, "step": 3298 }, { "epoch": 9.032169746748803, "grad_norm": 5.617648124694824, "learning_rate": 5.482191780821917e-07, "log_odds_chosen": 2.921206474304199, "log_odds_ratio": -0.11789855360984802, "logits/chosen": 0.9219040870666504, "logits/rejected": 1.0046167373657227, "logps/chosen": -2.763936758041382, "logps/rejected": -5.576260566711426, "loss": 0.8054, "nll_loss": 0.7936432361602783, "rewards/accuracies": 1.0, "rewards/chosen": -0.27639368176460266, "rewards/margins": 0.2812323570251465, "rewards/rejected": -0.5576260685920715, "step": 3299 }, { "epoch": 9.034907597535934, "grad_norm": 6.362475872039795, "learning_rate": 5.480821917808219e-07, "log_odds_chosen": 0.520812451839447, "log_odds_ratio": -0.5464670062065125, "logits/chosen": 0.7277058362960815, "logits/rejected": 0.6552857756614685, "logps/chosen": -2.3488903045654297, "logps/rejected": -2.789884090423584, "loss": 0.8372, "nll_loss": 0.7825045585632324, "rewards/accuracies": 0.875, "rewards/chosen": -0.23488904535770416, "rewards/margins": 0.044099368155002594, "rewards/rejected": -0.27898842096328735, "step": 3300 }, { "epoch": 9.037645448323067, "grad_norm": 4.146679401397705, "learning_rate": 5.47945205479452e-07, "log_odds_chosen": 1.3698917627334595, "log_odds_ratio": -0.30858534574508667, "logits/chosen": 0.5376445055007935, "logits/rejected": 0.502371609210968, "logps/chosen": -1.894385576248169, "logps/rejected": -3.1486594676971436, "loss": 0.702, "nll_loss": 0.67115718126297, "rewards/accuracies": 0.875, "rewards/chosen": -0.1894385665655136, "rewards/margins": 0.12542738020420074, "rewards/rejected": -0.31486594676971436, "step": 3301 }, { "epoch": 9.040383299110198, "grad_norm": 5.138659954071045, "learning_rate": 5.478082191780821e-07, "log_odds_chosen": 0.873397946357727, "log_odds_ratio": -0.4706564247608185, "logits/chosen": 0.7726182341575623, "logits/rejected": 0.8034310340881348, "logps/chosen": -2.0236587524414062, "logps/rejected": -2.8207848072052, "loss": 0.7493, "nll_loss": 0.7022309899330139, "rewards/accuracies": 0.75, "rewards/chosen": -0.20236587524414062, "rewards/margins": 0.07971260696649551, "rewards/rejected": -0.28207847476005554, "step": 3302 }, { "epoch": 9.043121149897331, "grad_norm": 6.842611312866211, "learning_rate": 5.476712328767123e-07, "log_odds_chosen": 1.524824619293213, "log_odds_ratio": -0.3125433027744293, "logits/chosen": 0.9434677362442017, "logits/rejected": 0.8900867104530334, "logps/chosen": -2.140281915664673, "logps/rejected": -3.5293784141540527, "loss": 0.7581, "nll_loss": 0.7268518209457397, "rewards/accuracies": 0.875, "rewards/chosen": -0.21402820944786072, "rewards/margins": 0.13890962302684784, "rewards/rejected": -0.35293784737586975, "step": 3303 }, { "epoch": 9.045859000684462, "grad_norm": 4.411350727081299, "learning_rate": 5.475342465753424e-07, "log_odds_chosen": 1.9587098360061646, "log_odds_ratio": -0.3339769244194031, "logits/chosen": 0.9844978451728821, "logits/rejected": 1.0293989181518555, "logps/chosen": -2.631493330001831, "logps/rejected": -4.506281852722168, "loss": 0.9127, "nll_loss": 0.8793138265609741, "rewards/accuracies": 0.875, "rewards/chosen": -0.26314932107925415, "rewards/margins": 0.18747884035110474, "rewards/rejected": -0.4506281614303589, "step": 3304 }, { "epoch": 9.048596851471595, "grad_norm": 7.606939792633057, "learning_rate": 5.473972602739725e-07, "log_odds_chosen": 0.9487158060073853, "log_odds_ratio": -0.4470030665397644, "logits/chosen": 0.8191002607345581, "logits/rejected": 0.7972186803817749, "logps/chosen": -2.645251750946045, "logps/rejected": -3.507622718811035, "loss": 0.7674, "nll_loss": 0.7227328419685364, "rewards/accuracies": 0.875, "rewards/chosen": -0.2645251750946045, "rewards/margins": 0.0862371027469635, "rewards/rejected": -0.3507622480392456, "step": 3305 }, { "epoch": 9.051334702258726, "grad_norm": 4.741866588592529, "learning_rate": 5.472602739726026e-07, "log_odds_chosen": 1.2330423593521118, "log_odds_ratio": -0.3916242718696594, "logits/chosen": 0.5860739946365356, "logits/rejected": 0.550399661064148, "logps/chosen": -2.3653485774993896, "logps/rejected": -3.510676860809326, "loss": 0.8182, "nll_loss": 0.779080331325531, "rewards/accuracies": 0.75, "rewards/chosen": -0.23653484880924225, "rewards/margins": 0.11453282833099365, "rewards/rejected": -0.3510676622390747, "step": 3306 }, { "epoch": 9.05407255304586, "grad_norm": 4.2288665771484375, "learning_rate": 5.471232876712329e-07, "log_odds_chosen": 1.440256953239441, "log_odds_ratio": -0.35231077671051025, "logits/chosen": 0.8419431447982788, "logits/rejected": 0.8854562640190125, "logps/chosen": -2.2572662830352783, "logps/rejected": -3.6122941970825195, "loss": 0.8495, "nll_loss": 0.8143013119697571, "rewards/accuracies": 0.75, "rewards/chosen": -0.22572661936283112, "rewards/margins": 0.13550281524658203, "rewards/rejected": -0.36122944951057434, "step": 3307 }, { "epoch": 9.05681040383299, "grad_norm": 4.725767135620117, "learning_rate": 5.469863013698631e-07, "log_odds_chosen": 1.8901022672653198, "log_odds_ratio": -0.19975998997688293, "logits/chosen": 0.8078707456588745, "logits/rejected": 0.7912632822990417, "logps/chosen": -1.8224979639053345, "logps/rejected": -3.5505335330963135, "loss": 0.7849, "nll_loss": 0.7649105787277222, "rewards/accuracies": 1.0, "rewards/chosen": -0.18224979937076569, "rewards/margins": 0.17280353605747223, "rewards/rejected": -0.3550533652305603, "step": 3308 }, { "epoch": 9.059548254620124, "grad_norm": 5.825088024139404, "learning_rate": 5.46849315068493e-07, "log_odds_chosen": 1.063007116317749, "log_odds_ratio": -0.4639822542667389, "logits/chosen": 0.9349066019058228, "logits/rejected": 1.001300573348999, "logps/chosen": -2.7652769088745117, "logps/rejected": -3.7385284900665283, "loss": 0.758, "nll_loss": 0.711609959602356, "rewards/accuracies": 0.75, "rewards/chosen": -0.2765277028083801, "rewards/margins": 0.0973251610994339, "rewards/rejected": -0.3738528788089752, "step": 3309 }, { "epoch": 9.062286105407255, "grad_norm": 4.098580360412598, "learning_rate": 5.467123287671233e-07, "log_odds_chosen": 2.9616143703460693, "log_odds_ratio": -0.13037899136543274, "logits/chosen": 0.6309264302253723, "logits/rejected": 0.6234844326972961, "logps/chosen": -2.0922155380249023, "logps/rejected": -4.8855791091918945, "loss": 0.6913, "nll_loss": 0.678232729434967, "rewards/accuracies": 1.0, "rewards/chosen": -0.20922154188156128, "rewards/margins": 0.2793363332748413, "rewards/rejected": -0.4885578453540802, "step": 3310 }, { "epoch": 9.065023956194388, "grad_norm": 4.219966888427734, "learning_rate": 5.465753424657535e-07, "log_odds_chosen": 3.2476248741149902, "log_odds_ratio": -0.12608109414577484, "logits/chosen": 0.8493331670761108, "logits/rejected": 0.7841576337814331, "logps/chosen": -1.9361165761947632, "logps/rejected": -5.007876873016357, "loss": 0.8125, "nll_loss": 0.7999273538589478, "rewards/accuracies": 1.0, "rewards/chosen": -0.19361165165901184, "rewards/margins": 0.30717605352401733, "rewards/rejected": -0.5007877349853516, "step": 3311 }, { "epoch": 9.067761806981519, "grad_norm": 5.338283538818359, "learning_rate": 5.464383561643835e-07, "log_odds_chosen": 2.84440279006958, "log_odds_ratio": -0.28184032440185547, "logits/chosen": 0.7981039881706238, "logits/rejected": 0.8603228330612183, "logps/chosen": -2.702416181564331, "logps/rejected": -5.467711448669434, "loss": 0.8153, "nll_loss": 0.7870763540267944, "rewards/accuracies": 0.875, "rewards/chosen": -0.2702416181564331, "rewards/margins": 0.27652955055236816, "rewards/rejected": -0.5467711091041565, "step": 3312 }, { "epoch": 9.070499657768652, "grad_norm": 4.086837291717529, "learning_rate": 5.463013698630137e-07, "log_odds_chosen": 3.55405592918396, "log_odds_ratio": -0.07587824761867523, "logits/chosen": 1.0087772607803345, "logits/rejected": 1.0609062910079956, "logps/chosen": -2.4423177242279053, "logps/rejected": -5.838115215301514, "loss": 0.7794, "nll_loss": 0.7718257904052734, "rewards/accuracies": 1.0, "rewards/chosen": -0.24423177540302277, "rewards/margins": 0.3395797610282898, "rewards/rejected": -0.5838115215301514, "step": 3313 }, { "epoch": 9.073237508555783, "grad_norm": 4.448091506958008, "learning_rate": 5.461643835616439e-07, "log_odds_chosen": 1.7127487659454346, "log_odds_ratio": -0.3768365979194641, "logits/chosen": 0.82002854347229, "logits/rejected": 0.9091680645942688, "logps/chosen": -2.0458521842956543, "logps/rejected": -3.6614437103271484, "loss": 0.7544, "nll_loss": 0.7167251110076904, "rewards/accuracies": 0.875, "rewards/chosen": -0.2045852243900299, "rewards/margins": 0.16155913472175598, "rewards/rejected": -0.3661443591117859, "step": 3314 }, { "epoch": 9.075975359342916, "grad_norm": 4.584537506103516, "learning_rate": 5.46027397260274e-07, "log_odds_chosen": 1.7551965713500977, "log_odds_ratio": -0.2992064654827118, "logits/chosen": 1.0660463571548462, "logits/rejected": 1.1171592473983765, "logps/chosen": -2.159950017929077, "logps/rejected": -3.81949520111084, "loss": 0.6981, "nll_loss": 0.6681410670280457, "rewards/accuracies": 0.875, "rewards/chosen": -0.21599498391151428, "rewards/margins": 0.16595450043678284, "rewards/rejected": -0.38194945454597473, "step": 3315 }, { "epoch": 9.078713210130047, "grad_norm": 4.392846584320068, "learning_rate": 5.458904109589041e-07, "log_odds_chosen": 1.4354974031448364, "log_odds_ratio": -0.32504087686538696, "logits/chosen": 0.8142911195755005, "logits/rejected": 0.8334875106811523, "logps/chosen": -2.023308277130127, "logps/rejected": -3.355142116546631, "loss": 0.676, "nll_loss": 0.6435380578041077, "rewards/accuracies": 0.875, "rewards/chosen": -0.2023307979106903, "rewards/margins": 0.13318338990211487, "rewards/rejected": -0.3355141878128052, "step": 3316 }, { "epoch": 9.08145106091718, "grad_norm": 8.784263610839844, "learning_rate": 5.457534246575343e-07, "log_odds_chosen": 1.858918309211731, "log_odds_ratio": -0.5471303462982178, "logits/chosen": 0.7984188795089722, "logits/rejected": 0.7981114387512207, "logps/chosen": -2.642749786376953, "logps/rejected": -4.381328582763672, "loss": 0.7647, "nll_loss": 0.71002197265625, "rewards/accuracies": 0.75, "rewards/chosen": -0.2642749547958374, "rewards/margins": 0.1738579124212265, "rewards/rejected": -0.4381328821182251, "step": 3317 }, { "epoch": 9.084188911704311, "grad_norm": 5.483384609222412, "learning_rate": 5.456164383561644e-07, "log_odds_chosen": 2.4361915588378906, "log_odds_ratio": -0.2382451742887497, "logits/chosen": 1.0206143856048584, "logits/rejected": 1.0521858930587769, "logps/chosen": -2.716794490814209, "logps/rejected": -5.083202362060547, "loss": 0.7742, "nll_loss": 0.7503643035888672, "rewards/accuracies": 0.875, "rewards/chosen": -0.27167943120002747, "rewards/margins": 0.2366408109664917, "rewards/rejected": -0.5083202719688416, "step": 3318 }, { "epoch": 9.086926762491444, "grad_norm": 4.001038074493408, "learning_rate": 5.454794520547945e-07, "log_odds_chosen": 2.171653985977173, "log_odds_ratio": -0.23466935753822327, "logits/chosen": 0.9035478830337524, "logits/rejected": 0.9310965538024902, "logps/chosen": -2.075660228729248, "logps/rejected": -4.088264465332031, "loss": 0.7025, "nll_loss": 0.6790260076522827, "rewards/accuracies": 0.875, "rewards/chosen": -0.20756599307060242, "rewards/margins": 0.20126044750213623, "rewards/rejected": -0.40882644057273865, "step": 3319 }, { "epoch": 9.089664613278575, "grad_norm": 4.873663902282715, "learning_rate": 5.453424657534247e-07, "log_odds_chosen": 2.156975269317627, "log_odds_ratio": -0.2890515923500061, "logits/chosen": 1.0972678661346436, "logits/rejected": 1.1182421445846558, "logps/chosen": -1.7445087432861328, "logps/rejected": -3.7530415058135986, "loss": 0.669, "nll_loss": 0.640127420425415, "rewards/accuracies": 1.0, "rewards/chosen": -0.17445087432861328, "rewards/margins": 0.20085328817367554, "rewards/rejected": -0.37530413269996643, "step": 3320 }, { "epoch": 9.092402464065708, "grad_norm": 4.115425109863281, "learning_rate": 5.452054794520548e-07, "log_odds_chosen": 1.497917652130127, "log_odds_ratio": -0.3257400095462799, "logits/chosen": 0.7072457671165466, "logits/rejected": 0.7403582334518433, "logps/chosen": -2.018340826034546, "logps/rejected": -3.4157705307006836, "loss": 0.7405, "nll_loss": 0.7079445123672485, "rewards/accuracies": 0.875, "rewards/chosen": -0.2018340826034546, "rewards/margins": 0.13974297046661377, "rewards/rejected": -0.34157708287239075, "step": 3321 }, { "epoch": 9.095140314852841, "grad_norm": 4.519472122192383, "learning_rate": 5.45068493150685e-07, "log_odds_chosen": 1.693765640258789, "log_odds_ratio": -0.5596975088119507, "logits/chosen": 0.8430008292198181, "logits/rejected": 0.8337457180023193, "logps/chosen": -2.5067245960235596, "logps/rejected": -4.134102821350098, "loss": 0.7972, "nll_loss": 0.7412365674972534, "rewards/accuracies": 0.875, "rewards/chosen": -0.25067245960235596, "rewards/margins": 0.16273783147335052, "rewards/rejected": -0.41341033577919006, "step": 3322 }, { "epoch": 9.097878165639973, "grad_norm": 5.79818058013916, "learning_rate": 5.44931506849315e-07, "log_odds_chosen": 1.4470493793487549, "log_odds_ratio": -0.41632890701293945, "logits/chosen": 0.8679271340370178, "logits/rejected": 0.8160232901573181, "logps/chosen": -2.4854025840759277, "logps/rejected": -3.8985958099365234, "loss": 0.8331, "nll_loss": 0.79145747423172, "rewards/accuracies": 0.875, "rewards/chosen": -0.2485402524471283, "rewards/margins": 0.14131931960582733, "rewards/rejected": -0.38985955715179443, "step": 3323 }, { "epoch": 9.100616016427105, "grad_norm": 5.334085941314697, "learning_rate": 5.447945205479452e-07, "log_odds_chosen": 1.0524076223373413, "log_odds_ratio": -0.36957675218582153, "logits/chosen": 0.9832605123519897, "logits/rejected": 0.9085320830345154, "logps/chosen": -2.2988898754119873, "logps/rejected": -3.2638654708862305, "loss": 0.8107, "nll_loss": 0.7737420797348022, "rewards/accuracies": 0.875, "rewards/chosen": -0.22988899052143097, "rewards/margins": 0.0964975506067276, "rewards/rejected": -0.32638654112815857, "step": 3324 }, { "epoch": 9.103353867214237, "grad_norm": 4.905609607696533, "learning_rate": 5.446575342465754e-07, "log_odds_chosen": 2.2528254985809326, "log_odds_ratio": -0.1884763389825821, "logits/chosen": 0.9647576808929443, "logits/rejected": 0.956841230392456, "logps/chosen": -2.681029796600342, "logps/rejected": -4.856963157653809, "loss": 0.7513, "nll_loss": 0.7324149012565613, "rewards/accuracies": 0.875, "rewards/chosen": -0.2681029736995697, "rewards/margins": 0.21759337186813354, "rewards/rejected": -0.48569637537002563, "step": 3325 }, { "epoch": 9.10609171800137, "grad_norm": 5.619903087615967, "learning_rate": 5.445205479452054e-07, "log_odds_chosen": 1.0794612169265747, "log_odds_ratio": -0.40668755769729614, "logits/chosen": 0.9182411432266235, "logits/rejected": 0.863183856010437, "logps/chosen": -2.1462857723236084, "logps/rejected": -3.1679744720458984, "loss": 0.8059, "nll_loss": 0.7652024030685425, "rewards/accuracies": 0.875, "rewards/chosen": -0.21462857723236084, "rewards/margins": 0.10216888040304184, "rewards/rejected": -0.31679749488830566, "step": 3326 }, { "epoch": 9.1088295687885, "grad_norm": 4.7876715660095215, "learning_rate": 5.443835616438356e-07, "log_odds_chosen": 1.6934220790863037, "log_odds_ratio": -0.3114987015724182, "logits/chosen": 0.7697765231132507, "logits/rejected": 0.7556747198104858, "logps/chosen": -2.0404834747314453, "logps/rejected": -3.631178855895996, "loss": 0.7292, "nll_loss": 0.6980080604553223, "rewards/accuracies": 0.875, "rewards/chosen": -0.20404836535453796, "rewards/margins": 0.15906952321529388, "rewards/rejected": -0.36311790347099304, "step": 3327 }, { "epoch": 9.111567419575634, "grad_norm": 4.864201068878174, "learning_rate": 5.442465753424658e-07, "log_odds_chosen": 2.8662197589874268, "log_odds_ratio": -0.27522146701812744, "logits/chosen": 0.8905550837516785, "logits/rejected": 0.9074351787567139, "logps/chosen": -2.5086450576782227, "logps/rejected": -5.315927982330322, "loss": 0.7677, "nll_loss": 0.7401647567749023, "rewards/accuracies": 1.0, "rewards/chosen": -0.25086450576782227, "rewards/margins": 0.280728280544281, "rewards/rejected": -0.5315927863121033, "step": 3328 }, { "epoch": 9.114305270362765, "grad_norm": 4.593480110168457, "learning_rate": 5.441095890410959e-07, "log_odds_chosen": 3.097276210784912, "log_odds_ratio": -0.20193316042423248, "logits/chosen": 0.9312396049499512, "logits/rejected": 0.936913013458252, "logps/chosen": -2.5992422103881836, "logps/rejected": -5.630350112915039, "loss": 0.7929, "nll_loss": 0.77272629737854, "rewards/accuracies": 1.0, "rewards/chosen": -0.2599242329597473, "rewards/margins": 0.303110808134079, "rewards/rejected": -0.5630350112915039, "step": 3329 }, { "epoch": 9.117043121149898, "grad_norm": 5.80495023727417, "learning_rate": 5.43972602739726e-07, "log_odds_chosen": 0.8309166431427002, "log_odds_ratio": -0.40823137760162354, "logits/chosen": 0.8666032552719116, "logits/rejected": 0.8299931883811951, "logps/chosen": -1.6246306896209717, "logps/rejected": -2.3517518043518066, "loss": 0.7111, "nll_loss": 0.6702466011047363, "rewards/accuracies": 0.875, "rewards/chosen": -0.16246306896209717, "rewards/margins": 0.07271209359169006, "rewards/rejected": -0.23517517745494843, "step": 3330 }, { "epoch": 9.119780971937029, "grad_norm": 5.6067986488342285, "learning_rate": 5.438356164383562e-07, "log_odds_chosen": 2.773942470550537, "log_odds_ratio": -0.3092987537384033, "logits/chosen": 0.6433314085006714, "logits/rejected": 0.6637824773788452, "logps/chosen": -1.947204351425171, "logps/rejected": -4.607903003692627, "loss": 0.7256, "nll_loss": 0.6946532726287842, "rewards/accuracies": 0.875, "rewards/chosen": -0.19472044706344604, "rewards/margins": 0.2660698890686035, "rewards/rejected": -0.46079033613204956, "step": 3331 }, { "epoch": 9.122518822724162, "grad_norm": 4.867803573608398, "learning_rate": 5.436986301369863e-07, "log_odds_chosen": 1.727818250656128, "log_odds_ratio": -0.29186004400253296, "logits/chosen": 0.8927581310272217, "logits/rejected": 0.8260411024093628, "logps/chosen": -1.700236201286316, "logps/rejected": -3.2988414764404297, "loss": 0.7454, "nll_loss": 0.716256320476532, "rewards/accuracies": 0.875, "rewards/chosen": -0.17002363502979279, "rewards/margins": 0.1598605215549469, "rewards/rejected": -0.3298841416835785, "step": 3332 }, { "epoch": 9.125256673511293, "grad_norm": 3.9804370403289795, "learning_rate": 5.435616438356164e-07, "log_odds_chosen": 1.7229372262954712, "log_odds_ratio": -0.2594459056854248, "logits/chosen": 0.9268410205841064, "logits/rejected": 0.8492542505264282, "logps/chosen": -1.6746869087219238, "logps/rejected": -3.2584915161132812, "loss": 0.7223, "nll_loss": 0.6963720321655273, "rewards/accuracies": 1.0, "rewards/chosen": -0.16746869683265686, "rewards/margins": 0.15838046371936798, "rewards/rejected": -0.32584917545318604, "step": 3333 }, { "epoch": 9.127994524298426, "grad_norm": 4.8668742179870605, "learning_rate": 5.434246575342466e-07, "log_odds_chosen": 1.7797694206237793, "log_odds_ratio": -0.237729012966156, "logits/chosen": 0.639445960521698, "logits/rejected": 0.658526599407196, "logps/chosen": -1.8651952743530273, "logps/rejected": -3.510171890258789, "loss": 0.7282, "nll_loss": 0.7044333219528198, "rewards/accuracies": 1.0, "rewards/chosen": -0.1865195333957672, "rewards/margins": 0.16449765861034393, "rewards/rejected": -0.35101717710494995, "step": 3334 }, { "epoch": 9.130732375085557, "grad_norm": 5.2781596183776855, "learning_rate": 5.432876712328767e-07, "log_odds_chosen": 1.154184341430664, "log_odds_ratio": -0.39343422651290894, "logits/chosen": 0.953704297542572, "logits/rejected": 0.9685907959938049, "logps/chosen": -2.4362101554870605, "logps/rejected": -3.5529913902282715, "loss": 0.7235, "nll_loss": 0.6841773986816406, "rewards/accuracies": 0.75, "rewards/chosen": -0.24362102150917053, "rewards/margins": 0.11167815327644348, "rewards/rejected": -0.355299174785614, "step": 3335 }, { "epoch": 9.13347022587269, "grad_norm": 5.646183967590332, "learning_rate": 5.431506849315069e-07, "log_odds_chosen": 1.2977490425109863, "log_odds_ratio": -0.37627914547920227, "logits/chosen": 0.812165379524231, "logits/rejected": 0.7219406962394714, "logps/chosen": -1.8107675313949585, "logps/rejected": -3.0042266845703125, "loss": 0.7974, "nll_loss": 0.7597850561141968, "rewards/accuracies": 0.875, "rewards/chosen": -0.1810767650604248, "rewards/margins": 0.11934591829776764, "rewards/rejected": -0.30042269825935364, "step": 3336 }, { "epoch": 9.136208076659821, "grad_norm": 4.569534778594971, "learning_rate": 5.430136986301369e-07, "log_odds_chosen": 2.2158472537994385, "log_odds_ratio": -0.25237512588500977, "logits/chosen": 0.921954870223999, "logits/rejected": 0.9270343780517578, "logps/chosen": -2.252974033355713, "logps/rejected": -4.306819915771484, "loss": 0.751, "nll_loss": 0.7257564067840576, "rewards/accuracies": 0.875, "rewards/chosen": -0.22529743611812592, "rewards/margins": 0.20538459718227386, "rewards/rejected": -0.4306820034980774, "step": 3337 }, { "epoch": 9.138945927446954, "grad_norm": 5.933881759643555, "learning_rate": 5.428767123287671e-07, "log_odds_chosen": 1.7458839416503906, "log_odds_ratio": -0.35759255290031433, "logits/chosen": 0.9736701250076294, "logits/rejected": 1.011117935180664, "logps/chosen": -2.4287004470825195, "logps/rejected": -4.019049644470215, "loss": 0.6347, "nll_loss": 0.5989420413970947, "rewards/accuracies": 0.875, "rewards/chosen": -0.2428700476884842, "rewards/margins": 0.15903495252132416, "rewards/rejected": -0.40190500020980835, "step": 3338 }, { "epoch": 9.141683778234086, "grad_norm": 5.528937816619873, "learning_rate": 5.427397260273973e-07, "log_odds_chosen": 2.334766149520874, "log_odds_ratio": -0.23350223898887634, "logits/chosen": 0.794177234172821, "logits/rejected": 0.8231078386306763, "logps/chosen": -2.241421937942505, "logps/rejected": -4.443360805511475, "loss": 0.7108, "nll_loss": 0.687428891658783, "rewards/accuracies": 0.875, "rewards/chosen": -0.2241421937942505, "rewards/margins": 0.22019390761852264, "rewards/rejected": -0.4443361163139343, "step": 3339 }, { "epoch": 9.144421629021219, "grad_norm": 5.312419891357422, "learning_rate": 5.426027397260273e-07, "log_odds_chosen": 2.7787880897521973, "log_odds_ratio": -0.2852780222892761, "logits/chosen": 0.9750232100486755, "logits/rejected": 0.9926470518112183, "logps/chosen": -2.995661973953247, "logps/rejected": -5.688869476318359, "loss": 0.8271, "nll_loss": 0.7986040115356445, "rewards/accuracies": 0.875, "rewards/chosen": -0.29956620931625366, "rewards/margins": 0.2693207859992981, "rewards/rejected": -0.5688869953155518, "step": 3340 }, { "epoch": 9.14715947980835, "grad_norm": 4.977492332458496, "learning_rate": 5.424657534246575e-07, "log_odds_chosen": 1.1824824810028076, "log_odds_ratio": -0.40519434213638306, "logits/chosen": 0.9123605489730835, "logits/rejected": 0.9621301293373108, "logps/chosen": -2.088660478591919, "logps/rejected": -3.2032015323638916, "loss": 0.7632, "nll_loss": 0.7226862907409668, "rewards/accuracies": 1.0, "rewards/chosen": -0.20886605978012085, "rewards/margins": 0.111454077064991, "rewards/rejected": -0.32032012939453125, "step": 3341 }, { "epoch": 9.149897330595483, "grad_norm": 6.811492919921875, "learning_rate": 5.423287671232877e-07, "log_odds_chosen": 0.6129522323608398, "log_odds_ratio": -0.7942251563072205, "logits/chosen": 0.8787592053413391, "logits/rejected": 0.8943639993667603, "logps/chosen": -2.924762725830078, "logps/rejected": -3.452106237411499, "loss": 0.7741, "nll_loss": 0.6946462988853455, "rewards/accuracies": 0.625, "rewards/chosen": -0.2924762964248657, "rewards/margins": 0.05273433029651642, "rewards/rejected": -0.34521061182022095, "step": 3342 }, { "epoch": 9.152635181382614, "grad_norm": 5.745722770690918, "learning_rate": 5.421917808219178e-07, "log_odds_chosen": 1.4363963603973389, "log_odds_ratio": -0.5109877586364746, "logits/chosen": 0.7673141956329346, "logits/rejected": 0.7733686566352844, "logps/chosen": -2.3377904891967773, "logps/rejected": -3.741441249847412, "loss": 0.8272, "nll_loss": 0.7760866284370422, "rewards/accuracies": 0.75, "rewards/chosen": -0.23377905786037445, "rewards/margins": 0.1403650939464569, "rewards/rejected": -0.37414413690567017, "step": 3343 }, { "epoch": 9.155373032169747, "grad_norm": 4.772974967956543, "learning_rate": 5.420547945205479e-07, "log_odds_chosen": 2.6286563873291016, "log_odds_ratio": -0.2303379476070404, "logits/chosen": 0.8003387451171875, "logits/rejected": 0.802417516708374, "logps/chosen": -1.9784088134765625, "logps/rejected": -4.443613052368164, "loss": 0.7449, "nll_loss": 0.7218457460403442, "rewards/accuracies": 0.875, "rewards/chosen": -0.1978408694267273, "rewards/margins": 0.24652044475078583, "rewards/rejected": -0.4443613290786743, "step": 3344 }, { "epoch": 9.158110882956878, "grad_norm": 4.780266761779785, "learning_rate": 5.419178082191781e-07, "log_odds_chosen": 1.3119490146636963, "log_odds_ratio": -0.41524237394332886, "logits/chosen": 0.7230545282363892, "logits/rejected": 0.6711257100105286, "logps/chosen": -1.7714784145355225, "logps/rejected": -2.959141731262207, "loss": 0.7386, "nll_loss": 0.6970360279083252, "rewards/accuracies": 0.75, "rewards/chosen": -0.17714785039424896, "rewards/margins": 0.11876633018255234, "rewards/rejected": -0.2959141731262207, "step": 3345 }, { "epoch": 9.160848733744011, "grad_norm": 4.4419941902160645, "learning_rate": 5.417808219178082e-07, "log_odds_chosen": 2.520003318786621, "log_odds_ratio": -0.2082338035106659, "logits/chosen": 0.7641529440879822, "logits/rejected": 0.8004302382469177, "logps/chosen": -1.9656873941421509, "logps/rejected": -4.368267059326172, "loss": 0.7793, "nll_loss": 0.7584319114685059, "rewards/accuracies": 1.0, "rewards/chosen": -0.19656872749328613, "rewards/margins": 0.24025797843933105, "rewards/rejected": -0.4368267059326172, "step": 3346 }, { "epoch": 9.163586584531142, "grad_norm": 4.106556415557861, "learning_rate": 5.416438356164383e-07, "log_odds_chosen": 2.3505139350891113, "log_odds_ratio": -0.21484887599945068, "logits/chosen": 0.7270907163619995, "logits/rejected": 0.7171188592910767, "logps/chosen": -2.0785481929779053, "logps/rejected": -4.216894149780273, "loss": 0.8116, "nll_loss": 0.7900711297988892, "rewards/accuracies": 1.0, "rewards/chosen": -0.20785482227802277, "rewards/margins": 0.21383461356163025, "rewards/rejected": -0.4216894507408142, "step": 3347 }, { "epoch": 9.166324435318275, "grad_norm": 5.402964115142822, "learning_rate": 5.415068493150685e-07, "log_odds_chosen": 0.9846435189247131, "log_odds_ratio": -0.3687560558319092, "logits/chosen": 0.852758526802063, "logits/rejected": 0.8677611947059631, "logps/chosen": -2.191771984100342, "logps/rejected": -3.0935769081115723, "loss": 0.6918, "nll_loss": 0.6549100279808044, "rewards/accuracies": 1.0, "rewards/chosen": -0.21917720139026642, "rewards/margins": 0.09018047153949738, "rewards/rejected": -0.3093576431274414, "step": 3348 }, { "epoch": 9.169062286105408, "grad_norm": 4.885008335113525, "learning_rate": 5.413698630136986e-07, "log_odds_chosen": 1.249022126197815, "log_odds_ratio": -0.39422276616096497, "logits/chosen": 0.8096996545791626, "logits/rejected": 0.8123275637626648, "logps/chosen": -2.7045950889587402, "logps/rejected": -3.8866069316864014, "loss": 0.7638, "nll_loss": 0.7243633270263672, "rewards/accuracies": 0.75, "rewards/chosen": -0.27045953273773193, "rewards/margins": 0.11820119619369507, "rewards/rejected": -0.388660728931427, "step": 3349 }, { "epoch": 9.17180013689254, "grad_norm": 4.266299724578857, "learning_rate": 5.412328767123288e-07, "log_odds_chosen": 1.8812156915664673, "log_odds_ratio": -0.33382436633110046, "logits/chosen": 0.7559825778007507, "logits/rejected": 0.7685003876686096, "logps/chosen": -2.3850607872009277, "logps/rejected": -4.166937828063965, "loss": 0.7361, "nll_loss": 0.7027405500411987, "rewards/accuracies": 0.875, "rewards/chosen": -0.23850609362125397, "rewards/margins": 0.17818768322467804, "rewards/rejected": -0.4166937470436096, "step": 3350 }, { "epoch": 9.174537987679672, "grad_norm": 4.204170227050781, "learning_rate": 5.410958904109589e-07, "log_odds_chosen": 2.3781745433807373, "log_odds_ratio": -0.23977439105510712, "logits/chosen": 0.7562817335128784, "logits/rejected": 0.7806260585784912, "logps/chosen": -2.3709487915039062, "logps/rejected": -4.675086975097656, "loss": 0.6952, "nll_loss": 0.6711729764938354, "rewards/accuracies": 0.875, "rewards/chosen": -0.23709489405155182, "rewards/margins": 0.23041385412216187, "rewards/rejected": -0.4675087332725525, "step": 3351 }, { "epoch": 9.177275838466803, "grad_norm": 4.758955478668213, "learning_rate": 5.40958904109589e-07, "log_odds_chosen": 1.8786594867706299, "log_odds_ratio": -0.22837533056735992, "logits/chosen": 0.8731139898300171, "logits/rejected": 0.8989304304122925, "logps/chosen": -2.27952241897583, "logps/rejected": -4.068474769592285, "loss": 0.7109, "nll_loss": 0.6880751252174377, "rewards/accuracies": 1.0, "rewards/chosen": -0.227952241897583, "rewards/margins": 0.1788952350616455, "rewards/rejected": -0.4068475365638733, "step": 3352 }, { "epoch": 9.180013689253936, "grad_norm": 4.984134674072266, "learning_rate": 5.408219178082192e-07, "log_odds_chosen": 1.680309534072876, "log_odds_ratio": -0.38035959005355835, "logits/chosen": 0.8150667548179626, "logits/rejected": 0.7586979866027832, "logps/chosen": -1.4698213338851929, "logps/rejected": -2.998250961303711, "loss": 0.6765, "nll_loss": 0.6384251117706299, "rewards/accuracies": 0.875, "rewards/chosen": -0.1469821333885193, "rewards/margins": 0.1528429538011551, "rewards/rejected": -0.29982510209083557, "step": 3353 }, { "epoch": 9.182751540041068, "grad_norm": 7.8949456214904785, "learning_rate": 5.406849315068492e-07, "log_odds_chosen": 1.1404972076416016, "log_odds_ratio": -0.7977612018585205, "logits/chosen": 0.7459170818328857, "logits/rejected": 0.7110697627067566, "logps/chosen": -2.382361888885498, "logps/rejected": -3.408165454864502, "loss": 0.8255, "nll_loss": 0.7457432746887207, "rewards/accuracies": 0.875, "rewards/chosen": -0.2382361888885498, "rewards/margins": 0.10258036851882935, "rewards/rejected": -0.34081655740737915, "step": 3354 }, { "epoch": 9.1854893908282, "grad_norm": 4.942800998687744, "learning_rate": 5.405479452054794e-07, "log_odds_chosen": 0.7115614414215088, "log_odds_ratio": -0.4645904302597046, "logits/chosen": 0.6777513027191162, "logits/rejected": 0.6708762645721436, "logps/chosen": -2.2854936122894287, "logps/rejected": -2.9502549171447754, "loss": 0.8829, "nll_loss": 0.8364334106445312, "rewards/accuracies": 0.875, "rewards/chosen": -0.22854939103126526, "rewards/margins": 0.06647611409425735, "rewards/rejected": -0.295025497674942, "step": 3355 }, { "epoch": 9.188227241615332, "grad_norm": 4.8904829025268555, "learning_rate": 5.404109589041096e-07, "log_odds_chosen": 2.742903470993042, "log_odds_ratio": -0.193190798163414, "logits/chosen": 1.0915114879608154, "logits/rejected": 1.169488787651062, "logps/chosen": -2.1540517807006836, "logps/rejected": -4.720285415649414, "loss": 0.6604, "nll_loss": 0.6410932540893555, "rewards/accuracies": 0.875, "rewards/chosen": -0.21540521085262299, "rewards/margins": 0.25662338733673096, "rewards/rejected": -0.47202855348587036, "step": 3356 }, { "epoch": 9.190965092402465, "grad_norm": 4.971775054931641, "learning_rate": 5.402739726027396e-07, "log_odds_chosen": 2.8943471908569336, "log_odds_ratio": -0.17389512062072754, "logits/chosen": 0.7995845079421997, "logits/rejected": 0.8331125378608704, "logps/chosen": -1.9629132747650146, "logps/rejected": -4.707860946655273, "loss": 0.6346, "nll_loss": 0.6171643733978271, "rewards/accuracies": 1.0, "rewards/chosen": -0.19629132747650146, "rewards/margins": 0.2744947671890259, "rewards/rejected": -0.47078609466552734, "step": 3357 }, { "epoch": 9.193702943189596, "grad_norm": 5.247677803039551, "learning_rate": 5.401369863013698e-07, "log_odds_chosen": 2.800525665283203, "log_odds_ratio": -0.24122366309165955, "logits/chosen": 0.9434179067611694, "logits/rejected": 0.9258021116256714, "logps/chosen": -2.2574667930603027, "logps/rejected": -4.934647083282471, "loss": 0.7464, "nll_loss": 0.7222877740859985, "rewards/accuracies": 0.875, "rewards/chosen": -0.22574669122695923, "rewards/margins": 0.26771804690361023, "rewards/rejected": -0.49346476793289185, "step": 3358 }, { "epoch": 9.196440793976729, "grad_norm": 3.7977049350738525, "learning_rate": 5.4e-07, "log_odds_chosen": 1.5261059999465942, "log_odds_ratio": -0.2685956656932831, "logits/chosen": 0.9124435782432556, "logits/rejected": 0.8791861534118652, "logps/chosen": -1.6900478601455688, "logps/rejected": -3.0634613037109375, "loss": 0.7563, "nll_loss": 0.7294777631759644, "rewards/accuracies": 1.0, "rewards/chosen": -0.16900479793548584, "rewards/margins": 0.13734133541584015, "rewards/rejected": -0.3063461184501648, "step": 3359 }, { "epoch": 9.19917864476386, "grad_norm": 5.025267601013184, "learning_rate": 5.398630136986301e-07, "log_odds_chosen": 1.538874626159668, "log_odds_ratio": -0.29489898681640625, "logits/chosen": 1.0017237663269043, "logits/rejected": 1.010138988494873, "logps/chosen": -2.442559003829956, "logps/rejected": -3.881638526916504, "loss": 0.8195, "nll_loss": 0.7899612784385681, "rewards/accuracies": 0.875, "rewards/chosen": -0.2442559152841568, "rewards/margins": 0.14390796422958374, "rewards/rejected": -0.38816386461257935, "step": 3360 }, { "epoch": 9.201916495550993, "grad_norm": 4.46168327331543, "learning_rate": 5.397260273972602e-07, "log_odds_chosen": 2.9494681358337402, "log_odds_ratio": -0.20855800807476044, "logits/chosen": 1.0119409561157227, "logits/rejected": 1.0730228424072266, "logps/chosen": -2.085672378540039, "logps/rejected": -4.883528709411621, "loss": 0.7489, "nll_loss": 0.7280592322349548, "rewards/accuracies": 1.0, "rewards/chosen": -0.20856723189353943, "rewards/margins": 0.279785692691803, "rewards/rejected": -0.4883529543876648, "step": 3361 }, { "epoch": 9.204654346338124, "grad_norm": 3.9813218116760254, "learning_rate": 5.395890410958904e-07, "log_odds_chosen": 2.19281005859375, "log_odds_ratio": -0.3065076470375061, "logits/chosen": 0.8690667748451233, "logits/rejected": 0.9314044713973999, "logps/chosen": -2.3516974449157715, "logps/rejected": -4.450035095214844, "loss": 0.7833, "nll_loss": 0.7526220083236694, "rewards/accuracies": 0.875, "rewards/chosen": -0.23516976833343506, "rewards/margins": 0.20983374118804932, "rewards/rejected": -0.4450035095214844, "step": 3362 }, { "epoch": 9.207392197125257, "grad_norm": 4.707369804382324, "learning_rate": 5.394520547945205e-07, "log_odds_chosen": 2.23697829246521, "log_odds_ratio": -0.1542646288871765, "logits/chosen": 1.0018787384033203, "logits/rejected": 1.037902593612671, "logps/chosen": -1.9979363679885864, "logps/rejected": -4.054208278656006, "loss": 0.653, "nll_loss": 0.6376094818115234, "rewards/accuracies": 1.0, "rewards/chosen": -0.19979363679885864, "rewards/margins": 0.2056271880865097, "rewards/rejected": -0.40542083978652954, "step": 3363 }, { "epoch": 9.210130047912388, "grad_norm": 4.17297887802124, "learning_rate": 5.393150684931506e-07, "log_odds_chosen": 2.848457098007202, "log_odds_ratio": -0.08241716772317886, "logits/chosen": 0.9743949174880981, "logits/rejected": 0.9910189509391785, "logps/chosen": -2.384613513946533, "logps/rejected": -5.11329460144043, "loss": 0.7667, "nll_loss": 0.7584761381149292, "rewards/accuracies": 1.0, "rewards/chosen": -0.23846134543418884, "rewards/margins": 0.2728680968284607, "rewards/rejected": -0.5113294124603271, "step": 3364 }, { "epoch": 9.212867898699521, "grad_norm": 4.642765045166016, "learning_rate": 5.391780821917808e-07, "log_odds_chosen": 1.4891996383666992, "log_odds_ratio": -0.3504785895347595, "logits/chosen": 0.7487034201622009, "logits/rejected": 0.6904592514038086, "logps/chosen": -2.5374250411987305, "logps/rejected": -3.971672296524048, "loss": 0.7913, "nll_loss": 0.7562360763549805, "rewards/accuracies": 0.875, "rewards/chosen": -0.253742516040802, "rewards/margins": 0.14342473447322845, "rewards/rejected": -0.39716723561286926, "step": 3365 }, { "epoch": 9.215605749486652, "grad_norm": 4.266764163970947, "learning_rate": 5.390410958904109e-07, "log_odds_chosen": 2.2288081645965576, "log_odds_ratio": -0.30329200625419617, "logits/chosen": 0.7556014060974121, "logits/rejected": 0.777086615562439, "logps/chosen": -2.2777554988861084, "logps/rejected": -4.420933246612549, "loss": 0.7664, "nll_loss": 0.7361207008361816, "rewards/accuracies": 1.0, "rewards/chosen": -0.22777554392814636, "rewards/margins": 0.21431776881217957, "rewards/rejected": -0.4420933127403259, "step": 3366 }, { "epoch": 9.218343600273785, "grad_norm": 5.449673175811768, "learning_rate": 5.389041095890411e-07, "log_odds_chosen": 1.0575973987579346, "log_odds_ratio": -0.5702468156814575, "logits/chosen": 0.7819018959999084, "logits/rejected": 0.9061375856399536, "logps/chosen": -2.2639029026031494, "logps/rejected": -3.2032761573791504, "loss": 0.7904, "nll_loss": 0.7334103584289551, "rewards/accuracies": 0.875, "rewards/chosen": -0.2263903021812439, "rewards/margins": 0.09393730759620667, "rewards/rejected": -0.32032760977745056, "step": 3367 }, { "epoch": 9.221081451060916, "grad_norm": 4.017989158630371, "learning_rate": 5.387671232876711e-07, "log_odds_chosen": 2.324556350708008, "log_odds_ratio": -0.295801043510437, "logits/chosen": 0.854245662689209, "logits/rejected": 0.9614047408103943, "logps/chosen": -2.2292087078094482, "logps/rejected": -4.45368766784668, "loss": 0.8493, "nll_loss": 0.819737434387207, "rewards/accuracies": 0.875, "rewards/chosen": -0.22292087972164154, "rewards/margins": 0.22244787216186523, "rewards/rejected": -0.44536876678466797, "step": 3368 }, { "epoch": 9.22381930184805, "grad_norm": 10.477766036987305, "learning_rate": 5.386301369863013e-07, "log_odds_chosen": 1.5324708223342896, "log_odds_ratio": -0.5412732362747192, "logits/chosen": 1.0933218002319336, "logits/rejected": 1.1181389093399048, "logps/chosen": -3.232553482055664, "logps/rejected": -4.607710838317871, "loss": 0.7321, "nll_loss": 0.6779618263244629, "rewards/accuracies": 0.75, "rewards/chosen": -0.32325536012649536, "rewards/margins": 0.13751570880413055, "rewards/rejected": -0.4607710838317871, "step": 3369 }, { "epoch": 9.22655715263518, "grad_norm": 6.107179164886475, "learning_rate": 5.384931506849315e-07, "log_odds_chosen": 2.3453283309936523, "log_odds_ratio": -0.6196550130844116, "logits/chosen": 0.8440420627593994, "logits/rejected": 0.9504271149635315, "logps/chosen": -2.84273624420166, "logps/rejected": -5.130326747894287, "loss": 0.9278, "nll_loss": 0.8658055663108826, "rewards/accuracies": 0.875, "rewards/chosen": -0.28427359461784363, "rewards/margins": 0.22875909507274628, "rewards/rejected": -0.5130327343940735, "step": 3370 }, { "epoch": 9.229295003422314, "grad_norm": 4.743851184844971, "learning_rate": 5.383561643835615e-07, "log_odds_chosen": 1.4604644775390625, "log_odds_ratio": -0.3174417018890381, "logits/chosen": 0.8690587282180786, "logits/rejected": 0.7755592465400696, "logps/chosen": -2.031871795654297, "logps/rejected": -3.3769688606262207, "loss": 0.8516, "nll_loss": 0.8198910355567932, "rewards/accuracies": 1.0, "rewards/chosen": -0.20318719744682312, "rewards/margins": 0.13450971245765686, "rewards/rejected": -0.33769690990448, "step": 3371 }, { "epoch": 9.232032854209445, "grad_norm": 5.2286481857299805, "learning_rate": 5.382191780821917e-07, "log_odds_chosen": 3.1381564140319824, "log_odds_ratio": -0.2017720639705658, "logits/chosen": 0.7980687618255615, "logits/rejected": 0.8125031590461731, "logps/chosen": -2.409485340118408, "logps/rejected": -5.459963321685791, "loss": 0.8415, "nll_loss": 0.8213728666305542, "rewards/accuracies": 1.0, "rewards/chosen": -0.24094854295253754, "rewards/margins": 0.3050478398799896, "rewards/rejected": -0.545996367931366, "step": 3372 }, { "epoch": 9.234770704996578, "grad_norm": 4.632896900177002, "learning_rate": 5.380821917808219e-07, "log_odds_chosen": 1.342031717300415, "log_odds_ratio": -0.3504538834095001, "logits/chosen": 0.6107079386711121, "logits/rejected": 0.5563309192657471, "logps/chosen": -2.104100227355957, "logps/rejected": -3.3548145294189453, "loss": 0.7276, "nll_loss": 0.6926034092903137, "rewards/accuracies": 0.75, "rewards/chosen": -0.21041002869606018, "rewards/margins": 0.1250714361667633, "rewards/rejected": -0.3354814648628235, "step": 3373 }, { "epoch": 9.23750855578371, "grad_norm": 4.698281288146973, "learning_rate": 5.37945205479452e-07, "log_odds_chosen": 0.9979071617126465, "log_odds_ratio": -0.34399229288101196, "logits/chosen": 0.9781404733657837, "logits/rejected": 0.9768307209014893, "logps/chosen": -2.7717437744140625, "logps/rejected": -3.7091782093048096, "loss": 0.7354, "nll_loss": 0.7009532451629639, "rewards/accuracies": 1.0, "rewards/chosen": -0.27717435359954834, "rewards/margins": 0.09374344348907471, "rewards/rejected": -0.37091782689094543, "step": 3374 }, { "epoch": 9.240246406570842, "grad_norm": 5.150471210479736, "learning_rate": 5.378082191780821e-07, "log_odds_chosen": 1.0972553491592407, "log_odds_ratio": -0.3781854808330536, "logits/chosen": 0.6127696633338928, "logits/rejected": 0.5778517127037048, "logps/chosen": -2.170210599899292, "logps/rejected": -3.1495494842529297, "loss": 0.7909, "nll_loss": 0.7530973553657532, "rewards/accuracies": 0.75, "rewards/chosen": -0.21702107787132263, "rewards/margins": 0.09793388098478317, "rewards/rejected": -0.3149549663066864, "step": 3375 }, { "epoch": 9.242984257357975, "grad_norm": 5.303744316101074, "learning_rate": 5.376712328767123e-07, "log_odds_chosen": 1.5035438537597656, "log_odds_ratio": -0.32452619075775146, "logits/chosen": 0.6788201332092285, "logits/rejected": 0.6772526502609253, "logps/chosen": -1.8912009000778198, "logps/rejected": -3.279555082321167, "loss": 0.783, "nll_loss": 0.750508189201355, "rewards/accuracies": 1.0, "rewards/chosen": -0.1891200989484787, "rewards/margins": 0.13883540034294128, "rewards/rejected": -0.3279554843902588, "step": 3376 }, { "epoch": 9.245722108145106, "grad_norm": 4.898787021636963, "learning_rate": 5.375342465753424e-07, "log_odds_chosen": 2.300013303756714, "log_odds_ratio": -0.32316726446151733, "logits/chosen": 0.7612043023109436, "logits/rejected": 0.7825551629066467, "logps/chosen": -2.4275012016296387, "logps/rejected": -4.652270793914795, "loss": 0.7573, "nll_loss": 0.7249423265457153, "rewards/accuracies": 0.875, "rewards/chosen": -0.2427501380443573, "rewards/margins": 0.22247695922851562, "rewards/rejected": -0.46522706747055054, "step": 3377 }, { "epoch": 9.248459958932239, "grad_norm": 5.095919132232666, "learning_rate": 5.373972602739725e-07, "log_odds_chosen": 1.6496195793151855, "log_odds_ratio": -0.41849952936172485, "logits/chosen": 0.7463690042495728, "logits/rejected": 0.7285721302032471, "logps/chosen": -1.7277752161026, "logps/rejected": -3.288386821746826, "loss": 0.774, "nll_loss": 0.7321025133132935, "rewards/accuracies": 0.875, "rewards/chosen": -0.17277753353118896, "rewards/margins": 0.15606114268302917, "rewards/rejected": -0.32883867621421814, "step": 3378 }, { "epoch": 9.25119780971937, "grad_norm": 5.176572322845459, "learning_rate": 5.372602739726027e-07, "log_odds_chosen": 1.6081184148788452, "log_odds_ratio": -0.2945125997066498, "logits/chosen": 0.8526856899261475, "logits/rejected": 0.8729557394981384, "logps/chosen": -2.0850160121917725, "logps/rejected": -3.5582826137542725, "loss": 0.7087, "nll_loss": 0.6792607307434082, "rewards/accuracies": 0.875, "rewards/chosen": -0.20850160717964172, "rewards/margins": 0.14732663333415985, "rewards/rejected": -0.35582825541496277, "step": 3379 }, { "epoch": 9.253935660506503, "grad_norm": 4.159050941467285, "learning_rate": 5.371232876712328e-07, "log_odds_chosen": 2.835392951965332, "log_odds_ratio": -0.23555251955986023, "logits/chosen": 0.8821902275085449, "logits/rejected": 0.910720944404602, "logps/chosen": -2.2883071899414062, "logps/rejected": -5.036714553833008, "loss": 0.7051, "nll_loss": 0.6814976930618286, "rewards/accuracies": 0.875, "rewards/chosen": -0.2288307249546051, "rewards/margins": 0.27484074234962463, "rewards/rejected": -0.5036714673042297, "step": 3380 }, { "epoch": 9.256673511293634, "grad_norm": 5.916338920593262, "learning_rate": 5.36986301369863e-07, "log_odds_chosen": 2.384275197982788, "log_odds_ratio": -0.2619371712207794, "logits/chosen": 1.145965814590454, "logits/rejected": 1.1918256282806396, "logps/chosen": -2.263758659362793, "logps/rejected": -4.555992126464844, "loss": 0.6743, "nll_loss": 0.6481319069862366, "rewards/accuracies": 0.75, "rewards/chosen": -0.22637584805488586, "rewards/margins": 0.2292233407497406, "rewards/rejected": -0.45559921860694885, "step": 3381 }, { "epoch": 9.259411362080767, "grad_norm": 5.721796035766602, "learning_rate": 5.368493150684931e-07, "log_odds_chosen": 1.286077857017517, "log_odds_ratio": -0.3448185920715332, "logits/chosen": 0.8331718444824219, "logits/rejected": 0.7808457612991333, "logps/chosen": -1.7312182188034058, "logps/rejected": -2.8922431468963623, "loss": 0.7318, "nll_loss": 0.6973371505737305, "rewards/accuracies": 1.0, "rewards/chosen": -0.17312180995941162, "rewards/margins": 0.11610250174999237, "rewards/rejected": -0.2892243266105652, "step": 3382 }, { "epoch": 9.262149212867898, "grad_norm": 4.5094218254089355, "learning_rate": 5.367123287671232e-07, "log_odds_chosen": 1.369014024734497, "log_odds_ratio": -0.42789700627326965, "logits/chosen": 0.7081061005592346, "logits/rejected": 0.7392286062240601, "logps/chosen": -2.053292989730835, "logps/rejected": -3.303865909576416, "loss": 0.6986, "nll_loss": 0.6558389663696289, "rewards/accuracies": 0.625, "rewards/chosen": -0.20532932877540588, "rewards/margins": 0.12505729496479034, "rewards/rejected": -0.33038657903671265, "step": 3383 }, { "epoch": 9.264887063655031, "grad_norm": 6.844117641448975, "learning_rate": 5.365753424657534e-07, "log_odds_chosen": 1.9628245830535889, "log_odds_ratio": -0.3105347752571106, "logits/chosen": 0.49579179286956787, "logits/rejected": 0.5579643249511719, "logps/chosen": -2.430924415588379, "logps/rejected": -4.28628396987915, "loss": 0.917, "nll_loss": 0.8859056234359741, "rewards/accuracies": 0.75, "rewards/chosen": -0.24309243261814117, "rewards/margins": 0.1855359524488449, "rewards/rejected": -0.4286283850669861, "step": 3384 }, { "epoch": 9.267624914442163, "grad_norm": 6.010592937469482, "learning_rate": 5.364383561643834e-07, "log_odds_chosen": 1.345734715461731, "log_odds_ratio": -0.42465442419052124, "logits/chosen": 0.9474788308143616, "logits/rejected": 0.9540067315101624, "logps/chosen": -1.7361416816711426, "logps/rejected": -2.960922956466675, "loss": 0.677, "nll_loss": 0.6345522403717041, "rewards/accuracies": 0.875, "rewards/chosen": -0.17361417412757874, "rewards/margins": 0.12247810512781143, "rewards/rejected": -0.29609230160713196, "step": 3385 }, { "epoch": 9.270362765229295, "grad_norm": 6.144940376281738, "learning_rate": 5.363013698630136e-07, "log_odds_chosen": 3.3562417030334473, "log_odds_ratio": -0.37959039211273193, "logits/chosen": 1.088679552078247, "logits/rejected": 1.1011353731155396, "logps/chosen": -3.408576250076294, "logps/rejected": -6.725255012512207, "loss": 0.8125, "nll_loss": 0.774527907371521, "rewards/accuracies": 0.875, "rewards/chosen": -0.3408576250076294, "rewards/margins": 0.3316679000854492, "rewards/rejected": -0.6725255250930786, "step": 3386 }, { "epoch": 9.273100616016427, "grad_norm": 5.167385578155518, "learning_rate": 5.361643835616439e-07, "log_odds_chosen": 1.3371496200561523, "log_odds_ratio": -0.5145971179008484, "logits/chosen": 0.6774333715438843, "logits/rejected": 0.6792964935302734, "logps/chosen": -2.5439298152923584, "logps/rejected": -3.8305811882019043, "loss": 0.8175, "nll_loss": 0.7660832405090332, "rewards/accuracies": 0.5, "rewards/chosen": -0.25439298152923584, "rewards/margins": 0.12866519391536713, "rewards/rejected": -0.3830581307411194, "step": 3387 }, { "epoch": 9.27583846680356, "grad_norm": 5.530420780181885, "learning_rate": 5.36027397260274e-07, "log_odds_chosen": 1.741750717163086, "log_odds_ratio": -0.2787902057170868, "logits/chosen": 0.8712854981422424, "logits/rejected": 0.9447072744369507, "logps/chosen": -2.6661925315856934, "logps/rejected": -4.333028316497803, "loss": 0.7312, "nll_loss": 0.703337550163269, "rewards/accuracies": 0.875, "rewards/chosen": -0.2666192352771759, "rewards/margins": 0.166683629155159, "rewards/rejected": -0.4333028793334961, "step": 3388 }, { "epoch": 9.27857631759069, "grad_norm": 5.567333221435547, "learning_rate": 5.35890410958904e-07, "log_odds_chosen": 0.22960463166236877, "log_odds_ratio": -0.6956672072410583, "logits/chosen": 0.6817030906677246, "logits/rejected": 0.6688092947006226, "logps/chosen": -2.1485984325408936, "logps/rejected": -2.3253870010375977, "loss": 0.8336, "nll_loss": 0.7640035152435303, "rewards/accuracies": 0.625, "rewards/chosen": -0.21485984325408936, "rewards/margins": 0.017678841948509216, "rewards/rejected": -0.23253868520259857, "step": 3389 }, { "epoch": 9.281314168377824, "grad_norm": 5.093806266784668, "learning_rate": 5.357534246575343e-07, "log_odds_chosen": 2.3341798782348633, "log_odds_ratio": -0.39608919620513916, "logits/chosen": 0.7448974847793579, "logits/rejected": 0.8101465106010437, "logps/chosen": -2.1692352294921875, "logps/rejected": -4.308949947357178, "loss": 0.6779, "nll_loss": 0.6383073925971985, "rewards/accuracies": 0.875, "rewards/chosen": -0.2169235348701477, "rewards/margins": 0.21397149562835693, "rewards/rejected": -0.43089503049850464, "step": 3390 }, { "epoch": 9.284052019164955, "grad_norm": 4.7646098136901855, "learning_rate": 5.356164383561644e-07, "log_odds_chosen": 2.3504600524902344, "log_odds_ratio": -0.14503759145736694, "logits/chosen": 0.9170122742652893, "logits/rejected": 0.9542510509490967, "logps/chosen": -2.297010660171509, "logps/rejected": -4.549750328063965, "loss": 0.7588, "nll_loss": 0.7443147301673889, "rewards/accuracies": 1.0, "rewards/chosen": -0.22970107197761536, "rewards/margins": 0.2252739816904068, "rewards/rejected": -0.45497503876686096, "step": 3391 }, { "epoch": 9.286789869952088, "grad_norm": 5.945959091186523, "learning_rate": 5.354794520547945e-07, "log_odds_chosen": 1.6211143732070923, "log_odds_ratio": -0.43447214365005493, "logits/chosen": 1.0199995040893555, "logits/rejected": 1.0478744506835938, "logps/chosen": -2.565256357192993, "logps/rejected": -4.036392688751221, "loss": 0.8061, "nll_loss": 0.7626866102218628, "rewards/accuracies": 0.75, "rewards/chosen": -0.2565256357192993, "rewards/margins": 0.147113636136055, "rewards/rejected": -0.4036392569541931, "step": 3392 }, { "epoch": 9.289527720739219, "grad_norm": 7.9882097244262695, "learning_rate": 5.353424657534247e-07, "log_odds_chosen": 1.8576560020446777, "log_odds_ratio": -0.5795302391052246, "logits/chosen": 0.7686668634414673, "logits/rejected": 0.7506037354469299, "logps/chosen": -2.831144332885742, "logps/rejected": -4.623684883117676, "loss": 0.8433, "nll_loss": 0.7853472828865051, "rewards/accuracies": 0.75, "rewards/chosen": -0.2831144332885742, "rewards/margins": 0.17925406992435455, "rewards/rejected": -0.46236851811408997, "step": 3393 }, { "epoch": 9.292265571526352, "grad_norm": 5.521625995635986, "learning_rate": 5.352054794520548e-07, "log_odds_chosen": 2.137209415435791, "log_odds_ratio": -0.4691193103790283, "logits/chosen": 0.8759953379631042, "logits/rejected": 0.8190194964408875, "logps/chosen": -2.433608055114746, "logps/rejected": -4.447229385375977, "loss": 0.8148, "nll_loss": 0.7679059505462646, "rewards/accuracies": 0.75, "rewards/chosen": -0.24336081743240356, "rewards/margins": 0.20136207342147827, "rewards/rejected": -0.44472289085388184, "step": 3394 }, { "epoch": 9.295003422313483, "grad_norm": 6.126614570617676, "learning_rate": 5.35068493150685e-07, "log_odds_chosen": 1.6274679899215698, "log_odds_ratio": -0.232808917760849, "logits/chosen": 0.9293414950370789, "logits/rejected": 1.002740502357483, "logps/chosen": -2.4926576614379883, "logps/rejected": -4.020856857299805, "loss": 0.7987, "nll_loss": 0.7754566073417664, "rewards/accuracies": 1.0, "rewards/chosen": -0.24926576018333435, "rewards/margins": 0.15281996130943298, "rewards/rejected": -0.40208572149276733, "step": 3395 }, { "epoch": 9.297741273100616, "grad_norm": 4.801145553588867, "learning_rate": 5.349315068493151e-07, "log_odds_chosen": 1.7004647254943848, "log_odds_ratio": -0.3048788905143738, "logits/chosen": 0.8900691270828247, "logits/rejected": 0.8955914974212646, "logps/chosen": -2.2596402168273926, "logps/rejected": -3.872255563735962, "loss": 0.781, "nll_loss": 0.7504862546920776, "rewards/accuracies": 0.875, "rewards/chosen": -0.2259640395641327, "rewards/margins": 0.16126152873039246, "rewards/rejected": -0.38722556829452515, "step": 3396 }, { "epoch": 9.300479123887747, "grad_norm": 5.4335246086120605, "learning_rate": 5.347945205479452e-07, "log_odds_chosen": 2.44565486907959, "log_odds_ratio": -0.22331956028938293, "logits/chosen": 1.129982829093933, "logits/rejected": 1.1867401599884033, "logps/chosen": -3.6495039463043213, "logps/rejected": -6.05649471282959, "loss": 0.756, "nll_loss": 0.7336241602897644, "rewards/accuracies": 1.0, "rewards/chosen": -0.36495041847229004, "rewards/margins": 0.24069903790950775, "rewards/rejected": -0.605649471282959, "step": 3397 }, { "epoch": 9.30321697467488, "grad_norm": 5.010855197906494, "learning_rate": 5.346575342465754e-07, "log_odds_chosen": 1.2903538942337036, "log_odds_ratio": -0.40463122725486755, "logits/chosen": 0.7213567495346069, "logits/rejected": 0.5976451635360718, "logps/chosen": -2.07635760307312, "logps/rejected": -3.297729015350342, "loss": 0.8481, "nll_loss": 0.8076382279396057, "rewards/accuracies": 0.875, "rewards/chosen": -0.207635760307312, "rewards/margins": 0.12213712930679321, "rewards/rejected": -0.3297728896141052, "step": 3398 }, { "epoch": 9.305954825462011, "grad_norm": 4.727237224578857, "learning_rate": 5.345205479452054e-07, "log_odds_chosen": 2.7357873916625977, "log_odds_ratio": -0.1263556033372879, "logits/chosen": 0.8596508502960205, "logits/rejected": 0.8852306008338928, "logps/chosen": -2.108630895614624, "logps/rejected": -4.710449695587158, "loss": 0.7419, "nll_loss": 0.7293075323104858, "rewards/accuracies": 1.0, "rewards/chosen": -0.21086308360099792, "rewards/margins": 0.26018190383911133, "rewards/rejected": -0.47104498744010925, "step": 3399 }, { "epoch": 9.308692676249144, "grad_norm": 5.673957824707031, "learning_rate": 5.343835616438356e-07, "log_odds_chosen": 2.30609393119812, "log_odds_ratio": -0.3224935233592987, "logits/chosen": 0.7633732557296753, "logits/rejected": 0.7360962629318237, "logps/chosen": -2.2618656158447266, "logps/rejected": -4.392203330993652, "loss": 0.8193, "nll_loss": 0.7870955467224121, "rewards/accuracies": 0.875, "rewards/chosen": -0.22618655860424042, "rewards/margins": 0.2130337506532669, "rewards/rejected": -0.4392203390598297, "step": 3400 }, { "epoch": 9.311430527036277, "grad_norm": 6.026421546936035, "learning_rate": 5.342465753424658e-07, "log_odds_chosen": 1.2009164094924927, "log_odds_ratio": -0.7002239227294922, "logits/chosen": 0.8198573589324951, "logits/rejected": 0.8065142035484314, "logps/chosen": -2.8638458251953125, "logps/rejected": -4.028392791748047, "loss": 0.8335, "nll_loss": 0.7634689211845398, "rewards/accuracies": 0.625, "rewards/chosen": -0.28638461232185364, "rewards/margins": 0.11645470559597015, "rewards/rejected": -0.4028393030166626, "step": 3401 }, { "epoch": 9.314168377823409, "grad_norm": 4.6689653396606445, "learning_rate": 5.341095890410959e-07, "log_odds_chosen": 2.9748597145080566, "log_odds_ratio": -0.1723649650812149, "logits/chosen": 0.9569419622421265, "logits/rejected": 0.9954338669776917, "logps/chosen": -2.1405694484710693, "logps/rejected": -4.983774662017822, "loss": 0.6791, "nll_loss": 0.6618607044219971, "rewards/accuracies": 1.0, "rewards/chosen": -0.21405693888664246, "rewards/margins": 0.28432053327560425, "rewards/rejected": -0.4983775019645691, "step": 3402 }, { "epoch": 9.316906228610542, "grad_norm": 6.0200300216674805, "learning_rate": 5.33972602739726e-07, "log_odds_chosen": 1.787611484527588, "log_odds_ratio": -0.5025544166564941, "logits/chosen": 1.0862627029418945, "logits/rejected": 1.0847275257110596, "logps/chosen": -2.395801544189453, "logps/rejected": -4.050090789794922, "loss": 0.7239, "nll_loss": 0.6736515164375305, "rewards/accuracies": 0.875, "rewards/chosen": -0.2395801693201065, "rewards/margins": 0.16542889177799225, "rewards/rejected": -0.40500903129577637, "step": 3403 }, { "epoch": 9.319644079397673, "grad_norm": 4.183824062347412, "learning_rate": 5.338356164383562e-07, "log_odds_chosen": 1.6138830184936523, "log_odds_ratio": -0.3458118736743927, "logits/chosen": 0.8041698932647705, "logits/rejected": 0.9230045676231384, "logps/chosen": -2.0857224464416504, "logps/rejected": -3.6130549907684326, "loss": 0.7245, "nll_loss": 0.6899203658103943, "rewards/accuracies": 0.875, "rewards/chosen": -0.20857225358486176, "rewards/margins": 0.15273326635360718, "rewards/rejected": -0.3613055348396301, "step": 3404 }, { "epoch": 9.322381930184806, "grad_norm": 5.097278594970703, "learning_rate": 5.336986301369863e-07, "log_odds_chosen": 2.5065391063690186, "log_odds_ratio": -0.40640318393707275, "logits/chosen": 0.8659548759460449, "logits/rejected": 0.9133850336074829, "logps/chosen": -2.805230140686035, "logps/rejected": -5.181694984436035, "loss": 0.8081, "nll_loss": 0.7674906849861145, "rewards/accuracies": 0.875, "rewards/chosen": -0.28052300214767456, "rewards/margins": 0.2376464307308197, "rewards/rejected": -0.5181694626808167, "step": 3405 }, { "epoch": 9.325119780971937, "grad_norm": 4.17430305480957, "learning_rate": 5.335616438356164e-07, "log_odds_chosen": 1.9893485307693481, "log_odds_ratio": -0.26296114921569824, "logits/chosen": 0.7538186311721802, "logits/rejected": 0.7756234407424927, "logps/chosen": -2.6266329288482666, "logps/rejected": -4.564087390899658, "loss": 0.7618, "nll_loss": 0.7354627847671509, "rewards/accuracies": 0.875, "rewards/chosen": -0.2626633048057556, "rewards/margins": 0.1937454640865326, "rewards/rejected": -0.4564087986946106, "step": 3406 }, { "epoch": 9.32785763175907, "grad_norm": 4.343329906463623, "learning_rate": 5.334246575342466e-07, "log_odds_chosen": 2.1621968746185303, "log_odds_ratio": -0.19554440677165985, "logits/chosen": 0.8910760879516602, "logits/rejected": 0.864757776260376, "logps/chosen": -2.334047317504883, "logps/rejected": -4.400273323059082, "loss": 0.7424, "nll_loss": 0.7228729128837585, "rewards/accuracies": 1.0, "rewards/chosen": -0.2334047257900238, "rewards/margins": 0.20662261545658112, "rewards/rejected": -0.4400273561477661, "step": 3407 }, { "epoch": 9.330595482546201, "grad_norm": 4.952834129333496, "learning_rate": 5.332876712328767e-07, "log_odds_chosen": 1.2340465784072876, "log_odds_ratio": -0.37672626972198486, "logits/chosen": 0.6533879041671753, "logits/rejected": 0.6096186637878418, "logps/chosen": -1.9805099964141846, "logps/rejected": -3.1181485652923584, "loss": 0.786, "nll_loss": 0.748366117477417, "rewards/accuracies": 1.0, "rewards/chosen": -0.19805102050304413, "rewards/margins": 0.11376383155584335, "rewards/rejected": -0.3118148446083069, "step": 3408 }, { "epoch": 9.333333333333334, "grad_norm": 5.690377712249756, "learning_rate": 5.331506849315069e-07, "log_odds_chosen": 2.120335102081299, "log_odds_ratio": -0.4524915814399719, "logits/chosen": 1.0277326107025146, "logits/rejected": 1.0283373594284058, "logps/chosen": -2.5453734397888184, "logps/rejected": -4.564360618591309, "loss": 0.7039, "nll_loss": 0.6586621999740601, "rewards/accuracies": 0.875, "rewards/chosen": -0.25453734397888184, "rewards/margins": 0.2018987238407135, "rewards/rejected": -0.45643606781959534, "step": 3409 }, { "epoch": 9.336071184120465, "grad_norm": 4.3115105628967285, "learning_rate": 5.33013698630137e-07, "log_odds_chosen": 2.445424795150757, "log_odds_ratio": -0.2524324357509613, "logits/chosen": 1.0058408975601196, "logits/rejected": 1.0592182874679565, "logps/chosen": -2.5898027420043945, "logps/rejected": -4.964946746826172, "loss": 0.7367, "nll_loss": 0.7114546298980713, "rewards/accuracies": 1.0, "rewards/chosen": -0.25898030400276184, "rewards/margins": 0.23751439154148102, "rewards/rejected": -0.49649468064308167, "step": 3410 }, { "epoch": 9.338809034907598, "grad_norm": 4.708527565002441, "learning_rate": 5.328767123287671e-07, "log_odds_chosen": 2.2828593254089355, "log_odds_ratio": -0.14776299893856049, "logits/chosen": 0.7826384902000427, "logits/rejected": 0.7577874660491943, "logps/chosen": -2.040005683898926, "logps/rejected": -4.173480987548828, "loss": 0.68, "nll_loss": 0.6652366518974304, "rewards/accuracies": 1.0, "rewards/chosen": -0.2040005624294281, "rewards/margins": 0.21334755420684814, "rewards/rejected": -0.41734808683395386, "step": 3411 }, { "epoch": 9.34154688569473, "grad_norm": 4.8097429275512695, "learning_rate": 5.327397260273973e-07, "log_odds_chosen": 2.197518825531006, "log_odds_ratio": -0.31690332293510437, "logits/chosen": 0.8948525190353394, "logits/rejected": 0.9557268619537354, "logps/chosen": -2.600407123565674, "logps/rejected": -4.711196422576904, "loss": 0.7626, "nll_loss": 0.7309207320213318, "rewards/accuracies": 0.875, "rewards/chosen": -0.2600407004356384, "rewards/margins": 0.21107891201972961, "rewards/rejected": -0.47111964225769043, "step": 3412 }, { "epoch": 9.344284736481862, "grad_norm": 4.592346668243408, "learning_rate": 5.326027397260274e-07, "log_odds_chosen": 2.382610321044922, "log_odds_ratio": -0.27714431285858154, "logits/chosen": 1.0034611225128174, "logits/rejected": 1.1182584762573242, "logps/chosen": -2.4835262298583984, "logps/rejected": -4.799567222595215, "loss": 0.7456, "nll_loss": 0.7178526520729065, "rewards/accuracies": 0.875, "rewards/chosen": -0.24835264682769775, "rewards/margins": 0.23160411417484283, "rewards/rejected": -0.4799567461013794, "step": 3413 }, { "epoch": 9.347022587268993, "grad_norm": 4.876325607299805, "learning_rate": 5.324657534246575e-07, "log_odds_chosen": 2.7357289791107178, "log_odds_ratio": -0.31621915102005005, "logits/chosen": 0.6136338114738464, "logits/rejected": 0.583206295967102, "logps/chosen": -2.078339099884033, "logps/rejected": -4.7068963050842285, "loss": 0.8748, "nll_loss": 0.8431782126426697, "rewards/accuracies": 0.875, "rewards/chosen": -0.207833930850029, "rewards/margins": 0.2628557085990906, "rewards/rejected": -0.47068965435028076, "step": 3414 }, { "epoch": 9.349760438056126, "grad_norm": 5.858541011810303, "learning_rate": 5.323287671232877e-07, "log_odds_chosen": 1.9282690286636353, "log_odds_ratio": -0.26264169812202454, "logits/chosen": 1.0895402431488037, "logits/rejected": 1.109407901763916, "logps/chosen": -2.3409178256988525, "logps/rejected": -4.172433853149414, "loss": 0.6893, "nll_loss": 0.6630791425704956, "rewards/accuracies": 0.875, "rewards/chosen": -0.23409180343151093, "rewards/margins": 0.18315160274505615, "rewards/rejected": -0.41724342107772827, "step": 3415 }, { "epoch": 9.352498288843258, "grad_norm": 5.08222770690918, "learning_rate": 5.321917808219178e-07, "log_odds_chosen": 1.6153584718704224, "log_odds_ratio": -0.4149894118309021, "logits/chosen": 0.706218957901001, "logits/rejected": 0.6690245866775513, "logps/chosen": -2.255430221557617, "logps/rejected": -3.802403211593628, "loss": 0.7049, "nll_loss": 0.6634100675582886, "rewards/accuracies": 0.75, "rewards/chosen": -0.22554302215576172, "rewards/margins": 0.15469729900360107, "rewards/rejected": -0.3802403211593628, "step": 3416 }, { "epoch": 9.35523613963039, "grad_norm": 4.357120990753174, "learning_rate": 5.320547945205479e-07, "log_odds_chosen": 2.022317409515381, "log_odds_ratio": -0.24295754730701447, "logits/chosen": 0.8494137525558472, "logits/rejected": 0.91451096534729, "logps/chosen": -2.679309844970703, "logps/rejected": -4.585723400115967, "loss": 0.7226, "nll_loss": 0.6982828378677368, "rewards/accuracies": 1.0, "rewards/chosen": -0.2679309844970703, "rewards/margins": 0.1906413733959198, "rewards/rejected": -0.4585723876953125, "step": 3417 }, { "epoch": 9.357973990417522, "grad_norm": 4.869828224182129, "learning_rate": 5.319178082191781e-07, "log_odds_chosen": 2.3984360694885254, "log_odds_ratio": -0.24475547671318054, "logits/chosen": 0.8901159167289734, "logits/rejected": 0.9731104373931885, "logps/chosen": -2.616903305053711, "logps/rejected": -4.944111347198486, "loss": 0.8759, "nll_loss": 0.8513790369033813, "rewards/accuracies": 0.75, "rewards/chosen": -0.2616903483867645, "rewards/margins": 0.2327207773923874, "rewards/rejected": -0.4944111704826355, "step": 3418 }, { "epoch": 9.360711841204655, "grad_norm": 3.811556577682495, "learning_rate": 5.317808219178082e-07, "log_odds_chosen": 2.6098990440368652, "log_odds_ratio": -0.2547127604484558, "logits/chosen": 1.1002275943756104, "logits/rejected": 1.059507966041565, "logps/chosen": -2.0602433681488037, "logps/rejected": -4.55850076675415, "loss": 0.6935, "nll_loss": 0.6680278778076172, "rewards/accuracies": 0.875, "rewards/chosen": -0.20602433383464813, "rewards/margins": 0.24982574582099915, "rewards/rejected": -0.45585009455680847, "step": 3419 }, { "epoch": 9.363449691991786, "grad_norm": 4.179365158081055, "learning_rate": 5.316438356164383e-07, "log_odds_chosen": 2.557563543319702, "log_odds_ratio": -0.24059104919433594, "logits/chosen": 0.9864354133605957, "logits/rejected": 1.0397062301635742, "logps/chosen": -2.241441011428833, "logps/rejected": -4.711513519287109, "loss": 0.7849, "nll_loss": 0.7608805298805237, "rewards/accuracies": 1.0, "rewards/chosen": -0.2241441011428833, "rewards/margins": 0.24700722098350525, "rewards/rejected": -0.47115135192871094, "step": 3420 }, { "epoch": 9.366187542778919, "grad_norm": 5.180757999420166, "learning_rate": 5.315068493150685e-07, "log_odds_chosen": 1.9845993518829346, "log_odds_ratio": -0.287056565284729, "logits/chosen": 1.0088087320327759, "logits/rejected": 0.9922561645507812, "logps/chosen": -1.8465027809143066, "logps/rejected": -3.716306447982788, "loss": 0.6995, "nll_loss": 0.6708360314369202, "rewards/accuracies": 0.875, "rewards/chosen": -0.1846502721309662, "rewards/margins": 0.18698036670684814, "rewards/rejected": -0.37163063883781433, "step": 3421 }, { "epoch": 9.36892539356605, "grad_norm": 6.214445114135742, "learning_rate": 5.313698630136986e-07, "log_odds_chosen": 2.1073246002197266, "log_odds_ratio": -0.435370534658432, "logits/chosen": 0.8842161297798157, "logits/rejected": 0.8755768537521362, "logps/chosen": -1.9947211742401123, "logps/rejected": -3.8808062076568604, "loss": 0.8343, "nll_loss": 0.7907952070236206, "rewards/accuracies": 0.875, "rewards/chosen": -0.199472114443779, "rewards/margins": 0.18860852718353271, "rewards/rejected": -0.3880806565284729, "step": 3422 }, { "epoch": 9.371663244353183, "grad_norm": 5.11916971206665, "learning_rate": 5.312328767123288e-07, "log_odds_chosen": 2.149567127227783, "log_odds_ratio": -0.20623835921287537, "logits/chosen": 1.0173218250274658, "logits/rejected": 0.9606207609176636, "logps/chosen": -2.29469633102417, "logps/rejected": -4.317262172698975, "loss": 0.7654, "nll_loss": 0.7447424530982971, "rewards/accuracies": 1.0, "rewards/chosen": -0.2294696420431137, "rewards/margins": 0.20225657522678375, "rewards/rejected": -0.43172621726989746, "step": 3423 }, { "epoch": 9.374401095140314, "grad_norm": 4.795996189117432, "learning_rate": 5.310958904109589e-07, "log_odds_chosen": 1.890155553817749, "log_odds_ratio": -0.3191148340702057, "logits/chosen": 0.6885664463043213, "logits/rejected": 0.8055716156959534, "logps/chosen": -2.1032803058624268, "logps/rejected": -3.847698211669922, "loss": 0.6815, "nll_loss": 0.6495684385299683, "rewards/accuracies": 0.875, "rewards/chosen": -0.21032804250717163, "rewards/margins": 0.17444182932376862, "rewards/rejected": -0.38476985692977905, "step": 3424 }, { "epoch": 9.377138945927447, "grad_norm": 6.172145366668701, "learning_rate": 5.30958904109589e-07, "log_odds_chosen": 2.909785270690918, "log_odds_ratio": -0.4857828617095947, "logits/chosen": 0.9158015251159668, "logits/rejected": 0.9169922471046448, "logps/chosen": -2.5611560344696045, "logps/rejected": -5.352816581726074, "loss": 0.7547, "nll_loss": 0.7061704993247986, "rewards/accuracies": 0.75, "rewards/chosen": -0.256115585565567, "rewards/margins": 0.279166042804718, "rewards/rejected": -0.5352816581726074, "step": 3425 }, { "epoch": 9.37987679671458, "grad_norm": 5.136758804321289, "learning_rate": 5.308219178082192e-07, "log_odds_chosen": 3.542870283126831, "log_odds_ratio": -0.3136126697063446, "logits/chosen": 0.8249881863594055, "logits/rejected": 0.7998822927474976, "logps/chosen": -1.7936514616012573, "logps/rejected": -5.185824871063232, "loss": 0.8202, "nll_loss": 0.7888610363006592, "rewards/accuracies": 0.875, "rewards/chosen": -0.1793651282787323, "rewards/margins": 0.3392173647880554, "rewards/rejected": -0.5185825228691101, "step": 3426 }, { "epoch": 9.382614647501711, "grad_norm": 4.371438026428223, "learning_rate": 5.306849315068493e-07, "log_odds_chosen": 2.258331537246704, "log_odds_ratio": -0.19957683980464935, "logits/chosen": 0.9055998921394348, "logits/rejected": 0.9207469820976257, "logps/chosen": -1.9535506963729858, "logps/rejected": -4.089977264404297, "loss": 0.7438, "nll_loss": 0.723879873752594, "rewards/accuracies": 1.0, "rewards/chosen": -0.19535505771636963, "rewards/margins": 0.2136426419019699, "rewards/rejected": -0.40899771451950073, "step": 3427 }, { "epoch": 9.385352498288844, "grad_norm": 4.116179943084717, "learning_rate": 5.305479452054794e-07, "log_odds_chosen": 2.672837972640991, "log_odds_ratio": -0.20606841146945953, "logits/chosen": 0.7201423645019531, "logits/rejected": 0.7305502891540527, "logps/chosen": -2.3265225887298584, "logps/rejected": -4.90846061706543, "loss": 0.7398, "nll_loss": 0.7192294597625732, "rewards/accuracies": 1.0, "rewards/chosen": -0.23265226185321808, "rewards/margins": 0.2581937909126282, "rewards/rejected": -0.49084603786468506, "step": 3428 }, { "epoch": 9.388090349075975, "grad_norm": 5.289124488830566, "learning_rate": 5.304109589041096e-07, "log_odds_chosen": 1.6886451244354248, "log_odds_ratio": -0.34892594814300537, "logits/chosen": 0.6831138730049133, "logits/rejected": 0.701177716255188, "logps/chosen": -2.574553966522217, "logps/rejected": -4.184576988220215, "loss": 0.8001, "nll_loss": 0.7651581764221191, "rewards/accuracies": 0.875, "rewards/chosen": -0.25745540857315063, "rewards/margins": 0.16100230813026428, "rewards/rejected": -0.4184577167034149, "step": 3429 }, { "epoch": 9.390828199863108, "grad_norm": 4.881062030792236, "learning_rate": 5.302739726027396e-07, "log_odds_chosen": 1.8901450634002686, "log_odds_ratio": -0.17947261035442352, "logits/chosen": 0.8312675952911377, "logits/rejected": 0.8417860269546509, "logps/chosen": -2.188892364501953, "logps/rejected": -3.9707939624786377, "loss": 0.6587, "nll_loss": 0.6408016681671143, "rewards/accuracies": 1.0, "rewards/chosen": -0.2188892364501953, "rewards/margins": 0.17819015681743622, "rewards/rejected": -0.3970794081687927, "step": 3430 }, { "epoch": 9.39356605065024, "grad_norm": 5.769585132598877, "learning_rate": 5.301369863013698e-07, "log_odds_chosen": 3.1906604766845703, "log_odds_ratio": -0.3685532212257385, "logits/chosen": 1.0659387111663818, "logits/rejected": 1.0952582359313965, "logps/chosen": -2.4052634239196777, "logps/rejected": -5.469869136810303, "loss": 0.7343, "nll_loss": 0.6974568367004395, "rewards/accuracies": 0.875, "rewards/chosen": -0.24052634835243225, "rewards/margins": 0.30646055936813354, "rewards/rejected": -0.5469869375228882, "step": 3431 }, { "epoch": 9.396303901437372, "grad_norm": 6.323675632476807, "learning_rate": 5.3e-07, "log_odds_chosen": 1.089830994606018, "log_odds_ratio": -0.3687092661857605, "logits/chosen": 0.960051417350769, "logits/rejected": 0.9121454954147339, "logps/chosen": -2.6899564266204834, "logps/rejected": -3.6990649700164795, "loss": 0.9083, "nll_loss": 0.8714661598205566, "rewards/accuracies": 0.875, "rewards/chosen": -0.26899564266204834, "rewards/margins": 0.10091086477041245, "rewards/rejected": -0.3699065148830414, "step": 3432 }, { "epoch": 9.399041752224504, "grad_norm": 5.475799560546875, "learning_rate": 5.298630136986301e-07, "log_odds_chosen": 1.2158764600753784, "log_odds_ratio": -0.3779539465904236, "logits/chosen": 0.6979784965515137, "logits/rejected": 0.644808292388916, "logps/chosen": -2.624480724334717, "logps/rejected": -3.7984986305236816, "loss": 0.7333, "nll_loss": 0.69554203748703, "rewards/accuracies": 0.75, "rewards/chosen": -0.2624480724334717, "rewards/margins": 0.11740179359912872, "rewards/rejected": -0.3798498511314392, "step": 3433 }, { "epoch": 9.401779603011637, "grad_norm": 6.672074317932129, "learning_rate": 5.297260273972602e-07, "log_odds_chosen": 2.1468167304992676, "log_odds_ratio": -0.31801798939704895, "logits/chosen": 0.7420927286148071, "logits/rejected": 0.7600431442260742, "logps/chosen": -3.2085461616516113, "logps/rejected": -5.327935695648193, "loss": 0.8307, "nll_loss": 0.7988758683204651, "rewards/accuracies": 0.75, "rewards/chosen": -0.32085463404655457, "rewards/margins": 0.21193894743919373, "rewards/rejected": -0.5327935814857483, "step": 3434 }, { "epoch": 9.404517453798768, "grad_norm": 4.660399436950684, "learning_rate": 5.295890410958904e-07, "log_odds_chosen": 1.6832901239395142, "log_odds_ratio": -0.4507213830947876, "logits/chosen": 0.798591136932373, "logits/rejected": 0.7828981876373291, "logps/chosen": -1.8722928762435913, "logps/rejected": -3.4959590435028076, "loss": 0.6985, "nll_loss": 0.6533859968185425, "rewards/accuracies": 0.75, "rewards/chosen": -0.18722929060459137, "rewards/margins": 0.1623665988445282, "rewards/rejected": -0.34959590435028076, "step": 3435 }, { "epoch": 9.4072553045859, "grad_norm": 5.215271949768066, "learning_rate": 5.294520547945205e-07, "log_odds_chosen": 1.5289570093154907, "log_odds_ratio": -0.38976937532424927, "logits/chosen": 0.8282817602157593, "logits/rejected": 0.7572590708732605, "logps/chosen": -2.064148426055908, "logps/rejected": -3.4650654792785645, "loss": 0.8062, "nll_loss": 0.7671773433685303, "rewards/accuracies": 0.75, "rewards/chosen": -0.2064148485660553, "rewards/margins": 0.14009171724319458, "rewards/rejected": -0.3465065658092499, "step": 3436 }, { "epoch": 9.409993155373032, "grad_norm": 4.23435640335083, "learning_rate": 5.293150684931507e-07, "log_odds_chosen": 1.8164615631103516, "log_odds_ratio": -0.3191028833389282, "logits/chosen": 0.8226733207702637, "logits/rejected": 0.7823371291160583, "logps/chosen": -1.976889729499817, "logps/rejected": -3.6626930236816406, "loss": 0.7656, "nll_loss": 0.733686089515686, "rewards/accuracies": 0.875, "rewards/chosen": -0.1976889967918396, "rewards/margins": 0.1685803234577179, "rewards/rejected": -0.3662692904472351, "step": 3437 }, { "epoch": 9.412731006160165, "grad_norm": 4.171324729919434, "learning_rate": 5.291780821917808e-07, "log_odds_chosen": 1.993607759475708, "log_odds_ratio": -0.23758910596370697, "logits/chosen": 0.9769782423973083, "logits/rejected": 1.0609235763549805, "logps/chosen": -2.0853164196014404, "logps/rejected": -3.939793586730957, "loss": 0.6546, "nll_loss": 0.6308872699737549, "rewards/accuracies": 1.0, "rewards/chosen": -0.20853164792060852, "rewards/margins": 0.18544775247573853, "rewards/rejected": -0.39397940039634705, "step": 3438 }, { "epoch": 9.415468856947296, "grad_norm": 5.2247419357299805, "learning_rate": 5.290410958904109e-07, "log_odds_chosen": 2.251718044281006, "log_odds_ratio": -0.19658994674682617, "logits/chosen": 0.9326324462890625, "logits/rejected": 1.0514203310012817, "logps/chosen": -2.8902904987335205, "logps/rejected": -5.067888259887695, "loss": 0.6932, "nll_loss": 0.6735826730728149, "rewards/accuracies": 1.0, "rewards/chosen": -0.289029061794281, "rewards/margins": 0.21775975823402405, "rewards/rejected": -0.5067887902259827, "step": 3439 }, { "epoch": 9.418206707734429, "grad_norm": 6.111205101013184, "learning_rate": 5.289041095890411e-07, "log_odds_chosen": 3.3021938800811768, "log_odds_ratio": -0.08561161160469055, "logits/chosen": 0.997430682182312, "logits/rejected": 0.9466487169265747, "logps/chosen": -2.0927999019622803, "logps/rejected": -5.232644557952881, "loss": 0.7192, "nll_loss": 0.7106283903121948, "rewards/accuracies": 1.0, "rewards/chosen": -0.20927998423576355, "rewards/margins": 0.3139844834804535, "rewards/rejected": -0.523264467716217, "step": 3440 }, { "epoch": 9.42094455852156, "grad_norm": 5.106234550476074, "learning_rate": 5.287671232876712e-07, "log_odds_chosen": 2.3566040992736816, "log_odds_ratio": -0.44330325722694397, "logits/chosen": 0.9565470218658447, "logits/rejected": 0.9356436729431152, "logps/chosen": -2.435131072998047, "logps/rejected": -4.602277755737305, "loss": 0.9236, "nll_loss": 0.879278838634491, "rewards/accuracies": 0.75, "rewards/chosen": -0.24351313710212708, "rewards/margins": 0.21671468019485474, "rewards/rejected": -0.4602277874946594, "step": 3441 }, { "epoch": 9.423682409308693, "grad_norm": 5.214237689971924, "learning_rate": 5.286301369863013e-07, "log_odds_chosen": 0.9162346124649048, "log_odds_ratio": -0.44030123949050903, "logits/chosen": 0.9861552119255066, "logits/rejected": 0.9866049885749817, "logps/chosen": -1.6186699867248535, "logps/rejected": -2.4564507007598877, "loss": 0.7228, "nll_loss": 0.6787904500961304, "rewards/accuracies": 0.875, "rewards/chosen": -0.16186699271202087, "rewards/margins": 0.08377807587385178, "rewards/rejected": -0.24564506113529205, "step": 3442 }, { "epoch": 9.426420260095824, "grad_norm": 4.666694164276123, "learning_rate": 5.284931506849315e-07, "log_odds_chosen": 2.337433099746704, "log_odds_ratio": -0.3887858986854553, "logits/chosen": 0.9200230240821838, "logits/rejected": 0.933285653591156, "logps/chosen": -2.6998133659362793, "logps/rejected": -4.997888088226318, "loss": 0.8926, "nll_loss": 0.8536774516105652, "rewards/accuracies": 0.75, "rewards/chosen": -0.26998138427734375, "rewards/margins": 0.22980745136737823, "rewards/rejected": -0.4997888207435608, "step": 3443 }, { "epoch": 9.429158110882957, "grad_norm": 4.945228576660156, "learning_rate": 5.283561643835615e-07, "log_odds_chosen": 2.1348228454589844, "log_odds_ratio": -0.22256460785865784, "logits/chosen": 0.9641410112380981, "logits/rejected": 1.01082444190979, "logps/chosen": -2.485602855682373, "logps/rejected": -4.53994607925415, "loss": 0.6988, "nll_loss": 0.6765568256378174, "rewards/accuracies": 1.0, "rewards/chosen": -0.24856029450893402, "rewards/margins": 0.20543435215950012, "rewards/rejected": -0.45399466156959534, "step": 3444 }, { "epoch": 9.431895961670088, "grad_norm": 6.390544891357422, "learning_rate": 5.282191780821917e-07, "log_odds_chosen": 1.5569908618927002, "log_odds_ratio": -0.5342258810997009, "logits/chosen": 0.8998754024505615, "logits/rejected": 0.9486824870109558, "logps/chosen": -2.937009572982788, "logps/rejected": -4.406215667724609, "loss": 0.8059, "nll_loss": 0.7524592876434326, "rewards/accuracies": 0.625, "rewards/chosen": -0.2937009930610657, "rewards/margins": 0.1469205915927887, "rewards/rejected": -0.440621554851532, "step": 3445 }, { "epoch": 9.434633812457221, "grad_norm": 5.318302154541016, "learning_rate": 5.280821917808219e-07, "log_odds_chosen": 3.126258611679077, "log_odds_ratio": -0.1232147067785263, "logits/chosen": 0.8847362995147705, "logits/rejected": 0.9075598120689392, "logps/chosen": -2.0644450187683105, "logps/rejected": -5.047401428222656, "loss": 0.6663, "nll_loss": 0.6539599895477295, "rewards/accuracies": 1.0, "rewards/chosen": -0.20644447207450867, "rewards/margins": 0.2982957065105438, "rewards/rejected": -0.5047402381896973, "step": 3446 }, { "epoch": 9.437371663244353, "grad_norm": 5.560510635375977, "learning_rate": 5.27945205479452e-07, "log_odds_chosen": 1.7156708240509033, "log_odds_ratio": -0.3544694483280182, "logits/chosen": 0.957077145576477, "logits/rejected": 1.0404903888702393, "logps/chosen": -3.028367042541504, "logps/rejected": -4.688362121582031, "loss": 0.8974, "nll_loss": 0.8620019555091858, "rewards/accuracies": 0.875, "rewards/chosen": -0.30283671617507935, "rewards/margins": 0.16599950194358826, "rewards/rejected": -0.46883624792099, "step": 3447 }, { "epoch": 9.440109514031485, "grad_norm": 4.512141704559326, "learning_rate": 5.278082191780821e-07, "log_odds_chosen": 1.3763459920883179, "log_odds_ratio": -0.35793787240982056, "logits/chosen": 0.8153256773948669, "logits/rejected": 0.9055935740470886, "logps/chosen": -2.4863483905792236, "logps/rejected": -3.782585620880127, "loss": 0.7551, "nll_loss": 0.7192636728286743, "rewards/accuracies": 0.875, "rewards/chosen": -0.24863484501838684, "rewards/margins": 0.12962371110916138, "rewards/rejected": -0.3782585859298706, "step": 3448 }, { "epoch": 9.442847364818617, "grad_norm": 5.931178092956543, "learning_rate": 5.276712328767123e-07, "log_odds_chosen": 1.3983134031295776, "log_odds_ratio": -0.3372598886489868, "logits/chosen": 0.9114683866500854, "logits/rejected": 0.9281665086746216, "logps/chosen": -2.635519027709961, "logps/rejected": -3.9100723266601562, "loss": 0.643, "nll_loss": 0.6092885732650757, "rewards/accuracies": 0.875, "rewards/chosen": -0.26355189085006714, "rewards/margins": 0.12745533883571625, "rewards/rejected": -0.3910072445869446, "step": 3449 }, { "epoch": 9.44558521560575, "grad_norm": 5.837503910064697, "learning_rate": 5.275342465753424e-07, "log_odds_chosen": 1.7622463703155518, "log_odds_ratio": -0.38942623138427734, "logits/chosen": 0.8116485476493835, "logits/rejected": 0.7821024060249329, "logps/chosen": -2.7575020790100098, "logps/rejected": -4.457335472106934, "loss": 0.7071, "nll_loss": 0.6681234836578369, "rewards/accuracies": 0.625, "rewards/chosen": -0.27575021982192993, "rewards/margins": 0.16998332738876343, "rewards/rejected": -0.44573354721069336, "step": 3450 }, { "epoch": 9.44832306639288, "grad_norm": 4.396919250488281, "learning_rate": 5.273972602739725e-07, "log_odds_chosen": 2.188420295715332, "log_odds_ratio": -0.19921545684337616, "logits/chosen": 1.0020084381103516, "logits/rejected": 1.074906587600708, "logps/chosen": -1.7605133056640625, "logps/rejected": -3.774388313293457, "loss": 0.6944, "nll_loss": 0.6745111346244812, "rewards/accuracies": 1.0, "rewards/chosen": -0.17605134844779968, "rewards/margins": 0.20138752460479736, "rewards/rejected": -0.37743884325027466, "step": 3451 }, { "epoch": 9.451060917180014, "grad_norm": 6.111605167388916, "learning_rate": 5.272602739726027e-07, "log_odds_chosen": 1.1498087644577026, "log_odds_ratio": -0.4802675247192383, "logits/chosen": 0.927651047706604, "logits/rejected": 0.9740021228790283, "logps/chosen": -2.809528112411499, "logps/rejected": -3.889585494995117, "loss": 0.7721, "nll_loss": 0.7240769863128662, "rewards/accuracies": 0.75, "rewards/chosen": -0.2809528112411499, "rewards/margins": 0.10800573229789734, "rewards/rejected": -0.38895854353904724, "step": 3452 }, { "epoch": 9.453798767967147, "grad_norm": 5.2797417640686035, "learning_rate": 5.271232876712328e-07, "log_odds_chosen": 3.071934223175049, "log_odds_ratio": -0.17363139986991882, "logits/chosen": 0.9290338158607483, "logits/rejected": 0.9634199738502502, "logps/chosen": -2.091487169265747, "logps/rejected": -5.020834445953369, "loss": 0.6785, "nll_loss": 0.6611617803573608, "rewards/accuracies": 1.0, "rewards/chosen": -0.20914873480796814, "rewards/margins": 0.29293468594551086, "rewards/rejected": -0.5020834803581238, "step": 3453 }, { "epoch": 9.456536618754278, "grad_norm": 4.798487663269043, "learning_rate": 5.26986301369863e-07, "log_odds_chosen": 1.4695041179656982, "log_odds_ratio": -0.27761387825012207, "logits/chosen": 0.7260381579399109, "logits/rejected": 0.7338478565216064, "logps/chosen": -2.513113021850586, "logps/rejected": -3.897695302963257, "loss": 0.7031, "nll_loss": 0.6753380298614502, "rewards/accuracies": 1.0, "rewards/chosen": -0.2513113021850586, "rewards/margins": 0.1384582370519638, "rewards/rejected": -0.3897695541381836, "step": 3454 }, { "epoch": 9.45927446954141, "grad_norm": 5.5862507820129395, "learning_rate": 5.268493150684931e-07, "log_odds_chosen": 1.739865779876709, "log_odds_ratio": -0.3787710666656494, "logits/chosen": 1.0350881814956665, "logits/rejected": 1.0227571725845337, "logps/chosen": -2.4266724586486816, "logps/rejected": -3.9982409477233887, "loss": 0.842, "nll_loss": 0.8040859699249268, "rewards/accuracies": 0.875, "rewards/chosen": -0.24266722798347473, "rewards/margins": 0.15715688467025757, "rewards/rejected": -0.3998241424560547, "step": 3455 }, { "epoch": 9.462012320328542, "grad_norm": 6.874109745025635, "learning_rate": 5.267123287671232e-07, "log_odds_chosen": 1.637546420097351, "log_odds_ratio": -0.3868807852268219, "logits/chosen": 0.807700514793396, "logits/rejected": 0.7415663003921509, "logps/chosen": -3.0505921840667725, "logps/rejected": -4.613410949707031, "loss": 0.8553, "nll_loss": 0.8165923357009888, "rewards/accuracies": 0.75, "rewards/chosen": -0.3050592541694641, "rewards/margins": 0.15628185868263245, "rewards/rejected": -0.46134111285209656, "step": 3456 }, { "epoch": 9.464750171115675, "grad_norm": 5.120408535003662, "learning_rate": 5.265753424657534e-07, "log_odds_chosen": 1.0278832912445068, "log_odds_ratio": -0.5107350945472717, "logits/chosen": 0.7967084646224976, "logits/rejected": 0.8422363996505737, "logps/chosen": -2.8012146949768066, "logps/rejected": -3.7807464599609375, "loss": 0.7361, "nll_loss": 0.6850476264953613, "rewards/accuracies": 0.625, "rewards/chosen": -0.28012144565582275, "rewards/margins": 0.09795314818620682, "rewards/rejected": -0.37807461619377136, "step": 3457 }, { "epoch": 9.467488021902806, "grad_norm": 4.282637119293213, "learning_rate": 5.264383561643835e-07, "log_odds_chosen": 3.419466972351074, "log_odds_ratio": -0.1125054582953453, "logits/chosen": 0.9671617150306702, "logits/rejected": 0.981102466583252, "logps/chosen": -2.4336097240448, "logps/rejected": -5.730315208435059, "loss": 0.7141, "nll_loss": 0.7028955221176147, "rewards/accuracies": 1.0, "rewards/chosen": -0.2433609664440155, "rewards/margins": 0.32967057824134827, "rewards/rejected": -0.5730315446853638, "step": 3458 }, { "epoch": 9.470225872689939, "grad_norm": 4.485628128051758, "learning_rate": 5.263013698630136e-07, "log_odds_chosen": 1.6066025495529175, "log_odds_ratio": -0.2775847315788269, "logits/chosen": 0.8635748624801636, "logits/rejected": 0.9557055234909058, "logps/chosen": -1.8189802169799805, "logps/rejected": -3.272498369216919, "loss": 0.6664, "nll_loss": 0.6386720538139343, "rewards/accuracies": 1.0, "rewards/chosen": -0.18189802765846252, "rewards/margins": 0.1453518271446228, "rewards/rejected": -0.32724982500076294, "step": 3459 }, { "epoch": 9.47296372347707, "grad_norm": 4.850885391235352, "learning_rate": 5.261643835616438e-07, "log_odds_chosen": 1.80539870262146, "log_odds_ratio": -0.46294912695884705, "logits/chosen": 1.0664118528366089, "logits/rejected": 1.0097012519836426, "logps/chosen": -2.550389528274536, "logps/rejected": -4.247224807739258, "loss": 0.7345, "nll_loss": 0.6882451176643372, "rewards/accuracies": 0.75, "rewards/chosen": -0.25503894686698914, "rewards/margins": 0.1696835458278656, "rewards/rejected": -0.42472249269485474, "step": 3460 }, { "epoch": 9.475701574264203, "grad_norm": 5.148194789886475, "learning_rate": 5.260273972602739e-07, "log_odds_chosen": 2.2901124954223633, "log_odds_ratio": -0.30259251594543457, "logits/chosen": 0.9709806442260742, "logits/rejected": 0.9405724406242371, "logps/chosen": -1.911014199256897, "logps/rejected": -4.089282035827637, "loss": 0.805, "nll_loss": 0.7747085690498352, "rewards/accuracies": 0.875, "rewards/chosen": -0.19110141694545746, "rewards/margins": 0.21782678365707397, "rewards/rejected": -0.4089282155036926, "step": 3461 }, { "epoch": 9.478439425051334, "grad_norm": 7.5341973304748535, "learning_rate": 5.25890410958904e-07, "log_odds_chosen": 0.8382704257965088, "log_odds_ratio": -0.9287125468254089, "logits/chosen": 0.9127683043479919, "logits/rejected": 0.8686975836753845, "logps/chosen": -2.920699119567871, "logps/rejected": -3.7564072608947754, "loss": 0.8059, "nll_loss": 0.7130247354507446, "rewards/accuracies": 0.375, "rewards/chosen": -0.2920699119567871, "rewards/margins": 0.08357081562280655, "rewards/rejected": -0.37564072012901306, "step": 3462 }, { "epoch": 9.481177275838467, "grad_norm": 4.685532569885254, "learning_rate": 5.257534246575342e-07, "log_odds_chosen": 2.043084144592285, "log_odds_ratio": -0.24015001952648163, "logits/chosen": 0.8021166324615479, "logits/rejected": 0.8316946029663086, "logps/chosen": -2.921950340270996, "logps/rejected": -4.910817623138428, "loss": 0.8168, "nll_loss": 0.7927855849266052, "rewards/accuracies": 1.0, "rewards/chosen": -0.29219502210617065, "rewards/margins": 0.19888673722743988, "rewards/rejected": -0.49108177423477173, "step": 3463 }, { "epoch": 9.483915126625599, "grad_norm": 4.16222620010376, "learning_rate": 5.256164383561643e-07, "log_odds_chosen": 2.2027854919433594, "log_odds_ratio": -0.22723200917243958, "logits/chosen": 0.9196965098381042, "logits/rejected": 0.9901915192604065, "logps/chosen": -2.314328670501709, "logps/rejected": -4.429335117340088, "loss": 0.7623, "nll_loss": 0.7395669221878052, "rewards/accuracies": 1.0, "rewards/chosen": -0.23143287003040314, "rewards/margins": 0.21150065958499908, "rewards/rejected": -0.44293349981307983, "step": 3464 }, { "epoch": 9.486652977412732, "grad_norm": 5.923300743103027, "learning_rate": 5.254794520547944e-07, "log_odds_chosen": 1.7666734457015991, "log_odds_ratio": -0.3396078050136566, "logits/chosen": 0.8666081428527832, "logits/rejected": 0.868371307849884, "logps/chosen": -2.194450855255127, "logps/rejected": -3.8684699535369873, "loss": 0.8722, "nll_loss": 0.8382529020309448, "rewards/accuracies": 1.0, "rewards/chosen": -0.2194450944662094, "rewards/margins": 0.16740188002586365, "rewards/rejected": -0.38684698939323425, "step": 3465 }, { "epoch": 9.489390828199863, "grad_norm": 6.770915985107422, "learning_rate": 5.253424657534246e-07, "log_odds_chosen": 1.5320202112197876, "log_odds_ratio": -0.49022242426872253, "logits/chosen": 0.7726110219955444, "logits/rejected": 0.7676114439964294, "logps/chosen": -2.861388683319092, "logps/rejected": -4.325167179107666, "loss": 0.8727, "nll_loss": 0.8236755132675171, "rewards/accuracies": 0.875, "rewards/chosen": -0.2861388921737671, "rewards/margins": 0.146377831697464, "rewards/rejected": -0.43251675367355347, "step": 3466 }, { "epoch": 9.492128678986996, "grad_norm": 4.827380180358887, "learning_rate": 5.252054794520547e-07, "log_odds_chosen": 2.9189789295196533, "log_odds_ratio": -0.2930454611778259, "logits/chosen": 0.9202786684036255, "logits/rejected": 0.8871127963066101, "logps/chosen": -1.8556604385375977, "logps/rejected": -4.683015823364258, "loss": 0.7058, "nll_loss": 0.6764687895774841, "rewards/accuracies": 0.875, "rewards/chosen": -0.1855660378932953, "rewards/margins": 0.2827354967594147, "rewards/rejected": -0.46830153465270996, "step": 3467 }, { "epoch": 9.494866529774127, "grad_norm": 4.982840061187744, "learning_rate": 5.25068493150685e-07, "log_odds_chosen": 1.0300676822662354, "log_odds_ratio": -0.37471088767051697, "logits/chosen": 0.7450509071350098, "logits/rejected": 0.715965211391449, "logps/chosen": -1.9276237487792969, "logps/rejected": -2.8617091178894043, "loss": 0.7744, "nll_loss": 0.7369219660758972, "rewards/accuracies": 1.0, "rewards/chosen": -0.1927623748779297, "rewards/margins": 0.09340853989124298, "rewards/rejected": -0.2861708998680115, "step": 3468 }, { "epoch": 9.49760438056126, "grad_norm": 5.2585649490356445, "learning_rate": 5.24931506849315e-07, "log_odds_chosen": 2.272148609161377, "log_odds_ratio": -0.3177357316017151, "logits/chosen": 0.9682770371437073, "logits/rejected": 0.9908862113952637, "logps/chosen": -2.2345526218414307, "logps/rejected": -4.45916748046875, "loss": 0.7689, "nll_loss": 0.7371219396591187, "rewards/accuracies": 0.875, "rewards/chosen": -0.22345523536205292, "rewards/margins": 0.22246153652668, "rewards/rejected": -0.4459167718887329, "step": 3469 }, { "epoch": 9.500342231348391, "grad_norm": 4.971652507781982, "learning_rate": 5.247945205479452e-07, "log_odds_chosen": 1.7177939414978027, "log_odds_ratio": -0.4010750651359558, "logits/chosen": 0.6955347657203674, "logits/rejected": 0.7505967617034912, "logps/chosen": -2.291008710861206, "logps/rejected": -3.9286704063415527, "loss": 0.746, "nll_loss": 0.7059184312820435, "rewards/accuracies": 0.875, "rewards/chosen": -0.22910085320472717, "rewards/margins": 0.16376616060733795, "rewards/rejected": -0.3928670287132263, "step": 3470 }, { "epoch": 9.503080082135524, "grad_norm": 4.415489196777344, "learning_rate": 5.246575342465754e-07, "log_odds_chosen": 1.8621770143508911, "log_odds_ratio": -0.2495894730091095, "logits/chosen": 0.9415512084960938, "logits/rejected": 0.9701469540596008, "logps/chosen": -2.402111530303955, "logps/rejected": -4.205892086029053, "loss": 0.7521, "nll_loss": 0.7271508574485779, "rewards/accuracies": 0.875, "rewards/chosen": -0.24021115899085999, "rewards/margins": 0.1803780496120453, "rewards/rejected": -0.4205892086029053, "step": 3471 }, { "epoch": 9.505817932922655, "grad_norm": 4.740771293640137, "learning_rate": 5.245205479452055e-07, "log_odds_chosen": 2.2575531005859375, "log_odds_ratio": -0.267984539270401, "logits/chosen": 0.8174583315849304, "logits/rejected": 0.8736649751663208, "logps/chosen": -2.740344285964966, "logps/rejected": -4.901549339294434, "loss": 0.804, "nll_loss": 0.7772221565246582, "rewards/accuracies": 1.0, "rewards/chosen": -0.27403444051742554, "rewards/margins": 0.21612048149108887, "rewards/rejected": -0.4901549518108368, "step": 3472 }, { "epoch": 9.508555783709788, "grad_norm": 5.128040790557861, "learning_rate": 5.243835616438356e-07, "log_odds_chosen": 2.7697367668151855, "log_odds_ratio": -0.17698392271995544, "logits/chosen": 0.9744228720664978, "logits/rejected": 1.0244852304458618, "logps/chosen": -2.4237186908721924, "logps/rejected": -5.08476448059082, "loss": 0.8427, "nll_loss": 0.8250012397766113, "rewards/accuracies": 1.0, "rewards/chosen": -0.24237188696861267, "rewards/margins": 0.2661045789718628, "rewards/rejected": -0.5084764957427979, "step": 3473 }, { "epoch": 9.51129363449692, "grad_norm": 6.779922008514404, "learning_rate": 5.242465753424658e-07, "log_odds_chosen": 1.302207112312317, "log_odds_ratio": -0.3792920708656311, "logits/chosen": 0.953693687915802, "logits/rejected": 0.9695872068405151, "logps/chosen": -2.6805367469787598, "logps/rejected": -3.8220345973968506, "loss": 0.742, "nll_loss": 0.7040914297103882, "rewards/accuracies": 0.875, "rewards/chosen": -0.26805368065834045, "rewards/margins": 0.1141498014330864, "rewards/rejected": -0.38220348954200745, "step": 3474 }, { "epoch": 9.514031485284052, "grad_norm": 4.3550028800964355, "learning_rate": 5.241095890410959e-07, "log_odds_chosen": 3.0764098167419434, "log_odds_ratio": -0.15631894767284393, "logits/chosen": 0.6258653998374939, "logits/rejected": 0.6069812178611755, "logps/chosen": -1.8060152530670166, "logps/rejected": -4.718739032745361, "loss": 0.7276, "nll_loss": 0.7119771242141724, "rewards/accuracies": 1.0, "rewards/chosen": -0.18060152232646942, "rewards/margins": 0.29127237200737, "rewards/rejected": -0.4718739092350006, "step": 3475 }, { "epoch": 9.516769336071183, "grad_norm": 7.806344985961914, "learning_rate": 5.23972602739726e-07, "log_odds_chosen": 1.500152349472046, "log_odds_ratio": -0.36521217226982117, "logits/chosen": 0.5936338901519775, "logits/rejected": 0.505813717842102, "logps/chosen": -2.363088607788086, "logps/rejected": -3.6159636974334717, "loss": 0.801, "nll_loss": 0.7644591331481934, "rewards/accuracies": 0.875, "rewards/chosen": -0.23630887269973755, "rewards/margins": 0.1252875179052353, "rewards/rejected": -0.36159637570381165, "step": 3476 }, { "epoch": 9.519507186858316, "grad_norm": 5.337678909301758, "learning_rate": 5.238356164383562e-07, "log_odds_chosen": 2.30553936958313, "log_odds_ratio": -0.2791096866130829, "logits/chosen": 0.8942021131515503, "logits/rejected": 0.8909478187561035, "logps/chosen": -1.913936972618103, "logps/rejected": -4.0690178871154785, "loss": 0.6504, "nll_loss": 0.6224932074546814, "rewards/accuracies": 0.875, "rewards/chosen": -0.1913936883211136, "rewards/margins": 0.2155081033706665, "rewards/rejected": -0.4069017767906189, "step": 3477 }, { "epoch": 9.522245037645447, "grad_norm": 5.610090732574463, "learning_rate": 5.236986301369863e-07, "log_odds_chosen": 1.3141149282455444, "log_odds_ratio": -0.37412816286087036, "logits/chosen": 1.1453644037246704, "logits/rejected": 1.1859194040298462, "logps/chosen": -2.4020473957061768, "logps/rejected": -3.6094768047332764, "loss": 0.6497, "nll_loss": 0.6122689843177795, "rewards/accuracies": 0.75, "rewards/chosen": -0.24020472168922424, "rewards/margins": 0.12074295431375504, "rewards/rejected": -0.3609476685523987, "step": 3478 }, { "epoch": 9.52498288843258, "grad_norm": 5.426120758056641, "learning_rate": 5.235616438356164e-07, "log_odds_chosen": 1.434207797050476, "log_odds_ratio": -0.3318074345588684, "logits/chosen": 0.9968831539154053, "logits/rejected": 0.8316285610198975, "logps/chosen": -2.839656352996826, "logps/rejected": -4.1686224937438965, "loss": 0.823, "nll_loss": 0.7898470163345337, "rewards/accuracies": 0.875, "rewards/chosen": -0.2839656472206116, "rewards/margins": 0.1328965723514557, "rewards/rejected": -0.41686224937438965, "step": 3479 }, { "epoch": 9.527720739219713, "grad_norm": 5.492198467254639, "learning_rate": 5.234246575342466e-07, "log_odds_chosen": 1.0086643695831299, "log_odds_ratio": -0.47184935212135315, "logits/chosen": 0.9224395751953125, "logits/rejected": 0.8397105932235718, "logps/chosen": -2.2068915367126465, "logps/rejected": -3.177907943725586, "loss": 0.8198, "nll_loss": 0.7726300954818726, "rewards/accuracies": 0.875, "rewards/chosen": -0.22068914771080017, "rewards/margins": 0.0971016138792038, "rewards/rejected": -0.31779077649116516, "step": 3480 }, { "epoch": 9.530458590006845, "grad_norm": 6.789530277252197, "learning_rate": 5.232876712328767e-07, "log_odds_chosen": 1.5880950689315796, "log_odds_ratio": -0.5684118270874023, "logits/chosen": 1.0604844093322754, "logits/rejected": 1.1240969896316528, "logps/chosen": -3.1213276386260986, "logps/rejected": -4.680820941925049, "loss": 0.7622, "nll_loss": 0.705363929271698, "rewards/accuracies": 0.875, "rewards/chosen": -0.31213274598121643, "rewards/margins": 0.15594936907291412, "rewards/rejected": -0.46808212995529175, "step": 3481 }, { "epoch": 9.533196440793978, "grad_norm": 6.399606704711914, "learning_rate": 5.231506849315069e-07, "log_odds_chosen": 2.5354883670806885, "log_odds_ratio": -0.452031672000885, "logits/chosen": 1.058868408203125, "logits/rejected": 1.0463697910308838, "logps/chosen": -2.87042498588562, "logps/rejected": -5.347599983215332, "loss": 0.8649, "nll_loss": 0.8196587562561035, "rewards/accuracies": 0.875, "rewards/chosen": -0.287042498588562, "rewards/margins": 0.24771754443645477, "rewards/rejected": -0.534760057926178, "step": 3482 }, { "epoch": 9.535934291581109, "grad_norm": 5.0398149490356445, "learning_rate": 5.23013698630137e-07, "log_odds_chosen": 2.0611412525177, "log_odds_ratio": -0.24007537961006165, "logits/chosen": 0.8746659755706787, "logits/rejected": 0.9081951379776001, "logps/chosen": -2.7741966247558594, "logps/rejected": -4.7459025382995605, "loss": 0.7437, "nll_loss": 0.7196590900421143, "rewards/accuracies": 1.0, "rewards/chosen": -0.27741968631744385, "rewards/margins": 0.19717060029506683, "rewards/rejected": -0.4745902717113495, "step": 3483 }, { "epoch": 9.538672142368242, "grad_norm": 4.6884236335754395, "learning_rate": 5.228767123287671e-07, "log_odds_chosen": 1.89680016040802, "log_odds_ratio": -0.21074654161930084, "logits/chosen": 0.7199889421463013, "logits/rejected": 0.7179058790206909, "logps/chosen": -1.8600356578826904, "logps/rejected": -3.6254632472991943, "loss": 0.7793, "nll_loss": 0.758240818977356, "rewards/accuracies": 1.0, "rewards/chosen": -0.18600356578826904, "rewards/margins": 0.1765427589416504, "rewards/rejected": -0.36254632472991943, "step": 3484 }, { "epoch": 9.541409993155373, "grad_norm": 4.613626480102539, "learning_rate": 5.227397260273973e-07, "log_odds_chosen": 1.7109445333480835, "log_odds_ratio": -0.32668742537498474, "logits/chosen": 0.6377754211425781, "logits/rejected": 0.6714699864387512, "logps/chosen": -1.8048968315124512, "logps/rejected": -3.298185348510742, "loss": 0.7769, "nll_loss": 0.7442134022712708, "rewards/accuracies": 0.875, "rewards/chosen": -0.1804896891117096, "rewards/margins": 0.14932884275913239, "rewards/rejected": -0.3298185169696808, "step": 3485 }, { "epoch": 9.544147843942506, "grad_norm": 4.681380271911621, "learning_rate": 5.226027397260274e-07, "log_odds_chosen": 3.698615550994873, "log_odds_ratio": -0.14906686544418335, "logits/chosen": 1.0545085668563843, "logits/rejected": 1.0986995697021484, "logps/chosen": -2.3060543537139893, "logps/rejected": -5.885727882385254, "loss": 0.7157, "nll_loss": 0.7008310556411743, "rewards/accuracies": 1.0, "rewards/chosen": -0.23060542345046997, "rewards/margins": 0.3579673767089844, "rewards/rejected": -0.5885728597640991, "step": 3486 }, { "epoch": 9.546885694729637, "grad_norm": 5.485394477844238, "learning_rate": 5.224657534246575e-07, "log_odds_chosen": 1.5185192823410034, "log_odds_ratio": -0.28427648544311523, "logits/chosen": 0.8727425336837769, "logits/rejected": 0.8064011335372925, "logps/chosen": -2.4685516357421875, "logps/rejected": -3.804936647415161, "loss": 0.7471, "nll_loss": 0.7186646461486816, "rewards/accuracies": 1.0, "rewards/chosen": -0.24685516953468323, "rewards/margins": 0.13363847136497498, "rewards/rejected": -0.3804936408996582, "step": 3487 }, { "epoch": 9.54962354551677, "grad_norm": 5.867303848266602, "learning_rate": 5.223287671232877e-07, "log_odds_chosen": 1.9209771156311035, "log_odds_ratio": -0.3133140206336975, "logits/chosen": 0.8063886165618896, "logits/rejected": 0.7632372379302979, "logps/chosen": -3.1464881896972656, "logps/rejected": -5.006181716918945, "loss": 0.8187, "nll_loss": 0.7874141335487366, "rewards/accuracies": 0.875, "rewards/chosen": -0.3146488070487976, "rewards/margins": 0.18596939742565155, "rewards/rejected": -0.5006182193756104, "step": 3488 }, { "epoch": 9.552361396303901, "grad_norm": 5.34793758392334, "learning_rate": 5.221917808219179e-07, "log_odds_chosen": 1.2662909030914307, "log_odds_ratio": -0.4021461308002472, "logits/chosen": 0.8579677939414978, "logits/rejected": 0.756947934627533, "logps/chosen": -1.7388124465942383, "logps/rejected": -2.8936586380004883, "loss": 0.7884, "nll_loss": 0.7482104897499084, "rewards/accuracies": 0.75, "rewards/chosen": -0.17388123273849487, "rewards/margins": 0.11548464000225067, "rewards/rejected": -0.28936588764190674, "step": 3489 }, { "epoch": 9.555099247091034, "grad_norm": 5.109663009643555, "learning_rate": 5.220547945205479e-07, "log_odds_chosen": 1.4999290704727173, "log_odds_ratio": -0.34303051233291626, "logits/chosen": 0.6819038391113281, "logits/rejected": 0.674494743347168, "logps/chosen": -1.8742485046386719, "logps/rejected": -3.270895004272461, "loss": 0.7413, "nll_loss": 0.7070355415344238, "rewards/accuracies": 0.875, "rewards/chosen": -0.18742486834526062, "rewards/margins": 0.13966462016105652, "rewards/rejected": -0.3270895183086395, "step": 3490 }, { "epoch": 9.557837097878165, "grad_norm": 4.501601696014404, "learning_rate": 5.219178082191781e-07, "log_odds_chosen": 3.1656813621520996, "log_odds_ratio": -0.13584411144256592, "logits/chosen": 0.896636426448822, "logits/rejected": 0.9133689403533936, "logps/chosen": -2.2787563800811768, "logps/rejected": -5.33635950088501, "loss": 0.7654, "nll_loss": 0.7517923712730408, "rewards/accuracies": 1.0, "rewards/chosen": -0.22787563502788544, "rewards/margins": 0.30576032400131226, "rewards/rejected": -0.5336359143257141, "step": 3491 }, { "epoch": 9.560574948665298, "grad_norm": 6.112136363983154, "learning_rate": 5.217808219178082e-07, "log_odds_chosen": 2.7543411254882812, "log_odds_ratio": -0.46570324897766113, "logits/chosen": 0.916408360004425, "logits/rejected": 0.973797619342804, "logps/chosen": -2.9029788970947266, "logps/rejected": -5.611873149871826, "loss": 0.8554, "nll_loss": 0.8087973594665527, "rewards/accuracies": 0.875, "rewards/chosen": -0.29029789566993713, "rewards/margins": 0.2708894908428192, "rewards/rejected": -0.5611873865127563, "step": 3492 }, { "epoch": 9.56331279945243, "grad_norm": 4.373359680175781, "learning_rate": 5.216438356164383e-07, "log_odds_chosen": 1.4138126373291016, "log_odds_ratio": -0.4829186797142029, "logits/chosen": 0.7496852278709412, "logits/rejected": 0.7459607124328613, "logps/chosen": -2.3052968978881836, "logps/rejected": -3.677206516265869, "loss": 0.8263, "nll_loss": 0.7780512571334839, "rewards/accuracies": 0.75, "rewards/chosen": -0.23052969574928284, "rewards/margins": 0.13719096779823303, "rewards/rejected": -0.36772066354751587, "step": 3493 }, { "epoch": 9.566050650239562, "grad_norm": 4.6739301681518555, "learning_rate": 5.215068493150685e-07, "log_odds_chosen": 2.0878844261169434, "log_odds_ratio": -0.2270776331424713, "logits/chosen": 0.8802773356437683, "logits/rejected": 0.8869958519935608, "logps/chosen": -2.751422882080078, "logps/rejected": -4.756961822509766, "loss": 0.7506, "nll_loss": 0.7278981804847717, "rewards/accuracies": 1.0, "rewards/chosen": -0.27514225244522095, "rewards/margins": 0.20055395364761353, "rewards/rejected": -0.47569623589515686, "step": 3494 }, { "epoch": 9.568788501026694, "grad_norm": 4.2183332443237305, "learning_rate": 5.213698630136986e-07, "log_odds_chosen": 1.989027976989746, "log_odds_ratio": -0.24671900272369385, "logits/chosen": 0.8990253806114197, "logits/rejected": 0.894956648349762, "logps/chosen": -2.009673833847046, "logps/rejected": -3.8410449028015137, "loss": 0.699, "nll_loss": 0.6742824912071228, "rewards/accuracies": 1.0, "rewards/chosen": -0.20096737146377563, "rewards/margins": 0.18313711881637573, "rewards/rejected": -0.38410449028015137, "step": 3495 }, { "epoch": 9.571526351813826, "grad_norm": 5.143420696258545, "learning_rate": 5.212328767123288e-07, "log_odds_chosen": 1.4110997915267944, "log_odds_ratio": -0.3636782765388489, "logits/chosen": 0.6742969751358032, "logits/rejected": 0.7493690252304077, "logps/chosen": -2.694295883178711, "logps/rejected": -4.035398483276367, "loss": 0.8689, "nll_loss": 0.8324906229972839, "rewards/accuracies": 0.875, "rewards/chosen": -0.26942959427833557, "rewards/margins": 0.13411028683185577, "rewards/rejected": -0.40353986620903015, "step": 3496 }, { "epoch": 9.574264202600958, "grad_norm": 4.688100814819336, "learning_rate": 5.210958904109589e-07, "log_odds_chosen": 2.382416248321533, "log_odds_ratio": -0.22775551676750183, "logits/chosen": 0.6513434648513794, "logits/rejected": 0.5795213580131531, "logps/chosen": -1.6649155616760254, "logps/rejected": -3.8871006965637207, "loss": 0.6901, "nll_loss": 0.6673205494880676, "rewards/accuracies": 1.0, "rewards/chosen": -0.1664915680885315, "rewards/margins": 0.22221851348876953, "rewards/rejected": -0.388710081577301, "step": 3497 }, { "epoch": 9.57700205338809, "grad_norm": 6.922590732574463, "learning_rate": 5.20958904109589e-07, "log_odds_chosen": 0.7872674465179443, "log_odds_ratio": -0.4685986042022705, "logits/chosen": 0.7379109859466553, "logits/rejected": 0.6662155985832214, "logps/chosen": -2.6029903888702393, "logps/rejected": -3.3246665000915527, "loss": 0.8297, "nll_loss": 0.7828266620635986, "rewards/accuracies": 0.75, "rewards/chosen": -0.26029905676841736, "rewards/margins": 0.07216762006282806, "rewards/rejected": -0.33246666193008423, "step": 3498 }, { "epoch": 9.579739904175222, "grad_norm": 4.933022499084473, "learning_rate": 5.208219178082192e-07, "log_odds_chosen": 2.840277671813965, "log_odds_ratio": -0.21944186091423035, "logits/chosen": 1.1614569425582886, "logits/rejected": 1.235201120376587, "logps/chosen": -2.260317325592041, "logps/rejected": -4.980251789093018, "loss": 0.7887, "nll_loss": 0.7667926549911499, "rewards/accuracies": 0.875, "rewards/chosen": -0.22603173553943634, "rewards/margins": 0.2719934582710266, "rewards/rejected": -0.49802517890930176, "step": 3499 }, { "epoch": 9.582477754962355, "grad_norm": 4.159668922424316, "learning_rate": 5.206849315068493e-07, "log_odds_chosen": 2.852377414703369, "log_odds_ratio": -0.2069977968931198, "logits/chosen": 0.7857925295829773, "logits/rejected": 0.8411864042282104, "logps/chosen": -1.6638652086257935, "logps/rejected": -4.3229079246521, "loss": 0.7198, "nll_loss": 0.6991445422172546, "rewards/accuracies": 1.0, "rewards/chosen": -0.16638652980327606, "rewards/margins": 0.2659042477607727, "rewards/rejected": -0.43229079246520996, "step": 3500 }, { "epoch": 9.585215605749486, "grad_norm": 4.3429765701293945, "learning_rate": 5.205479452054794e-07, "log_odds_chosen": 2.0494306087493896, "log_odds_ratio": -0.25643807649612427, "logits/chosen": 0.7704790234565735, "logits/rejected": 0.7075839042663574, "logps/chosen": -2.775167465209961, "logps/rejected": -4.734206676483154, "loss": 0.7406, "nll_loss": 0.7149491310119629, "rewards/accuracies": 1.0, "rewards/chosen": -0.2775167226791382, "rewards/margins": 0.195903941988945, "rewards/rejected": -0.4734206795692444, "step": 3501 }, { "epoch": 9.587953456536619, "grad_norm": 7.409368991851807, "learning_rate": 5.204109589041096e-07, "log_odds_chosen": 0.7273796200752258, "log_odds_ratio": -0.6986868977546692, "logits/chosen": 0.9148038625717163, "logits/rejected": 0.8597784042358398, "logps/chosen": -2.222774028778076, "logps/rejected": -2.82468843460083, "loss": 0.8126, "nll_loss": 0.7426853179931641, "rewards/accuracies": 0.875, "rewards/chosen": -0.22227743268013, "rewards/margins": 0.060191426426172256, "rewards/rejected": -0.28246885538101196, "step": 3502 }, { "epoch": 9.59069130732375, "grad_norm": 4.984341144561768, "learning_rate": 5.202739726027398e-07, "log_odds_chosen": 1.8677425384521484, "log_odds_ratio": -0.2890253961086273, "logits/chosen": 0.7846255898475647, "logits/rejected": 0.792495608329773, "logps/chosen": -1.5881609916687012, "logps/rejected": -3.254417657852173, "loss": 0.7092, "nll_loss": 0.6803100109100342, "rewards/accuracies": 1.0, "rewards/chosen": -0.15881609916687012, "rewards/margins": 0.16662566363811493, "rewards/rejected": -0.32544174790382385, "step": 3503 }, { "epoch": 9.593429158110883, "grad_norm": 5.7178449630737305, "learning_rate": 5.201369863013698e-07, "log_odds_chosen": 1.5230379104614258, "log_odds_ratio": -0.5162888169288635, "logits/chosen": 0.9317638874053955, "logits/rejected": 0.9922971129417419, "logps/chosen": -2.670180082321167, "logps/rejected": -4.129899024963379, "loss": 0.7728, "nll_loss": 0.7211544513702393, "rewards/accuracies": 0.875, "rewards/chosen": -0.26701802015304565, "rewards/margins": 0.14597190916538239, "rewards/rejected": -0.41298991441726685, "step": 3504 }, { "epoch": 9.596167008898014, "grad_norm": 5.2344560623168945, "learning_rate": 5.2e-07, "log_odds_chosen": 2.7157082557678223, "log_odds_ratio": -0.31426408886909485, "logits/chosen": 0.8848041296005249, "logits/rejected": 0.8859134912490845, "logps/chosen": -1.9263672828674316, "logps/rejected": -4.477426052093506, "loss": 0.7708, "nll_loss": 0.7394185066223145, "rewards/accuracies": 0.875, "rewards/chosen": -0.19263672828674316, "rewards/margins": 0.2551058530807495, "rewards/rejected": -0.44774264097213745, "step": 3505 }, { "epoch": 9.598904859685147, "grad_norm": 4.439512729644775, "learning_rate": 5.198630136986301e-07, "log_odds_chosen": 1.5899285078048706, "log_odds_ratio": -0.26313501596450806, "logits/chosen": 0.7107820510864258, "logits/rejected": 0.6650031805038452, "logps/chosen": -3.212552309036255, "logps/rejected": -4.69918966293335, "loss": 0.8559, "nll_loss": 0.8295778036117554, "rewards/accuracies": 1.0, "rewards/chosen": -0.32125526666641235, "rewards/margins": 0.1486637443304062, "rewards/rejected": -0.46991899609565735, "step": 3506 }, { "epoch": 9.60164271047228, "grad_norm": 4.730306625366211, "learning_rate": 5.197260273972602e-07, "log_odds_chosen": 1.6397838592529297, "log_odds_ratio": -0.274936318397522, "logits/chosen": 0.7311617732048035, "logits/rejected": 0.7662972807884216, "logps/chosen": -2.005430221557617, "logps/rejected": -3.53256893157959, "loss": 0.6946, "nll_loss": 0.6671275496482849, "rewards/accuracies": 1.0, "rewards/chosen": -0.20054303109645844, "rewards/margins": 0.1527138352394104, "rewards/rejected": -0.35325688123703003, "step": 3507 }, { "epoch": 9.604380561259411, "grad_norm": 4.537073612213135, "learning_rate": 5.195890410958904e-07, "log_odds_chosen": 3.8804121017456055, "log_odds_ratio": -0.0709940567612648, "logits/chosen": 0.9435862302780151, "logits/rejected": 0.9825179576873779, "logps/chosen": -2.3170690536499023, "logps/rejected": -6.035538673400879, "loss": 0.6393, "nll_loss": 0.6321536302566528, "rewards/accuracies": 1.0, "rewards/chosen": -0.2317069172859192, "rewards/margins": 0.3718469440937042, "rewards/rejected": -0.6035538911819458, "step": 3508 }, { "epoch": 9.607118412046544, "grad_norm": 5.390129089355469, "learning_rate": 5.194520547945205e-07, "log_odds_chosen": 3.0507113933563232, "log_odds_ratio": -0.08909912407398224, "logits/chosen": 1.1936814785003662, "logits/rejected": 1.2639408111572266, "logps/chosen": -2.568909168243408, "logps/rejected": -5.449784278869629, "loss": 0.7146, "nll_loss": 0.7056541442871094, "rewards/accuracies": 1.0, "rewards/chosen": -0.2568909227848053, "rewards/margins": 0.28808754682540894, "rewards/rejected": -0.5449784398078918, "step": 3509 }, { "epoch": 9.609856262833675, "grad_norm": 5.751891613006592, "learning_rate": 5.193150684931507e-07, "log_odds_chosen": 1.003502607345581, "log_odds_ratio": -0.4610368013381958, "logits/chosen": 0.7790939807891846, "logits/rejected": 0.6866980791091919, "logps/chosen": -2.530651092529297, "logps/rejected": -3.4718029499053955, "loss": 0.767, "nll_loss": 0.7209387421607971, "rewards/accuracies": 0.875, "rewards/chosen": -0.2530651390552521, "rewards/margins": 0.09411519020795822, "rewards/rejected": -0.3471803069114685, "step": 3510 }, { "epoch": 9.612594113620808, "grad_norm": 4.609908580780029, "learning_rate": 5.191780821917808e-07, "log_odds_chosen": 1.8296327590942383, "log_odds_ratio": -0.23820775747299194, "logits/chosen": 0.6695566177368164, "logits/rejected": 0.5607922077178955, "logps/chosen": -2.0664329528808594, "logps/rejected": -3.7122700214385986, "loss": 0.7395, "nll_loss": 0.7157139778137207, "rewards/accuracies": 1.0, "rewards/chosen": -0.20664328336715698, "rewards/margins": 0.1645837128162384, "rewards/rejected": -0.3712270259857178, "step": 3511 }, { "epoch": 9.61533196440794, "grad_norm": 4.332830905914307, "learning_rate": 5.190410958904109e-07, "log_odds_chosen": 2.1920008659362793, "log_odds_ratio": -0.14889493584632874, "logits/chosen": 0.8899967670440674, "logits/rejected": 0.8898297548294067, "logps/chosen": -2.6572813987731934, "logps/rejected": -4.748485565185547, "loss": 0.694, "nll_loss": 0.6790657639503479, "rewards/accuracies": 1.0, "rewards/chosen": -0.2657281458377838, "rewards/margins": 0.20912036299705505, "rewards/rejected": -0.47484850883483887, "step": 3512 }, { "epoch": 9.618069815195073, "grad_norm": 4.789079666137695, "learning_rate": 5.189041095890411e-07, "log_odds_chosen": 1.9038519859313965, "log_odds_ratio": -0.21702168881893158, "logits/chosen": 0.7672085165977478, "logits/rejected": 0.7573922276496887, "logps/chosen": -2.483485698699951, "logps/rejected": -4.263072490692139, "loss": 0.7727, "nll_loss": 0.7509727478027344, "rewards/accuracies": 1.0, "rewards/chosen": -0.24834856390953064, "rewards/margins": 0.17795869708061218, "rewards/rejected": -0.4263072609901428, "step": 3513 }, { "epoch": 9.620807665982204, "grad_norm": 5.509033679962158, "learning_rate": 5.187671232876712e-07, "log_odds_chosen": 2.2684154510498047, "log_odds_ratio": -0.13796846568584442, "logits/chosen": 0.6422564387321472, "logits/rejected": 0.6040070056915283, "logps/chosen": -2.2923669815063477, "logps/rejected": -4.373533248901367, "loss": 0.8742, "nll_loss": 0.8604476451873779, "rewards/accuracies": 1.0, "rewards/chosen": -0.22923670709133148, "rewards/margins": 0.20811662077903748, "rewards/rejected": -0.43735334277153015, "step": 3514 }, { "epoch": 9.623545516769337, "grad_norm": 5.9471964836120605, "learning_rate": 5.186301369863013e-07, "log_odds_chosen": 1.1749186515808105, "log_odds_ratio": -0.3598901629447937, "logits/chosen": 0.7953076958656311, "logits/rejected": 0.859946072101593, "logps/chosen": -2.7367072105407715, "logps/rejected": -3.850559711456299, "loss": 0.6941, "nll_loss": 0.6581328511238098, "rewards/accuracies": 0.875, "rewards/chosen": -0.2736707031726837, "rewards/margins": 0.11138524860143661, "rewards/rejected": -0.3850559592247009, "step": 3515 }, { "epoch": 9.626283367556468, "grad_norm": 4.996203899383545, "learning_rate": 5.184931506849315e-07, "log_odds_chosen": 3.020128011703491, "log_odds_ratio": -0.14749939739704132, "logits/chosen": 0.9887619018554688, "logits/rejected": 1.0662295818328857, "logps/chosen": -2.6110715866088867, "logps/rejected": -5.535755157470703, "loss": 0.8037, "nll_loss": 0.7889086008071899, "rewards/accuracies": 0.875, "rewards/chosen": -0.2611071765422821, "rewards/margins": 0.2924683690071106, "rewards/rejected": -0.5535755157470703, "step": 3516 }, { "epoch": 9.6290212183436, "grad_norm": 5.048084259033203, "learning_rate": 5.183561643835617e-07, "log_odds_chosen": 1.4974055290222168, "log_odds_ratio": -0.45783501863479614, "logits/chosen": 1.019258975982666, "logits/rejected": 1.050106167793274, "logps/chosen": -2.455575704574585, "logps/rejected": -3.9462122917175293, "loss": 0.7067, "nll_loss": 0.6608731150627136, "rewards/accuracies": 0.75, "rewards/chosen": -0.24555757641792297, "rewards/margins": 0.14906369149684906, "rewards/rejected": -0.39462125301361084, "step": 3517 }, { "epoch": 9.631759069130732, "grad_norm": 5.310316562652588, "learning_rate": 5.182191780821917e-07, "log_odds_chosen": 1.2262859344482422, "log_odds_ratio": -0.43939125537872314, "logits/chosen": 0.7597501873970032, "logits/rejected": 0.7550083994865417, "logps/chosen": -2.489169120788574, "logps/rejected": -3.6837992668151855, "loss": 0.7899, "nll_loss": 0.7459816932678223, "rewards/accuracies": 0.625, "rewards/chosen": -0.248916894197464, "rewards/margins": 0.1194630041718483, "rewards/rejected": -0.3683799207210541, "step": 3518 }, { "epoch": 9.634496919917865, "grad_norm": 5.180870532989502, "learning_rate": 5.180821917808219e-07, "log_odds_chosen": 1.3051748275756836, "log_odds_ratio": -0.36030030250549316, "logits/chosen": 0.7580055594444275, "logits/rejected": 0.8243601322174072, "logps/chosen": -2.213789463043213, "logps/rejected": -3.4500229358673096, "loss": 0.7969, "nll_loss": 0.7608563899993896, "rewards/accuracies": 0.875, "rewards/chosen": -0.22137892246246338, "rewards/margins": 0.12362337112426758, "rewards/rejected": -0.34500229358673096, "step": 3519 }, { "epoch": 9.637234770704996, "grad_norm": 5.804933547973633, "learning_rate": 5.179452054794521e-07, "log_odds_chosen": 0.8123267889022827, "log_odds_ratio": -0.5474377870559692, "logits/chosen": 0.9250257015228271, "logits/rejected": 0.8849962949752808, "logps/chosen": -2.5298984050750732, "logps/rejected": -3.2546560764312744, "loss": 0.7245, "nll_loss": 0.669800877571106, "rewards/accuracies": 0.625, "rewards/chosen": -0.25298985838890076, "rewards/margins": 0.07247576862573624, "rewards/rejected": -0.3254656195640564, "step": 3520 }, { "epoch": 9.639972621492129, "grad_norm": 5.139296531677246, "learning_rate": 5.178082191780821e-07, "log_odds_chosen": 2.8258001804351807, "log_odds_ratio": -0.32084518671035767, "logits/chosen": 0.8169695734977722, "logits/rejected": 0.7985764741897583, "logps/chosen": -2.280407428741455, "logps/rejected": -5.034714221954346, "loss": 0.8386, "nll_loss": 0.8065042495727539, "rewards/accuracies": 0.875, "rewards/chosen": -0.22804075479507446, "rewards/margins": 0.27543073892593384, "rewards/rejected": -0.5034714937210083, "step": 3521 }, { "epoch": 9.64271047227926, "grad_norm": 6.096158504486084, "learning_rate": 5.176712328767123e-07, "log_odds_chosen": 2.4017763137817383, "log_odds_ratio": -0.37622857093811035, "logits/chosen": 0.7656462788581848, "logits/rejected": 0.7931927442550659, "logps/chosen": -2.21274995803833, "logps/rejected": -4.481606960296631, "loss": 0.6875, "nll_loss": 0.6498352885246277, "rewards/accuracies": 0.875, "rewards/chosen": -0.22127500176429749, "rewards/margins": 0.22688573598861694, "rewards/rejected": -0.44816073775291443, "step": 3522 }, { "epoch": 9.645448323066393, "grad_norm": 6.067314624786377, "learning_rate": 5.175342465753424e-07, "log_odds_chosen": 1.0361846685409546, "log_odds_ratio": -0.41415727138519287, "logits/chosen": 0.9690676927566528, "logits/rejected": 1.0127400159835815, "logps/chosen": -2.42672061920166, "logps/rejected": -3.4006874561309814, "loss": 0.7124, "nll_loss": 0.6710281372070312, "rewards/accuracies": 0.875, "rewards/chosen": -0.24267205595970154, "rewards/margins": 0.09739666432142258, "rewards/rejected": -0.3400687575340271, "step": 3523 }, { "epoch": 9.648186173853524, "grad_norm": 4.8830156326293945, "learning_rate": 5.173972602739725e-07, "log_odds_chosen": 1.5584274530410767, "log_odds_ratio": -0.29085126519203186, "logits/chosen": 0.7473054528236389, "logits/rejected": 0.7166707515716553, "logps/chosen": -1.8540613651275635, "logps/rejected": -3.254021644592285, "loss": 0.8598, "nll_loss": 0.8307417035102844, "rewards/accuracies": 1.0, "rewards/chosen": -0.1854061484336853, "rewards/margins": 0.13999605178833008, "rewards/rejected": -0.325402170419693, "step": 3524 }, { "epoch": 9.650924024640657, "grad_norm": 7.227081298828125, "learning_rate": 5.172602739726027e-07, "log_odds_chosen": 0.5644495487213135, "log_odds_ratio": -0.869295597076416, "logits/chosen": 0.9659691452980042, "logits/rejected": 0.9898089170455933, "logps/chosen": -3.1673853397369385, "logps/rejected": -3.714914321899414, "loss": 0.7712, "nll_loss": 0.6842833161354065, "rewards/accuracies": 0.75, "rewards/chosen": -0.3167385160923004, "rewards/margins": 0.05475291609764099, "rewards/rejected": -0.3714914321899414, "step": 3525 }, { "epoch": 9.653661875427789, "grad_norm": 4.944316387176514, "learning_rate": 5.171232876712328e-07, "log_odds_chosen": 1.4939029216766357, "log_odds_ratio": -0.32046857476234436, "logits/chosen": 0.9999451637268066, "logits/rejected": 1.0523432493209839, "logps/chosen": -2.6869888305664062, "logps/rejected": -4.066703796386719, "loss": 0.6587, "nll_loss": 0.6266785264015198, "rewards/accuracies": 0.875, "rewards/chosen": -0.26869887113571167, "rewards/margins": 0.13797153532505035, "rewards/rejected": -0.4066704213619232, "step": 3526 }, { "epoch": 9.656399726214921, "grad_norm": 5.823974609375, "learning_rate": 5.16986301369863e-07, "log_odds_chosen": 2.1584367752075195, "log_odds_ratio": -0.19982680678367615, "logits/chosen": 0.9340125322341919, "logits/rejected": 0.969862699508667, "logps/chosen": -1.7755234241485596, "logps/rejected": -3.7429776191711426, "loss": 0.7002, "nll_loss": 0.680191695690155, "rewards/accuracies": 1.0, "rewards/chosen": -0.17755234241485596, "rewards/margins": 0.1967453956604004, "rewards/rejected": -0.37429773807525635, "step": 3527 }, { "epoch": 9.659137577002053, "grad_norm": 6.234983921051025, "learning_rate": 5.168493150684931e-07, "log_odds_chosen": 0.9869824051856995, "log_odds_ratio": -0.6433650255203247, "logits/chosen": 0.8272236585617065, "logits/rejected": 0.8028068542480469, "logps/chosen": -2.423926830291748, "logps/rejected": -3.311594009399414, "loss": 0.8165, "nll_loss": 0.7521642446517944, "rewards/accuracies": 0.75, "rewards/chosen": -0.2423926740884781, "rewards/margins": 0.08876673877239227, "rewards/rejected": -0.33115941286087036, "step": 3528 }, { "epoch": 9.661875427789186, "grad_norm": 4.6876373291015625, "learning_rate": 5.167123287671232e-07, "log_odds_chosen": 4.851590156555176, "log_odds_ratio": -0.11280068010091782, "logits/chosen": 1.0341932773590088, "logits/rejected": 1.0702002048492432, "logps/chosen": -2.033461093902588, "logps/rejected": -6.7137532234191895, "loss": 0.7824, "nll_loss": 0.7711374759674072, "rewards/accuracies": 0.875, "rewards/chosen": -0.2033461034297943, "rewards/margins": 0.4680292010307312, "rewards/rejected": -0.6713753342628479, "step": 3529 }, { "epoch": 9.664613278576317, "grad_norm": 6.529905796051025, "learning_rate": 5.165753424657534e-07, "log_odds_chosen": 0.6600466966629028, "log_odds_ratio": -0.5983519554138184, "logits/chosen": 0.8394815325737, "logits/rejected": 0.8397654294967651, "logps/chosen": -2.4653568267822266, "logps/rejected": -3.037033796310425, "loss": 0.7638, "nll_loss": 0.7039413452148438, "rewards/accuracies": 0.875, "rewards/chosen": -0.24653568863868713, "rewards/margins": 0.057167697697877884, "rewards/rejected": -0.3037033677101135, "step": 3530 }, { "epoch": 9.66735112936345, "grad_norm": 5.922357559204102, "learning_rate": 5.164383561643836e-07, "log_odds_chosen": 1.9046494960784912, "log_odds_ratio": -0.4201856255531311, "logits/chosen": 0.8758774995803833, "logits/rejected": 0.8908144235610962, "logps/chosen": -2.3240957260131836, "logps/rejected": -4.090566635131836, "loss": 0.7531, "nll_loss": 0.7110332250595093, "rewards/accuracies": 0.625, "rewards/chosen": -0.23240959644317627, "rewards/margins": 0.1766471266746521, "rewards/rejected": -0.40905672311782837, "step": 3531 }, { "epoch": 9.670088980150581, "grad_norm": 6.374638080596924, "learning_rate": 5.163013698630136e-07, "log_odds_chosen": 2.1214559078216553, "log_odds_ratio": -0.3318172097206116, "logits/chosen": 0.9898054599761963, "logits/rejected": 0.9779286980628967, "logps/chosen": -2.5870513916015625, "logps/rejected": -4.563220024108887, "loss": 0.8566, "nll_loss": 0.8233873248100281, "rewards/accuracies": 0.875, "rewards/chosen": -0.25870513916015625, "rewards/margins": 0.19761687517166138, "rewards/rejected": -0.45632198452949524, "step": 3532 }, { "epoch": 9.672826830937714, "grad_norm": 5.09587287902832, "learning_rate": 5.161643835616438e-07, "log_odds_chosen": 0.9411357641220093, "log_odds_ratio": -0.35121867060661316, "logits/chosen": 0.9175444841384888, "logits/rejected": 0.9143534898757935, "logps/chosen": -2.2353663444519043, "logps/rejected": -3.081968069076538, "loss": 0.7029, "nll_loss": 0.6677832007408142, "rewards/accuracies": 1.0, "rewards/chosen": -0.2235366404056549, "rewards/margins": 0.08466018736362457, "rewards/rejected": -0.3081968128681183, "step": 3533 }, { "epoch": 9.675564681724847, "grad_norm": 4.065733909606934, "learning_rate": 5.16027397260274e-07, "log_odds_chosen": 2.695664882659912, "log_odds_ratio": -0.20431223511695862, "logits/chosen": 0.916717529296875, "logits/rejected": 0.9606730937957764, "logps/chosen": -2.3524227142333984, "logps/rejected": -4.922477722167969, "loss": 0.7899, "nll_loss": 0.769420862197876, "rewards/accuracies": 0.875, "rewards/chosen": -0.23524227738380432, "rewards/margins": 0.257005512714386, "rewards/rejected": -0.4922477900981903, "step": 3534 }, { "epoch": 9.678302532511978, "grad_norm": 4.625452995300293, "learning_rate": 5.15890410958904e-07, "log_odds_chosen": 2.808230400085449, "log_odds_ratio": -0.18508043885231018, "logits/chosen": 0.901362419128418, "logits/rejected": 0.9391040802001953, "logps/chosen": -1.8992873430252075, "logps/rejected": -4.553282737731934, "loss": 0.6652, "nll_loss": 0.6466920375823975, "rewards/accuracies": 1.0, "rewards/chosen": -0.18992874026298523, "rewards/margins": 0.2653995156288147, "rewards/rejected": -0.4553282856941223, "step": 3535 }, { "epoch": 9.681040383299111, "grad_norm": 7.6061906814575195, "learning_rate": 5.157534246575342e-07, "log_odds_chosen": 2.673499345779419, "log_odds_ratio": -0.2216699719429016, "logits/chosen": 1.1331181526184082, "logits/rejected": 1.0958510637283325, "logps/chosen": -3.4995036125183105, "logps/rejected": -6.078272819519043, "loss": 0.7618, "nll_loss": 0.7396492958068848, "rewards/accuracies": 0.875, "rewards/chosen": -0.34995037317276, "rewards/margins": 0.2578769326210022, "rewards/rejected": -0.6078273057937622, "step": 3536 }, { "epoch": 9.683778234086242, "grad_norm": 4.821131706237793, "learning_rate": 5.156164383561643e-07, "log_odds_chosen": 1.4740394353866577, "log_odds_ratio": -0.29269126057624817, "logits/chosen": 0.7712329626083374, "logits/rejected": 0.8148924708366394, "logps/chosen": -2.239083766937256, "logps/rejected": -3.6405115127563477, "loss": 0.7171, "nll_loss": 0.6878107786178589, "rewards/accuracies": 0.875, "rewards/chosen": -0.22390839457511902, "rewards/margins": 0.14014270901679993, "rewards/rejected": -0.36405113339424133, "step": 3537 }, { "epoch": 9.686516084873375, "grad_norm": 5.947987079620361, "learning_rate": 5.154794520547944e-07, "log_odds_chosen": 1.4839438199996948, "log_odds_ratio": -0.4326273500919342, "logits/chosen": 0.9130105376243591, "logits/rejected": 0.8727502822875977, "logps/chosen": -2.0514206886291504, "logps/rejected": -3.4221692085266113, "loss": 0.7671, "nll_loss": 0.7238144278526306, "rewards/accuracies": 0.75, "rewards/chosen": -0.2051420509815216, "rewards/margins": 0.13707487285137177, "rewards/rejected": -0.34221693873405457, "step": 3538 }, { "epoch": 9.689253935660506, "grad_norm": 5.153791904449463, "learning_rate": 5.153424657534246e-07, "log_odds_chosen": 1.0172680616378784, "log_odds_ratio": -0.39309927821159363, "logits/chosen": 1.0266233682632446, "logits/rejected": 1.0782862901687622, "logps/chosen": -2.783917188644409, "logps/rejected": -3.7471399307250977, "loss": 0.7081, "nll_loss": 0.6687950491905212, "rewards/accuracies": 0.875, "rewards/chosen": -0.2783917486667633, "rewards/margins": 0.09632226824760437, "rewards/rejected": -0.3747139573097229, "step": 3539 }, { "epoch": 9.69199178644764, "grad_norm": 5.74622917175293, "learning_rate": 5.152054794520547e-07, "log_odds_chosen": 1.4019438028335571, "log_odds_ratio": -0.40464457869529724, "logits/chosen": 0.8514323830604553, "logits/rejected": 0.9414629340171814, "logps/chosen": -2.6976850032806396, "logps/rejected": -4.040888786315918, "loss": 0.7149, "nll_loss": 0.6744111776351929, "rewards/accuracies": 0.875, "rewards/chosen": -0.26976847648620605, "rewards/margins": 0.13432039320468903, "rewards/rejected": -0.40408891439437866, "step": 3540 }, { "epoch": 9.69472963723477, "grad_norm": 4.6521453857421875, "learning_rate": 5.150684931506849e-07, "log_odds_chosen": 2.1783089637756348, "log_odds_ratio": -0.39766091108322144, "logits/chosen": 0.7308335304260254, "logits/rejected": 0.7630043625831604, "logps/chosen": -2.3482160568237305, "logps/rejected": -4.460714817047119, "loss": 0.7189, "nll_loss": 0.6791054606437683, "rewards/accuracies": 0.75, "rewards/chosen": -0.2348216027021408, "rewards/margins": 0.21124987304210663, "rewards/rejected": -0.44607147574424744, "step": 3541 }, { "epoch": 9.697467488021903, "grad_norm": 4.670864105224609, "learning_rate": 5.14931506849315e-07, "log_odds_chosen": 1.9973889589309692, "log_odds_ratio": -0.2249826341867447, "logits/chosen": 1.0094969272613525, "logits/rejected": 1.0617457628250122, "logps/chosen": -2.657381534576416, "logps/rejected": -4.587502479553223, "loss": 0.7571, "nll_loss": 0.7345696687698364, "rewards/accuracies": 1.0, "rewards/chosen": -0.2657381594181061, "rewards/margins": 0.1930120885372162, "rewards/rejected": -0.45875030755996704, "step": 3542 }, { "epoch": 9.700205338809035, "grad_norm": 4.605874538421631, "learning_rate": 5.147945205479451e-07, "log_odds_chosen": 1.9851491451263428, "log_odds_ratio": -0.2670695185661316, "logits/chosen": 0.7926811575889587, "logits/rejected": 0.8152649998664856, "logps/chosen": -3.0085580348968506, "logps/rejected": -4.904061317443848, "loss": 0.7033, "nll_loss": 0.676607608795166, "rewards/accuracies": 1.0, "rewards/chosen": -0.3008557856082916, "rewards/margins": 0.18955034017562866, "rewards/rejected": -0.4904061555862427, "step": 3543 }, { "epoch": 9.702943189596168, "grad_norm": 4.305016040802002, "learning_rate": 5.146575342465753e-07, "log_odds_chosen": 2.4740846157073975, "log_odds_ratio": -0.24803397059440613, "logits/chosen": 0.7572020292282104, "logits/rejected": 0.765522301197052, "logps/chosen": -2.399400472640991, "logps/rejected": -4.770050048828125, "loss": 0.8591, "nll_loss": 0.8342692255973816, "rewards/accuracies": 0.875, "rewards/chosen": -0.23994004726409912, "rewards/margins": 0.23706498742103577, "rewards/rejected": -0.4770050346851349, "step": 3544 }, { "epoch": 9.705681040383299, "grad_norm": 4.668591499328613, "learning_rate": 5.145205479452054e-07, "log_odds_chosen": 1.8220053911209106, "log_odds_ratio": -0.3991714417934418, "logits/chosen": 0.8792742490768433, "logits/rejected": 0.8546954989433289, "logps/chosen": -1.9757435321807861, "logps/rejected": -3.6739535331726074, "loss": 0.6837, "nll_loss": 0.6438276171684265, "rewards/accuracies": 0.875, "rewards/chosen": -0.19757434725761414, "rewards/margins": 0.16982102394104004, "rewards/rejected": -0.3673953711986542, "step": 3545 }, { "epoch": 9.708418891170432, "grad_norm": 5.76077938079834, "learning_rate": 5.143835616438355e-07, "log_odds_chosen": 1.8177852630615234, "log_odds_ratio": -0.22732669115066528, "logits/chosen": 0.85469651222229, "logits/rejected": 0.8376454710960388, "logps/chosen": -1.9532146453857422, "logps/rejected": -3.640381336212158, "loss": 0.7578, "nll_loss": 0.735055685043335, "rewards/accuracies": 1.0, "rewards/chosen": -0.1953214704990387, "rewards/margins": 0.16871662437915802, "rewards/rejected": -0.3640381097793579, "step": 3546 }, { "epoch": 9.711156741957563, "grad_norm": 4.245889663696289, "learning_rate": 5.142465753424657e-07, "log_odds_chosen": 1.5506484508514404, "log_odds_ratio": -0.24795173108577728, "logits/chosen": 0.880890965461731, "logits/rejected": 0.908881425857544, "logps/chosen": -2.2728376388549805, "logps/rejected": -3.7135019302368164, "loss": 0.7665, "nll_loss": 0.7416908144950867, "rewards/accuracies": 1.0, "rewards/chosen": -0.227283775806427, "rewards/margins": 0.1440664380788803, "rewards/rejected": -0.3713502287864685, "step": 3547 }, { "epoch": 9.713894592744696, "grad_norm": 8.10142707824707, "learning_rate": 5.14109589041096e-07, "log_odds_chosen": 1.5732898712158203, "log_odds_ratio": -0.576598584651947, "logits/chosen": 0.8789839744567871, "logits/rejected": 0.8188509941101074, "logps/chosen": -2.276136875152588, "logps/rejected": -3.7428557872772217, "loss": 0.7825, "nll_loss": 0.7248706817626953, "rewards/accuracies": 0.875, "rewards/chosen": -0.22761371731758118, "rewards/margins": 0.14667189121246338, "rewards/rejected": -0.37428560853004456, "step": 3548 }, { "epoch": 9.716632443531827, "grad_norm": 5.31998872756958, "learning_rate": 5.13972602739726e-07, "log_odds_chosen": 3.2018284797668457, "log_odds_ratio": -0.1424311399459839, "logits/chosen": 0.9846113920211792, "logits/rejected": 1.0391936302185059, "logps/chosen": -2.362738609313965, "logps/rejected": -5.473465919494629, "loss": 0.8754, "nll_loss": 0.8611579537391663, "rewards/accuracies": 1.0, "rewards/chosen": -0.2362738847732544, "rewards/margins": 0.3110727071762085, "rewards/rejected": -0.5473465919494629, "step": 3549 }, { "epoch": 9.71937029431896, "grad_norm": 4.428797245025635, "learning_rate": 5.138356164383562e-07, "log_odds_chosen": 2.5238306522369385, "log_odds_ratio": -0.24305537343025208, "logits/chosen": 0.7226109504699707, "logits/rejected": 0.6899365186691284, "logps/chosen": -2.591367721557617, "logps/rejected": -5.038200378417969, "loss": 0.8325, "nll_loss": 0.8082232475280762, "rewards/accuracies": 0.875, "rewards/chosen": -0.25913679599761963, "rewards/margins": 0.24468329548835754, "rewards/rejected": -0.5038200616836548, "step": 3550 }, { "epoch": 9.722108145106091, "grad_norm": 4.656196117401123, "learning_rate": 5.136986301369864e-07, "log_odds_chosen": 1.647361159324646, "log_odds_ratio": -0.5250796675682068, "logits/chosen": 0.6988235712051392, "logits/rejected": 0.7684053182601929, "logps/chosen": -2.425882339477539, "logps/rejected": -3.9814276695251465, "loss": 0.8463, "nll_loss": 0.793831467628479, "rewards/accuracies": 0.75, "rewards/chosen": -0.24258822202682495, "rewards/margins": 0.15555456280708313, "rewards/rejected": -0.3981427848339081, "step": 3551 }, { "epoch": 9.724845995893224, "grad_norm": 4.831953048706055, "learning_rate": 5.135616438356164e-07, "log_odds_chosen": 2.003828525543213, "log_odds_ratio": -0.4006909132003784, "logits/chosen": 0.9926131963729858, "logits/rejected": 0.9819246530532837, "logps/chosen": -2.064150810241699, "logps/rejected": -3.9466214179992676, "loss": 0.7944, "nll_loss": 0.7542954683303833, "rewards/accuracies": 0.875, "rewards/chosen": -0.2064150869846344, "rewards/margins": 0.18824702501296997, "rewards/rejected": -0.39466211199760437, "step": 3552 }, { "epoch": 9.727583846680355, "grad_norm": 4.9557785987854, "learning_rate": 5.134246575342466e-07, "log_odds_chosen": 2.301445960998535, "log_odds_ratio": -0.27208539843559265, "logits/chosen": 0.6649052500724792, "logits/rejected": 0.6251998543739319, "logps/chosen": -1.8203893899917603, "logps/rejected": -3.9799187183380127, "loss": 0.758, "nll_loss": 0.7308222651481628, "rewards/accuracies": 1.0, "rewards/chosen": -0.18203893303871155, "rewards/margins": 0.21595294773578644, "rewards/rejected": -0.3979918956756592, "step": 3553 }, { "epoch": 9.730321697467488, "grad_norm": 4.547373294830322, "learning_rate": 5.132876712328767e-07, "log_odds_chosen": 2.2356278896331787, "log_odds_ratio": -0.26793044805526733, "logits/chosen": 1.0119749307632446, "logits/rejected": 0.9571446180343628, "logps/chosen": -1.880812168121338, "logps/rejected": -3.9672555923461914, "loss": 0.6914, "nll_loss": 0.6646474003791809, "rewards/accuracies": 1.0, "rewards/chosen": -0.18808123469352722, "rewards/margins": 0.2086443454027176, "rewards/rejected": -0.3967255651950836, "step": 3554 }, { "epoch": 9.73305954825462, "grad_norm": 4.436407566070557, "learning_rate": 5.131506849315069e-07, "log_odds_chosen": 1.5575346946716309, "log_odds_ratio": -0.2374524474143982, "logits/chosen": 0.8693585991859436, "logits/rejected": 0.8624826073646545, "logps/chosen": -2.3872499465942383, "logps/rejected": -3.854572296142578, "loss": 0.6861, "nll_loss": 0.6623585820198059, "rewards/accuracies": 1.0, "rewards/chosen": -0.2387249767780304, "rewards/margins": 0.14673221111297607, "rewards/rejected": -0.38545718789100647, "step": 3555 }, { "epoch": 9.735797399041752, "grad_norm": 4.403624534606934, "learning_rate": 5.13013698630137e-07, "log_odds_chosen": 3.0788655281066895, "log_odds_ratio": -0.30336928367614746, "logits/chosen": 0.8624852299690247, "logits/rejected": 0.9271559119224548, "logps/chosen": -2.556914806365967, "logps/rejected": -5.562337875366211, "loss": 0.7078, "nll_loss": 0.677497386932373, "rewards/accuracies": 0.75, "rewards/chosen": -0.2556914687156677, "rewards/margins": 0.30054232478141785, "rewards/rejected": -0.5562337636947632, "step": 3556 }, { "epoch": 9.738535249828884, "grad_norm": 5.149479866027832, "learning_rate": 5.128767123287671e-07, "log_odds_chosen": 1.3645846843719482, "log_odds_ratio": -0.3563597500324249, "logits/chosen": 0.7584640979766846, "logits/rejected": 0.8264338374137878, "logps/chosen": -2.006326198577881, "logps/rejected": -3.2764956951141357, "loss": 0.709, "nll_loss": 0.6733357906341553, "rewards/accuracies": 0.875, "rewards/chosen": -0.20063263177871704, "rewards/margins": 0.12701693177223206, "rewards/rejected": -0.3276495933532715, "step": 3557 }, { "epoch": 9.741273100616016, "grad_norm": 4.1122727394104, "learning_rate": 5.127397260273973e-07, "log_odds_chosen": 2.449028968811035, "log_odds_ratio": -0.18839865922927856, "logits/chosen": 0.997481107711792, "logits/rejected": 0.9240059852600098, "logps/chosen": -2.1186156272888184, "logps/rejected": -4.371595859527588, "loss": 0.8136, "nll_loss": 0.7947251200675964, "rewards/accuracies": 1.0, "rewards/chosen": -0.21186156570911407, "rewards/margins": 0.22529803216457367, "rewards/rejected": -0.43715959787368774, "step": 3558 }, { "epoch": 9.744010951403148, "grad_norm": 5.660160064697266, "learning_rate": 5.126027397260274e-07, "log_odds_chosen": 1.7791383266448975, "log_odds_ratio": -0.30996763706207275, "logits/chosen": 0.5376647114753723, "logits/rejected": 0.5664442777633667, "logps/chosen": -2.530451774597168, "logps/rejected": -4.218527793884277, "loss": 0.7758, "nll_loss": 0.7447806596755981, "rewards/accuracies": 0.875, "rewards/chosen": -0.2530451714992523, "rewards/margins": 0.16880762577056885, "rewards/rejected": -0.42185282707214355, "step": 3559 }, { "epoch": 9.74674880219028, "grad_norm": 5.987400531768799, "learning_rate": 5.124657534246575e-07, "log_odds_chosen": 2.549529552459717, "log_odds_ratio": -0.5638405084609985, "logits/chosen": 1.0923283100128174, "logits/rejected": 1.1521531343460083, "logps/chosen": -3.2869739532470703, "logps/rejected": -5.804579734802246, "loss": 0.7602, "nll_loss": 0.7037801742553711, "rewards/accuracies": 0.75, "rewards/chosen": -0.32869744300842285, "rewards/margins": 0.2517605721950531, "rewards/rejected": -0.5804579854011536, "step": 3560 }, { "epoch": 9.749486652977414, "grad_norm": 5.0622076988220215, "learning_rate": 5.123287671232877e-07, "log_odds_chosen": 1.2577235698699951, "log_odds_ratio": -0.31618332862854004, "logits/chosen": 0.8480947613716125, "logits/rejected": 0.7643446922302246, "logps/chosen": -1.4162236452102661, "logps/rejected": -2.499542713165283, "loss": 0.6791, "nll_loss": 0.6474336981773376, "rewards/accuracies": 1.0, "rewards/chosen": -0.1416223645210266, "rewards/margins": 0.10833191126585007, "rewards/rejected": -0.24995426833629608, "step": 3561 }, { "epoch": 9.752224503764545, "grad_norm": 5.186189651489258, "learning_rate": 5.121917808219179e-07, "log_odds_chosen": 1.203608512878418, "log_odds_ratio": -0.5939567685127258, "logits/chosen": 0.7070009112358093, "logits/rejected": 0.6504338383674622, "logps/chosen": -2.264455556869507, "logps/rejected": -3.3227734565734863, "loss": 0.7652, "nll_loss": 0.70585036277771, "rewards/accuracies": 0.75, "rewards/chosen": -0.22644557058811188, "rewards/margins": 0.10583177208900452, "rewards/rejected": -0.3322773575782776, "step": 3562 }, { "epoch": 9.754962354551678, "grad_norm": 5.784224033355713, "learning_rate": 5.120547945205479e-07, "log_odds_chosen": 0.7143051624298096, "log_odds_ratio": -0.5261833667755127, "logits/chosen": 0.8778978586196899, "logits/rejected": 0.88763427734375, "logps/chosen": -2.5627236366271973, "logps/rejected": -3.2082581520080566, "loss": 0.6898, "nll_loss": 0.6372079849243164, "rewards/accuracies": 0.875, "rewards/chosen": -0.2562723457813263, "rewards/margins": 0.06455345451831818, "rewards/rejected": -0.32082581520080566, "step": 3563 }, { "epoch": 9.757700205338809, "grad_norm": 5.435128688812256, "learning_rate": 5.119178082191781e-07, "log_odds_chosen": 2.569186210632324, "log_odds_ratio": -0.18373239040374756, "logits/chosen": 0.8277730941772461, "logits/rejected": 0.8886644840240479, "logps/chosen": -2.140718460083008, "logps/rejected": -4.5953369140625, "loss": 0.7134, "nll_loss": 0.6950626373291016, "rewards/accuracies": 1.0, "rewards/chosen": -0.2140718549489975, "rewards/margins": 0.2454618364572525, "rewards/rejected": -0.45953369140625, "step": 3564 }, { "epoch": 9.760438056125942, "grad_norm": 6.101033687591553, "learning_rate": 5.117808219178083e-07, "log_odds_chosen": 1.1438722610473633, "log_odds_ratio": -0.49461260437965393, "logits/chosen": 0.8271721005439758, "logits/rejected": 0.8031129837036133, "logps/chosen": -1.980722427368164, "logps/rejected": -3.0430920124053955, "loss": 0.7854, "nll_loss": 0.7359228134155273, "rewards/accuracies": 0.75, "rewards/chosen": -0.19807225465774536, "rewards/margins": 0.10623696446418762, "rewards/rejected": -0.3043091893196106, "step": 3565 }, { "epoch": 9.763175906913073, "grad_norm": 5.223549842834473, "learning_rate": 5.116438356164383e-07, "log_odds_chosen": 2.39125919342041, "log_odds_ratio": -0.20416508615016937, "logits/chosen": 0.7923810482025146, "logits/rejected": 0.8054915070533752, "logps/chosen": -2.5745315551757812, "logps/rejected": -4.894927024841309, "loss": 0.6757, "nll_loss": 0.6552457809448242, "rewards/accuracies": 1.0, "rewards/chosen": -0.25745317339897156, "rewards/margins": 0.23203951120376587, "rewards/rejected": -0.4894927144050598, "step": 3566 }, { "epoch": 9.765913757700206, "grad_norm": 5.158075332641602, "learning_rate": 5.115068493150685e-07, "log_odds_chosen": 2.2296204566955566, "log_odds_ratio": -0.2651526927947998, "logits/chosen": 0.805113673210144, "logits/rejected": 0.7799746990203857, "logps/chosen": -2.4418392181396484, "logps/rejected": -4.5715532302856445, "loss": 0.8628, "nll_loss": 0.8362786173820496, "rewards/accuracies": 0.875, "rewards/chosen": -0.24418392777442932, "rewards/margins": 0.21297138929367065, "rewards/rejected": -0.4571553170681, "step": 3567 }, { "epoch": 9.768651608487337, "grad_norm": 4.758829116821289, "learning_rate": 5.113698630136986e-07, "log_odds_chosen": 1.7275035381317139, "log_odds_ratio": -0.32070064544677734, "logits/chosen": 0.9294405579566956, "logits/rejected": 0.8517580628395081, "logps/chosen": -2.347959280014038, "logps/rejected": -3.9898228645324707, "loss": 0.8055, "nll_loss": 0.7733866572380066, "rewards/accuracies": 0.75, "rewards/chosen": -0.2347959280014038, "rewards/margins": 0.16418638825416565, "rewards/rejected": -0.39898231625556946, "step": 3568 }, { "epoch": 9.77138945927447, "grad_norm": 5.211530685424805, "learning_rate": 5.112328767123288e-07, "log_odds_chosen": 2.503934383392334, "log_odds_ratio": -0.1570965200662613, "logits/chosen": 0.8551927208900452, "logits/rejected": 0.8965191841125488, "logps/chosen": -2.935166835784912, "logps/rejected": -5.317044258117676, "loss": 0.7839, "nll_loss": 0.7682247757911682, "rewards/accuracies": 1.0, "rewards/chosen": -0.2935166358947754, "rewards/margins": 0.23818780481815338, "rewards/rejected": -0.5317044854164124, "step": 3569 }, { "epoch": 9.774127310061601, "grad_norm": 4.081686496734619, "learning_rate": 5.110958904109589e-07, "log_odds_chosen": 2.508399724960327, "log_odds_ratio": -0.19635583460330963, "logits/chosen": 0.7751439809799194, "logits/rejected": 0.8280725479125977, "logps/chosen": -1.4756128787994385, "logps/rejected": -3.737752676010132, "loss": 0.6784, "nll_loss": 0.6587495803833008, "rewards/accuracies": 1.0, "rewards/chosen": -0.14756129682064056, "rewards/margins": 0.2262139767408371, "rewards/rejected": -0.37377527356147766, "step": 3570 }, { "epoch": 9.776865160848734, "grad_norm": 4.697053909301758, "learning_rate": 5.10958904109589e-07, "log_odds_chosen": 1.8466675281524658, "log_odds_ratio": -0.2273101806640625, "logits/chosen": 0.7991542816162109, "logits/rejected": 0.833545982837677, "logps/chosen": -2.12642765045166, "logps/rejected": -3.8578474521636963, "loss": 0.8202, "nll_loss": 0.7974290251731873, "rewards/accuracies": 1.0, "rewards/chosen": -0.21264278888702393, "rewards/margins": 0.1731419712305069, "rewards/rejected": -0.38578474521636963, "step": 3571 }, { "epoch": 9.779603011635865, "grad_norm": 5.296621799468994, "learning_rate": 5.108219178082192e-07, "log_odds_chosen": 0.03958694636821747, "log_odds_ratio": -0.823238730430603, "logits/chosen": 0.791449248790741, "logits/rejected": 0.7627620697021484, "logps/chosen": -2.0378763675689697, "logps/rejected": -2.076432943344116, "loss": 0.7892, "nll_loss": 0.7068524360656738, "rewards/accuracies": 0.75, "rewards/chosen": -0.2037876546382904, "rewards/margins": 0.0038556382060050964, "rewards/rejected": -0.2076432853937149, "step": 3572 }, { "epoch": 9.782340862422998, "grad_norm": 6.04727029800415, "learning_rate": 5.106849315068493e-07, "log_odds_chosen": 0.5677403807640076, "log_odds_ratio": -0.7758170366287231, "logits/chosen": 0.8010764122009277, "logits/rejected": 0.8459650278091431, "logps/chosen": -2.9751930236816406, "logps/rejected": -3.5163683891296387, "loss": 0.8393, "nll_loss": 0.7617012858390808, "rewards/accuracies": 0.625, "rewards/chosen": -0.2975192666053772, "rewards/margins": 0.054117560386657715, "rewards/rejected": -0.3516368269920349, "step": 3573 }, { "epoch": 9.78507871321013, "grad_norm": 4.5623626708984375, "learning_rate": 5.105479452054794e-07, "log_odds_chosen": 0.9379663467407227, "log_odds_ratio": -0.37394046783447266, "logits/chosen": 1.0060033798217773, "logits/rejected": 1.0072368383407593, "logps/chosen": -1.8684102296829224, "logps/rejected": -2.715839147567749, "loss": 0.7319, "nll_loss": 0.6944891810417175, "rewards/accuracies": 0.875, "rewards/chosen": -0.18684104084968567, "rewards/margins": 0.08474288880825043, "rewards/rejected": -0.2715839147567749, "step": 3574 }, { "epoch": 9.787816563997263, "grad_norm": 5.05456018447876, "learning_rate": 5.104109589041096e-07, "log_odds_chosen": 0.9823975563049316, "log_odds_ratio": -0.410002201795578, "logits/chosen": 0.6737954616546631, "logits/rejected": 0.651077151298523, "logps/chosen": -2.171325922012329, "logps/rejected": -3.0212364196777344, "loss": 0.7554, "nll_loss": 0.7143874168395996, "rewards/accuracies": 0.75, "rewards/chosen": -0.2171325981616974, "rewards/margins": 0.08499104529619217, "rewards/rejected": -0.30212366580963135, "step": 3575 }, { "epoch": 9.790554414784394, "grad_norm": 4.222466945648193, "learning_rate": 5.102739726027398e-07, "log_odds_chosen": 1.8868434429168701, "log_odds_ratio": -0.32708168029785156, "logits/chosen": 0.95808345079422, "logits/rejected": 0.9384576678276062, "logps/chosen": -2.301558256149292, "logps/rejected": -4.108689308166504, "loss": 0.7149, "nll_loss": 0.6822401285171509, "rewards/accuracies": 0.875, "rewards/chosen": -0.2301558405160904, "rewards/margins": 0.18071310222148895, "rewards/rejected": -0.41086894273757935, "step": 3576 }, { "epoch": 9.793292265571527, "grad_norm": 4.490070343017578, "learning_rate": 5.101369863013698e-07, "log_odds_chosen": 1.5871537923812866, "log_odds_ratio": -0.2531648874282837, "logits/chosen": 0.9502560496330261, "logits/rejected": 0.9228310585021973, "logps/chosen": -1.7301952838897705, "logps/rejected": -3.1712565422058105, "loss": 0.7234, "nll_loss": 0.6980656385421753, "rewards/accuracies": 0.875, "rewards/chosen": -0.17301952838897705, "rewards/margins": 0.14410613477230072, "rewards/rejected": -0.31712567806243896, "step": 3577 }, { "epoch": 9.796030116358658, "grad_norm": 4.032671928405762, "learning_rate": 5.1e-07, "log_odds_chosen": 2.6214964389801025, "log_odds_ratio": -0.1540239155292511, "logits/chosen": 0.6057319641113281, "logits/rejected": 0.6440098285675049, "logps/chosen": -1.8876932859420776, "logps/rejected": -4.34816837310791, "loss": 0.6925, "nll_loss": 0.6770682334899902, "rewards/accuracies": 1.0, "rewards/chosen": -0.18876934051513672, "rewards/margins": 0.2460474818944931, "rewards/rejected": -0.434816837310791, "step": 3578 }, { "epoch": 9.79876796714579, "grad_norm": 4.554162502288818, "learning_rate": 5.098630136986302e-07, "log_odds_chosen": 2.456660270690918, "log_odds_ratio": -0.21857813000679016, "logits/chosen": 0.970190167427063, "logits/rejected": 1.014533519744873, "logps/chosen": -2.1509621143341064, "logps/rejected": -4.428357124328613, "loss": 0.6931, "nll_loss": 0.671250581741333, "rewards/accuracies": 1.0, "rewards/chosen": -0.21509620547294617, "rewards/margins": 0.22773945331573486, "rewards/rejected": -0.44283565878868103, "step": 3579 }, { "epoch": 9.801505817932922, "grad_norm": 5.816682815551758, "learning_rate": 5.097260273972602e-07, "log_odds_chosen": 2.4059810638427734, "log_odds_ratio": -0.31653791666030884, "logits/chosen": 0.773777961730957, "logits/rejected": 0.8095434904098511, "logps/chosen": -2.6392924785614014, "logps/rejected": -4.944883346557617, "loss": 0.7878, "nll_loss": 0.756098747253418, "rewards/accuracies": 0.875, "rewards/chosen": -0.26392924785614014, "rewards/margins": 0.2305590957403183, "rewards/rejected": -0.49448835849761963, "step": 3580 }, { "epoch": 9.804243668720055, "grad_norm": 5.400980472564697, "learning_rate": 5.095890410958904e-07, "log_odds_chosen": 2.944960832595825, "log_odds_ratio": -0.2201201617717743, "logits/chosen": 0.8299854397773743, "logits/rejected": 0.849615216255188, "logps/chosen": -2.5596923828125, "logps/rejected": -5.3277363777160645, "loss": 0.8131, "nll_loss": 0.7911186218261719, "rewards/accuracies": 1.0, "rewards/chosen": -0.25596922636032104, "rewards/margins": 0.2768044173717499, "rewards/rejected": -0.5327736139297485, "step": 3581 }, { "epoch": 9.806981519507186, "grad_norm": 5.295613765716553, "learning_rate": 5.094520547945205e-07, "log_odds_chosen": 1.955545425415039, "log_odds_ratio": -0.28181764483451843, "logits/chosen": 0.895807147026062, "logits/rejected": 0.9302309155464172, "logps/chosen": -2.4511680603027344, "logps/rejected": -4.290812969207764, "loss": 0.6995, "nll_loss": 0.6713091135025024, "rewards/accuracies": 0.875, "rewards/chosen": -0.24511680006980896, "rewards/margins": 0.18396449089050293, "rewards/rejected": -0.4290813207626343, "step": 3582 }, { "epoch": 9.809719370294319, "grad_norm": 7.241693496704102, "learning_rate": 5.093150684931507e-07, "log_odds_chosen": 2.212630271911621, "log_odds_ratio": -0.27359944581985474, "logits/chosen": 0.9447003602981567, "logits/rejected": 0.870843768119812, "logps/chosen": -2.0323760509490967, "logps/rejected": -4.138628959655762, "loss": 0.7909, "nll_loss": 0.7635806202888489, "rewards/accuracies": 1.0, "rewards/chosen": -0.2032376080751419, "rewards/margins": 0.2106252908706665, "rewards/rejected": -0.4138629138469696, "step": 3583 }, { "epoch": 9.81245722108145, "grad_norm": 5.071437358856201, "learning_rate": 5.091780821917808e-07, "log_odds_chosen": 3.3404417037963867, "log_odds_ratio": -0.21291962265968323, "logits/chosen": 0.8687689304351807, "logits/rejected": 0.6640852689743042, "logps/chosen": -1.7113901376724243, "logps/rejected": -4.865102767944336, "loss": 0.8058, "nll_loss": 0.7845011949539185, "rewards/accuracies": 0.875, "rewards/chosen": -0.17113903164863586, "rewards/margins": 0.31537124514579773, "rewards/rejected": -0.4865102469921112, "step": 3584 }, { "epoch": 9.815195071868583, "grad_norm": 4.25691556930542, "learning_rate": 5.090410958904109e-07, "log_odds_chosen": 2.2247376441955566, "log_odds_ratio": -0.1762225180864334, "logits/chosen": 0.9242799282073975, "logits/rejected": 0.9998560547828674, "logps/chosen": -1.9473137855529785, "logps/rejected": -4.0267486572265625, "loss": 0.6436, "nll_loss": 0.6259709000587463, "rewards/accuracies": 1.0, "rewards/chosen": -0.19473138451576233, "rewards/margins": 0.20794346928596497, "rewards/rejected": -0.4026748538017273, "step": 3585 }, { "epoch": 9.817932922655714, "grad_norm": 5.591609954833984, "learning_rate": 5.089041095890411e-07, "log_odds_chosen": 1.7284435033798218, "log_odds_ratio": -0.3159582018852234, "logits/chosen": 0.9703775644302368, "logits/rejected": 0.9888916015625, "logps/chosen": -2.5064618587493896, "logps/rejected": -4.132861614227295, "loss": 0.7661, "nll_loss": 0.7345341444015503, "rewards/accuracies": 0.875, "rewards/chosen": -0.2506462037563324, "rewards/margins": 0.16263996064662933, "rewards/rejected": -0.41328614950180054, "step": 3586 }, { "epoch": 9.820670773442847, "grad_norm": 4.208066940307617, "learning_rate": 5.087671232876712e-07, "log_odds_chosen": 1.7203630208969116, "log_odds_ratio": -0.32675525546073914, "logits/chosen": 0.6824848651885986, "logits/rejected": 0.7259749174118042, "logps/chosen": -2.0307259559631348, "logps/rejected": -3.681100368499756, "loss": 0.6989, "nll_loss": 0.6661929488182068, "rewards/accuracies": 0.875, "rewards/chosen": -0.20307260751724243, "rewards/margins": 0.16503743827342987, "rewards/rejected": -0.3681100606918335, "step": 3587 }, { "epoch": 9.82340862422998, "grad_norm": 4.892967224121094, "learning_rate": 5.086301369863013e-07, "log_odds_chosen": 1.6844868659973145, "log_odds_ratio": -0.530381977558136, "logits/chosen": 0.6593526005744934, "logits/rejected": 0.661231517791748, "logps/chosen": -2.6197996139526367, "logps/rejected": -4.253308296203613, "loss": 0.7728, "nll_loss": 0.719738245010376, "rewards/accuracies": 0.875, "rewards/chosen": -0.26197993755340576, "rewards/margins": 0.1633508950471878, "rewards/rejected": -0.42533087730407715, "step": 3588 }, { "epoch": 9.826146475017111, "grad_norm": 5.105629920959473, "learning_rate": 5.084931506849315e-07, "log_odds_chosen": 1.6042366027832031, "log_odds_ratio": -0.3273674547672272, "logits/chosen": 0.8515977263450623, "logits/rejected": 0.8640756607055664, "logps/chosen": -2.20188045501709, "logps/rejected": -3.731095790863037, "loss": 0.646, "nll_loss": 0.6132209300994873, "rewards/accuracies": 0.875, "rewards/chosen": -0.22018803656101227, "rewards/margins": 0.15292155742645264, "rewards/rejected": -0.3731095790863037, "step": 3589 }, { "epoch": 9.828884325804244, "grad_norm": 7.263917446136475, "learning_rate": 5.083561643835617e-07, "log_odds_chosen": 0.9486218094825745, "log_odds_ratio": -0.4816456437110901, "logits/chosen": 0.7475253343582153, "logits/rejected": 0.7361459136009216, "logps/chosen": -2.9804952144622803, "logps/rejected": -3.878131866455078, "loss": 0.7746, "nll_loss": 0.7264151573181152, "rewards/accuracies": 0.625, "rewards/chosen": -0.2980495095252991, "rewards/margins": 0.08976369351148605, "rewards/rejected": -0.3878132104873657, "step": 3590 }, { "epoch": 9.831622176591376, "grad_norm": 5.328027248382568, "learning_rate": 5.082191780821917e-07, "log_odds_chosen": 1.415748953819275, "log_odds_ratio": -0.3703150749206543, "logits/chosen": 0.9436154961585999, "logits/rejected": 0.9560373425483704, "logps/chosen": -2.286396026611328, "logps/rejected": -3.561818838119507, "loss": 0.7544, "nll_loss": 0.7173405885696411, "rewards/accuracies": 0.875, "rewards/chosen": -0.2286396026611328, "rewards/margins": 0.12754228711128235, "rewards/rejected": -0.35618191957473755, "step": 3591 }, { "epoch": 9.834360027378509, "grad_norm": 5.1851701736450195, "learning_rate": 5.080821917808219e-07, "log_odds_chosen": 2.365237236022949, "log_odds_ratio": -0.3212249279022217, "logits/chosen": 0.9897189736366272, "logits/rejected": 1.0205758810043335, "logps/chosen": -2.3630902767181396, "logps/rejected": -4.639791011810303, "loss": 0.6905, "nll_loss": 0.6583913564682007, "rewards/accuracies": 0.75, "rewards/chosen": -0.2363090217113495, "rewards/margins": 0.22767004370689392, "rewards/rejected": -0.4639790654182434, "step": 3592 }, { "epoch": 9.83709787816564, "grad_norm": 6.35830545425415, "learning_rate": 5.079452054794521e-07, "log_odds_chosen": 0.4620710015296936, "log_odds_ratio": -0.6117933988571167, "logits/chosen": 0.881077229976654, "logits/rejected": 0.8799816370010376, "logps/chosen": -2.6977956295013428, "logps/rejected": -3.094388484954834, "loss": 0.7276, "nll_loss": 0.6664252877235413, "rewards/accuracies": 0.75, "rewards/chosen": -0.2697795629501343, "rewards/margins": 0.0396592877805233, "rewards/rejected": -0.30943888425827026, "step": 3593 }, { "epoch": 9.839835728952773, "grad_norm": 4.756717205047607, "learning_rate": 5.078082191780821e-07, "log_odds_chosen": 1.3937453031539917, "log_odds_ratio": -0.2680090069770813, "logits/chosen": 1.065274953842163, "logits/rejected": 1.0852582454681396, "logps/chosen": -2.126422882080078, "logps/rejected": -3.4015588760375977, "loss": 0.7179, "nll_loss": 0.6910502910614014, "rewards/accuracies": 1.0, "rewards/chosen": -0.21264228224754333, "rewards/margins": 0.12751361727714539, "rewards/rejected": -0.3401558995246887, "step": 3594 }, { "epoch": 9.842573579739904, "grad_norm": 4.513736248016357, "learning_rate": 5.076712328767123e-07, "log_odds_chosen": 2.598261833190918, "log_odds_ratio": -0.22260093688964844, "logits/chosen": 0.9481939673423767, "logits/rejected": 1.0013693571090698, "logps/chosen": -1.6625293493270874, "logps/rejected": -4.045870304107666, "loss": 0.6298, "nll_loss": 0.6075242161750793, "rewards/accuracies": 0.875, "rewards/chosen": -0.16625294089317322, "rewards/margins": 0.23833411931991577, "rewards/rejected": -0.4045870900154114, "step": 3595 }, { "epoch": 9.845311430527037, "grad_norm": 5.830620765686035, "learning_rate": 5.075342465753425e-07, "log_odds_chosen": 0.7201187014579773, "log_odds_ratio": -0.5084619522094727, "logits/chosen": 0.8869370222091675, "logits/rejected": 0.883141040802002, "logps/chosen": -2.5413293838500977, "logps/rejected": -3.223461627960205, "loss": 0.7403, "nll_loss": 0.6894816160202026, "rewards/accuracies": 0.75, "rewards/chosen": -0.2541329264640808, "rewards/margins": 0.06821323186159134, "rewards/rejected": -0.32234615087509155, "step": 3596 }, { "epoch": 9.848049281314168, "grad_norm": 4.9280219078063965, "learning_rate": 5.073972602739726e-07, "log_odds_chosen": 1.9661312103271484, "log_odds_ratio": -0.2458513230085373, "logits/chosen": 0.8965991735458374, "logits/rejected": 0.9263657331466675, "logps/chosen": -2.146732807159424, "logps/rejected": -3.9862804412841797, "loss": 0.6867, "nll_loss": 0.6620721817016602, "rewards/accuracies": 1.0, "rewards/chosen": -0.21467328071594238, "rewards/margins": 0.18395476043224335, "rewards/rejected": -0.3986280560493469, "step": 3597 }, { "epoch": 9.850787132101301, "grad_norm": 3.9508557319641113, "learning_rate": 5.072602739726027e-07, "log_odds_chosen": 2.476461172103882, "log_odds_ratio": -0.19594450294971466, "logits/chosen": 0.830011248588562, "logits/rejected": 0.8422045707702637, "logps/chosen": -1.819823980331421, "logps/rejected": -4.1585893630981445, "loss": 0.6675, "nll_loss": 0.6479413509368896, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819823980331421, "rewards/margins": 0.2338765263557434, "rewards/rejected": -0.4158589243888855, "step": 3598 }, { "epoch": 9.853524982888432, "grad_norm": 5.104660987854004, "learning_rate": 5.071232876712328e-07, "log_odds_chosen": 1.7623095512390137, "log_odds_ratio": -0.2037617564201355, "logits/chosen": 0.7290560603141785, "logits/rejected": 0.7710254788398743, "logps/chosen": -2.136481761932373, "logps/rejected": -3.759762763977051, "loss": 0.7541, "nll_loss": 0.7337305545806885, "rewards/accuracies": 1.0, "rewards/chosen": -0.21364817023277283, "rewards/margins": 0.1623280793428421, "rewards/rejected": -0.3759762644767761, "step": 3599 }, { "epoch": 9.856262833675565, "grad_norm": 5.46980094909668, "learning_rate": 5.06986301369863e-07, "log_odds_chosen": 2.5470499992370605, "log_odds_ratio": -0.22479495406150818, "logits/chosen": 0.7767848372459412, "logits/rejected": 0.7004606127738953, "logps/chosen": -2.6175053119659424, "logps/rejected": -5.0410661697387695, "loss": 0.793, "nll_loss": 0.7705092430114746, "rewards/accuracies": 1.0, "rewards/chosen": -0.2617505192756653, "rewards/margins": 0.2423561066389084, "rewards/rejected": -0.5041066408157349, "step": 3600 }, { "epoch": 9.859000684462696, "grad_norm": 4.77255916595459, "learning_rate": 5.068493150684931e-07, "log_odds_chosen": 2.8767857551574707, "log_odds_ratio": -0.15831512212753296, "logits/chosen": 1.2199580669403076, "logits/rejected": 1.2698315382003784, "logps/chosen": -2.364981174468994, "logps/rejected": -5.123447418212891, "loss": 0.6926, "nll_loss": 0.6767370104789734, "rewards/accuracies": 1.0, "rewards/chosen": -0.23649811744689941, "rewards/margins": 0.2758466303348541, "rewards/rejected": -0.5123447775840759, "step": 3601 }, { "epoch": 9.86173853524983, "grad_norm": 4.356050491333008, "learning_rate": 5.067123287671232e-07, "log_odds_chosen": 2.2729642391204834, "log_odds_ratio": -0.19537115097045898, "logits/chosen": 0.9688981771469116, "logits/rejected": 0.9341235160827637, "logps/chosen": -1.730766773223877, "logps/rejected": -3.8318092823028564, "loss": 0.6892, "nll_loss": 0.6696482300758362, "rewards/accuracies": 1.0, "rewards/chosen": -0.17307668924331665, "rewards/margins": 0.21010424196720123, "rewards/rejected": -0.3831809461116791, "step": 3602 }, { "epoch": 9.86447638603696, "grad_norm": 4.381004333496094, "learning_rate": 5.065753424657534e-07, "log_odds_chosen": 2.2318296432495117, "log_odds_ratio": -0.18658949434757233, "logits/chosen": 0.5955836772918701, "logits/rejected": 0.5723426342010498, "logps/chosen": -2.3175320625305176, "logps/rejected": -4.431017875671387, "loss": 0.7011, "nll_loss": 0.6824178695678711, "rewards/accuracies": 1.0, "rewards/chosen": -0.23175323009490967, "rewards/margins": 0.21134856343269348, "rewards/rejected": -0.44310179352760315, "step": 3603 }, { "epoch": 9.867214236824093, "grad_norm": 5.966485977172852, "learning_rate": 5.064383561643836e-07, "log_odds_chosen": 1.5652384757995605, "log_odds_ratio": -0.6130853295326233, "logits/chosen": 0.8307812213897705, "logits/rejected": 0.8771440982818604, "logps/chosen": -2.2111823558807373, "logps/rejected": -3.645350456237793, "loss": 0.7349, "nll_loss": 0.6735929846763611, "rewards/accuracies": 0.75, "rewards/chosen": -0.2211182415485382, "rewards/margins": 0.14341680705547333, "rewards/rejected": -0.36453503370285034, "step": 3604 }, { "epoch": 9.869952087611225, "grad_norm": 6.006649971008301, "learning_rate": 5.063013698630136e-07, "log_odds_chosen": 1.2260340452194214, "log_odds_ratio": -0.3787015378475189, "logits/chosen": 0.6874339580535889, "logits/rejected": 0.6883230209350586, "logps/chosen": -2.155344009399414, "logps/rejected": -3.258603096008301, "loss": 0.6803, "nll_loss": 0.6424754858016968, "rewards/accuracies": 0.875, "rewards/chosen": -0.21553438901901245, "rewards/margins": 0.11032593250274658, "rewards/rejected": -0.32586032152175903, "step": 3605 }, { "epoch": 9.872689938398358, "grad_norm": 5.344951152801514, "learning_rate": 5.061643835616438e-07, "log_odds_chosen": 0.8652927875518799, "log_odds_ratio": -0.48866933584213257, "logits/chosen": 0.9499594569206238, "logits/rejected": 0.9714561700820923, "logps/chosen": -2.584000825881958, "logps/rejected": -3.3540449142456055, "loss": 0.9025, "nll_loss": 0.8536545038223267, "rewards/accuracies": 0.625, "rewards/chosen": -0.2584000825881958, "rewards/margins": 0.07700436562299728, "rewards/rejected": -0.3354044556617737, "step": 3606 }, { "epoch": 9.875427789185489, "grad_norm": 6.1676554679870605, "learning_rate": 5.06027397260274e-07, "log_odds_chosen": 1.395952582359314, "log_odds_ratio": -0.4928499460220337, "logits/chosen": 0.7560591101646423, "logits/rejected": 0.781989336013794, "logps/chosen": -2.4625964164733887, "logps/rejected": -3.759035110473633, "loss": 0.7239, "nll_loss": 0.6746048927307129, "rewards/accuracies": 0.75, "rewards/chosen": -0.2462596446275711, "rewards/margins": 0.12964384257793427, "rewards/rejected": -0.37590348720550537, "step": 3607 }, { "epoch": 9.878165639972622, "grad_norm": 4.477207660675049, "learning_rate": 5.05890410958904e-07, "log_odds_chosen": 2.2934906482696533, "log_odds_ratio": -0.2274448126554489, "logits/chosen": 0.9837720394134521, "logits/rejected": 1.0609744787216187, "logps/chosen": -2.2827086448669434, "logps/rejected": -4.452483177185059, "loss": 0.7228, "nll_loss": 0.7000429034233093, "rewards/accuracies": 1.0, "rewards/chosen": -0.22827087342739105, "rewards/margins": 0.21697741746902466, "rewards/rejected": -0.4452482759952545, "step": 3608 }, { "epoch": 9.880903490759753, "grad_norm": 8.24825668334961, "learning_rate": 5.057534246575342e-07, "log_odds_chosen": 1.8061106204986572, "log_odds_ratio": -0.7290700674057007, "logits/chosen": 0.6897085905075073, "logits/rejected": 0.6758050322532654, "logps/chosen": -2.788217544555664, "logps/rejected": -4.449853420257568, "loss": 0.8844, "nll_loss": 0.8115402460098267, "rewards/accuracies": 0.875, "rewards/chosen": -0.27882176637649536, "rewards/margins": 0.1661635935306549, "rewards/rejected": -0.44498535990715027, "step": 3609 }, { "epoch": 9.883641341546886, "grad_norm": 5.447396278381348, "learning_rate": 5.056164383561644e-07, "log_odds_chosen": 1.1858147382736206, "log_odds_ratio": -0.32792019844055176, "logits/chosen": 1.0123860836029053, "logits/rejected": 1.0207445621490479, "logps/chosen": -1.9338386058807373, "logps/rejected": -3.0317623615264893, "loss": 0.6856, "nll_loss": 0.6527859568595886, "rewards/accuracies": 1.0, "rewards/chosen": -0.1933838427066803, "rewards/margins": 0.10979236662387848, "rewards/rejected": -0.30317622423171997, "step": 3610 }, { "epoch": 9.886379192334019, "grad_norm": 4.151419639587402, "learning_rate": 5.054794520547944e-07, "log_odds_chosen": 1.7818121910095215, "log_odds_ratio": -0.2969132661819458, "logits/chosen": 0.8143925666809082, "logits/rejected": 0.8090898394584656, "logps/chosen": -1.8412225246429443, "logps/rejected": -3.4825403690338135, "loss": 0.7168, "nll_loss": 0.6871228218078613, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841222494840622, "rewards/margins": 0.1641317903995514, "rewards/rejected": -0.34825399518013, "step": 3611 }, { "epoch": 9.88911704312115, "grad_norm": 7.825359344482422, "learning_rate": 5.053424657534246e-07, "log_odds_chosen": 0.6103731393814087, "log_odds_ratio": -0.7359903454780579, "logits/chosen": 0.8009005784988403, "logits/rejected": 0.7252486348152161, "logps/chosen": -2.309384346008301, "logps/rejected": -2.8082542419433594, "loss": 0.7201, "nll_loss": 0.6464784145355225, "rewards/accuracies": 0.875, "rewards/chosen": -0.23093843460083008, "rewards/margins": 0.04988696053624153, "rewards/rejected": -0.2808254063129425, "step": 3612 }, { "epoch": 9.891854893908281, "grad_norm": 4.620652675628662, "learning_rate": 5.052054794520547e-07, "log_odds_chosen": 2.3934648036956787, "log_odds_ratio": -0.1963721215724945, "logits/chosen": 0.7903792262077332, "logits/rejected": 0.8245280385017395, "logps/chosen": -2.256098985671997, "logps/rejected": -4.534080505371094, "loss": 0.668, "nll_loss": 0.6484091281890869, "rewards/accuracies": 0.875, "rewards/chosen": -0.2256098985671997, "rewards/margins": 0.22779811918735504, "rewards/rejected": -0.45340800285339355, "step": 3613 }, { "epoch": 9.894592744695414, "grad_norm": 4.542936325073242, "learning_rate": 5.050684931506849e-07, "log_odds_chosen": 3.2798237800598145, "log_odds_ratio": -0.14288422465324402, "logits/chosen": 1.0928676128387451, "logits/rejected": 1.1441965103149414, "logps/chosen": -1.9838893413543701, "logps/rejected": -5.127890110015869, "loss": 0.6569, "nll_loss": 0.6425837278366089, "rewards/accuracies": 1.0, "rewards/chosen": -0.198388934135437, "rewards/margins": 0.3144000768661499, "rewards/rejected": -0.5127890110015869, "step": 3614 }, { "epoch": 9.897330595482547, "grad_norm": 4.477444171905518, "learning_rate": 5.04931506849315e-07, "log_odds_chosen": 2.5189008712768555, "log_odds_ratio": -0.13162878155708313, "logits/chosen": 0.6094182133674622, "logits/rejected": 0.5465705394744873, "logps/chosen": -2.1981565952301025, "logps/rejected": -4.600730895996094, "loss": 0.6729, "nll_loss": 0.659724235534668, "rewards/accuracies": 1.0, "rewards/chosen": -0.21981565654277802, "rewards/margins": 0.24025742709636688, "rewards/rejected": -0.4600730836391449, "step": 3615 }, { "epoch": 9.900068446269678, "grad_norm": 4.916029453277588, "learning_rate": 5.047945205479451e-07, "log_odds_chosen": 1.1634405851364136, "log_odds_ratio": -0.37408459186553955, "logits/chosen": 0.628341019153595, "logits/rejected": 0.5734683871269226, "logps/chosen": -1.6734213829040527, "logps/rejected": -2.731771945953369, "loss": 0.7081, "nll_loss": 0.6706980466842651, "rewards/accuracies": 0.75, "rewards/chosen": -0.1673421412706375, "rewards/margins": 0.10583505779504776, "rewards/rejected": -0.27317720651626587, "step": 3616 }, { "epoch": 9.902806297056811, "grad_norm": 5.473836898803711, "learning_rate": 5.046575342465753e-07, "log_odds_chosen": 1.7720189094543457, "log_odds_ratio": -0.24087317287921906, "logits/chosen": 1.0648316144943237, "logits/rejected": 1.0964218378067017, "logps/chosen": -2.477473497390747, "logps/rejected": -4.161658763885498, "loss": 0.779, "nll_loss": 0.7549418807029724, "rewards/accuracies": 1.0, "rewards/chosen": -0.24774736166000366, "rewards/margins": 0.1684184968471527, "rewards/rejected": -0.416165828704834, "step": 3617 }, { "epoch": 9.905544147843942, "grad_norm": 4.421024322509766, "learning_rate": 5.045205479452054e-07, "log_odds_chosen": 3.1550116539001465, "log_odds_ratio": -0.1209561824798584, "logits/chosen": 1.0312426090240479, "logits/rejected": 1.1313247680664062, "logps/chosen": -2.239715576171875, "logps/rejected": -5.254795074462891, "loss": 0.6663, "nll_loss": 0.6542260050773621, "rewards/accuracies": 1.0, "rewards/chosen": -0.22397157549858093, "rewards/margins": 0.30150794982910156, "rewards/rejected": -0.5254795551300049, "step": 3618 }, { "epoch": 9.908281998631075, "grad_norm": 4.134166240692139, "learning_rate": 5.043835616438355e-07, "log_odds_chosen": 1.5696754455566406, "log_odds_ratio": -0.2515266239643097, "logits/chosen": 0.8640543222427368, "logits/rejected": 0.8713836073875427, "logps/chosen": -2.2665462493896484, "logps/rejected": -3.751607894897461, "loss": 0.7751, "nll_loss": 0.7499150037765503, "rewards/accuracies": 1.0, "rewards/chosen": -0.22665464878082275, "rewards/margins": 0.14850616455078125, "rewards/rejected": -0.375160813331604, "step": 3619 }, { "epoch": 9.911019849418206, "grad_norm": 3.991964817047119, "learning_rate": 5.042465753424657e-07, "log_odds_chosen": 5.409841537475586, "log_odds_ratio": -0.02694188803434372, "logits/chosen": 0.981271505355835, "logits/rejected": 1.0424530506134033, "logps/chosen": -1.8436939716339111, "logps/rejected": -7.049707889556885, "loss": 0.6612, "nll_loss": 0.6584941744804382, "rewards/accuracies": 1.0, "rewards/chosen": -0.18436940014362335, "rewards/margins": 0.5206013917922974, "rewards/rejected": -0.7049708366394043, "step": 3620 }, { "epoch": 9.91375770020534, "grad_norm": 4.768375396728516, "learning_rate": 5.041095890410959e-07, "log_odds_chosen": 1.5113054513931274, "log_odds_ratio": -0.3908888101577759, "logits/chosen": 0.6790610551834106, "logits/rejected": 0.6569011807441711, "logps/chosen": -1.9639968872070312, "logps/rejected": -3.373786687850952, "loss": 0.7232, "nll_loss": 0.6841334700584412, "rewards/accuracies": 0.75, "rewards/chosen": -0.19639968872070312, "rewards/margins": 0.14097896218299866, "rewards/rejected": -0.33737868070602417, "step": 3621 }, { "epoch": 9.91649555099247, "grad_norm": 5.0845046043396, "learning_rate": 5.039726027397259e-07, "log_odds_chosen": 0.5652371644973755, "log_odds_ratio": -0.49978530406951904, "logits/chosen": 0.7497888803482056, "logits/rejected": 0.7861689329147339, "logps/chosen": -2.892298698425293, "logps/rejected": -3.426135540008545, "loss": 0.7637, "nll_loss": 0.7136739492416382, "rewards/accuracies": 0.75, "rewards/chosen": -0.2892298996448517, "rewards/margins": 0.05338365212082863, "rewards/rejected": -0.3426135182380676, "step": 3622 }, { "epoch": 9.919233401779604, "grad_norm": 4.571034908294678, "learning_rate": 5.038356164383561e-07, "log_odds_chosen": 1.0011389255523682, "log_odds_ratio": -0.3356703817844391, "logits/chosen": 0.8283507823944092, "logits/rejected": 0.8108999729156494, "logps/chosen": -2.4589169025421143, "logps/rejected": -3.372795820236206, "loss": 0.7451, "nll_loss": 0.7115694284439087, "rewards/accuracies": 1.0, "rewards/chosen": -0.24589170515537262, "rewards/margins": 0.09138787537813187, "rewards/rejected": -0.3372795879840851, "step": 3623 }, { "epoch": 9.921971252566735, "grad_norm": 5.885838508605957, "learning_rate": 5.036986301369863e-07, "log_odds_chosen": 2.1843130588531494, "log_odds_ratio": -0.4636721909046173, "logits/chosen": 0.9896447658538818, "logits/rejected": 0.9988292455673218, "logps/chosen": -2.610239028930664, "logps/rejected": -4.711920261383057, "loss": 0.7035, "nll_loss": 0.6570900678634644, "rewards/accuracies": 0.875, "rewards/chosen": -0.2610238790512085, "rewards/margins": 0.21016816794872284, "rewards/rejected": -0.47119203209877014, "step": 3624 }, { "epoch": 9.924709103353868, "grad_norm": 4.333517074584961, "learning_rate": 5.035616438356163e-07, "log_odds_chosen": 3.1789276599884033, "log_odds_ratio": -0.18314021825790405, "logits/chosen": 0.9760097861289978, "logits/rejected": 0.9917051792144775, "logps/chosen": -2.391820192337036, "logps/rejected": -5.213307857513428, "loss": 0.677, "nll_loss": 0.6586512327194214, "rewards/accuracies": 1.0, "rewards/chosen": -0.2391820251941681, "rewards/margins": 0.2821488082408905, "rewards/rejected": -0.5213308334350586, "step": 3625 }, { "epoch": 9.927446954140999, "grad_norm": 6.1942338943481445, "learning_rate": 5.034246575342465e-07, "log_odds_chosen": 1.43532133102417, "log_odds_ratio": -0.5676718354225159, "logits/chosen": 0.936725378036499, "logits/rejected": 1.1062994003295898, "logps/chosen": -2.8548223972320557, "logps/rejected": -4.193840503692627, "loss": 0.8655, "nll_loss": 0.8087305426597595, "rewards/accuracies": 0.75, "rewards/chosen": -0.2854822278022766, "rewards/margins": 0.13390183448791504, "rewards/rejected": -0.41938409209251404, "step": 3626 }, { "epoch": 9.930184804928132, "grad_norm": 4.242059707641602, "learning_rate": 5.032876712328768e-07, "log_odds_chosen": 2.16603946685791, "log_odds_ratio": -0.2971497178077698, "logits/chosen": 0.7453578114509583, "logits/rejected": 0.7788662314414978, "logps/chosen": -2.273052453994751, "logps/rejected": -4.296229839324951, "loss": 0.6578, "nll_loss": 0.6281350255012512, "rewards/accuracies": 0.75, "rewards/chosen": -0.22730523347854614, "rewards/margins": 0.2023177444934845, "rewards/rejected": -0.42962294816970825, "step": 3627 }, { "epoch": 9.932922655715263, "grad_norm": 5.519603252410889, "learning_rate": 5.031506849315069e-07, "log_odds_chosen": 2.299987316131592, "log_odds_ratio": -0.2263704538345337, "logits/chosen": 0.8079766631126404, "logits/rejected": 0.787997305393219, "logps/chosen": -2.153749704360962, "logps/rejected": -4.307290077209473, "loss": 0.818, "nll_loss": 0.7953945398330688, "rewards/accuracies": 1.0, "rewards/chosen": -0.21537496149539948, "rewards/margins": 0.2153540849685669, "rewards/rejected": -0.4307290315628052, "step": 3628 }, { "epoch": 9.935660506502396, "grad_norm": 5.981080055236816, "learning_rate": 5.03013698630137e-07, "log_odds_chosen": 1.5643976926803589, "log_odds_ratio": -0.3996538817882538, "logits/chosen": 0.8860588669776917, "logits/rejected": 0.9153152108192444, "logps/chosen": -2.4577651023864746, "logps/rejected": -3.956956386566162, "loss": 0.8554, "nll_loss": 0.8153887987136841, "rewards/accuracies": 0.75, "rewards/chosen": -0.24577650427818298, "rewards/margins": 0.14991912245750427, "rewards/rejected": -0.39569562673568726, "step": 3629 }, { "epoch": 9.938398357289527, "grad_norm": 7.96376895904541, "learning_rate": 5.02876712328767e-07, "log_odds_chosen": 1.9986475706100464, "log_odds_ratio": -0.3607892394065857, "logits/chosen": 1.0267711877822876, "logits/rejected": 1.0395424365997314, "logps/chosen": -2.5658364295959473, "logps/rejected": -4.484126091003418, "loss": 0.9187, "nll_loss": 0.8826428651809692, "rewards/accuracies": 0.75, "rewards/chosen": -0.25658366084098816, "rewards/margins": 0.19182895123958588, "rewards/rejected": -0.44841259717941284, "step": 3630 }, { "epoch": 9.94113620807666, "grad_norm": 5.140088081359863, "learning_rate": 5.027397260273973e-07, "log_odds_chosen": 2.0836472511291504, "log_odds_ratio": -0.23598960041999817, "logits/chosen": 0.8695275783538818, "logits/rejected": 0.8722847104072571, "logps/chosen": -1.9396336078643799, "logps/rejected": -3.8484957218170166, "loss": 0.6342, "nll_loss": 0.6106301546096802, "rewards/accuracies": 1.0, "rewards/chosen": -0.19396334886550903, "rewards/margins": 0.1908862292766571, "rewards/rejected": -0.38484957814216614, "step": 3631 }, { "epoch": 9.943874058863791, "grad_norm": 4.646302223205566, "learning_rate": 5.026027397260274e-07, "log_odds_chosen": 2.7252016067504883, "log_odds_ratio": -0.2096279263496399, "logits/chosen": 0.7321416139602661, "logits/rejected": 0.7929542660713196, "logps/chosen": -1.9734634160995483, "logps/rejected": -4.506762504577637, "loss": 0.8227, "nll_loss": 0.8017089366912842, "rewards/accuracies": 1.0, "rewards/chosen": -0.19734635949134827, "rewards/margins": 0.25332990288734436, "rewards/rejected": -0.4506762623786926, "step": 3632 }, { "epoch": 9.946611909650924, "grad_norm": 5.737789154052734, "learning_rate": 5.024657534246575e-07, "log_odds_chosen": 2.403444766998291, "log_odds_ratio": -0.2532411515712738, "logits/chosen": 1.0669255256652832, "logits/rejected": 1.0482311248779297, "logps/chosen": -2.171031951904297, "logps/rejected": -4.446040153503418, "loss": 0.6982, "nll_loss": 0.6728424429893494, "rewards/accuracies": 0.875, "rewards/chosen": -0.21710318326950073, "rewards/margins": 0.2275008261203766, "rewards/rejected": -0.4446040093898773, "step": 3633 }, { "epoch": 9.949349760438055, "grad_norm": 5.210937023162842, "learning_rate": 5.023287671232877e-07, "log_odds_chosen": 1.366611123085022, "log_odds_ratio": -0.36285293102264404, "logits/chosen": 0.7230402231216431, "logits/rejected": 0.7111573219299316, "logps/chosen": -2.0572586059570312, "logps/rejected": -3.281233310699463, "loss": 0.741, "nll_loss": 0.7046669721603394, "rewards/accuracies": 0.875, "rewards/chosen": -0.20572587847709656, "rewards/margins": 0.12239749729633331, "rewards/rejected": -0.3281233608722687, "step": 3634 }, { "epoch": 9.952087611225188, "grad_norm": 5.971042633056641, "learning_rate": 5.021917808219179e-07, "log_odds_chosen": 2.782938003540039, "log_odds_ratio": -0.27230116724967957, "logits/chosen": 1.1254558563232422, "logits/rejected": 1.1514973640441895, "logps/chosen": -2.2688417434692383, "logps/rejected": -4.932814598083496, "loss": 0.6079, "nll_loss": 0.5806599855422974, "rewards/accuracies": 0.875, "rewards/chosen": -0.22688418626785278, "rewards/margins": 0.26639726758003235, "rewards/rejected": -0.49328145384788513, "step": 3635 }, { "epoch": 9.95482546201232, "grad_norm": 4.156544208526611, "learning_rate": 5.020547945205479e-07, "log_odds_chosen": 3.284010648727417, "log_odds_ratio": -0.13545174896717072, "logits/chosen": 1.011897325515747, "logits/rejected": 0.9903143644332886, "logps/chosen": -1.7732703685760498, "logps/rejected": -4.863612651824951, "loss": 0.6943, "nll_loss": 0.6807143092155457, "rewards/accuracies": 1.0, "rewards/chosen": -0.17732705175876617, "rewards/margins": 0.30903419852256775, "rewards/rejected": -0.4863612651824951, "step": 3636 }, { "epoch": 9.957563312799453, "grad_norm": 6.104689121246338, "learning_rate": 5.019178082191781e-07, "log_odds_chosen": 1.7284820079803467, "log_odds_ratio": -0.2561224102973938, "logits/chosen": 1.1287970542907715, "logits/rejected": 1.161318302154541, "logps/chosen": -3.0898985862731934, "logps/rejected": -4.740692138671875, "loss": 0.7212, "nll_loss": 0.6955723166465759, "rewards/accuracies": 0.875, "rewards/chosen": -0.30898985266685486, "rewards/margins": 0.16507938504219055, "rewards/rejected": -0.4740692377090454, "step": 3637 }, { "epoch": 9.960301163586585, "grad_norm": 5.509306907653809, "learning_rate": 5.017808219178083e-07, "log_odds_chosen": 1.1590900421142578, "log_odds_ratio": -0.37702760100364685, "logits/chosen": 0.9228813052177429, "logits/rejected": 0.914174497127533, "logps/chosen": -2.9393889904022217, "logps/rejected": -4.015583038330078, "loss": 0.7448, "nll_loss": 0.7071308493614197, "rewards/accuracies": 0.875, "rewards/chosen": -0.29393890500068665, "rewards/margins": 0.10761941224336624, "rewards/rejected": -0.4015583395957947, "step": 3638 }, { "epoch": 9.963039014373717, "grad_norm": 5.519390106201172, "learning_rate": 5.016438356164383e-07, "log_odds_chosen": 1.449872612953186, "log_odds_ratio": -0.37348473072052, "logits/chosen": 1.1119754314422607, "logits/rejected": 1.0751856565475464, "logps/chosen": -2.90006422996521, "logps/rejected": -4.3072190284729, "loss": 0.8458, "nll_loss": 0.8084056377410889, "rewards/accuracies": 0.875, "rewards/chosen": -0.2900064289569855, "rewards/margins": 0.14071547985076904, "rewards/rejected": -0.43072187900543213, "step": 3639 }, { "epoch": 9.965776865160848, "grad_norm": 4.2047624588012695, "learning_rate": 5.015068493150685e-07, "log_odds_chosen": 0.8416481018066406, "log_odds_ratio": -0.5733622312545776, "logits/chosen": 1.0091938972473145, "logits/rejected": 1.117613434791565, "logps/chosen": -2.8821206092834473, "logps/rejected": -3.679091453552246, "loss": 0.7058, "nll_loss": 0.6484715938568115, "rewards/accuracies": 0.875, "rewards/chosen": -0.2882120907306671, "rewards/margins": 0.07969707995653152, "rewards/rejected": -0.36790916323661804, "step": 3640 }, { "epoch": 9.96851471594798, "grad_norm": 5.544960975646973, "learning_rate": 5.013698630136987e-07, "log_odds_chosen": 1.3074946403503418, "log_odds_ratio": -0.30609554052352905, "logits/chosen": 0.898838996887207, "logits/rejected": 0.8668347001075745, "logps/chosen": -2.5257697105407715, "logps/rejected": -3.760007381439209, "loss": 0.7421, "nll_loss": 0.7114592790603638, "rewards/accuracies": 1.0, "rewards/chosen": -0.2525769770145416, "rewards/margins": 0.1234237551689148, "rewards/rejected": -0.3760007321834564, "step": 3641 }, { "epoch": 9.971252566735114, "grad_norm": 4.937760829925537, "learning_rate": 5.012328767123288e-07, "log_odds_chosen": 0.9620224237442017, "log_odds_ratio": -0.4707570970058441, "logits/chosen": 0.7435436248779297, "logits/rejected": 0.7025352120399475, "logps/chosen": -1.8142282962799072, "logps/rejected": -2.698333740234375, "loss": 0.6953, "nll_loss": 0.6482509970664978, "rewards/accuracies": 0.875, "rewards/chosen": -0.18142282962799072, "rewards/margins": 0.08841053396463394, "rewards/rejected": -0.26983335614204407, "step": 3642 }, { "epoch": 9.973990417522245, "grad_norm": 5.1878886222839355, "learning_rate": 5.010958904109589e-07, "log_odds_chosen": 2.235948085784912, "log_odds_ratio": -0.39944618940353394, "logits/chosen": 0.7341236472129822, "logits/rejected": 0.6641026735305786, "logps/chosen": -2.1666793823242188, "logps/rejected": -4.3205108642578125, "loss": 0.8288, "nll_loss": 0.7888540029525757, "rewards/accuracies": 0.875, "rewards/chosen": -0.21666795015335083, "rewards/margins": 0.21538317203521729, "rewards/rejected": -0.4320511221885681, "step": 3643 }, { "epoch": 9.976728268309378, "grad_norm": 5.000331401824951, "learning_rate": 5.00958904109589e-07, "log_odds_chosen": 1.3557775020599365, "log_odds_ratio": -0.37195536494255066, "logits/chosen": 0.9313479661941528, "logits/rejected": 0.9404021501541138, "logps/chosen": -2.0040054321289062, "logps/rejected": -3.2872226238250732, "loss": 0.6334, "nll_loss": 0.5962307453155518, "rewards/accuracies": 0.75, "rewards/chosen": -0.20040054619312286, "rewards/margins": 0.12832172214984894, "rewards/rejected": -0.3287222981452942, "step": 3644 }, { "epoch": 9.979466119096509, "grad_norm": 6.378353595733643, "learning_rate": 5.008219178082192e-07, "log_odds_chosen": 1.1843408346176147, "log_odds_ratio": -0.4384422302246094, "logits/chosen": 1.062553882598877, "logits/rejected": 1.0695903301239014, "logps/chosen": -2.7437148094177246, "logps/rejected": -3.8392231464385986, "loss": 0.6616, "nll_loss": 0.6177651882171631, "rewards/accuracies": 0.875, "rewards/chosen": -0.274371474981308, "rewards/margins": 0.10955087840557098, "rewards/rejected": -0.3839223384857178, "step": 3645 }, { "epoch": 9.982203969883642, "grad_norm": 4.893141746520996, "learning_rate": 5.006849315068493e-07, "log_odds_chosen": 2.1027376651763916, "log_odds_ratio": -0.3657010793685913, "logits/chosen": 0.9961185455322266, "logits/rejected": 1.0494086742401123, "logps/chosen": -2.5762884616851807, "logps/rejected": -4.605055332183838, "loss": 0.8539, "nll_loss": 0.8173419237136841, "rewards/accuracies": 0.75, "rewards/chosen": -0.257628858089447, "rewards/margins": 0.20287668704986572, "rewards/rejected": -0.46050554513931274, "step": 3646 }, { "epoch": 9.984941820670773, "grad_norm": 4.139839172363281, "learning_rate": 5.005479452054794e-07, "log_odds_chosen": 1.93943190574646, "log_odds_ratio": -0.21014530956745148, "logits/chosen": 0.5450223684310913, "logits/rejected": 0.4801725149154663, "logps/chosen": -1.6075150966644287, "logps/rejected": -3.3111376762390137, "loss": 0.699, "nll_loss": 0.6780243515968323, "rewards/accuracies": 1.0, "rewards/chosen": -0.16075152158737183, "rewards/margins": 0.17036227881908417, "rewards/rejected": -0.3311137855052948, "step": 3647 }, { "epoch": 9.987679671457906, "grad_norm": 5.2132391929626465, "learning_rate": 5.004109589041096e-07, "log_odds_chosen": 2.9142165184020996, "log_odds_ratio": -0.3364824056625366, "logits/chosen": 0.9398481845855713, "logits/rejected": 0.931708812713623, "logps/chosen": -2.7777881622314453, "logps/rejected": -5.572427272796631, "loss": 0.7624, "nll_loss": 0.7287123799324036, "rewards/accuracies": 0.75, "rewards/chosen": -0.2777788043022156, "rewards/margins": 0.27946388721466064, "rewards/rejected": -0.557242751121521, "step": 3648 }, { "epoch": 9.990417522245037, "grad_norm": 4.18620491027832, "learning_rate": 5.002739726027398e-07, "log_odds_chosen": 2.0723230838775635, "log_odds_ratio": -0.2274128496646881, "logits/chosen": 0.7567412257194519, "logits/rejected": 0.7069038152694702, "logps/chosen": -2.087130308151245, "logps/rejected": -4.030157566070557, "loss": 0.6941, "nll_loss": 0.6713961362838745, "rewards/accuracies": 1.0, "rewards/chosen": -0.20871305465698242, "rewards/margins": 0.1943027377128601, "rewards/rejected": -0.40301579236984253, "step": 3649 }, { "epoch": 9.99315537303217, "grad_norm": 6.038588523864746, "learning_rate": 5.001369863013698e-07, "log_odds_chosen": 1.8521101474761963, "log_odds_ratio": -0.2096797525882721, "logits/chosen": 1.2131788730621338, "logits/rejected": 1.2098454236984253, "logps/chosen": -2.949277639389038, "logps/rejected": -4.696034908294678, "loss": 0.7168, "nll_loss": 0.6958025097846985, "rewards/accuracies": 0.875, "rewards/chosen": -0.29492777585983276, "rewards/margins": 0.17467573285102844, "rewards/rejected": -0.4696035385131836, "step": 3650 }, { "epoch": 9.995893223819301, "grad_norm": 4.8909783363342285, "learning_rate": 5e-07, "log_odds_chosen": 3.547705888748169, "log_odds_ratio": -0.17080079019069672, "logits/chosen": 0.9058792591094971, "logits/rejected": 0.8759938478469849, "logps/chosen": -2.2667293548583984, "logps/rejected": -5.691409587860107, "loss": 0.7285, "nll_loss": 0.711370050907135, "rewards/accuracies": 1.0, "rewards/chosen": -0.2266729325056076, "rewards/margins": 0.3424680233001709, "rewards/rejected": -0.5691409111022949, "step": 3651 }, { "epoch": 9.998631074606434, "grad_norm": 5.342024326324463, "learning_rate": 4.998630136986301e-07, "log_odds_chosen": 2.6426708698272705, "log_odds_ratio": -0.2546183466911316, "logits/chosen": 0.9703652858734131, "logits/rejected": 1.0624158382415771, "logps/chosen": -2.3404459953308105, "logps/rejected": -4.891003608703613, "loss": 0.6771, "nll_loss": 0.6516380310058594, "rewards/accuracies": 1.0, "rewards/chosen": -0.2340446263551712, "rewards/margins": 0.2550557255744934, "rewards/rejected": -0.4891003668308258, "step": 3652 }, { "epoch": 10.001368925393566, "grad_norm": 4.430479526519775, "learning_rate": 4.997260273972603e-07, "log_odds_chosen": 1.7636390924453735, "log_odds_ratio": -0.2793947458267212, "logits/chosen": 0.7900823950767517, "logits/rejected": 0.8096002340316772, "logps/chosen": -2.3489267826080322, "logps/rejected": -3.9783120155334473, "loss": 0.6644, "nll_loss": 0.6364451050758362, "rewards/accuracies": 0.875, "rewards/chosen": -0.23489268124103546, "rewards/margins": 0.16293853521347046, "rewards/rejected": -0.3978312611579895, "step": 3653 }, { "epoch": 10.004106776180699, "grad_norm": 4.97391414642334, "learning_rate": 4.995890410958904e-07, "log_odds_chosen": 0.9218701720237732, "log_odds_ratio": -0.3700675964355469, "logits/chosen": 0.7425560355186462, "logits/rejected": 0.7560471892356873, "logps/chosen": -2.6340489387512207, "logps/rejected": -3.474113702774048, "loss": 0.8142, "nll_loss": 0.7771525382995605, "rewards/accuracies": 1.0, "rewards/chosen": -0.263404905796051, "rewards/margins": 0.08400648832321167, "rewards/rejected": -0.3474113941192627, "step": 3654 }, { "epoch": 10.00684462696783, "grad_norm": 4.923047065734863, "learning_rate": 4.994520547945205e-07, "log_odds_chosen": 2.848437786102295, "log_odds_ratio": -0.20749789476394653, "logits/chosen": 0.8767062425613403, "logits/rejected": 0.8304933309555054, "logps/chosen": -2.0621142387390137, "logps/rejected": -4.801640033721924, "loss": 0.7955, "nll_loss": 0.7747710943222046, "rewards/accuracies": 1.0, "rewards/chosen": -0.20621143281459808, "rewards/margins": 0.27395254373550415, "rewards/rejected": -0.4801639914512634, "step": 3655 }, { "epoch": 10.009582477754963, "grad_norm": 5.215141296386719, "learning_rate": 4.993150684931507e-07, "log_odds_chosen": 1.2770987749099731, "log_odds_ratio": -0.4533606469631195, "logits/chosen": 0.8002275228500366, "logits/rejected": 0.7391672730445862, "logps/chosen": -1.755784273147583, "logps/rejected": -2.9522132873535156, "loss": 0.7216, "nll_loss": 0.6762985587120056, "rewards/accuracies": 0.75, "rewards/chosen": -0.17557844519615173, "rewards/margins": 0.11964291334152222, "rewards/rejected": -0.29522132873535156, "step": 3656 }, { "epoch": 10.012320328542094, "grad_norm": 5.578174114227295, "learning_rate": 4.991780821917808e-07, "log_odds_chosen": 1.320326328277588, "log_odds_ratio": -0.43890392780303955, "logits/chosen": 1.090248942375183, "logits/rejected": 1.1699016094207764, "logps/chosen": -2.396519899368286, "logps/rejected": -3.6876144409179688, "loss": 0.7338, "nll_loss": 0.6898728609085083, "rewards/accuracies": 0.625, "rewards/chosen": -0.23965200781822205, "rewards/margins": 0.12910941243171692, "rewards/rejected": -0.36876142024993896, "step": 3657 }, { "epoch": 10.015058179329227, "grad_norm": 4.240400791168213, "learning_rate": 4.990410958904109e-07, "log_odds_chosen": 2.9813809394836426, "log_odds_ratio": -0.16534565389156342, "logits/chosen": 0.9543279409408569, "logits/rejected": 1.0232386589050293, "logps/chosen": -2.310378313064575, "logps/rejected": -5.1865668296813965, "loss": 0.7108, "nll_loss": 0.6942962408065796, "rewards/accuracies": 0.875, "rewards/chosen": -0.23103785514831543, "rewards/margins": 0.28761887550354004, "rewards/rejected": -0.5186566710472107, "step": 3658 }, { "epoch": 10.017796030116358, "grad_norm": 4.300406455993652, "learning_rate": 4.989041095890411e-07, "log_odds_chosen": 1.8679226636886597, "log_odds_ratio": -0.228252112865448, "logits/chosen": 0.693529486656189, "logits/rejected": 0.6900734305381775, "logps/chosen": -2.1673383712768555, "logps/rejected": -3.872920513153076, "loss": 0.7036, "nll_loss": 0.6807287931442261, "rewards/accuracies": 1.0, "rewards/chosen": -0.21673382818698883, "rewards/margins": 0.17055821418762207, "rewards/rejected": -0.3872920572757721, "step": 3659 }, { "epoch": 10.020533880903491, "grad_norm": 5.669908046722412, "learning_rate": 4.987671232876712e-07, "log_odds_chosen": 1.554955005645752, "log_odds_ratio": -0.34749481081962585, "logits/chosen": 0.8308933973312378, "logits/rejected": 0.8514169454574585, "logps/chosen": -2.285536289215088, "logps/rejected": -3.7128655910491943, "loss": 0.6799, "nll_loss": 0.6451084613800049, "rewards/accuracies": 0.875, "rewards/chosen": -0.2285536378622055, "rewards/margins": 0.14273294806480408, "rewards/rejected": -0.3712865710258484, "step": 3660 }, { "epoch": 10.023271731690622, "grad_norm": 6.981442928314209, "learning_rate": 4.986301369863014e-07, "log_odds_chosen": 0.7426163554191589, "log_odds_ratio": -0.4527955949306488, "logits/chosen": 0.7483413219451904, "logits/rejected": 0.708419919013977, "logps/chosen": -2.575561761856079, "logps/rejected": -3.2680444717407227, "loss": 0.7138, "nll_loss": 0.6685032248497009, "rewards/accuracies": 0.875, "rewards/chosen": -0.2575562000274658, "rewards/margins": 0.06924828886985779, "rewards/rejected": -0.3268044590950012, "step": 3661 }, { "epoch": 10.026009582477755, "grad_norm": 4.309367656707764, "learning_rate": 4.984931506849315e-07, "log_odds_chosen": 2.463268280029297, "log_odds_ratio": -0.16784930229187012, "logits/chosen": 0.7489099502563477, "logits/rejected": 0.7490987777709961, "logps/chosen": -1.7675994634628296, "logps/rejected": -4.053732872009277, "loss": 0.6828, "nll_loss": 0.6659990549087524, "rewards/accuracies": 1.0, "rewards/chosen": -0.17675994336605072, "rewards/margins": 0.22861336171627045, "rewards/rejected": -0.4053732752799988, "step": 3662 }, { "epoch": 10.028747433264886, "grad_norm": 4.2112717628479, "learning_rate": 4.983561643835616e-07, "log_odds_chosen": 2.007866382598877, "log_odds_ratio": -0.33107349276542664, "logits/chosen": 0.7421226501464844, "logits/rejected": 0.7574119567871094, "logps/chosen": -2.330739974975586, "logps/rejected": -4.232301712036133, "loss": 0.7536, "nll_loss": 0.7204800844192505, "rewards/accuracies": 0.875, "rewards/chosen": -0.23307400941848755, "rewards/margins": 0.19015619158744812, "rewards/rejected": -0.4232301712036133, "step": 3663 }, { "epoch": 10.03148528405202, "grad_norm": 5.565054893493652, "learning_rate": 4.982191780821918e-07, "log_odds_chosen": 0.946304976940155, "log_odds_ratio": -0.49280112981796265, "logits/chosen": 0.7067759037017822, "logits/rejected": 0.7020429372787476, "logps/chosen": -2.44290828704834, "logps/rejected": -3.306786298751831, "loss": 0.6987, "nll_loss": 0.6493725776672363, "rewards/accuracies": 0.75, "rewards/chosen": -0.24429082870483398, "rewards/margins": 0.08638777583837509, "rewards/rejected": -0.3306786119937897, "step": 3664 }, { "epoch": 10.03422313483915, "grad_norm": 5.660787582397461, "learning_rate": 4.980821917808219e-07, "log_odds_chosen": 1.0031452178955078, "log_odds_ratio": -0.48276257514953613, "logits/chosen": 0.8800669312477112, "logits/rejected": 0.8801312446594238, "logps/chosen": -2.3326170444488525, "logps/rejected": -3.2847352027893066, "loss": 0.7744, "nll_loss": 0.7261489033699036, "rewards/accuracies": 0.75, "rewards/chosen": -0.23326170444488525, "rewards/margins": 0.09521180391311646, "rewards/rejected": -0.3284735083580017, "step": 3665 }, { "epoch": 10.036960985626283, "grad_norm": 4.35299015045166, "learning_rate": 4.97945205479452e-07, "log_odds_chosen": 2.701211929321289, "log_odds_ratio": -0.23494966328144073, "logits/chosen": 0.9557809829711914, "logits/rejected": 0.9845112562179565, "logps/chosen": -2.579817771911621, "logps/rejected": -5.215815544128418, "loss": 0.7135, "nll_loss": 0.69004887342453, "rewards/accuracies": 0.875, "rewards/chosen": -0.2579817771911621, "rewards/margins": 0.26359978318214417, "rewards/rejected": -0.5215815901756287, "step": 3666 }, { "epoch": 10.039698836413416, "grad_norm": 5.637401580810547, "learning_rate": 4.978082191780822e-07, "log_odds_chosen": 1.5570300817489624, "log_odds_ratio": -0.2801624834537506, "logits/chosen": 0.761488139629364, "logits/rejected": 0.7386742234230042, "logps/chosen": -2.1100354194641113, "logps/rejected": -3.514474630355835, "loss": 0.6701, "nll_loss": 0.6420769095420837, "rewards/accuracies": 0.875, "rewards/chosen": -0.21100354194641113, "rewards/margins": 0.14044392108917236, "rewards/rejected": -0.3514474630355835, "step": 3667 }, { "epoch": 10.042436687200547, "grad_norm": 4.420457363128662, "learning_rate": 4.976712328767123e-07, "log_odds_chosen": 2.077493190765381, "log_odds_ratio": -0.26714885234832764, "logits/chosen": 0.7706039547920227, "logits/rejected": 0.851199746131897, "logps/chosen": -2.065739631652832, "logps/rejected": -4.028825283050537, "loss": 0.8467, "nll_loss": 0.8199496269226074, "rewards/accuracies": 1.0, "rewards/chosen": -0.2065739631652832, "rewards/margins": 0.19630852341651917, "rewards/rejected": -0.40288251638412476, "step": 3668 }, { "epoch": 10.04517453798768, "grad_norm": 4.817883014678955, "learning_rate": 4.975342465753424e-07, "log_odds_chosen": 2.0546627044677734, "log_odds_ratio": -0.2019422948360443, "logits/chosen": 0.8084426522254944, "logits/rejected": 0.7772045731544495, "logps/chosen": -2.0742030143737793, "logps/rejected": -4.01814079284668, "loss": 0.6806, "nll_loss": 0.6604043245315552, "rewards/accuracies": 1.0, "rewards/chosen": -0.20742028951644897, "rewards/margins": 0.1943938285112381, "rewards/rejected": -0.4018141031265259, "step": 3669 }, { "epoch": 10.047912388774812, "grad_norm": 4.605879306793213, "learning_rate": 4.973972602739726e-07, "log_odds_chosen": 1.758540391921997, "log_odds_ratio": -0.24019378423690796, "logits/chosen": 0.7458745241165161, "logits/rejected": 0.7264354228973389, "logps/chosen": -2.026273012161255, "logps/rejected": -3.650278329849243, "loss": 0.695, "nll_loss": 0.6709338426589966, "rewards/accuracies": 1.0, "rewards/chosen": -0.2026273012161255, "rewards/margins": 0.16240055859088898, "rewards/rejected": -0.36502787470817566, "step": 3670 }, { "epoch": 10.050650239561945, "grad_norm": 4.09990119934082, "learning_rate": 4.972602739726027e-07, "log_odds_chosen": 2.961611747741699, "log_odds_ratio": -0.12235035002231598, "logits/chosen": 0.8470162749290466, "logits/rejected": 0.9099401235580444, "logps/chosen": -1.8831712007522583, "logps/rejected": -4.65529727935791, "loss": 0.6582, "nll_loss": 0.64598149061203, "rewards/accuracies": 1.0, "rewards/chosen": -0.18831712007522583, "rewards/margins": 0.27721261978149414, "rewards/rejected": -0.46552973985671997, "step": 3671 }, { "epoch": 10.053388090349076, "grad_norm": 6.730235576629639, "learning_rate": 4.971232876712328e-07, "log_odds_chosen": 2.511051893234253, "log_odds_ratio": -0.4867221415042877, "logits/chosen": 0.9412635564804077, "logits/rejected": 0.9363421201705933, "logps/chosen": -2.81816029548645, "logps/rejected": -5.229017734527588, "loss": 0.8011, "nll_loss": 0.752426028251648, "rewards/accuracies": 0.875, "rewards/chosen": -0.2818160057067871, "rewards/margins": 0.24108579754829407, "rewards/rejected": -0.5229018330574036, "step": 3672 }, { "epoch": 10.056125941136209, "grad_norm": 7.635586261749268, "learning_rate": 4.96986301369863e-07, "log_odds_chosen": 0.8898366093635559, "log_odds_ratio": -0.7107155323028564, "logits/chosen": 0.8508793711662292, "logits/rejected": 0.8925079107284546, "logps/chosen": -3.140084743499756, "logps/rejected": -3.939540147781372, "loss": 0.752, "nll_loss": 0.6809669137001038, "rewards/accuracies": 0.875, "rewards/chosen": -0.3140084743499756, "rewards/margins": 0.07994554936885834, "rewards/rejected": -0.3939540386199951, "step": 3673 }, { "epoch": 10.05886379192334, "grad_norm": 5.457138538360596, "learning_rate": 4.968493150684931e-07, "log_odds_chosen": 2.3637380599975586, "log_odds_ratio": -0.24489927291870117, "logits/chosen": 0.8165718913078308, "logits/rejected": 0.859287440776825, "logps/chosen": -2.2208991050720215, "logps/rejected": -4.401820182800293, "loss": 0.774, "nll_loss": 0.7494862079620361, "rewards/accuracies": 0.875, "rewards/chosen": -0.22208991646766663, "rewards/margins": 0.21809212863445282, "rewards/rejected": -0.44018206000328064, "step": 3674 }, { "epoch": 10.061601642710473, "grad_norm": 4.965764045715332, "learning_rate": 4.967123287671233e-07, "log_odds_chosen": 1.7198578119277954, "log_odds_ratio": -0.2968671917915344, "logits/chosen": 0.8894996047019958, "logits/rejected": 0.8893308639526367, "logps/chosen": -1.9173338413238525, "logps/rejected": -3.516986846923828, "loss": 0.7523, "nll_loss": 0.7225792407989502, "rewards/accuracies": 1.0, "rewards/chosen": -0.19173339009284973, "rewards/margins": 0.15996527671813965, "rewards/rejected": -0.35169869661331177, "step": 3675 }, { "epoch": 10.064339493497604, "grad_norm": 5.961050987243652, "learning_rate": 4.965753424657534e-07, "log_odds_chosen": 1.9694674015045166, "log_odds_ratio": -0.3792194724082947, "logits/chosen": 0.940951406955719, "logits/rejected": 0.9488141536712646, "logps/chosen": -2.912302255630493, "logps/rejected": -4.779094696044922, "loss": 0.8057, "nll_loss": 0.7677760124206543, "rewards/accuracies": 0.875, "rewards/chosen": -0.2912302315235138, "rewards/margins": 0.18667925894260406, "rewards/rejected": -0.47790947556495667, "step": 3676 }, { "epoch": 10.067077344284737, "grad_norm": 7.533095359802246, "learning_rate": 4.964383561643836e-07, "log_odds_chosen": 0.8303805589675903, "log_odds_ratio": -0.6636984348297119, "logits/chosen": 0.8588083386421204, "logits/rejected": 0.8545770645141602, "logps/chosen": -2.816540479660034, "logps/rejected": -3.605771541595459, "loss": 0.7897, "nll_loss": 0.7233180999755859, "rewards/accuracies": 0.875, "rewards/chosen": -0.2816540598869324, "rewards/margins": 0.07892307639122009, "rewards/rejected": -0.36057713627815247, "step": 3677 }, { "epoch": 10.069815195071868, "grad_norm": 5.9895920753479, "learning_rate": 4.963013698630137e-07, "log_odds_chosen": 1.1709129810333252, "log_odds_ratio": -0.5560488700866699, "logits/chosen": 0.7427173852920532, "logits/rejected": 0.7325292229652405, "logps/chosen": -2.3600564002990723, "logps/rejected": -3.450974464416504, "loss": 0.8521, "nll_loss": 0.7965092062950134, "rewards/accuracies": 0.875, "rewards/chosen": -0.23600564897060394, "rewards/margins": 0.10909183323383331, "rewards/rejected": -0.34509748220443726, "step": 3678 }, { "epoch": 10.072553045859001, "grad_norm": 4.611931324005127, "learning_rate": 4.961643835616438e-07, "log_odds_chosen": 2.437870979309082, "log_odds_ratio": -0.2592763304710388, "logits/chosen": 0.5793566703796387, "logits/rejected": 0.6055324077606201, "logps/chosen": -2.3611550331115723, "logps/rejected": -4.683004856109619, "loss": 0.7077, "nll_loss": 0.6817858219146729, "rewards/accuracies": 0.875, "rewards/chosen": -0.23611551523208618, "rewards/margins": 0.2321849912405014, "rewards/rejected": -0.4683005213737488, "step": 3679 }, { "epoch": 10.075290896646132, "grad_norm": 5.559725761413574, "learning_rate": 4.960273972602739e-07, "log_odds_chosen": 1.5219342708587646, "log_odds_ratio": -0.27149486541748047, "logits/chosen": 0.7843879461288452, "logits/rejected": 0.7524287104606628, "logps/chosen": -2.0379278659820557, "logps/rejected": -3.4238085746765137, "loss": 0.6912, "nll_loss": 0.6640847325325012, "rewards/accuracies": 1.0, "rewards/chosen": -0.2037927657365799, "rewards/margins": 0.1385880708694458, "rewards/rejected": -0.3423808515071869, "step": 3680 }, { "epoch": 10.078028747433265, "grad_norm": 7.77123498916626, "learning_rate": 4.958904109589041e-07, "log_odds_chosen": 0.42535969614982605, "log_odds_ratio": -0.6603199243545532, "logits/chosen": 0.8431463241577148, "logits/rejected": 0.9120190739631653, "logps/chosen": -3.0922417640686035, "logps/rejected": -3.4765076637268066, "loss": 0.7427, "nll_loss": 0.6766292452812195, "rewards/accuracies": 0.625, "rewards/chosen": -0.3092241883277893, "rewards/margins": 0.038426581770181656, "rewards/rejected": -0.34765076637268066, "step": 3681 }, { "epoch": 10.080766598220396, "grad_norm": 6.864396572113037, "learning_rate": 4.957534246575342e-07, "log_odds_chosen": 0.8060034513473511, "log_odds_ratio": -0.6762511730194092, "logits/chosen": 0.8591148853302002, "logits/rejected": 0.8180896043777466, "logps/chosen": -2.872729778289795, "logps/rejected": -3.5454230308532715, "loss": 0.7372, "nll_loss": 0.6696172952651978, "rewards/accuracies": 0.875, "rewards/chosen": -0.28727295994758606, "rewards/margins": 0.06726934015750885, "rewards/rejected": -0.3545423150062561, "step": 3682 }, { "epoch": 10.08350444900753, "grad_norm": 5.000326156616211, "learning_rate": 4.956164383561643e-07, "log_odds_chosen": 2.5680415630340576, "log_odds_ratio": -0.439203679561615, "logits/chosen": 0.7829256057739258, "logits/rejected": 0.8015552759170532, "logps/chosen": -2.1502466201782227, "logps/rejected": -4.545058250427246, "loss": 0.8178, "nll_loss": 0.7738921642303467, "rewards/accuracies": 0.75, "rewards/chosen": -0.2150246500968933, "rewards/margins": 0.23948118090629578, "rewards/rejected": -0.4545058608055115, "step": 3683 }, { "epoch": 10.08624229979466, "grad_norm": 8.308716773986816, "learning_rate": 4.954794520547945e-07, "log_odds_chosen": 1.197162389755249, "log_odds_ratio": -0.7014369368553162, "logits/chosen": 0.9151067733764648, "logits/rejected": 0.9536265730857849, "logps/chosen": -3.33591890335083, "logps/rejected": -4.430528163909912, "loss": 0.8364, "nll_loss": 0.7662885189056396, "rewards/accuracies": 0.625, "rewards/chosen": -0.33359187841415405, "rewards/margins": 0.10946093499660492, "rewards/rejected": -0.44305282831192017, "step": 3684 }, { "epoch": 10.088980150581794, "grad_norm": 4.6426191329956055, "learning_rate": 4.953424657534246e-07, "log_odds_chosen": 1.0395976305007935, "log_odds_ratio": -0.4144940972328186, "logits/chosen": 0.7126795053482056, "logits/rejected": 0.7494626641273499, "logps/chosen": -2.2997660636901855, "logps/rejected": -3.290800094604492, "loss": 0.648, "nll_loss": 0.6065528392791748, "rewards/accuracies": 0.875, "rewards/chosen": -0.2299765944480896, "rewards/margins": 0.0991034284234047, "rewards/rejected": -0.3290800452232361, "step": 3685 }, { "epoch": 10.091718001368925, "grad_norm": 5.5002055168151855, "learning_rate": 4.952054794520547e-07, "log_odds_chosen": 1.6735038757324219, "log_odds_ratio": -0.4802098870277405, "logits/chosen": 0.7246778011322021, "logits/rejected": 0.7435590028762817, "logps/chosen": -2.745607852935791, "logps/rejected": -4.333302974700928, "loss": 0.8082, "nll_loss": 0.76018887758255, "rewards/accuracies": 0.875, "rewards/chosen": -0.2745607793331146, "rewards/margins": 0.15876951813697815, "rewards/rejected": -0.4333302974700928, "step": 3686 }, { "epoch": 10.094455852156058, "grad_norm": 4.013460159301758, "learning_rate": 4.950684931506849e-07, "log_odds_chosen": 2.5804457664489746, "log_odds_ratio": -0.4470250904560089, "logits/chosen": 0.6389808654785156, "logits/rejected": 0.7351216077804565, "logps/chosen": -2.741114616394043, "logps/rejected": -5.209751129150391, "loss": 0.8067, "nll_loss": 0.7619538307189941, "rewards/accuracies": 0.75, "rewards/chosen": -0.27411144971847534, "rewards/margins": 0.24686366319656372, "rewards/rejected": -0.5209751129150391, "step": 3687 }, { "epoch": 10.097193702943189, "grad_norm": 5.086091041564941, "learning_rate": 4.94931506849315e-07, "log_odds_chosen": 2.2894210815429688, "log_odds_ratio": -0.29714345932006836, "logits/chosen": 0.9368981122970581, "logits/rejected": 0.9148316979408264, "logps/chosen": -1.9813451766967773, "logps/rejected": -4.107639312744141, "loss": 0.6747, "nll_loss": 0.6450186371803284, "rewards/accuracies": 0.875, "rewards/chosen": -0.19813451170921326, "rewards/margins": 0.21262946724891663, "rewards/rejected": -0.4107639789581299, "step": 3688 }, { "epoch": 10.099931553730322, "grad_norm": 5.510601043701172, "learning_rate": 4.947945205479452e-07, "log_odds_chosen": 1.3629621267318726, "log_odds_ratio": -0.36267971992492676, "logits/chosen": 0.7161937355995178, "logits/rejected": 0.7018722891807556, "logps/chosen": -2.6080427169799805, "logps/rejected": -3.8360514640808105, "loss": 0.7498, "nll_loss": 0.7135210633277893, "rewards/accuracies": 0.75, "rewards/chosen": -0.26080429553985596, "rewards/margins": 0.12280088663101196, "rewards/rejected": -0.38360515236854553, "step": 3689 }, { "epoch": 10.102669404517453, "grad_norm": 6.257837295532227, "learning_rate": 4.946575342465753e-07, "log_odds_chosen": 3.4021265506744385, "log_odds_ratio": -0.07370907068252563, "logits/chosen": 1.2097247838974, "logits/rejected": 1.299738883972168, "logps/chosen": -2.267871618270874, "logps/rejected": -5.564457893371582, "loss": 0.6512, "nll_loss": 0.6438374519348145, "rewards/accuracies": 1.0, "rewards/chosen": -0.22678716480731964, "rewards/margins": 0.3296586275100708, "rewards/rejected": -0.556445837020874, "step": 3690 }, { "epoch": 10.105407255304586, "grad_norm": 6.99563455581665, "learning_rate": 4.945205479452055e-07, "log_odds_chosen": 2.133922815322876, "log_odds_ratio": -0.38181087374687195, "logits/chosen": 1.072553277015686, "logits/rejected": 1.0534790754318237, "logps/chosen": -2.555765151977539, "logps/rejected": -4.531808376312256, "loss": 0.6971, "nll_loss": 0.6589003801345825, "rewards/accuracies": 0.75, "rewards/chosen": -0.2555765211582184, "rewards/margins": 0.19760432839393616, "rewards/rejected": -0.45318084955215454, "step": 3691 }, { "epoch": 10.108145106091717, "grad_norm": 4.442605018615723, "learning_rate": 4.943835616438356e-07, "log_odds_chosen": 1.3318859338760376, "log_odds_ratio": -0.30483493208885193, "logits/chosen": 0.9224996566772461, "logits/rejected": 0.9946157336235046, "logps/chosen": -2.2586684226989746, "logps/rejected": -3.5098283290863037, "loss": 0.6832, "nll_loss": 0.6527318358421326, "rewards/accuracies": 1.0, "rewards/chosen": -0.22586682438850403, "rewards/margins": 0.1251160055398941, "rewards/rejected": -0.3509828448295593, "step": 3692 }, { "epoch": 10.11088295687885, "grad_norm": 5.299724578857422, "learning_rate": 4.942465753424657e-07, "log_odds_chosen": 1.171892523765564, "log_odds_ratio": -0.295894980430603, "logits/chosen": 0.8023834228515625, "logits/rejected": 0.7104644179344177, "logps/chosen": -2.035980224609375, "logps/rejected": -3.067885637283325, "loss": 0.7564, "nll_loss": 0.7267728447914124, "rewards/accuracies": 1.0, "rewards/chosen": -0.2035980224609375, "rewards/margins": 0.10319057106971741, "rewards/rejected": -0.3067885935306549, "step": 3693 }, { "epoch": 10.113620807665983, "grad_norm": 5.525860786437988, "learning_rate": 4.941095890410958e-07, "log_odds_chosen": 1.1893774271011353, "log_odds_ratio": -0.35776740312576294, "logits/chosen": 0.9491981267929077, "logits/rejected": 0.9163594245910645, "logps/chosen": -2.598540782928467, "logps/rejected": -3.719294548034668, "loss": 0.7261, "nll_loss": 0.6903223395347595, "rewards/accuracies": 1.0, "rewards/chosen": -0.25985410809516907, "rewards/margins": 0.11207538098096848, "rewards/rejected": -0.37192946672439575, "step": 3694 }, { "epoch": 10.116358658453114, "grad_norm": 5.211545944213867, "learning_rate": 4.93972602739726e-07, "log_odds_chosen": 2.3611717224121094, "log_odds_ratio": -0.30480968952178955, "logits/chosen": 0.9516257643699646, "logits/rejected": 0.9873705506324768, "logps/chosen": -2.5310630798339844, "logps/rejected": -4.8384809494018555, "loss": 0.713, "nll_loss": 0.6825020909309387, "rewards/accuracies": 0.875, "rewards/chosen": -0.25310632586479187, "rewards/margins": 0.23074179887771606, "rewards/rejected": -0.48384809494018555, "step": 3695 }, { "epoch": 10.119096509240247, "grad_norm": 8.411258697509766, "learning_rate": 4.938356164383561e-07, "log_odds_chosen": 1.3974274396896362, "log_odds_ratio": -0.5421664714813232, "logits/chosen": 0.8344700932502747, "logits/rejected": 0.7953855991363525, "logps/chosen": -2.2486400604248047, "logps/rejected": -3.50777006149292, "loss": 0.8301, "nll_loss": 0.7758727073669434, "rewards/accuracies": 0.875, "rewards/chosen": -0.22486399114131927, "rewards/margins": 0.12591299414634705, "rewards/rejected": -0.3507769703865051, "step": 3696 }, { "epoch": 10.121834360027378, "grad_norm": 4.800044059753418, "learning_rate": 4.936986301369862e-07, "log_odds_chosen": 1.978428602218628, "log_odds_ratio": -0.44563230872154236, "logits/chosen": 0.849740743637085, "logits/rejected": 0.87557053565979, "logps/chosen": -2.5677237510681152, "logps/rejected": -4.446052551269531, "loss": 0.698, "nll_loss": 0.6534399390220642, "rewards/accuracies": 0.875, "rewards/chosen": -0.25677239894866943, "rewards/margins": 0.1878328174352646, "rewards/rejected": -0.4446052312850952, "step": 3697 }, { "epoch": 10.124572210814511, "grad_norm": 5.82178258895874, "learning_rate": 4.935616438356164e-07, "log_odds_chosen": 1.5892107486724854, "log_odds_ratio": -0.3469797372817993, "logits/chosen": 0.8789379596710205, "logits/rejected": 0.920931339263916, "logps/chosen": -2.4040284156799316, "logps/rejected": -3.9368791580200195, "loss": 0.7979, "nll_loss": 0.7632105350494385, "rewards/accuracies": 0.875, "rewards/chosen": -0.24040284752845764, "rewards/margins": 0.15328507125377655, "rewards/rejected": -0.393687903881073, "step": 3698 }, { "epoch": 10.127310061601642, "grad_norm": 4.40275764465332, "learning_rate": 4.934246575342465e-07, "log_odds_chosen": 1.6390293836593628, "log_odds_ratio": -0.2790845036506653, "logits/chosen": 0.8155189752578735, "logits/rejected": 0.8286764621734619, "logps/chosen": -1.8594942092895508, "logps/rejected": -3.3746776580810547, "loss": 0.7439, "nll_loss": 0.7160147428512573, "rewards/accuracies": 1.0, "rewards/chosen": -0.1859494149684906, "rewards/margins": 0.15151837468147278, "rewards/rejected": -0.3374677896499634, "step": 3699 }, { "epoch": 10.130047912388775, "grad_norm": 6.024715423583984, "learning_rate": 4.932876712328766e-07, "log_odds_chosen": 2.4584357738494873, "log_odds_ratio": -0.21954165399074554, "logits/chosen": 1.0917810201644897, "logits/rejected": 1.1369574069976807, "logps/chosen": -2.4663257598876953, "logps/rejected": -4.8413567543029785, "loss": 0.709, "nll_loss": 0.6870532631874084, "rewards/accuracies": 0.75, "rewards/chosen": -0.24663257598876953, "rewards/margins": 0.23750312626361847, "rewards/rejected": -0.4841357171535492, "step": 3700 }, { "epoch": 10.132785763175907, "grad_norm": 4.006245136260986, "learning_rate": 4.931506849315068e-07, "log_odds_chosen": 4.773874282836914, "log_odds_ratio": -0.06193017587065697, "logits/chosen": 0.7298635244369507, "logits/rejected": 0.6692748069763184, "logps/chosen": -2.4512243270874023, "logps/rejected": -7.078202247619629, "loss": 0.8518, "nll_loss": 0.8456545472145081, "rewards/accuracies": 1.0, "rewards/chosen": -0.24512243270874023, "rewards/margins": 0.462697833776474, "rewards/rejected": -0.7078202366828918, "step": 3701 }, { "epoch": 10.13552361396304, "grad_norm": 6.704588413238525, "learning_rate": 4.930136986301369e-07, "log_odds_chosen": 1.3143675327301025, "log_odds_ratio": -0.5200376510620117, "logits/chosen": 0.9334232807159424, "logits/rejected": 0.9231654405593872, "logps/chosen": -2.6400539875030518, "logps/rejected": -3.860759735107422, "loss": 0.7551, "nll_loss": 0.7031120657920837, "rewards/accuracies": 0.75, "rewards/chosen": -0.2640053927898407, "rewards/margins": 0.1220705658197403, "rewards/rejected": -0.3860759735107422, "step": 3702 }, { "epoch": 10.13826146475017, "grad_norm": 4.295482635498047, "learning_rate": 4.928767123287671e-07, "log_odds_chosen": 3.119170904159546, "log_odds_ratio": -0.18329867720603943, "logits/chosen": 0.8187654614448547, "logits/rejected": 0.7739361524581909, "logps/chosen": -1.9315078258514404, "logps/rejected": -4.933752536773682, "loss": 0.7229, "nll_loss": 0.7045947313308716, "rewards/accuracies": 1.0, "rewards/chosen": -0.19315080344676971, "rewards/margins": 0.3002244532108307, "rewards/rejected": -0.4933752417564392, "step": 3703 }, { "epoch": 10.140999315537304, "grad_norm": 5.119663238525391, "learning_rate": 4.927397260273972e-07, "log_odds_chosen": 1.1509742736816406, "log_odds_ratio": -0.34921663999557495, "logits/chosen": 0.8050992488861084, "logits/rejected": 0.7809449434280396, "logps/chosen": -1.7366033792495728, "logps/rejected": -2.756835699081421, "loss": 0.6561, "nll_loss": 0.6211422681808472, "rewards/accuracies": 1.0, "rewards/chosen": -0.17366033792495728, "rewards/margins": 0.10202324390411377, "rewards/rejected": -0.27568358182907104, "step": 3704 }, { "epoch": 10.143737166324435, "grad_norm": 4.978452682495117, "learning_rate": 4.926027397260273e-07, "log_odds_chosen": 2.900108814239502, "log_odds_ratio": -0.19181233644485474, "logits/chosen": 1.0768884420394897, "logits/rejected": 1.1242929697036743, "logps/chosen": -3.1604115962982178, "logps/rejected": -5.914588451385498, "loss": 0.7151, "nll_loss": 0.6958935260772705, "rewards/accuracies": 1.0, "rewards/chosen": -0.31604114174842834, "rewards/margins": 0.275417685508728, "rewards/rejected": -0.5914588570594788, "step": 3705 }, { "epoch": 10.146475017111568, "grad_norm": 4.808132648468018, "learning_rate": 4.924657534246575e-07, "log_odds_chosen": 2.5124096870422363, "log_odds_ratio": -0.27048784494400024, "logits/chosen": 0.6293714046478271, "logits/rejected": 0.6471751928329468, "logps/chosen": -2.0785200595855713, "logps/rejected": -4.502766132354736, "loss": 0.8106, "nll_loss": 0.783536970615387, "rewards/accuracies": 0.875, "rewards/chosen": -0.20785202085971832, "rewards/margins": 0.24242457747459412, "rewards/rejected": -0.45027661323547363, "step": 3706 }, { "epoch": 10.149212867898699, "grad_norm": 4.503012180328369, "learning_rate": 4.923287671232876e-07, "log_odds_chosen": 1.4279148578643799, "log_odds_ratio": -0.2937646806240082, "logits/chosen": 0.8304887413978577, "logits/rejected": 0.8036498427391052, "logps/chosen": -2.25730037689209, "logps/rejected": -3.576984167098999, "loss": 0.6897, "nll_loss": 0.6603501439094543, "rewards/accuracies": 1.0, "rewards/chosen": -0.2257300615310669, "rewards/margins": 0.13196834921836853, "rewards/rejected": -0.3576984405517578, "step": 3707 }, { "epoch": 10.151950718685832, "grad_norm": 4.94511079788208, "learning_rate": 4.921917808219179e-07, "log_odds_chosen": 2.33892560005188, "log_odds_ratio": -0.19240404665470123, "logits/chosen": 1.0286281108856201, "logits/rejected": 1.0933319330215454, "logps/chosen": -2.4754281044006348, "logps/rejected": -4.717232704162598, "loss": 0.7048, "nll_loss": 0.6855360269546509, "rewards/accuracies": 1.0, "rewards/chosen": -0.2475428283214569, "rewards/margins": 0.2241804450750351, "rewards/rejected": -0.4717232584953308, "step": 3708 }, { "epoch": 10.154688569472963, "grad_norm": 4.491283893585205, "learning_rate": 4.92054794520548e-07, "log_odds_chosen": 2.8243327140808105, "log_odds_ratio": -0.17779889702796936, "logits/chosen": 1.055832862854004, "logits/rejected": 1.088644027709961, "logps/chosen": -1.8205182552337646, "logps/rejected": -4.387242317199707, "loss": 0.6367, "nll_loss": 0.6188983917236328, "rewards/accuracies": 1.0, "rewards/chosen": -0.18205183744430542, "rewards/margins": 0.2566724121570587, "rewards/rejected": -0.43872421979904175, "step": 3709 }, { "epoch": 10.157426420260096, "grad_norm": 5.134969711303711, "learning_rate": 4.91917808219178e-07, "log_odds_chosen": 1.8213269710540771, "log_odds_ratio": -0.23248343169689178, "logits/chosen": 0.6387445330619812, "logits/rejected": 0.7064997553825378, "logps/chosen": -2.635179281234741, "logps/rejected": -4.341976165771484, "loss": 0.759, "nll_loss": 0.7357601523399353, "rewards/accuracies": 1.0, "rewards/chosen": -0.26351791620254517, "rewards/margins": 0.17067968845367432, "rewards/rejected": -0.4341976046562195, "step": 3710 }, { "epoch": 10.160164271047227, "grad_norm": 4.938116073608398, "learning_rate": 4.917808219178081e-07, "log_odds_chosen": 1.9305815696716309, "log_odds_ratio": -0.2803582549095154, "logits/chosen": 0.8094055652618408, "logits/rejected": 0.7189599871635437, "logps/chosen": -2.395379066467285, "logps/rejected": -4.216258525848389, "loss": 0.75, "nll_loss": 0.7219479084014893, "rewards/accuracies": 1.0, "rewards/chosen": -0.23953792452812195, "rewards/margins": 0.18208791315555573, "rewards/rejected": -0.42162585258483887, "step": 3711 }, { "epoch": 10.16290212183436, "grad_norm": 5.140355110168457, "learning_rate": 4.916438356164384e-07, "log_odds_chosen": 3.3175435066223145, "log_odds_ratio": -0.15810143947601318, "logits/chosen": 0.8596295118331909, "logits/rejected": 0.8484934568405151, "logps/chosen": -2.128750801086426, "logps/rejected": -5.324128150939941, "loss": 0.6898, "nll_loss": 0.6739779710769653, "rewards/accuracies": 1.0, "rewards/chosen": -0.21287508308887482, "rewards/margins": 0.3195377290248871, "rewards/rejected": -0.5324128270149231, "step": 3712 }, { "epoch": 10.165639972621491, "grad_norm": 6.76547384262085, "learning_rate": 4.915068493150685e-07, "log_odds_chosen": 2.0496861934661865, "log_odds_ratio": -0.5541836619377136, "logits/chosen": 1.0403670072555542, "logits/rejected": 1.0305283069610596, "logps/chosen": -2.852590322494507, "logps/rejected": -4.8384199142456055, "loss": 0.8868, "nll_loss": 0.8314155340194702, "rewards/accuracies": 0.75, "rewards/chosen": -0.2852590084075928, "rewards/margins": 0.1985829919576645, "rewards/rejected": -0.48384201526641846, "step": 3713 }, { "epoch": 10.168377823408624, "grad_norm": 5.928606986999512, "learning_rate": 4.913698630136986e-07, "log_odds_chosen": 1.036755084991455, "log_odds_ratio": -0.4472084641456604, "logits/chosen": 0.7469474077224731, "logits/rejected": 0.7024033069610596, "logps/chosen": -2.135192632675171, "logps/rejected": -3.0665342807769775, "loss": 0.7916, "nll_loss": 0.7469263076782227, "rewards/accuracies": 0.875, "rewards/chosen": -0.21351926028728485, "rewards/margins": 0.09313417226076126, "rewards/rejected": -0.3066534399986267, "step": 3714 }, { "epoch": 10.171115674195756, "grad_norm": 4.843028545379639, "learning_rate": 4.912328767123288e-07, "log_odds_chosen": 3.8819563388824463, "log_odds_ratio": -0.14764471352100372, "logits/chosen": 0.7656546235084534, "logits/rejected": 0.7637289762496948, "logps/chosen": -2.5797929763793945, "logps/rejected": -6.3737030029296875, "loss": 0.7457, "nll_loss": 0.7309083938598633, "rewards/accuracies": 1.0, "rewards/chosen": -0.25797930359840393, "rewards/margins": 0.37939098477363586, "rewards/rejected": -0.6373702883720398, "step": 3715 }, { "epoch": 10.173853524982889, "grad_norm": 5.347474575042725, "learning_rate": 4.910958904109589e-07, "log_odds_chosen": 1.6509941816329956, "log_odds_ratio": -0.26623713970184326, "logits/chosen": 0.6541028618812561, "logits/rejected": 0.6293783187866211, "logps/chosen": -1.628692626953125, "logps/rejected": -3.1214427947998047, "loss": 0.6608, "nll_loss": 0.6342216730117798, "rewards/accuracies": 1.0, "rewards/chosen": -0.16286927461624146, "rewards/margins": 0.149275004863739, "rewards/rejected": -0.31214427947998047, "step": 3716 }, { "epoch": 10.17659137577002, "grad_norm": 4.449067115783691, "learning_rate": 4.909589041095891e-07, "log_odds_chosen": 2.7416675090789795, "log_odds_ratio": -0.2714445888996124, "logits/chosen": 0.7907742261886597, "logits/rejected": 0.7295190095901489, "logps/chosen": -1.8377366065979004, "logps/rejected": -4.480162143707275, "loss": 0.7133, "nll_loss": 0.6861662864685059, "rewards/accuracies": 0.875, "rewards/chosen": -0.18377366662025452, "rewards/margins": 0.2642425000667572, "rewards/rejected": -0.4480161964893341, "step": 3717 }, { "epoch": 10.179329226557153, "grad_norm": 4.62106990814209, "learning_rate": 4.908219178082192e-07, "log_odds_chosen": 1.8222038745880127, "log_odds_ratio": -0.29281818866729736, "logits/chosen": 0.659659206867218, "logits/rejected": 0.7063740491867065, "logps/chosen": -1.8918190002441406, "logps/rejected": -3.328110694885254, "loss": 0.6509, "nll_loss": 0.6216079592704773, "rewards/accuracies": 0.875, "rewards/chosen": -0.18918190896511078, "rewards/margins": 0.14362914860248566, "rewards/rejected": -0.33281105756759644, "step": 3718 }, { "epoch": 10.182067077344286, "grad_norm": 4.3676886558532715, "learning_rate": 4.906849315068493e-07, "log_odds_chosen": 1.711432933807373, "log_odds_ratio": -0.215958833694458, "logits/chosen": 0.8440953493118286, "logits/rejected": 0.8878271579742432, "logps/chosen": -2.1211934089660645, "logps/rejected": -3.6917829513549805, "loss": 0.6084, "nll_loss": 0.5868332982063293, "rewards/accuracies": 1.0, "rewards/chosen": -0.21211935579776764, "rewards/margins": 0.1570589542388916, "rewards/rejected": -0.36917832493782043, "step": 3719 }, { "epoch": 10.184804928131417, "grad_norm": 5.003667831420898, "learning_rate": 4.905479452054795e-07, "log_odds_chosen": 2.8322246074676514, "log_odds_ratio": -0.2472507655620575, "logits/chosen": 0.5622511506080627, "logits/rejected": 0.4890821576118469, "logps/chosen": -2.0729808807373047, "logps/rejected": -4.8081231117248535, "loss": 0.804, "nll_loss": 0.7792937159538269, "rewards/accuracies": 1.0, "rewards/chosen": -0.20729807019233704, "rewards/margins": 0.2735142409801483, "rewards/rejected": -0.48081231117248535, "step": 3720 }, { "epoch": 10.18754277891855, "grad_norm": 5.243890285491943, "learning_rate": 4.904109589041096e-07, "log_odds_chosen": 1.7619296312332153, "log_odds_ratio": -0.3175923526287079, "logits/chosen": 0.8583178520202637, "logits/rejected": 0.9181342720985413, "logps/chosen": -2.6084847450256348, "logps/rejected": -4.310530662536621, "loss": 0.9374, "nll_loss": 0.9056739807128906, "rewards/accuracies": 1.0, "rewards/chosen": -0.2608484625816345, "rewards/margins": 0.17020459473133087, "rewards/rejected": -0.4310530424118042, "step": 3721 }, { "epoch": 10.190280629705681, "grad_norm": 5.4724249839782715, "learning_rate": 4.902739726027398e-07, "log_odds_chosen": 2.222841739654541, "log_odds_ratio": -0.20247788727283478, "logits/chosen": 0.7638792991638184, "logits/rejected": 0.7114959955215454, "logps/chosen": -2.365288734436035, "logps/rejected": -4.425018310546875, "loss": 0.7686, "nll_loss": 0.7483541965484619, "rewards/accuracies": 0.875, "rewards/chosen": -0.23652885854244232, "rewards/margins": 0.20597298443317413, "rewards/rejected": -0.44250184297561646, "step": 3722 }, { "epoch": 10.193018480492814, "grad_norm": 6.600719451904297, "learning_rate": 4.901369863013699e-07, "log_odds_chosen": 1.4994587898254395, "log_odds_ratio": -0.5655043125152588, "logits/chosen": 0.668201208114624, "logits/rejected": 0.7065478563308716, "logps/chosen": -2.971834421157837, "logps/rejected": -4.37290096282959, "loss": 0.7068, "nll_loss": 0.6502223014831543, "rewards/accuracies": 0.625, "rewards/chosen": -0.29718345403671265, "rewards/margins": 0.140106663107872, "rewards/rejected": -0.4372900724411011, "step": 3723 }, { "epoch": 10.195756331279945, "grad_norm": 7.041897296905518, "learning_rate": 4.9e-07, "log_odds_chosen": 1.8658819198608398, "log_odds_ratio": -0.5236110687255859, "logits/chosen": 0.8264960050582886, "logits/rejected": 0.9420104622840881, "logps/chosen": -3.2684428691864014, "logps/rejected": -5.013511657714844, "loss": 0.8749, "nll_loss": 0.8225185871124268, "rewards/accuracies": 0.75, "rewards/chosen": -0.3268442749977112, "rewards/margins": 0.17450693249702454, "rewards/rejected": -0.5013511776924133, "step": 3724 }, { "epoch": 10.198494182067078, "grad_norm": 3.7836592197418213, "learning_rate": 4.898630136986301e-07, "log_odds_chosen": 2.166044235229492, "log_odds_ratio": -0.16040915250778198, "logits/chosen": 0.8462097644805908, "logits/rejected": 0.9225084185600281, "logps/chosen": -1.8040664196014404, "logps/rejected": -3.8008599281311035, "loss": 0.6773, "nll_loss": 0.6612997651100159, "rewards/accuracies": 1.0, "rewards/chosen": -0.18040664494037628, "rewards/margins": 0.19967937469482422, "rewards/rejected": -0.3800860047340393, "step": 3725 }, { "epoch": 10.20123203285421, "grad_norm": 4.499324798583984, "learning_rate": 4.897260273972603e-07, "log_odds_chosen": 2.870835781097412, "log_odds_ratio": -0.15989695489406586, "logits/chosen": 0.8734332919120789, "logits/rejected": 0.8777193427085876, "logps/chosen": -2.080810308456421, "logps/rejected": -4.787445068359375, "loss": 0.6812, "nll_loss": 0.6652425527572632, "rewards/accuracies": 1.0, "rewards/chosen": -0.20808103680610657, "rewards/margins": 0.2706634998321533, "rewards/rejected": -0.4787445664405823, "step": 3726 }, { "epoch": 10.203969883641342, "grad_norm": 6.167485237121582, "learning_rate": 4.895890410958904e-07, "log_odds_chosen": 0.18196432292461395, "log_odds_ratio": -0.8613777160644531, "logits/chosen": 0.824449360370636, "logits/rejected": 0.8877586126327515, "logps/chosen": -2.961738109588623, "logps/rejected": -3.131357192993164, "loss": 0.8288, "nll_loss": 0.742685079574585, "rewards/accuracies": 0.625, "rewards/chosen": -0.2961738109588623, "rewards/margins": 0.01696193590760231, "rewards/rejected": -0.3131357431411743, "step": 3727 }, { "epoch": 10.206707734428473, "grad_norm": 4.490286827087402, "learning_rate": 4.894520547945205e-07, "log_odds_chosen": 2.3582119941711426, "log_odds_ratio": -0.2084725797176361, "logits/chosen": 0.6967118978500366, "logits/rejected": 0.6574830412864685, "logps/chosen": -1.5063878297805786, "logps/rejected": -3.6005301475524902, "loss": 0.7035, "nll_loss": 0.6826450824737549, "rewards/accuracies": 1.0, "rewards/chosen": -0.15063878893852234, "rewards/margins": 0.20941424369812012, "rewards/rejected": -0.36005306243896484, "step": 3728 }, { "epoch": 10.209445585215606, "grad_norm": 4.660226345062256, "learning_rate": 4.893150684931507e-07, "log_odds_chosen": 2.5292773246765137, "log_odds_ratio": -0.1706823706626892, "logits/chosen": 0.7972193956375122, "logits/rejected": 0.8318783044815063, "logps/chosen": -2.177603006362915, "logps/rejected": -4.539656639099121, "loss": 0.6837, "nll_loss": 0.6666790843009949, "rewards/accuracies": 1.0, "rewards/chosen": -0.21776032447814941, "rewards/margins": 0.23620536923408508, "rewards/rejected": -0.4539656937122345, "step": 3729 }, { "epoch": 10.212183436002737, "grad_norm": 4.8195109367370605, "learning_rate": 4.891780821917808e-07, "log_odds_chosen": 1.1293617486953735, "log_odds_ratio": -0.47341859340667725, "logits/chosen": 0.8775930404663086, "logits/rejected": 0.8713226318359375, "logps/chosen": -2.3505849838256836, "logps/rejected": -3.424520492553711, "loss": 0.7742, "nll_loss": 0.7268750071525574, "rewards/accuracies": 0.75, "rewards/chosen": -0.2350584715604782, "rewards/margins": 0.10739357024431229, "rewards/rejected": -0.3424520492553711, "step": 3730 }, { "epoch": 10.21492128678987, "grad_norm": 5.408951282501221, "learning_rate": 4.89041095890411e-07, "log_odds_chosen": 1.5680853128433228, "log_odds_ratio": -0.3188163638114929, "logits/chosen": 0.8269869685173035, "logits/rejected": 0.8321222066879272, "logps/chosen": -1.9742445945739746, "logps/rejected": -3.437981605529785, "loss": 0.702, "nll_loss": 0.6701582670211792, "rewards/accuracies": 0.875, "rewards/chosen": -0.19742445647716522, "rewards/margins": 0.1463737040758133, "rewards/rejected": -0.3437981605529785, "step": 3731 }, { "epoch": 10.217659137577002, "grad_norm": 6.002597332000732, "learning_rate": 4.889041095890411e-07, "log_odds_chosen": 3.2610409259796143, "log_odds_ratio": -0.20187321305274963, "logits/chosen": 0.9740103483200073, "logits/rejected": 0.9717437028884888, "logps/chosen": -1.952217698097229, "logps/rejected": -5.072014808654785, "loss": 0.7566, "nll_loss": 0.7363930940628052, "rewards/accuracies": 0.875, "rewards/chosen": -0.19522176682949066, "rewards/margins": 0.3119796812534332, "rewards/rejected": -0.5072014331817627, "step": 3732 }, { "epoch": 10.220396988364135, "grad_norm": 5.117536544799805, "learning_rate": 4.887671232876712e-07, "log_odds_chosen": 1.9234381914138794, "log_odds_ratio": -0.34619033336639404, "logits/chosen": 1.0187358856201172, "logits/rejected": 1.0496701002120972, "logps/chosen": -2.127786636352539, "logps/rejected": -3.9312314987182617, "loss": 0.6556, "nll_loss": 0.6209602952003479, "rewards/accuracies": 0.875, "rewards/chosen": -0.21277867257595062, "rewards/margins": 0.18034444749355316, "rewards/rejected": -0.3931231200695038, "step": 3733 }, { "epoch": 10.223134839151266, "grad_norm": 6.6830596923828125, "learning_rate": 4.886301369863014e-07, "log_odds_chosen": 0.5472652912139893, "log_odds_ratio": -0.7710521221160889, "logits/chosen": 0.6635600328445435, "logits/rejected": 0.5798295140266418, "logps/chosen": -2.360496997833252, "logps/rejected": -2.8982157707214355, "loss": 0.769, "nll_loss": 0.6919416189193726, "rewards/accuracies": 0.5, "rewards/chosen": -0.23604971170425415, "rewards/margins": 0.05377187207341194, "rewards/rejected": -0.2898215651512146, "step": 3734 }, { "epoch": 10.225872689938399, "grad_norm": 4.96344518661499, "learning_rate": 4.884931506849315e-07, "log_odds_chosen": 1.4219355583190918, "log_odds_ratio": -0.48166540265083313, "logits/chosen": 0.9736987948417664, "logits/rejected": 0.9916614890098572, "logps/chosen": -2.3168978691101074, "logps/rejected": -3.609891414642334, "loss": 0.7123, "nll_loss": 0.6641066074371338, "rewards/accuracies": 0.75, "rewards/chosen": -0.23168981075286865, "rewards/margins": 0.1292993426322937, "rewards/rejected": -0.36098915338516235, "step": 3735 }, { "epoch": 10.22861054072553, "grad_norm": 5.223428726196289, "learning_rate": 4.883561643835617e-07, "log_odds_chosen": 1.7518774271011353, "log_odds_ratio": -0.253572940826416, "logits/chosen": 1.0337456464767456, "logits/rejected": 1.0525213479995728, "logps/chosen": -2.1534411907196045, "logps/rejected": -3.801980495452881, "loss": 0.6742, "nll_loss": 0.6488584280014038, "rewards/accuracies": 1.0, "rewards/chosen": -0.2153441160917282, "rewards/margins": 0.16485390067100525, "rewards/rejected": -0.38019806146621704, "step": 3736 }, { "epoch": 10.231348391512663, "grad_norm": 4.7842020988464355, "learning_rate": 4.882191780821918e-07, "log_odds_chosen": 3.0420172214508057, "log_odds_ratio": -0.13881009817123413, "logits/chosen": 0.6977065205574036, "logits/rejected": 0.6704140305519104, "logps/chosen": -1.4228804111480713, "logps/rejected": -4.209737777709961, "loss": 0.7814, "nll_loss": 0.7675268054008484, "rewards/accuracies": 1.0, "rewards/chosen": -0.14228802919387817, "rewards/margins": 0.27868571877479553, "rewards/rejected": -0.4209737777709961, "step": 3737 }, { "epoch": 10.234086242299794, "grad_norm": 5.928860187530518, "learning_rate": 4.880821917808219e-07, "log_odds_chosen": 2.2088418006896973, "log_odds_ratio": -0.2352808117866516, "logits/chosen": 0.867454469203949, "logits/rejected": 0.9384351968765259, "logps/chosen": -2.835003614425659, "logps/rejected": -4.917675018310547, "loss": 0.8419, "nll_loss": 0.8183542490005493, "rewards/accuracies": 0.875, "rewards/chosen": -0.2835003733634949, "rewards/margins": 0.20826712250709534, "rewards/rejected": -0.4917674958705902, "step": 3738 }, { "epoch": 10.236824093086927, "grad_norm": 4.302733898162842, "learning_rate": 4.879452054794521e-07, "log_odds_chosen": 1.9986963272094727, "log_odds_ratio": -0.22332286834716797, "logits/chosen": 0.6415682435035706, "logits/rejected": 0.5904074907302856, "logps/chosen": -2.39194917678833, "logps/rejected": -4.306122303009033, "loss": 0.7778, "nll_loss": 0.7554696798324585, "rewards/accuracies": 1.0, "rewards/chosen": -0.23919491469860077, "rewards/margins": 0.19141729176044464, "rewards/rejected": -0.4306122064590454, "step": 3739 }, { "epoch": 10.239561943874058, "grad_norm": 5.06620454788208, "learning_rate": 4.878082191780822e-07, "log_odds_chosen": 0.9228583574295044, "log_odds_ratio": -0.45529237389564514, "logits/chosen": 0.7077046036720276, "logits/rejected": 0.6685424447059631, "logps/chosen": -2.0447659492492676, "logps/rejected": -2.898829460144043, "loss": 0.8149, "nll_loss": 0.7693456411361694, "rewards/accuracies": 0.75, "rewards/chosen": -0.20447659492492676, "rewards/margins": 0.0854063630104065, "rewards/rejected": -0.28988295793533325, "step": 3740 }, { "epoch": 10.242299794661191, "grad_norm": 5.275660514831543, "learning_rate": 4.876712328767123e-07, "log_odds_chosen": 2.2785027027130127, "log_odds_ratio": -0.2041320651769638, "logits/chosen": 0.7541935443878174, "logits/rejected": 0.7917388677597046, "logps/chosen": -1.7789913415908813, "logps/rejected": -3.8817057609558105, "loss": 0.7248, "nll_loss": 0.7043579816818237, "rewards/accuracies": 0.875, "rewards/chosen": -0.17789913713932037, "rewards/margins": 0.2102714329957962, "rewards/rejected": -0.3881705701351166, "step": 3741 }, { "epoch": 10.245037645448322, "grad_norm": 7.91368293762207, "learning_rate": 4.875342465753424e-07, "log_odds_chosen": 1.0361716747283936, "log_odds_ratio": -0.5652827620506287, "logits/chosen": 0.9455515146255493, "logits/rejected": 0.9549522995948792, "logps/chosen": -3.237558364868164, "logps/rejected": -4.2175493240356445, "loss": 0.7496, "nll_loss": 0.6931172013282776, "rewards/accuracies": 0.75, "rewards/chosen": -0.32375583052635193, "rewards/margins": 0.09799908101558685, "rewards/rejected": -0.4217549264431, "step": 3742 }, { "epoch": 10.247775496235455, "grad_norm": 6.106025218963623, "learning_rate": 4.873972602739726e-07, "log_odds_chosen": 2.669492483139038, "log_odds_ratio": -0.34061598777770996, "logits/chosen": 0.9938757419586182, "logits/rejected": 0.9904174208641052, "logps/chosen": -2.2415342330932617, "logps/rejected": -4.776962757110596, "loss": 0.7102, "nll_loss": 0.6761504411697388, "rewards/accuracies": 0.75, "rewards/chosen": -0.22415341436862946, "rewards/margins": 0.25354287028312683, "rewards/rejected": -0.4776962697505951, "step": 3743 }, { "epoch": 10.250513347022586, "grad_norm": 4.65578031539917, "learning_rate": 4.872602739726027e-07, "log_odds_chosen": 3.195720672607422, "log_odds_ratio": -0.20548740029335022, "logits/chosen": 0.8501771092414856, "logits/rejected": 0.8744722604751587, "logps/chosen": -2.1360177993774414, "logps/rejected": -5.1755523681640625, "loss": 0.7953, "nll_loss": 0.7747523188591003, "rewards/accuracies": 0.875, "rewards/chosen": -0.21360178291797638, "rewards/margins": 0.3039534389972687, "rewards/rejected": -0.5175552368164062, "step": 3744 }, { "epoch": 10.25325119780972, "grad_norm": 6.143369674682617, "learning_rate": 4.871232876712328e-07, "log_odds_chosen": 2.0108418464660645, "log_odds_ratio": -0.3028064966201782, "logits/chosen": 0.8042536973953247, "logits/rejected": 0.7859177589416504, "logps/chosen": -2.1282572746276855, "logps/rejected": -4.049185276031494, "loss": 0.7635, "nll_loss": 0.7332605123519897, "rewards/accuracies": 0.875, "rewards/chosen": -0.2128257304430008, "rewards/margins": 0.19209280610084534, "rewards/rejected": -0.40491852164268494, "step": 3745 }, { "epoch": 10.255989048596852, "grad_norm": 5.910327434539795, "learning_rate": 4.86986301369863e-07, "log_odds_chosen": 2.523571729660034, "log_odds_ratio": -0.36343157291412354, "logits/chosen": 1.0362765789031982, "logits/rejected": 1.0333446264266968, "logps/chosen": -2.544090509414673, "logps/rejected": -5.0161213874816895, "loss": 0.7505, "nll_loss": 0.7141247987747192, "rewards/accuracies": 0.875, "rewards/chosen": -0.2544090747833252, "rewards/margins": 0.247203066945076, "rewards/rejected": -0.5016121864318848, "step": 3746 }, { "epoch": 10.258726899383984, "grad_norm": 6.333555221557617, "learning_rate": 4.868493150684931e-07, "log_odds_chosen": 2.2040088176727295, "log_odds_ratio": -0.3261856436729431, "logits/chosen": 0.7762464284896851, "logits/rejected": 0.7380086183547974, "logps/chosen": -2.0629851818084717, "logps/rejected": -4.094645977020264, "loss": 0.6851, "nll_loss": 0.6524689197540283, "rewards/accuracies": 0.875, "rewards/chosen": -0.20629850029945374, "rewards/margins": 0.20316609740257263, "rewards/rejected": -0.40946459770202637, "step": 3747 }, { "epoch": 10.261464750171116, "grad_norm": 5.2585344314575195, "learning_rate": 4.867123287671233e-07, "log_odds_chosen": 2.1556687355041504, "log_odds_ratio": -0.21742485463619232, "logits/chosen": 0.8275418281555176, "logits/rejected": 0.9367177486419678, "logps/chosen": -2.264028787612915, "logps/rejected": -4.255240440368652, "loss": 0.7926, "nll_loss": 0.7708767652511597, "rewards/accuracies": 0.875, "rewards/chosen": -0.2264028787612915, "rewards/margins": 0.19912119209766388, "rewards/rejected": -0.4255240559577942, "step": 3748 }, { "epoch": 10.264202600958248, "grad_norm": 5.3977155685424805, "learning_rate": 4.865753424657534e-07, "log_odds_chosen": 1.56111478805542, "log_odds_ratio": -0.32514625787734985, "logits/chosen": 1.1461342573165894, "logits/rejected": 1.1868386268615723, "logps/chosen": -2.5495357513427734, "logps/rejected": -3.9303016662597656, "loss": 0.693, "nll_loss": 0.6604704856872559, "rewards/accuracies": 0.75, "rewards/chosen": -0.2549535632133484, "rewards/margins": 0.13807658851146698, "rewards/rejected": -0.39303016662597656, "step": 3749 }, { "epoch": 10.26694045174538, "grad_norm": 4.794679641723633, "learning_rate": 4.864383561643836e-07, "log_odds_chosen": 2.2972114086151123, "log_odds_ratio": -0.18307524919509888, "logits/chosen": 1.0033401250839233, "logits/rejected": 1.0058990716934204, "logps/chosen": -2.1457571983337402, "logps/rejected": -4.321313858032227, "loss": 0.7764, "nll_loss": 0.7581097483634949, "rewards/accuracies": 1.0, "rewards/chosen": -0.21457570791244507, "rewards/margins": 0.2175556719303131, "rewards/rejected": -0.43213140964508057, "step": 3750 }, { "epoch": 10.269678302532512, "grad_norm": 4.781768798828125, "learning_rate": 4.863013698630137e-07, "log_odds_chosen": 2.018451690673828, "log_odds_ratio": -0.36984169483184814, "logits/chosen": 0.9723472595214844, "logits/rejected": 1.0097960233688354, "logps/chosen": -2.4828920364379883, "logps/rejected": -4.424526691436768, "loss": 0.6878, "nll_loss": 0.6508230566978455, "rewards/accuracies": 0.875, "rewards/chosen": -0.24828921258449554, "rewards/margins": 0.1941634714603424, "rewards/rejected": -0.44245266914367676, "step": 3751 }, { "epoch": 10.272416153319645, "grad_norm": 5.746877193450928, "learning_rate": 4.861643835616438e-07, "log_odds_chosen": 0.8254712820053101, "log_odds_ratio": -0.44304510951042175, "logits/chosen": 0.978426456451416, "logits/rejected": 0.9837479591369629, "logps/chosen": -2.2058022022247314, "logps/rejected": -2.92984938621521, "loss": 0.7142, "nll_loss": 0.6698498725891113, "rewards/accuracies": 0.875, "rewards/chosen": -0.22058022022247314, "rewards/margins": 0.07240472733974457, "rewards/rejected": -0.2929849326610565, "step": 3752 }, { "epoch": 10.275154004106776, "grad_norm": 4.088195323944092, "learning_rate": 4.86027397260274e-07, "log_odds_chosen": 2.328403949737549, "log_odds_ratio": -0.17313113808631897, "logits/chosen": 1.0774890184402466, "logits/rejected": 1.1301186084747314, "logps/chosen": -2.491490602493286, "logps/rejected": -4.739115238189697, "loss": 0.6957, "nll_loss": 0.6784300208091736, "rewards/accuracies": 1.0, "rewards/chosen": -0.24914908409118652, "rewards/margins": 0.2247624695301056, "rewards/rejected": -0.4739115834236145, "step": 3753 }, { "epoch": 10.277891854893909, "grad_norm": 4.61696195602417, "learning_rate": 4.858904109589041e-07, "log_odds_chosen": 1.7410171031951904, "log_odds_ratio": -0.3640437722206116, "logits/chosen": 0.9073153138160706, "logits/rejected": 0.9890831112861633, "logps/chosen": -3.343641996383667, "logps/rejected": -5.02421760559082, "loss": 0.8422, "nll_loss": 0.8057770729064941, "rewards/accuracies": 0.75, "rewards/chosen": -0.3343642055988312, "rewards/margins": 0.16805756092071533, "rewards/rejected": -0.5024217963218689, "step": 3754 }, { "epoch": 10.28062970568104, "grad_norm": 4.759483814239502, "learning_rate": 4.857534246575342e-07, "log_odds_chosen": 1.3505975008010864, "log_odds_ratio": -0.48785173892974854, "logits/chosen": 0.706843376159668, "logits/rejected": 0.7391959428787231, "logps/chosen": -2.369631290435791, "logps/rejected": -3.704465866088867, "loss": 0.7399, "nll_loss": 0.6911187171936035, "rewards/accuracies": 0.75, "rewards/chosen": -0.23696313798427582, "rewards/margins": 0.13348345458507538, "rewards/rejected": -0.3704465925693512, "step": 3755 }, { "epoch": 10.283367556468173, "grad_norm": 6.730679035186768, "learning_rate": 4.856164383561643e-07, "log_odds_chosen": 1.0714497566223145, "log_odds_ratio": -0.3448137044906616, "logits/chosen": 0.8245821595191956, "logits/rejected": 0.7755827307701111, "logps/chosen": -1.9817957878112793, "logps/rejected": -2.9019033908843994, "loss": 0.7502, "nll_loss": 0.7156805992126465, "rewards/accuracies": 0.875, "rewards/chosen": -0.19817958772182465, "rewards/margins": 0.09201076626777649, "rewards/rejected": -0.29019033908843994, "step": 3756 }, { "epoch": 10.286105407255304, "grad_norm": 4.8856706619262695, "learning_rate": 4.854794520547945e-07, "log_odds_chosen": 0.9379408955574036, "log_odds_ratio": -0.45651552081108093, "logits/chosen": 0.8630442023277283, "logits/rejected": 0.8708749413490295, "logps/chosen": -2.168876886367798, "logps/rejected": -3.0835790634155273, "loss": 0.7064, "nll_loss": 0.6607287526130676, "rewards/accuracies": 0.75, "rewards/chosen": -0.2168876975774765, "rewards/margins": 0.09147021174430847, "rewards/rejected": -0.3083578944206238, "step": 3757 }, { "epoch": 10.288843258042437, "grad_norm": 4.41847562789917, "learning_rate": 4.853424657534246e-07, "log_odds_chosen": 2.7043628692626953, "log_odds_ratio": -0.16093024611473083, "logits/chosen": 0.8670759201049805, "logits/rejected": 0.9153137803077698, "logps/chosen": -2.5421247482299805, "logps/rejected": -5.1339263916015625, "loss": 0.7244, "nll_loss": 0.7083419561386108, "rewards/accuracies": 1.0, "rewards/chosen": -0.25421246886253357, "rewards/margins": 0.2591801583766937, "rewards/rejected": -0.5133926272392273, "step": 3758 }, { "epoch": 10.291581108829568, "grad_norm": 4.177513122558594, "learning_rate": 4.852054794520547e-07, "log_odds_chosen": 2.452857494354248, "log_odds_ratio": -0.18456107378005981, "logits/chosen": 0.6605361700057983, "logits/rejected": 0.6857088208198547, "logps/chosen": -1.983488917350769, "logps/rejected": -4.263291358947754, "loss": 0.6907, "nll_loss": 0.672273576259613, "rewards/accuracies": 0.875, "rewards/chosen": -0.19834890961647034, "rewards/margins": 0.22798024117946625, "rewards/rejected": -0.4263291358947754, "step": 3759 }, { "epoch": 10.294318959616701, "grad_norm": 5.093375205993652, "learning_rate": 4.850684931506849e-07, "log_odds_chosen": 2.037766933441162, "log_odds_ratio": -0.23275232315063477, "logits/chosen": 0.8649892807006836, "logits/rejected": 0.8136386871337891, "logps/chosen": -2.242525100708008, "logps/rejected": -4.169526100158691, "loss": 0.7903, "nll_loss": 0.7670581340789795, "rewards/accuracies": 1.0, "rewards/chosen": -0.22425252199172974, "rewards/margins": 0.19270005822181702, "rewards/rejected": -0.41695258021354675, "step": 3760 }, { "epoch": 10.297056810403832, "grad_norm": 4.662850379943848, "learning_rate": 4.84931506849315e-07, "log_odds_chosen": 2.472249746322632, "log_odds_ratio": -0.2105628103017807, "logits/chosen": 0.8216981887817383, "logits/rejected": 0.8754178285598755, "logps/chosen": -2.1895642280578613, "logps/rejected": -4.536940097808838, "loss": 0.7033, "nll_loss": 0.6822090148925781, "rewards/accuracies": 1.0, "rewards/chosen": -0.21895642578601837, "rewards/margins": 0.2347376048564911, "rewards/rejected": -0.45369401574134827, "step": 3761 }, { "epoch": 10.299794661190965, "grad_norm": 4.5718889236450195, "learning_rate": 4.847945205479452e-07, "log_odds_chosen": 1.7459784746170044, "log_odds_ratio": -0.24088600277900696, "logits/chosen": 0.7619893550872803, "logits/rejected": 0.763968825340271, "logps/chosen": -2.5245273113250732, "logps/rejected": -4.146762371063232, "loss": 0.7095, "nll_loss": 0.6854351162910461, "rewards/accuracies": 0.875, "rewards/chosen": -0.2524527311325073, "rewards/margins": 0.16222351789474487, "rewards/rejected": -0.4146762490272522, "step": 3762 }, { "epoch": 10.302532511978097, "grad_norm": 5.348598957061768, "learning_rate": 4.846575342465753e-07, "log_odds_chosen": 1.3744529485702515, "log_odds_ratio": -0.37990808486938477, "logits/chosen": 0.7507041096687317, "logits/rejected": 0.7283731698989868, "logps/chosen": -2.325444221496582, "logps/rejected": -3.615933895111084, "loss": 0.8004, "nll_loss": 0.7624141573905945, "rewards/accuracies": 0.875, "rewards/chosen": -0.2325444221496582, "rewards/margins": 0.12904897332191467, "rewards/rejected": -0.3615933954715729, "step": 3763 }, { "epoch": 10.30527036276523, "grad_norm": 5.066562175750732, "learning_rate": 4.845205479452055e-07, "log_odds_chosen": 2.319300889968872, "log_odds_ratio": -0.2451106607913971, "logits/chosen": 0.9218509197235107, "logits/rejected": 0.9177737236022949, "logps/chosen": -2.324688196182251, "logps/rejected": -4.558701992034912, "loss": 0.7994, "nll_loss": 0.7749040126800537, "rewards/accuracies": 0.875, "rewards/chosen": -0.23246881365776062, "rewards/margins": 0.22340139746665955, "rewards/rejected": -0.45587024092674255, "step": 3764 }, { "epoch": 10.30800821355236, "grad_norm": 4.775829315185547, "learning_rate": 4.843835616438356e-07, "log_odds_chosen": 2.456303358078003, "log_odds_ratio": -0.19411562383174896, "logits/chosen": 0.9687023162841797, "logits/rejected": 1.0488146543502808, "logps/chosen": -2.6329329013824463, "logps/rejected": -5.020227432250977, "loss": 0.863, "nll_loss": 0.8435969352722168, "rewards/accuracies": 1.0, "rewards/chosen": -0.2632932960987091, "rewards/margins": 0.23872944712638855, "rewards/rejected": -0.5020227432250977, "step": 3765 }, { "epoch": 10.310746064339494, "grad_norm": 4.392145156860352, "learning_rate": 4.842465753424657e-07, "log_odds_chosen": 3.1422905921936035, "log_odds_ratio": -0.24588224291801453, "logits/chosen": 0.6082557439804077, "logits/rejected": 0.609989583492279, "logps/chosen": -2.2404022216796875, "logps/rejected": -5.266341209411621, "loss": 0.7616, "nll_loss": 0.7369661331176758, "rewards/accuracies": 0.875, "rewards/chosen": -0.224040225148201, "rewards/margins": 0.3025938868522644, "rewards/rejected": -0.5266340970993042, "step": 3766 }, { "epoch": 10.313483915126625, "grad_norm": 6.765419960021973, "learning_rate": 4.841095890410959e-07, "log_odds_chosen": 0.6276764869689941, "log_odds_ratio": -0.5994056463241577, "logits/chosen": 0.8111909627914429, "logits/rejected": 0.8490889668464661, "logps/chosen": -2.7115135192871094, "logps/rejected": -3.2893576622009277, "loss": 0.7031, "nll_loss": 0.643160343170166, "rewards/accuracies": 0.875, "rewards/chosen": -0.2711513340473175, "rewards/margins": 0.05778440460562706, "rewards/rejected": -0.3289357125759125, "step": 3767 }, { "epoch": 10.316221765913758, "grad_norm": 4.760539531707764, "learning_rate": 4.83972602739726e-07, "log_odds_chosen": 2.453303098678589, "log_odds_ratio": -0.20245984196662903, "logits/chosen": 1.066293478012085, "logits/rejected": 1.1435691118240356, "logps/chosen": -1.9647817611694336, "logps/rejected": -4.271910667419434, "loss": 0.636, "nll_loss": 0.6157109141349792, "rewards/accuracies": 1.0, "rewards/chosen": -0.19647815823554993, "rewards/margins": 0.2307128757238388, "rewards/rejected": -0.4271910488605499, "step": 3768 }, { "epoch": 10.318959616700889, "grad_norm": 4.29346227645874, "learning_rate": 4.838356164383561e-07, "log_odds_chosen": 3.259110450744629, "log_odds_ratio": -0.10940293222665787, "logits/chosen": 1.061017394065857, "logits/rejected": 1.0773098468780518, "logps/chosen": -2.432779312133789, "logps/rejected": -5.527159214019775, "loss": 0.7209, "nll_loss": 0.7099800109863281, "rewards/accuracies": 1.0, "rewards/chosen": -0.24327793717384338, "rewards/margins": 0.30943799018859863, "rewards/rejected": -0.5527158975601196, "step": 3769 }, { "epoch": 10.321697467488022, "grad_norm": 5.196383476257324, "learning_rate": 4.836986301369862e-07, "log_odds_chosen": 2.315784454345703, "log_odds_ratio": -0.20031733810901642, "logits/chosen": 0.8488987684249878, "logits/rejected": 0.9112846851348877, "logps/chosen": -2.1334726810455322, "logps/rejected": -4.345574378967285, "loss": 0.6941, "nll_loss": 0.6740844249725342, "rewards/accuracies": 0.875, "rewards/chosen": -0.21334727108478546, "rewards/margins": 0.22121016681194305, "rewards/rejected": -0.4345574378967285, "step": 3770 }, { "epoch": 10.324435318275153, "grad_norm": 6.115521430969238, "learning_rate": 4.835616438356164e-07, "log_odds_chosen": 0.9285475015640259, "log_odds_ratio": -0.6240477561950684, "logits/chosen": 0.7601994872093201, "logits/rejected": 0.8222916126251221, "logps/chosen": -2.4080593585968018, "logps/rejected": -3.2926883697509766, "loss": 0.6881, "nll_loss": 0.6257123351097107, "rewards/accuracies": 0.75, "rewards/chosen": -0.24080593883991241, "rewards/margins": 0.08846290409564972, "rewards/rejected": -0.32926884293556213, "step": 3771 }, { "epoch": 10.327173169062286, "grad_norm": 5.337845802307129, "learning_rate": 4.834246575342465e-07, "log_odds_chosen": 0.9940874576568604, "log_odds_ratio": -0.3808329403400421, "logits/chosen": 0.771751880645752, "logits/rejected": 0.6869055032730103, "logps/chosen": -2.427424192428589, "logps/rejected": -3.3714370727539062, "loss": 0.8248, "nll_loss": 0.7866742610931396, "rewards/accuracies": 0.75, "rewards/chosen": -0.2427424192428589, "rewards/margins": 0.09440131485462189, "rewards/rejected": -0.33714374899864197, "step": 3772 }, { "epoch": 10.329911019849419, "grad_norm": 4.847876071929932, "learning_rate": 4.832876712328766e-07, "log_odds_chosen": 3.379702568054199, "log_odds_ratio": -0.1055988073348999, "logits/chosen": 0.8630614876747131, "logits/rejected": 0.9002183079719543, "logps/chosen": -2.1393001079559326, "logps/rejected": -5.383731842041016, "loss": 0.653, "nll_loss": 0.642413854598999, "rewards/accuracies": 1.0, "rewards/chosen": -0.21393001079559326, "rewards/margins": 0.3244432210922241, "rewards/rejected": -0.5383732318878174, "step": 3773 }, { "epoch": 10.33264887063655, "grad_norm": 5.412311553955078, "learning_rate": 4.831506849315068e-07, "log_odds_chosen": 1.3331265449523926, "log_odds_ratio": -0.4310321509838104, "logits/chosen": 0.8732263445854187, "logits/rejected": 0.8432040810585022, "logps/chosen": -2.3771870136260986, "logps/rejected": -3.612828493118286, "loss": 0.7119, "nll_loss": 0.6687905192375183, "rewards/accuracies": 0.875, "rewards/chosen": -0.23771873116493225, "rewards/margins": 0.12356415390968323, "rewards/rejected": -0.3612828850746155, "step": 3774 }, { "epoch": 10.335386721423683, "grad_norm": 8.692452430725098, "learning_rate": 4.830136986301369e-07, "log_odds_chosen": 0.274862140417099, "log_odds_ratio": -0.6920705437660217, "logits/chosen": 0.7869446277618408, "logits/rejected": 0.8062258958816528, "logps/chosen": -2.9843504428863525, "logps/rejected": -3.226952314376831, "loss": 0.7533, "nll_loss": 0.6841301918029785, "rewards/accuracies": 0.5, "rewards/chosen": -0.2984350621700287, "rewards/margins": 0.024260176345705986, "rewards/rejected": -0.32269522547721863, "step": 3775 }, { "epoch": 10.338124572210814, "grad_norm": 5.291442394256592, "learning_rate": 4.828767123287671e-07, "log_odds_chosen": 2.933516025543213, "log_odds_ratio": -0.1338694989681244, "logits/chosen": 0.7140811681747437, "logits/rejected": 0.7444537281990051, "logps/chosen": -1.4356415271759033, "logps/rejected": -4.073159217834473, "loss": 0.6331, "nll_loss": 0.6197049021720886, "rewards/accuracies": 1.0, "rewards/chosen": -0.1435641497373581, "rewards/margins": 0.2637518048286438, "rewards/rejected": -0.4073159694671631, "step": 3776 }, { "epoch": 10.340862422997947, "grad_norm": 4.845241546630859, "learning_rate": 4.827397260273972e-07, "log_odds_chosen": 2.0999908447265625, "log_odds_ratio": -0.2250719964504242, "logits/chosen": 1.2010647058486938, "logits/rejected": 1.2616164684295654, "logps/chosen": -2.8891525268554688, "logps/rejected": -4.913318634033203, "loss": 0.6881, "nll_loss": 0.6655713319778442, "rewards/accuracies": 1.0, "rewards/chosen": -0.2889152467250824, "rewards/margins": 0.20241661369800568, "rewards/rejected": -0.4913318455219269, "step": 3777 }, { "epoch": 10.343600273785079, "grad_norm": 3.908402681350708, "learning_rate": 4.826027397260274e-07, "log_odds_chosen": 3.5115325450897217, "log_odds_ratio": -0.07842355221509933, "logits/chosen": 0.9260395765304565, "logits/rejected": 0.9304240345954895, "logps/chosen": -2.3985815048217773, "logps/rejected": -5.756170272827148, "loss": 0.6972, "nll_loss": 0.6893987655639648, "rewards/accuracies": 1.0, "rewards/chosen": -0.23985815048217773, "rewards/margins": 0.33575892448425293, "rewards/rejected": -0.5756171345710754, "step": 3778 }, { "epoch": 10.346338124572211, "grad_norm": 6.504838943481445, "learning_rate": 4.824657534246575e-07, "log_odds_chosen": 2.3168752193450928, "log_odds_ratio": -0.29854175448417664, "logits/chosen": 1.0442371368408203, "logits/rejected": 1.0921530723571777, "logps/chosen": -2.415492057800293, "logps/rejected": -4.508289337158203, "loss": 0.6615, "nll_loss": 0.6316256523132324, "rewards/accuracies": 0.75, "rewards/chosen": -0.24154922366142273, "rewards/margins": 0.20927971601486206, "rewards/rejected": -0.4508289098739624, "step": 3779 }, { "epoch": 10.349075975359343, "grad_norm": 4.860218048095703, "learning_rate": 4.823287671232876e-07, "log_odds_chosen": 2.617011547088623, "log_odds_ratio": -0.14836299419403076, "logits/chosen": 0.6289477944374084, "logits/rejected": 0.7017412185668945, "logps/chosen": -1.6713799238204956, "logps/rejected": -4.012494087219238, "loss": 0.822, "nll_loss": 0.8071421980857849, "rewards/accuracies": 1.0, "rewards/chosen": -0.167138010263443, "rewards/margins": 0.23411139845848083, "rewards/rejected": -0.40124940872192383, "step": 3780 }, { "epoch": 10.351813826146476, "grad_norm": 6.140284061431885, "learning_rate": 4.821917808219178e-07, "log_odds_chosen": 1.1300252676010132, "log_odds_ratio": -0.6082514524459839, "logits/chosen": 0.8210115432739258, "logits/rejected": 0.8610220551490784, "logps/chosen": -2.4716928005218506, "logps/rejected": -3.5167014598846436, "loss": 0.6755, "nll_loss": 0.6147057414054871, "rewards/accuracies": 0.75, "rewards/chosen": -0.24716928601264954, "rewards/margins": 0.10450085252523422, "rewards/rejected": -0.35167014598846436, "step": 3781 }, { "epoch": 10.354551676933607, "grad_norm": 5.033166885375977, "learning_rate": 4.820547945205479e-07, "log_odds_chosen": 3.1920602321624756, "log_odds_ratio": -0.10836394876241684, "logits/chosen": 0.9855852127075195, "logits/rejected": 0.9942061901092529, "logps/chosen": -1.9631118774414062, "logps/rejected": -5.0054931640625, "loss": 0.6788, "nll_loss": 0.6679918766021729, "rewards/accuracies": 1.0, "rewards/chosen": -0.19631119072437286, "rewards/margins": 0.30423811078071594, "rewards/rejected": -0.50054931640625, "step": 3782 }, { "epoch": 10.35728952772074, "grad_norm": 4.851515293121338, "learning_rate": 4.81917808219178e-07, "log_odds_chosen": 1.5385332107543945, "log_odds_ratio": -0.3531823456287384, "logits/chosen": 0.844745397567749, "logits/rejected": 0.8429670333862305, "logps/chosen": -1.6844573020935059, "logps/rejected": -3.0891811847686768, "loss": 0.7784, "nll_loss": 0.7430490255355835, "rewards/accuracies": 0.875, "rewards/chosen": -0.16844575107097626, "rewards/margins": 0.1404723823070526, "rewards/rejected": -0.3089181184768677, "step": 3783 }, { "epoch": 10.360027378507871, "grad_norm": 4.80409049987793, "learning_rate": 4.817808219178082e-07, "log_odds_chosen": 0.9461474418640137, "log_odds_ratio": -0.4151878356933594, "logits/chosen": 0.8763744831085205, "logits/rejected": 0.9161473512649536, "logps/chosen": -2.000382900238037, "logps/rejected": -2.8075907230377197, "loss": 0.6561, "nll_loss": 0.6145561933517456, "rewards/accuracies": 0.875, "rewards/chosen": -0.20003826916217804, "rewards/margins": 0.08072081208229065, "rewards/rejected": -0.2807590961456299, "step": 3784 }, { "epoch": 10.362765229295004, "grad_norm": 4.972543716430664, "learning_rate": 4.816438356164383e-07, "log_odds_chosen": 1.8817938566207886, "log_odds_ratio": -0.3503345549106598, "logits/chosen": 0.6717302799224854, "logits/rejected": 0.6723276972770691, "logps/chosen": -2.0957071781158447, "logps/rejected": -3.881028652191162, "loss": 0.7248, "nll_loss": 0.6897680759429932, "rewards/accuracies": 0.875, "rewards/chosen": -0.20957070589065552, "rewards/margins": 0.17853213846683502, "rewards/rejected": -0.38810285925865173, "step": 3785 }, { "epoch": 10.365503080082135, "grad_norm": 4.476856231689453, "learning_rate": 4.815068493150684e-07, "log_odds_chosen": 2.002501964569092, "log_odds_ratio": -0.28584352135658264, "logits/chosen": 0.917729377746582, "logits/rejected": 0.9428850412368774, "logps/chosen": -2.4082350730895996, "logps/rejected": -4.332467555999756, "loss": 0.68, "nll_loss": 0.6514573097229004, "rewards/accuracies": 0.875, "rewards/chosen": -0.24082355201244354, "rewards/margins": 0.19242322444915771, "rewards/rejected": -0.43324676156044006, "step": 3786 }, { "epoch": 10.368240930869268, "grad_norm": 6.045807361602783, "learning_rate": 4.813698630136985e-07, "log_odds_chosen": 1.483612060546875, "log_odds_ratio": -0.46883630752563477, "logits/chosen": 0.7501910328865051, "logits/rejected": 0.7582322359085083, "logps/chosen": -2.6318418979644775, "logps/rejected": -4.022211074829102, "loss": 0.7777, "nll_loss": 0.7308111190795898, "rewards/accuracies": 0.875, "rewards/chosen": -0.26318418979644775, "rewards/margins": 0.13903693854808807, "rewards/rejected": -0.40222111344337463, "step": 3787 }, { "epoch": 10.3709787816564, "grad_norm": 4.2970967292785645, "learning_rate": 4.812328767123287e-07, "log_odds_chosen": 2.6972551345825195, "log_odds_ratio": -0.21564917266368866, "logits/chosen": 0.8026421070098877, "logits/rejected": 0.8587585687637329, "logps/chosen": -1.9732956886291504, "logps/rejected": -4.520912170410156, "loss": 0.7409, "nll_loss": 0.7193050384521484, "rewards/accuracies": 0.875, "rewards/chosen": -0.1973295658826828, "rewards/margins": 0.25476163625717163, "rewards/rejected": -0.4520912170410156, "step": 3788 }, { "epoch": 10.373716632443532, "grad_norm": 4.941401481628418, "learning_rate": 4.810958904109588e-07, "log_odds_chosen": 0.866034746170044, "log_odds_ratio": -0.37319034337997437, "logits/chosen": 0.6745648980140686, "logits/rejected": 0.6572769284248352, "logps/chosen": -1.9468034505844116, "logps/rejected": -2.694658041000366, "loss": 0.7121, "nll_loss": 0.6747550964355469, "rewards/accuracies": 1.0, "rewards/chosen": -0.1946803480386734, "rewards/margins": 0.07478546351194382, "rewards/rejected": -0.2694658041000366, "step": 3789 }, { "epoch": 10.376454483230663, "grad_norm": 5.036941051483154, "learning_rate": 4.80958904109589e-07, "log_odds_chosen": 1.7946302890777588, "log_odds_ratio": -0.30431467294692993, "logits/chosen": 0.6379749774932861, "logits/rejected": 0.6209670305252075, "logps/chosen": -1.7511606216430664, "logps/rejected": -3.3723700046539307, "loss": 0.7531, "nll_loss": 0.7226458787918091, "rewards/accuracies": 0.875, "rewards/chosen": -0.17511604726314545, "rewards/margins": 0.16212095320224762, "rewards/rejected": -0.33723700046539307, "step": 3790 }, { "epoch": 10.379192334017796, "grad_norm": 4.898536205291748, "learning_rate": 4.808219178082192e-07, "log_odds_chosen": 2.225923538208008, "log_odds_ratio": -0.3186098039150238, "logits/chosen": 0.8357982039451599, "logits/rejected": 0.844566285610199, "logps/chosen": -1.988031029701233, "logps/rejected": -4.1298418045043945, "loss": 0.6764, "nll_loss": 0.6444978713989258, "rewards/accuracies": 0.875, "rewards/chosen": -0.1988030970096588, "rewards/margins": 0.2141810953617096, "rewards/rejected": -0.4129842221736908, "step": 3791 }, { "epoch": 10.381930184804927, "grad_norm": 5.2905192375183105, "learning_rate": 4.806849315068492e-07, "log_odds_chosen": 0.8884627819061279, "log_odds_ratio": -0.4667002260684967, "logits/chosen": 0.6706793904304504, "logits/rejected": 0.6781700849533081, "logps/chosen": -2.220548391342163, "logps/rejected": -3.0208120346069336, "loss": 0.7141, "nll_loss": 0.6674252152442932, "rewards/accuracies": 0.875, "rewards/chosen": -0.2220548391342163, "rewards/margins": 0.08002637326717377, "rewards/rejected": -0.30208122730255127, "step": 3792 }, { "epoch": 10.38466803559206, "grad_norm": 4.873902797698975, "learning_rate": 4.805479452054795e-07, "log_odds_chosen": 2.751105546951294, "log_odds_ratio": -0.34841275215148926, "logits/chosen": 0.8505865931510925, "logits/rejected": 0.8111560940742493, "logps/chosen": -2.486088752746582, "logps/rejected": -5.1957502365112305, "loss": 0.7759, "nll_loss": 0.7410246133804321, "rewards/accuracies": 0.75, "rewards/chosen": -0.24860888719558716, "rewards/margins": 0.27096617221832275, "rewards/rejected": -0.5195750594139099, "step": 3793 }, { "epoch": 10.387405886379192, "grad_norm": 4.658411026000977, "learning_rate": 4.804109589041096e-07, "log_odds_chosen": 2.0370020866394043, "log_odds_ratio": -0.20985554158687592, "logits/chosen": 0.7313898801803589, "logits/rejected": 0.7044111490249634, "logps/chosen": -2.2430002689361572, "logps/rejected": -4.173638820648193, "loss": 0.7753, "nll_loss": 0.7543531656265259, "rewards/accuracies": 1.0, "rewards/chosen": -0.2243000566959381, "rewards/margins": 0.1930638551712036, "rewards/rejected": -0.41736388206481934, "step": 3794 }, { "epoch": 10.390143737166325, "grad_norm": 4.566185474395752, "learning_rate": 4.802739726027398e-07, "log_odds_chosen": 2.1152684688568115, "log_odds_ratio": -0.31692278385162354, "logits/chosen": 0.8427640199661255, "logits/rejected": 0.8418070673942566, "logps/chosen": -1.809027075767517, "logps/rejected": -3.8406248092651367, "loss": 0.7227, "nll_loss": 0.6910080313682556, "rewards/accuracies": 0.75, "rewards/chosen": -0.18090270459651947, "rewards/margins": 0.20315979421138763, "rewards/rejected": -0.3840625286102295, "step": 3795 }, { "epoch": 10.392881587953456, "grad_norm": 4.18062162399292, "learning_rate": 4.801369863013699e-07, "log_odds_chosen": 1.9726741313934326, "log_odds_ratio": -0.31876394152641296, "logits/chosen": 0.9767825603485107, "logits/rejected": 1.0270686149597168, "logps/chosen": -2.329864025115967, "logps/rejected": -4.233242988586426, "loss": 0.7779, "nll_loss": 0.745980441570282, "rewards/accuracies": 0.75, "rewards/chosen": -0.2329864203929901, "rewards/margins": 0.19033785164356232, "rewards/rejected": -0.4233242869377136, "step": 3796 }, { "epoch": 10.395619438740589, "grad_norm": 5.138948917388916, "learning_rate": 4.8e-07, "log_odds_chosen": 1.2831190824508667, "log_odds_ratio": -0.35940322279930115, "logits/chosen": 0.8304610252380371, "logits/rejected": 0.7976917624473572, "logps/chosen": -1.6646134853363037, "logps/rejected": -2.791010618209839, "loss": 0.6551, "nll_loss": 0.6192076802253723, "rewards/accuracies": 1.0, "rewards/chosen": -0.16646134853363037, "rewards/margins": 0.11263974010944366, "rewards/rejected": -0.27910107374191284, "step": 3797 }, { "epoch": 10.39835728952772, "grad_norm": 5.145086765289307, "learning_rate": 4.798630136986302e-07, "log_odds_chosen": 3.407747983932495, "log_odds_ratio": -0.1332668960094452, "logits/chosen": 0.9291548728942871, "logits/rejected": 0.9732786417007446, "logps/chosen": -2.3498144149780273, "logps/rejected": -5.641040325164795, "loss": 0.7953, "nll_loss": 0.7819591760635376, "rewards/accuracies": 1.0, "rewards/chosen": -0.2349814474582672, "rewards/margins": 0.3291226029396057, "rewards/rejected": -0.5641040205955505, "step": 3798 }, { "epoch": 10.401095140314853, "grad_norm": 4.598321437835693, "learning_rate": 4.797260273972603e-07, "log_odds_chosen": 1.2682569026947021, "log_odds_ratio": -0.303612619638443, "logits/chosen": 0.6928498148918152, "logits/rejected": 0.6925482749938965, "logps/chosen": -1.9177345037460327, "logps/rejected": -3.0454423427581787, "loss": 0.7221, "nll_loss": 0.6917694211006165, "rewards/accuracies": 1.0, "rewards/chosen": -0.1917734444141388, "rewards/margins": 0.11277078092098236, "rewards/rejected": -0.30454424023628235, "step": 3799 }, { "epoch": 10.403832991101986, "grad_norm": 4.877328395843506, "learning_rate": 4.795890410958904e-07, "log_odds_chosen": 0.7317792177200317, "log_odds_ratio": -0.49191099405288696, "logits/chosen": 0.7175450325012207, "logits/rejected": 0.6876144409179688, "logps/chosen": -1.5926876068115234, "logps/rejected": -2.181859016418457, "loss": 0.7196, "nll_loss": 0.6703768968582153, "rewards/accuracies": 0.625, "rewards/chosen": -0.15926875174045563, "rewards/margins": 0.05891713500022888, "rewards/rejected": -0.2181859016418457, "step": 3800 }, { "epoch": 10.406570841889117, "grad_norm": 5.026941776275635, "learning_rate": 4.794520547945205e-07, "log_odds_chosen": 1.8711081743240356, "log_odds_ratio": -0.3696224093437195, "logits/chosen": 0.7272319197654724, "logits/rejected": 0.7490415573120117, "logps/chosen": -2.00001859664917, "logps/rejected": -3.790454387664795, "loss": 0.7154, "nll_loss": 0.6784255504608154, "rewards/accuracies": 0.875, "rewards/chosen": -0.20000183582305908, "rewards/margins": 0.17904359102249146, "rewards/rejected": -0.3790454566478729, "step": 3801 }, { "epoch": 10.40930869267625, "grad_norm": 4.544482707977295, "learning_rate": 4.793150684931507e-07, "log_odds_chosen": 2.8875770568847656, "log_odds_ratio": -0.13866083323955536, "logits/chosen": 0.8289246559143066, "logits/rejected": 0.7994255423545837, "logps/chosen": -2.6824264526367188, "logps/rejected": -5.484959125518799, "loss": 0.8139, "nll_loss": 0.8000174760818481, "rewards/accuracies": 1.0, "rewards/chosen": -0.26824265718460083, "rewards/margins": 0.2802532911300659, "rewards/rejected": -0.5484959483146667, "step": 3802 }, { "epoch": 10.412046543463381, "grad_norm": 4.785038948059082, "learning_rate": 4.791780821917808e-07, "log_odds_chosen": 1.2991927862167358, "log_odds_ratio": -0.3510875701904297, "logits/chosen": 0.7811369895935059, "logits/rejected": 0.8157869577407837, "logps/chosen": -1.7950438261032104, "logps/rejected": -2.9806618690490723, "loss": 0.6707, "nll_loss": 0.6356408596038818, "rewards/accuracies": 0.875, "rewards/chosen": -0.17950439453125, "rewards/margins": 0.11856181174516678, "rewards/rejected": -0.2980661988258362, "step": 3803 }, { "epoch": 10.414784394250514, "grad_norm": 4.381002426147461, "learning_rate": 4.79041095890411e-07, "log_odds_chosen": 1.8414372205734253, "log_odds_ratio": -0.24976350367069244, "logits/chosen": 0.7540335655212402, "logits/rejected": 0.8042483925819397, "logps/chosen": -2.2402236461639404, "logps/rejected": -3.9781150817871094, "loss": 0.7489, "nll_loss": 0.7238843441009521, "rewards/accuracies": 1.0, "rewards/chosen": -0.22402237355709076, "rewards/margins": 0.1737891286611557, "rewards/rejected": -0.39781150221824646, "step": 3804 }, { "epoch": 10.417522245037645, "grad_norm": 5.50533390045166, "learning_rate": 4.789041095890411e-07, "log_odds_chosen": 1.5476882457733154, "log_odds_ratio": -0.2588152289390564, "logits/chosen": 0.9481822848320007, "logits/rejected": 0.9970107078552246, "logps/chosen": -2.7230758666992188, "logps/rejected": -4.206012725830078, "loss": 0.778, "nll_loss": 0.752084493637085, "rewards/accuracies": 1.0, "rewards/chosen": -0.2723075747489929, "rewards/margins": 0.14829367399215698, "rewards/rejected": -0.4206012487411499, "step": 3805 }, { "epoch": 10.420260095824778, "grad_norm": 4.908808708190918, "learning_rate": 4.787671232876712e-07, "log_odds_chosen": 0.589112401008606, "log_odds_ratio": -0.5372698903083801, "logits/chosen": 0.8106632232666016, "logits/rejected": 0.7540117502212524, "logps/chosen": -2.20192289352417, "logps/rejected": -2.7358992099761963, "loss": 0.7998, "nll_loss": 0.7460312843322754, "rewards/accuracies": 0.75, "rewards/chosen": -0.22019228339195251, "rewards/margins": 0.05339762195944786, "rewards/rejected": -0.2735899090766907, "step": 3806 }, { "epoch": 10.42299794661191, "grad_norm": 5.723320007324219, "learning_rate": 4.786301369863014e-07, "log_odds_chosen": 2.400125026702881, "log_odds_ratio": -0.3377853333950043, "logits/chosen": 0.9663746356964111, "logits/rejected": 0.9155523777008057, "logps/chosen": -2.5168957710266113, "logps/rejected": -4.794638156890869, "loss": 0.8256, "nll_loss": 0.791835606098175, "rewards/accuracies": 0.875, "rewards/chosen": -0.2516895532608032, "rewards/margins": 0.2277742475271225, "rewards/rejected": -0.4794638156890869, "step": 3807 }, { "epoch": 10.425735797399042, "grad_norm": 4.951436519622803, "learning_rate": 4.784931506849315e-07, "log_odds_chosen": 2.0479049682617188, "log_odds_ratio": -0.3260962963104248, "logits/chosen": 0.9009928703308105, "logits/rejected": 0.949963390827179, "logps/chosen": -2.7960500717163086, "logps/rejected": -4.793601036071777, "loss": 0.7814, "nll_loss": 0.7487824559211731, "rewards/accuracies": 0.75, "rewards/chosen": -0.2796050012111664, "rewards/margins": 0.19975513219833374, "rewards/rejected": -0.4793601632118225, "step": 3808 }, { "epoch": 10.428473648186174, "grad_norm": 4.984851837158203, "learning_rate": 4.783561643835617e-07, "log_odds_chosen": 1.9672505855560303, "log_odds_ratio": -0.2874491214752197, "logits/chosen": 0.8273733258247375, "logits/rejected": 0.8489649891853333, "logps/chosen": -2.1710963249206543, "logps/rejected": -3.9797072410583496, "loss": 0.7272, "nll_loss": 0.6984252333641052, "rewards/accuracies": 0.875, "rewards/chosen": -0.21710965037345886, "rewards/margins": 0.18086105585098267, "rewards/rejected": -0.39797070622444153, "step": 3809 }, { "epoch": 10.431211498973306, "grad_norm": 5.076430797576904, "learning_rate": 4.782191780821918e-07, "log_odds_chosen": 2.1432034969329834, "log_odds_ratio": -0.25827571749687195, "logits/chosen": 0.8539120554924011, "logits/rejected": 0.8612766861915588, "logps/chosen": -2.9384913444519043, "logps/rejected": -5.012833595275879, "loss": 0.703, "nll_loss": 0.6771791577339172, "rewards/accuracies": 0.875, "rewards/chosen": -0.2938491404056549, "rewards/margins": 0.20743423700332642, "rewards/rejected": -0.5012834072113037, "step": 3810 }, { "epoch": 10.433949349760438, "grad_norm": 4.998409271240234, "learning_rate": 4.780821917808219e-07, "log_odds_chosen": 2.535937786102295, "log_odds_ratio": -0.2642076909542084, "logits/chosen": 0.7678738832473755, "logits/rejected": 0.7074496746063232, "logps/chosen": -1.908539056777954, "logps/rejected": -4.294464588165283, "loss": 0.7916, "nll_loss": 0.7651922702789307, "rewards/accuracies": 1.0, "rewards/chosen": -0.19085392355918884, "rewards/margins": 0.23859255015850067, "rewards/rejected": -0.4294464588165283, "step": 3811 }, { "epoch": 10.43668720054757, "grad_norm": 8.433976173400879, "learning_rate": 4.779452054794521e-07, "log_odds_chosen": 0.9491755962371826, "log_odds_ratio": -0.6654875874519348, "logits/chosen": 0.8687384128570557, "logits/rejected": 0.8256233930587769, "logps/chosen": -2.609668254852295, "logps/rejected": -3.475513219833374, "loss": 0.7673, "nll_loss": 0.700764536857605, "rewards/accuracies": 0.875, "rewards/chosen": -0.26096680760383606, "rewards/margins": 0.08658453077077866, "rewards/rejected": -0.3475513458251953, "step": 3812 }, { "epoch": 10.439425051334702, "grad_norm": 5.1196184158325195, "learning_rate": 4.778082191780822e-07, "log_odds_chosen": 0.7485366463661194, "log_odds_ratio": -0.6101728081703186, "logits/chosen": 0.8911027908325195, "logits/rejected": 0.9227427840232849, "logps/chosen": -2.627213478088379, "logps/rejected": -3.3367576599121094, "loss": 0.7396, "nll_loss": 0.6786110997200012, "rewards/accuracies": 0.625, "rewards/chosen": -0.26272135972976685, "rewards/margins": 0.07095443457365036, "rewards/rejected": -0.3336757719516754, "step": 3813 }, { "epoch": 10.442162902121835, "grad_norm": 4.507637977600098, "learning_rate": 4.776712328767123e-07, "log_odds_chosen": 2.355128288269043, "log_odds_ratio": -0.29637646675109863, "logits/chosen": 1.0499380826950073, "logits/rejected": 1.0358556509017944, "logps/chosen": -2.252830982208252, "logps/rejected": -4.516535758972168, "loss": 0.746, "nll_loss": 0.7164016962051392, "rewards/accuracies": 0.875, "rewards/chosen": -0.22528311610221863, "rewards/margins": 0.2263704538345337, "rewards/rejected": -0.4516535699367523, "step": 3814 }, { "epoch": 10.444900752908966, "grad_norm": 5.619471073150635, "learning_rate": 4.775342465753425e-07, "log_odds_chosen": 1.5142219066619873, "log_odds_ratio": -0.4847102165222168, "logits/chosen": 0.8131102919578552, "logits/rejected": 0.7881099581718445, "logps/chosen": -2.0926990509033203, "logps/rejected": -3.5603370666503906, "loss": 0.7377, "nll_loss": 0.6892163753509521, "rewards/accuracies": 0.75, "rewards/chosen": -0.20926989614963531, "rewards/margins": 0.14676380157470703, "rewards/rejected": -0.35603368282318115, "step": 3815 }, { "epoch": 10.447638603696099, "grad_norm": 4.720746994018555, "learning_rate": 4.773972602739726e-07, "log_odds_chosen": 2.3257999420166016, "log_odds_ratio": -0.20469509065151215, "logits/chosen": 0.9503065347671509, "logits/rejected": 1.029738187789917, "logps/chosen": -2.521827459335327, "logps/rejected": -4.746469974517822, "loss": 0.618, "nll_loss": 0.597578227519989, "rewards/accuracies": 0.875, "rewards/chosen": -0.2521827518939972, "rewards/margins": 0.22246424853801727, "rewards/rejected": -0.47464701533317566, "step": 3816 }, { "epoch": 10.45037645448323, "grad_norm": 5.369627952575684, "learning_rate": 4.772602739726027e-07, "log_odds_chosen": 1.54313063621521, "log_odds_ratio": -0.36575889587402344, "logits/chosen": 0.8658370971679688, "logits/rejected": 0.8386619091033936, "logps/chosen": -1.8919668197631836, "logps/rejected": -3.3512730598449707, "loss": 0.7094, "nll_loss": 0.6728051900863647, "rewards/accuracies": 0.875, "rewards/chosen": -0.18919669091701508, "rewards/margins": 0.14593061804771423, "rewards/rejected": -0.3351272940635681, "step": 3817 }, { "epoch": 10.453114305270363, "grad_norm": 5.013386249542236, "learning_rate": 4.771232876712328e-07, "log_odds_chosen": 2.8803796768188477, "log_odds_ratio": -0.2518330216407776, "logits/chosen": 0.7653377652168274, "logits/rejected": 0.8241971135139465, "logps/chosen": -2.601844072341919, "logps/rejected": -5.405951499938965, "loss": 0.7571, "nll_loss": 0.7319348454475403, "rewards/accuracies": 0.875, "rewards/chosen": -0.2601844072341919, "rewards/margins": 0.2804107666015625, "rewards/rejected": -0.5405951738357544, "step": 3818 }, { "epoch": 10.455852156057494, "grad_norm": 6.676403522491455, "learning_rate": 4.76986301369863e-07, "log_odds_chosen": 1.8461172580718994, "log_odds_ratio": -0.21494892239570618, "logits/chosen": 0.6667704582214355, "logits/rejected": 0.6290623545646667, "logps/chosen": -2.4233953952789307, "logps/rejected": -4.148452281951904, "loss": 0.8726, "nll_loss": 0.8511360883712769, "rewards/accuracies": 1.0, "rewards/chosen": -0.24233953654766083, "rewards/margins": 0.1725056916475296, "rewards/rejected": -0.41484522819519043, "step": 3819 }, { "epoch": 10.458590006844627, "grad_norm": 5.388945579528809, "learning_rate": 4.768493150684931e-07, "log_odds_chosen": 1.7861723899841309, "log_odds_ratio": -0.24050824344158173, "logits/chosen": 1.085944652557373, "logits/rejected": 1.1430342197418213, "logps/chosen": -2.3048365116119385, "logps/rejected": -3.995018720626831, "loss": 0.6302, "nll_loss": 0.6061700582504272, "rewards/accuracies": 1.0, "rewards/chosen": -0.23048365116119385, "rewards/margins": 0.1690182387828827, "rewards/rejected": -0.39950186014175415, "step": 3820 }, { "epoch": 10.461327857631758, "grad_norm": 5.386977672576904, "learning_rate": 4.7671232876712324e-07, "log_odds_chosen": 2.480494976043701, "log_odds_ratio": -0.2579347789287567, "logits/chosen": 0.8511793613433838, "logits/rejected": 0.8638163805007935, "logps/chosen": -2.3449697494506836, "logps/rejected": -4.736045837402344, "loss": 0.7845, "nll_loss": 0.758665919303894, "rewards/accuracies": 1.0, "rewards/chosen": -0.23449698090553284, "rewards/margins": 0.23910757899284363, "rewards/rejected": -0.47360455989837646, "step": 3821 }, { "epoch": 10.464065708418891, "grad_norm": 7.224837779998779, "learning_rate": 4.7657534246575344e-07, "log_odds_chosen": 1.0371991395950317, "log_odds_ratio": -0.41672393679618835, "logits/chosen": 0.848798930644989, "logits/rejected": 0.8333964347839355, "logps/chosen": -1.9975652694702148, "logps/rejected": -2.8832666873931885, "loss": 0.702, "nll_loss": 0.6603332757949829, "rewards/accuracies": 0.875, "rewards/chosen": -0.19975653290748596, "rewards/margins": 0.08857014775276184, "rewards/rejected": -0.2883266806602478, "step": 3822 }, { "epoch": 10.466803559206022, "grad_norm": 4.9426655769348145, "learning_rate": 4.7643835616438354e-07, "log_odds_chosen": 3.4899168014526367, "log_odds_ratio": -0.05339351296424866, "logits/chosen": 0.9142975807189941, "logits/rejected": 0.96495521068573, "logps/chosen": -2.2001466751098633, "logps/rejected": -5.5609917640686035, "loss": 0.7325, "nll_loss": 0.7271726131439209, "rewards/accuracies": 1.0, "rewards/chosen": -0.22001466155052185, "rewards/margins": 0.3360845446586609, "rewards/rejected": -0.5560991764068604, "step": 3823 }, { "epoch": 10.469541409993155, "grad_norm": 7.610705852508545, "learning_rate": 4.763013698630137e-07, "log_odds_chosen": 0.8683298826217651, "log_odds_ratio": -0.7396178245544434, "logits/chosen": 1.0897748470306396, "logits/rejected": 1.076237440109253, "logps/chosen": -3.0238442420959473, "logps/rejected": -3.7906036376953125, "loss": 0.7648, "nll_loss": 0.6908177733421326, "rewards/accuracies": 0.75, "rewards/chosen": -0.3023844361305237, "rewards/margins": 0.07667594403028488, "rewards/rejected": -0.37906038761138916, "step": 3824 }, { "epoch": 10.472279260780287, "grad_norm": 4.7245564460754395, "learning_rate": 4.761643835616438e-07, "log_odds_chosen": 3.1296911239624023, "log_odds_ratio": -0.19098764657974243, "logits/chosen": 0.878575325012207, "logits/rejected": 0.949371337890625, "logps/chosen": -2.1181530952453613, "logps/rejected": -5.1007208824157715, "loss": 0.6459, "nll_loss": 0.6267679333686829, "rewards/accuracies": 1.0, "rewards/chosen": -0.21181531250476837, "rewards/margins": 0.29825687408447266, "rewards/rejected": -0.5100721716880798, "step": 3825 }, { "epoch": 10.47501711156742, "grad_norm": 4.205708980560303, "learning_rate": 4.7602739726027394e-07, "log_odds_chosen": 2.4983103275299072, "log_odds_ratio": -0.15229454636573792, "logits/chosen": 0.945813775062561, "logits/rejected": 0.9661646485328674, "logps/chosen": -2.0049328804016113, "logps/rejected": -4.358551979064941, "loss": 0.6641, "nll_loss": 0.6488920450210571, "rewards/accuracies": 1.0, "rewards/chosen": -0.20049329102039337, "rewards/margins": 0.23536188900470734, "rewards/rejected": -0.4358551800251007, "step": 3826 }, { "epoch": 10.477754962354553, "grad_norm": 5.486094951629639, "learning_rate": 4.758904109589041e-07, "log_odds_chosen": 3.122138500213623, "log_odds_ratio": -0.36849746108055115, "logits/chosen": 1.1161421537399292, "logits/rejected": 1.1653120517730713, "logps/chosen": -2.146195650100708, "logps/rejected": -5.1607136726379395, "loss": 0.7595, "nll_loss": 0.7226455211639404, "rewards/accuracies": 0.875, "rewards/chosen": -0.21461957693099976, "rewards/margins": 0.30145180225372314, "rewards/rejected": -0.5160714387893677, "step": 3827 }, { "epoch": 10.480492813141684, "grad_norm": 6.359212875366211, "learning_rate": 4.757534246575342e-07, "log_odds_chosen": 0.3230709433555603, "log_odds_ratio": -0.6814672946929932, "logits/chosen": 0.8044297695159912, "logits/rejected": 0.7533689737319946, "logps/chosen": -2.9240012168884277, "logps/rejected": -3.108548879623413, "loss": 0.8204, "nll_loss": 0.7522390484809875, "rewards/accuracies": 0.75, "rewards/chosen": -0.2924000918865204, "rewards/margins": 0.01845477893948555, "rewards/rejected": -0.31085488200187683, "step": 3828 }, { "epoch": 10.483230663928817, "grad_norm": 5.414031505584717, "learning_rate": 4.756164383561644e-07, "log_odds_chosen": 2.7578845024108887, "log_odds_ratio": -0.16130945086479187, "logits/chosen": 0.9178466200828552, "logits/rejected": 0.9531917572021484, "logps/chosen": -2.2510948181152344, "logps/rejected": -4.901302337646484, "loss": 0.7306, "nll_loss": 0.7144210934638977, "rewards/accuracies": 1.0, "rewards/chosen": -0.22510947287082672, "rewards/margins": 0.26502078771591187, "rewards/rejected": -0.4901302456855774, "step": 3829 }, { "epoch": 10.485968514715948, "grad_norm": 4.506925582885742, "learning_rate": 4.754794520547945e-07, "log_odds_chosen": 1.243491768836975, "log_odds_ratio": -0.2978746294975281, "logits/chosen": 0.7767523527145386, "logits/rejected": 0.7596129775047302, "logps/chosen": -2.169917583465576, "logps/rejected": -3.286303997039795, "loss": 0.6833, "nll_loss": 0.6535019874572754, "rewards/accuracies": 1.0, "rewards/chosen": -0.21699176728725433, "rewards/margins": 0.1116386204957962, "rewards/rejected": -0.32863038778305054, "step": 3830 }, { "epoch": 10.48870636550308, "grad_norm": 5.611420154571533, "learning_rate": 4.7534246575342465e-07, "log_odds_chosen": 3.1621062755584717, "log_odds_ratio": -0.17988301813602448, "logits/chosen": 0.8857561349868774, "logits/rejected": 0.9318347573280334, "logps/chosen": -1.8196897506713867, "logps/rejected": -4.8015289306640625, "loss": 0.8424, "nll_loss": 0.8244122862815857, "rewards/accuracies": 1.0, "rewards/chosen": -0.18196897208690643, "rewards/margins": 0.2981839179992676, "rewards/rejected": -0.4801529347896576, "step": 3831 }, { "epoch": 10.491444216290212, "grad_norm": 7.084223747253418, "learning_rate": 4.7520547945205475e-07, "log_odds_chosen": 0.3764176368713379, "log_odds_ratio": -0.5979331731796265, "logits/chosen": 0.6754217147827148, "logits/rejected": 0.7219865322113037, "logps/chosen": -3.5673370361328125, "logps/rejected": -3.9282238483428955, "loss": 0.8029, "nll_loss": 0.7430782318115234, "rewards/accuracies": 0.5, "rewards/chosen": -0.3567337095737457, "rewards/margins": 0.03608868271112442, "rewards/rejected": -0.39282238483428955, "step": 3832 }, { "epoch": 10.494182067077345, "grad_norm": 4.6595778465271, "learning_rate": 4.750684931506849e-07, "log_odds_chosen": 1.7918875217437744, "log_odds_ratio": -0.40773671865463257, "logits/chosen": 0.8533959984779358, "logits/rejected": 0.9078308343887329, "logps/chosen": -2.2748024463653564, "logps/rejected": -3.984617233276367, "loss": 0.6929, "nll_loss": 0.6521055698394775, "rewards/accuracies": 1.0, "rewards/chosen": -0.2274802327156067, "rewards/margins": 0.1709814965724945, "rewards/rejected": -0.3984617590904236, "step": 3833 }, { "epoch": 10.496919917864476, "grad_norm": 4.953776836395264, "learning_rate": 4.7493150684931505e-07, "log_odds_chosen": 1.223785161972046, "log_odds_ratio": -0.3284037113189697, "logits/chosen": 0.8237311840057373, "logits/rejected": 0.774838387966156, "logps/chosen": -2.4705259799957275, "logps/rejected": -3.5872159004211426, "loss": 0.7052, "nll_loss": 0.6723524332046509, "rewards/accuracies": 1.0, "rewards/chosen": -0.2470526099205017, "rewards/margins": 0.11166898161172867, "rewards/rejected": -0.3587215840816498, "step": 3834 }, { "epoch": 10.499657768651609, "grad_norm": 6.790884494781494, "learning_rate": 4.7479452054794515e-07, "log_odds_chosen": 1.7601969242095947, "log_odds_ratio": -0.39001208543777466, "logits/chosen": 0.8187861442565918, "logits/rejected": 0.7702563405036926, "logps/chosen": -2.375624418258667, "logps/rejected": -4.016933441162109, "loss": 0.7134, "nll_loss": 0.6744197010993958, "rewards/accuracies": 0.75, "rewards/chosen": -0.23756244778633118, "rewards/margins": 0.16413086652755737, "rewards/rejected": -0.40169331431388855, "step": 3835 }, { "epoch": 10.50239561943874, "grad_norm": 4.6117048263549805, "learning_rate": 4.7465753424657536e-07, "log_odds_chosen": 2.3586652278900146, "log_odds_ratio": -0.23145928978919983, "logits/chosen": 0.7478407621383667, "logits/rejected": 0.7549140453338623, "logps/chosen": -2.3669729232788086, "logps/rejected": -4.614892959594727, "loss": 0.7247, "nll_loss": 0.7015494704246521, "rewards/accuracies": 1.0, "rewards/chosen": -0.2366972714662552, "rewards/margins": 0.2247920036315918, "rewards/rejected": -0.4614892899990082, "step": 3836 }, { "epoch": 10.505133470225873, "grad_norm": 5.384252071380615, "learning_rate": 4.7452054794520546e-07, "log_odds_chosen": 0.9339190125465393, "log_odds_ratio": -0.3517237901687622, "logits/chosen": 0.8849939107894897, "logits/rejected": 0.8054671287536621, "logps/chosen": -2.256706714630127, "logps/rejected": -3.0827865600585938, "loss": 0.7174, "nll_loss": 0.6822057962417603, "rewards/accuracies": 1.0, "rewards/chosen": -0.2256706804037094, "rewards/margins": 0.08260796964168549, "rewards/rejected": -0.3082786202430725, "step": 3837 }, { "epoch": 10.507871321013004, "grad_norm": 4.9492058753967285, "learning_rate": 4.743835616438356e-07, "log_odds_chosen": 3.8574700355529785, "log_odds_ratio": -0.11740315705537796, "logits/chosen": 0.8328114748001099, "logits/rejected": 0.7563924193382263, "logps/chosen": -2.3605434894561768, "logps/rejected": -6.109213829040527, "loss": 0.8217, "nll_loss": 0.8100010752677917, "rewards/accuracies": 1.0, "rewards/chosen": -0.23605436086654663, "rewards/margins": 0.3748670220375061, "rewards/rejected": -0.6109213829040527, "step": 3838 }, { "epoch": 10.510609171800137, "grad_norm": 4.410098552703857, "learning_rate": 4.742465753424657e-07, "log_odds_chosen": 3.4415149688720703, "log_odds_ratio": -0.13389739394187927, "logits/chosen": 0.9008907079696655, "logits/rejected": 0.9116798639297485, "logps/chosen": -2.4414496421813965, "logps/rejected": -5.778722763061523, "loss": 0.7092, "nll_loss": 0.6958314776420593, "rewards/accuracies": 1.0, "rewards/chosen": -0.2441449761390686, "rewards/margins": 0.33372730016708374, "rewards/rejected": -0.5778722763061523, "step": 3839 }, { "epoch": 10.513347022587268, "grad_norm": 5.36427640914917, "learning_rate": 4.7410958904109586e-07, "log_odds_chosen": 2.901278495788574, "log_odds_ratio": -0.21587839722633362, "logits/chosen": 0.8217272758483887, "logits/rejected": 0.8307934999465942, "logps/chosen": -2.3213887214660645, "logps/rejected": -5.08644962310791, "loss": 0.7522, "nll_loss": 0.730582058429718, "rewards/accuracies": 0.875, "rewards/chosen": -0.23213888704776764, "rewards/margins": 0.27650606632232666, "rewards/rejected": -0.5086449384689331, "step": 3840 }, { "epoch": 10.516084873374401, "grad_norm": 4.333804130554199, "learning_rate": 4.73972602739726e-07, "log_odds_chosen": 1.9907581806182861, "log_odds_ratio": -0.25657686591148376, "logits/chosen": 0.9044194221496582, "logits/rejected": 0.8549438714981079, "logps/chosen": -1.6579272747039795, "logps/rejected": -3.466862678527832, "loss": 0.7849, "nll_loss": 0.7592846751213074, "rewards/accuracies": 1.0, "rewards/chosen": -0.16579274833202362, "rewards/margins": 0.18089351058006287, "rewards/rejected": -0.3466862738132477, "step": 3841 }, { "epoch": 10.518822724161533, "grad_norm": 4.31965446472168, "learning_rate": 4.738356164383561e-07, "log_odds_chosen": 2.742431879043579, "log_odds_ratio": -0.21678957343101501, "logits/chosen": 0.7166538238525391, "logits/rejected": 0.7914896607398987, "logps/chosen": -2.352283000946045, "logps/rejected": -4.976562023162842, "loss": 0.8047, "nll_loss": 0.7830528616905212, "rewards/accuracies": 0.875, "rewards/chosen": -0.23522832989692688, "rewards/margins": 0.2624278664588928, "rewards/rejected": -0.4976561963558197, "step": 3842 }, { "epoch": 10.521560574948666, "grad_norm": 4.305450916290283, "learning_rate": 4.736986301369863e-07, "log_odds_chosen": 1.8009538650512695, "log_odds_ratio": -0.255330353975296, "logits/chosen": 0.7530866861343384, "logits/rejected": 0.7831673622131348, "logps/chosen": -2.41688871383667, "logps/rejected": -4.144498348236084, "loss": 0.7535, "nll_loss": 0.7280049920082092, "rewards/accuracies": 1.0, "rewards/chosen": -0.24168889224529266, "rewards/margins": 0.17276091873645782, "rewards/rejected": -0.4144498109817505, "step": 3843 }, { "epoch": 10.524298425735797, "grad_norm": 5.024235725402832, "learning_rate": 4.735616438356164e-07, "log_odds_chosen": 2.5710179805755615, "log_odds_ratio": -0.15118516981601715, "logits/chosen": 0.8622312545776367, "logits/rejected": 0.9307795763015747, "logps/chosen": -2.9154934883117676, "logps/rejected": -5.415388584136963, "loss": 0.8522, "nll_loss": 0.8371126651763916, "rewards/accuracies": 1.0, "rewards/chosen": -0.29154935479164124, "rewards/margins": 0.24998953938484192, "rewards/rejected": -0.5415388941764832, "step": 3844 }, { "epoch": 10.52703627652293, "grad_norm": 5.57368803024292, "learning_rate": 4.734246575342465e-07, "log_odds_chosen": 1.867445707321167, "log_odds_ratio": -0.34694769978523254, "logits/chosen": 0.9775013327598572, "logits/rejected": 1.056740164756775, "logps/chosen": -2.2227652072906494, "logps/rejected": -3.9775466918945312, "loss": 0.7209, "nll_loss": 0.6862366795539856, "rewards/accuracies": 0.625, "rewards/chosen": -0.222276508808136, "rewards/margins": 0.17547816038131714, "rewards/rejected": -0.3977546691894531, "step": 3845 }, { "epoch": 10.529774127310061, "grad_norm": 4.237987041473389, "learning_rate": 4.732876712328767e-07, "log_odds_chosen": 1.738890528678894, "log_odds_ratio": -0.21968159079551697, "logits/chosen": 0.787016749382019, "logits/rejected": 0.8084408640861511, "logps/chosen": -2.366032600402832, "logps/rejected": -4.000149726867676, "loss": 0.681, "nll_loss": 0.6590547561645508, "rewards/accuracies": 1.0, "rewards/chosen": -0.2366032600402832, "rewards/margins": 0.1634116768836975, "rewards/rejected": -0.4000149369239807, "step": 3846 }, { "epoch": 10.532511978097194, "grad_norm": 6.892067909240723, "learning_rate": 4.731506849315068e-07, "log_odds_chosen": 1.1753334999084473, "log_odds_ratio": -0.5904326438903809, "logits/chosen": 0.8023033142089844, "logits/rejected": 0.7984353303909302, "logps/chosen": -2.658477783203125, "logps/rejected": -3.7436928749084473, "loss": 0.8314, "nll_loss": 0.7723402976989746, "rewards/accuracies": 0.75, "rewards/chosen": -0.2658478021621704, "rewards/margins": 0.10852152109146118, "rewards/rejected": -0.3743693232536316, "step": 3847 }, { "epoch": 10.535249828884325, "grad_norm": 4.834316730499268, "learning_rate": 4.7301369863013697e-07, "log_odds_chosen": 1.7705590724945068, "log_odds_ratio": -0.20011653006076813, "logits/chosen": 0.799842357635498, "logits/rejected": 0.7545152306556702, "logps/chosen": -2.0543131828308105, "logps/rejected": -3.666259765625, "loss": 0.7649, "nll_loss": 0.7448946237564087, "rewards/accuracies": 1.0, "rewards/chosen": -0.20543134212493896, "rewards/margins": 0.16119468212127686, "rewards/rejected": -0.36662599444389343, "step": 3848 }, { "epoch": 10.537987679671458, "grad_norm": 6.594061851501465, "learning_rate": 4.7287671232876707e-07, "log_odds_chosen": 1.9161473512649536, "log_odds_ratio": -0.45143356919288635, "logits/chosen": 0.9713991284370422, "logits/rejected": 0.9500830173492432, "logps/chosen": -2.3325395584106445, "logps/rejected": -4.1250691413879395, "loss": 0.8472, "nll_loss": 0.802055835723877, "rewards/accuracies": 0.75, "rewards/chosen": -0.23325397074222565, "rewards/margins": 0.17925293743610382, "rewards/rejected": -0.41250693798065186, "step": 3849 }, { "epoch": 10.54072553045859, "grad_norm": 4.359065532684326, "learning_rate": 4.727397260273973e-07, "log_odds_chosen": 2.956743001937866, "log_odds_ratio": -0.20531240105628967, "logits/chosen": 0.9553594589233398, "logits/rejected": 0.9438210725784302, "logps/chosen": -2.0881271362304688, "logps/rejected": -4.9082865715026855, "loss": 0.7621, "nll_loss": 0.7415469884872437, "rewards/accuracies": 1.0, "rewards/chosen": -0.20881271362304688, "rewards/margins": 0.28201594948768616, "rewards/rejected": -0.49082866311073303, "step": 3850 }, { "epoch": 10.543463381245722, "grad_norm": 4.810606002807617, "learning_rate": 4.726027397260274e-07, "log_odds_chosen": 1.8948869705200195, "log_odds_ratio": -0.24155110120773315, "logits/chosen": 0.9646617770195007, "logits/rejected": 1.0057520866394043, "logps/chosen": -1.9801546335220337, "logps/rejected": -3.716507911682129, "loss": 0.752, "nll_loss": 0.7278041839599609, "rewards/accuracies": 1.0, "rewards/chosen": -0.1980154663324356, "rewards/margins": 0.1736353486776352, "rewards/rejected": -0.3716508150100708, "step": 3851 }, { "epoch": 10.546201232032853, "grad_norm": 6.781152725219727, "learning_rate": 4.724657534246575e-07, "log_odds_chosen": 1.5794873237609863, "log_odds_ratio": -0.30620238184928894, "logits/chosen": 0.8951488733291626, "logits/rejected": 0.9104633331298828, "logps/chosen": -2.2269396781921387, "logps/rejected": -3.7178196907043457, "loss": 0.9152, "nll_loss": 0.8846135139465332, "rewards/accuracies": 1.0, "rewards/chosen": -0.22269397974014282, "rewards/margins": 0.14908798038959503, "rewards/rejected": -0.37178197503089905, "step": 3852 }, { "epoch": 10.548939082819986, "grad_norm": 5.408519744873047, "learning_rate": 4.723287671232877e-07, "log_odds_chosen": 2.6632025241851807, "log_odds_ratio": -0.28909483551979065, "logits/chosen": 0.6412705183029175, "logits/rejected": 0.6109781265258789, "logps/chosen": -2.381976366043091, "logps/rejected": -4.979455947875977, "loss": 0.7278, "nll_loss": 0.6989045739173889, "rewards/accuracies": 0.875, "rewards/chosen": -0.23819762468338013, "rewards/margins": 0.2597479522228241, "rewards/rejected": -0.4979456067085266, "step": 3853 }, { "epoch": 10.55167693360712, "grad_norm": 4.933095455169678, "learning_rate": 4.721917808219178e-07, "log_odds_chosen": 2.5602996349334717, "log_odds_ratio": -0.2421029806137085, "logits/chosen": 0.9436125159263611, "logits/rejected": 1.0169044733047485, "logps/chosen": -1.651750087738037, "logps/rejected": -4.017807960510254, "loss": 0.6402, "nll_loss": 0.6160117387771606, "rewards/accuracies": 0.875, "rewards/chosen": -0.16517502069473267, "rewards/margins": 0.23660582304000854, "rewards/rejected": -0.4017808437347412, "step": 3854 }, { "epoch": 10.55441478439425, "grad_norm": 5.352468967437744, "learning_rate": 4.7205479452054793e-07, "log_odds_chosen": 0.9104292392730713, "log_odds_ratio": -0.46375101804733276, "logits/chosen": 0.8865172863006592, "logits/rejected": 0.9074019193649292, "logps/chosen": -2.3207035064697266, "logps/rejected": -3.1344313621520996, "loss": 0.6911, "nll_loss": 0.6446976065635681, "rewards/accuracies": 0.75, "rewards/chosen": -0.23207035660743713, "rewards/margins": 0.08137276768684387, "rewards/rejected": -0.313443124294281, "step": 3855 }, { "epoch": 10.557152635181383, "grad_norm": 4.845108509063721, "learning_rate": 4.7191780821917803e-07, "log_odds_chosen": 2.196918487548828, "log_odds_ratio": -0.2224644273519516, "logits/chosen": 0.9342736005783081, "logits/rejected": 0.9471620321273804, "logps/chosen": -2.5605382919311523, "logps/rejected": -4.681232929229736, "loss": 0.7603, "nll_loss": 0.7380857467651367, "rewards/accuracies": 1.0, "rewards/chosen": -0.2560538351535797, "rewards/margins": 0.21206945180892944, "rewards/rejected": -0.46812331676483154, "step": 3856 }, { "epoch": 10.559890485968515, "grad_norm": 4.975311756134033, "learning_rate": 4.7178082191780823e-07, "log_odds_chosen": 1.827117681503296, "log_odds_ratio": -0.29753226041793823, "logits/chosen": 0.7325863242149353, "logits/rejected": 0.749470591545105, "logps/chosen": -1.977724313735962, "logps/rejected": -3.689030170440674, "loss": 0.7087, "nll_loss": 0.6789137125015259, "rewards/accuracies": 1.0, "rewards/chosen": -0.19777242839336395, "rewards/margins": 0.17113059759140015, "rewards/rejected": -0.3689030408859253, "step": 3857 }, { "epoch": 10.562628336755647, "grad_norm": 5.076498985290527, "learning_rate": 4.7164383561643833e-07, "log_odds_chosen": 1.5521215200424194, "log_odds_ratio": -0.3217763304710388, "logits/chosen": 0.9734138250350952, "logits/rejected": 1.0283479690551758, "logps/chosen": -3.4491944313049316, "logps/rejected": -4.954171657562256, "loss": 0.8333, "nll_loss": 0.8011175394058228, "rewards/accuracies": 1.0, "rewards/chosen": -0.34491944313049316, "rewards/margins": 0.15049771964550018, "rewards/rejected": -0.49541717767715454, "step": 3858 }, { "epoch": 10.565366187542779, "grad_norm": 6.2743024826049805, "learning_rate": 4.7150684931506843e-07, "log_odds_chosen": 1.9673364162445068, "log_odds_ratio": -0.33298492431640625, "logits/chosen": 0.8990374803543091, "logits/rejected": 0.9022525548934937, "logps/chosen": -2.134106159210205, "logps/rejected": -3.9694221019744873, "loss": 0.7772, "nll_loss": 0.743867039680481, "rewards/accuracies": 0.875, "rewards/chosen": -0.2134106159210205, "rewards/margins": 0.18353161215782166, "rewards/rejected": -0.3969421982765198, "step": 3859 }, { "epoch": 10.568104038329912, "grad_norm": 5.559459686279297, "learning_rate": 4.7136986301369864e-07, "log_odds_chosen": 1.4829473495483398, "log_odds_ratio": -0.3248043656349182, "logits/chosen": 0.804052472114563, "logits/rejected": 0.7964494228363037, "logps/chosen": -1.8566796779632568, "logps/rejected": -3.174014091491699, "loss": 0.7014, "nll_loss": 0.6688747406005859, "rewards/accuracies": 1.0, "rewards/chosen": -0.1856679767370224, "rewards/margins": 0.13173341751098633, "rewards/rejected": -0.3174014091491699, "step": 3860 }, { "epoch": 10.570841889117043, "grad_norm": 6.904274940490723, "learning_rate": 4.7123287671232874e-07, "log_odds_chosen": 2.1420607566833496, "log_odds_ratio": -0.2670961022377014, "logits/chosen": 0.9220035076141357, "logits/rejected": 0.9508260488510132, "logps/chosen": -2.3092808723449707, "logps/rejected": -4.285859107971191, "loss": 0.7691, "nll_loss": 0.7423926591873169, "rewards/accuracies": 0.875, "rewards/chosen": -0.23092809319496155, "rewards/margins": 0.1976577788591385, "rewards/rejected": -0.42858588695526123, "step": 3861 }, { "epoch": 10.573579739904176, "grad_norm": 5.442307472229004, "learning_rate": 4.710958904109589e-07, "log_odds_chosen": 3.38577938079834, "log_odds_ratio": -0.2238629013299942, "logits/chosen": 0.8415645360946655, "logits/rejected": 0.9079371690750122, "logps/chosen": -2.6480770111083984, "logps/rejected": -5.945041179656982, "loss": 0.7505, "nll_loss": 0.7280959486961365, "rewards/accuracies": 0.875, "rewards/chosen": -0.26480770111083984, "rewards/margins": 0.3296964168548584, "rewards/rejected": -0.5945041179656982, "step": 3862 }, { "epoch": 10.576317590691307, "grad_norm": 6.636511325836182, "learning_rate": 4.70958904109589e-07, "log_odds_chosen": 1.3108656406402588, "log_odds_ratio": -0.5217838883399963, "logits/chosen": 0.9254021644592285, "logits/rejected": 0.8903443813323975, "logps/chosen": -2.3582208156585693, "logps/rejected": -3.5997748374938965, "loss": 0.7522, "nll_loss": 0.7000108361244202, "rewards/accuracies": 0.75, "rewards/chosen": -0.23582211136817932, "rewards/margins": 0.12415540218353271, "rewards/rejected": -0.35997748374938965, "step": 3863 }, { "epoch": 10.57905544147844, "grad_norm": 6.334865570068359, "learning_rate": 4.708219178082192e-07, "log_odds_chosen": 0.7359590530395508, "log_odds_ratio": -0.539323091506958, "logits/chosen": 1.0252563953399658, "logits/rejected": 1.0145795345306396, "logps/chosen": -2.2272050380706787, "logps/rejected": -2.9481706619262695, "loss": 0.7592, "nll_loss": 0.70527583360672, "rewards/accuracies": 0.75, "rewards/chosen": -0.22272050380706787, "rewards/margins": 0.07209654897451401, "rewards/rejected": -0.2948170602321625, "step": 3864 }, { "epoch": 10.581793292265571, "grad_norm": 5.527751445770264, "learning_rate": 4.706849315068493e-07, "log_odds_chosen": 1.8709460496902466, "log_odds_ratio": -0.3518888056278229, "logits/chosen": 1.043680191040039, "logits/rejected": 1.0464274883270264, "logps/chosen": -2.1899945735931396, "logps/rejected": -3.9281005859375, "loss": 0.7416, "nll_loss": 0.706460177898407, "rewards/accuracies": 0.875, "rewards/chosen": -0.218999445438385, "rewards/margins": 0.17381060123443604, "rewards/rejected": -0.39281004667282104, "step": 3865 }, { "epoch": 10.584531143052704, "grad_norm": 5.170589923858643, "learning_rate": 4.705479452054794e-07, "log_odds_chosen": 2.180837869644165, "log_odds_ratio": -0.1852802336215973, "logits/chosen": 0.9379937648773193, "logits/rejected": 0.9760537147521973, "logps/chosen": -2.164022207260132, "logps/rejected": -4.19168758392334, "loss": 0.7163, "nll_loss": 0.69777512550354, "rewards/accuracies": 1.0, "rewards/chosen": -0.21640223264694214, "rewards/margins": 0.2027665376663208, "rewards/rejected": -0.41916874051094055, "step": 3866 }, { "epoch": 10.587268993839835, "grad_norm": 6.879335880279541, "learning_rate": 4.704109589041096e-07, "log_odds_chosen": 1.4690101146697998, "log_odds_ratio": -0.5505207777023315, "logits/chosen": 0.7910686731338501, "logits/rejected": 0.7295893430709839, "logps/chosen": -2.155975818634033, "logps/rejected": -3.5256590843200684, "loss": 0.7961, "nll_loss": 0.7410463094711304, "rewards/accuracies": 0.875, "rewards/chosen": -0.21559758484363556, "rewards/margins": 0.13696829974651337, "rewards/rejected": -0.3525658845901489, "step": 3867 }, { "epoch": 10.590006844626968, "grad_norm": 5.192915916442871, "learning_rate": 4.702739726027397e-07, "log_odds_chosen": 2.160590410232544, "log_odds_ratio": -0.21177077293395996, "logits/chosen": 1.1882705688476562, "logits/rejected": 1.216008186340332, "logps/chosen": -2.2698347568511963, "logps/rejected": -4.355547904968262, "loss": 0.6824, "nll_loss": 0.6611743569374084, "rewards/accuracies": 0.875, "rewards/chosen": -0.2269834727048874, "rewards/margins": 0.20857128500938416, "rewards/rejected": -0.43555474281311035, "step": 3868 }, { "epoch": 10.5927446954141, "grad_norm": 4.303857326507568, "learning_rate": 4.7013698630136985e-07, "log_odds_chosen": 2.0639007091522217, "log_odds_ratio": -0.24384565651416779, "logits/chosen": 0.7352159023284912, "logits/rejected": 0.800083577632904, "logps/chosen": -3.2432899475097656, "logps/rejected": -5.231485843658447, "loss": 0.774, "nll_loss": 0.7495816946029663, "rewards/accuracies": 1.0, "rewards/chosen": -0.3243289887905121, "rewards/margins": 0.1988195925951004, "rewards/rejected": -0.5231485962867737, "step": 3869 }, { "epoch": 10.595482546201232, "grad_norm": 6.206543445587158, "learning_rate": 4.6999999999999995e-07, "log_odds_chosen": 0.8692779541015625, "log_odds_ratio": -0.542312741279602, "logits/chosen": 0.754482090473175, "logits/rejected": 0.6716062426567078, "logps/chosen": -3.3850300312042236, "logps/rejected": -4.183223724365234, "loss": 0.7899, "nll_loss": 0.7356932163238525, "rewards/accuracies": 0.625, "rewards/chosen": -0.33850300312042236, "rewards/margins": 0.07981935143470764, "rewards/rejected": -0.4183223247528076, "step": 3870 }, { "epoch": 10.598220396988363, "grad_norm": 5.038290977478027, "learning_rate": 4.6986301369863015e-07, "log_odds_chosen": 4.4192352294921875, "log_odds_ratio": -0.10163983702659607, "logits/chosen": 1.0458576679229736, "logits/rejected": 1.0903584957122803, "logps/chosen": -2.1538209915161133, "logps/rejected": -6.41175651550293, "loss": 0.7402, "nll_loss": 0.7300789952278137, "rewards/accuracies": 1.0, "rewards/chosen": -0.21538209915161133, "rewards/margins": 0.4257936179637909, "rewards/rejected": -0.6411757469177246, "step": 3871 }, { "epoch": 10.600958247775496, "grad_norm": 5.66950798034668, "learning_rate": 4.6972602739726025e-07, "log_odds_chosen": 1.307832956314087, "log_odds_ratio": -0.42880013585090637, "logits/chosen": 0.8317379355430603, "logits/rejected": 0.874189019203186, "logps/chosen": -3.136545419692993, "logps/rejected": -4.412111759185791, "loss": 0.8161, "nll_loss": 0.7732523679733276, "rewards/accuracies": 0.75, "rewards/chosen": -0.3136545419692993, "rewards/margins": 0.12755665183067322, "rewards/rejected": -0.44121119379997253, "step": 3872 }, { "epoch": 10.603696098562628, "grad_norm": 5.504851818084717, "learning_rate": 4.6958904109589035e-07, "log_odds_chosen": 2.827660083770752, "log_odds_ratio": -0.21813881397247314, "logits/chosen": 0.7180889248847961, "logits/rejected": 0.7033522129058838, "logps/chosen": -1.8565492630004883, "logps/rejected": -4.542213439941406, "loss": 0.8285, "nll_loss": 0.8067021369934082, "rewards/accuracies": 1.0, "rewards/chosen": -0.1856549233198166, "rewards/margins": 0.26856639981269836, "rewards/rejected": -0.45422136783599854, "step": 3873 }, { "epoch": 10.60643394934976, "grad_norm": 4.467562198638916, "learning_rate": 4.6945205479452056e-07, "log_odds_chosen": 1.52000093460083, "log_odds_ratio": -0.3240017890930176, "logits/chosen": 0.8028257489204407, "logits/rejected": 0.7913141846656799, "logps/chosen": -2.0362868309020996, "logps/rejected": -3.395864248275757, "loss": 0.7868, "nll_loss": 0.7543542385101318, "rewards/accuracies": 0.875, "rewards/chosen": -0.20362870395183563, "rewards/margins": 0.1359577476978302, "rewards/rejected": -0.33958643674850464, "step": 3874 }, { "epoch": 10.609171800136892, "grad_norm": 4.707484245300293, "learning_rate": 4.6931506849315065e-07, "log_odds_chosen": 2.1370677947998047, "log_odds_ratio": -0.3795120418071747, "logits/chosen": 0.6915017366409302, "logits/rejected": 0.7340246438980103, "logps/chosen": -2.2202751636505127, "logps/rejected": -4.25434684753418, "loss": 0.7402, "nll_loss": 0.7022701501846313, "rewards/accuracies": 0.625, "rewards/chosen": -0.22202752530574799, "rewards/margins": 0.2034071534872055, "rewards/rejected": -0.4254346787929535, "step": 3875 }, { "epoch": 10.611909650924025, "grad_norm": 5.729836463928223, "learning_rate": 4.691780821917808e-07, "log_odds_chosen": 1.380171537399292, "log_odds_ratio": -0.361365407705307, "logits/chosen": 0.9005078673362732, "logits/rejected": 0.9430025219917297, "logps/chosen": -2.5850231647491455, "logps/rejected": -3.905121326446533, "loss": 0.7365, "nll_loss": 0.7003952860832214, "rewards/accuracies": 0.875, "rewards/chosen": -0.258502334356308, "rewards/margins": 0.132009819149971, "rewards/rejected": -0.3905121684074402, "step": 3876 }, { "epoch": 10.614647501711158, "grad_norm": 6.094488620758057, "learning_rate": 4.6904109589041096e-07, "log_odds_chosen": 1.3673593997955322, "log_odds_ratio": -0.28829076886177063, "logits/chosen": 0.8532689213752747, "logits/rejected": 0.8056917786598206, "logps/chosen": -1.295253872871399, "logps/rejected": -2.456425428390503, "loss": 0.6137, "nll_loss": 0.5848714113235474, "rewards/accuracies": 1.0, "rewards/chosen": -0.12952539324760437, "rewards/margins": 0.11611717194318771, "rewards/rejected": -0.2456425428390503, "step": 3877 }, { "epoch": 10.617385352498289, "grad_norm": 4.641721725463867, "learning_rate": 4.689041095890411e-07, "log_odds_chosen": 3.5304226875305176, "log_odds_ratio": -0.2694060206413269, "logits/chosen": 0.7976002097129822, "logits/rejected": 0.8104555010795593, "logps/chosen": -2.8175482749938965, "logps/rejected": -6.28228759765625, "loss": 0.8078, "nll_loss": 0.7808603048324585, "rewards/accuracies": 0.875, "rewards/chosen": -0.28175485134124756, "rewards/margins": 0.34647393226623535, "rewards/rejected": -0.6282287836074829, "step": 3878 }, { "epoch": 10.62012320328542, "grad_norm": 5.272597789764404, "learning_rate": 4.687671232876712e-07, "log_odds_chosen": 1.8599419593811035, "log_odds_ratio": -0.5016339421272278, "logits/chosen": 1.15385901927948, "logits/rejected": 1.2261120080947876, "logps/chosen": -2.6429903507232666, "logps/rejected": -4.446281433105469, "loss": 0.7488, "nll_loss": 0.6986678242683411, "rewards/accuracies": 0.75, "rewards/chosen": -0.26429903507232666, "rewards/margins": 0.1803290992975235, "rewards/rejected": -0.44462811946868896, "step": 3879 }, { "epoch": 10.622861054072553, "grad_norm": 4.4153337478637695, "learning_rate": 4.686301369863013e-07, "log_odds_chosen": 2.2442688941955566, "log_odds_ratio": -0.16175726056098938, "logits/chosen": 0.7901855111122131, "logits/rejected": 0.8218996524810791, "logps/chosen": -2.0241734981536865, "logps/rejected": -4.085331916809082, "loss": 0.68, "nll_loss": 0.6638551950454712, "rewards/accuracies": 1.0, "rewards/chosen": -0.20241735875606537, "rewards/margins": 0.20611584186553955, "rewards/rejected": -0.4085332155227661, "step": 3880 }, { "epoch": 10.625598904859686, "grad_norm": 5.5722198486328125, "learning_rate": 4.684931506849315e-07, "log_odds_chosen": 2.5148167610168457, "log_odds_ratio": -0.15350262820720673, "logits/chosen": 0.6264932751655579, "logits/rejected": 0.49442532658576965, "logps/chosen": -1.7098336219787598, "logps/rejected": -4.036076068878174, "loss": 0.705, "nll_loss": 0.6896538734436035, "rewards/accuracies": 1.0, "rewards/chosen": -0.17098335921764374, "rewards/margins": 0.23262426257133484, "rewards/rejected": -0.4036076068878174, "step": 3881 }, { "epoch": 10.628336755646817, "grad_norm": 5.2491631507873535, "learning_rate": 4.683561643835616e-07, "log_odds_chosen": 2.3610541820526123, "log_odds_ratio": -0.1784006953239441, "logits/chosen": 0.8763778805732727, "logits/rejected": 0.9079170227050781, "logps/chosen": -1.8965198993682861, "logps/rejected": -4.121755123138428, "loss": 0.6561, "nll_loss": 0.638268232345581, "rewards/accuracies": 1.0, "rewards/chosen": -0.1896519958972931, "rewards/margins": 0.2225235253572464, "rewards/rejected": -0.4121755063533783, "step": 3882 }, { "epoch": 10.63107460643395, "grad_norm": 5.794450759887695, "learning_rate": 4.6821917808219177e-07, "log_odds_chosen": 1.2503714561462402, "log_odds_ratio": -0.37365663051605225, "logits/chosen": 0.6930215954780579, "logits/rejected": 0.6906342506408691, "logps/chosen": -2.125004768371582, "logps/rejected": -3.294299602508545, "loss": 0.7179, "nll_loss": 0.6805253624916077, "rewards/accuracies": 0.75, "rewards/chosen": -0.2125004678964615, "rewards/margins": 0.11692947894334793, "rewards/rejected": -0.3294299244880676, "step": 3883 }, { "epoch": 10.633812457221081, "grad_norm": 5.364960670471191, "learning_rate": 4.680821917808219e-07, "log_odds_chosen": 1.3337558507919312, "log_odds_ratio": -0.3157837390899658, "logits/chosen": 0.6685717701911926, "logits/rejected": 0.5762984752655029, "logps/chosen": -1.8050645589828491, "logps/rejected": -2.9312806129455566, "loss": 0.6992, "nll_loss": 0.6676364541053772, "rewards/accuracies": 0.875, "rewards/chosen": -0.18050643801689148, "rewards/margins": 0.11262160539627075, "rewards/rejected": -0.2931280732154846, "step": 3884 }, { "epoch": 10.636550308008214, "grad_norm": 5.8662285804748535, "learning_rate": 4.6794520547945207e-07, "log_odds_chosen": 2.1135241985321045, "log_odds_ratio": -0.2717250883579254, "logits/chosen": 0.8029010891914368, "logits/rejected": 0.7955478429794312, "logps/chosen": -2.163585662841797, "logps/rejected": -4.128319263458252, "loss": 0.7727, "nll_loss": 0.7455085515975952, "rewards/accuracies": 0.875, "rewards/chosen": -0.21635858714580536, "rewards/margins": 0.1964733600616455, "rewards/rejected": -0.41283196210861206, "step": 3885 }, { "epoch": 10.639288158795345, "grad_norm": 4.997552871704102, "learning_rate": 4.6780821917808217e-07, "log_odds_chosen": 3.45434308052063, "log_odds_ratio": -0.182675302028656, "logits/chosen": 0.6820034980773926, "logits/rejected": 0.7210313081741333, "logps/chosen": -2.2405076026916504, "logps/rejected": -5.587374687194824, "loss": 0.7486, "nll_loss": 0.7303195595741272, "rewards/accuracies": 0.875, "rewards/chosen": -0.22405076026916504, "rewards/margins": 0.3346867561340332, "rewards/rejected": -0.5587375164031982, "step": 3886 }, { "epoch": 10.642026009582478, "grad_norm": 4.373393535614014, "learning_rate": 4.6767123287671227e-07, "log_odds_chosen": 2.605604648590088, "log_odds_ratio": -0.2691817581653595, "logits/chosen": 0.9792042970657349, "logits/rejected": 0.99446702003479, "logps/chosen": -1.979030728340149, "logps/rejected": -4.474002361297607, "loss": 0.7755, "nll_loss": 0.7485476732254028, "rewards/accuracies": 1.0, "rewards/chosen": -0.1979030817747116, "rewards/margins": 0.2494971752166748, "rewards/rejected": -0.4474002718925476, "step": 3887 }, { "epoch": 10.64476386036961, "grad_norm": 5.828807830810547, "learning_rate": 4.6753424657534247e-07, "log_odds_chosen": 1.8954074382781982, "log_odds_ratio": -0.28602075576782227, "logits/chosen": 0.7951037287712097, "logits/rejected": 0.837429404258728, "logps/chosen": -2.8018221855163574, "logps/rejected": -4.5975751876831055, "loss": 0.6956, "nll_loss": 0.6669667959213257, "rewards/accuracies": 0.875, "rewards/chosen": -0.28018224239349365, "rewards/margins": 0.17957532405853271, "rewards/rejected": -0.45975756645202637, "step": 3888 }, { "epoch": 10.647501711156742, "grad_norm": 5.148518085479736, "learning_rate": 4.6739726027397257e-07, "log_odds_chosen": 2.710448980331421, "log_odds_ratio": -0.2108570784330368, "logits/chosen": 0.9353882670402527, "logits/rejected": 0.9674746990203857, "logps/chosen": -2.724964141845703, "logps/rejected": -5.314178466796875, "loss": 0.8226, "nll_loss": 0.8015635013580322, "rewards/accuracies": 0.875, "rewards/chosen": -0.27249640226364136, "rewards/margins": 0.25892144441604614, "rewards/rejected": -0.5314178466796875, "step": 3889 }, { "epoch": 10.650239561943874, "grad_norm": 6.638621807098389, "learning_rate": 4.672602739726027e-07, "log_odds_chosen": 0.8090642094612122, "log_odds_ratio": -0.6314632296562195, "logits/chosen": 0.8049834370613098, "logits/rejected": 0.6988080739974976, "logps/chosen": -2.422494411468506, "logps/rejected": -3.1279115676879883, "loss": 0.7764, "nll_loss": 0.713250458240509, "rewards/accuracies": 0.75, "rewards/chosen": -0.24224944412708282, "rewards/margins": 0.07054171711206436, "rewards/rejected": -0.3127911686897278, "step": 3890 }, { "epoch": 10.652977412731007, "grad_norm": 5.435576438903809, "learning_rate": 4.671232876712329e-07, "log_odds_chosen": 1.0805611610412598, "log_odds_ratio": -0.4292888641357422, "logits/chosen": 0.8623204231262207, "logits/rejected": 0.8993802070617676, "logps/chosen": -2.4214065074920654, "logps/rejected": -3.3648223876953125, "loss": 0.6794, "nll_loss": 0.6364624500274658, "rewards/accuracies": 0.75, "rewards/chosen": -0.24214065074920654, "rewards/margins": 0.09434157609939575, "rewards/rejected": -0.3364822268486023, "step": 3891 }, { "epoch": 10.655715263518138, "grad_norm": 7.069151401519775, "learning_rate": 4.66986301369863e-07, "log_odds_chosen": 2.0018460750579834, "log_odds_ratio": -0.1932169497013092, "logits/chosen": 0.9676700234413147, "logits/rejected": 0.9784671068191528, "logps/chosen": -2.7187445163726807, "logps/rejected": -4.640481472015381, "loss": 0.6904, "nll_loss": 0.671096682548523, "rewards/accuracies": 1.0, "rewards/chosen": -0.27187442779541016, "rewards/margins": 0.19217373430728912, "rewards/rejected": -0.46404820680618286, "step": 3892 }, { "epoch": 10.65845311430527, "grad_norm": 4.321492671966553, "learning_rate": 4.6684931506849313e-07, "log_odds_chosen": 2.8306198120117188, "log_odds_ratio": -0.18498563766479492, "logits/chosen": 0.8606362342834473, "logits/rejected": 0.8641590476036072, "logps/chosen": -1.84552001953125, "logps/rejected": -4.5352582931518555, "loss": 0.6979, "nll_loss": 0.6794275641441345, "rewards/accuracies": 1.0, "rewards/chosen": -0.18455201387405396, "rewards/margins": 0.26897385716438293, "rewards/rejected": -0.4535258412361145, "step": 3893 }, { "epoch": 10.661190965092402, "grad_norm": 4.980754852294922, "learning_rate": 4.667123287671232e-07, "log_odds_chosen": 1.1421499252319336, "log_odds_ratio": -0.44684651494026184, "logits/chosen": 0.8468852639198303, "logits/rejected": 0.8155139088630676, "logps/chosen": -2.40561580657959, "logps/rejected": -3.387169122695923, "loss": 0.734, "nll_loss": 0.6893523335456848, "rewards/accuracies": 0.625, "rewards/chosen": -0.2405615746974945, "rewards/margins": 0.09815533459186554, "rewards/rejected": -0.33871689438819885, "step": 3894 }, { "epoch": 10.663928815879535, "grad_norm": 8.658665657043457, "learning_rate": 4.6657534246575343e-07, "log_odds_chosen": 0.5878267288208008, "log_odds_ratio": -0.7739810943603516, "logits/chosen": 0.8407084941864014, "logits/rejected": 0.7555095553398132, "logps/chosen": -2.958827018737793, "logps/rejected": -3.4733967781066895, "loss": 0.8939, "nll_loss": 0.8164888620376587, "rewards/accuracies": 0.625, "rewards/chosen": -0.2958827018737793, "rewards/margins": 0.05145695060491562, "rewards/rejected": -0.3473396897315979, "step": 3895 }, { "epoch": 10.666666666666666, "grad_norm": 5.3876729011535645, "learning_rate": 4.6643835616438353e-07, "log_odds_chosen": 1.117861032485962, "log_odds_ratio": -0.42913922667503357, "logits/chosen": 1.032170295715332, "logits/rejected": 1.0648373365402222, "logps/chosen": -2.2844386100769043, "logps/rejected": -3.3038742542266846, "loss": 0.6796, "nll_loss": 0.6366973519325256, "rewards/accuracies": 0.75, "rewards/chosen": -0.22844386100769043, "rewards/margins": 0.10194355994462967, "rewards/rejected": -0.3303874135017395, "step": 3896 }, { "epoch": 10.669404517453799, "grad_norm": 4.955593585968018, "learning_rate": 4.663013698630137e-07, "log_odds_chosen": 1.2554595470428467, "log_odds_ratio": -0.38780492544174194, "logits/chosen": 0.8296747803688049, "logits/rejected": 0.8050656318664551, "logps/chosen": -1.8725121021270752, "logps/rejected": -3.0387468338012695, "loss": 0.7199, "nll_loss": 0.6811550259590149, "rewards/accuracies": 0.75, "rewards/chosen": -0.18725121021270752, "rewards/margins": 0.11662349104881287, "rewards/rejected": -0.3038747012615204, "step": 3897 }, { "epoch": 10.67214236824093, "grad_norm": 5.135085582733154, "learning_rate": 4.6616438356164383e-07, "log_odds_chosen": 1.1921993494033813, "log_odds_ratio": -0.3709573745727539, "logits/chosen": 0.75168776512146, "logits/rejected": 0.7415491938591003, "logps/chosen": -2.425150156021118, "logps/rejected": -3.507854700088501, "loss": 0.6705, "nll_loss": 0.6333717107772827, "rewards/accuracies": 0.875, "rewards/chosen": -0.24251504242420197, "rewards/margins": 0.10827043652534485, "rewards/rejected": -0.3507854640483856, "step": 3898 }, { "epoch": 10.674880219028063, "grad_norm": 5.416257858276367, "learning_rate": 4.6602739726027393e-07, "log_odds_chosen": 1.1215120553970337, "log_odds_ratio": -0.33294394612312317, "logits/chosen": 0.6483820080757141, "logits/rejected": 0.6516059041023254, "logps/chosen": -2.141092300415039, "logps/rejected": -3.1325502395629883, "loss": 0.636, "nll_loss": 0.6026868224143982, "rewards/accuracies": 0.875, "rewards/chosen": -0.21410925686359406, "rewards/margins": 0.0991457849740982, "rewards/rejected": -0.3132550120353699, "step": 3899 }, { "epoch": 10.677618069815194, "grad_norm": 5.022132873535156, "learning_rate": 4.658904109589041e-07, "log_odds_chosen": 2.1185545921325684, "log_odds_ratio": -0.24939219653606415, "logits/chosen": 0.6365800499916077, "logits/rejected": 0.6405848264694214, "logps/chosen": -2.3908307552337646, "logps/rejected": -4.414460182189941, "loss": 0.6691, "nll_loss": 0.6441757082939148, "rewards/accuracies": 1.0, "rewards/chosen": -0.23908309638500214, "rewards/margins": 0.20236289501190186, "rewards/rejected": -0.4414459764957428, "step": 3900 }, { "epoch": 10.680355920602327, "grad_norm": 6.09471321105957, "learning_rate": 4.657534246575342e-07, "log_odds_chosen": 2.462996006011963, "log_odds_ratio": -0.20000417530536652, "logits/chosen": 1.129529356956482, "logits/rejected": 1.2030824422836304, "logps/chosen": -2.3404769897460938, "logps/rejected": -4.690182685852051, "loss": 0.6207, "nll_loss": 0.6006733179092407, "rewards/accuracies": 0.875, "rewards/chosen": -0.23404772579669952, "rewards/margins": 0.2349705696105957, "rewards/rejected": -0.46901828050613403, "step": 3901 }, { "epoch": 10.683093771389458, "grad_norm": 4.927258491516113, "learning_rate": 4.656164383561644e-07, "log_odds_chosen": 4.287313461303711, "log_odds_ratio": -0.12643808126449585, "logits/chosen": 1.0437874794006348, "logits/rejected": 1.097855806350708, "logps/chosen": -2.8811628818511963, "logps/rejected": -7.099360466003418, "loss": 0.8196, "nll_loss": 0.806936502456665, "rewards/accuracies": 1.0, "rewards/chosen": -0.2881162762641907, "rewards/margins": 0.4218197464942932, "rewards/rejected": -0.7099360227584839, "step": 3902 }, { "epoch": 10.685831622176591, "grad_norm": 5.263660430908203, "learning_rate": 4.654794520547945e-07, "log_odds_chosen": 1.6504604816436768, "log_odds_ratio": -0.2487405389547348, "logits/chosen": 0.8379823565483093, "logits/rejected": 0.8959696888923645, "logps/chosen": -2.0937249660491943, "logps/rejected": -3.610081195831299, "loss": 0.6669, "nll_loss": 0.6420601606369019, "rewards/accuracies": 1.0, "rewards/chosen": -0.20937249064445496, "rewards/margins": 0.15163561701774597, "rewards/rejected": -0.3610081076622009, "step": 3903 }, { "epoch": 10.688569472963724, "grad_norm": 4.739640235900879, "learning_rate": 4.6534246575342464e-07, "log_odds_chosen": 1.114662766456604, "log_odds_ratio": -0.44257211685180664, "logits/chosen": 0.9083502292633057, "logits/rejected": 0.8999271392822266, "logps/chosen": -2.763054132461548, "logps/rejected": -3.8218419551849365, "loss": 0.7638, "nll_loss": 0.7195815443992615, "rewards/accuracies": 0.875, "rewards/chosen": -0.2763054072856903, "rewards/margins": 0.1058787852525711, "rewards/rejected": -0.3821842074394226, "step": 3904 }, { "epoch": 10.691307323750856, "grad_norm": 5.750852584838867, "learning_rate": 4.652054794520548e-07, "log_odds_chosen": 3.833400249481201, "log_odds_ratio": -0.1466447114944458, "logits/chosen": 1.0819839239120483, "logits/rejected": 1.081414818763733, "logps/chosen": -2.5857553482055664, "logps/rejected": -6.292266845703125, "loss": 0.7016, "nll_loss": 0.6869572401046753, "rewards/accuracies": 1.0, "rewards/chosen": -0.25857552886009216, "rewards/margins": 0.3706511855125427, "rewards/rejected": -0.6292266845703125, "step": 3905 }, { "epoch": 10.694045174537987, "grad_norm": 5.0610504150390625, "learning_rate": 4.650684931506849e-07, "log_odds_chosen": 2.0704565048217773, "log_odds_ratio": -0.24498139321804047, "logits/chosen": 0.8589695692062378, "logits/rejected": 0.7949456572532654, "logps/chosen": -1.777208685874939, "logps/rejected": -3.721132278442383, "loss": 0.6571, "nll_loss": 0.632630467414856, "rewards/accuracies": 1.0, "rewards/chosen": -0.17772087454795837, "rewards/margins": 0.1943923532962799, "rewards/rejected": -0.3721132278442383, "step": 3906 }, { "epoch": 10.69678302532512, "grad_norm": 4.675034523010254, "learning_rate": 4.6493150684931504e-07, "log_odds_chosen": 2.1733038425445557, "log_odds_ratio": -0.23429884016513824, "logits/chosen": 0.9587284922599792, "logits/rejected": 0.958315372467041, "logps/chosen": -2.4178225994110107, "logps/rejected": -4.518300533294678, "loss": 0.7836, "nll_loss": 0.7602134943008423, "rewards/accuracies": 1.0, "rewards/chosen": -0.2417822778224945, "rewards/margins": 0.21004776656627655, "rewards/rejected": -0.45183005928993225, "step": 3907 }, { "epoch": 10.699520876112253, "grad_norm": 5.919833183288574, "learning_rate": 4.6479452054794514e-07, "log_odds_chosen": 2.892465114593506, "log_odds_ratio": -0.3433239758014679, "logits/chosen": 1.159971833229065, "logits/rejected": 1.1930484771728516, "logps/chosen": -2.599151134490967, "logps/rejected": -5.398558616638184, "loss": 0.7227, "nll_loss": 0.6883343458175659, "rewards/accuracies": 0.875, "rewards/chosen": -0.2599151134490967, "rewards/margins": 0.27994075417518616, "rewards/rejected": -0.5398558378219604, "step": 3908 }, { "epoch": 10.702258726899384, "grad_norm": 4.510152339935303, "learning_rate": 4.6465753424657535e-07, "log_odds_chosen": 3.2394917011260986, "log_odds_ratio": -0.11726582050323486, "logits/chosen": 1.006762981414795, "logits/rejected": 0.9998978972434998, "logps/chosen": -2.0883216857910156, "logps/rejected": -5.194958686828613, "loss": 0.7208, "nll_loss": 0.7090346813201904, "rewards/accuracies": 1.0, "rewards/chosen": -0.20883218944072723, "rewards/margins": 0.31066370010375977, "rewards/rejected": -0.5194959044456482, "step": 3909 }, { "epoch": 10.704996577686517, "grad_norm": 10.155265808105469, "learning_rate": 4.6452054794520545e-07, "log_odds_chosen": 1.4488341808319092, "log_odds_ratio": -0.7191191911697388, "logits/chosen": 0.9239900708198547, "logits/rejected": 0.9850783348083496, "logps/chosen": -3.551184892654419, "logps/rejected": -4.879006385803223, "loss": 0.9345, "nll_loss": 0.8625549077987671, "rewards/accuracies": 0.75, "rewards/chosen": -0.3551185131072998, "rewards/margins": 0.13278216123580933, "rewards/rejected": -0.48790064454078674, "step": 3910 }, { "epoch": 10.707734428473648, "grad_norm": 5.680988788604736, "learning_rate": 4.643835616438356e-07, "log_odds_chosen": 0.8809348940849304, "log_odds_ratio": -0.5811851024627686, "logits/chosen": 0.818439245223999, "logits/rejected": 0.8141095638275146, "logps/chosen": -2.7935619354248047, "logps/rejected": -3.538114070892334, "loss": 0.7224, "nll_loss": 0.6642866134643555, "rewards/accuracies": 0.875, "rewards/chosen": -0.2793561816215515, "rewards/margins": 0.07445521652698517, "rewards/rejected": -0.3538114130496979, "step": 3911 }, { "epoch": 10.710472279260781, "grad_norm": 5.284367561340332, "learning_rate": 4.6424657534246575e-07, "log_odds_chosen": 1.7665092945098877, "log_odds_ratio": -0.2736140489578247, "logits/chosen": 1.0591973066329956, "logits/rejected": 1.1260535717010498, "logps/chosen": -2.650928497314453, "logps/rejected": -4.354024410247803, "loss": 0.7145, "nll_loss": 0.6871283054351807, "rewards/accuracies": 0.875, "rewards/chosen": -0.2650928795337677, "rewards/margins": 0.17030960321426392, "rewards/rejected": -0.43540245294570923, "step": 3912 }, { "epoch": 10.713210130047912, "grad_norm": 6.100379943847656, "learning_rate": 4.6410958904109585e-07, "log_odds_chosen": 1.7218255996704102, "log_odds_ratio": -0.5144377946853638, "logits/chosen": 1.0016179084777832, "logits/rejected": 1.0697662830352783, "logps/chosen": -2.822145938873291, "logps/rejected": -4.449391841888428, "loss": 0.7346, "nll_loss": 0.6831631660461426, "rewards/accuracies": 0.875, "rewards/chosen": -0.28221461176872253, "rewards/margins": 0.1627245843410492, "rewards/rejected": -0.44493916630744934, "step": 3913 }, { "epoch": 10.715947980835045, "grad_norm": 4.447304725646973, "learning_rate": 4.63972602739726e-07, "log_odds_chosen": 2.000760078430176, "log_odds_ratio": -0.3245973587036133, "logits/chosen": 1.0142499208450317, "logits/rejected": 0.9991188645362854, "logps/chosen": -2.2629125118255615, "logps/rejected": -4.2120256423950195, "loss": 0.7461, "nll_loss": 0.7136049866676331, "rewards/accuracies": 0.75, "rewards/chosen": -0.2262912392616272, "rewards/margins": 0.19491130113601685, "rewards/rejected": -0.42120254039764404, "step": 3914 }, { "epoch": 10.718685831622176, "grad_norm": 8.029563903808594, "learning_rate": 4.6383561643835616e-07, "log_odds_chosen": 2.8180088996887207, "log_odds_ratio": -0.26589661836624146, "logits/chosen": 1.000656247138977, "logits/rejected": 1.0320442914962769, "logps/chosen": -2.8576109409332275, "logps/rejected": -5.582103729248047, "loss": 0.672, "nll_loss": 0.6454055309295654, "rewards/accuracies": 0.75, "rewards/chosen": -0.28576111793518066, "rewards/margins": 0.2724492847919464, "rewards/rejected": -0.5582103729248047, "step": 3915 }, { "epoch": 10.72142368240931, "grad_norm": 6.016305446624756, "learning_rate": 4.636986301369863e-07, "log_odds_chosen": 0.6441739797592163, "log_odds_ratio": -0.47681480646133423, "logits/chosen": 0.9877974987030029, "logits/rejected": 0.9696542620658875, "logps/chosen": -1.8722538948059082, "logps/rejected": -2.4514565467834473, "loss": 0.6553, "nll_loss": 0.6076269745826721, "rewards/accuracies": 0.75, "rewards/chosen": -0.1872253715991974, "rewards/margins": 0.05792025104165077, "rewards/rejected": -0.24514563381671906, "step": 3916 }, { "epoch": 10.72416153319644, "grad_norm": 4.695631504058838, "learning_rate": 4.635616438356164e-07, "log_odds_chosen": 2.58048152923584, "log_odds_ratio": -0.19222410023212433, "logits/chosen": 0.7899742126464844, "logits/rejected": 0.7463210821151733, "logps/chosen": -1.9732575416564941, "logps/rejected": -4.44231653213501, "loss": 0.71, "nll_loss": 0.6908197999000549, "rewards/accuracies": 1.0, "rewards/chosen": -0.19732576608657837, "rewards/margins": 0.2469058781862259, "rewards/rejected": -0.44423162937164307, "step": 3917 }, { "epoch": 10.726899383983573, "grad_norm": 4.761287212371826, "learning_rate": 4.6342465753424656e-07, "log_odds_chosen": 2.3055975437164307, "log_odds_ratio": -0.18975365161895752, "logits/chosen": 0.7256996631622314, "logits/rejected": 0.6955365538597107, "logps/chosen": -1.8216972351074219, "logps/rejected": -3.9317893981933594, "loss": 0.6593, "nll_loss": 0.6403336524963379, "rewards/accuracies": 1.0, "rewards/chosen": -0.18216975033283234, "rewards/margins": 0.2110091745853424, "rewards/rejected": -0.39317893981933594, "step": 3918 }, { "epoch": 10.729637234770705, "grad_norm": 5.451180934906006, "learning_rate": 4.632876712328767e-07, "log_odds_chosen": 1.9173760414123535, "log_odds_ratio": -0.35111671686172485, "logits/chosen": 0.8049750328063965, "logits/rejected": 0.9160991907119751, "logps/chosen": -2.3220369815826416, "logps/rejected": -4.144111156463623, "loss": 0.7467, "nll_loss": 0.7116067409515381, "rewards/accuracies": 0.75, "rewards/chosen": -0.23220369219779968, "rewards/margins": 0.18220742046833038, "rewards/rejected": -0.41441112756729126, "step": 3919 }, { "epoch": 10.732375085557837, "grad_norm": 4.614222526550293, "learning_rate": 4.631506849315068e-07, "log_odds_chosen": 1.8071649074554443, "log_odds_ratio": -0.29782599210739136, "logits/chosen": 0.6635269522666931, "logits/rejected": 0.678158164024353, "logps/chosen": -2.397528886795044, "logps/rejected": -4.0971527099609375, "loss": 0.7282, "nll_loss": 0.6984105110168457, "rewards/accuracies": 1.0, "rewards/chosen": -0.2397528886795044, "rewards/margins": 0.16996237635612488, "rewards/rejected": -0.4097152650356293, "step": 3920 }, { "epoch": 10.735112936344969, "grad_norm": 5.541913986206055, "learning_rate": 4.6301369863013696e-07, "log_odds_chosen": 1.3893625736236572, "log_odds_ratio": -0.31196844577789307, "logits/chosen": 0.7339242100715637, "logits/rejected": 0.7434234619140625, "logps/chosen": -2.225159168243408, "logps/rejected": -3.5244107246398926, "loss": 0.6461, "nll_loss": 0.614865779876709, "rewards/accuracies": 1.0, "rewards/chosen": -0.22251592576503754, "rewards/margins": 0.12992514669895172, "rewards/rejected": -0.35244107246398926, "step": 3921 }, { "epoch": 10.737850787132102, "grad_norm": 4.299445152282715, "learning_rate": 4.628767123287671e-07, "log_odds_chosen": 1.5936692953109741, "log_odds_ratio": -0.2807208001613617, "logits/chosen": 0.8953511118888855, "logits/rejected": 1.0160590410232544, "logps/chosen": -2.4179301261901855, "logps/rejected": -3.9344067573547363, "loss": 0.6789, "nll_loss": 0.6508244872093201, "rewards/accuracies": 0.875, "rewards/chosen": -0.24179302155971527, "rewards/margins": 0.1516476720571518, "rewards/rejected": -0.3934406638145447, "step": 3922 }, { "epoch": 10.740588637919233, "grad_norm": 4.532971382141113, "learning_rate": 4.6273972602739727e-07, "log_odds_chosen": 1.787748098373413, "log_odds_ratio": -0.2943359315395355, "logits/chosen": 0.9552074670791626, "logits/rejected": 1.0268762111663818, "logps/chosen": -1.9509732723236084, "logps/rejected": -3.5685548782348633, "loss": 0.7222, "nll_loss": 0.6927379965782166, "rewards/accuracies": 0.875, "rewards/chosen": -0.19509734213352203, "rewards/margins": 0.1617581695318222, "rewards/rejected": -0.35685551166534424, "step": 3923 }, { "epoch": 10.743326488706366, "grad_norm": 5.235436916351318, "learning_rate": 4.6260273972602737e-07, "log_odds_chosen": 1.5628716945648193, "log_odds_ratio": -0.5465787053108215, "logits/chosen": 0.8057000041007996, "logits/rejected": 0.865908682346344, "logps/chosen": -2.792325019836426, "logps/rejected": -4.225306034088135, "loss": 0.7005, "nll_loss": 0.6458784937858582, "rewards/accuracies": 0.75, "rewards/chosen": -0.2792324721813202, "rewards/margins": 0.1432981640100479, "rewards/rejected": -0.4225306510925293, "step": 3924 }, { "epoch": 10.746064339493497, "grad_norm": 4.720221996307373, "learning_rate": 4.6246575342465746e-07, "log_odds_chosen": 3.5502467155456543, "log_odds_ratio": -0.11302443593740463, "logits/chosen": 1.087235689163208, "logits/rejected": 1.1372358798980713, "logps/chosen": -2.443648338317871, "logps/rejected": -5.8942437171936035, "loss": 0.721, "nll_loss": 0.7097119688987732, "rewards/accuracies": 1.0, "rewards/chosen": -0.24436485767364502, "rewards/margins": 0.3450595438480377, "rewards/rejected": -0.5894243717193604, "step": 3925 }, { "epoch": 10.74880219028063, "grad_norm": 4.834239482879639, "learning_rate": 4.6232876712328767e-07, "log_odds_chosen": 2.6207637786865234, "log_odds_ratio": -0.19961890578269958, "logits/chosen": 0.8812339305877686, "logits/rejected": 0.8979833126068115, "logps/chosen": -2.043649673461914, "logps/rejected": -4.547188758850098, "loss": 0.7167, "nll_loss": 0.6967616677284241, "rewards/accuracies": 1.0, "rewards/chosen": -0.20436495542526245, "rewards/margins": 0.25035393238067627, "rewards/rejected": -0.4547189176082611, "step": 3926 }, { "epoch": 10.751540041067761, "grad_norm": 5.111756801605225, "learning_rate": 4.6219178082191777e-07, "log_odds_chosen": 2.3958423137664795, "log_odds_ratio": -0.18999941647052765, "logits/chosen": 0.6171606779098511, "logits/rejected": 0.6027634143829346, "logps/chosen": -1.8770055770874023, "logps/rejected": -4.0998077392578125, "loss": 0.5859, "nll_loss": 0.5668745040893555, "rewards/accuracies": 1.0, "rewards/chosen": -0.187700554728508, "rewards/margins": 0.22228023409843445, "rewards/rejected": -0.40998080372810364, "step": 3927 }, { "epoch": 10.754277891854894, "grad_norm": 4.585324287414551, "learning_rate": 4.620547945205479e-07, "log_odds_chosen": 1.4265260696411133, "log_odds_ratio": -0.35474786162376404, "logits/chosen": 0.9797624349594116, "logits/rejected": 0.9916683435440063, "logps/chosen": -2.373408555984497, "logps/rejected": -3.747830867767334, "loss": 0.6959, "nll_loss": 0.6604341268539429, "rewards/accuracies": 0.875, "rewards/chosen": -0.23734086751937866, "rewards/margins": 0.1374422311782837, "rewards/rejected": -0.37478309869766235, "step": 3928 }, { "epoch": 10.757015742642025, "grad_norm": 7.410138130187988, "learning_rate": 4.6191780821917807e-07, "log_odds_chosen": 3.3687925338745117, "log_odds_ratio": -0.24202144145965576, "logits/chosen": 1.139665961265564, "logits/rejected": 1.0565859079360962, "logps/chosen": -2.7662100791931152, "logps/rejected": -6.039332389831543, "loss": 0.802, "nll_loss": 0.777763843536377, "rewards/accuracies": 1.0, "rewards/chosen": -0.276621013879776, "rewards/margins": 0.327312171459198, "rewards/rejected": -0.6039332747459412, "step": 3929 }, { "epoch": 10.759753593429158, "grad_norm": 5.678517818450928, "learning_rate": 4.617808219178082e-07, "log_odds_chosen": 2.714198589324951, "log_odds_ratio": -0.24166423082351685, "logits/chosen": 0.8542670607566833, "logits/rejected": 0.8827565908432007, "logps/chosen": -2.100543975830078, "logps/rejected": -4.667250633239746, "loss": 0.7459, "nll_loss": 0.7217190265655518, "rewards/accuracies": 0.875, "rewards/chosen": -0.2100543975830078, "rewards/margins": 0.25667068362236023, "rewards/rejected": -0.46672508120536804, "step": 3930 }, { "epoch": 10.762491444216291, "grad_norm": 7.021218776702881, "learning_rate": 4.616438356164383e-07, "log_odds_chosen": 1.3297107219696045, "log_odds_ratio": -0.3598443865776062, "logits/chosen": 0.894506573677063, "logits/rejected": 0.8523939251899719, "logps/chosen": -3.0508055686950684, "logps/rejected": -4.314944267272949, "loss": 0.8606, "nll_loss": 0.8245662450790405, "rewards/accuracies": 0.875, "rewards/chosen": -0.3050805628299713, "rewards/margins": 0.12641386687755585, "rewards/rejected": -0.43149441480636597, "step": 3931 }, { "epoch": 10.765229295003422, "grad_norm": 7.348206520080566, "learning_rate": 4.615068493150684e-07, "log_odds_chosen": 1.4988921880722046, "log_odds_ratio": -0.4346761703491211, "logits/chosen": 0.8308025598526001, "logits/rejected": 0.8723834156990051, "logps/chosen": -2.6110804080963135, "logps/rejected": -4.0267205238342285, "loss": 0.6501, "nll_loss": 0.6066679954528809, "rewards/accuracies": 0.75, "rewards/chosen": -0.26110801100730896, "rewards/margins": 0.1415640413761139, "rewards/rejected": -0.40267205238342285, "step": 3932 }, { "epoch": 10.767967145790553, "grad_norm": 4.592367172241211, "learning_rate": 4.6136986301369863e-07, "log_odds_chosen": 1.750244379043579, "log_odds_ratio": -0.2330787479877472, "logits/chosen": 0.7948441505432129, "logits/rejected": 0.7586571574211121, "logps/chosen": -1.9622888565063477, "logps/rejected": -3.589726686477661, "loss": 0.7399, "nll_loss": 0.7166104912757874, "rewards/accuracies": 1.0, "rewards/chosen": -0.19622889161109924, "rewards/margins": 0.16274377703666687, "rewards/rejected": -0.3589726686477661, "step": 3933 }, { "epoch": 10.770704996577686, "grad_norm": 5.021825313568115, "learning_rate": 4.6123287671232873e-07, "log_odds_chosen": 2.256150960922241, "log_odds_ratio": -0.23375648260116577, "logits/chosen": 0.9530516266822815, "logits/rejected": 1.0242705345153809, "logps/chosen": -2.767921209335327, "logps/rejected": -4.959239959716797, "loss": 0.6796, "nll_loss": 0.6562100648880005, "rewards/accuracies": 1.0, "rewards/chosen": -0.27679210901260376, "rewards/margins": 0.2191319316625595, "rewards/rejected": -0.49592405557632446, "step": 3934 }, { "epoch": 10.77344284736482, "grad_norm": 5.046154022216797, "learning_rate": 4.610958904109589e-07, "log_odds_chosen": 3.35176420211792, "log_odds_ratio": -0.1519118994474411, "logits/chosen": 0.8009604811668396, "logits/rejected": 0.8137928247451782, "logps/chosen": -1.9238086938858032, "logps/rejected": -5.12284517288208, "loss": 0.6541, "nll_loss": 0.638924777507782, "rewards/accuracies": 1.0, "rewards/chosen": -0.1923808753490448, "rewards/margins": 0.3199036717414856, "rewards/rejected": -0.512284517288208, "step": 3935 }, { "epoch": 10.77618069815195, "grad_norm": 5.155977725982666, "learning_rate": 4.6095890410958903e-07, "log_odds_chosen": 2.0449118614196777, "log_odds_ratio": -0.29720208048820496, "logits/chosen": 0.9277045130729675, "logits/rejected": 0.9328469038009644, "logps/chosen": -1.9571198225021362, "logps/rejected": -3.877876043319702, "loss": 0.6277, "nll_loss": 0.5979313254356384, "rewards/accuracies": 0.875, "rewards/chosen": -0.19571200013160706, "rewards/margins": 0.19207562506198883, "rewards/rejected": -0.3877876102924347, "step": 3936 }, { "epoch": 10.778918548939084, "grad_norm": 5.201253890991211, "learning_rate": 4.608219178082192e-07, "log_odds_chosen": 2.753955125808716, "log_odds_ratio": -0.17447951436042786, "logits/chosen": 0.8069669008255005, "logits/rejected": 0.7892866730690002, "logps/chosen": -2.2097301483154297, "logps/rejected": -4.824310302734375, "loss": 0.6649, "nll_loss": 0.6474081873893738, "rewards/accuracies": 1.0, "rewards/chosen": -0.22097302973270416, "rewards/margins": 0.26145803928375244, "rewards/rejected": -0.4824310541152954, "step": 3937 }, { "epoch": 10.781656399726215, "grad_norm": 5.6727142333984375, "learning_rate": 4.606849315068493e-07, "log_odds_chosen": 0.9364371299743652, "log_odds_ratio": -0.46401333808898926, "logits/chosen": 0.6928785443305969, "logits/rejected": 0.6927872896194458, "logps/chosen": -2.2319397926330566, "logps/rejected": -3.1392016410827637, "loss": 0.83, "nll_loss": 0.7835883498191833, "rewards/accuracies": 0.875, "rewards/chosen": -0.2231939733028412, "rewards/margins": 0.09072616696357727, "rewards/rejected": -0.31392014026641846, "step": 3938 }, { "epoch": 10.784394250513348, "grad_norm": 4.845877170562744, "learning_rate": 4.605479452054794e-07, "log_odds_chosen": 1.2872967720031738, "log_odds_ratio": -0.4240115284919739, "logits/chosen": 0.7945531010627747, "logits/rejected": 0.7932965159416199, "logps/chosen": -2.1867504119873047, "logps/rejected": -3.415161371231079, "loss": 0.6881, "nll_loss": 0.6457338333129883, "rewards/accuracies": 0.75, "rewards/chosen": -0.21867504715919495, "rewards/margins": 0.12284109741449356, "rewards/rejected": -0.3415161371231079, "step": 3939 }, { "epoch": 10.787132101300479, "grad_norm": 4.71101188659668, "learning_rate": 4.604109589041096e-07, "log_odds_chosen": 2.8314833641052246, "log_odds_ratio": -0.2250758856534958, "logits/chosen": 0.7627472877502441, "logits/rejected": 0.7671304941177368, "logps/chosen": -2.1945178508758545, "logps/rejected": -4.923435211181641, "loss": 0.7138, "nll_loss": 0.6913151144981384, "rewards/accuracies": 0.875, "rewards/chosen": -0.21945178508758545, "rewards/margins": 0.2728917598724365, "rewards/rejected": -0.492343544960022, "step": 3940 }, { "epoch": 10.789869952087612, "grad_norm": 5.491253852844238, "learning_rate": 4.602739726027397e-07, "log_odds_chosen": 2.638151168823242, "log_odds_ratio": -0.23565979301929474, "logits/chosen": 0.7515077590942383, "logits/rejected": 0.7192700505256653, "logps/chosen": -1.8782708644866943, "logps/rejected": -4.387499809265137, "loss": 0.6441, "nll_loss": 0.620530366897583, "rewards/accuracies": 1.0, "rewards/chosen": -0.18782708048820496, "rewards/margins": 0.25092291831970215, "rewards/rejected": -0.4387500286102295, "step": 3941 }, { "epoch": 10.792607802874743, "grad_norm": 4.635059833526611, "learning_rate": 4.6013698630136984e-07, "log_odds_chosen": 1.511082649230957, "log_odds_ratio": -0.29289567470550537, "logits/chosen": 0.7174426317214966, "logits/rejected": 0.7427918314933777, "logps/chosen": -1.944492220878601, "logps/rejected": -3.2872977256774902, "loss": 0.6683, "nll_loss": 0.6389894485473633, "rewards/accuracies": 0.875, "rewards/chosen": -0.19444923102855682, "rewards/margins": 0.13428053259849548, "rewards/rejected": -0.3287297487258911, "step": 3942 }, { "epoch": 10.795345653661876, "grad_norm": 5.717427730560303, "learning_rate": 4.6e-07, "log_odds_chosen": 1.1061148643493652, "log_odds_ratio": -0.41080811619758606, "logits/chosen": 0.7873650789260864, "logits/rejected": 0.80270916223526, "logps/chosen": -2.9513697624206543, "logps/rejected": -4.017417907714844, "loss": 0.73, "nll_loss": 0.6889575719833374, "rewards/accuracies": 0.75, "rewards/chosen": -0.2951369881629944, "rewards/margins": 0.10660485923290253, "rewards/rejected": -0.4017418324947357, "step": 3943 }, { "epoch": 10.798083504449007, "grad_norm": 4.928164005279541, "learning_rate": 4.5986301369863014e-07, "log_odds_chosen": 1.5323832035064697, "log_odds_ratio": -0.3294847905635834, "logits/chosen": 0.7590367794036865, "logits/rejected": 0.7552743554115295, "logps/chosen": -2.027954578399658, "logps/rejected": -3.476771116256714, "loss": 0.6562, "nll_loss": 0.6232743263244629, "rewards/accuracies": 0.875, "rewards/chosen": -0.20279544591903687, "rewards/margins": 0.14488166570663452, "rewards/rejected": -0.3476771116256714, "step": 3944 }, { "epoch": 10.80082135523614, "grad_norm": 7.002279281616211, "learning_rate": 4.5972602739726024e-07, "log_odds_chosen": 4.138973236083984, "log_odds_ratio": -0.1718275398015976, "logits/chosen": 1.0853893756866455, "logits/rejected": 1.1944031715393066, "logps/chosen": -2.562135934829712, "logps/rejected": -6.584357261657715, "loss": 0.7785, "nll_loss": 0.7612795829772949, "rewards/accuracies": 0.875, "rewards/chosen": -0.25621360540390015, "rewards/margins": 0.4022220969200134, "rewards/rejected": -0.6584357023239136, "step": 3945 }, { "epoch": 10.803559206023271, "grad_norm": 4.878031253814697, "learning_rate": 4.595890410958904e-07, "log_odds_chosen": 1.7694834470748901, "log_odds_ratio": -0.32774776220321655, "logits/chosen": 0.7946973443031311, "logits/rejected": 0.754370927810669, "logps/chosen": -2.1239376068115234, "logps/rejected": -3.765944480895996, "loss": 0.705, "nll_loss": 0.6722650527954102, "rewards/accuracies": 0.875, "rewards/chosen": -0.21239374577999115, "rewards/margins": 0.16420070827007294, "rewards/rejected": -0.3765944540500641, "step": 3946 }, { "epoch": 10.806297056810404, "grad_norm": 6.439623832702637, "learning_rate": 4.5945205479452055e-07, "log_odds_chosen": 2.7006542682647705, "log_odds_ratio": -0.2159757763147354, "logits/chosen": 0.9513614177703857, "logits/rejected": 0.9607479572296143, "logps/chosen": -2.421962022781372, "logps/rejected": -5.013462066650391, "loss": 0.7529, "nll_loss": 0.7313162088394165, "rewards/accuracies": 0.875, "rewards/chosen": -0.2421962022781372, "rewards/margins": 0.25915002822875977, "rewards/rejected": -0.501346230506897, "step": 3947 }, { "epoch": 10.809034907597535, "grad_norm": 4.666276931762695, "learning_rate": 4.5931506849315064e-07, "log_odds_chosen": 1.3958028554916382, "log_odds_ratio": -0.2993044853210449, "logits/chosen": 0.8078267574310303, "logits/rejected": 0.8012086749076843, "logps/chosen": -2.223874807357788, "logps/rejected": -3.5112111568450928, "loss": 0.7562, "nll_loss": 0.726282000541687, "rewards/accuracies": 0.875, "rewards/chosen": -0.22238749265670776, "rewards/margins": 0.12873363494873047, "rewards/rejected": -0.35112112760543823, "step": 3948 }, { "epoch": 10.811772758384668, "grad_norm": 5.268828392028809, "learning_rate": 4.591780821917808e-07, "log_odds_chosen": 1.9732388257980347, "log_odds_ratio": -0.26560303568840027, "logits/chosen": 0.84393709897995, "logits/rejected": 0.8207478523254395, "logps/chosen": -1.7112572193145752, "logps/rejected": -3.4850363731384277, "loss": 0.6169, "nll_loss": 0.5903053879737854, "rewards/accuracies": 1.0, "rewards/chosen": -0.17112571001052856, "rewards/margins": 0.17737792432308197, "rewards/rejected": -0.34850364923477173, "step": 3949 }, { "epoch": 10.8145106091718, "grad_norm": 5.277281761169434, "learning_rate": 4.5904109589041095e-07, "log_odds_chosen": 1.3846948146820068, "log_odds_ratio": -0.3509434461593628, "logits/chosen": 0.862749457359314, "logits/rejected": 0.8546279668807983, "logps/chosen": -1.627068281173706, "logps/rejected": -2.8669586181640625, "loss": 0.6307, "nll_loss": 0.5955686569213867, "rewards/accuracies": 0.875, "rewards/chosen": -0.16270685195922852, "rewards/margins": 0.12398901581764221, "rewards/rejected": -0.2866958677768707, "step": 3950 }, { "epoch": 10.817248459958932, "grad_norm": 4.650915145874023, "learning_rate": 4.589041095890411e-07, "log_odds_chosen": 2.412261724472046, "log_odds_ratio": -0.31748417019844055, "logits/chosen": 0.73064786195755, "logits/rejected": 0.7405732870101929, "logps/chosen": -1.9301365613937378, "logps/rejected": -4.236363887786865, "loss": 0.7034, "nll_loss": 0.6716046929359436, "rewards/accuracies": 1.0, "rewards/chosen": -0.19301366806030273, "rewards/margins": 0.23062275350093842, "rewards/rejected": -0.42363643646240234, "step": 3951 }, { "epoch": 10.819986310746064, "grad_norm": 5.065726280212402, "learning_rate": 4.587671232876712e-07, "log_odds_chosen": 1.972595453262329, "log_odds_ratio": -0.3462345600128174, "logits/chosen": 0.7005531787872314, "logits/rejected": 0.6782265901565552, "logps/chosen": -1.6725282669067383, "logps/rejected": -3.5452253818511963, "loss": 0.7285, "nll_loss": 0.693893313407898, "rewards/accuracies": 0.875, "rewards/chosen": -0.16725283861160278, "rewards/margins": 0.18726973235607147, "rewards/rejected": -0.35452255606651306, "step": 3952 }, { "epoch": 10.822724161533197, "grad_norm": 7.3852458000183105, "learning_rate": 4.5863013698630135e-07, "log_odds_chosen": 3.222264289855957, "log_odds_ratio": -0.387962281703949, "logits/chosen": 0.816101610660553, "logits/rejected": 0.7823782563209534, "logps/chosen": -2.153210163116455, "logps/rejected": -5.327898025512695, "loss": 0.7575, "nll_loss": 0.7187482714653015, "rewards/accuracies": 0.875, "rewards/chosen": -0.21532100439071655, "rewards/margins": 0.3174688220024109, "rewards/rejected": -0.5327898263931274, "step": 3953 }, { "epoch": 10.825462012320328, "grad_norm": 4.941319942474365, "learning_rate": 4.584931506849315e-07, "log_odds_chosen": 1.7547825574874878, "log_odds_ratio": -0.3509233593940735, "logits/chosen": 0.787594199180603, "logits/rejected": 0.6870465278625488, "logps/chosen": -1.9472829103469849, "logps/rejected": -3.5784945487976074, "loss": 0.7614, "nll_loss": 0.7262763977050781, "rewards/accuracies": 0.875, "rewards/chosen": -0.1947282999753952, "rewards/margins": 0.16312117874622345, "rewards/rejected": -0.35784944891929626, "step": 3954 }, { "epoch": 10.82819986310746, "grad_norm": 5.819097995758057, "learning_rate": 4.583561643835616e-07, "log_odds_chosen": 2.257223606109619, "log_odds_ratio": -0.25638896226882935, "logits/chosen": 0.9212115406990051, "logits/rejected": 0.9308627247810364, "logps/chosen": -2.0817253589630127, "logps/rejected": -4.18681526184082, "loss": 0.6761, "nll_loss": 0.6504915952682495, "rewards/accuracies": 0.875, "rewards/chosen": -0.20817254483699799, "rewards/margins": 0.21050898730754852, "rewards/rejected": -0.4186815321445465, "step": 3955 }, { "epoch": 10.830937713894592, "grad_norm": 4.488353729248047, "learning_rate": 4.5821917808219176e-07, "log_odds_chosen": 1.5887426137924194, "log_odds_ratio": -0.45002949237823486, "logits/chosen": 0.8277860283851624, "logits/rejected": 0.876236081123352, "logps/chosen": -2.4461441040039062, "logps/rejected": -3.9564406871795654, "loss": 0.682, "nll_loss": 0.6370062828063965, "rewards/accuracies": 0.75, "rewards/chosen": -0.2446143925189972, "rewards/margins": 0.15102970600128174, "rewards/rejected": -0.39564406871795654, "step": 3956 }, { "epoch": 10.833675564681725, "grad_norm": 5.2354512214660645, "learning_rate": 4.580821917808219e-07, "log_odds_chosen": 1.1997932195663452, "log_odds_ratio": -0.4311700463294983, "logits/chosen": 0.7341368198394775, "logits/rejected": 0.7704355120658875, "logps/chosen": -2.5962634086608887, "logps/rejected": -3.7226061820983887, "loss": 0.7643, "nll_loss": 0.7211481928825378, "rewards/accuracies": 0.75, "rewards/chosen": -0.2596263289451599, "rewards/margins": 0.11263426393270493, "rewards/rejected": -0.37226060032844543, "step": 3957 }, { "epoch": 10.836413415468858, "grad_norm": 5.90247106552124, "learning_rate": 4.5794520547945206e-07, "log_odds_chosen": 4.179223537445068, "log_odds_ratio": -0.07992144674062729, "logits/chosen": 1.0088751316070557, "logits/rejected": 1.0367481708526611, "logps/chosen": -1.7555053234100342, "logps/rejected": -5.742374420166016, "loss": 0.7095, "nll_loss": 0.7015177607536316, "rewards/accuracies": 1.0, "rewards/chosen": -0.17555053532123566, "rewards/margins": 0.39868688583374023, "rewards/rejected": -0.5742374062538147, "step": 3958 }, { "epoch": 10.839151266255989, "grad_norm": 5.202458381652832, "learning_rate": 4.5780821917808216e-07, "log_odds_chosen": 1.8186763525009155, "log_odds_ratio": -0.37411803007125854, "logits/chosen": 0.5971659421920776, "logits/rejected": 0.575297474861145, "logps/chosen": -1.933709979057312, "logps/rejected": -3.645313262939453, "loss": 0.7184, "nll_loss": 0.6809898018836975, "rewards/accuracies": 0.75, "rewards/chosen": -0.1933709979057312, "rewards/margins": 0.17116034030914307, "rewards/rejected": -0.36453133821487427, "step": 3959 }, { "epoch": 10.841889117043122, "grad_norm": 4.598322868347168, "learning_rate": 4.576712328767123e-07, "log_odds_chosen": 3.2068474292755127, "log_odds_ratio": -0.10830733180046082, "logits/chosen": 0.826092004776001, "logits/rejected": 0.7928359508514404, "logps/chosen": -2.1542165279388428, "logps/rejected": -5.215885162353516, "loss": 0.8055, "nll_loss": 0.7946992516517639, "rewards/accuracies": 1.0, "rewards/chosen": -0.2154216766357422, "rewards/margins": 0.3061668276786804, "rewards/rejected": -0.5215885043144226, "step": 3960 }, { "epoch": 10.844626967830253, "grad_norm": 5.371174335479736, "learning_rate": 4.5753424657534246e-07, "log_odds_chosen": 2.3208956718444824, "log_odds_ratio": -0.2368626743555069, "logits/chosen": 0.6634240746498108, "logits/rejected": 0.6535362601280212, "logps/chosen": -2.453017473220825, "logps/rejected": -4.70751953125, "loss": 0.7325, "nll_loss": 0.7088398933410645, "rewards/accuracies": 1.0, "rewards/chosen": -0.2453017383813858, "rewards/margins": 0.22545024752616882, "rewards/rejected": -0.4707520008087158, "step": 3961 }, { "epoch": 10.847364818617386, "grad_norm": 5.014569282531738, "learning_rate": 4.5739726027397256e-07, "log_odds_chosen": 2.1420693397521973, "log_odds_ratio": -0.37641844153404236, "logits/chosen": 0.8045399785041809, "logits/rejected": 0.74193274974823, "logps/chosen": -2.6958885192871094, "logps/rejected": -4.726045608520508, "loss": 0.714, "nll_loss": 0.6763531565666199, "rewards/accuracies": 0.75, "rewards/chosen": -0.2695888578891754, "rewards/margins": 0.20301571488380432, "rewards/rejected": -0.4726046025753021, "step": 3962 }, { "epoch": 10.850102669404517, "grad_norm": 8.261151313781738, "learning_rate": 4.572602739726027e-07, "log_odds_chosen": 2.194430351257324, "log_odds_ratio": -0.7046246528625488, "logits/chosen": 0.7994120121002197, "logits/rejected": 0.8358365893363953, "logps/chosen": -2.8885064125061035, "logps/rejected": -4.977084636688232, "loss": 0.7868, "nll_loss": 0.7162908911705017, "rewards/accuracies": 0.875, "rewards/chosen": -0.28885066509246826, "rewards/margins": 0.20885783433914185, "rewards/rejected": -0.4977084994316101, "step": 3963 }, { "epoch": 10.85284052019165, "grad_norm": 5.84158182144165, "learning_rate": 4.5712328767123287e-07, "log_odds_chosen": 1.7538862228393555, "log_odds_ratio": -0.33390599489212036, "logits/chosen": 1.0120370388031006, "logits/rejected": 1.0512311458587646, "logps/chosen": -2.3066422939300537, "logps/rejected": -3.974015712738037, "loss": 0.7143, "nll_loss": 0.6809259653091431, "rewards/accuracies": 0.875, "rewards/chosen": -0.2306642383337021, "rewards/margins": 0.16673734784126282, "rewards/rejected": -0.3974016010761261, "step": 3964 }, { "epoch": 10.855578370978781, "grad_norm": 4.454483985900879, "learning_rate": 4.56986301369863e-07, "log_odds_chosen": 1.5083808898925781, "log_odds_ratio": -0.29706746339797974, "logits/chosen": 0.9036965370178223, "logits/rejected": 0.936288595199585, "logps/chosen": -1.92814040184021, "logps/rejected": -3.292072057723999, "loss": 0.7223, "nll_loss": 0.692636251449585, "rewards/accuracies": 0.875, "rewards/chosen": -0.19281405210494995, "rewards/margins": 0.13639315962791443, "rewards/rejected": -0.3292072117328644, "step": 3965 }, { "epoch": 10.858316221765914, "grad_norm": 4.59855318069458, "learning_rate": 4.568493150684931e-07, "log_odds_chosen": 1.4514967203140259, "log_odds_ratio": -0.2930385172367096, "logits/chosen": 0.730871319770813, "logits/rejected": 0.6977365016937256, "logps/chosen": -1.5709712505340576, "logps/rejected": -2.871575355529785, "loss": 0.6426, "nll_loss": 0.6133006811141968, "rewards/accuracies": 1.0, "rewards/chosen": -0.15709711611270905, "rewards/margins": 0.13006040453910828, "rewards/rejected": -0.2871575355529785, "step": 3966 }, { "epoch": 10.861054072553046, "grad_norm": 4.894184589385986, "learning_rate": 4.5671232876712327e-07, "log_odds_chosen": 2.6588134765625, "log_odds_ratio": -0.11875483393669128, "logits/chosen": 0.9983216524124146, "logits/rejected": 1.0747666358947754, "logps/chosen": -2.1198959350585938, "logps/rejected": -4.6284661293029785, "loss": 0.6273, "nll_loss": 0.6153911352157593, "rewards/accuracies": 1.0, "rewards/chosen": -0.2119895964860916, "rewards/margins": 0.25085705518722534, "rewards/rejected": -0.46284663677215576, "step": 3967 }, { "epoch": 10.863791923340179, "grad_norm": 5.216115951538086, "learning_rate": 4.565753424657534e-07, "log_odds_chosen": 1.2411339282989502, "log_odds_ratio": -0.3396351933479309, "logits/chosen": 0.7504931092262268, "logits/rejected": 0.6974462270736694, "logps/chosen": -2.0517261028289795, "logps/rejected": -3.18505597114563, "loss": 0.8273, "nll_loss": 0.7933553457260132, "rewards/accuracies": 0.875, "rewards/chosen": -0.2051726132631302, "rewards/margins": 0.11333297193050385, "rewards/rejected": -0.31850558519363403, "step": 3968 }, { "epoch": 10.86652977412731, "grad_norm": 5.3756842613220215, "learning_rate": 4.564383561643835e-07, "log_odds_chosen": 1.4082908630371094, "log_odds_ratio": -0.4064222276210785, "logits/chosen": 0.6617263555526733, "logits/rejected": 0.8042124509811401, "logps/chosen": -1.7618420124053955, "logps/rejected": -3.0649914741516113, "loss": 0.6993, "nll_loss": 0.6586259007453918, "rewards/accuracies": 0.875, "rewards/chosen": -0.17618420720100403, "rewards/margins": 0.13031494617462158, "rewards/rejected": -0.3064991533756256, "step": 3969 }, { "epoch": 10.869267624914443, "grad_norm": 4.79179573059082, "learning_rate": 4.5630136986301367e-07, "log_odds_chosen": 1.8685202598571777, "log_odds_ratio": -0.30748021602630615, "logits/chosen": 0.7303407788276672, "logits/rejected": 0.7546943426132202, "logps/chosen": -2.4075560569763184, "logps/rejected": -4.193819046020508, "loss": 0.7106, "nll_loss": 0.6798133850097656, "rewards/accuracies": 1.0, "rewards/chosen": -0.2407556176185608, "rewards/margins": 0.17862629890441895, "rewards/rejected": -0.41938191652297974, "step": 3970 }, { "epoch": 10.872005475701574, "grad_norm": 4.730788707733154, "learning_rate": 4.561643835616438e-07, "log_odds_chosen": 3.083481550216675, "log_odds_ratio": -0.1172935962677002, "logits/chosen": 0.9543138742446899, "logits/rejected": 1.0109957456588745, "logps/chosen": -2.0978758335113525, "logps/rejected": -5.042056560516357, "loss": 0.7475, "nll_loss": 0.735727071762085, "rewards/accuracies": 1.0, "rewards/chosen": -0.20978760719299316, "rewards/margins": 0.2944180965423584, "rewards/rejected": -0.5042057037353516, "step": 3971 }, { "epoch": 10.874743326488707, "grad_norm": 4.9551496505737305, "learning_rate": 4.560273972602739e-07, "log_odds_chosen": 2.4486031532287598, "log_odds_ratio": -0.25067561864852905, "logits/chosen": 0.9535425901412964, "logits/rejected": 0.9983082413673401, "logps/chosen": -2.268336772918701, "logps/rejected": -4.571333885192871, "loss": 0.6471, "nll_loss": 0.6220219135284424, "rewards/accuracies": 1.0, "rewards/chosen": -0.22683365643024445, "rewards/margins": 0.23029974102973938, "rewards/rejected": -0.457133412361145, "step": 3972 }, { "epoch": 10.877481177275838, "grad_norm": 4.820240020751953, "learning_rate": 4.558904109589041e-07, "log_odds_chosen": 3.165269374847412, "log_odds_ratio": -0.20744234323501587, "logits/chosen": 0.8386673331260681, "logits/rejected": 0.8415310382843018, "logps/chosen": -2.7693450450897217, "logps/rejected": -5.816259384155273, "loss": 0.7603, "nll_loss": 0.7395859956741333, "rewards/accuracies": 0.875, "rewards/chosen": -0.27693450450897217, "rewards/margins": 0.3046914041042328, "rewards/rejected": -0.5816259384155273, "step": 3973 }, { "epoch": 10.880219028062971, "grad_norm": 4.657230854034424, "learning_rate": 4.5575342465753423e-07, "log_odds_chosen": 2.1589269638061523, "log_odds_ratio": -0.2595706582069397, "logits/chosen": 0.5168442130088806, "logits/rejected": 0.5911312103271484, "logps/chosen": -2.3344764709472656, "logps/rejected": -4.379942893981934, "loss": 0.776, "nll_loss": 0.7500804662704468, "rewards/accuracies": 1.0, "rewards/chosen": -0.23344764113426208, "rewards/margins": 0.20454667508602142, "rewards/rejected": -0.4379943013191223, "step": 3974 }, { "epoch": 10.882956878850102, "grad_norm": 4.484436511993408, "learning_rate": 4.556164383561644e-07, "log_odds_chosen": 1.8655260801315308, "log_odds_ratio": -0.28188633918762207, "logits/chosen": 0.964317262172699, "logits/rejected": 1.0343890190124512, "logps/chosen": -2.6766104698181152, "logps/rejected": -4.417695045471191, "loss": 0.667, "nll_loss": 0.6387661695480347, "rewards/accuracies": 1.0, "rewards/chosen": -0.26766106486320496, "rewards/margins": 0.17410844564437866, "rewards/rejected": -0.441769540309906, "step": 3975 }, { "epoch": 10.885694729637235, "grad_norm": 5.6493425369262695, "learning_rate": 4.554794520547945e-07, "log_odds_chosen": 1.3559715747833252, "log_odds_ratio": -0.36788511276245117, "logits/chosen": 0.7009446620941162, "logits/rejected": 0.7139544486999512, "logps/chosen": -1.933156967163086, "logps/rejected": -3.181946277618408, "loss": 0.8174, "nll_loss": 0.7805943489074707, "rewards/accuracies": 0.875, "rewards/chosen": -0.19331571459770203, "rewards/margins": 0.1248789057135582, "rewards/rejected": -0.3181946277618408, "step": 3976 }, { "epoch": 10.888432580424366, "grad_norm": 5.63628625869751, "learning_rate": 4.5534246575342463e-07, "log_odds_chosen": 2.151486873626709, "log_odds_ratio": -0.22185590863227844, "logits/chosen": 0.8418154120445251, "logits/rejected": 0.9113426804542542, "logps/chosen": -2.602092981338501, "logps/rejected": -4.684991836547852, "loss": 0.6967, "nll_loss": 0.6744703054428101, "rewards/accuracies": 1.0, "rewards/chosen": -0.260209321975708, "rewards/margins": 0.20828992128372192, "rewards/rejected": -0.46849924325942993, "step": 3977 }, { "epoch": 10.8911704312115, "grad_norm": 5.553957462310791, "learning_rate": 4.552054794520548e-07, "log_odds_chosen": 2.148371934890747, "log_odds_ratio": -0.29561883211135864, "logits/chosen": 1.0395395755767822, "logits/rejected": 1.0614075660705566, "logps/chosen": -2.034160852432251, "logps/rejected": -4.016963005065918, "loss": 0.7345, "nll_loss": 0.7049325704574585, "rewards/accuracies": 0.875, "rewards/chosen": -0.20341607928276062, "rewards/margins": 0.1982802450656891, "rewards/rejected": -0.4016963243484497, "step": 3978 }, { "epoch": 10.89390828199863, "grad_norm": 5.186266899108887, "learning_rate": 4.550684931506849e-07, "log_odds_chosen": 2.2528960704803467, "log_odds_ratio": -0.2441697120666504, "logits/chosen": 0.9756371378898621, "logits/rejected": 1.044023036956787, "logps/chosen": -2.9414689540863037, "logps/rejected": -5.173826217651367, "loss": 0.6763, "nll_loss": 0.6519155502319336, "rewards/accuracies": 0.875, "rewards/chosen": -0.29414689540863037, "rewards/margins": 0.22323572635650635, "rewards/rejected": -0.5173826217651367, "step": 3979 }, { "epoch": 10.896646132785763, "grad_norm": 5.518273830413818, "learning_rate": 4.5493150684931503e-07, "log_odds_chosen": 1.3359990119934082, "log_odds_ratio": -0.4044194221496582, "logits/chosen": 0.8376392722129822, "logits/rejected": 0.8868162631988525, "logps/chosen": -2.6522693634033203, "logps/rejected": -3.954019546508789, "loss": 0.696, "nll_loss": 0.6555677652359009, "rewards/accuracies": 0.875, "rewards/chosen": -0.26522693037986755, "rewards/margins": 0.13017499446868896, "rewards/rejected": -0.3954019546508789, "step": 3980 }, { "epoch": 10.899383983572895, "grad_norm": 4.705258846282959, "learning_rate": 4.547945205479452e-07, "log_odds_chosen": 3.4317400455474854, "log_odds_ratio": -0.14518560469150543, "logits/chosen": 0.835526704788208, "logits/rejected": 0.8948622941970825, "logps/chosen": -2.471808433532715, "logps/rejected": -5.77637243270874, "loss": 0.5794, "nll_loss": 0.5648669004440308, "rewards/accuracies": 1.0, "rewards/chosen": -0.24718086421489716, "rewards/margins": 0.330456405878067, "rewards/rejected": -0.577637255191803, "step": 3981 }, { "epoch": 10.902121834360027, "grad_norm": 4.165648460388184, "learning_rate": 4.5465753424657534e-07, "log_odds_chosen": 2.6422626972198486, "log_odds_ratio": -0.2072354257106781, "logits/chosen": 0.6039880514144897, "logits/rejected": 0.6711866855621338, "logps/chosen": -2.2959933280944824, "logps/rejected": -4.807826995849609, "loss": 0.7133, "nll_loss": 0.6925756931304932, "rewards/accuracies": 1.0, "rewards/chosen": -0.22959932684898376, "rewards/margins": 0.2511833906173706, "rewards/rejected": -0.48078271746635437, "step": 3982 }, { "epoch": 10.904859685147159, "grad_norm": 5.443958759307861, "learning_rate": 4.5452054794520544e-07, "log_odds_chosen": 3.3157782554626465, "log_odds_ratio": -0.1320798397064209, "logits/chosen": 0.8170486688613892, "logits/rejected": 0.8048893213272095, "logps/chosen": -2.491827964782715, "logps/rejected": -5.659804821014404, "loss": 0.8521, "nll_loss": 0.8389410972595215, "rewards/accuracies": 1.0, "rewards/chosen": -0.2491828054189682, "rewards/margins": 0.31679767370224, "rewards/rejected": -0.5659804344177246, "step": 3983 }, { "epoch": 10.907597535934292, "grad_norm": 7.915565013885498, "learning_rate": 4.5438356164383564e-07, "log_odds_chosen": 1.504036545753479, "log_odds_ratio": -0.6131884455680847, "logits/chosen": 0.9634988307952881, "logits/rejected": 0.9527450799942017, "logps/chosen": -2.716392755508423, "logps/rejected": -4.120711326599121, "loss": 0.735, "nll_loss": 0.6737087965011597, "rewards/accuracies": 0.625, "rewards/chosen": -0.27163928747177124, "rewards/margins": 0.14043185114860535, "rewards/rejected": -0.412071168422699, "step": 3984 }, { "epoch": 10.910335386721425, "grad_norm": 7.183679103851318, "learning_rate": 4.5424657534246574e-07, "log_odds_chosen": 0.9027388095855713, "log_odds_ratio": -0.5711061954498291, "logits/chosen": 0.909785270690918, "logits/rejected": 0.9679379463195801, "logps/chosen": -2.788895606994629, "logps/rejected": -3.6569855213165283, "loss": 0.891, "nll_loss": 0.8339089751243591, "rewards/accuracies": 0.625, "rewards/chosen": -0.27888956665992737, "rewards/margins": 0.08680898696184158, "rewards/rejected": -0.36569857597351074, "step": 3985 }, { "epoch": 10.913073237508556, "grad_norm": 5.3064117431640625, "learning_rate": 4.5410958904109584e-07, "log_odds_chosen": 1.4839811325073242, "log_odds_ratio": -0.28746771812438965, "logits/chosen": 0.7032189965248108, "logits/rejected": 0.7014926671981812, "logps/chosen": -2.271721363067627, "logps/rejected": -3.661259174346924, "loss": 0.7434, "nll_loss": 0.7146996855735779, "rewards/accuracies": 1.0, "rewards/chosen": -0.2271721363067627, "rewards/margins": 0.13895376026630402, "rewards/rejected": -0.3661258816719055, "step": 3986 }, { "epoch": 10.915811088295689, "grad_norm": 4.792258262634277, "learning_rate": 4.53972602739726e-07, "log_odds_chosen": 2.657697916030884, "log_odds_ratio": -0.16101118922233582, "logits/chosen": 0.9072098731994629, "logits/rejected": 0.9458315372467041, "logps/chosen": -2.1316356658935547, "logps/rejected": -4.635300636291504, "loss": 0.7632, "nll_loss": 0.7471041679382324, "rewards/accuracies": 1.0, "rewards/chosen": -0.2131635844707489, "rewards/margins": 0.2503664493560791, "rewards/rejected": -0.463530033826828, "step": 3987 }, { "epoch": 10.91854893908282, "grad_norm": 5.82186222076416, "learning_rate": 4.5383561643835615e-07, "log_odds_chosen": 1.4582881927490234, "log_odds_ratio": -0.38221248984336853, "logits/chosen": 0.5514630079269409, "logits/rejected": 0.5337725281715393, "logps/chosen": -1.7969160079956055, "logps/rejected": -3.1611266136169434, "loss": 0.7265, "nll_loss": 0.6882388591766357, "rewards/accuracies": 0.875, "rewards/chosen": -0.1796915978193283, "rewards/margins": 0.13642103970050812, "rewards/rejected": -0.3161126375198364, "step": 3988 }, { "epoch": 10.921286789869953, "grad_norm": 5.179989814758301, "learning_rate": 4.536986301369863e-07, "log_odds_chosen": 0.929702877998352, "log_odds_ratio": -0.4089840054512024, "logits/chosen": 0.8095703721046448, "logits/rejected": 0.8260916471481323, "logps/chosen": -2.1282761096954346, "logps/rejected": -2.941016435623169, "loss": 0.7123, "nll_loss": 0.6714287996292114, "rewards/accuracies": 0.75, "rewards/chosen": -0.2128276228904724, "rewards/margins": 0.08127403259277344, "rewards/rejected": -0.29410165548324585, "step": 3989 }, { "epoch": 10.924024640657084, "grad_norm": 6.008708953857422, "learning_rate": 4.535616438356164e-07, "log_odds_chosen": 0.9892297387123108, "log_odds_ratio": -0.42695608735084534, "logits/chosen": 0.8175398111343384, "logits/rejected": 0.7678385376930237, "logps/chosen": -1.5305627584457397, "logps/rejected": -2.3140194416046143, "loss": 0.7214, "nll_loss": 0.6786826848983765, "rewards/accuracies": 0.75, "rewards/chosen": -0.1530562788248062, "rewards/margins": 0.07834567129611969, "rewards/rejected": -0.2314019650220871, "step": 3990 }, { "epoch": 10.926762491444217, "grad_norm": 11.73409652709961, "learning_rate": 4.534246575342466e-07, "log_odds_chosen": 0.7105093002319336, "log_odds_ratio": -0.6611795425415039, "logits/chosen": 1.0398882627487183, "logits/rejected": 0.9050092697143555, "logps/chosen": -2.810523271560669, "logps/rejected": -3.403707265853882, "loss": 0.8291, "nll_loss": 0.7629405856132507, "rewards/accuracies": 0.75, "rewards/chosen": -0.2810523509979248, "rewards/margins": 0.05931837111711502, "rewards/rejected": -0.34037068486213684, "step": 3991 }, { "epoch": 10.929500342231348, "grad_norm": 5.677365303039551, "learning_rate": 4.532876712328767e-07, "log_odds_chosen": 1.1864736080169678, "log_odds_ratio": -0.7916452884674072, "logits/chosen": 0.6711007356643677, "logits/rejected": 0.7709903120994568, "logps/chosen": -2.477642059326172, "logps/rejected": -3.6299431324005127, "loss": 0.734, "nll_loss": 0.6548434495925903, "rewards/accuracies": 0.625, "rewards/chosen": -0.2477641999721527, "rewards/margins": 0.11523009091615677, "rewards/rejected": -0.36299431324005127, "step": 3992 }, { "epoch": 10.932238193018481, "grad_norm": 4.892361164093018, "learning_rate": 4.531506849315068e-07, "log_odds_chosen": 3.027122974395752, "log_odds_ratio": -0.11467241495847702, "logits/chosen": 0.9939420223236084, "logits/rejected": 1.0415072441101074, "logps/chosen": -2.6560006141662598, "logps/rejected": -5.568026542663574, "loss": 0.7251, "nll_loss": 0.7136515378952026, "rewards/accuracies": 1.0, "rewards/chosen": -0.2656000852584839, "rewards/margins": 0.2912025451660156, "rewards/rejected": -0.5568026304244995, "step": 3993 }, { "epoch": 10.934976043805612, "grad_norm": 5.842586517333984, "learning_rate": 4.5301369863013695e-07, "log_odds_chosen": 1.7406443357467651, "log_odds_ratio": -0.3492896556854248, "logits/chosen": 0.7323322296142578, "logits/rejected": 0.6190809011459351, "logps/chosen": -2.5496838092803955, "logps/rejected": -4.216897964477539, "loss": 0.8314, "nll_loss": 0.7964233756065369, "rewards/accuracies": 0.875, "rewards/chosen": -0.2549683749675751, "rewards/margins": 0.1667214334011078, "rewards/rejected": -0.42168983817100525, "step": 3994 }, { "epoch": 10.937713894592745, "grad_norm": 4.531167030334473, "learning_rate": 4.528767123287671e-07, "log_odds_chosen": 1.9130141735076904, "log_odds_ratio": -0.2476138174533844, "logits/chosen": 0.7058432102203369, "logits/rejected": 0.7489754557609558, "logps/chosen": -2.093327760696411, "logps/rejected": -3.8934144973754883, "loss": 0.6662, "nll_loss": 0.6414414644241333, "rewards/accuracies": 1.0, "rewards/chosen": -0.20933279395103455, "rewards/margins": 0.1800086796283722, "rewards/rejected": -0.38934147357940674, "step": 3995 }, { "epoch": 10.940451745379876, "grad_norm": 5.542604446411133, "learning_rate": 4.5273972602739726e-07, "log_odds_chosen": 3.2698378562927246, "log_odds_ratio": -0.30595269799232483, "logits/chosen": 0.8354659676551819, "logits/rejected": 0.8143098950386047, "logps/chosen": -2.8016130924224854, "logps/rejected": -5.999075412750244, "loss": 0.9902, "nll_loss": 0.959648072719574, "rewards/accuracies": 0.875, "rewards/chosen": -0.2801613211631775, "rewards/margins": 0.319746196269989, "rewards/rejected": -0.5999075174331665, "step": 3996 }, { "epoch": 10.94318959616701, "grad_norm": 5.930403709411621, "learning_rate": 4.5260273972602735e-07, "log_odds_chosen": 2.688676118850708, "log_odds_ratio": -0.29185575246810913, "logits/chosen": 0.9400976896286011, "logits/rejected": 0.9547591209411621, "logps/chosen": -2.4750444889068604, "logps/rejected": -5.038585186004639, "loss": 0.756, "nll_loss": 0.7268509268760681, "rewards/accuracies": 0.875, "rewards/chosen": -0.24750444293022156, "rewards/margins": 0.25635403394699097, "rewards/rejected": -0.5038585066795349, "step": 3997 }, { "epoch": 10.94592744695414, "grad_norm": 4.532360553741455, "learning_rate": 4.5246575342465756e-07, "log_odds_chosen": 2.3279001712799072, "log_odds_ratio": -0.3740725517272949, "logits/chosen": 1.066784143447876, "logits/rejected": 1.009843111038208, "logps/chosen": -2.030367136001587, "logps/rejected": -4.288337230682373, "loss": 0.6927, "nll_loss": 0.6552927494049072, "rewards/accuracies": 0.625, "rewards/chosen": -0.20303674042224884, "rewards/margins": 0.22579701244831085, "rewards/rejected": -0.4288337230682373, "step": 3998 }, { "epoch": 10.948665297741274, "grad_norm": 5.444380283355713, "learning_rate": 4.5232876712328766e-07, "log_odds_chosen": 2.233811140060425, "log_odds_ratio": -0.4188401401042938, "logits/chosen": 0.9144359827041626, "logits/rejected": 0.9361796379089355, "logps/chosen": -2.630689859390259, "logps/rejected": -4.7703022956848145, "loss": 0.8968, "nll_loss": 0.854886531829834, "rewards/accuracies": 0.875, "rewards/chosen": -0.2630690038204193, "rewards/margins": 0.21396124362945557, "rewards/rejected": -0.4770302176475525, "step": 3999 }, { "epoch": 10.951403148528405, "grad_norm": 5.606403350830078, "learning_rate": 4.5219178082191776e-07, "log_odds_chosen": 1.8108556270599365, "log_odds_ratio": -0.23841972649097443, "logits/chosen": 0.9568114280700684, "logits/rejected": 0.9429826736450195, "logps/chosen": -2.0250778198242188, "logps/rejected": -3.6779634952545166, "loss": 0.6775, "nll_loss": 0.6536688804626465, "rewards/accuracies": 1.0, "rewards/chosen": -0.20250779390335083, "rewards/margins": 0.16528858244419098, "rewards/rejected": -0.3677963614463806, "step": 4000 }, { "epoch": 10.954140999315538, "grad_norm": 7.120183944702148, "learning_rate": 4.520547945205479e-07, "log_odds_chosen": 3.1418585777282715, "log_odds_ratio": -0.37512320280075073, "logits/chosen": 0.8283377885818481, "logits/rejected": 0.8152138590812683, "logps/chosen": -2.089669942855835, "logps/rejected": -5.041835308074951, "loss": 0.6448, "nll_loss": 0.6073158979415894, "rewards/accuracies": 0.875, "rewards/chosen": -0.20896700024604797, "rewards/margins": 0.29521653056144714, "rewards/rejected": -0.5041835308074951, "step": 4001 }, { "epoch": 10.956878850102669, "grad_norm": 5.469175338745117, "learning_rate": 4.5191780821917806e-07, "log_odds_chosen": 1.836391568183899, "log_odds_ratio": -0.23560990393161774, "logits/chosen": 0.6923835277557373, "logits/rejected": 0.6268581748008728, "logps/chosen": -1.772392988204956, "logps/rejected": -3.4644665718078613, "loss": 0.7318, "nll_loss": 0.7082276344299316, "rewards/accuracies": 1.0, "rewards/chosen": -0.1772392839193344, "rewards/margins": 0.1692073792219162, "rewards/rejected": -0.346446692943573, "step": 4002 }, { "epoch": 10.959616700889802, "grad_norm": 5.1390886306762695, "learning_rate": 4.517808219178082e-07, "log_odds_chosen": 2.404521942138672, "log_odds_ratio": -0.17089995741844177, "logits/chosen": 0.9354995489120483, "logits/rejected": 0.9400659799575806, "logps/chosen": -2.83817195892334, "logps/rejected": -5.127371788024902, "loss": 0.7196, "nll_loss": 0.7025123834609985, "rewards/accuracies": 1.0, "rewards/chosen": -0.28381720185279846, "rewards/margins": 0.22891996800899506, "rewards/rejected": -0.5127371549606323, "step": 4003 }, { "epoch": 10.962354551676933, "grad_norm": 7.7559027671813965, "learning_rate": 4.516438356164383e-07, "log_odds_chosen": 1.5489544868469238, "log_odds_ratio": -0.22859297692775726, "logits/chosen": 0.7597835659980774, "logits/rejected": 0.7513330578804016, "logps/chosen": -2.2360482215881348, "logps/rejected": -3.6859798431396484, "loss": 0.6911, "nll_loss": 0.6682153940200806, "rewards/accuracies": 1.0, "rewards/chosen": -0.22360481321811676, "rewards/margins": 0.14499318599700928, "rewards/rejected": -0.3685980439186096, "step": 4004 }, { "epoch": 10.965092402464066, "grad_norm": 5.1459126472473145, "learning_rate": 4.515068493150685e-07, "log_odds_chosen": 2.406442642211914, "log_odds_ratio": -0.2114853709936142, "logits/chosen": 0.6060910224914551, "logits/rejected": 0.6249396800994873, "logps/chosen": -1.6539361476898193, "logps/rejected": -3.8943114280700684, "loss": 0.6808, "nll_loss": 0.6596110463142395, "rewards/accuracies": 0.875, "rewards/chosen": -0.1653936207294464, "rewards/margins": 0.2240375280380249, "rewards/rejected": -0.3894311785697937, "step": 4005 }, { "epoch": 10.967830253251197, "grad_norm": 5.631771564483643, "learning_rate": 4.513698630136986e-07, "log_odds_chosen": 1.5702630281448364, "log_odds_ratio": -0.4336095452308655, "logits/chosen": 1.1376780271530151, "logits/rejected": 1.1882163286209106, "logps/chosen": -2.922487258911133, "logps/rejected": -4.473153114318848, "loss": 0.7317, "nll_loss": 0.6883834600448608, "rewards/accuracies": 0.75, "rewards/chosen": -0.2922487258911133, "rewards/margins": 0.1550665646791458, "rewards/rejected": -0.4473153054714203, "step": 4006 }, { "epoch": 10.97056810403833, "grad_norm": 4.819651126861572, "learning_rate": 4.512328767123287e-07, "log_odds_chosen": 2.4902467727661133, "log_odds_ratio": -0.17817141115665436, "logits/chosen": 0.8811696767807007, "logits/rejected": 0.9038844704627991, "logps/chosen": -2.5179080963134766, "logps/rejected": -4.9082350730896, "loss": 0.7462, "nll_loss": 0.728373646736145, "rewards/accuracies": 1.0, "rewards/chosen": -0.2517908215522766, "rewards/margins": 0.23903273046016693, "rewards/rejected": -0.49082350730895996, "step": 4007 }, { "epoch": 10.973305954825461, "grad_norm": 4.733094215393066, "learning_rate": 4.5109589041095887e-07, "log_odds_chosen": 2.646074056625366, "log_odds_ratio": -0.1794210970401764, "logits/chosen": 0.9497498869895935, "logits/rejected": 0.9013440608978271, "logps/chosen": -1.9839015007019043, "logps/rejected": -4.489830017089844, "loss": 0.7078, "nll_loss": 0.6898730993270874, "rewards/accuracies": 1.0, "rewards/chosen": -0.1983901709318161, "rewards/margins": 0.2505928575992584, "rewards/rejected": -0.44898301362991333, "step": 4008 }, { "epoch": 10.976043805612594, "grad_norm": 5.1854634284973145, "learning_rate": 4.50958904109589e-07, "log_odds_chosen": 1.3027870655059814, "log_odds_ratio": -0.27508988976478577, "logits/chosen": 0.6354955434799194, "logits/rejected": 0.603390634059906, "logps/chosen": -1.7341957092285156, "logps/rejected": -2.883845090866089, "loss": 0.6432, "nll_loss": 0.6156986951828003, "rewards/accuracies": 1.0, "rewards/chosen": -0.17341956496238708, "rewards/margins": 0.11496495455503464, "rewards/rejected": -0.28838449716567993, "step": 4009 }, { "epoch": 10.978781656399725, "grad_norm": 9.113154411315918, "learning_rate": 4.5082191780821917e-07, "log_odds_chosen": 1.2841359376907349, "log_odds_ratio": -0.480158269405365, "logits/chosen": 1.0239503383636475, "logits/rejected": 0.9239600896835327, "logps/chosen": -2.8331942558288574, "logps/rejected": -4.017214775085449, "loss": 0.7897, "nll_loss": 0.7416421175003052, "rewards/accuracies": 0.75, "rewards/chosen": -0.28331947326660156, "rewards/margins": 0.11840201914310455, "rewards/rejected": -0.4017214775085449, "step": 4010 }, { "epoch": 10.981519507186858, "grad_norm": 5.053077697753906, "learning_rate": 4.5068493150684927e-07, "log_odds_chosen": 2.802960157394409, "log_odds_ratio": -0.25668060779571533, "logits/chosen": 0.7939245104789734, "logits/rejected": 0.7721824645996094, "logps/chosen": -2.050396680831909, "logps/rejected": -4.738813400268555, "loss": 0.6201, "nll_loss": 0.5944452881813049, "rewards/accuracies": 1.0, "rewards/chosen": -0.20503968000411987, "rewards/margins": 0.26884162425994873, "rewards/rejected": -0.4738813042640686, "step": 4011 }, { "epoch": 10.984257357973991, "grad_norm": 6.212726593017578, "learning_rate": 4.505479452054795e-07, "log_odds_chosen": 1.94358491897583, "log_odds_ratio": -0.2614109218120575, "logits/chosen": 0.8537515997886658, "logits/rejected": 1.0666159391403198, "logps/chosen": -2.633751153945923, "logps/rejected": -4.500517845153809, "loss": 0.7388, "nll_loss": 0.7126531600952148, "rewards/accuracies": 0.875, "rewards/chosen": -0.2633751332759857, "rewards/margins": 0.18667668104171753, "rewards/rejected": -0.45005178451538086, "step": 4012 }, { "epoch": 10.986995208761122, "grad_norm": 5.214190483093262, "learning_rate": 4.504109589041096e-07, "log_odds_chosen": 3.1325948238372803, "log_odds_ratio": -0.18910132348537445, "logits/chosen": 0.8731763362884521, "logits/rejected": 0.877389669418335, "logps/chosen": -2.080223560333252, "logps/rejected": -5.093175888061523, "loss": 0.6429, "nll_loss": 0.623961329460144, "rewards/accuracies": 1.0, "rewards/chosen": -0.208022341132164, "rewards/margins": 0.3012952506542206, "rewards/rejected": -0.5093176364898682, "step": 4013 }, { "epoch": 10.989733059548255, "grad_norm": 4.266617298126221, "learning_rate": 4.502739726027397e-07, "log_odds_chosen": 3.140317440032959, "log_odds_ratio": -0.17906515300273895, "logits/chosen": 0.783290684223175, "logits/rejected": 0.7961344718933105, "logps/chosen": -1.7278796434402466, "logps/rejected": -4.6841206550598145, "loss": 0.6708, "nll_loss": 0.6529300212860107, "rewards/accuracies": 1.0, "rewards/chosen": -0.17278797924518585, "rewards/margins": 0.29562413692474365, "rewards/rejected": -0.4684120714664459, "step": 4014 }, { "epoch": 10.992470910335387, "grad_norm": 5.4868693351745605, "learning_rate": 4.501369863013699e-07, "log_odds_chosen": 1.5197569131851196, "log_odds_ratio": -0.37541505694389343, "logits/chosen": 1.083274245262146, "logits/rejected": 1.1819264888763428, "logps/chosen": -2.4326913356781006, "logps/rejected": -3.8818745613098145, "loss": 0.667, "nll_loss": 0.6294357776641846, "rewards/accuracies": 0.875, "rewards/chosen": -0.243269145488739, "rewards/margins": 0.14491835236549377, "rewards/rejected": -0.3881874680519104, "step": 4015 }, { "epoch": 10.99520876112252, "grad_norm": 5.246116638183594, "learning_rate": 4.5e-07, "log_odds_chosen": 1.893937110900879, "log_odds_ratio": -0.2978532016277313, "logits/chosen": 0.8147482872009277, "logits/rejected": 0.7398794889450073, "logps/chosen": -1.4929559230804443, "logps/rejected": -3.2069010734558105, "loss": 0.7132, "nll_loss": 0.6833988428115845, "rewards/accuracies": 0.875, "rewards/chosen": -0.1492955982685089, "rewards/margins": 0.17139452695846558, "rewards/rejected": -0.3206901252269745, "step": 4016 }, { "epoch": 10.99794661190965, "grad_norm": 11.964125633239746, "learning_rate": 4.4986301369863013e-07, "log_odds_chosen": 0.5167903304100037, "log_odds_ratio": -0.9284615516662598, "logits/chosen": 0.709579586982727, "logits/rejected": 0.7166289687156677, "logps/chosen": -3.2940330505371094, "logps/rejected": -3.767061233520508, "loss": 0.7461, "nll_loss": 0.6532189846038818, "rewards/accuracies": 0.5, "rewards/chosen": -0.3294033408164978, "rewards/margins": 0.047302816063165665, "rewards/rejected": -0.37670615315437317, "step": 4017 }, { "epoch": 11.000684462696784, "grad_norm": 4.051665306091309, "learning_rate": 4.4972602739726023e-07, "log_odds_chosen": 1.6752156019210815, "log_odds_ratio": -0.32093942165374756, "logits/chosen": 0.6648297309875488, "logits/rejected": 0.5816096663475037, "logps/chosen": -2.0545873641967773, "logps/rejected": -3.627991199493408, "loss": 0.6939, "nll_loss": 0.6618127226829529, "rewards/accuracies": 1.0, "rewards/chosen": -0.20545876026153564, "rewards/margins": 0.1573403775691986, "rewards/rejected": -0.36279913783073425, "step": 4018 }, { "epoch": 11.003422313483915, "grad_norm": 5.1425371170043945, "learning_rate": 4.495890410958904e-07, "log_odds_chosen": 1.4319076538085938, "log_odds_ratio": -0.3910541236400604, "logits/chosen": 0.8988189697265625, "logits/rejected": 0.9031064510345459, "logps/chosen": -2.1291613578796387, "logps/rejected": -3.4759254455566406, "loss": 0.8075, "nll_loss": 0.7683775424957275, "rewards/accuracies": 0.75, "rewards/chosen": -0.21291615068912506, "rewards/margins": 0.13467639684677124, "rewards/rejected": -0.3475925326347351, "step": 4019 }, { "epoch": 11.006160164271048, "grad_norm": 5.76894998550415, "learning_rate": 4.4945205479452054e-07, "log_odds_chosen": 1.6519534587860107, "log_odds_ratio": -0.26758715510368347, "logits/chosen": 0.929388165473938, "logits/rejected": 0.8179322481155396, "logps/chosen": -1.6198643445968628, "logps/rejected": -3.0818071365356445, "loss": 0.6238, "nll_loss": 0.5970045328140259, "rewards/accuracies": 1.0, "rewards/chosen": -0.16198642551898956, "rewards/margins": 0.14619427919387817, "rewards/rejected": -0.30818068981170654, "step": 4020 }, { "epoch": 11.008898015058179, "grad_norm": 4.979346752166748, "learning_rate": 4.4931506849315063e-07, "log_odds_chosen": 3.2903292179107666, "log_odds_ratio": -0.45794039964675903, "logits/chosen": 0.952311635017395, "logits/rejected": 1.0132869482040405, "logps/chosen": -2.637000560760498, "logps/rejected": -5.790665626525879, "loss": 0.805, "nll_loss": 0.759204089641571, "rewards/accuracies": 0.75, "rewards/chosen": -0.26370006799697876, "rewards/margins": 0.31536656618118286, "rewards/rejected": -0.5790666341781616, "step": 4021 }, { "epoch": 11.011635865845312, "grad_norm": 4.651284217834473, "learning_rate": 4.4917808219178084e-07, "log_odds_chosen": 3.819120407104492, "log_odds_ratio": -0.16379278898239136, "logits/chosen": 0.7862834930419922, "logits/rejected": 0.8213835954666138, "logps/chosen": -2.3809096813201904, "logps/rejected": -6.113813877105713, "loss": 0.7381, "nll_loss": 0.7217113971710205, "rewards/accuracies": 0.875, "rewards/chosen": -0.23809094727039337, "rewards/margins": 0.37329044938087463, "rewards/rejected": -0.6113814115524292, "step": 4022 }, { "epoch": 11.014373716632443, "grad_norm": 4.700437545776367, "learning_rate": 4.4904109589041094e-07, "log_odds_chosen": 1.2231415510177612, "log_odds_ratio": -0.3083191514015198, "logits/chosen": 0.6857496500015259, "logits/rejected": 0.655606210231781, "logps/chosen": -2.396380662918091, "logps/rejected": -3.542123794555664, "loss": 0.7219, "nll_loss": 0.6911047697067261, "rewards/accuracies": 0.875, "rewards/chosen": -0.2396380603313446, "rewards/margins": 0.11457433551549911, "rewards/rejected": -0.3542124032974243, "step": 4023 }, { "epoch": 11.017111567419576, "grad_norm": 5.9561991691589355, "learning_rate": 4.489041095890411e-07, "log_odds_chosen": 1.8738293647766113, "log_odds_ratio": -0.3229915201663971, "logits/chosen": 0.6211459636688232, "logits/rejected": 0.5957412719726562, "logps/chosen": -2.6818766593933105, "logps/rejected": -4.449530601501465, "loss": 0.9677, "nll_loss": 0.9353630542755127, "rewards/accuracies": 0.75, "rewards/chosen": -0.2681877017021179, "rewards/margins": 0.17676536738872528, "rewards/rejected": -0.4449530243873596, "step": 4024 }, { "epoch": 11.019849418206707, "grad_norm": 6.888576984405518, "learning_rate": 4.487671232876712e-07, "log_odds_chosen": 1.7594027519226074, "log_odds_ratio": -0.3245287239551544, "logits/chosen": 0.9199951887130737, "logits/rejected": 0.8694424629211426, "logps/chosen": -2.839311122894287, "logps/rejected": -4.530914306640625, "loss": 0.8196, "nll_loss": 0.7871438264846802, "rewards/accuracies": 0.875, "rewards/chosen": -0.28393110632896423, "rewards/margins": 0.1691603660583496, "rewards/rejected": -0.45309147238731384, "step": 4025 }, { "epoch": 11.02258726899384, "grad_norm": 4.974132061004639, "learning_rate": 4.4863013698630134e-07, "log_odds_chosen": 2.690873622894287, "log_odds_ratio": -0.2855139672756195, "logits/chosen": 1.0344526767730713, "logits/rejected": 1.0374009609222412, "logps/chosen": -2.7477540969848633, "logps/rejected": -5.395689964294434, "loss": 0.6927, "nll_loss": 0.6641477346420288, "rewards/accuracies": 0.875, "rewards/chosen": -0.2747754454612732, "rewards/margins": 0.2647935748100281, "rewards/rejected": -0.5395690202713013, "step": 4026 }, { "epoch": 11.025325119780971, "grad_norm": 4.675046920776367, "learning_rate": 4.484931506849315e-07, "log_odds_chosen": 2.305335521697998, "log_odds_ratio": -0.3446257710456848, "logits/chosen": 0.7504187822341919, "logits/rejected": 0.737077534198761, "logps/chosen": -2.555720806121826, "logps/rejected": -4.774155616760254, "loss": 0.7012, "nll_loss": 0.6667487025260925, "rewards/accuracies": 0.875, "rewards/chosen": -0.2555720806121826, "rewards/margins": 0.22184354066848755, "rewards/rejected": -0.4774155914783478, "step": 4027 }, { "epoch": 11.028062970568104, "grad_norm": 5.844776153564453, "learning_rate": 4.483561643835616e-07, "log_odds_chosen": 2.1838791370391846, "log_odds_ratio": -0.26842018961906433, "logits/chosen": 0.9964917898178101, "logits/rejected": 1.048992395401001, "logps/chosen": -2.6439642906188965, "logps/rejected": -4.727987289428711, "loss": 0.6667, "nll_loss": 0.6398676037788391, "rewards/accuracies": 0.875, "rewards/chosen": -0.26439645886421204, "rewards/margins": 0.20840224623680115, "rewards/rejected": -0.4727987051010132, "step": 4028 }, { "epoch": 11.030800821355236, "grad_norm": 6.629730224609375, "learning_rate": 4.482191780821918e-07, "log_odds_chosen": 0.8247962594032288, "log_odds_ratio": -0.5511462688446045, "logits/chosen": 0.9218506813049316, "logits/rejected": 0.9440101385116577, "logps/chosen": -2.1200644969940186, "logps/rejected": -2.908571481704712, "loss": 0.6681, "nll_loss": 0.6129605770111084, "rewards/accuracies": 0.625, "rewards/chosen": -0.21200644969940186, "rewards/margins": 0.07885070145130157, "rewards/rejected": -0.29085713624954224, "step": 4029 }, { "epoch": 11.033538672142368, "grad_norm": 5.075161933898926, "learning_rate": 4.480821917808219e-07, "log_odds_chosen": 1.9932241439819336, "log_odds_ratio": -0.43233346939086914, "logits/chosen": 0.6774072647094727, "logits/rejected": 0.7082642316818237, "logps/chosen": -2.0589993000030518, "logps/rejected": -3.938711166381836, "loss": 0.8077, "nll_loss": 0.7644919157028198, "rewards/accuracies": 0.875, "rewards/chosen": -0.20589995384216309, "rewards/margins": 0.18797114491462708, "rewards/rejected": -0.39387109875679016, "step": 4030 }, { "epoch": 11.0362765229295, "grad_norm": 5.49521017074585, "learning_rate": 4.4794520547945205e-07, "log_odds_chosen": 1.3668079376220703, "log_odds_ratio": -0.3712090253829956, "logits/chosen": 0.7197330594062805, "logits/rejected": 0.700880229473114, "logps/chosen": -1.7752965688705444, "logps/rejected": -3.016921043395996, "loss": 0.6306, "nll_loss": 0.5934639573097229, "rewards/accuracies": 1.0, "rewards/chosen": -0.17752966284751892, "rewards/margins": 0.12416242808103561, "rewards/rejected": -0.3016921281814575, "step": 4031 }, { "epoch": 11.039014373716633, "grad_norm": 6.742320537567139, "learning_rate": 4.4780821917808215e-07, "log_odds_chosen": 0.8206908702850342, "log_odds_ratio": -0.46474525332450867, "logits/chosen": 0.67142653465271, "logits/rejected": 0.687012791633606, "logps/chosen": -3.0532467365264893, "logps/rejected": -3.845174551010132, "loss": 0.9461, "nll_loss": 0.8996731042861938, "rewards/accuracies": 0.75, "rewards/chosen": -0.3053246736526489, "rewards/margins": 0.07919277250766754, "rewards/rejected": -0.38451746106147766, "step": 4032 }, { "epoch": 11.041752224503764, "grad_norm": 6.881035327911377, "learning_rate": 4.476712328767123e-07, "log_odds_chosen": 2.433915615081787, "log_odds_ratio": -0.3127966821193695, "logits/chosen": 0.5823860764503479, "logits/rejected": 0.6979199647903442, "logps/chosen": -2.3346095085144043, "logps/rejected": -4.684560298919678, "loss": 0.6757, "nll_loss": 0.6443951725959778, "rewards/accuracies": 0.875, "rewards/chosen": -0.23346097767353058, "rewards/margins": 0.23499508202075958, "rewards/rejected": -0.4684560298919678, "step": 4033 }, { "epoch": 11.044490075290897, "grad_norm": 5.132965087890625, "learning_rate": 4.4753424657534245e-07, "log_odds_chosen": 0.7227886319160461, "log_odds_ratio": -0.43161943554878235, "logits/chosen": 0.8060755133628845, "logits/rejected": 0.7479534149169922, "logps/chosen": -1.9540560245513916, "logps/rejected": -2.5848217010498047, "loss": 0.7267, "nll_loss": 0.683512806892395, "rewards/accuracies": 0.875, "rewards/chosen": -0.19540560245513916, "rewards/margins": 0.06307657063007355, "rewards/rejected": -0.2584821581840515, "step": 4034 }, { "epoch": 11.047227926078028, "grad_norm": 5.003687858581543, "learning_rate": 4.4739726027397255e-07, "log_odds_chosen": 3.2394936084747314, "log_odds_ratio": -0.11330120265483856, "logits/chosen": 0.7471226453781128, "logits/rejected": 0.7520629167556763, "logps/chosen": -1.9917179346084595, "logps/rejected": -5.0706787109375, "loss": 0.6751, "nll_loss": 0.6637586355209351, "rewards/accuracies": 1.0, "rewards/chosen": -0.19917181134223938, "rewards/margins": 0.30789607763290405, "rewards/rejected": -0.507067859172821, "step": 4035 }, { "epoch": 11.049965776865161, "grad_norm": 5.560728549957275, "learning_rate": 4.4726027397260276e-07, "log_odds_chosen": 1.0953900814056396, "log_odds_ratio": -0.4112916886806488, "logits/chosen": 0.8436030745506287, "logits/rejected": 0.7944343686103821, "logps/chosen": -1.6177008152008057, "logps/rejected": -2.58650541305542, "loss": 0.7029, "nll_loss": 0.6617937684059143, "rewards/accuracies": 0.875, "rewards/chosen": -0.16177009046077728, "rewards/margins": 0.0968804582953453, "rewards/rejected": -0.258650541305542, "step": 4036 }, { "epoch": 11.052703627652292, "grad_norm": 5.708194255828857, "learning_rate": 4.4712328767123286e-07, "log_odds_chosen": 1.8124728202819824, "log_odds_ratio": -0.3463512063026428, "logits/chosen": 0.6645553112030029, "logits/rejected": 0.6416124701499939, "logps/chosen": -2.0949208736419678, "logps/rejected": -3.731736183166504, "loss": 0.6668, "nll_loss": 0.6321878433227539, "rewards/accuracies": 0.875, "rewards/chosen": -0.20949208736419678, "rewards/margins": 0.16368155181407928, "rewards/rejected": -0.37317365407943726, "step": 4037 }, { "epoch": 11.055441478439425, "grad_norm": 5.430356502532959, "learning_rate": 4.46986301369863e-07, "log_odds_chosen": 1.1817948818206787, "log_odds_ratio": -0.3638036251068115, "logits/chosen": 0.9220592975616455, "logits/rejected": 0.8410650491714478, "logps/chosen": -1.7373015880584717, "logps/rejected": -2.7948009967803955, "loss": 0.6769, "nll_loss": 0.6405567526817322, "rewards/accuracies": 0.875, "rewards/chosen": -0.17373017966747284, "rewards/margins": 0.1057499349117279, "rewards/rejected": -0.27948012948036194, "step": 4038 }, { "epoch": 11.058179329226558, "grad_norm": 5.348987579345703, "learning_rate": 4.468493150684931e-07, "log_odds_chosen": 2.4190514087677, "log_odds_ratio": -0.2862030565738678, "logits/chosen": 0.535434365272522, "logits/rejected": 0.4799731373786926, "logps/chosen": -2.118943691253662, "logps/rejected": -4.436766624450684, "loss": 0.7123, "nll_loss": 0.683677613735199, "rewards/accuracies": 0.875, "rewards/chosen": -0.21189439296722412, "rewards/margins": 0.23178227245807648, "rewards/rejected": -0.4436766803264618, "step": 4039 }, { "epoch": 11.06091718001369, "grad_norm": 6.158752918243408, "learning_rate": 4.4671232876712326e-07, "log_odds_chosen": 1.499329686164856, "log_odds_ratio": -0.32507944107055664, "logits/chosen": 0.8268626928329468, "logits/rejected": 0.7839674949645996, "logps/chosen": -1.9155194759368896, "logps/rejected": -3.292663097381592, "loss": 0.7266, "nll_loss": 0.6940975785255432, "rewards/accuracies": 1.0, "rewards/chosen": -0.19155195355415344, "rewards/margins": 0.13771435618400574, "rewards/rejected": -0.3292662799358368, "step": 4040 }, { "epoch": 11.063655030800822, "grad_norm": 5.331481456756592, "learning_rate": 4.465753424657534e-07, "log_odds_chosen": 2.1086373329162598, "log_odds_ratio": -0.1893599033355713, "logits/chosen": 1.0373899936676025, "logits/rejected": 1.1091797351837158, "logps/chosen": -2.7861876487731934, "logps/rejected": -4.815860271453857, "loss": 0.8112, "nll_loss": 0.792265772819519, "rewards/accuracies": 1.0, "rewards/chosen": -0.2786187529563904, "rewards/margins": 0.20296728610992432, "rewards/rejected": -0.4815860390663147, "step": 4041 }, { "epoch": 11.066392881587953, "grad_norm": 7.99074125289917, "learning_rate": 4.464383561643835e-07, "log_odds_chosen": 1.6982653141021729, "log_odds_ratio": -0.7009866833686829, "logits/chosen": 1.0907775163650513, "logits/rejected": 1.0862133502960205, "logps/chosen": -3.0754451751708984, "logps/rejected": -4.700798034667969, "loss": 0.7746, "nll_loss": 0.7045136094093323, "rewards/accuracies": 0.875, "rewards/chosen": -0.3075445294380188, "rewards/margins": 0.16253523528575897, "rewards/rejected": -0.47007977962493896, "step": 4042 }, { "epoch": 11.069130732375086, "grad_norm": 5.134378910064697, "learning_rate": 4.463013698630137e-07, "log_odds_chosen": 1.617181658744812, "log_odds_ratio": -0.2352764904499054, "logits/chosen": 0.7536471486091614, "logits/rejected": 0.7888973951339722, "logps/chosen": -2.341343402862549, "logps/rejected": -3.860506534576416, "loss": 0.7002, "nll_loss": 0.6766322255134583, "rewards/accuracies": 1.0, "rewards/chosen": -0.23413436114788055, "rewards/margins": 0.15191632509231567, "rewards/rejected": -0.38605067133903503, "step": 4043 }, { "epoch": 11.071868583162217, "grad_norm": 5.186154842376709, "learning_rate": 4.461643835616438e-07, "log_odds_chosen": 0.8181241750717163, "log_odds_ratio": -0.4178576171398163, "logits/chosen": 0.8346199989318848, "logits/rejected": 0.8378559947013855, "logps/chosen": -2.541224479675293, "logps/rejected": -3.303544521331787, "loss": 0.7064, "nll_loss": 0.6646369099617004, "rewards/accuracies": 0.875, "rewards/chosen": -0.25412246584892273, "rewards/margins": 0.07623200863599777, "rewards/rejected": -0.3303544521331787, "step": 4044 }, { "epoch": 11.07460643394935, "grad_norm": 4.683089733123779, "learning_rate": 4.4602739726027397e-07, "log_odds_chosen": 3.2111291885375977, "log_odds_ratio": -0.10896677523851395, "logits/chosen": 1.0397437810897827, "logits/rejected": 1.0840107202529907, "logps/chosen": -2.1404869556427, "logps/rejected": -5.208404064178467, "loss": 0.6828, "nll_loss": 0.6718941330909729, "rewards/accuracies": 1.0, "rewards/chosen": -0.21404871344566345, "rewards/margins": 0.306791752576828, "rewards/rejected": -0.5208404660224915, "step": 4045 }, { "epoch": 11.077344284736482, "grad_norm": 6.2434587478637695, "learning_rate": 4.4589041095890407e-07, "log_odds_chosen": 2.066049337387085, "log_odds_ratio": -0.3288078010082245, "logits/chosen": 0.7832502126693726, "logits/rejected": 0.8192828893661499, "logps/chosen": -2.557068109512329, "logps/rejected": -4.582353591918945, "loss": 0.7729, "nll_loss": 0.7400575876235962, "rewards/accuracies": 0.875, "rewards/chosen": -0.255706787109375, "rewards/margins": 0.20252853631973267, "rewards/rejected": -0.45823532342910767, "step": 4046 }, { "epoch": 11.080082135523615, "grad_norm": 7.5132293701171875, "learning_rate": 4.457534246575342e-07, "log_odds_chosen": 2.7375011444091797, "log_odds_ratio": -0.21063488721847534, "logits/chosen": 0.909334659576416, "logits/rejected": 0.9450203776359558, "logps/chosen": -1.8438136577606201, "logps/rejected": -4.288391590118408, "loss": 0.6849, "nll_loss": 0.6638054847717285, "rewards/accuracies": 1.0, "rewards/chosen": -0.184381365776062, "rewards/margins": 0.24445782601833344, "rewards/rejected": -0.42883917689323425, "step": 4047 }, { "epoch": 11.082819986310746, "grad_norm": 5.381420612335205, "learning_rate": 4.4561643835616437e-07, "log_odds_chosen": 2.741029977798462, "log_odds_ratio": -0.2385689616203308, "logits/chosen": 0.7326021194458008, "logits/rejected": 0.7542000412940979, "logps/chosen": -2.014244556427002, "logps/rejected": -4.646420478820801, "loss": 0.729, "nll_loss": 0.7050995826721191, "rewards/accuracies": 1.0, "rewards/chosen": -0.20142444968223572, "rewards/margins": 0.263217568397522, "rewards/rejected": -0.4646420478820801, "step": 4048 }, { "epoch": 11.085557837097879, "grad_norm": 6.208911418914795, "learning_rate": 4.4547945205479447e-07, "log_odds_chosen": 1.4011203050613403, "log_odds_ratio": -0.46975433826446533, "logits/chosen": 0.8623247742652893, "logits/rejected": 0.9198908805847168, "logps/chosen": -2.6649160385131836, "logps/rejected": -3.942169666290283, "loss": 0.717, "nll_loss": 0.6699926257133484, "rewards/accuracies": 0.875, "rewards/chosen": -0.2664915919303894, "rewards/margins": 0.12772537767887115, "rewards/rejected": -0.39421698451042175, "step": 4049 }, { "epoch": 11.08829568788501, "grad_norm": 6.123716354370117, "learning_rate": 4.453424657534247e-07, "log_odds_chosen": 1.4758377075195312, "log_odds_ratio": -0.6326218247413635, "logits/chosen": 0.7821604013442993, "logits/rejected": 0.7774742245674133, "logps/chosen": -2.120408058166504, "logps/rejected": -3.5218589305877686, "loss": 0.8972, "nll_loss": 0.8339272141456604, "rewards/accuracies": 0.625, "rewards/chosen": -0.21204079687595367, "rewards/margins": 0.14014512300491333, "rewards/rejected": -0.3521859049797058, "step": 4050 }, { "epoch": 11.091033538672143, "grad_norm": 6.073949337005615, "learning_rate": 4.4520547945205477e-07, "log_odds_chosen": 1.0259218215942383, "log_odds_ratio": -0.4698066711425781, "logits/chosen": 0.8719631433486938, "logits/rejected": 0.8253893852233887, "logps/chosen": -2.6047616004943848, "logps/rejected": -3.5734498500823975, "loss": 0.8556, "nll_loss": 0.8086608648300171, "rewards/accuracies": 0.75, "rewards/chosen": -0.26047617197036743, "rewards/margins": 0.09686882048845291, "rewards/rejected": -0.35734498500823975, "step": 4051 }, { "epoch": 11.093771389459274, "grad_norm": 5.640929698944092, "learning_rate": 4.4506849315068487e-07, "log_odds_chosen": 2.084141254425049, "log_odds_ratio": -0.26885300874710083, "logits/chosen": 0.7739982604980469, "logits/rejected": 0.7921371459960938, "logps/chosen": -2.9215621948242188, "logps/rejected": -4.9384965896606445, "loss": 0.7515, "nll_loss": 0.7245665788650513, "rewards/accuracies": 1.0, "rewards/chosen": -0.2921562194824219, "rewards/margins": 0.20169343054294586, "rewards/rejected": -0.49384966492652893, "step": 4052 }, { "epoch": 11.096509240246407, "grad_norm": 5.381252288818359, "learning_rate": 4.449315068493151e-07, "log_odds_chosen": 2.507199287414551, "log_odds_ratio": -0.23827913403511047, "logits/chosen": 0.8424288630485535, "logits/rejected": 0.8931843042373657, "logps/chosen": -2.9575257301330566, "logps/rejected": -5.41489315032959, "loss": 0.7836, "nll_loss": 0.7597951889038086, "rewards/accuracies": 1.0, "rewards/chosen": -0.29575255513191223, "rewards/margins": 0.2457367479801178, "rewards/rejected": -0.54148930311203, "step": 4053 }, { "epoch": 11.099247091033538, "grad_norm": 4.757734775543213, "learning_rate": 4.447945205479452e-07, "log_odds_chosen": 1.732309103012085, "log_odds_ratio": -0.21613390743732452, "logits/chosen": 0.9309543371200562, "logits/rejected": 0.8571815490722656, "logps/chosen": -1.7684764862060547, "logps/rejected": -3.3443620204925537, "loss": 0.6979, "nll_loss": 0.6762872338294983, "rewards/accuracies": 1.0, "rewards/chosen": -0.1768476516008377, "rewards/margins": 0.15758857131004333, "rewards/rejected": -0.33443623781204224, "step": 4054 }, { "epoch": 11.101984941820671, "grad_norm": 7.249759674072266, "learning_rate": 4.4465753424657533e-07, "log_odds_chosen": 2.158153772354126, "log_odds_ratio": -0.269483357667923, "logits/chosen": 1.099665880203247, "logits/rejected": 1.0796054601669312, "logps/chosen": -2.3848366737365723, "logps/rejected": -4.4279279708862305, "loss": 0.7126, "nll_loss": 0.6856203079223633, "rewards/accuracies": 0.875, "rewards/chosen": -0.23848366737365723, "rewards/margins": 0.2043091505765915, "rewards/rejected": -0.4427928328514099, "step": 4055 }, { "epoch": 11.104722792607802, "grad_norm": 4.718945026397705, "learning_rate": 4.4452054794520543e-07, "log_odds_chosen": 2.3736612796783447, "log_odds_ratio": -0.17584320902824402, "logits/chosen": 0.8775714635848999, "logits/rejected": 0.9591947793960571, "logps/chosen": -2.353665351867676, "logps/rejected": -4.614684104919434, "loss": 0.6671, "nll_loss": 0.6494904160499573, "rewards/accuracies": 1.0, "rewards/chosen": -0.23536652326583862, "rewards/margins": 0.22610193490982056, "rewards/rejected": -0.4614684581756592, "step": 4056 }, { "epoch": 11.107460643394935, "grad_norm": 7.390588760375977, "learning_rate": 4.4438356164383563e-07, "log_odds_chosen": 2.2534637451171875, "log_odds_ratio": -0.31494879722595215, "logits/chosen": 0.7790262699127197, "logits/rejected": 0.72712242603302, "logps/chosen": -2.7860608100891113, "logps/rejected": -4.915313720703125, "loss": 0.7842, "nll_loss": 0.7526596784591675, "rewards/accuracies": 0.75, "rewards/chosen": -0.2786060869693756, "rewards/margins": 0.2129252701997757, "rewards/rejected": -0.4915313720703125, "step": 4057 }, { "epoch": 11.110198494182066, "grad_norm": 5.294295787811279, "learning_rate": 4.4424657534246573e-07, "log_odds_chosen": 3.0211992263793945, "log_odds_ratio": -0.13686718046665192, "logits/chosen": 0.9966316819190979, "logits/rejected": 1.0402615070343018, "logps/chosen": -2.8579013347625732, "logps/rejected": -5.7305450439453125, "loss": 0.7035, "nll_loss": 0.6898626089096069, "rewards/accuracies": 1.0, "rewards/chosen": -0.2857901453971863, "rewards/margins": 0.287264347076416, "rewards/rejected": -0.5730545520782471, "step": 4058 }, { "epoch": 11.1129363449692, "grad_norm": 4.819421291351318, "learning_rate": 4.4410958904109583e-07, "log_odds_chosen": 2.4025774002075195, "log_odds_ratio": -0.19919484853744507, "logits/chosen": 0.7073769569396973, "logits/rejected": 0.5414338707923889, "logps/chosen": -1.86592698097229, "logps/rejected": -4.143758296966553, "loss": 0.7085, "nll_loss": 0.6885690093040466, "rewards/accuracies": 1.0, "rewards/chosen": -0.186592698097229, "rewards/margins": 0.22778312861919403, "rewards/rejected": -0.41437584161758423, "step": 4059 }, { "epoch": 11.11567419575633, "grad_norm": 6.177178382873535, "learning_rate": 4.4397260273972604e-07, "log_odds_chosen": 0.3408074378967285, "log_odds_ratio": -0.6575734615325928, "logits/chosen": 0.8136821389198303, "logits/rejected": 0.819681704044342, "logps/chosen": -2.6671621799468994, "logps/rejected": -2.929759979248047, "loss": 0.8619, "nll_loss": 0.7960959672927856, "rewards/accuracies": 0.75, "rewards/chosen": -0.26671621203422546, "rewards/margins": 0.026259776204824448, "rewards/rejected": -0.2929759919643402, "step": 4060 }, { "epoch": 11.118412046543463, "grad_norm": 4.720104694366455, "learning_rate": 4.4383561643835613e-07, "log_odds_chosen": 2.5922956466674805, "log_odds_ratio": -0.19630688428878784, "logits/chosen": 0.8269243240356445, "logits/rejected": 0.8613081574440002, "logps/chosen": -2.429488182067871, "logps/rejected": -4.951416969299316, "loss": 0.915, "nll_loss": 0.8953484296798706, "rewards/accuracies": 1.0, "rewards/chosen": -0.24294881522655487, "rewards/margins": 0.252192884683609, "rewards/rejected": -0.4951416850090027, "step": 4061 }, { "epoch": 11.121149897330595, "grad_norm": 6.766684532165527, "learning_rate": 4.436986301369863e-07, "log_odds_chosen": 2.236586809158325, "log_odds_ratio": -0.3657604455947876, "logits/chosen": 1.1916289329528809, "logits/rejected": 1.2400301694869995, "logps/chosen": -2.937995672225952, "logps/rejected": -5.090762138366699, "loss": 0.7447, "nll_loss": 0.7081001400947571, "rewards/accuracies": 0.875, "rewards/chosen": -0.29379957914352417, "rewards/margins": 0.21527668833732605, "rewards/rejected": -0.5090762376785278, "step": 4062 }, { "epoch": 11.123887748117728, "grad_norm": 4.76344108581543, "learning_rate": 4.435616438356164e-07, "log_odds_chosen": 1.321789264678955, "log_odds_ratio": -0.34111130237579346, "logits/chosen": 0.8529640436172485, "logits/rejected": 0.8678332567214966, "logps/chosen": -1.8476884365081787, "logps/rejected": -3.079327344894409, "loss": 0.6831, "nll_loss": 0.6489694714546204, "rewards/accuracies": 0.875, "rewards/chosen": -0.18476882576942444, "rewards/margins": 0.12316390872001648, "rewards/rejected": -0.3079327344894409, "step": 4063 }, { "epoch": 11.126625598904859, "grad_norm": 5.255992412567139, "learning_rate": 4.434246575342466e-07, "log_odds_chosen": 0.8517800569534302, "log_odds_ratio": -0.4891361594200134, "logits/chosen": 0.732302188873291, "logits/rejected": 0.7056313157081604, "logps/chosen": -2.3128716945648193, "logps/rejected": -3.1436803340911865, "loss": 0.7571, "nll_loss": 0.7081414461135864, "rewards/accuracies": 0.75, "rewards/chosen": -0.2312871664762497, "rewards/margins": 0.08308087289333344, "rewards/rejected": -0.31436803936958313, "step": 4064 }, { "epoch": 11.129363449691992, "grad_norm": 4.82167911529541, "learning_rate": 4.432876712328767e-07, "log_odds_chosen": 2.2866554260253906, "log_odds_ratio": -0.2847241759300232, "logits/chosen": 0.6240962743759155, "logits/rejected": 0.5622752904891968, "logps/chosen": -2.2222158908843994, "logps/rejected": -4.437365531921387, "loss": 0.8694, "nll_loss": 0.8409336805343628, "rewards/accuracies": 0.875, "rewards/chosen": -0.22222158312797546, "rewards/margins": 0.2215149998664856, "rewards/rejected": -0.44373658299446106, "step": 4065 }, { "epoch": 11.132101300479125, "grad_norm": 5.339468955993652, "learning_rate": 4.431506849315068e-07, "log_odds_chosen": 2.4930615425109863, "log_odds_ratio": -0.22635085880756378, "logits/chosen": 0.9627330899238586, "logits/rejected": 0.9769929647445679, "logps/chosen": -1.9143648147583008, "logps/rejected": -4.268339157104492, "loss": 0.7587, "nll_loss": 0.7360891103744507, "rewards/accuracies": 0.875, "rewards/chosen": -0.1914364993572235, "rewards/margins": 0.23539742827415466, "rewards/rejected": -0.4268338978290558, "step": 4066 }, { "epoch": 11.134839151266256, "grad_norm": 6.265776634216309, "learning_rate": 4.43013698630137e-07, "log_odds_chosen": 2.5546810626983643, "log_odds_ratio": -0.28147560358047485, "logits/chosen": 0.9715738892555237, "logits/rejected": 1.0382015705108643, "logps/chosen": -2.667881488800049, "logps/rejected": -5.139994144439697, "loss": 0.6646, "nll_loss": 0.6364580392837524, "rewards/accuracies": 0.75, "rewards/chosen": -0.26678815484046936, "rewards/margins": 0.24721121788024902, "rewards/rejected": -0.5139994025230408, "step": 4067 }, { "epoch": 11.137577002053389, "grad_norm": 5.178365230560303, "learning_rate": 4.428767123287671e-07, "log_odds_chosen": 2.314485788345337, "log_odds_ratio": -0.22648489475250244, "logits/chosen": 0.9796802401542664, "logits/rejected": 1.0424013137817383, "logps/chosen": -2.3108417987823486, "logps/rejected": -4.50874662399292, "loss": 0.6958, "nll_loss": 0.673173189163208, "rewards/accuracies": 0.875, "rewards/chosen": -0.2310841828584671, "rewards/margins": 0.219790518283844, "rewards/rejected": -0.4508746862411499, "step": 4068 }, { "epoch": 11.14031485284052, "grad_norm": 4.4749436378479, "learning_rate": 4.4273972602739725e-07, "log_odds_chosen": 2.578990936279297, "log_odds_ratio": -0.17629437148571014, "logits/chosen": 0.9072601795196533, "logits/rejected": 0.8818727135658264, "logps/chosen": -1.8801976442337036, "logps/rejected": -4.311342239379883, "loss": 0.6076, "nll_loss": 0.589979350566864, "rewards/accuracies": 1.0, "rewards/chosen": -0.1880197674036026, "rewards/margins": 0.2431144416332245, "rewards/rejected": -0.4311341941356659, "step": 4069 }, { "epoch": 11.143052703627653, "grad_norm": 5.342897415161133, "learning_rate": 4.4260273972602734e-07, "log_odds_chosen": 1.3681238889694214, "log_odds_ratio": -0.2642534077167511, "logits/chosen": 0.8909105062484741, "logits/rejected": 0.8653125166893005, "logps/chosen": -2.1673147678375244, "logps/rejected": -3.392611503601074, "loss": 0.7226, "nll_loss": 0.6961339712142944, "rewards/accuracies": 1.0, "rewards/chosen": -0.2167314887046814, "rewards/margins": 0.12252966314554214, "rewards/rejected": -0.33926114439964294, "step": 4070 }, { "epoch": 11.145790554414784, "grad_norm": 5.237985610961914, "learning_rate": 4.4246575342465755e-07, "log_odds_chosen": 3.618189811706543, "log_odds_ratio": -0.14578047394752502, "logits/chosen": 1.089920997619629, "logits/rejected": 1.1597177982330322, "logps/chosen": -2.517451763153076, "logps/rejected": -5.977771759033203, "loss": 0.6935, "nll_loss": 0.6789659857749939, "rewards/accuracies": 1.0, "rewards/chosen": -0.25174519419670105, "rewards/margins": 0.3460320234298706, "rewards/rejected": -0.5977771878242493, "step": 4071 }, { "epoch": 11.148528405201917, "grad_norm": 4.98459529876709, "learning_rate": 4.4232876712328765e-07, "log_odds_chosen": 2.4131925106048584, "log_odds_ratio": -0.21900898218154907, "logits/chosen": 0.8202002644538879, "logits/rejected": 0.8451733589172363, "logps/chosen": -2.543684959411621, "logps/rejected": -4.847097396850586, "loss": 0.7643, "nll_loss": 0.7424224615097046, "rewards/accuracies": 0.875, "rewards/chosen": -0.25436854362487793, "rewards/margins": 0.23034124076366425, "rewards/rejected": -0.484709769487381, "step": 4072 }, { "epoch": 11.151266255989048, "grad_norm": 5.471388816833496, "learning_rate": 4.4219178082191775e-07, "log_odds_chosen": 1.669882893562317, "log_odds_ratio": -0.2693203389644623, "logits/chosen": 0.9990915060043335, "logits/rejected": 1.0451241731643677, "logps/chosen": -2.1079134941101074, "logps/rejected": -3.693458080291748, "loss": 0.6509, "nll_loss": 0.6239209175109863, "rewards/accuracies": 1.0, "rewards/chosen": -0.21079134941101074, "rewards/margins": 0.15855447947978973, "rewards/rejected": -0.36934584379196167, "step": 4073 }, { "epoch": 11.154004106776181, "grad_norm": 3.919640064239502, "learning_rate": 4.4205479452054795e-07, "log_odds_chosen": 2.944612503051758, "log_odds_ratio": -0.22625920176506042, "logits/chosen": 0.8778771758079529, "logits/rejected": 0.8817457556724548, "logps/chosen": -2.0487053394317627, "logps/rejected": -4.852081298828125, "loss": 0.7447, "nll_loss": 0.7220544815063477, "rewards/accuracies": 1.0, "rewards/chosen": -0.2048705518245697, "rewards/margins": 0.28033754229545593, "rewards/rejected": -0.48520809412002563, "step": 4074 }, { "epoch": 11.156741957563312, "grad_norm": 5.177181243896484, "learning_rate": 4.4191780821917805e-07, "log_odds_chosen": 2.6995322704315186, "log_odds_ratio": -0.1502111703157425, "logits/chosen": 1.0514642000198364, "logits/rejected": 1.0296403169631958, "logps/chosen": -3.1584839820861816, "logps/rejected": -5.774937629699707, "loss": 0.7601, "nll_loss": 0.7451260089874268, "rewards/accuracies": 1.0, "rewards/chosen": -0.3158484101295471, "rewards/margins": 0.2616453468799591, "rewards/rejected": -0.5774937272071838, "step": 4075 }, { "epoch": 11.159479808350445, "grad_norm": 7.508964538574219, "learning_rate": 4.417808219178082e-07, "log_odds_chosen": 0.7555585503578186, "log_odds_ratio": -0.6572940349578857, "logits/chosen": 0.7146482467651367, "logits/rejected": 0.7208144068717957, "logps/chosen": -2.2399775981903076, "logps/rejected": -2.848994493484497, "loss": 0.6448, "nll_loss": 0.5790276527404785, "rewards/accuracies": 0.625, "rewards/chosen": -0.22399775683879852, "rewards/margins": 0.060901716351509094, "rewards/rejected": -0.2848994731903076, "step": 4076 }, { "epoch": 11.162217659137577, "grad_norm": 5.633564472198486, "learning_rate": 4.416438356164383e-07, "log_odds_chosen": 0.9422175884246826, "log_odds_ratio": -0.34986579418182373, "logits/chosen": 0.7593024969100952, "logits/rejected": 0.6921619772911072, "logps/chosen": -1.668031930923462, "logps/rejected": -2.47839617729187, "loss": 0.663, "nll_loss": 0.6280273199081421, "rewards/accuracies": 1.0, "rewards/chosen": -0.16680319607257843, "rewards/margins": 0.08103642612695694, "rewards/rejected": -0.24783961474895477, "step": 4077 }, { "epoch": 11.16495550992471, "grad_norm": 6.177253246307373, "learning_rate": 4.415068493150685e-07, "log_odds_chosen": 1.70274019241333, "log_odds_ratio": -0.42358648777008057, "logits/chosen": 0.7148774266242981, "logits/rejected": 0.7561550140380859, "logps/chosen": -1.7125314474105835, "logps/rejected": -3.3032312393188477, "loss": 0.7025, "nll_loss": 0.660099446773529, "rewards/accuracies": 0.75, "rewards/chosen": -0.17125314474105835, "rewards/margins": 0.1590700000524521, "rewards/rejected": -0.33032312989234924, "step": 4078 }, { "epoch": 11.16769336071184, "grad_norm": 6.5292067527771, "learning_rate": 4.413698630136986e-07, "log_odds_chosen": 2.044881820678711, "log_odds_ratio": -0.573136568069458, "logits/chosen": 0.6927735805511475, "logits/rejected": 0.6288301944732666, "logps/chosen": -2.2506368160247803, "logps/rejected": -4.127842903137207, "loss": 0.7454, "nll_loss": 0.6880577206611633, "rewards/accuracies": 0.75, "rewards/chosen": -0.22506369650363922, "rewards/margins": 0.18772059679031372, "rewards/rejected": -0.41278427839279175, "step": 4079 }, { "epoch": 11.170431211498974, "grad_norm": 4.573330879211426, "learning_rate": 4.412328767123287e-07, "log_odds_chosen": 1.7988460063934326, "log_odds_ratio": -0.2583780288696289, "logits/chosen": 0.7565966248512268, "logits/rejected": 0.7917906641960144, "logps/chosen": -2.471081018447876, "logps/rejected": -4.13925838470459, "loss": 0.6688, "nll_loss": 0.6429811120033264, "rewards/accuracies": 1.0, "rewards/chosen": -0.2471081018447876, "rewards/margins": 0.16681772470474243, "rewards/rejected": -0.41392582654953003, "step": 4080 }, { "epoch": 11.173169062286105, "grad_norm": 6.858320713043213, "learning_rate": 4.410958904109589e-07, "log_odds_chosen": 2.7259521484375, "log_odds_ratio": -0.238317608833313, "logits/chosen": 1.0586844682693481, "logits/rejected": 1.0180366039276123, "logps/chosen": -2.7437245845794678, "logps/rejected": -5.304288864135742, "loss": 0.8006, "nll_loss": 0.7768140435218811, "rewards/accuracies": 0.875, "rewards/chosen": -0.2743724584579468, "rewards/margins": 0.25605642795562744, "rewards/rejected": -0.5304288864135742, "step": 4081 }, { "epoch": 11.175906913073238, "grad_norm": 5.226133823394775, "learning_rate": 4.40958904109589e-07, "log_odds_chosen": 2.4121923446655273, "log_odds_ratio": -0.17884816229343414, "logits/chosen": 0.7115265130996704, "logits/rejected": 0.7916443943977356, "logps/chosen": -1.6434974670410156, "logps/rejected": -3.8544278144836426, "loss": 0.6842, "nll_loss": 0.6663154363632202, "rewards/accuracies": 1.0, "rewards/chosen": -0.1643497496843338, "rewards/margins": 0.22109299898147583, "rewards/rejected": -0.3854427635669708, "step": 4082 }, { "epoch": 11.178644763860369, "grad_norm": 7.595870018005371, "learning_rate": 4.4082191780821916e-07, "log_odds_chosen": 0.9824638366699219, "log_odds_ratio": -0.6457991600036621, "logits/chosen": 1.043452262878418, "logits/rejected": 1.087113857269287, "logps/chosen": -3.1088767051696777, "logps/rejected": -3.99607515335083, "loss": 0.8259, "nll_loss": 0.7613140344619751, "rewards/accuracies": 0.5, "rewards/chosen": -0.3108876943588257, "rewards/margins": 0.08871981501579285, "rewards/rejected": -0.39960750937461853, "step": 4083 }, { "epoch": 11.181382614647502, "grad_norm": 5.284752368927002, "learning_rate": 4.406849315068493e-07, "log_odds_chosen": 2.780508279800415, "log_odds_ratio": -0.08170907199382782, "logits/chosen": 0.9411776065826416, "logits/rejected": 0.9594077467918396, "logps/chosen": -2.551011562347412, "logps/rejected": -5.229696750640869, "loss": 0.6426, "nll_loss": 0.6344625949859619, "rewards/accuracies": 1.0, "rewards/chosen": -0.25510114431381226, "rewards/margins": 0.2678685188293457, "rewards/rejected": -0.522969663143158, "step": 4084 }, { "epoch": 11.184120465434633, "grad_norm": 5.52267599105835, "learning_rate": 4.4054794520547947e-07, "log_odds_chosen": 2.7964515686035156, "log_odds_ratio": -0.21788060665130615, "logits/chosen": 0.7647924423217773, "logits/rejected": 0.7373248338699341, "logps/chosen": -1.917733073234558, "logps/rejected": -4.58901834487915, "loss": 0.6553, "nll_loss": 0.6334841251373291, "rewards/accuracies": 1.0, "rewards/chosen": -0.19177329540252686, "rewards/margins": 0.26712849736213684, "rewards/rejected": -0.4589018225669861, "step": 4085 }, { "epoch": 11.186858316221766, "grad_norm": 4.513587474822998, "learning_rate": 4.4041095890410957e-07, "log_odds_chosen": 1.3880329132080078, "log_odds_ratio": -0.2982485890388489, "logits/chosen": 0.7858847379684448, "logits/rejected": 0.7620120048522949, "logps/chosen": -2.2897863388061523, "logps/rejected": -3.5810186862945557, "loss": 0.6633, "nll_loss": 0.6334863901138306, "rewards/accuracies": 0.875, "rewards/chosen": -0.22897863388061523, "rewards/margins": 0.129123255610466, "rewards/rejected": -0.35810190439224243, "step": 4086 }, { "epoch": 11.189596167008897, "grad_norm": 6.577627182006836, "learning_rate": 4.4027397260273967e-07, "log_odds_chosen": 0.45409104228019714, "log_odds_ratio": -0.7343769073486328, "logits/chosen": 0.6059057712554932, "logits/rejected": 0.6548258066177368, "logps/chosen": -2.739938497543335, "logps/rejected": -3.1518642902374268, "loss": 0.7717, "nll_loss": 0.6982935667037964, "rewards/accuracies": 0.625, "rewards/chosen": -0.2739938795566559, "rewards/margins": 0.04119257256388664, "rewards/rejected": -0.31518644094467163, "step": 4087 }, { "epoch": 11.19233401779603, "grad_norm": 5.120904922485352, "learning_rate": 4.4013698630136987e-07, "log_odds_chosen": 1.5481414794921875, "log_odds_ratio": -0.3464393615722656, "logits/chosen": 0.9209885001182556, "logits/rejected": 0.9851309657096863, "logps/chosen": -2.1350820064544678, "logps/rejected": -3.620570182800293, "loss": 0.6795, "nll_loss": 0.6448876857757568, "rewards/accuracies": 0.875, "rewards/chosen": -0.21350820362567902, "rewards/margins": 0.14854884147644043, "rewards/rejected": -0.36205703020095825, "step": 4088 }, { "epoch": 11.195071868583161, "grad_norm": 4.685266971588135, "learning_rate": 4.3999999999999997e-07, "log_odds_chosen": 2.2850112915039062, "log_odds_ratio": -0.20036932826042175, "logits/chosen": 0.9088027477264404, "logits/rejected": 0.9232819080352783, "logps/chosen": -2.2714900970458984, "logps/rejected": -4.368714332580566, "loss": 0.7448, "nll_loss": 0.7247930765151978, "rewards/accuracies": 0.875, "rewards/chosen": -0.22714900970458984, "rewards/margins": 0.2097223699092865, "rewards/rejected": -0.43687140941619873, "step": 4089 }, { "epoch": 11.197809719370294, "grad_norm": 5.0262556076049805, "learning_rate": 4.398630136986301e-07, "log_odds_chosen": 1.9457712173461914, "log_odds_ratio": -0.27870336174964905, "logits/chosen": 0.9028259515762329, "logits/rejected": 0.8965989351272583, "logps/chosen": -2.507755994796753, "logps/rejected": -4.374536514282227, "loss": 0.7716, "nll_loss": 0.7437716722488403, "rewards/accuracies": 0.875, "rewards/chosen": -0.25077560544013977, "rewards/margins": 0.18667805194854736, "rewards/rejected": -0.43745365738868713, "step": 4090 }, { "epoch": 11.200547570157426, "grad_norm": 5.100122928619385, "learning_rate": 4.397260273972603e-07, "log_odds_chosen": 0.910820484161377, "log_odds_ratio": -0.4772406816482544, "logits/chosen": 0.6268147230148315, "logits/rejected": 0.5964264273643494, "logps/chosen": -2.197566032409668, "logps/rejected": -3.0078678131103516, "loss": 0.726, "nll_loss": 0.6782538890838623, "rewards/accuracies": 0.875, "rewards/chosen": -0.2197566032409668, "rewards/margins": 0.08103020489215851, "rewards/rejected": -0.3007867932319641, "step": 4091 }, { "epoch": 11.203285420944558, "grad_norm": 7.947928428649902, "learning_rate": 4.395890410958904e-07, "log_odds_chosen": 2.264510154724121, "log_odds_ratio": -0.3010665774345398, "logits/chosen": 0.8417911529541016, "logits/rejected": 0.9143621325492859, "logps/chosen": -2.9151055812835693, "logps/rejected": -5.091315269470215, "loss": 0.7243, "nll_loss": 0.6941837072372437, "rewards/accuracies": 0.875, "rewards/chosen": -0.29151058197021484, "rewards/margins": 0.21762096881866455, "rewards/rejected": -0.5091315507888794, "step": 4092 }, { "epoch": 11.206023271731691, "grad_norm": 6.276573657989502, "learning_rate": 4.394520547945205e-07, "log_odds_chosen": 2.1022846698760986, "log_odds_ratio": -0.3299994170665741, "logits/chosen": 0.8024210929870605, "logits/rejected": 0.8695352077484131, "logps/chosen": -2.3747804164886475, "logps/rejected": -4.3735246658325195, "loss": 0.7536, "nll_loss": 0.7206054329872131, "rewards/accuracies": 0.875, "rewards/chosen": -0.23747804760932922, "rewards/margins": 0.1998744010925293, "rewards/rejected": -0.4373524487018585, "step": 4093 }, { "epoch": 11.208761122518823, "grad_norm": 6.2723708152771, "learning_rate": 4.393150684931506e-07, "log_odds_chosen": 1.6679011583328247, "log_odds_ratio": -0.3401496112346649, "logits/chosen": 0.761539101600647, "logits/rejected": 0.7328848838806152, "logps/chosen": -3.036494255065918, "logps/rejected": -4.6180901527404785, "loss": 0.6911, "nll_loss": 0.6570903062820435, "rewards/accuracies": 1.0, "rewards/chosen": -0.3036494255065918, "rewards/margins": 0.15815959870815277, "rewards/rejected": -0.46180903911590576, "step": 4094 }, { "epoch": 11.211498973305956, "grad_norm": 5.018943786621094, "learning_rate": 4.3917808219178083e-07, "log_odds_chosen": 1.9665255546569824, "log_odds_ratio": -0.2574197053909302, "logits/chosen": 0.8110083341598511, "logits/rejected": 0.9059198498725891, "logps/chosen": -2.2558646202087402, "logps/rejected": -4.093759536743164, "loss": 0.6554, "nll_loss": 0.6296107769012451, "rewards/accuracies": 0.875, "rewards/chosen": -0.2255864441394806, "rewards/margins": 0.18378949165344238, "rewards/rejected": -0.40937596559524536, "step": 4095 }, { "epoch": 11.214236824093087, "grad_norm": 4.253690719604492, "learning_rate": 4.3904109589041093e-07, "log_odds_chosen": 2.1967878341674805, "log_odds_ratio": -0.2265702188014984, "logits/chosen": 0.7435410022735596, "logits/rejected": 0.7828271389007568, "logps/chosen": -2.0829997062683105, "logps/rejected": -4.1721696853637695, "loss": 0.613, "nll_loss": 0.590377688407898, "rewards/accuracies": 1.0, "rewards/chosen": -0.20829996466636658, "rewards/margins": 0.2089170217514038, "rewards/rejected": -0.4172170162200928, "step": 4096 }, { "epoch": 11.21697467488022, "grad_norm": 4.873932361602783, "learning_rate": 4.389041095890411e-07, "log_odds_chosen": 1.7606630325317383, "log_odds_ratio": -0.34951600432395935, "logits/chosen": 0.9011280536651611, "logits/rejected": 0.9848834872245789, "logps/chosen": -2.2176523208618164, "logps/rejected": -3.898524284362793, "loss": 0.6598, "nll_loss": 0.6248034834861755, "rewards/accuracies": 0.875, "rewards/chosen": -0.22176524996757507, "rewards/margins": 0.1680871844291687, "rewards/rejected": -0.3898524343967438, "step": 4097 }, { "epoch": 11.219712525667351, "grad_norm": 4.50726842880249, "learning_rate": 4.3876712328767123e-07, "log_odds_chosen": 2.813089609146118, "log_odds_ratio": -0.15720434486865997, "logits/chosen": 0.9320160150527954, "logits/rejected": 0.9562656283378601, "logps/chosen": -1.9633514881134033, "logps/rejected": -4.633176803588867, "loss": 0.6813, "nll_loss": 0.6655862331390381, "rewards/accuracies": 1.0, "rewards/chosen": -0.19633513689041138, "rewards/margins": 0.2669825553894043, "rewards/rejected": -0.4633176922798157, "step": 4098 }, { "epoch": 11.222450376454484, "grad_norm": 7.2744460105896, "learning_rate": 4.3863013698630133e-07, "log_odds_chosen": 2.315002918243408, "log_odds_ratio": -0.3066864013671875, "logits/chosen": 0.9871081113815308, "logits/rejected": 1.073623776435852, "logps/chosen": -2.745436668395996, "logps/rejected": -4.958627700805664, "loss": 0.6407, "nll_loss": 0.6100550293922424, "rewards/accuracies": 0.875, "rewards/chosen": -0.2745436429977417, "rewards/margins": 0.22131912410259247, "rewards/rejected": -0.49586278200149536, "step": 4099 }, { "epoch": 11.225188227241615, "grad_norm": 5.805864334106445, "learning_rate": 4.384931506849315e-07, "log_odds_chosen": 1.135240912437439, "log_odds_ratio": -0.5599005222320557, "logits/chosen": 0.8894704580307007, "logits/rejected": 0.914466142654419, "logps/chosen": -2.8539535999298096, "logps/rejected": -3.9761035442352295, "loss": 0.6807, "nll_loss": 0.6246967911720276, "rewards/accuracies": 0.5, "rewards/chosen": -0.2853953540325165, "rewards/margins": 0.11221499741077423, "rewards/rejected": -0.3976103663444519, "step": 4100 }, { "epoch": 11.227926078028748, "grad_norm": 5.850518226623535, "learning_rate": 4.383561643835616e-07, "log_odds_chosen": 1.8863030672073364, "log_odds_ratio": -0.33182770013809204, "logits/chosen": 0.5259758234024048, "logits/rejected": 0.4453285336494446, "logps/chosen": -2.342036247253418, "logps/rejected": -4.132076263427734, "loss": 0.7489, "nll_loss": 0.7156682014465332, "rewards/accuracies": 0.75, "rewards/chosen": -0.23420362174510956, "rewards/margins": 0.1790040135383606, "rewards/rejected": -0.41320765018463135, "step": 4101 }, { "epoch": 11.23066392881588, "grad_norm": 5.596526622772217, "learning_rate": 4.382191780821918e-07, "log_odds_chosen": 3.1551616191864014, "log_odds_ratio": -0.09576436132192612, "logits/chosen": 1.0120619535446167, "logits/rejected": 1.086895227432251, "logps/chosen": -2.08280611038208, "logps/rejected": -5.011814117431641, "loss": 0.6331, "nll_loss": 0.6235544085502625, "rewards/accuracies": 1.0, "rewards/chosen": -0.20828062295913696, "rewards/margins": 0.29290083050727844, "rewards/rejected": -0.5011814832687378, "step": 4102 }, { "epoch": 11.233401779603012, "grad_norm": 7.340667247772217, "learning_rate": 4.380821917808219e-07, "log_odds_chosen": 1.1722514629364014, "log_odds_ratio": -0.2981851398944855, "logits/chosen": 1.003536581993103, "logits/rejected": 1.0317851305007935, "logps/chosen": -2.385037422180176, "logps/rejected": -3.479492664337158, "loss": 0.7039, "nll_loss": 0.6740530133247375, "rewards/accuracies": 1.0, "rewards/chosen": -0.23850375413894653, "rewards/margins": 0.10944551229476929, "rewards/rejected": -0.3479492664337158, "step": 4103 }, { "epoch": 11.236139630390143, "grad_norm": 5.571881294250488, "learning_rate": 4.3794520547945204e-07, "log_odds_chosen": 2.170609712600708, "log_odds_ratio": -0.4254716634750366, "logits/chosen": 0.9447281360626221, "logits/rejected": 1.0706684589385986, "logps/chosen": -2.4512484073638916, "logps/rejected": -4.467380523681641, "loss": 0.8042, "nll_loss": 0.7616346478462219, "rewards/accuracies": 0.75, "rewards/chosen": -0.24512487649917603, "rewards/margins": 0.20161323249340057, "rewards/rejected": -0.4467380940914154, "step": 4104 }, { "epoch": 11.238877481177276, "grad_norm": 5.50055456161499, "learning_rate": 4.378082191780822e-07, "log_odds_chosen": 2.323143243789673, "log_odds_ratio": -0.29848092794418335, "logits/chosen": 0.8439794182777405, "logits/rejected": 0.7623173594474792, "logps/chosen": -2.0880625247955322, "logps/rejected": -4.281569480895996, "loss": 0.7341, "nll_loss": 0.7042306661605835, "rewards/accuracies": 1.0, "rewards/chosen": -0.20880627632141113, "rewards/margins": 0.2193506956100464, "rewards/rejected": -0.4281569719314575, "step": 4105 }, { "epoch": 11.241615331964407, "grad_norm": 5.212931156158447, "learning_rate": 4.376712328767123e-07, "log_odds_chosen": 1.9161262512207031, "log_odds_ratio": -0.22132426500320435, "logits/chosen": 0.7742617726325989, "logits/rejected": 0.8105313777923584, "logps/chosen": -1.75846266746521, "logps/rejected": -3.4899604320526123, "loss": 0.614, "nll_loss": 0.5918951034545898, "rewards/accuracies": 1.0, "rewards/chosen": -0.17584624886512756, "rewards/margins": 0.17314979434013367, "rewards/rejected": -0.3489960730075836, "step": 4106 }, { "epoch": 11.24435318275154, "grad_norm": 5.386999607086182, "learning_rate": 4.3753424657534244e-07, "log_odds_chosen": 2.076469659805298, "log_odds_ratio": -0.28626948595046997, "logits/chosen": 0.7902016639709473, "logits/rejected": 0.8679260611534119, "logps/chosen": -2.8200390338897705, "logps/rejected": -4.8273539543151855, "loss": 0.7525, "nll_loss": 0.7238854169845581, "rewards/accuracies": 0.875, "rewards/chosen": -0.28200387954711914, "rewards/margins": 0.20073151588439941, "rewards/rejected": -0.48273539543151855, "step": 4107 }, { "epoch": 11.247091033538672, "grad_norm": 7.714800834655762, "learning_rate": 4.3739726027397254e-07, "log_odds_chosen": 2.5053870677948, "log_odds_ratio": -0.27530357241630554, "logits/chosen": 1.0809797048568726, "logits/rejected": 1.068633794784546, "logps/chosen": -2.618818759918213, "logps/rejected": -4.998928070068359, "loss": 0.7954, "nll_loss": 0.7678682208061218, "rewards/accuracies": 0.875, "rewards/chosen": -0.26188188791275024, "rewards/margins": 0.2380109429359436, "rewards/rejected": -0.49989283084869385, "step": 4108 }, { "epoch": 11.249828884325805, "grad_norm": 6.457267761230469, "learning_rate": 4.3726027397260275e-07, "log_odds_chosen": 2.316098690032959, "log_odds_ratio": -0.45406004786491394, "logits/chosen": 1.1357779502868652, "logits/rejected": 1.2061961889266968, "logps/chosen": -2.5271105766296387, "logps/rejected": -4.681357383728027, "loss": 0.6601, "nll_loss": 0.6146970987319946, "rewards/accuracies": 0.875, "rewards/chosen": -0.25271105766296387, "rewards/margins": 0.21542470157146454, "rewards/rejected": -0.4681357741355896, "step": 4109 }, { "epoch": 11.252566735112936, "grad_norm": 4.673192977905273, "learning_rate": 4.3712328767123285e-07, "log_odds_chosen": 1.5994858741760254, "log_odds_ratio": -0.3335348665714264, "logits/chosen": 0.851328432559967, "logits/rejected": 0.9262682795524597, "logps/chosen": -2.5623016357421875, "logps/rejected": -4.096545219421387, "loss": 0.6804, "nll_loss": 0.6470186710357666, "rewards/accuracies": 0.875, "rewards/chosen": -0.2562301754951477, "rewards/margins": 0.15342433750629425, "rewards/rejected": -0.40965449810028076, "step": 4110 }, { "epoch": 11.255304585900069, "grad_norm": 5.7312211990356445, "learning_rate": 4.36986301369863e-07, "log_odds_chosen": 1.3408445119857788, "log_odds_ratio": -0.37043654918670654, "logits/chosen": 0.7969682216644287, "logits/rejected": 0.662360429763794, "logps/chosen": -1.6702089309692383, "logps/rejected": -2.877821922302246, "loss": 0.7217, "nll_loss": 0.6846224665641785, "rewards/accuracies": 0.875, "rewards/chosen": -0.16702088713645935, "rewards/margins": 0.12076131999492645, "rewards/rejected": -0.2877821922302246, "step": 4111 }, { "epoch": 11.2580424366872, "grad_norm": 4.2160515785217285, "learning_rate": 4.3684931506849315e-07, "log_odds_chosen": 2.1811251640319824, "log_odds_ratio": -0.19691306352615356, "logits/chosen": 1.0073118209838867, "logits/rejected": 1.0536346435546875, "logps/chosen": -2.573592185974121, "logps/rejected": -4.695590972900391, "loss": 0.6672, "nll_loss": 0.647513210773468, "rewards/accuracies": 0.875, "rewards/chosen": -0.25735923647880554, "rewards/margins": 0.212199866771698, "rewards/rejected": -0.46955907344818115, "step": 4112 }, { "epoch": 11.260780287474333, "grad_norm": 6.608421802520752, "learning_rate": 4.3671232876712325e-07, "log_odds_chosen": 3.1395680904388428, "log_odds_ratio": -0.4286198616027832, "logits/chosen": 1.1407170295715332, "logits/rejected": 1.1122252941131592, "logps/chosen": -2.79128360748291, "logps/rejected": -5.847564697265625, "loss": 0.7284, "nll_loss": 0.6855796575546265, "rewards/accuracies": 0.875, "rewards/chosen": -0.27912837266921997, "rewards/margins": 0.30562809109687805, "rewards/rejected": -0.5847564935684204, "step": 4113 }, { "epoch": 11.263518138261464, "grad_norm": 10.740772247314453, "learning_rate": 4.365753424657534e-07, "log_odds_chosen": 0.4334476888179779, "log_odds_ratio": -0.7307328581809998, "logits/chosen": 0.8558889031410217, "logits/rejected": 0.7783942222595215, "logps/chosen": -3.0116429328918457, "logps/rejected": -3.4142096042633057, "loss": 0.8855, "nll_loss": 0.8124061822891235, "rewards/accuracies": 0.625, "rewards/chosen": -0.30116432905197144, "rewards/margins": 0.040256645530462265, "rewards/rejected": -0.341420978307724, "step": 4114 }, { "epoch": 11.266255989048597, "grad_norm": 5.836751461029053, "learning_rate": 4.364383561643835e-07, "log_odds_chosen": 3.6643738746643066, "log_odds_ratio": -0.25523650646209717, "logits/chosen": 0.6987417936325073, "logits/rejected": 0.7571746706962585, "logps/chosen": -1.5237935781478882, "logps/rejected": -4.919925689697266, "loss": 0.6742, "nll_loss": 0.6486788988113403, "rewards/accuracies": 0.875, "rewards/chosen": -0.1523793488740921, "rewards/margins": 0.3396131992340088, "rewards/rejected": -0.4919925928115845, "step": 4115 }, { "epoch": 11.268993839835728, "grad_norm": 4.1205573081970215, "learning_rate": 4.363013698630137e-07, "log_odds_chosen": 2.916881799697876, "log_odds_ratio": -0.1983746737241745, "logits/chosen": 0.743445873260498, "logits/rejected": 0.7223715782165527, "logps/chosen": -1.6570247411727905, "logps/rejected": -4.413208484649658, "loss": 0.688, "nll_loss": 0.6681180596351624, "rewards/accuracies": 1.0, "rewards/chosen": -0.1657024621963501, "rewards/margins": 0.27561837434768677, "rewards/rejected": -0.44132086634635925, "step": 4116 }, { "epoch": 11.271731690622861, "grad_norm": 5.850159645080566, "learning_rate": 4.361643835616438e-07, "log_odds_chosen": 2.5415544509887695, "log_odds_ratio": -0.36037856340408325, "logits/chosen": 0.6812887191772461, "logits/rejected": 0.6583713293075562, "logps/chosen": -2.229617118835449, "logps/rejected": -4.641702651977539, "loss": 0.6765, "nll_loss": 0.640465259552002, "rewards/accuracies": 0.75, "rewards/chosen": -0.22296172380447388, "rewards/margins": 0.2412084937095642, "rewards/rejected": -0.4641702473163605, "step": 4117 }, { "epoch": 11.274469541409992, "grad_norm": 6.260986328125, "learning_rate": 4.3602739726027396e-07, "log_odds_chosen": 2.353599786758423, "log_odds_ratio": -0.3596268892288208, "logits/chosen": 0.8046802282333374, "logits/rejected": 0.8551316261291504, "logps/chosen": -2.0431225299835205, "logps/rejected": -4.287321090698242, "loss": 0.788, "nll_loss": 0.7520105242729187, "rewards/accuracies": 0.75, "rewards/chosen": -0.20431223511695862, "rewards/margins": 0.22441992163658142, "rewards/rejected": -0.42873215675354004, "step": 4118 }, { "epoch": 11.277207392197125, "grad_norm": 7.3147783279418945, "learning_rate": 4.358904109589041e-07, "log_odds_chosen": 1.5793871879577637, "log_odds_ratio": -0.4056055545806885, "logits/chosen": 0.8490079641342163, "logits/rejected": 0.8097684383392334, "logps/chosen": -3.1148533821105957, "logps/rejected": -4.627843856811523, "loss": 0.8752, "nll_loss": 0.8346505165100098, "rewards/accuracies": 0.875, "rewards/chosen": -0.3114853501319885, "rewards/margins": 0.15129896998405457, "rewards/rejected": -0.4627843499183655, "step": 4119 }, { "epoch": 11.279945242984258, "grad_norm": 6.975412368774414, "learning_rate": 4.357534246575342e-07, "log_odds_chosen": 1.1904287338256836, "log_odds_ratio": -0.48191478848457336, "logits/chosen": 0.6210780143737793, "logits/rejected": 0.6714064478874207, "logps/chosen": -2.7148125171661377, "logps/rejected": -3.856602668762207, "loss": 0.7711, "nll_loss": 0.7228971719741821, "rewards/accuracies": 0.625, "rewards/chosen": -0.2714812755584717, "rewards/margins": 0.11417898535728455, "rewards/rejected": -0.38566023111343384, "step": 4120 }, { "epoch": 11.28268309377139, "grad_norm": 5.290937900543213, "learning_rate": 4.3561643835616436e-07, "log_odds_chosen": 1.323840856552124, "log_odds_ratio": -0.31213268637657166, "logits/chosen": 1.0825728178024292, "logits/rejected": 1.0443445444107056, "logps/chosen": -2.471747875213623, "logps/rejected": -3.68906831741333, "loss": 0.7314, "nll_loss": 0.7001755237579346, "rewards/accuracies": 0.75, "rewards/chosen": -0.24717479944229126, "rewards/margins": 0.12173202633857727, "rewards/rejected": -0.36890679597854614, "step": 4121 }, { "epoch": 11.285420944558522, "grad_norm": 5.528653144836426, "learning_rate": 4.354794520547945e-07, "log_odds_chosen": 3.4005162715911865, "log_odds_ratio": -0.11787296831607819, "logits/chosen": 0.9177443981170654, "logits/rejected": 0.9532569646835327, "logps/chosen": -2.5183022022247314, "logps/rejected": -5.799997329711914, "loss": 0.716, "nll_loss": 0.7042310237884521, "rewards/accuracies": 1.0, "rewards/chosen": -0.25183022022247314, "rewards/margins": 0.32816949486732483, "rewards/rejected": -0.5799996852874756, "step": 4122 }, { "epoch": 11.288158795345653, "grad_norm": 5.153231620788574, "learning_rate": 4.3534246575342466e-07, "log_odds_chosen": 1.5460634231567383, "log_odds_ratio": -0.21542945504188538, "logits/chosen": 0.7620381116867065, "logits/rejected": 0.7544613480567932, "logps/chosen": -1.8847557306289673, "logps/rejected": -3.257798671722412, "loss": 0.74, "nll_loss": 0.7184078693389893, "rewards/accuracies": 1.0, "rewards/chosen": -0.1884755641222, "rewards/margins": 0.13730430603027344, "rewards/rejected": -0.32577985525131226, "step": 4123 }, { "epoch": 11.290896646132786, "grad_norm": 5.610439300537109, "learning_rate": 4.3520547945205476e-07, "log_odds_chosen": 2.0180230140686035, "log_odds_ratio": -0.3067455589771271, "logits/chosen": 1.0279407501220703, "logits/rejected": 1.0489399433135986, "logps/chosen": -2.2781167030334473, "logps/rejected": -4.165692329406738, "loss": 0.8619, "nll_loss": 0.831242024898529, "rewards/accuracies": 0.875, "rewards/chosen": -0.22781164944171906, "rewards/margins": 0.1887575387954712, "rewards/rejected": -0.41656920313835144, "step": 4124 }, { "epoch": 11.293634496919918, "grad_norm": 5.471108436584473, "learning_rate": 4.350684931506849e-07, "log_odds_chosen": 1.7081371545791626, "log_odds_ratio": -0.20275551080703735, "logits/chosen": 0.962497889995575, "logits/rejected": 0.9851160645484924, "logps/chosen": -1.969130039215088, "logps/rejected": -3.515577793121338, "loss": 0.6546, "nll_loss": 0.634364664554596, "rewards/accuracies": 1.0, "rewards/chosen": -0.1969130039215088, "rewards/margins": 0.15464478731155396, "rewards/rejected": -0.35155779123306274, "step": 4125 }, { "epoch": 11.29637234770705, "grad_norm": 5.129861354827881, "learning_rate": 4.3493150684931507e-07, "log_odds_chosen": 0.8270021080970764, "log_odds_ratio": -0.44343698024749756, "logits/chosen": 0.8344770669937134, "logits/rejected": 0.8790014982223511, "logps/chosen": -2.0257039070129395, "logps/rejected": -2.738956928253174, "loss": 0.6753, "nll_loss": 0.6309327483177185, "rewards/accuracies": 0.875, "rewards/chosen": -0.202570378780365, "rewards/margins": 0.07132529467344284, "rewards/rejected": -0.2738956809043884, "step": 4126 }, { "epoch": 11.299110198494182, "grad_norm": 7.679332256317139, "learning_rate": 4.3479452054794517e-07, "log_odds_chosen": 1.6215639114379883, "log_odds_ratio": -0.5088881850242615, "logits/chosen": 0.9468001127243042, "logits/rejected": 0.819391131401062, "logps/chosen": -2.6416563987731934, "logps/rejected": -4.142771244049072, "loss": 0.8137, "nll_loss": 0.7628260850906372, "rewards/accuracies": 0.875, "rewards/chosen": -0.26416563987731934, "rewards/margins": 0.15011149644851685, "rewards/rejected": -0.4142771363258362, "step": 4127 }, { "epoch": 11.301848049281315, "grad_norm": 5.080315589904785, "learning_rate": 4.346575342465753e-07, "log_odds_chosen": 2.0406458377838135, "log_odds_ratio": -0.28270015120506287, "logits/chosen": 0.9503087997436523, "logits/rejected": 0.8940852284431458, "logps/chosen": -1.8478367328643799, "logps/rejected": -3.7559807300567627, "loss": 0.7317, "nll_loss": 0.7033879160881042, "rewards/accuracies": 0.875, "rewards/chosen": -0.18478365242481232, "rewards/margins": 0.19081440567970276, "rewards/rejected": -0.37559807300567627, "step": 4128 }, { "epoch": 11.304585900068446, "grad_norm": 5.540757179260254, "learning_rate": 4.3452054794520547e-07, "log_odds_chosen": 1.187849998474121, "log_odds_ratio": -0.38081789016723633, "logits/chosen": 0.8150481581687927, "logits/rejected": 0.736788272857666, "logps/chosen": -1.707590937614441, "logps/rejected": -2.7656216621398926, "loss": 0.7238, "nll_loss": 0.6857572793960571, "rewards/accuracies": 1.0, "rewards/chosen": -0.17075908184051514, "rewards/margins": 0.10580310225486755, "rewards/rejected": -0.2765622138977051, "step": 4129 }, { "epoch": 11.307323750855579, "grad_norm": 7.723756790161133, "learning_rate": 4.343835616438356e-07, "log_odds_chosen": 1.0792732238769531, "log_odds_ratio": -0.6700241565704346, "logits/chosen": 0.537495493888855, "logits/rejected": 0.48131388425827026, "logps/chosen": -2.710683822631836, "logps/rejected": -3.681614875793457, "loss": 0.7464, "nll_loss": 0.6793826818466187, "rewards/accuracies": 0.75, "rewards/chosen": -0.27106839418411255, "rewards/margins": 0.09709307551383972, "rewards/rejected": -0.36816149950027466, "step": 4130 }, { "epoch": 11.31006160164271, "grad_norm": 6.585747241973877, "learning_rate": 4.342465753424657e-07, "log_odds_chosen": 1.466538906097412, "log_odds_ratio": -0.33595171570777893, "logits/chosen": 0.8825472593307495, "logits/rejected": 0.9270937442779541, "logps/chosen": -3.437502384185791, "logps/rejected": -4.865209102630615, "loss": 0.8346, "nll_loss": 0.8009885549545288, "rewards/accuracies": 0.875, "rewards/chosen": -0.3437502384185791, "rewards/margins": 0.14277061820030212, "rewards/rejected": -0.4865208864212036, "step": 4131 }, { "epoch": 11.312799452429843, "grad_norm": 5.15083646774292, "learning_rate": 4.3410958904109587e-07, "log_odds_chosen": 1.2301310300827026, "log_odds_ratio": -0.3223426342010498, "logits/chosen": 0.7866281270980835, "logits/rejected": 0.7881811857223511, "logps/chosen": -1.7850805521011353, "logps/rejected": -2.879429578781128, "loss": 0.6154, "nll_loss": 0.5831712484359741, "rewards/accuracies": 0.875, "rewards/chosen": -0.17850804328918457, "rewards/margins": 0.10943491756916046, "rewards/rejected": -0.28794294595718384, "step": 4132 }, { "epoch": 11.315537303216974, "grad_norm": 5.329872131347656, "learning_rate": 4.33972602739726e-07, "log_odds_chosen": 1.0231436491012573, "log_odds_ratio": -0.40013182163238525, "logits/chosen": 0.7464368343353271, "logits/rejected": 0.6970140933990479, "logps/chosen": -1.971545696258545, "logps/rejected": -2.873727798461914, "loss": 0.6213, "nll_loss": 0.5812432169914246, "rewards/accuracies": 0.875, "rewards/chosen": -0.19715458154678345, "rewards/margins": 0.0902182012796402, "rewards/rejected": -0.28737279772758484, "step": 4133 }, { "epoch": 11.318275154004107, "grad_norm": 4.530228614807129, "learning_rate": 4.338356164383561e-07, "log_odds_chosen": 2.793813705444336, "log_odds_ratio": -0.25847700238227844, "logits/chosen": 0.8865021467208862, "logits/rejected": 0.8661441206932068, "logps/chosen": -1.6822000741958618, "logps/rejected": -4.325239181518555, "loss": 0.6886, "nll_loss": 0.6627780795097351, "rewards/accuracies": 1.0, "rewards/chosen": -0.16822001338005066, "rewards/margins": 0.26430392265319824, "rewards/rejected": -0.4325239062309265, "step": 4134 }, { "epoch": 11.321013004791238, "grad_norm": 4.488168716430664, "learning_rate": 4.336986301369863e-07, "log_odds_chosen": 3.1376943588256836, "log_odds_ratio": -0.1806412637233734, "logits/chosen": 0.8440762758255005, "logits/rejected": 0.7939333319664001, "logps/chosen": -1.804081916809082, "logps/rejected": -4.784468650817871, "loss": 0.6989, "nll_loss": 0.6808649301528931, "rewards/accuracies": 1.0, "rewards/chosen": -0.18040819466114044, "rewards/margins": 0.2980387210845947, "rewards/rejected": -0.478446900844574, "step": 4135 }, { "epoch": 11.323750855578371, "grad_norm": 5.23898458480835, "learning_rate": 4.3356164383561643e-07, "log_odds_chosen": 1.205916404724121, "log_odds_ratio": -0.5257652401924133, "logits/chosen": 0.6124402284622192, "logits/rejected": 0.6492919325828552, "logps/chosen": -2.2964115142822266, "logps/rejected": -3.3427014350891113, "loss": 0.7072, "nll_loss": 0.6546124219894409, "rewards/accuracies": 0.875, "rewards/chosen": -0.2296411693096161, "rewards/margins": 0.10462895035743713, "rewards/rejected": -0.3342701196670532, "step": 4136 }, { "epoch": 11.326488706365502, "grad_norm": 6.318584442138672, "learning_rate": 4.334246575342466e-07, "log_odds_chosen": 2.168370485305786, "log_odds_ratio": -0.3412255346775055, "logits/chosen": 0.8085985779762268, "logits/rejected": 0.8139238953590393, "logps/chosen": -2.242217540740967, "logps/rejected": -4.271852970123291, "loss": 0.7043, "nll_loss": 0.6702231168746948, "rewards/accuracies": 0.875, "rewards/chosen": -0.22422178089618683, "rewards/margins": 0.20296351611614227, "rewards/rejected": -0.4271852970123291, "step": 4137 }, { "epoch": 11.329226557152635, "grad_norm": 9.275400161743164, "learning_rate": 4.332876712328767e-07, "log_odds_chosen": 2.496938467025757, "log_odds_ratio": -0.3781556785106659, "logits/chosen": 0.6459828019142151, "logits/rejected": 0.5830446481704712, "logps/chosen": -2.4947874546051025, "logps/rejected": -4.906124114990234, "loss": 0.8583, "nll_loss": 0.8204778432846069, "rewards/accuracies": 0.875, "rewards/chosen": -0.24947874248027802, "rewards/margins": 0.2411336749792099, "rewards/rejected": -0.4906124472618103, "step": 4138 }, { "epoch": 11.331964407939767, "grad_norm": 5.65068244934082, "learning_rate": 4.331506849315068e-07, "log_odds_chosen": 2.4794464111328125, "log_odds_ratio": -0.4005557596683502, "logits/chosen": 0.8850659132003784, "logits/rejected": 0.8857777118682861, "logps/chosen": -2.7445483207702637, "logps/rejected": -5.1672515869140625, "loss": 0.8465, "nll_loss": 0.806421160697937, "rewards/accuracies": 0.875, "rewards/chosen": -0.27445486187934875, "rewards/margins": 0.2422703355550766, "rewards/rejected": -0.5167251825332642, "step": 4139 }, { "epoch": 11.3347022587269, "grad_norm": 6.037280082702637, "learning_rate": 4.33013698630137e-07, "log_odds_chosen": 0.9479562044143677, "log_odds_ratio": -0.4285385012626648, "logits/chosen": 0.8326865434646606, "logits/rejected": 0.8420983552932739, "logps/chosen": -1.9880859851837158, "logps/rejected": -2.8335728645324707, "loss": 0.6657, "nll_loss": 0.622804582118988, "rewards/accuracies": 0.875, "rewards/chosen": -0.19880861043930054, "rewards/margins": 0.08454868942499161, "rewards/rejected": -0.28335729241371155, "step": 4140 }, { "epoch": 11.33744010951403, "grad_norm": 10.302617073059082, "learning_rate": 4.328767123287671e-07, "log_odds_chosen": 1.9159033298492432, "log_odds_ratio": -0.5721182823181152, "logits/chosen": 0.9365935921669006, "logits/rejected": 0.9782843589782715, "logps/chosen": -3.0809576511383057, "logps/rejected": -4.855117321014404, "loss": 0.7925, "nll_loss": 0.7352566719055176, "rewards/accuracies": 0.875, "rewards/chosen": -0.3080957531929016, "rewards/margins": 0.17741598188877106, "rewards/rejected": -0.48551174998283386, "step": 4141 }, { "epoch": 11.340177960301164, "grad_norm": 4.9520087242126465, "learning_rate": 4.3273972602739724e-07, "log_odds_chosen": 3.2294600009918213, "log_odds_ratio": -0.13397230207920074, "logits/chosen": 1.0947177410125732, "logits/rejected": 1.1199877262115479, "logps/chosen": -2.1474361419677734, "logps/rejected": -5.237429618835449, "loss": 0.7452, "nll_loss": 0.7317763566970825, "rewards/accuracies": 1.0, "rewards/chosen": -0.21474361419677734, "rewards/margins": 0.3089994192123413, "rewards/rejected": -0.5237430334091187, "step": 4142 }, { "epoch": 11.342915811088295, "grad_norm": 4.599111557006836, "learning_rate": 4.326027397260274e-07, "log_odds_chosen": 1.68867027759552, "log_odds_ratio": -0.2810162603855133, "logits/chosen": 0.8536033630371094, "logits/rejected": 0.8863868117332458, "logps/chosen": -2.047295093536377, "logps/rejected": -3.6410560607910156, "loss": 0.6127, "nll_loss": 0.5845780968666077, "rewards/accuracies": 1.0, "rewards/chosen": -0.20472949743270874, "rewards/margins": 0.1593761295080185, "rewards/rejected": -0.3641056418418884, "step": 4143 }, { "epoch": 11.345653661875428, "grad_norm": 5.844115734100342, "learning_rate": 4.3246575342465754e-07, "log_odds_chosen": 2.0259101390838623, "log_odds_ratio": -0.22641493380069733, "logits/chosen": 0.8238490223884583, "logits/rejected": 0.8516658544540405, "logps/chosen": -2.6353111267089844, "logps/rejected": -4.586258411407471, "loss": 0.9083, "nll_loss": 0.8856419324874878, "rewards/accuracies": 1.0, "rewards/chosen": -0.2635310888290405, "rewards/margins": 0.1950947344303131, "rewards/rejected": -0.45862582325935364, "step": 4144 }, { "epoch": 11.34839151266256, "grad_norm": 4.696376323699951, "learning_rate": 4.3232876712328764e-07, "log_odds_chosen": 2.3640809059143066, "log_odds_ratio": -0.19952180981636047, "logits/chosen": 0.7596566677093506, "logits/rejected": 0.7691672444343567, "logps/chosen": -1.704677939414978, "logps/rejected": -3.9037973880767822, "loss": 0.7164, "nll_loss": 0.6964868903160095, "rewards/accuracies": 1.0, "rewards/chosen": -0.1704677939414978, "rewards/margins": 0.21991196274757385, "rewards/rejected": -0.39037978649139404, "step": 4145 }, { "epoch": 11.351129363449692, "grad_norm": 5.701857566833496, "learning_rate": 4.3219178082191774e-07, "log_odds_chosen": 1.0009045600891113, "log_odds_ratio": -0.5799381732940674, "logits/chosen": 0.6272521018981934, "logits/rejected": 0.6601915955543518, "logps/chosen": -2.115295171737671, "logps/rejected": -3.0556817054748535, "loss": 0.6558, "nll_loss": 0.5977984666824341, "rewards/accuracies": 0.75, "rewards/chosen": -0.21152952313423157, "rewards/margins": 0.09403865039348602, "rewards/rejected": -0.3055681586265564, "step": 4146 }, { "epoch": 11.353867214236825, "grad_norm": 8.146645545959473, "learning_rate": 4.3205479452054794e-07, "log_odds_chosen": 2.534339666366577, "log_odds_ratio": -0.36386823654174805, "logits/chosen": 0.8535259366035461, "logits/rejected": 0.8603449463844299, "logps/chosen": -3.104309558868408, "logps/rejected": -5.544107437133789, "loss": 0.8637, "nll_loss": 0.8273300528526306, "rewards/accuracies": 0.75, "rewards/chosen": -0.31043097376823425, "rewards/margins": 0.24397975206375122, "rewards/rejected": -0.5544107556343079, "step": 4147 }, { "epoch": 11.356605065023956, "grad_norm": 5.472711563110352, "learning_rate": 4.3191780821917804e-07, "log_odds_chosen": 1.808319091796875, "log_odds_ratio": -0.2516956329345703, "logits/chosen": 1.0334599018096924, "logits/rejected": 1.1284098625183105, "logps/chosen": -3.0119385719299316, "logps/rejected": -4.765842437744141, "loss": 0.7424, "nll_loss": 0.7172688245773315, "rewards/accuracies": 0.875, "rewards/chosen": -0.30119383335113525, "rewards/margins": 0.17539039254188538, "rewards/rejected": -0.476584255695343, "step": 4148 }, { "epoch": 11.359342915811089, "grad_norm": 5.003692626953125, "learning_rate": 4.317808219178082e-07, "log_odds_chosen": 1.091640830039978, "log_odds_ratio": -0.4123145639896393, "logits/chosen": 0.6112803220748901, "logits/rejected": 0.6589793562889099, "logps/chosen": -2.274620532989502, "logps/rejected": -3.2963919639587402, "loss": 0.738, "nll_loss": 0.696770191192627, "rewards/accuracies": 0.75, "rewards/chosen": -0.2274620682001114, "rewards/margins": 0.10217711329460144, "rewards/rejected": -0.329639196395874, "step": 4149 }, { "epoch": 11.36208076659822, "grad_norm": 8.908269882202148, "learning_rate": 4.3164383561643835e-07, "log_odds_chosen": 1.0482739210128784, "log_odds_ratio": -0.7775372266769409, "logits/chosen": 0.7866319417953491, "logits/rejected": 0.7446415424346924, "logps/chosen": -2.898883581161499, "logps/rejected": -3.8812952041625977, "loss": 0.8173, "nll_loss": 0.7395681738853455, "rewards/accuracies": 0.75, "rewards/chosen": -0.2898883521556854, "rewards/margins": 0.09824115037918091, "rewards/rejected": -0.38812947273254395, "step": 4150 }, { "epoch": 11.364818617385353, "grad_norm": 5.247653961181641, "learning_rate": 4.315068493150685e-07, "log_odds_chosen": 1.2368748188018799, "log_odds_ratio": -0.36723142862319946, "logits/chosen": 1.2056410312652588, "logits/rejected": 1.2411993741989136, "logps/chosen": -2.0833232402801514, "logps/rejected": -3.192458152770996, "loss": 0.5946, "nll_loss": 0.5578968524932861, "rewards/accuracies": 0.75, "rewards/chosen": -0.20833232998847961, "rewards/margins": 0.1109134703874588, "rewards/rejected": -0.3192458152770996, "step": 4151 }, { "epoch": 11.367556468172484, "grad_norm": 5.117336273193359, "learning_rate": 4.313698630136986e-07, "log_odds_chosen": 2.5004653930664062, "log_odds_ratio": -0.15206119418144226, "logits/chosen": 0.9611468315124512, "logits/rejected": 0.9463410377502441, "logps/chosen": -2.2064051628112793, "logps/rejected": -4.535666465759277, "loss": 0.7246, "nll_loss": 0.7093746066093445, "rewards/accuracies": 1.0, "rewards/chosen": -0.22064052522182465, "rewards/margins": 0.2329261302947998, "rewards/rejected": -0.45356664061546326, "step": 4152 }, { "epoch": 11.370294318959617, "grad_norm": 5.072398662567139, "learning_rate": 4.3123287671232875e-07, "log_odds_chosen": 1.8398971557617188, "log_odds_ratio": -0.3464289903640747, "logits/chosen": 0.7520284652709961, "logits/rejected": 0.681349515914917, "logps/chosen": -1.681208610534668, "logps/rejected": -3.381678342819214, "loss": 0.6564, "nll_loss": 0.6217125654220581, "rewards/accuracies": 0.875, "rewards/chosen": -0.1681208610534668, "rewards/margins": 0.17004697024822235, "rewards/rejected": -0.33816784620285034, "step": 4153 }, { "epoch": 11.373032169746748, "grad_norm": 5.657675743103027, "learning_rate": 4.310958904109589e-07, "log_odds_chosen": 0.684730589389801, "log_odds_ratio": -0.4512074589729309, "logits/chosen": 0.8083256483078003, "logits/rejected": 0.7528858184814453, "logps/chosen": -2.82995867729187, "logps/rejected": -3.4588868618011475, "loss": 0.7556, "nll_loss": 0.7104452848434448, "rewards/accuracies": 0.875, "rewards/chosen": -0.28299587965011597, "rewards/margins": 0.06289278715848923, "rewards/rejected": -0.3458886742591858, "step": 4154 }, { "epoch": 11.375770020533881, "grad_norm": 6.668385028839111, "learning_rate": 4.30958904109589e-07, "log_odds_chosen": 0.9700694680213928, "log_odds_ratio": -0.6513199806213379, "logits/chosen": 0.9552745819091797, "logits/rejected": 0.9277313351631165, "logps/chosen": -2.562966823577881, "logps/rejected": -3.507983446121216, "loss": 0.8769, "nll_loss": 0.8117889761924744, "rewards/accuracies": 0.625, "rewards/chosen": -0.25629669427871704, "rewards/margins": 0.09450166672468185, "rewards/rejected": -0.3507983684539795, "step": 4155 }, { "epoch": 11.378507871321013, "grad_norm": 5.097964286804199, "learning_rate": 4.3082191780821915e-07, "log_odds_chosen": 1.5978254079818726, "log_odds_ratio": -0.2834489941596985, "logits/chosen": 0.8338550329208374, "logits/rejected": 0.8944956064224243, "logps/chosen": -2.3825929164886475, "logps/rejected": -3.882751226425171, "loss": 0.6908, "nll_loss": 0.6624755859375, "rewards/accuracies": 1.0, "rewards/chosen": -0.23825928568840027, "rewards/margins": 0.15001583099365234, "rewards/rejected": -0.3882751166820526, "step": 4156 }, { "epoch": 11.381245722108146, "grad_norm": 5.864412784576416, "learning_rate": 4.306849315068493e-07, "log_odds_chosen": 1.616126537322998, "log_odds_ratio": -0.35946232080459595, "logits/chosen": 0.8865209221839905, "logits/rejected": 0.9370605945587158, "logps/chosen": -3.0737338066101074, "logps/rejected": -4.656307697296143, "loss": 0.8416, "nll_loss": 0.8056124448776245, "rewards/accuracies": 0.875, "rewards/chosen": -0.30737340450286865, "rewards/margins": 0.158257395029068, "rewards/rejected": -0.46563076972961426, "step": 4157 }, { "epoch": 11.383983572895277, "grad_norm": 4.465457439422607, "learning_rate": 4.3054794520547946e-07, "log_odds_chosen": 1.785872459411621, "log_odds_ratio": -0.333838552236557, "logits/chosen": 0.844721257686615, "logits/rejected": 0.8435714244842529, "logps/chosen": -2.028200626373291, "logps/rejected": -3.7306618690490723, "loss": 0.6907, "nll_loss": 0.6573137044906616, "rewards/accuracies": 1.0, "rewards/chosen": -0.2028200626373291, "rewards/margins": 0.17024612426757812, "rewards/rejected": -0.3730662167072296, "step": 4158 }, { "epoch": 11.38672142368241, "grad_norm": 5.999178409576416, "learning_rate": 4.3041095890410956e-07, "log_odds_chosen": 1.4104808568954468, "log_odds_ratio": -0.49634554982185364, "logits/chosen": 0.8344058990478516, "logits/rejected": 0.7788296937942505, "logps/chosen": -2.6661596298217773, "logps/rejected": -4.019235134124756, "loss": 0.7355, "nll_loss": 0.6858657598495483, "rewards/accuracies": 0.75, "rewards/chosen": -0.26661595702171326, "rewards/margins": 0.13530755043029785, "rewards/rejected": -0.4019235372543335, "step": 4159 }, { "epoch": 11.38945927446954, "grad_norm": 5.325863361358643, "learning_rate": 4.302739726027397e-07, "log_odds_chosen": 2.4290659427642822, "log_odds_ratio": -0.4151257574558258, "logits/chosen": 1.0906955003738403, "logits/rejected": 1.1445651054382324, "logps/chosen": -2.9884579181671143, "logps/rejected": -5.319916248321533, "loss": 0.7613, "nll_loss": 0.7198266983032227, "rewards/accuracies": 0.875, "rewards/chosen": -0.2988457679748535, "rewards/margins": 0.2331458181142807, "rewards/rejected": -0.5319916009902954, "step": 4160 }, { "epoch": 11.392197125256674, "grad_norm": 4.489995002746582, "learning_rate": 4.3013698630136986e-07, "log_odds_chosen": 1.790644645690918, "log_odds_ratio": -0.2845270037651062, "logits/chosen": 0.7099664211273193, "logits/rejected": 0.7215470671653748, "logps/chosen": -1.9745218753814697, "logps/rejected": -3.6124820709228516, "loss": 0.7045, "nll_loss": 0.6760014295578003, "rewards/accuracies": 0.875, "rewards/chosen": -0.19745220243930817, "rewards/margins": 0.16379600763320923, "rewards/rejected": -0.3612482249736786, "step": 4161 }, { "epoch": 11.394934976043805, "grad_norm": 5.036065578460693, "learning_rate": 4.2999999999999996e-07, "log_odds_chosen": 2.8175101280212402, "log_odds_ratio": -0.2594153881072998, "logits/chosen": 0.8313668370246887, "logits/rejected": 0.8907945156097412, "logps/chosen": -2.3950953483581543, "logps/rejected": -5.128050327301025, "loss": 0.7654, "nll_loss": 0.7394717335700989, "rewards/accuracies": 1.0, "rewards/chosen": -0.23950953781604767, "rewards/margins": 0.27329549193382263, "rewards/rejected": -0.5128050446510315, "step": 4162 }, { "epoch": 11.397672826830938, "grad_norm": 4.922914505004883, "learning_rate": 4.298630136986301e-07, "log_odds_chosen": 2.574153423309326, "log_odds_ratio": -0.18469588458538055, "logits/chosen": 0.9389961957931519, "logits/rejected": 0.9798321723937988, "logps/chosen": -2.357222318649292, "logps/rejected": -4.844210624694824, "loss": 0.6756, "nll_loss": 0.6570917367935181, "rewards/accuracies": 0.875, "rewards/chosen": -0.23572222888469696, "rewards/margins": 0.24869883060455322, "rewards/rejected": -0.4844210743904114, "step": 4163 }, { "epoch": 11.40041067761807, "grad_norm": 4.286042213439941, "learning_rate": 4.2972602739726026e-07, "log_odds_chosen": 2.153926372528076, "log_odds_ratio": -0.16747376322746277, "logits/chosen": 0.9326255917549133, "logits/rejected": 0.9599586129188538, "logps/chosen": -2.1888163089752197, "logps/rejected": -4.167830467224121, "loss": 0.6369, "nll_loss": 0.6201295256614685, "rewards/accuracies": 1.0, "rewards/chosen": -0.21888163685798645, "rewards/margins": 0.1979014128446579, "rewards/rejected": -0.41678309440612793, "step": 4164 }, { "epoch": 11.403148528405202, "grad_norm": 5.588239669799805, "learning_rate": 4.295890410958904e-07, "log_odds_chosen": 1.9606363773345947, "log_odds_ratio": -0.2799322009086609, "logits/chosen": 1.0123130083084106, "logits/rejected": 1.001293659210205, "logps/chosen": -2.8685121536254883, "logps/rejected": -4.764941215515137, "loss": 0.8222, "nll_loss": 0.7942034006118774, "rewards/accuracies": 1.0, "rewards/chosen": -0.2868512272834778, "rewards/margins": 0.18964287638664246, "rewards/rejected": -0.4764941334724426, "step": 4165 }, { "epoch": 11.405886379192333, "grad_norm": 5.196492671966553, "learning_rate": 4.294520547945205e-07, "log_odds_chosen": 2.2268009185791016, "log_odds_ratio": -0.20679470896720886, "logits/chosen": 0.899258017539978, "logits/rejected": 0.9785470962524414, "logps/chosen": -2.6089088916778564, "logps/rejected": -4.717667579650879, "loss": 0.6284, "nll_loss": 0.6077473163604736, "rewards/accuracies": 0.875, "rewards/chosen": -0.2608909010887146, "rewards/margins": 0.21087588369846344, "rewards/rejected": -0.47176679968833923, "step": 4166 }, { "epoch": 11.408624229979466, "grad_norm": 5.884859085083008, "learning_rate": 4.2931506849315067e-07, "log_odds_chosen": 1.8423399925231934, "log_odds_ratio": -0.23222193121910095, "logits/chosen": 0.8284726142883301, "logits/rejected": 0.8505829572677612, "logps/chosen": -2.887392997741699, "logps/rejected": -4.63355016708374, "loss": 0.7039, "nll_loss": 0.6806429624557495, "rewards/accuracies": 1.0, "rewards/chosen": -0.28873929381370544, "rewards/margins": 0.17461571097373962, "rewards/rejected": -0.46335500478744507, "step": 4167 }, { "epoch": 11.411362080766597, "grad_norm": 5.378640174865723, "learning_rate": 4.291780821917808e-07, "log_odds_chosen": 2.8159103393554688, "log_odds_ratio": -0.43400076031684875, "logits/chosen": 0.8018643260002136, "logits/rejected": 0.8915657997131348, "logps/chosen": -2.921433925628662, "logps/rejected": -5.6494269371032715, "loss": 0.6386, "nll_loss": 0.5952179431915283, "rewards/accuracies": 0.875, "rewards/chosen": -0.29214343428611755, "rewards/margins": 0.2727992832660675, "rewards/rejected": -0.5649427175521851, "step": 4168 }, { "epoch": 11.41409993155373, "grad_norm": 5.856703281402588, "learning_rate": 4.290410958904109e-07, "log_odds_chosen": 1.9151611328125, "log_odds_ratio": -0.3239479660987854, "logits/chosen": 0.8885989785194397, "logits/rejected": 0.9017975330352783, "logps/chosen": -2.3788280487060547, "logps/rejected": -4.174008369445801, "loss": 0.6684, "nll_loss": 0.636014997959137, "rewards/accuracies": 0.875, "rewards/chosen": -0.2378828227519989, "rewards/margins": 0.17951804399490356, "rewards/rejected": -0.41740086674690247, "step": 4169 }, { "epoch": 11.416837782340863, "grad_norm": 5.618472576141357, "learning_rate": 4.2890410958904107e-07, "log_odds_chosen": 1.5852850675582886, "log_odds_ratio": -0.3804627060890198, "logits/chosen": 0.759401798248291, "logits/rejected": 0.6761646270751953, "logps/chosen": -2.6106698513031006, "logps/rejected": -4.107143402099609, "loss": 0.7383, "nll_loss": 0.7002487182617188, "rewards/accuracies": 0.875, "rewards/chosen": -0.2610669732093811, "rewards/margins": 0.1496473252773285, "rewards/rejected": -0.410714328289032, "step": 4170 }, { "epoch": 11.419575633127995, "grad_norm": 5.2433061599731445, "learning_rate": 4.287671232876712e-07, "log_odds_chosen": 1.8722318410873413, "log_odds_ratio": -0.31920912861824036, "logits/chosen": 1.002724289894104, "logits/rejected": 1.0804972648620605, "logps/chosen": -2.6324892044067383, "logps/rejected": -4.4669365882873535, "loss": 0.7061, "nll_loss": 0.6741625070571899, "rewards/accuracies": 0.625, "rewards/chosen": -0.26324892044067383, "rewards/margins": 0.1834447830915451, "rewards/rejected": -0.4466937184333801, "step": 4171 }, { "epoch": 11.422313483915127, "grad_norm": 4.722171306610107, "learning_rate": 4.286301369863014e-07, "log_odds_chosen": 2.5354719161987305, "log_odds_ratio": -0.15097375214099884, "logits/chosen": 0.7739088535308838, "logits/rejected": 0.7967483997344971, "logps/chosen": -1.9399954080581665, "logps/rejected": -4.301580429077148, "loss": 0.6567, "nll_loss": 0.6416211128234863, "rewards/accuracies": 1.0, "rewards/chosen": -0.1939995437860489, "rewards/margins": 0.23615846037864685, "rewards/rejected": -0.43015801906585693, "step": 4172 }, { "epoch": 11.425051334702259, "grad_norm": 4.977502822875977, "learning_rate": 4.2849315068493147e-07, "log_odds_chosen": 2.201122283935547, "log_odds_ratio": -0.19152432680130005, "logits/chosen": 0.9016526341438293, "logits/rejected": 0.9487080574035645, "logps/chosen": -2.5464863777160645, "logps/rejected": -4.620969772338867, "loss": 0.6904, "nll_loss": 0.6712799072265625, "rewards/accuracies": 1.0, "rewards/chosen": -0.2546486556529999, "rewards/margins": 0.2074483036994934, "rewards/rejected": -0.4620969891548157, "step": 4173 }, { "epoch": 11.427789185489392, "grad_norm": 7.841995716094971, "learning_rate": 4.283561643835616e-07, "log_odds_chosen": 2.070866107940674, "log_odds_ratio": -0.5532954335212708, "logits/chosen": 0.6462544202804565, "logits/rejected": 0.6477595567703247, "logps/chosen": -2.845485210418701, "logps/rejected": -4.761120796203613, "loss": 0.7762, "nll_loss": 0.7208706736564636, "rewards/accuracies": 0.875, "rewards/chosen": -0.28454849123954773, "rewards/margins": 0.19156357645988464, "rewards/rejected": -0.4761120676994324, "step": 4174 }, { "epoch": 11.430527036276523, "grad_norm": 4.977473258972168, "learning_rate": 4.282191780821918e-07, "log_odds_chosen": 3.4581351280212402, "log_odds_ratio": -0.08318895846605301, "logits/chosen": 1.117736577987671, "logits/rejected": 1.195544958114624, "logps/chosen": -2.712188482284546, "logps/rejected": -6.102173805236816, "loss": 0.7756, "nll_loss": 0.7672612071037292, "rewards/accuracies": 1.0, "rewards/chosen": -0.27121883630752563, "rewards/margins": 0.3389984965324402, "rewards/rejected": -0.6102173328399658, "step": 4175 }, { "epoch": 11.433264887063656, "grad_norm": 8.936482429504395, "learning_rate": 4.280821917808219e-07, "log_odds_chosen": 1.8872177600860596, "log_odds_ratio": -0.4196532070636749, "logits/chosen": 0.8393316268920898, "logits/rejected": 0.871507465839386, "logps/chosen": -2.200070858001709, "logps/rejected": -3.935699462890625, "loss": 0.69, "nll_loss": 0.6480011940002441, "rewards/accuracies": 0.875, "rewards/chosen": -0.22000707685947418, "rewards/margins": 0.17356285452842712, "rewards/rejected": -0.3935699462890625, "step": 4176 }, { "epoch": 11.436002737850787, "grad_norm": 4.700973987579346, "learning_rate": 4.2794520547945203e-07, "log_odds_chosen": 3.8278820514678955, "log_odds_ratio": -0.16823852062225342, "logits/chosen": 0.9829096794128418, "logits/rejected": 0.9943866729736328, "logps/chosen": -1.7312798500061035, "logps/rejected": -5.334310054779053, "loss": 0.6166, "nll_loss": 0.5997416377067566, "rewards/accuracies": 0.875, "rewards/chosen": -0.17312797904014587, "rewards/margins": 0.36030298471450806, "rewards/rejected": -0.5334309339523315, "step": 4177 }, { "epoch": 11.43874058863792, "grad_norm": 4.653660297393799, "learning_rate": 4.278082191780822e-07, "log_odds_chosen": 2.237619400024414, "log_odds_ratio": -0.22929702699184418, "logits/chosen": 0.8288507461547852, "logits/rejected": 0.8727210760116577, "logps/chosen": -1.8022091388702393, "logps/rejected": -3.865187168121338, "loss": 0.6145, "nll_loss": 0.5915632247924805, "rewards/accuracies": 0.875, "rewards/chosen": -0.18022093176841736, "rewards/margins": 0.20629781484603882, "rewards/rejected": -0.3865187168121338, "step": 4178 }, { "epoch": 11.441478439425051, "grad_norm": 4.616252899169922, "learning_rate": 4.2767123287671233e-07, "log_odds_chosen": 1.8630014657974243, "log_odds_ratio": -0.3546295166015625, "logits/chosen": 0.9160969853401184, "logits/rejected": 0.9453529715538025, "logps/chosen": -2.144740343093872, "logps/rejected": -3.88179874420166, "loss": 0.7409, "nll_loss": 0.7054299116134644, "rewards/accuracies": 1.0, "rewards/chosen": -0.21447403728961945, "rewards/margins": 0.1737058311700821, "rewards/rejected": -0.38817986845970154, "step": 4179 }, { "epoch": 11.444216290212184, "grad_norm": 5.810783386230469, "learning_rate": 4.2753424657534243e-07, "log_odds_chosen": 1.7047855854034424, "log_odds_ratio": -0.2680479884147644, "logits/chosen": 0.9813494682312012, "logits/rejected": 0.869702160358429, "logps/chosen": -1.7131750583648682, "logps/rejected": -3.2405266761779785, "loss": 0.6749, "nll_loss": 0.6481081247329712, "rewards/accuracies": 1.0, "rewards/chosen": -0.17131748795509338, "rewards/margins": 0.15273517370224, "rewards/rejected": -0.3240526616573334, "step": 4180 }, { "epoch": 11.446954140999315, "grad_norm": 5.315943717956543, "learning_rate": 4.273972602739726e-07, "log_odds_chosen": 2.8399479389190674, "log_odds_ratio": -0.15712635219097137, "logits/chosen": 1.0693702697753906, "logits/rejected": 1.1453628540039062, "logps/chosen": -2.3523061275482178, "logps/rejected": -5.038189888000488, "loss": 0.6397, "nll_loss": 0.6239755749702454, "rewards/accuracies": 1.0, "rewards/chosen": -0.23523062467575073, "rewards/margins": 0.2685883641242981, "rewards/rejected": -0.5038189888000488, "step": 4181 }, { "epoch": 11.449691991786448, "grad_norm": 5.071568965911865, "learning_rate": 4.2726027397260274e-07, "log_odds_chosen": 1.903295636177063, "log_odds_ratio": -0.2825922667980194, "logits/chosen": 0.7542307376861572, "logits/rejected": 0.7553229928016663, "logps/chosen": -2.296799421310425, "logps/rejected": -4.117150783538818, "loss": 0.7132, "nll_loss": 0.684907078742981, "rewards/accuracies": 0.875, "rewards/chosen": -0.22967994213104248, "rewards/margins": 0.1820351481437683, "rewards/rejected": -0.4117150902748108, "step": 4182 }, { "epoch": 11.45242984257358, "grad_norm": 5.792971134185791, "learning_rate": 4.2712328767123284e-07, "log_odds_chosen": 0.9395634531974792, "log_odds_ratio": -0.40342825651168823, "logits/chosen": 0.843264102935791, "logits/rejected": 0.8278367519378662, "logps/chosen": -2.0239109992980957, "logps/rejected": -2.8851208686828613, "loss": 0.7586, "nll_loss": 0.718269407749176, "rewards/accuracies": 0.875, "rewards/chosen": -0.202391117811203, "rewards/margins": 0.08612095564603806, "rewards/rejected": -0.28851205110549927, "step": 4183 }, { "epoch": 11.455167693360712, "grad_norm": 5.911170959472656, "learning_rate": 4.26986301369863e-07, "log_odds_chosen": 2.598379135131836, "log_odds_ratio": -0.19967612624168396, "logits/chosen": 0.92451012134552, "logits/rejected": 1.0053426027297974, "logps/chosen": -2.2431414127349854, "logps/rejected": -4.746635913848877, "loss": 0.6109, "nll_loss": 0.590913712978363, "rewards/accuracies": 0.875, "rewards/chosen": -0.2243141531944275, "rewards/margins": 0.2503494620323181, "rewards/rejected": -0.4746636152267456, "step": 4184 }, { "epoch": 11.457905544147843, "grad_norm": 4.88873815536499, "learning_rate": 4.2684931506849314e-07, "log_odds_chosen": 1.1625392436981201, "log_odds_ratio": -0.48775714635849, "logits/chosen": 0.9238033294677734, "logits/rejected": 0.9551022052764893, "logps/chosen": -2.1426162719726562, "logps/rejected": -3.2374067306518555, "loss": 0.7549, "nll_loss": 0.7060955166816711, "rewards/accuracies": 0.75, "rewards/chosen": -0.21426163613796234, "rewards/margins": 0.10947903990745544, "rewards/rejected": -0.3237406611442566, "step": 4185 }, { "epoch": 11.460643394934976, "grad_norm": 6.990400314331055, "learning_rate": 4.2671232876712324e-07, "log_odds_chosen": 1.2808635234832764, "log_odds_ratio": -0.7319185733795166, "logits/chosen": 0.9275669455528259, "logits/rejected": 0.9746502041816711, "logps/chosen": -2.5707309246063232, "logps/rejected": -3.7388052940368652, "loss": 0.6673, "nll_loss": 0.5941396355628967, "rewards/accuracies": 0.75, "rewards/chosen": -0.2570731043815613, "rewards/margins": 0.11680743843317032, "rewards/rejected": -0.373880535364151, "step": 4186 }, { "epoch": 11.463381245722108, "grad_norm": 4.9337286949157715, "learning_rate": 4.265753424657534e-07, "log_odds_chosen": 2.9169058799743652, "log_odds_ratio": -0.28134995698928833, "logits/chosen": 0.8934395909309387, "logits/rejected": 0.8348393440246582, "logps/chosen": -2.466935634613037, "logps/rejected": -5.2746171951293945, "loss": 0.6719, "nll_loss": 0.6438055634498596, "rewards/accuracies": 0.75, "rewards/chosen": -0.24669356644153595, "rewards/margins": 0.28076812624931335, "rewards/rejected": -0.5274616479873657, "step": 4187 }, { "epoch": 11.46611909650924, "grad_norm": 3.925025224685669, "learning_rate": 4.2643835616438354e-07, "log_odds_chosen": 2.1156985759735107, "log_odds_ratio": -0.3075554668903351, "logits/chosen": 0.8293685913085938, "logits/rejected": 0.8274814486503601, "logps/chosen": -1.9795944690704346, "logps/rejected": -3.995323419570923, "loss": 0.7053, "nll_loss": 0.6745821833610535, "rewards/accuracies": 0.75, "rewards/chosen": -0.19795945286750793, "rewards/margins": 0.20157289505004883, "rewards/rejected": -0.3995323181152344, "step": 4188 }, { "epoch": 11.468856947296372, "grad_norm": 4.5415778160095215, "learning_rate": 4.263013698630137e-07, "log_odds_chosen": 2.0465805530548096, "log_odds_ratio": -0.2318452149629593, "logits/chosen": 0.8535338640213013, "logits/rejected": 0.8923671245574951, "logps/chosen": -1.5038423538208008, "logps/rejected": -3.3459630012512207, "loss": 0.626, "nll_loss": 0.6027884483337402, "rewards/accuracies": 1.0, "rewards/chosen": -0.15038421750068665, "rewards/margins": 0.18421204388141632, "rewards/rejected": -0.33459627628326416, "step": 4189 }, { "epoch": 11.471594798083505, "grad_norm": 4.2976531982421875, "learning_rate": 4.261643835616438e-07, "log_odds_chosen": 2.160059690475464, "log_odds_ratio": -0.34822505712509155, "logits/chosen": 1.112702488899231, "logits/rejected": 1.184126377105713, "logps/chosen": -2.1800272464752197, "logps/rejected": -4.212181568145752, "loss": 0.684, "nll_loss": 0.649193525314331, "rewards/accuracies": 0.875, "rewards/chosen": -0.21800273656845093, "rewards/margins": 0.20321540534496307, "rewards/rejected": -0.4212181270122528, "step": 4190 }, { "epoch": 11.474332648870636, "grad_norm": 5.564323425292969, "learning_rate": 4.26027397260274e-07, "log_odds_chosen": 1.9988229274749756, "log_odds_ratio": -0.5609735250473022, "logits/chosen": 0.813438892364502, "logits/rejected": 0.929709792137146, "logps/chosen": -2.2805800437927246, "logps/rejected": -4.188499450683594, "loss": 0.6362, "nll_loss": 0.5800578594207764, "rewards/accuracies": 0.75, "rewards/chosen": -0.22805801033973694, "rewards/margins": 0.19079196453094482, "rewards/rejected": -0.4188499450683594, "step": 4191 }, { "epoch": 11.477070499657769, "grad_norm": 5.038348197937012, "learning_rate": 4.258904109589041e-07, "log_odds_chosen": 1.9224201440811157, "log_odds_ratio": -0.29890745878219604, "logits/chosen": 0.9657331705093384, "logits/rejected": 1.053572416305542, "logps/chosen": -2.243210792541504, "logps/rejected": -4.020902633666992, "loss": 0.6281, "nll_loss": 0.5982301235198975, "rewards/accuracies": 0.875, "rewards/chosen": -0.22432108223438263, "rewards/margins": 0.17776918411254883, "rewards/rejected": -0.40209025144577026, "step": 4192 }, { "epoch": 11.4798083504449, "grad_norm": 5.621075630187988, "learning_rate": 4.257534246575342e-07, "log_odds_chosen": 3.0688693523406982, "log_odds_ratio": -0.13367025554180145, "logits/chosen": 0.7637355327606201, "logits/rejected": 0.8537671566009521, "logps/chosen": -1.9522088766098022, "logps/rejected": -4.85533332824707, "loss": 0.7594, "nll_loss": 0.7460487484931946, "rewards/accuracies": 1.0, "rewards/chosen": -0.19522088766098022, "rewards/margins": 0.2903124690055847, "rewards/rejected": -0.48553335666656494, "step": 4193 }, { "epoch": 11.482546201232033, "grad_norm": 4.529960632324219, "learning_rate": 4.2561643835616435e-07, "log_odds_chosen": 2.2810771465301514, "log_odds_ratio": -0.29873424768447876, "logits/chosen": 1.1629921197891235, "logits/rejected": 1.1639204025268555, "logps/chosen": -1.8332960605621338, "logps/rejected": -4.010313034057617, "loss": 0.6662, "nll_loss": 0.6363124847412109, "rewards/accuracies": 0.875, "rewards/chosen": -0.18332959711551666, "rewards/margins": 0.21770168840885162, "rewards/rejected": -0.4010312855243683, "step": 4194 }, { "epoch": 11.485284052019164, "grad_norm": 5.523836612701416, "learning_rate": 4.254794520547945e-07, "log_odds_chosen": 1.9946677684783936, "log_odds_ratio": -0.24193993210792542, "logits/chosen": 0.8038894534111023, "logits/rejected": 0.9079301953315735, "logps/chosen": -2.7615432739257812, "logps/rejected": -4.6974945068359375, "loss": 0.8164, "nll_loss": 0.7922163009643555, "rewards/accuracies": 1.0, "rewards/chosen": -0.2761543393135071, "rewards/margins": 0.19359512627124786, "rewards/rejected": -0.46974945068359375, "step": 4195 }, { "epoch": 11.488021902806297, "grad_norm": 6.296908378601074, "learning_rate": 4.2534246575342465e-07, "log_odds_chosen": 1.562160611152649, "log_odds_ratio": -0.326677143573761, "logits/chosen": 0.910452127456665, "logits/rejected": 0.9663007259368896, "logps/chosen": -2.32126522064209, "logps/rejected": -3.765899896621704, "loss": 0.6947, "nll_loss": 0.662079393863678, "rewards/accuracies": 0.875, "rewards/chosen": -0.23212651908397675, "rewards/margins": 0.1444634646177292, "rewards/rejected": -0.37658998370170593, "step": 4196 }, { "epoch": 11.49075975359343, "grad_norm": 5.181819915771484, "learning_rate": 4.2520547945205475e-07, "log_odds_chosen": 1.7007899284362793, "log_odds_ratio": -0.28803014755249023, "logits/chosen": 0.6157097816467285, "logits/rejected": 0.5665507316589355, "logps/chosen": -2.0860564708709717, "logps/rejected": -3.674272060394287, "loss": 0.7837, "nll_loss": 0.7548924684524536, "rewards/accuracies": 0.875, "rewards/chosen": -0.20860564708709717, "rewards/margins": 0.15882156789302826, "rewards/rejected": -0.3674272298812866, "step": 4197 }, { "epoch": 11.493497604380561, "grad_norm": 5.121516227722168, "learning_rate": 4.2506849315068496e-07, "log_odds_chosen": 2.462294578552246, "log_odds_ratio": -0.20314525067806244, "logits/chosen": 1.0019617080688477, "logits/rejected": 1.0613361597061157, "logps/chosen": -2.011920928955078, "logps/rejected": -4.3327555656433105, "loss": 0.7643, "nll_loss": 0.7439598441123962, "rewards/accuracies": 1.0, "rewards/chosen": -0.20119211077690125, "rewards/margins": 0.23208343982696533, "rewards/rejected": -0.4332755506038666, "step": 4198 }, { "epoch": 11.496235455167694, "grad_norm": 3.9044175148010254, "learning_rate": 4.2493150684931506e-07, "log_odds_chosen": 2.5398244857788086, "log_odds_ratio": -0.1678829789161682, "logits/chosen": 1.1490179300308228, "logits/rejected": 1.130881905555725, "logps/chosen": -1.8856902122497559, "logps/rejected": -4.255526065826416, "loss": 0.6055, "nll_loss": 0.588712751865387, "rewards/accuracies": 1.0, "rewards/chosen": -0.18856903910636902, "rewards/margins": 0.23698356747627258, "rewards/rejected": -0.4255526065826416, "step": 4199 }, { "epoch": 11.498973305954825, "grad_norm": 4.874216079711914, "learning_rate": 4.2479452054794516e-07, "log_odds_chosen": 2.1516575813293457, "log_odds_ratio": -0.17246440052986145, "logits/chosen": 0.8974206447601318, "logits/rejected": 0.8789731860160828, "logps/chosen": -1.9118614196777344, "logps/rejected": -3.9151248931884766, "loss": 0.703, "nll_loss": 0.6858011484146118, "rewards/accuracies": 1.0, "rewards/chosen": -0.19118613004684448, "rewards/margins": 0.2003263682126999, "rewards/rejected": -0.39151251316070557, "step": 4200 }, { "epoch": 11.501711156741958, "grad_norm": 6.327593803405762, "learning_rate": 4.246575342465753e-07, "log_odds_chosen": 1.8618580102920532, "log_odds_ratio": -0.30956846475601196, "logits/chosen": 0.8338688015937805, "logits/rejected": 0.7669596672058105, "logps/chosen": -1.8272476196289062, "logps/rejected": -3.570073366165161, "loss": 0.6954, "nll_loss": 0.6643935441970825, "rewards/accuracies": 1.0, "rewards/chosen": -0.1827247589826584, "rewards/margins": 0.17428259551525116, "rewards/rejected": -0.35700732469558716, "step": 4201 }, { "epoch": 11.50444900752909, "grad_norm": 4.614299297332764, "learning_rate": 4.2452054794520546e-07, "log_odds_chosen": 2.5709633827209473, "log_odds_ratio": -0.2976769208908081, "logits/chosen": 0.8507764935493469, "logits/rejected": 0.8724180459976196, "logps/chosen": -2.3775691986083984, "logps/rejected": -4.8593597412109375, "loss": 0.7275, "nll_loss": 0.6977320909500122, "rewards/accuracies": 0.875, "rewards/chosen": -0.23775692284107208, "rewards/margins": 0.2481791079044342, "rewards/rejected": -0.4859360456466675, "step": 4202 }, { "epoch": 11.507186858316222, "grad_norm": 5.260666370391846, "learning_rate": 4.243835616438356e-07, "log_odds_chosen": 3.2030978202819824, "log_odds_ratio": -0.2133924663066864, "logits/chosen": 0.8151234984397888, "logits/rejected": 0.8250576257705688, "logps/chosen": -2.0213334560394287, "logps/rejected": -5.011335372924805, "loss": 0.8127, "nll_loss": 0.7913203835487366, "rewards/accuracies": 0.875, "rewards/chosen": -0.20213334262371063, "rewards/margins": 0.29900020360946655, "rewards/rejected": -0.5011335611343384, "step": 4203 }, { "epoch": 11.509924709103354, "grad_norm": 5.058841705322266, "learning_rate": 4.242465753424657e-07, "log_odds_chosen": 2.6638317108154297, "log_odds_ratio": -0.21473923325538635, "logits/chosen": 1.065169334411621, "logits/rejected": 1.0872776508331299, "logps/chosen": -2.3016157150268555, "logps/rejected": -4.883489608764648, "loss": 0.7208, "nll_loss": 0.6993387937545776, "rewards/accuracies": 1.0, "rewards/chosen": -0.23016157746315002, "rewards/margins": 0.25818735361099243, "rewards/rejected": -0.48834896087646484, "step": 4204 }, { "epoch": 11.512662559890487, "grad_norm": 5.792057037353516, "learning_rate": 4.241095890410959e-07, "log_odds_chosen": 1.6864818334579468, "log_odds_ratio": -0.4245513379573822, "logits/chosen": 0.7488456964492798, "logits/rejected": 0.725161075592041, "logps/chosen": -2.0407676696777344, "logps/rejected": -3.6635799407958984, "loss": 0.7079, "nll_loss": 0.6654167771339417, "rewards/accuracies": 0.875, "rewards/chosen": -0.20407679677009583, "rewards/margins": 0.16228118538856506, "rewards/rejected": -0.3663579821586609, "step": 4205 }, { "epoch": 11.515400410677618, "grad_norm": 4.727604866027832, "learning_rate": 4.23972602739726e-07, "log_odds_chosen": 2.6872754096984863, "log_odds_ratio": -0.20846675336360931, "logits/chosen": 0.9805063605308533, "logits/rejected": 0.996650218963623, "logps/chosen": -1.7525050640106201, "logps/rejected": -4.274929523468018, "loss": 0.5975, "nll_loss": 0.5766181349754333, "rewards/accuracies": 1.0, "rewards/chosen": -0.17525051534175873, "rewards/margins": 0.25224244594573975, "rewards/rejected": -0.4274929165840149, "step": 4206 }, { "epoch": 11.51813826146475, "grad_norm": 5.463222980499268, "learning_rate": 4.238356164383561e-07, "log_odds_chosen": 1.8839504718780518, "log_odds_ratio": -0.2486126720905304, "logits/chosen": 0.7787142395973206, "logits/rejected": 0.8276112079620361, "logps/chosen": -2.372659206390381, "logps/rejected": -4.164068698883057, "loss": 0.6399, "nll_loss": 0.6150072813034058, "rewards/accuracies": 1.0, "rewards/chosen": -0.237265944480896, "rewards/margins": 0.17914095520973206, "rewards/rejected": -0.41640686988830566, "step": 4207 }, { "epoch": 11.520876112251882, "grad_norm": 7.505594730377197, "learning_rate": 4.2369863013698627e-07, "log_odds_chosen": 1.6015690565109253, "log_odds_ratio": -0.5055570006370544, "logits/chosen": 0.8439757227897644, "logits/rejected": 0.8711850643157959, "logps/chosen": -2.761864185333252, "logps/rejected": -4.269729137420654, "loss": 0.7184, "nll_loss": 0.6678355932235718, "rewards/accuracies": 0.625, "rewards/chosen": -0.27618640661239624, "rewards/margins": 0.15078651905059814, "rewards/rejected": -0.4269729554653168, "step": 4208 }, { "epoch": 11.523613963039015, "grad_norm": 6.84376335144043, "learning_rate": 4.235616438356164e-07, "log_odds_chosen": 0.9774725437164307, "log_odds_ratio": -0.3549288511276245, "logits/chosen": 0.9532436728477478, "logits/rejected": 0.9287993907928467, "logps/chosen": -1.5689009428024292, "logps/rejected": -2.390063762664795, "loss": 0.6697, "nll_loss": 0.634255051612854, "rewards/accuracies": 1.0, "rewards/chosen": -0.15689009428024292, "rewards/margins": 0.0821162760257721, "rewards/rejected": -0.23900637030601501, "step": 4209 }, { "epoch": 11.526351813826146, "grad_norm": 4.7891645431518555, "learning_rate": 4.2342465753424657e-07, "log_odds_chosen": 1.560097098350525, "log_odds_ratio": -0.29749467968940735, "logits/chosen": 0.6838188767433167, "logits/rejected": 0.7253028154373169, "logps/chosen": -2.0822060108184814, "logps/rejected": -3.5656726360321045, "loss": 0.6968, "nll_loss": 0.6670396327972412, "rewards/accuracies": 0.875, "rewards/chosen": -0.20822060108184814, "rewards/margins": 0.1483466625213623, "rewards/rejected": -0.35656726360321045, "step": 4210 }, { "epoch": 11.529089664613279, "grad_norm": 5.151943206787109, "learning_rate": 4.2328767123287667e-07, "log_odds_chosen": 1.1322576999664307, "log_odds_ratio": -0.3719339370727539, "logits/chosen": 0.742725670337677, "logits/rejected": 0.7400944232940674, "logps/chosen": -1.8269413709640503, "logps/rejected": -2.860621452331543, "loss": 0.6273, "nll_loss": 0.5900843143463135, "rewards/accuracies": 0.875, "rewards/chosen": -0.18269416689872742, "rewards/margins": 0.10336800664663315, "rewards/rejected": -0.2860621511936188, "step": 4211 }, { "epoch": 11.53182751540041, "grad_norm": 5.542103290557861, "learning_rate": 4.231506849315069e-07, "log_odds_chosen": 2.172229290008545, "log_odds_ratio": -0.27033698558807373, "logits/chosen": 0.7959585189819336, "logits/rejected": 0.8156964182853699, "logps/chosen": -2.2814090251922607, "logps/rejected": -4.329182147979736, "loss": 0.7005, "nll_loss": 0.673494279384613, "rewards/accuracies": 0.875, "rewards/chosen": -0.2281409054994583, "rewards/margins": 0.2047773152589798, "rewards/rejected": -0.4329182207584381, "step": 4212 }, { "epoch": 11.534565366187543, "grad_norm": 7.91402006149292, "learning_rate": 4.23013698630137e-07, "log_odds_chosen": 2.780585289001465, "log_odds_ratio": -0.506430983543396, "logits/chosen": 1.0164594650268555, "logits/rejected": 0.96933513879776, "logps/chosen": -2.376849889755249, "logps/rejected": -5.0251665115356445, "loss": 0.7438, "nll_loss": 0.6931930780410767, "rewards/accuracies": 0.875, "rewards/chosen": -0.23768500983715057, "rewards/margins": 0.2648317217826843, "rewards/rejected": -0.5025166869163513, "step": 4213 }, { "epoch": 11.537303216974674, "grad_norm": 4.745065689086914, "learning_rate": 4.2287671232876707e-07, "log_odds_chosen": 2.5190348625183105, "log_odds_ratio": -0.15750442445278168, "logits/chosen": 0.8928844928741455, "logits/rejected": 0.9080961346626282, "logps/chosen": -2.5375382900238037, "logps/rejected": -4.935739517211914, "loss": 0.7214, "nll_loss": 0.7056710720062256, "rewards/accuracies": 1.0, "rewards/chosen": -0.2537538409233093, "rewards/margins": 0.23982012271881104, "rewards/rejected": -0.49357396364212036, "step": 4214 }, { "epoch": 11.540041067761807, "grad_norm": 5.0847649574279785, "learning_rate": 4.227397260273972e-07, "log_odds_chosen": 1.9379363059997559, "log_odds_ratio": -0.36377283930778503, "logits/chosen": 0.8735125064849854, "logits/rejected": 0.8355064392089844, "logps/chosen": -2.3549797534942627, "logps/rejected": -4.212066173553467, "loss": 0.6909, "nll_loss": 0.6544792652130127, "rewards/accuracies": 0.75, "rewards/chosen": -0.23549798130989075, "rewards/margins": 0.1857086718082428, "rewards/rejected": -0.42120665311813354, "step": 4215 }, { "epoch": 11.542778918548938, "grad_norm": 4.744349479675293, "learning_rate": 4.226027397260274e-07, "log_odds_chosen": 1.8908931016921997, "log_odds_ratio": -0.5070298314094543, "logits/chosen": 0.9708420038223267, "logits/rejected": 0.9978275299072266, "logps/chosen": -2.07486891746521, "logps/rejected": -3.840325355529785, "loss": 0.7407, "nll_loss": 0.6900401711463928, "rewards/accuracies": 0.75, "rewards/chosen": -0.20748689770698547, "rewards/margins": 0.176545649766922, "rewards/rejected": -0.38403254747390747, "step": 4216 }, { "epoch": 11.545516769336071, "grad_norm": 5.892232418060303, "learning_rate": 4.2246575342465753e-07, "log_odds_chosen": 1.9249670505523682, "log_odds_ratio": -0.19401119649410248, "logits/chosen": 0.918907880783081, "logits/rejected": 0.9124504923820496, "logps/chosen": -1.7271554470062256, "logps/rejected": -3.4908018112182617, "loss": 0.6625, "nll_loss": 0.6431083679199219, "rewards/accuracies": 1.0, "rewards/chosen": -0.17271554470062256, "rewards/margins": 0.17636464536190033, "rewards/rejected": -0.3490802049636841, "step": 4217 }, { "epoch": 11.548254620123203, "grad_norm": 4.274880886077881, "learning_rate": 4.2232876712328763e-07, "log_odds_chosen": 4.017458915710449, "log_odds_ratio": -0.1715734899044037, "logits/chosen": 0.8986002802848816, "logits/rejected": 0.9180493950843811, "logps/chosen": -2.344348669052124, "logps/rejected": -6.257975101470947, "loss": 0.7022, "nll_loss": 0.6850870847702026, "rewards/accuracies": 0.875, "rewards/chosen": -0.23443487286567688, "rewards/margins": 0.39136266708374023, "rewards/rejected": -0.6257975101470947, "step": 4218 }, { "epoch": 11.550992470910336, "grad_norm": 5.106364727020264, "learning_rate": 4.2219178082191783e-07, "log_odds_chosen": 1.3297559022903442, "log_odds_ratio": -0.37720632553100586, "logits/chosen": 0.8649277091026306, "logits/rejected": 0.9061062335968018, "logps/chosen": -2.0434648990631104, "logps/rejected": -3.279207229614258, "loss": 0.7414, "nll_loss": 0.7036389112472534, "rewards/accuracies": 1.0, "rewards/chosen": -0.20434650778770447, "rewards/margins": 0.12357418239116669, "rewards/rejected": -0.32792070508003235, "step": 4219 }, { "epoch": 11.553730321697467, "grad_norm": 4.872440814971924, "learning_rate": 4.2205479452054793e-07, "log_odds_chosen": 2.6826553344726562, "log_odds_ratio": -0.1635211855173111, "logits/chosen": 0.6493346691131592, "logits/rejected": 0.6769345998764038, "logps/chosen": -2.408250570297241, "logps/rejected": -4.983166694641113, "loss": 0.7305, "nll_loss": 0.7141209840774536, "rewards/accuracies": 1.0, "rewards/chosen": -0.24082505702972412, "rewards/margins": 0.2574916183948517, "rewards/rejected": -0.4983166456222534, "step": 4220 }, { "epoch": 11.5564681724846, "grad_norm": 4.054308891296387, "learning_rate": 4.2191780821917803e-07, "log_odds_chosen": 1.8523942232131958, "log_odds_ratio": -0.31125640869140625, "logits/chosen": 0.7430737018585205, "logits/rejected": 0.7810831069946289, "logps/chosen": -2.048931121826172, "logps/rejected": -3.80887508392334, "loss": 0.7046, "nll_loss": 0.673480749130249, "rewards/accuracies": 0.875, "rewards/chosen": -0.2048931121826172, "rewards/margins": 0.175994411110878, "rewards/rejected": -0.380887508392334, "step": 4221 }, { "epoch": 11.55920602327173, "grad_norm": 5.410475254058838, "learning_rate": 4.2178082191780824e-07, "log_odds_chosen": 2.0319385528564453, "log_odds_ratio": -0.33718517422676086, "logits/chosen": 0.805343508720398, "logits/rejected": 0.7001652717590332, "logps/chosen": -1.9233574867248535, "logps/rejected": -3.835556983947754, "loss": 0.7041, "nll_loss": 0.6703974008560181, "rewards/accuracies": 0.875, "rewards/chosen": -0.19233575463294983, "rewards/margins": 0.1912199705839157, "rewards/rejected": -0.38355574011802673, "step": 4222 }, { "epoch": 11.561943874058864, "grad_norm": 5.089946269989014, "learning_rate": 4.2164383561643834e-07, "log_odds_chosen": 1.4902353286743164, "log_odds_ratio": -0.2590322494506836, "logits/chosen": 0.7682743668556213, "logits/rejected": 0.631166934967041, "logps/chosen": -1.7584738731384277, "logps/rejected": -3.0690152645111084, "loss": 0.672, "nll_loss": 0.6461267471313477, "rewards/accuracies": 1.0, "rewards/chosen": -0.1758473962545395, "rewards/margins": 0.1310541331768036, "rewards/rejected": -0.3069015145301819, "step": 4223 }, { "epoch": 11.564681724845997, "grad_norm": 4.661965847015381, "learning_rate": 4.215068493150685e-07, "log_odds_chosen": 1.5224876403808594, "log_odds_ratio": -0.2608433961868286, "logits/chosen": 0.706652820110321, "logits/rejected": 0.7361505031585693, "logps/chosen": -1.818047285079956, "logps/rejected": -3.223034381866455, "loss": 0.6388, "nll_loss": 0.6127622723579407, "rewards/accuracies": 1.0, "rewards/chosen": -0.18180474638938904, "rewards/margins": 0.14049869775772095, "rewards/rejected": -0.32230344414711, "step": 4224 }, { "epoch": 11.567419575633128, "grad_norm": 4.590210437774658, "learning_rate": 4.213698630136986e-07, "log_odds_chosen": 1.9522011280059814, "log_odds_ratio": -0.23234643042087555, "logits/chosen": 0.8074231147766113, "logits/rejected": 0.8437227010726929, "logps/chosen": -2.031147003173828, "logps/rejected": -3.874314785003662, "loss": 0.7823, "nll_loss": 0.7590783834457397, "rewards/accuracies": 1.0, "rewards/chosen": -0.20311470329761505, "rewards/margins": 0.18431678414344788, "rewards/rejected": -0.3874315023422241, "step": 4225 }, { "epoch": 11.570157426420261, "grad_norm": 5.129886150360107, "learning_rate": 4.212328767123288e-07, "log_odds_chosen": 2.814281940460205, "log_odds_ratio": -0.3043602705001831, "logits/chosen": 0.8697655200958252, "logits/rejected": 0.8758119940757751, "logps/chosen": -2.0394039154052734, "logps/rejected": -4.698279857635498, "loss": 0.6775, "nll_loss": 0.6471013426780701, "rewards/accuracies": 0.875, "rewards/chosen": -0.20394039154052734, "rewards/margins": 0.265887588262558, "rewards/rejected": -0.4698279798030853, "step": 4226 }, { "epoch": 11.572895277207392, "grad_norm": 5.99323034286499, "learning_rate": 4.210958904109589e-07, "log_odds_chosen": 2.6688623428344727, "log_odds_ratio": -0.1925753951072693, "logits/chosen": 0.835726261138916, "logits/rejected": 0.9377763271331787, "logps/chosen": -1.9895261526107788, "logps/rejected": -4.210822105407715, "loss": 0.6519, "nll_loss": 0.6326359510421753, "rewards/accuracies": 0.875, "rewards/chosen": -0.19895261526107788, "rewards/margins": 0.22212958335876465, "rewards/rejected": -0.42108219861984253, "step": 4227 }, { "epoch": 11.575633127994525, "grad_norm": 5.688154697418213, "learning_rate": 4.20958904109589e-07, "log_odds_chosen": 1.4271140098571777, "log_odds_ratio": -0.4051850438117981, "logits/chosen": 0.8575252294540405, "logits/rejected": 0.8811353445053101, "logps/chosen": -2.147517204284668, "logps/rejected": -3.444063186645508, "loss": 0.7903, "nll_loss": 0.7498195171356201, "rewards/accuracies": 0.75, "rewards/chosen": -0.2147517204284668, "rewards/margins": 0.12965461611747742, "rewards/rejected": -0.3444063365459442, "step": 4228 }, { "epoch": 11.578370978781656, "grad_norm": 4.647944450378418, "learning_rate": 4.208219178082192e-07, "log_odds_chosen": 1.5010192394256592, "log_odds_ratio": -0.27483808994293213, "logits/chosen": 0.9110297560691833, "logits/rejected": 0.9327630996704102, "logps/chosen": -1.9788001775741577, "logps/rejected": -3.377876043319702, "loss": 0.6901, "nll_loss": 0.6625964045524597, "rewards/accuracies": 1.0, "rewards/chosen": -0.19788002967834473, "rewards/margins": 0.1399075835943222, "rewards/rejected": -0.33778759837150574, "step": 4229 }, { "epoch": 11.58110882956879, "grad_norm": 4.787071704864502, "learning_rate": 4.206849315068493e-07, "log_odds_chosen": 2.6053199768066406, "log_odds_ratio": -0.29203924536705017, "logits/chosen": 0.7390297651290894, "logits/rejected": 0.7450753450393677, "logps/chosen": -2.2686407566070557, "logps/rejected": -4.761756420135498, "loss": 0.7866, "nll_loss": 0.7573981285095215, "rewards/accuracies": 0.875, "rewards/chosen": -0.22686409950256348, "rewards/margins": 0.24931156635284424, "rewards/rejected": -0.4761756360530853, "step": 4230 }, { "epoch": 11.58384668035592, "grad_norm": 5.558908939361572, "learning_rate": 4.2054794520547945e-07, "log_odds_chosen": 2.171724319458008, "log_odds_ratio": -0.4085518419742584, "logits/chosen": 0.7483865022659302, "logits/rejected": 0.7395610809326172, "logps/chosen": -2.2703893184661865, "logps/rejected": -4.340331077575684, "loss": 0.7112, "nll_loss": 0.6703405976295471, "rewards/accuracies": 0.75, "rewards/chosen": -0.2270389199256897, "rewards/margins": 0.20699414610862732, "rewards/rejected": -0.434033066034317, "step": 4231 }, { "epoch": 11.586584531143053, "grad_norm": 4.650881767272949, "learning_rate": 4.2041095890410955e-07, "log_odds_chosen": 1.7920438051223755, "log_odds_ratio": -0.19890590012073517, "logits/chosen": 0.6650710701942444, "logits/rejected": 0.7764483094215393, "logps/chosen": -1.6912412643432617, "logps/rejected": -3.267272710800171, "loss": 0.6727, "nll_loss": 0.6527669429779053, "rewards/accuracies": 1.0, "rewards/chosen": -0.16912412643432617, "rewards/margins": 0.1576031595468521, "rewards/rejected": -0.3267272710800171, "step": 4232 }, { "epoch": 11.589322381930184, "grad_norm": 4.8773322105407715, "learning_rate": 4.202739726027397e-07, "log_odds_chosen": 2.3375375270843506, "log_odds_ratio": -0.2838060259819031, "logits/chosen": 0.5931986570358276, "logits/rejected": 0.6279572248458862, "logps/chosen": -2.4121758937835693, "logps/rejected": -4.635130882263184, "loss": 0.6897, "nll_loss": 0.6613335609436035, "rewards/accuracies": 0.875, "rewards/chosen": -0.24121758341789246, "rewards/margins": 0.22229552268981934, "rewards/rejected": -0.4635131061077118, "step": 4233 }, { "epoch": 11.592060232717317, "grad_norm": 5.301600456237793, "learning_rate": 4.2013698630136985e-07, "log_odds_chosen": 1.7637890577316284, "log_odds_ratio": -0.30801481008529663, "logits/chosen": 0.6452298164367676, "logits/rejected": 0.6254841089248657, "logps/chosen": -1.886919379234314, "logps/rejected": -3.4911270141601562, "loss": 0.7435, "nll_loss": 0.7126911878585815, "rewards/accuracies": 1.0, "rewards/chosen": -0.18869194388389587, "rewards/margins": 0.16042077541351318, "rewards/rejected": -0.34911271929740906, "step": 4234 }, { "epoch": 11.594798083504449, "grad_norm": 5.9432830810546875, "learning_rate": 4.1999999999999995e-07, "log_odds_chosen": 1.768592357635498, "log_odds_ratio": -0.2611815929412842, "logits/chosen": 1.0315179824829102, "logits/rejected": 0.9692562818527222, "logps/chosen": -2.055100917816162, "logps/rejected": -3.660949468612671, "loss": 0.6657, "nll_loss": 0.6395553350448608, "rewards/accuracies": 1.0, "rewards/chosen": -0.20551010966300964, "rewards/margins": 0.16058485209941864, "rewards/rejected": -0.3660949468612671, "step": 4235 }, { "epoch": 11.597535934291582, "grad_norm": 5.542301654815674, "learning_rate": 4.1986301369863015e-07, "log_odds_chosen": 3.0585238933563232, "log_odds_ratio": -0.08392678946256638, "logits/chosen": 0.8255741596221924, "logits/rejected": 0.8293445110321045, "logps/chosen": -2.423896312713623, "logps/rejected": -5.35543155670166, "loss": 0.6847, "nll_loss": 0.6763283610343933, "rewards/accuracies": 1.0, "rewards/chosen": -0.24238964915275574, "rewards/margins": 0.2931535243988037, "rewards/rejected": -0.5355432033538818, "step": 4236 }, { "epoch": 11.600273785078713, "grad_norm": 5.696358680725098, "learning_rate": 4.1972602739726025e-07, "log_odds_chosen": 2.424323797225952, "log_odds_ratio": -0.18041075766086578, "logits/chosen": 0.7252638339996338, "logits/rejected": 0.7550806999206543, "logps/chosen": -1.888540506362915, "logps/rejected": -4.123106002807617, "loss": 0.6399, "nll_loss": 0.6218850016593933, "rewards/accuracies": 1.0, "rewards/chosen": -0.18885406851768494, "rewards/margins": 0.22345653176307678, "rewards/rejected": -0.4123106002807617, "step": 4237 }, { "epoch": 11.603011635865846, "grad_norm": 6.938772678375244, "learning_rate": 4.195890410958904e-07, "log_odds_chosen": 1.7860107421875, "log_odds_ratio": -0.3922388553619385, "logits/chosen": 1.0463924407958984, "logits/rejected": 1.0327297449111938, "logps/chosen": -3.4602198600769043, "logps/rejected": -5.209136009216309, "loss": 0.916, "nll_loss": 0.8767375946044922, "rewards/accuracies": 0.75, "rewards/chosen": -0.34602198004722595, "rewards/margins": 0.1748916357755661, "rewards/rejected": -0.5209136009216309, "step": 4238 }, { "epoch": 11.605749486652977, "grad_norm": 6.717133522033691, "learning_rate": 4.194520547945205e-07, "log_odds_chosen": 0.9734777212142944, "log_odds_ratio": -0.4903620779514313, "logits/chosen": 0.8190250992774963, "logits/rejected": 0.875941276550293, "logps/chosen": -2.80155348777771, "logps/rejected": -3.7044410705566406, "loss": 0.7544, "nll_loss": 0.7053611278533936, "rewards/accuracies": 0.75, "rewards/chosen": -0.28015533089637756, "rewards/margins": 0.09028875827789307, "rewards/rejected": -0.3704441487789154, "step": 4239 }, { "epoch": 11.60848733744011, "grad_norm": 6.76910924911499, "learning_rate": 4.1931506849315066e-07, "log_odds_chosen": 3.3740310668945312, "log_odds_ratio": -0.21008940041065216, "logits/chosen": 0.8874788284301758, "logits/rejected": 0.8942064642906189, "logps/chosen": -2.37239670753479, "logps/rejected": -5.64548397064209, "loss": 0.7526, "nll_loss": 0.7316135764122009, "rewards/accuracies": 1.0, "rewards/chosen": -0.23723967373371124, "rewards/margins": 0.32730868458747864, "rewards/rejected": -0.5645483732223511, "step": 4240 }, { "epoch": 11.611225188227241, "grad_norm": 6.720132827758789, "learning_rate": 4.191780821917808e-07, "log_odds_chosen": 2.3547439575195312, "log_odds_ratio": -0.43881237506866455, "logits/chosen": 0.853661298751831, "logits/rejected": 0.8685396909713745, "logps/chosen": -2.707859516143799, "logps/rejected": -4.971747398376465, "loss": 0.7297, "nll_loss": 0.6858136057853699, "rewards/accuracies": 0.875, "rewards/chosen": -0.27078598737716675, "rewards/margins": 0.22638878226280212, "rewards/rejected": -0.49717479944229126, "step": 4241 }, { "epoch": 11.613963039014374, "grad_norm": 7.09639835357666, "learning_rate": 4.190410958904109e-07, "log_odds_chosen": 2.425840139389038, "log_odds_ratio": -0.24025748670101166, "logits/chosen": 0.8465268611907959, "logits/rejected": 0.8130233883857727, "logps/chosen": -2.04616379737854, "logps/rejected": -4.323718070983887, "loss": 0.8078, "nll_loss": 0.7837770581245422, "rewards/accuracies": 1.0, "rewards/chosen": -0.20461639761924744, "rewards/margins": 0.22775539755821228, "rewards/rejected": -0.4323717951774597, "step": 4242 }, { "epoch": 11.616700889801505, "grad_norm": 4.954171180725098, "learning_rate": 4.189041095890411e-07, "log_odds_chosen": 2.5024642944335938, "log_odds_ratio": -0.2084815800189972, "logits/chosen": 0.9130359292030334, "logits/rejected": 0.9723432064056396, "logps/chosen": -2.1401822566986084, "logps/rejected": -4.402352333068848, "loss": 0.6722, "nll_loss": 0.6513477563858032, "rewards/accuracies": 1.0, "rewards/chosen": -0.21401825547218323, "rewards/margins": 0.22621700167655945, "rewards/rejected": -0.4402352571487427, "step": 4243 }, { "epoch": 11.619438740588638, "grad_norm": 5.272676944732666, "learning_rate": 4.187671232876712e-07, "log_odds_chosen": 1.275951862335205, "log_odds_ratio": -0.319851815700531, "logits/chosen": 0.9127624034881592, "logits/rejected": 1.00822913646698, "logps/chosen": -1.9992797374725342, "logps/rejected": -3.1816177368164062, "loss": 0.6074, "nll_loss": 0.5754534006118774, "rewards/accuracies": 0.875, "rewards/chosen": -0.19992798566818237, "rewards/margins": 0.11823379993438721, "rewards/rejected": -0.3181617856025696, "step": 4244 }, { "epoch": 11.62217659137577, "grad_norm": 5.8948140144348145, "learning_rate": 4.1863013698630136e-07, "log_odds_chosen": 1.4326159954071045, "log_odds_ratio": -0.27961471676826477, "logits/chosen": 0.884027898311615, "logits/rejected": 0.8322760462760925, "logps/chosen": -1.799276351928711, "logps/rejected": -3.1115331649780273, "loss": 0.6482, "nll_loss": 0.6202691793441772, "rewards/accuracies": 1.0, "rewards/chosen": -0.17992761731147766, "rewards/margins": 0.13122572004795074, "rewards/rejected": -0.3111533522605896, "step": 4245 }, { "epoch": 11.624914442162902, "grad_norm": 7.011460781097412, "learning_rate": 4.1849315068493146e-07, "log_odds_chosen": 2.555922031402588, "log_odds_ratio": -0.1967114508152008, "logits/chosen": 1.1791104078292847, "logits/rejected": 1.2110990285873413, "logps/chosen": -2.835620641708374, "logps/rejected": -5.31970739364624, "loss": 0.6782, "nll_loss": 0.658530056476593, "rewards/accuracies": 1.0, "rewards/chosen": -0.2835620939731598, "rewards/margins": 0.24840864539146423, "rewards/rejected": -0.531970739364624, "step": 4246 }, { "epoch": 11.627652292950033, "grad_norm": 6.356874942779541, "learning_rate": 4.183561643835616e-07, "log_odds_chosen": 1.2254462242126465, "log_odds_ratio": -0.35186293721199036, "logits/chosen": 0.889041006565094, "logits/rejected": 0.8301974534988403, "logps/chosen": -1.8280489444732666, "logps/rejected": -2.9656524658203125, "loss": 0.6573, "nll_loss": 0.6221579313278198, "rewards/accuracies": 0.875, "rewards/chosen": -0.1828049123287201, "rewards/margins": 0.11376035213470459, "rewards/rejected": -0.2965652346611023, "step": 4247 }, { "epoch": 11.630390143737166, "grad_norm": 4.720250129699707, "learning_rate": 4.1821917808219177e-07, "log_odds_chosen": 2.8118743896484375, "log_odds_ratio": -0.2276497185230255, "logits/chosen": 0.6440492868423462, "logits/rejected": 0.61576247215271, "logps/chosen": -1.630640983581543, "logps/rejected": -4.249958038330078, "loss": 0.7424, "nll_loss": 0.7196517586708069, "rewards/accuracies": 1.0, "rewards/chosen": -0.16306409239768982, "rewards/margins": 0.26193171739578247, "rewards/rejected": -0.4249958097934723, "step": 4248 }, { "epoch": 11.633127994524298, "grad_norm": 6.461691856384277, "learning_rate": 4.1808219178082187e-07, "log_odds_chosen": 1.6266279220581055, "log_odds_ratio": -0.24411901831626892, "logits/chosen": 0.8003233671188354, "logits/rejected": 0.8356213569641113, "logps/chosen": -2.1797268390655518, "logps/rejected": -3.6161348819732666, "loss": 0.7053, "nll_loss": 0.6809009313583374, "rewards/accuracies": 1.0, "rewards/chosen": -0.21797268092632294, "rewards/margins": 0.14364078640937805, "rewards/rejected": -0.3616134822368622, "step": 4249 }, { "epoch": 11.63586584531143, "grad_norm": 6.597568035125732, "learning_rate": 4.1794520547945207e-07, "log_odds_chosen": 2.3869078159332275, "log_odds_ratio": -0.2590485215187073, "logits/chosen": 0.8967506885528564, "logits/rejected": 0.8674547076225281, "logps/chosen": -1.5801780223846436, "logps/rejected": -3.7853240966796875, "loss": 0.6236, "nll_loss": 0.5976932048797607, "rewards/accuracies": 1.0, "rewards/chosen": -0.15801779925823212, "rewards/margins": 0.22051462531089783, "rewards/rejected": -0.37853240966796875, "step": 4250 }, { "epoch": 11.638603696098563, "grad_norm": 5.208794116973877, "learning_rate": 4.1780821917808217e-07, "log_odds_chosen": 2.096947193145752, "log_odds_ratio": -0.35416078567504883, "logits/chosen": 0.9157935380935669, "logits/rejected": 0.9149459004402161, "logps/chosen": -3.272979259490967, "logps/rejected": -5.2556939125061035, "loss": 0.8349, "nll_loss": 0.7994954586029053, "rewards/accuracies": 0.75, "rewards/chosen": -0.3272979259490967, "rewards/margins": 0.19827143847942352, "rewards/rejected": -0.5255693793296814, "step": 4251 }, { "epoch": 11.641341546885695, "grad_norm": 5.343075752258301, "learning_rate": 4.176712328767123e-07, "log_odds_chosen": 3.039316415786743, "log_odds_ratio": -0.17864257097244263, "logits/chosen": 0.9995031356811523, "logits/rejected": 1.0231201648712158, "logps/chosen": -2.4954123497009277, "logps/rejected": -5.45017671585083, "loss": 0.8224, "nll_loss": 0.8045433759689331, "rewards/accuracies": 0.875, "rewards/chosen": -0.249541237950325, "rewards/margins": 0.29547643661499023, "rewards/rejected": -0.545017659664154, "step": 4252 }, { "epoch": 11.644079397672828, "grad_norm": 4.984814643859863, "learning_rate": 4.175342465753424e-07, "log_odds_chosen": 2.154848337173462, "log_odds_ratio": -0.18161675333976746, "logits/chosen": 0.9199155569076538, "logits/rejected": 0.8713827729225159, "logps/chosen": -1.9439141750335693, "logps/rejected": -3.9033401012420654, "loss": 0.6987, "nll_loss": 0.6804900169372559, "rewards/accuracies": 1.0, "rewards/chosen": -0.1943914145231247, "rewards/margins": 0.19594261050224304, "rewards/rejected": -0.39033401012420654, "step": 4253 }, { "epoch": 11.646817248459959, "grad_norm": 4.3774237632751465, "learning_rate": 4.173972602739726e-07, "log_odds_chosen": 2.865095853805542, "log_odds_ratio": -0.18017281591892242, "logits/chosen": 0.7905018329620361, "logits/rejected": 0.6736626625061035, "logps/chosen": -2.328958511352539, "logps/rejected": -5.097288131713867, "loss": 0.7804, "nll_loss": 0.7623662948608398, "rewards/accuracies": 0.875, "rewards/chosen": -0.2328958809375763, "rewards/margins": 0.2768329977989197, "rewards/rejected": -0.5097289085388184, "step": 4254 }, { "epoch": 11.649555099247092, "grad_norm": 4.812199115753174, "learning_rate": 4.172602739726027e-07, "log_odds_chosen": 2.921724796295166, "log_odds_ratio": -0.16640865802764893, "logits/chosen": 0.8270955681800842, "logits/rejected": 0.8512316942214966, "logps/chosen": -2.0498321056365967, "logps/rejected": -4.828409671783447, "loss": 0.7661, "nll_loss": 0.7494637966156006, "rewards/accuracies": 1.0, "rewards/chosen": -0.2049832046031952, "rewards/margins": 0.27785778045654297, "rewards/rejected": -0.48284098505973816, "step": 4255 }, { "epoch": 11.652292950034223, "grad_norm": 5.785193920135498, "learning_rate": 4.171232876712328e-07, "log_odds_chosen": 2.583177089691162, "log_odds_ratio": -0.20346423983573914, "logits/chosen": 1.1152279376983643, "logits/rejected": 1.0818369388580322, "logps/chosen": -2.3722097873687744, "logps/rejected": -4.855655670166016, "loss": 0.772, "nll_loss": 0.7516761422157288, "rewards/accuracies": 1.0, "rewards/chosen": -0.23722100257873535, "rewards/margins": 0.2483445703983307, "rewards/rejected": -0.48556557297706604, "step": 4256 }, { "epoch": 11.655030800821356, "grad_norm": 5.541811943054199, "learning_rate": 4.1698630136986303e-07, "log_odds_chosen": 1.852113962173462, "log_odds_ratio": -0.29834413528442383, "logits/chosen": 0.7528202533721924, "logits/rejected": 0.8563984632492065, "logps/chosen": -2.4247329235076904, "logps/rejected": -4.16583776473999, "loss": 0.8011, "nll_loss": 0.7712414860725403, "rewards/accuracies": 1.0, "rewards/chosen": -0.2424733191728592, "rewards/margins": 0.17411047220230103, "rewards/rejected": -0.416583776473999, "step": 4257 }, { "epoch": 11.657768651608487, "grad_norm": 5.322002410888672, "learning_rate": 4.1684931506849313e-07, "log_odds_chosen": 2.077122449874878, "log_odds_ratio": -0.32293081283569336, "logits/chosen": 0.9627190232276917, "logits/rejected": 0.9995191097259521, "logps/chosen": -2.720649480819702, "logps/rejected": -4.744630336761475, "loss": 0.7973, "nll_loss": 0.7650251984596252, "rewards/accuracies": 1.0, "rewards/chosen": -0.2720649242401123, "rewards/margins": 0.20239809155464172, "rewards/rejected": -0.4744630455970764, "step": 4258 }, { "epoch": 11.66050650239562, "grad_norm": 4.732740879058838, "learning_rate": 4.167123287671233e-07, "log_odds_chosen": 3.6759753227233887, "log_odds_ratio": -0.31710121035575867, "logits/chosen": 0.8237204551696777, "logits/rejected": 0.9334797859191895, "logps/chosen": -2.2129156589508057, "logps/rejected": -5.816373825073242, "loss": 0.7438, "nll_loss": 0.7120919227600098, "rewards/accuracies": 0.875, "rewards/chosen": -0.22129157185554504, "rewards/margins": 0.36034584045410156, "rewards/rejected": -0.5816373825073242, "step": 4259 }, { "epoch": 11.663244353182751, "grad_norm": 6.2647175788879395, "learning_rate": 4.1657534246575343e-07, "log_odds_chosen": 2.6684939861297607, "log_odds_ratio": -0.5391848087310791, "logits/chosen": 0.743672251701355, "logits/rejected": 0.7859665751457214, "logps/chosen": -3.088031530380249, "logps/rejected": -5.7028303146362305, "loss": 0.7934, "nll_loss": 0.7394336462020874, "rewards/accuracies": 0.625, "rewards/chosen": -0.30880317091941833, "rewards/margins": 0.261479914188385, "rewards/rejected": -0.570283055305481, "step": 4260 }, { "epoch": 11.665982203969884, "grad_norm": 5.470571517944336, "learning_rate": 4.1643835616438353e-07, "log_odds_chosen": 2.318807363510132, "log_odds_ratio": -0.3122949004173279, "logits/chosen": 0.7607008218765259, "logits/rejected": 0.7904832363128662, "logps/chosen": -2.09718656539917, "logps/rejected": -4.331556797027588, "loss": 0.6888, "nll_loss": 0.6575656533241272, "rewards/accuracies": 1.0, "rewards/chosen": -0.20971864461898804, "rewards/margins": 0.22343702614307404, "rewards/rejected": -0.4331556558609009, "step": 4261 }, { "epoch": 11.668720054757015, "grad_norm": 8.04559326171875, "learning_rate": 4.163013698630137e-07, "log_odds_chosen": 1.6020458936691284, "log_odds_ratio": -0.6239513158798218, "logits/chosen": 0.7703820466995239, "logits/rejected": 0.7889790534973145, "logps/chosen": -2.8131585121154785, "logps/rejected": -4.323666095733643, "loss": 0.9634, "nll_loss": 0.9010353684425354, "rewards/accuracies": 0.75, "rewards/chosen": -0.2813158631324768, "rewards/margins": 0.15105074644088745, "rewards/rejected": -0.43236660957336426, "step": 4262 }, { "epoch": 11.671457905544148, "grad_norm": 4.938143730163574, "learning_rate": 4.161643835616438e-07, "log_odds_chosen": 1.5617609024047852, "log_odds_ratio": -0.292629599571228, "logits/chosen": 0.704089343547821, "logits/rejected": 0.692314863204956, "logps/chosen": -2.5161802768707275, "logps/rejected": -3.9801998138427734, "loss": 0.6875, "nll_loss": 0.6581889390945435, "rewards/accuracies": 1.0, "rewards/chosen": -0.25161802768707275, "rewards/margins": 0.14640192687511444, "rewards/rejected": -0.3980199694633484, "step": 4263 }, { "epoch": 11.67419575633128, "grad_norm": 7.522622108459473, "learning_rate": 4.16027397260274e-07, "log_odds_chosen": 1.999191403388977, "log_odds_ratio": -0.3965560495853424, "logits/chosen": 0.7972610592842102, "logits/rejected": 0.8435322642326355, "logps/chosen": -2.902219295501709, "logps/rejected": -4.858705520629883, "loss": 0.8033, "nll_loss": 0.7636892795562744, "rewards/accuracies": 0.625, "rewards/chosen": -0.2902219295501709, "rewards/margins": 0.19564859569072723, "rewards/rejected": -0.4858705401420593, "step": 4264 }, { "epoch": 11.676933607118412, "grad_norm": 4.997756004333496, "learning_rate": 4.158904109589041e-07, "log_odds_chosen": 1.6697137355804443, "log_odds_ratio": -0.2732810080051422, "logits/chosen": 0.6844283938407898, "logits/rejected": 0.7825719714164734, "logps/chosen": -2.6804747581481934, "logps/rejected": -4.267767429351807, "loss": 0.6573, "nll_loss": 0.6299515962600708, "rewards/accuracies": 0.875, "rewards/chosen": -0.2680474817752838, "rewards/margins": 0.15872925519943237, "rewards/rejected": -0.4267767071723938, "step": 4265 }, { "epoch": 11.679671457905544, "grad_norm": 5.917574882507324, "learning_rate": 4.157534246575342e-07, "log_odds_chosen": 1.4937807321548462, "log_odds_ratio": -0.41179555654525757, "logits/chosen": 0.8123748898506165, "logits/rejected": 0.7885980606079102, "logps/chosen": -2.0180327892303467, "logps/rejected": -3.39639949798584, "loss": 0.7405, "nll_loss": 0.6993694305419922, "rewards/accuracies": 0.625, "rewards/chosen": -0.2018032968044281, "rewards/margins": 0.13783666491508484, "rewards/rejected": -0.33963996171951294, "step": 4266 }, { "epoch": 11.682409308692677, "grad_norm": 5.774782180786133, "learning_rate": 4.156164383561644e-07, "log_odds_chosen": 1.5202877521514893, "log_odds_ratio": -0.27582210302352905, "logits/chosen": 0.9777311086654663, "logits/rejected": 0.9991295337677002, "logps/chosen": -2.606966018676758, "logps/rejected": -4.0251617431640625, "loss": 0.6509, "nll_loss": 0.6232877969741821, "rewards/accuracies": 1.0, "rewards/chosen": -0.2606965899467468, "rewards/margins": 0.14181958138942719, "rewards/rejected": -0.4025161862373352, "step": 4267 }, { "epoch": 11.685147159479808, "grad_norm": 5.364213943481445, "learning_rate": 4.154794520547945e-07, "log_odds_chosen": 3.07130765914917, "log_odds_ratio": -0.3369964361190796, "logits/chosen": 0.5771793723106384, "logits/rejected": 0.5711491107940674, "logps/chosen": -2.669656276702881, "logps/rejected": -5.659104824066162, "loss": 0.7164, "nll_loss": 0.6826651096343994, "rewards/accuracies": 0.75, "rewards/chosen": -0.2669656574726105, "rewards/margins": 0.2989448606967926, "rewards/rejected": -0.5659104585647583, "step": 4268 }, { "epoch": 11.68788501026694, "grad_norm": 5.706352710723877, "learning_rate": 4.1534246575342464e-07, "log_odds_chosen": 2.2107558250427246, "log_odds_ratio": -0.28685304522514343, "logits/chosen": 1.01360285282135, "logits/rejected": 1.0053678750991821, "logps/chosen": -2.1302826404571533, "logps/rejected": -4.251339912414551, "loss": 0.7451, "nll_loss": 0.7164171934127808, "rewards/accuracies": 1.0, "rewards/chosen": -0.21302825212478638, "rewards/margins": 0.21210570633411407, "rewards/rejected": -0.42513397336006165, "step": 4269 }, { "epoch": 11.690622861054072, "grad_norm": 6.467859268188477, "learning_rate": 4.1520547945205474e-07, "log_odds_chosen": 1.5594773292541504, "log_odds_ratio": -0.3739028573036194, "logits/chosen": 0.793810248374939, "logits/rejected": 0.8714736700057983, "logps/chosen": -2.4158060550689697, "logps/rejected": -3.8720645904541016, "loss": 0.7092, "nll_loss": 0.6718518733978271, "rewards/accuracies": 0.875, "rewards/chosen": -0.24158062040805817, "rewards/margins": 0.14562584459781647, "rewards/rejected": -0.38720643520355225, "step": 4270 }, { "epoch": 11.693360711841205, "grad_norm": 6.070236682891846, "learning_rate": 4.1506849315068495e-07, "log_odds_chosen": 1.7789578437805176, "log_odds_ratio": -0.28866511583328247, "logits/chosen": 0.7556224465370178, "logits/rejected": 0.7312700152397156, "logps/chosen": -2.412217617034912, "logps/rejected": -4.086032390594482, "loss": 0.6294, "nll_loss": 0.6005160808563232, "rewards/accuracies": 1.0, "rewards/chosen": -0.24122178554534912, "rewards/margins": 0.16738145053386688, "rewards/rejected": -0.4086032509803772, "step": 4271 }, { "epoch": 11.696098562628336, "grad_norm": 6.234546184539795, "learning_rate": 4.1493150684931505e-07, "log_odds_chosen": 2.3887507915496826, "log_odds_ratio": -0.3511902689933777, "logits/chosen": 0.7784294486045837, "logits/rejected": 0.8253402709960938, "logps/chosen": -2.388875722885132, "logps/rejected": -4.6030683517456055, "loss": 0.7244, "nll_loss": 0.6892834901809692, "rewards/accuracies": 0.875, "rewards/chosen": -0.23888757824897766, "rewards/margins": 0.22141927480697632, "rewards/rejected": -0.46030688285827637, "step": 4272 }, { "epoch": 11.698836413415469, "grad_norm": 4.985469818115234, "learning_rate": 4.1479452054794515e-07, "log_odds_chosen": 1.349020004272461, "log_odds_ratio": -0.45862841606140137, "logits/chosen": 0.845108151435852, "logits/rejected": 0.8889731168746948, "logps/chosen": -2.226330041885376, "logps/rejected": -3.4671971797943115, "loss": 0.6557, "nll_loss": 0.6098171472549438, "rewards/accuracies": 0.75, "rewards/chosen": -0.2226329892873764, "rewards/margins": 0.12408672273159027, "rewards/rejected": -0.3467197120189667, "step": 4273 }, { "epoch": 11.7015742642026, "grad_norm": 5.222201347351074, "learning_rate": 4.1465753424657535e-07, "log_odds_chosen": 2.476280450820923, "log_odds_ratio": -0.1915644109249115, "logits/chosen": 0.9871140718460083, "logits/rejected": 1.0256905555725098, "logps/chosen": -2.641988754272461, "logps/rejected": -5.021274566650391, "loss": 0.8062, "nll_loss": 0.7870187759399414, "rewards/accuracies": 1.0, "rewards/chosen": -0.2641988694667816, "rewards/margins": 0.23792864382266998, "rewards/rejected": -0.5021275281906128, "step": 4274 }, { "epoch": 11.704312114989733, "grad_norm": 5.359273910522461, "learning_rate": 4.1452054794520545e-07, "log_odds_chosen": 3.3463897705078125, "log_odds_ratio": -0.12633521854877472, "logits/chosen": 0.9237980246543884, "logits/rejected": 0.9654121994972229, "logps/chosen": -2.1785969734191895, "logps/rejected": -5.3917670249938965, "loss": 0.634, "nll_loss": 0.6213693618774414, "rewards/accuracies": 1.0, "rewards/chosen": -0.21785970032215118, "rewards/margins": 0.32131707668304443, "rewards/rejected": -0.5391767621040344, "step": 4275 }, { "epoch": 11.707049965776864, "grad_norm": 5.789260387420654, "learning_rate": 4.143835616438356e-07, "log_odds_chosen": 0.4868142008781433, "log_odds_ratio": -0.6035975217819214, "logits/chosen": 0.7134802341461182, "logits/rejected": 0.7662910223007202, "logps/chosen": -2.3987345695495605, "logps/rejected": -2.832181453704834, "loss": 0.7666, "nll_loss": 0.7062470316886902, "rewards/accuracies": 0.625, "rewards/chosen": -0.23987345397472382, "rewards/margins": 0.04334469139575958, "rewards/rejected": -0.2832181453704834, "step": 4276 }, { "epoch": 11.709787816563997, "grad_norm": 4.592422962188721, "learning_rate": 4.142465753424657e-07, "log_odds_chosen": 3.2541513442993164, "log_odds_ratio": -0.26417022943496704, "logits/chosen": 0.876595675945282, "logits/rejected": 0.9046038389205933, "logps/chosen": -2.5603535175323486, "logps/rejected": -5.697630882263184, "loss": 0.728, "nll_loss": 0.7015982866287231, "rewards/accuracies": 0.875, "rewards/chosen": -0.25603538751602173, "rewards/margins": 0.3137277364730835, "rewards/rejected": -0.5697630643844604, "step": 4277 }, { "epoch": 11.71252566735113, "grad_norm": 6.826895713806152, "learning_rate": 4.141095890410959e-07, "log_odds_chosen": 1.0625473260879517, "log_odds_ratio": -0.6985766887664795, "logits/chosen": 0.7961799502372742, "logits/rejected": 0.8886834979057312, "logps/chosen": -2.706463575363159, "logps/rejected": -3.7134227752685547, "loss": 0.8175, "nll_loss": 0.7476018667221069, "rewards/accuracies": 0.875, "rewards/chosen": -0.2706463634967804, "rewards/margins": 0.10069595277309418, "rewards/rejected": -0.3713423013687134, "step": 4278 }, { "epoch": 11.715263518138261, "grad_norm": 4.964737892150879, "learning_rate": 4.13972602739726e-07, "log_odds_chosen": 1.2950241565704346, "log_odds_ratio": -0.3218872547149658, "logits/chosen": 0.9438038468360901, "logits/rejected": 0.980618953704834, "logps/chosen": -1.929703950881958, "logps/rejected": -3.084287166595459, "loss": 0.5889, "nll_loss": 0.5566742420196533, "rewards/accuracies": 1.0, "rewards/chosen": -0.1929703950881958, "rewards/margins": 0.11545833945274353, "rewards/rejected": -0.30842873454093933, "step": 4279 }, { "epoch": 11.718001368925394, "grad_norm": 5.749798774719238, "learning_rate": 4.138356164383561e-07, "log_odds_chosen": 2.493412733078003, "log_odds_ratio": -0.3029291033744812, "logits/chosen": 0.9969367384910583, "logits/rejected": 1.0129815340042114, "logps/chosen": -2.601144790649414, "logps/rejected": -4.997261047363281, "loss": 0.6533, "nll_loss": 0.6230485439300537, "rewards/accuracies": 0.875, "rewards/chosen": -0.26011449098587036, "rewards/margins": 0.23961162567138672, "rewards/rejected": -0.4997261166572571, "step": 4280 }, { "epoch": 11.720739219712526, "grad_norm": 5.829331874847412, "learning_rate": 4.136986301369863e-07, "log_odds_chosen": 1.7591056823730469, "log_odds_ratio": -0.4756665825843811, "logits/chosen": 0.8381505012512207, "logits/rejected": 0.8956344723701477, "logps/chosen": -2.8862109184265137, "logps/rejected": -4.576833248138428, "loss": 0.7722, "nll_loss": 0.7246133089065552, "rewards/accuracies": 0.75, "rewards/chosen": -0.28862112760543823, "rewards/margins": 0.16906222701072693, "rewards/rejected": -0.4576833248138428, "step": 4281 }, { "epoch": 11.723477070499658, "grad_norm": 6.816400527954102, "learning_rate": 4.135616438356164e-07, "log_odds_chosen": 1.8175263404846191, "log_odds_ratio": -0.2964935302734375, "logits/chosen": 0.7200751304626465, "logits/rejected": 0.7776536345481873, "logps/chosen": -2.622539520263672, "logps/rejected": -4.333071708679199, "loss": 0.6195, "nll_loss": 0.5898609757423401, "rewards/accuracies": 0.875, "rewards/chosen": -0.2622539699077606, "rewards/margins": 0.1710532307624817, "rewards/rejected": -0.4333071708679199, "step": 4282 }, { "epoch": 11.72621492128679, "grad_norm": 6.0264787673950195, "learning_rate": 4.1342465753424656e-07, "log_odds_chosen": 2.1953482627868652, "log_odds_ratio": -0.318254292011261, "logits/chosen": 1.0354992151260376, "logits/rejected": 1.12581205368042, "logps/chosen": -2.0408952236175537, "logps/rejected": -4.097992897033691, "loss": 0.6309, "nll_loss": 0.599055290222168, "rewards/accuracies": 0.875, "rewards/chosen": -0.20408952236175537, "rewards/margins": 0.2057097852230072, "rewards/rejected": -0.4097992777824402, "step": 4283 }, { "epoch": 11.728952772073923, "grad_norm": 5.337868690490723, "learning_rate": 4.1328767123287666e-07, "log_odds_chosen": 2.0298917293548584, "log_odds_ratio": -0.2824278771877289, "logits/chosen": 0.8271325826644897, "logits/rejected": 0.8214505910873413, "logps/chosen": -2.190549612045288, "logps/rejected": -4.141510486602783, "loss": 0.7408, "nll_loss": 0.7125706076622009, "rewards/accuracies": 0.875, "rewards/chosen": -0.2190549671649933, "rewards/margins": 0.19509607553482056, "rewards/rejected": -0.41415104269981384, "step": 4284 }, { "epoch": 11.731690622861054, "grad_norm": 4.351956844329834, "learning_rate": 4.1315068493150686e-07, "log_odds_chosen": 2.1751909255981445, "log_odds_ratio": -0.18005990982055664, "logits/chosen": 0.6698337197303772, "logits/rejected": 0.6657091379165649, "logps/chosen": -1.9652127027511597, "logps/rejected": -3.986482620239258, "loss": 0.6913, "nll_loss": 0.6733291745185852, "rewards/accuracies": 1.0, "rewards/chosen": -0.19652128219604492, "rewards/margins": 0.20212697982788086, "rewards/rejected": -0.3986482620239258, "step": 4285 }, { "epoch": 11.734428473648187, "grad_norm": 9.248007774353027, "learning_rate": 4.1301369863013696e-07, "log_odds_chosen": 1.166212797164917, "log_odds_ratio": -0.5113205313682556, "logits/chosen": 0.9361304044723511, "logits/rejected": 0.8602182269096375, "logps/chosen": -2.580538511276245, "logps/rejected": -3.652989625930786, "loss": 0.7626, "nll_loss": 0.7115023732185364, "rewards/accuracies": 0.75, "rewards/chosen": -0.25805386900901794, "rewards/margins": 0.10724511742591858, "rewards/rejected": -0.3652989864349365, "step": 4286 }, { "epoch": 11.737166324435318, "grad_norm": 5.196310043334961, "learning_rate": 4.1287671232876706e-07, "log_odds_chosen": 2.1969351768493652, "log_odds_ratio": -0.3844308853149414, "logits/chosen": 0.9792222380638123, "logits/rejected": 1.0509611368179321, "logps/chosen": -3.0951027870178223, "logps/rejected": -5.1805572509765625, "loss": 0.7532, "nll_loss": 0.7147195935249329, "rewards/accuracies": 0.875, "rewards/chosen": -0.3095102906227112, "rewards/margins": 0.20854541659355164, "rewards/rejected": -0.5180556774139404, "step": 4287 }, { "epoch": 11.739904175222451, "grad_norm": 5.76945161819458, "learning_rate": 4.1273972602739727e-07, "log_odds_chosen": 1.1277292966842651, "log_odds_ratio": -0.36042582988739014, "logits/chosen": 0.8797513842582703, "logits/rejected": 0.8578804731369019, "logps/chosen": -2.0762438774108887, "logps/rejected": -3.112591505050659, "loss": 0.6869, "nll_loss": 0.6508244872093201, "rewards/accuracies": 0.875, "rewards/chosen": -0.2076243907213211, "rewards/margins": 0.10363475978374481, "rewards/rejected": -0.3112591803073883, "step": 4288 }, { "epoch": 11.742642026009582, "grad_norm": 5.292492866516113, "learning_rate": 4.1260273972602737e-07, "log_odds_chosen": 1.8590655326843262, "log_odds_ratio": -0.3458990752696991, "logits/chosen": 1.0793423652648926, "logits/rejected": 0.9767782688140869, "logps/chosen": -2.175790309906006, "logps/rejected": -3.9341437816619873, "loss": 0.853, "nll_loss": 0.8184584975242615, "rewards/accuracies": 0.875, "rewards/chosen": -0.21757903695106506, "rewards/margins": 0.17583535611629486, "rewards/rejected": -0.3934144079685211, "step": 4289 }, { "epoch": 11.745379876796715, "grad_norm": 5.962698459625244, "learning_rate": 4.124657534246575e-07, "log_odds_chosen": 2.5441596508026123, "log_odds_ratio": -0.15807051956653595, "logits/chosen": 0.6187857985496521, "logits/rejected": 0.5480425953865051, "logps/chosen": -1.6744476556777954, "logps/rejected": -3.9643940925598145, "loss": 0.6656, "nll_loss": 0.6497873067855835, "rewards/accuracies": 1.0, "rewards/chosen": -0.16744476556777954, "rewards/margins": 0.22899466753005981, "rewards/rejected": -0.39643943309783936, "step": 4290 }, { "epoch": 11.748117727583846, "grad_norm": 5.8247175216674805, "learning_rate": 4.1232876712328767e-07, "log_odds_chosen": 1.1244370937347412, "log_odds_ratio": -0.43552833795547485, "logits/chosen": 0.9623450636863708, "logits/rejected": 1.012857437133789, "logps/chosen": -2.7880241870880127, "logps/rejected": -3.865186929702759, "loss": 0.744, "nll_loss": 0.700438916683197, "rewards/accuracies": 0.75, "rewards/chosen": -0.27880245447158813, "rewards/margins": 0.10771626234054565, "rewards/rejected": -0.3865186870098114, "step": 4291 }, { "epoch": 11.75085557837098, "grad_norm": 5.102170467376709, "learning_rate": 4.121917808219178e-07, "log_odds_chosen": 1.4385228157043457, "log_odds_ratio": -0.3601570725440979, "logits/chosen": 0.8749427795410156, "logits/rejected": 0.9113540649414062, "logps/chosen": -2.4505157470703125, "logps/rejected": -3.822681188583374, "loss": 0.7472, "nll_loss": 0.7112060189247131, "rewards/accuracies": 0.875, "rewards/chosen": -0.24505159258842468, "rewards/margins": 0.13721655309200287, "rewards/rejected": -0.38226813077926636, "step": 4292 }, { "epoch": 11.75359342915811, "grad_norm": 4.220002174377441, "learning_rate": 4.120547945205479e-07, "log_odds_chosen": 1.7603225708007812, "log_odds_ratio": -0.22654220461845398, "logits/chosen": 0.8495275974273682, "logits/rejected": 0.8055778741836548, "logps/chosen": -2.0247855186462402, "logps/rejected": -3.636044502258301, "loss": 0.658, "nll_loss": 0.6353765726089478, "rewards/accuracies": 0.875, "rewards/chosen": -0.2024785578250885, "rewards/margins": 0.16112592816352844, "rewards/rejected": -0.36360445618629456, "step": 4293 }, { "epoch": 11.756331279945243, "grad_norm": 6.755341053009033, "learning_rate": 4.11917808219178e-07, "log_odds_chosen": 2.2506167888641357, "log_odds_ratio": -0.2448999285697937, "logits/chosen": 0.5512697100639343, "logits/rejected": 0.48585793375968933, "logps/chosen": -2.1443426609039307, "logps/rejected": -4.289691925048828, "loss": 0.6374, "nll_loss": 0.6128779053688049, "rewards/accuracies": 1.0, "rewards/chosen": -0.21443428099155426, "rewards/margins": 0.2145349383354187, "rewards/rejected": -0.4289691746234894, "step": 4294 }, { "epoch": 11.759069130732374, "grad_norm": 6.418888568878174, "learning_rate": 4.1178082191780823e-07, "log_odds_chosen": 1.8410425186157227, "log_odds_ratio": -0.24692285060882568, "logits/chosen": 1.0384266376495361, "logits/rejected": 1.0552903413772583, "logps/chosen": -2.2375240325927734, "logps/rejected": -3.9711594581604004, "loss": 0.6133, "nll_loss": 0.5886214971542358, "rewards/accuracies": 0.875, "rewards/chosen": -0.22375242412090302, "rewards/margins": 0.17336353659629822, "rewards/rejected": -0.39711594581604004, "step": 4295 }, { "epoch": 11.761806981519507, "grad_norm": 5.3641157150268555, "learning_rate": 4.116438356164383e-07, "log_odds_chosen": 1.046022653579712, "log_odds_ratio": -0.33224064111709595, "logits/chosen": 0.7895247936248779, "logits/rejected": 0.7887217402458191, "logps/chosen": -1.606940507888794, "logps/rejected": -2.480052947998047, "loss": 0.629, "nll_loss": 0.5957275629043579, "rewards/accuracies": 1.0, "rewards/chosen": -0.16069406270980835, "rewards/margins": 0.08731123805046082, "rewards/rejected": -0.24800530076026917, "step": 4296 }, { "epoch": 11.764544832306639, "grad_norm": 6.059620380401611, "learning_rate": 4.115068493150685e-07, "log_odds_chosen": 0.524093508720398, "log_odds_ratio": -0.6634460687637329, "logits/chosen": 0.9778817892074585, "logits/rejected": 1.0248314142227173, "logps/chosen": -3.0217196941375732, "logps/rejected": -3.510239601135254, "loss": 0.6939, "nll_loss": 0.6275544762611389, "rewards/accuracies": 0.75, "rewards/chosen": -0.3021719753742218, "rewards/margins": 0.048851996660232544, "rewards/rejected": -0.35102397203445435, "step": 4297 }, { "epoch": 11.767282683093772, "grad_norm": 5.482853889465332, "learning_rate": 4.1136986301369863e-07, "log_odds_chosen": 2.7519679069519043, "log_odds_ratio": -0.12979789078235626, "logits/chosen": 1.129960060119629, "logits/rejected": 1.2200355529785156, "logps/chosen": -2.6402320861816406, "logps/rejected": -5.310859680175781, "loss": 0.6032, "nll_loss": 0.5902233123779297, "rewards/accuracies": 1.0, "rewards/chosen": -0.26402318477630615, "rewards/margins": 0.267062783241272, "rewards/rejected": -0.5310859680175781, "step": 4298 }, { "epoch": 11.770020533880903, "grad_norm": 6.518218517303467, "learning_rate": 4.112328767123288e-07, "log_odds_chosen": 1.1725280284881592, "log_odds_ratio": -0.6623599529266357, "logits/chosen": 0.6027260422706604, "logits/rejected": 0.5551006197929382, "logps/chosen": -2.1616430282592773, "logps/rejected": -3.2955052852630615, "loss": 0.6837, "nll_loss": 0.6174862384796143, "rewards/accuracies": 0.625, "rewards/chosen": -0.21616432070732117, "rewards/margins": 0.11338622868061066, "rewards/rejected": -0.32955053448677063, "step": 4299 }, { "epoch": 11.772758384668036, "grad_norm": 4.709168434143066, "learning_rate": 4.110958904109589e-07, "log_odds_chosen": 2.0165834426879883, "log_odds_ratio": -0.23034951090812683, "logits/chosen": 0.9470409154891968, "logits/rejected": 0.9815958738327026, "logps/chosen": -1.9438104629516602, "logps/rejected": -3.8198654651641846, "loss": 0.6508, "nll_loss": 0.627771258354187, "rewards/accuracies": 0.875, "rewards/chosen": -0.19438105821609497, "rewards/margins": 0.18760550022125244, "rewards/rejected": -0.3819865584373474, "step": 4300 }, { "epoch": 11.775496235455167, "grad_norm": 5.324069499969482, "learning_rate": 4.10958904109589e-07, "log_odds_chosen": 3.6808533668518066, "log_odds_ratio": -0.1971205770969391, "logits/chosen": 0.8933781385421753, "logits/rejected": 0.9291890859603882, "logps/chosen": -2.5276424884796143, "logps/rejected": -6.145399570465088, "loss": 0.7307, "nll_loss": 0.7109910249710083, "rewards/accuracies": 1.0, "rewards/chosen": -0.2527642250061035, "rewards/margins": 0.3617756962776184, "rewards/rejected": -0.6145399808883667, "step": 4301 }, { "epoch": 11.7782340862423, "grad_norm": 5.815959453582764, "learning_rate": 4.108219178082192e-07, "log_odds_chosen": 1.8702855110168457, "log_odds_ratio": -0.2775866985321045, "logits/chosen": 0.8689651489257812, "logits/rejected": 0.9671831130981445, "logps/chosen": -2.3309829235076904, "logps/rejected": -4.113238334655762, "loss": 0.7523, "nll_loss": 0.724575936794281, "rewards/accuracies": 0.875, "rewards/chosen": -0.23309829831123352, "rewards/margins": 0.17822551727294922, "rewards/rejected": -0.41132381558418274, "step": 4302 }, { "epoch": 11.780971937029431, "grad_norm": 5.609950065612793, "learning_rate": 4.106849315068493e-07, "log_odds_chosen": 2.137460231781006, "log_odds_ratio": -0.28361010551452637, "logits/chosen": 1.0183910131454468, "logits/rejected": 0.9223380088806152, "logps/chosen": -2.6044087409973145, "logps/rejected": -4.627860069274902, "loss": 0.7694, "nll_loss": 0.7409902811050415, "rewards/accuracies": 0.875, "rewards/chosen": -0.260440856218338, "rewards/margins": 0.20234516263008118, "rewards/rejected": -0.4627860188484192, "step": 4303 }, { "epoch": 11.783709787816564, "grad_norm": 5.052125453948975, "learning_rate": 4.1054794520547944e-07, "log_odds_chosen": 1.872462511062622, "log_odds_ratio": -0.2301788330078125, "logits/chosen": 0.8266618251800537, "logits/rejected": 0.7663733959197998, "logps/chosen": -1.791021466255188, "logps/rejected": -3.5198745727539062, "loss": 0.6717, "nll_loss": 0.6486439108848572, "rewards/accuracies": 0.875, "rewards/chosen": -0.17910215258598328, "rewards/margins": 0.17288529872894287, "rewards/rejected": -0.35198748111724854, "step": 4304 }, { "epoch": 11.786447638603697, "grad_norm": 4.957860946655273, "learning_rate": 4.104109589041096e-07, "log_odds_chosen": 1.8530468940734863, "log_odds_ratio": -0.2695024609565735, "logits/chosen": 0.5994522571563721, "logits/rejected": 0.6316210627555847, "logps/chosen": -2.5165274143218994, "logps/rejected": -4.282299041748047, "loss": 0.6948, "nll_loss": 0.6678464412689209, "rewards/accuracies": 0.875, "rewards/chosen": -0.2516527473926544, "rewards/margins": 0.17657716572284698, "rewards/rejected": -0.4282299280166626, "step": 4305 }, { "epoch": 11.789185489390828, "grad_norm": 5.178873062133789, "learning_rate": 4.1027397260273974e-07, "log_odds_chosen": 2.0915255546569824, "log_odds_ratio": -0.29296231269836426, "logits/chosen": 0.9115806818008423, "logits/rejected": 0.995466947555542, "logps/chosen": -3.2144877910614014, "logps/rejected": -5.174562454223633, "loss": 0.852, "nll_loss": 0.8227409720420837, "rewards/accuracies": 0.875, "rewards/chosen": -0.32144877314567566, "rewards/margins": 0.19600746035575867, "rewards/rejected": -0.5174562335014343, "step": 4306 }, { "epoch": 11.791923340177961, "grad_norm": 5.786200046539307, "learning_rate": 4.1013698630136984e-07, "log_odds_chosen": 3.2378458976745605, "log_odds_ratio": -0.1076044887304306, "logits/chosen": 0.8488812446594238, "logits/rejected": 0.8319575190544128, "logps/chosen": -2.1260693073272705, "logps/rejected": -5.206620693206787, "loss": 0.6284, "nll_loss": 0.61768639087677, "rewards/accuracies": 1.0, "rewards/chosen": -0.21260693669319153, "rewards/margins": 0.30805516242980957, "rewards/rejected": -0.5206620693206787, "step": 4307 }, { "epoch": 11.794661190965092, "grad_norm": 5.155423641204834, "learning_rate": 4.0999999999999994e-07, "log_odds_chosen": 3.067002534866333, "log_odds_ratio": -0.18135863542556763, "logits/chosen": 0.8789446353912354, "logits/rejected": 0.922855794429779, "logps/chosen": -2.264050006866455, "logps/rejected": -5.240278720855713, "loss": 0.6678, "nll_loss": 0.6497134566307068, "rewards/accuracies": 0.875, "rewards/chosen": -0.22640500962734222, "rewards/margins": 0.29762282967567444, "rewards/rejected": -0.5240278840065002, "step": 4308 }, { "epoch": 11.797399041752225, "grad_norm": 4.937459468841553, "learning_rate": 4.0986301369863014e-07, "log_odds_chosen": 1.380723237991333, "log_odds_ratio": -0.2859131097793579, "logits/chosen": 0.8542768359184265, "logits/rejected": 0.8769971132278442, "logps/chosen": -2.210137367248535, "logps/rejected": -3.47611927986145, "loss": 0.6455, "nll_loss": 0.6169572472572327, "rewards/accuracies": 1.0, "rewards/chosen": -0.22101375460624695, "rewards/margins": 0.12659817934036255, "rewards/rejected": -0.3476119637489319, "step": 4309 }, { "epoch": 11.800136892539356, "grad_norm": 4.347824573516846, "learning_rate": 4.0972602739726024e-07, "log_odds_chosen": 2.97457218170166, "log_odds_ratio": -0.12387512624263763, "logits/chosen": 0.8506565690040588, "logits/rejected": 0.8449947834014893, "logps/chosen": -1.5269023180007935, "logps/rejected": -4.218094825744629, "loss": 0.6209, "nll_loss": 0.6084756851196289, "rewards/accuracies": 1.0, "rewards/chosen": -0.15269023180007935, "rewards/margins": 0.2691192328929901, "rewards/rejected": -0.42180946469306946, "step": 4310 }, { "epoch": 11.80287474332649, "grad_norm": 5.241888523101807, "learning_rate": 4.095890410958904e-07, "log_odds_chosen": 2.4166181087493896, "log_odds_ratio": -0.26718834042549133, "logits/chosen": 1.0302786827087402, "logits/rejected": 1.0125739574432373, "logps/chosen": -2.27351450920105, "logps/rejected": -4.6065521240234375, "loss": 0.7994, "nll_loss": 0.772716760635376, "rewards/accuracies": 0.875, "rewards/chosen": -0.22735142707824707, "rewards/margins": 0.23330381512641907, "rewards/rejected": -0.46065524220466614, "step": 4311 }, { "epoch": 11.80561259411362, "grad_norm": 6.215758323669434, "learning_rate": 4.0945205479452055e-07, "log_odds_chosen": 1.0318597555160522, "log_odds_ratio": -0.6076791286468506, "logits/chosen": 0.8646131157875061, "logits/rejected": 0.8851323127746582, "logps/chosen": -2.9063336849212646, "logps/rejected": -3.8864688873291016, "loss": 0.7096, "nll_loss": 0.6488431096076965, "rewards/accuracies": 0.75, "rewards/chosen": -0.2906333804130554, "rewards/margins": 0.09801353514194489, "rewards/rejected": -0.3886469006538391, "step": 4312 }, { "epoch": 11.808350444900753, "grad_norm": 4.647152423858643, "learning_rate": 4.0931506849315065e-07, "log_odds_chosen": 2.531552314758301, "log_odds_ratio": -0.20348724722862244, "logits/chosen": 0.9982479810714722, "logits/rejected": 1.0204544067382812, "logps/chosen": -2.4539618492126465, "logps/rejected": -4.9023261070251465, "loss": 0.7235, "nll_loss": 0.7031650543212891, "rewards/accuracies": 1.0, "rewards/chosen": -0.24539616703987122, "rewards/margins": 0.2448364496231079, "rewards/rejected": -0.4902326166629791, "step": 4313 }, { "epoch": 11.811088295687885, "grad_norm": 5.43098783493042, "learning_rate": 4.091780821917808e-07, "log_odds_chosen": 1.441542148590088, "log_odds_ratio": -0.4500395357608795, "logits/chosen": 0.8401788473129272, "logits/rejected": 0.8085956573486328, "logps/chosen": -1.9973334074020386, "logps/rejected": -3.369260311126709, "loss": 0.6798, "nll_loss": 0.634765625, "rewards/accuracies": 0.75, "rewards/chosen": -0.19973334670066833, "rewards/margins": 0.1371927112340927, "rewards/rejected": -0.33692604303359985, "step": 4314 }, { "epoch": 11.813826146475018, "grad_norm": 5.0614752769470215, "learning_rate": 4.090410958904109e-07, "log_odds_chosen": 1.881359577178955, "log_odds_ratio": -0.2742674946784973, "logits/chosen": 0.7839255332946777, "logits/rejected": 0.7954570055007935, "logps/chosen": -2.088416814804077, "logps/rejected": -3.8555564880371094, "loss": 0.7537, "nll_loss": 0.7262833118438721, "rewards/accuracies": 1.0, "rewards/chosen": -0.2088416963815689, "rewards/margins": 0.1767139732837677, "rewards/rejected": -0.385555624961853, "step": 4315 }, { "epoch": 11.816563997262149, "grad_norm": 4.564892768859863, "learning_rate": 4.089041095890411e-07, "log_odds_chosen": 1.267539381980896, "log_odds_ratio": -0.3121749758720398, "logits/chosen": 0.6954706311225891, "logits/rejected": 0.7545578479766846, "logps/chosen": -2.3788750171661377, "logps/rejected": -3.5737814903259277, "loss": 0.6707, "nll_loss": 0.6394933462142944, "rewards/accuracies": 0.875, "rewards/chosen": -0.23788753151893616, "rewards/margins": 0.1194906234741211, "rewards/rejected": -0.35737815499305725, "step": 4316 }, { "epoch": 11.819301848049282, "grad_norm": 5.436697006225586, "learning_rate": 4.087671232876712e-07, "log_odds_chosen": 2.06174898147583, "log_odds_ratio": -0.19271452724933624, "logits/chosen": 0.7068495750427246, "logits/rejected": 0.7608739733695984, "logps/chosen": -2.0955095291137695, "logps/rejected": -4.02386474609375, "loss": 0.6326, "nll_loss": 0.6133026480674744, "rewards/accuracies": 1.0, "rewards/chosen": -0.20955093204975128, "rewards/margins": 0.19283553957939148, "rewards/rejected": -0.40238648653030396, "step": 4317 }, { "epoch": 11.822039698836413, "grad_norm": 4.393119812011719, "learning_rate": 4.0863013698630135e-07, "log_odds_chosen": 2.1207027435302734, "log_odds_ratio": -0.2177387773990631, "logits/chosen": 0.5203827023506165, "logits/rejected": 0.4915986657142639, "logps/chosen": -1.7522883415222168, "logps/rejected": -3.718071699142456, "loss": 0.6534, "nll_loss": 0.6316387057304382, "rewards/accuracies": 1.0, "rewards/chosen": -0.17522884905338287, "rewards/margins": 0.19657830893993378, "rewards/rejected": -0.37180715799331665, "step": 4318 }, { "epoch": 11.824777549623546, "grad_norm": 5.394079685211182, "learning_rate": 4.084931506849315e-07, "log_odds_chosen": 2.285649061203003, "log_odds_ratio": -0.20817425847053528, "logits/chosen": 0.7718140482902527, "logits/rejected": 0.8698518872261047, "logps/chosen": -1.8084607124328613, "logps/rejected": -3.941887617111206, "loss": 0.7033, "nll_loss": 0.6824793815612793, "rewards/accuracies": 1.0, "rewards/chosen": -0.18084608018398285, "rewards/margins": 0.21334269642829895, "rewards/rejected": -0.394188791513443, "step": 4319 }, { "epoch": 11.827515400410677, "grad_norm": 5.1043620109558105, "learning_rate": 4.083561643835616e-07, "log_odds_chosen": 1.2893643379211426, "log_odds_ratio": -0.3077804148197174, "logits/chosen": 0.8043242692947388, "logits/rejected": 0.7955367565155029, "logps/chosen": -1.7604401111602783, "logps/rejected": -2.9005470275878906, "loss": 0.7218, "nll_loss": 0.6910645961761475, "rewards/accuracies": 1.0, "rewards/chosen": -0.1760440170764923, "rewards/margins": 0.11401067674160004, "rewards/rejected": -0.29005467891693115, "step": 4320 }, { "epoch": 11.83025325119781, "grad_norm": 5.856202602386475, "learning_rate": 4.0821917808219176e-07, "log_odds_chosen": 0.9662094116210938, "log_odds_ratio": -0.47961920499801636, "logits/chosen": 0.9865002632141113, "logits/rejected": 1.0121326446533203, "logps/chosen": -2.1932156085968018, "logps/rejected": -3.0916054248809814, "loss": 0.66, "nll_loss": 0.6120362281799316, "rewards/accuracies": 0.875, "rewards/chosen": -0.21932154893875122, "rewards/margins": 0.08983898162841797, "rewards/rejected": -0.3091605305671692, "step": 4321 }, { "epoch": 11.832991101984941, "grad_norm": 6.009917259216309, "learning_rate": 4.0808219178082186e-07, "log_odds_chosen": 1.2137057781219482, "log_odds_ratio": -0.49360036849975586, "logits/chosen": 0.9822834730148315, "logits/rejected": 0.9301493763923645, "logps/chosen": -2.185666084289551, "logps/rejected": -3.3465449810028076, "loss": 0.6837, "nll_loss": 0.6343165636062622, "rewards/accuracies": 0.75, "rewards/chosen": -0.21856659650802612, "rewards/margins": 0.1160878911614418, "rewards/rejected": -0.33465448021888733, "step": 4322 }, { "epoch": 11.835728952772074, "grad_norm": 4.702744960784912, "learning_rate": 4.0794520547945206e-07, "log_odds_chosen": 2.604271173477173, "log_odds_ratio": -0.3125818371772766, "logits/chosen": 0.957645058631897, "logits/rejected": 0.9940485954284668, "logps/chosen": -2.5927858352661133, "logps/rejected": -5.141919136047363, "loss": 0.7369, "nll_loss": 0.7056888341903687, "rewards/accuracies": 0.75, "rewards/chosen": -0.2592785954475403, "rewards/margins": 0.2549133598804474, "rewards/rejected": -0.5141919255256653, "step": 4323 }, { "epoch": 11.838466803559205, "grad_norm": 4.835788726806641, "learning_rate": 4.0780821917808216e-07, "log_odds_chosen": 1.696157693862915, "log_odds_ratio": -0.33158764243125916, "logits/chosen": 0.7179104089736938, "logits/rejected": 0.7808186411857605, "logps/chosen": -1.9213372468948364, "logps/rejected": -3.5290517807006836, "loss": 0.6456, "nll_loss": 0.6124531030654907, "rewards/accuracies": 0.625, "rewards/chosen": -0.19213373959064484, "rewards/margins": 0.1607714742422104, "rewards/rejected": -0.35290518403053284, "step": 4324 }, { "epoch": 11.841204654346338, "grad_norm": 5.843076705932617, "learning_rate": 4.076712328767123e-07, "log_odds_chosen": 2.657862424850464, "log_odds_ratio": -0.19284588098526, "logits/chosen": 0.600641131401062, "logits/rejected": 0.646665632724762, "logps/chosen": -2.0744783878326416, "logps/rejected": -4.596051216125488, "loss": 0.6904, "nll_loss": 0.6710705161094666, "rewards/accuracies": 1.0, "rewards/chosen": -0.2074478566646576, "rewards/margins": 0.2521572709083557, "rewards/rejected": -0.4596051275730133, "step": 4325 }, { "epoch": 11.84394250513347, "grad_norm": 4.621960163116455, "learning_rate": 4.0753424657534246e-07, "log_odds_chosen": 2.258324146270752, "log_odds_ratio": -0.1967260092496872, "logits/chosen": 0.8942742347717285, "logits/rejected": 0.8799612522125244, "logps/chosen": -2.0517690181732178, "logps/rejected": -4.0955119132995605, "loss": 0.6641, "nll_loss": 0.6444239020347595, "rewards/accuracies": 1.0, "rewards/chosen": -0.20517690479755402, "rewards/margins": 0.2043742835521698, "rewards/rejected": -0.4095511734485626, "step": 4326 }, { "epoch": 11.846680355920602, "grad_norm": 5.313949108123779, "learning_rate": 4.0739726027397256e-07, "log_odds_chosen": 2.1497509479522705, "log_odds_ratio": -0.19476553797721863, "logits/chosen": 0.7146298289299011, "logits/rejected": 0.7722664475440979, "logps/chosen": -2.471445322036743, "logps/rejected": -4.434723377227783, "loss": 0.7195, "nll_loss": 0.7000527381896973, "rewards/accuracies": 1.0, "rewards/chosen": -0.24714453518390656, "rewards/margins": 0.196327805519104, "rewards/rejected": -0.44347232580184937, "step": 4327 }, { "epoch": 11.849418206707734, "grad_norm": 5.346414566040039, "learning_rate": 4.072602739726027e-07, "log_odds_chosen": 3.6041133403778076, "log_odds_ratio": -0.139530748128891, "logits/chosen": 0.8099130988121033, "logits/rejected": 0.8609482049942017, "logps/chosen": -3.176297187805176, "logps/rejected": -6.701024055480957, "loss": 0.8168, "nll_loss": 0.8028794527053833, "rewards/accuracies": 1.0, "rewards/chosen": -0.31762972474098206, "rewards/margins": 0.3524726629257202, "rewards/rejected": -0.6701024174690247, "step": 4328 }, { "epoch": 11.852156057494867, "grad_norm": 8.893091201782227, "learning_rate": 4.0712328767123287e-07, "log_odds_chosen": 0.652173638343811, "log_odds_ratio": -0.6273736953735352, "logits/chosen": 0.8243274092674255, "logits/rejected": 0.6927946209907532, "logps/chosen": -2.666027069091797, "logps/rejected": -3.2161970138549805, "loss": 0.8536, "nll_loss": 0.7908632159233093, "rewards/accuracies": 0.875, "rewards/chosen": -0.2666027247905731, "rewards/margins": 0.05501699820160866, "rewards/rejected": -0.3216197192668915, "step": 4329 }, { "epoch": 11.854893908281998, "grad_norm": 5.57350492477417, "learning_rate": 4.06986301369863e-07, "log_odds_chosen": 1.2567908763885498, "log_odds_ratio": -0.3409777879714966, "logits/chosen": 0.8587385416030884, "logits/rejected": 0.9296201467514038, "logps/chosen": -2.8294661045074463, "logps/rejected": -4.05012845993042, "loss": 0.689, "nll_loss": 0.6548913717269897, "rewards/accuracies": 0.875, "rewards/chosen": -0.2829466462135315, "rewards/margins": 0.1220662072300911, "rewards/rejected": -0.405012845993042, "step": 4330 }, { "epoch": 11.85763175906913, "grad_norm": 9.370177268981934, "learning_rate": 4.068493150684931e-07, "log_odds_chosen": 3.468327760696411, "log_odds_ratio": -0.294118732213974, "logits/chosen": 0.9249956011772156, "logits/rejected": 0.92006516456604, "logps/chosen": -2.4902944564819336, "logps/rejected": -5.756109714508057, "loss": 0.7227, "nll_loss": 0.6933267116546631, "rewards/accuracies": 0.875, "rewards/chosen": -0.2490294724702835, "rewards/margins": 0.32658150792121887, "rewards/rejected": -0.5756109952926636, "step": 4331 }, { "epoch": 11.860369609856264, "grad_norm": 4.909741401672363, "learning_rate": 4.0671232876712327e-07, "log_odds_chosen": 1.7378212213516235, "log_odds_ratio": -0.4497652053833008, "logits/chosen": 0.8741977214813232, "logits/rejected": 0.8834118843078613, "logps/chosen": -2.140840768814087, "logps/rejected": -3.7603328227996826, "loss": 0.7341, "nll_loss": 0.6891327500343323, "rewards/accuracies": 0.875, "rewards/chosen": -0.21408408880233765, "rewards/margins": 0.16194918751716614, "rewards/rejected": -0.3760332763195038, "step": 4332 }, { "epoch": 11.863107460643395, "grad_norm": 5.094040393829346, "learning_rate": 4.065753424657534e-07, "log_odds_chosen": 2.0048704147338867, "log_odds_ratio": -0.29676055908203125, "logits/chosen": 0.9452667236328125, "logits/rejected": 0.9983458518981934, "logps/chosen": -2.1201179027557373, "logps/rejected": -3.975131034851074, "loss": 0.6181, "nll_loss": 0.5884506106376648, "rewards/accuracies": 1.0, "rewards/chosen": -0.21201178431510925, "rewards/margins": 0.1855013072490692, "rewards/rejected": -0.39751309156417847, "step": 4333 }, { "epoch": 11.865845311430528, "grad_norm": 6.124685287475586, "learning_rate": 4.064383561643835e-07, "log_odds_chosen": 1.0550698041915894, "log_odds_ratio": -0.44927752017974854, "logits/chosen": 1.0914392471313477, "logits/rejected": 1.0876972675323486, "logps/chosen": -2.7881150245666504, "logps/rejected": -3.7828023433685303, "loss": 0.7121, "nll_loss": 0.6671603918075562, "rewards/accuracies": 0.875, "rewards/chosen": -0.2788114845752716, "rewards/margins": 0.09946875274181366, "rewards/rejected": -0.37828025221824646, "step": 4334 }, { "epoch": 11.868583162217659, "grad_norm": 5.540321350097656, "learning_rate": 4.063013698630137e-07, "log_odds_chosen": 2.8089540004730225, "log_odds_ratio": -0.25849804282188416, "logits/chosen": 0.8587983846664429, "logits/rejected": 0.9182940125465393, "logps/chosen": -2.486032485961914, "logps/rejected": -5.2448625564575195, "loss": 0.7208, "nll_loss": 0.6949614882469177, "rewards/accuracies": 0.875, "rewards/chosen": -0.24860325455665588, "rewards/margins": 0.2758830785751343, "rewards/rejected": -0.5244863629341125, "step": 4335 }, { "epoch": 11.871321013004792, "grad_norm": 5.378660202026367, "learning_rate": 4.0616438356164383e-07, "log_odds_chosen": 1.6637173891067505, "log_odds_ratio": -0.29490453004837036, "logits/chosen": 0.9769766330718994, "logits/rejected": 0.947925329208374, "logps/chosen": -1.635292649269104, "logps/rejected": -3.132481813430786, "loss": 0.6149, "nll_loss": 0.5854175090789795, "rewards/accuracies": 0.875, "rewards/chosen": -0.16352924704551697, "rewards/margins": 0.14971892535686493, "rewards/rejected": -0.3132481873035431, "step": 4336 }, { "epoch": 11.874058863791923, "grad_norm": 4.8215250968933105, "learning_rate": 4.06027397260274e-07, "log_odds_chosen": 2.105074167251587, "log_odds_ratio": -0.26051706075668335, "logits/chosen": 1.019815444946289, "logits/rejected": 1.0978211164474487, "logps/chosen": -2.4979333877563477, "logps/rejected": -4.5177178382873535, "loss": 0.6537, "nll_loss": 0.6276693940162659, "rewards/accuracies": 1.0, "rewards/chosen": -0.24979336559772491, "rewards/margins": 0.20197844505310059, "rewards/rejected": -0.4517717957496643, "step": 4337 }, { "epoch": 11.876796714579056, "grad_norm": 5.382550239562988, "learning_rate": 4.058904109589041e-07, "log_odds_chosen": 2.686680316925049, "log_odds_ratio": -0.15875029563903809, "logits/chosen": 0.988767683506012, "logits/rejected": 1.050142765045166, "logps/chosen": -2.0089564323425293, "logps/rejected": -4.557912349700928, "loss": 0.5607, "nll_loss": 0.5448640584945679, "rewards/accuracies": 1.0, "rewards/chosen": -0.20089565217494965, "rewards/margins": 0.2548955976963043, "rewards/rejected": -0.4557912349700928, "step": 4338 }, { "epoch": 11.879534565366187, "grad_norm": 4.721887111663818, "learning_rate": 4.0575342465753423e-07, "log_odds_chosen": 2.0019941329956055, "log_odds_ratio": -0.41285598278045654, "logits/chosen": 1.0930826663970947, "logits/rejected": 1.1062335968017578, "logps/chosen": -2.8060288429260254, "logps/rejected": -4.761457443237305, "loss": 0.6903, "nll_loss": 0.648994505405426, "rewards/accuracies": 0.75, "rewards/chosen": -0.28060290217399597, "rewards/margins": 0.19554290175437927, "rewards/rejected": -0.47614580392837524, "step": 4339 }, { "epoch": 11.88227241615332, "grad_norm": 6.383324146270752, "learning_rate": 4.056164383561644e-07, "log_odds_chosen": 1.6710205078125, "log_odds_ratio": -0.35259878635406494, "logits/chosen": 0.5915353894233704, "logits/rejected": 0.5762755870819092, "logps/chosen": -2.2608327865600586, "logps/rejected": -3.810945987701416, "loss": 0.7238, "nll_loss": 0.6885602474212646, "rewards/accuracies": 1.0, "rewards/chosen": -0.22608327865600586, "rewards/margins": 0.15501132607460022, "rewards/rejected": -0.3810946047306061, "step": 4340 }, { "epoch": 11.885010266940451, "grad_norm": 4.290897846221924, "learning_rate": 4.054794520547945e-07, "log_odds_chosen": 2.1942977905273438, "log_odds_ratio": -0.29057180881500244, "logits/chosen": 0.7819582223892212, "logits/rejected": 0.8239184617996216, "logps/chosen": -1.9956588745117188, "logps/rejected": -4.085955619812012, "loss": 0.6507, "nll_loss": 0.6216500997543335, "rewards/accuracies": 0.875, "rewards/chosen": -0.19956588745117188, "rewards/margins": 0.20902970433235168, "rewards/rejected": -0.40859556198120117, "step": 4341 }, { "epoch": 11.887748117727584, "grad_norm": 5.706943035125732, "learning_rate": 4.0534246575342463e-07, "log_odds_chosen": 1.7883042097091675, "log_odds_ratio": -0.3074987530708313, "logits/chosen": 0.7247106432914734, "logits/rejected": 0.6855956315994263, "logps/chosen": -1.7783472537994385, "logps/rejected": -3.4406232833862305, "loss": 0.6603, "nll_loss": 0.6295458078384399, "rewards/accuracies": 1.0, "rewards/chosen": -0.17783473432064056, "rewards/margins": 0.16622760891914368, "rewards/rejected": -0.34406235814094543, "step": 4342 }, { "epoch": 11.890485968514716, "grad_norm": 4.597379207611084, "learning_rate": 4.052054794520548e-07, "log_odds_chosen": 1.4020509719848633, "log_odds_ratio": -0.3543551564216614, "logits/chosen": 0.7032870054244995, "logits/rejected": 0.6941660642623901, "logps/chosen": -1.9153704643249512, "logps/rejected": -3.1699020862579346, "loss": 0.6709, "nll_loss": 0.6355093717575073, "rewards/accuracies": 0.875, "rewards/chosen": -0.1915370523929596, "rewards/margins": 0.1254531741142273, "rewards/rejected": -0.3169902265071869, "step": 4343 }, { "epoch": 11.893223819301848, "grad_norm": 5.248594284057617, "learning_rate": 4.0506849315068494e-07, "log_odds_chosen": 1.9057958126068115, "log_odds_ratio": -0.18483540415763855, "logits/chosen": 0.8504198789596558, "logits/rejected": 0.8858786225318909, "logps/chosen": -2.200233221054077, "logps/rejected": -3.926398277282715, "loss": 0.6158, "nll_loss": 0.5973081588745117, "rewards/accuracies": 1.0, "rewards/chosen": -0.22002333402633667, "rewards/margins": 0.17261648178100586, "rewards/rejected": -0.39263981580734253, "step": 4344 }, { "epoch": 11.89596167008898, "grad_norm": 5.051280498504639, "learning_rate": 4.0493150684931504e-07, "log_odds_chosen": 3.447122812271118, "log_odds_ratio": -0.07790566235780716, "logits/chosen": 1.1819438934326172, "logits/rejected": 1.184206485748291, "logps/chosen": -1.8337409496307373, "logps/rejected": -5.04531192779541, "loss": 0.6054, "nll_loss": 0.597646951675415, "rewards/accuracies": 1.0, "rewards/chosen": -0.1833740919828415, "rewards/margins": 0.3211570978164673, "rewards/rejected": -0.50453120470047, "step": 4345 }, { "epoch": 11.898699520876113, "grad_norm": 6.214677333831787, "learning_rate": 4.0479452054794514e-07, "log_odds_chosen": 1.569532871246338, "log_odds_ratio": -0.39189741015434265, "logits/chosen": 0.7370909452438354, "logits/rejected": 0.6703552603721619, "logps/chosen": -2.190682888031006, "logps/rejected": -3.6099255084991455, "loss": 0.7307, "nll_loss": 0.6915563344955444, "rewards/accuracies": 0.875, "rewards/chosen": -0.21906831860542297, "rewards/margins": 0.14192423224449158, "rewards/rejected": -0.36099255084991455, "step": 4346 }, { "epoch": 11.901437371663244, "grad_norm": 5.271181583404541, "learning_rate": 4.0465753424657534e-07, "log_odds_chosen": 2.0194313526153564, "log_odds_ratio": -0.21052248775959015, "logits/chosen": 0.8754127025604248, "logits/rejected": 0.8768044114112854, "logps/chosen": -2.3546862602233887, "logps/rejected": -4.209721088409424, "loss": 0.6603, "nll_loss": 0.6392890214920044, "rewards/accuracies": 1.0, "rewards/chosen": -0.23546862602233887, "rewards/margins": 0.18550345301628113, "rewards/rejected": -0.4209721088409424, "step": 4347 }, { "epoch": 11.904175222450377, "grad_norm": 4.651180267333984, "learning_rate": 4.0452054794520544e-07, "log_odds_chosen": 1.9710711240768433, "log_odds_ratio": -0.28351306915283203, "logits/chosen": 0.9703102111816406, "logits/rejected": 0.9889726042747498, "logps/chosen": -1.7646576166152954, "logps/rejected": -3.584453821182251, "loss": 0.6435, "nll_loss": 0.6151363849639893, "rewards/accuracies": 1.0, "rewards/chosen": -0.17646576464176178, "rewards/margins": 0.18197962641716003, "rewards/rejected": -0.358445405960083, "step": 4348 }, { "epoch": 11.906913073237508, "grad_norm": 7.659347057342529, "learning_rate": 4.043835616438356e-07, "log_odds_chosen": 2.245401382446289, "log_odds_ratio": -0.15894940495491028, "logits/chosen": 0.9456424713134766, "logits/rejected": 1.0122642517089844, "logps/chosen": -2.496001720428467, "logps/rejected": -4.635429859161377, "loss": 0.8481, "nll_loss": 0.8321733474731445, "rewards/accuracies": 1.0, "rewards/chosen": -0.24960017204284668, "rewards/margins": 0.21394282579421997, "rewards/rejected": -0.46354299783706665, "step": 4349 }, { "epoch": 11.90965092402464, "grad_norm": 6.235615253448486, "learning_rate": 4.0424657534246574e-07, "log_odds_chosen": 1.967537760734558, "log_odds_ratio": -0.2726208567619324, "logits/chosen": 0.868679940700531, "logits/rejected": 0.9665471315383911, "logps/chosen": -2.600950241088867, "logps/rejected": -4.4667792320251465, "loss": 0.5938, "nll_loss": 0.5665013790130615, "rewards/accuracies": 0.875, "rewards/chosen": -0.2600950300693512, "rewards/margins": 0.18658289313316345, "rewards/rejected": -0.44667792320251465, "step": 4350 }, { "epoch": 11.912388774811772, "grad_norm": 4.936039924621582, "learning_rate": 4.041095890410959e-07, "log_odds_chosen": 1.9543455839157104, "log_odds_ratio": -0.19904151558876038, "logits/chosen": 0.8979678153991699, "logits/rejected": 0.9001767635345459, "logps/chosen": -2.1799304485321045, "logps/rejected": -4.012748718261719, "loss": 0.5895, "nll_loss": 0.5696082711219788, "rewards/accuracies": 1.0, "rewards/chosen": -0.21799305081367493, "rewards/margins": 0.183281809091568, "rewards/rejected": -0.4012748599052429, "step": 4351 }, { "epoch": 11.915126625598905, "grad_norm": 5.675723075866699, "learning_rate": 4.03972602739726e-07, "log_odds_chosen": 2.5230236053466797, "log_odds_ratio": -0.2942875921726227, "logits/chosen": 0.7470926642417908, "logits/rejected": 0.7679204940795898, "logps/chosen": -2.2701892852783203, "logps/rejected": -4.671777248382568, "loss": 0.6604, "nll_loss": 0.6309711933135986, "rewards/accuracies": 0.875, "rewards/chosen": -0.22701892256736755, "rewards/margins": 0.2401588261127472, "rewards/rejected": -0.46717774868011475, "step": 4352 }, { "epoch": 11.917864476386036, "grad_norm": 5.217162609100342, "learning_rate": 4.038356164383561e-07, "log_odds_chosen": 1.345687985420227, "log_odds_ratio": -0.44257184863090515, "logits/chosen": 1.0564662218093872, "logits/rejected": 1.0837600231170654, "logps/chosen": -2.0957558155059814, "logps/rejected": -3.3948988914489746, "loss": 0.7423, "nll_loss": 0.6979936361312866, "rewards/accuracies": 0.75, "rewards/chosen": -0.2095755934715271, "rewards/margins": 0.1299142986536026, "rewards/rejected": -0.3394898772239685, "step": 4353 }, { "epoch": 11.92060232717317, "grad_norm": 8.644638061523438, "learning_rate": 4.036986301369863e-07, "log_odds_chosen": 2.934509515762329, "log_odds_ratio": -0.3543354570865631, "logits/chosen": 0.977994978427887, "logits/rejected": 1.0208451747894287, "logps/chosen": -3.5111770629882812, "logps/rejected": -6.384824752807617, "loss": 0.8612, "nll_loss": 0.8257945775985718, "rewards/accuracies": 0.75, "rewards/chosen": -0.35111773014068604, "rewards/margins": 0.28736478090286255, "rewards/rejected": -0.6384824514389038, "step": 4354 }, { "epoch": 11.923340177960302, "grad_norm": 6.496732711791992, "learning_rate": 4.035616438356164e-07, "log_odds_chosen": 3.0813331604003906, "log_odds_ratio": -0.19920262694358826, "logits/chosen": 0.8580727577209473, "logits/rejected": 0.9132462739944458, "logps/chosen": -2.9875924587249756, "logps/rejected": -5.931353569030762, "loss": 0.8138, "nll_loss": 0.7938579320907593, "rewards/accuracies": 0.875, "rewards/chosen": -0.29875922203063965, "rewards/margins": 0.2943761348724365, "rewards/rejected": -0.5931353569030762, "step": 4355 }, { "epoch": 11.926078028747433, "grad_norm": 5.436683177947998, "learning_rate": 4.0342465753424655e-07, "log_odds_chosen": 2.856797695159912, "log_odds_ratio": -0.23367975652217865, "logits/chosen": 0.7811204195022583, "logits/rejected": 0.7984977960586548, "logps/chosen": -2.948521137237549, "logps/rejected": -5.745670318603516, "loss": 0.7507, "nll_loss": 0.7273654937744141, "rewards/accuracies": 1.0, "rewards/chosen": -0.2948521077632904, "rewards/margins": 0.2797149419784546, "rewards/rejected": -0.5745670199394226, "step": 4356 }, { "epoch": 11.928815879534564, "grad_norm": 5.94028377532959, "learning_rate": 4.032876712328767e-07, "log_odds_chosen": 0.09512362629175186, "log_odds_ratio": -0.8061737418174744, "logits/chosen": 0.7587358355522156, "logits/rejected": 0.8302831053733826, "logps/chosen": -2.4703831672668457, "logps/rejected": -2.548090934753418, "loss": 0.8188, "nll_loss": 0.7381713390350342, "rewards/accuracies": 0.625, "rewards/chosen": -0.2470383197069168, "rewards/margins": 0.007770768366754055, "rewards/rejected": -0.25480908155441284, "step": 4357 }, { "epoch": 11.931553730321697, "grad_norm": 6.887415409088135, "learning_rate": 4.0315068493150685e-07, "log_odds_chosen": 1.7613928318023682, "log_odds_ratio": -0.32142847776412964, "logits/chosen": 0.8659738302230835, "logits/rejected": 0.8630287647247314, "logps/chosen": -2.567892551422119, "logps/rejected": -4.158506870269775, "loss": 0.8051, "nll_loss": 0.7729589939117432, "rewards/accuracies": 0.75, "rewards/chosen": -0.25678926706314087, "rewards/margins": 0.15906140208244324, "rewards/rejected": -0.4158506691455841, "step": 4358 }, { "epoch": 11.93429158110883, "grad_norm": 5.254761695861816, "learning_rate": 4.0301369863013695e-07, "log_odds_chosen": 2.873063325881958, "log_odds_ratio": -0.21784330904483795, "logits/chosen": 0.9093819260597229, "logits/rejected": 0.8793653845787048, "logps/chosen": -2.1435317993164062, "logps/rejected": -4.814455509185791, "loss": 0.7915, "nll_loss": 0.7696672677993774, "rewards/accuracies": 1.0, "rewards/chosen": -0.21435317397117615, "rewards/margins": 0.26709237694740295, "rewards/rejected": -0.4814455509185791, "step": 4359 }, { "epoch": 11.937029431895962, "grad_norm": 6.381385803222656, "learning_rate": 4.028767123287671e-07, "log_odds_chosen": 2.944814920425415, "log_odds_ratio": -0.2877797484397888, "logits/chosen": 1.0437088012695312, "logits/rejected": 1.0272663831710815, "logps/chosen": -2.508108615875244, "logps/rejected": -5.360586166381836, "loss": 0.8425, "nll_loss": 0.813735842704773, "rewards/accuracies": 0.75, "rewards/chosen": -0.2508108913898468, "rewards/margins": 0.28524768352508545, "rewards/rejected": -0.5360586047172546, "step": 4360 }, { "epoch": 11.939767282683095, "grad_norm": 5.991314888000488, "learning_rate": 4.0273972602739726e-07, "log_odds_chosen": 2.443528652191162, "log_odds_ratio": -0.47771185636520386, "logits/chosen": 0.7846052646636963, "logits/rejected": 0.8789651989936829, "logps/chosen": -2.4238548278808594, "logps/rejected": -4.788729667663574, "loss": 0.7475, "nll_loss": 0.6997755765914917, "rewards/accuracies": 0.875, "rewards/chosen": -0.24238547682762146, "rewards/margins": 0.2364875078201294, "rewards/rejected": -0.47887301445007324, "step": 4361 }, { "epoch": 11.942505133470226, "grad_norm": 4.733981609344482, "learning_rate": 4.0260273972602736e-07, "log_odds_chosen": 3.4676551818847656, "log_odds_ratio": -0.09104275703430176, "logits/chosen": 0.870555579662323, "logits/rejected": 0.8664368391036987, "logps/chosen": -1.543091058731079, "logps/rejected": -4.755063533782959, "loss": 0.6748, "nll_loss": 0.6656721234321594, "rewards/accuracies": 1.0, "rewards/chosen": -0.15430910885334015, "rewards/margins": 0.3211972713470459, "rewards/rejected": -0.47550636529922485, "step": 4362 }, { "epoch": 11.945242984257359, "grad_norm": 4.456704616546631, "learning_rate": 4.024657534246575e-07, "log_odds_chosen": 1.6368244886398315, "log_odds_ratio": -0.2407861351966858, "logits/chosen": 0.771205723285675, "logits/rejected": 0.7679845690727234, "logps/chosen": -2.0900521278381348, "logps/rejected": -3.6066324710845947, "loss": 0.6446, "nll_loss": 0.6205700635910034, "rewards/accuracies": 1.0, "rewards/chosen": -0.2090052217245102, "rewards/margins": 0.15165801346302032, "rewards/rejected": -0.3606632649898529, "step": 4363 }, { "epoch": 11.94798083504449, "grad_norm": 4.991613864898682, "learning_rate": 4.0232876712328766e-07, "log_odds_chosen": 1.6638715267181396, "log_odds_ratio": -0.2936292290687561, "logits/chosen": 0.9929276704788208, "logits/rejected": 0.9099204540252686, "logps/chosen": -1.5895315408706665, "logps/rejected": -3.0562124252319336, "loss": 0.6365, "nll_loss": 0.6071730256080627, "rewards/accuracies": 0.875, "rewards/chosen": -0.15895316004753113, "rewards/margins": 0.14666807651519775, "rewards/rejected": -0.3056212365627289, "step": 4364 }, { "epoch": 11.950718685831623, "grad_norm": 5.131479263305664, "learning_rate": 4.021917808219178e-07, "log_odds_chosen": 2.438913345336914, "log_odds_ratio": -0.21711626648902893, "logits/chosen": 0.9500094652175903, "logits/rejected": 1.0133246183395386, "logps/chosen": -1.7619280815124512, "logps/rejected": -4.0562238693237305, "loss": 0.637, "nll_loss": 0.6152714490890503, "rewards/accuracies": 0.875, "rewards/chosen": -0.17619279026985168, "rewards/margins": 0.22942957282066345, "rewards/rejected": -0.4056223928928375, "step": 4365 }, { "epoch": 11.953456536618754, "grad_norm": 6.206832408905029, "learning_rate": 4.020547945205479e-07, "log_odds_chosen": 1.2154271602630615, "log_odds_ratio": -0.28881150484085083, "logits/chosen": 0.9249919056892395, "logits/rejected": 0.9031946659088135, "logps/chosen": -1.4269752502441406, "logps/rejected": -2.4443132877349854, "loss": 0.5704, "nll_loss": 0.5414939522743225, "rewards/accuracies": 1.0, "rewards/chosen": -0.1426975131034851, "rewards/margins": 0.10173380374908447, "rewards/rejected": -0.24443131685256958, "step": 4366 }, { "epoch": 11.956194387405887, "grad_norm": 5.531547546386719, "learning_rate": 4.0191780821917806e-07, "log_odds_chosen": 1.7043943405151367, "log_odds_ratio": -0.28321436047554016, "logits/chosen": 0.7429810762405396, "logits/rejected": 0.7059253454208374, "logps/chosen": -1.5532039403915405, "logps/rejected": -3.119494915008545, "loss": 0.6561, "nll_loss": 0.6278036832809448, "rewards/accuracies": 1.0, "rewards/chosen": -0.155320405960083, "rewards/margins": 0.15662908554077148, "rewards/rejected": -0.3119494915008545, "step": 4367 }, { "epoch": 11.958932238193018, "grad_norm": 7.642994403839111, "learning_rate": 4.017808219178082e-07, "log_odds_chosen": 2.5502352714538574, "log_odds_ratio": -0.12630999088287354, "logits/chosen": 1.0377440452575684, "logits/rejected": 1.0870407819747925, "logps/chosen": -2.554147720336914, "logps/rejected": -5.002660274505615, "loss": 0.7077, "nll_loss": 0.6950438022613525, "rewards/accuracies": 1.0, "rewards/chosen": -0.25541478395462036, "rewards/margins": 0.2448512315750122, "rewards/rejected": -0.5002660155296326, "step": 4368 }, { "epoch": 11.961670088980151, "grad_norm": 6.217457294464111, "learning_rate": 4.016438356164383e-07, "log_odds_chosen": 1.186985731124878, "log_odds_ratio": -0.34948670864105225, "logits/chosen": 0.7559010982513428, "logits/rejected": 0.6922324299812317, "logps/chosen": -2.0963315963745117, "logps/rejected": -3.1271591186523438, "loss": 0.663, "nll_loss": 0.6280160546302795, "rewards/accuracies": 0.875, "rewards/chosen": -0.20963317155838013, "rewards/margins": 0.10308274626731873, "rewards/rejected": -0.31271591782569885, "step": 4369 }, { "epoch": 11.964407939767282, "grad_norm": 5.639647006988525, "learning_rate": 4.0150684931506847e-07, "log_odds_chosen": 2.9095239639282227, "log_odds_ratio": -0.155047208070755, "logits/chosen": 0.9504707455635071, "logits/rejected": 1.009595513343811, "logps/chosen": -2.3399720191955566, "logps/rejected": -5.133318901062012, "loss": 0.6874, "nll_loss": 0.6718553900718689, "rewards/accuracies": 0.875, "rewards/chosen": -0.23399722576141357, "rewards/margins": 0.2793346643447876, "rewards/rejected": -0.5133318901062012, "step": 4370 }, { "epoch": 11.967145790554415, "grad_norm": 4.458034038543701, "learning_rate": 4.013698630136986e-07, "log_odds_chosen": 3.016141891479492, "log_odds_ratio": -0.1186734065413475, "logits/chosen": 0.8057191967964172, "logits/rejected": 0.8836542367935181, "logps/chosen": -2.0732078552246094, "logps/rejected": -4.898531436920166, "loss": 0.6878, "nll_loss": 0.6759669184684753, "rewards/accuracies": 1.0, "rewards/chosen": -0.20732077956199646, "rewards/margins": 0.28253239393234253, "rewards/rejected": -0.489853173494339, "step": 4371 }, { "epoch": 11.969883641341546, "grad_norm": 5.654265403747559, "learning_rate": 4.0123287671232877e-07, "log_odds_chosen": 1.008174180984497, "log_odds_ratio": -0.5096116065979004, "logits/chosen": 0.9792211651802063, "logits/rejected": 0.9244375228881836, "logps/chosen": -1.9586420059204102, "logps/rejected": -2.8999476432800293, "loss": 0.6381, "nll_loss": 0.587170422077179, "rewards/accuracies": 0.75, "rewards/chosen": -0.19586420059204102, "rewards/margins": 0.09413056075572968, "rewards/rejected": -0.2899947464466095, "step": 4372 }, { "epoch": 11.97262149212868, "grad_norm": 6.056888580322266, "learning_rate": 4.0109589041095887e-07, "log_odds_chosen": 1.8733614683151245, "log_odds_ratio": -0.18408319354057312, "logits/chosen": 0.6848320960998535, "logits/rejected": 0.6673938035964966, "logps/chosen": -1.7016234397888184, "logps/rejected": -3.3154959678649902, "loss": 0.7401, "nll_loss": 0.7216472625732422, "rewards/accuracies": 1.0, "rewards/chosen": -0.1701623499393463, "rewards/margins": 0.16138726472854614, "rewards/rejected": -0.33154961466789246, "step": 4373 }, { "epoch": 11.97535934291581, "grad_norm": 4.380295753479004, "learning_rate": 4.00958904109589e-07, "log_odds_chosen": 1.652832269668579, "log_odds_ratio": -0.32141855359077454, "logits/chosen": 0.7671734094619751, "logits/rejected": 0.7335470914840698, "logps/chosen": -1.890427589416504, "logps/rejected": -3.436455249786377, "loss": 0.6515, "nll_loss": 0.6193573474884033, "rewards/accuracies": 0.875, "rewards/chosen": -0.18904277682304382, "rewards/margins": 0.1546027511358261, "rewards/rejected": -0.34364551305770874, "step": 4374 }, { "epoch": 11.978097193702943, "grad_norm": 5.339345455169678, "learning_rate": 4.008219178082192e-07, "log_odds_chosen": 0.7741695642471313, "log_odds_ratio": -0.5116620659828186, "logits/chosen": 0.6525010466575623, "logits/rejected": 0.6757840514183044, "logps/chosen": -2.693667411804199, "logps/rejected": -3.4655747413635254, "loss": 0.8067, "nll_loss": 0.7554953098297119, "rewards/accuracies": 0.75, "rewards/chosen": -0.2693667411804199, "rewards/margins": 0.07719071209430695, "rewards/rejected": -0.34655749797821045, "step": 4375 }, { "epoch": 11.980835044490075, "grad_norm": 5.436420440673828, "learning_rate": 4.006849315068493e-07, "log_odds_chosen": 1.1324909925460815, "log_odds_ratio": -0.349825382232666, "logits/chosen": 0.7067616581916809, "logits/rejected": 0.7826426029205322, "logps/chosen": -2.2122342586517334, "logps/rejected": -3.2524125576019287, "loss": 0.6293, "nll_loss": 0.5943416357040405, "rewards/accuracies": 0.875, "rewards/chosen": -0.22122342884540558, "rewards/margins": 0.10401783883571625, "rewards/rejected": -0.3252412676811218, "step": 4376 }, { "epoch": 11.983572895277208, "grad_norm": 4.869734287261963, "learning_rate": 4.0054794520547943e-07, "log_odds_chosen": 2.840365409851074, "log_odds_ratio": -0.23180413246154785, "logits/chosen": 0.8134926557540894, "logits/rejected": 0.8193941116333008, "logps/chosen": -2.092486619949341, "logps/rejected": -4.839491844177246, "loss": 0.6027, "nll_loss": 0.5795688629150391, "rewards/accuracies": 0.875, "rewards/chosen": -0.2092486470937729, "rewards/margins": 0.2747005522251129, "rewards/rejected": -0.4839491844177246, "step": 4377 }, { "epoch": 11.986310746064339, "grad_norm": 5.838184356689453, "learning_rate": 4.004109589041096e-07, "log_odds_chosen": 1.7701905965805054, "log_odds_ratio": -0.40088415145874023, "logits/chosen": 0.7398037910461426, "logits/rejected": 0.7944714426994324, "logps/chosen": -1.9210712909698486, "logps/rejected": -3.5575735569000244, "loss": 0.6259, "nll_loss": 0.5858017802238464, "rewards/accuracies": 0.75, "rewards/chosen": -0.192107155919075, "rewards/margins": 0.16365021467208862, "rewards/rejected": -0.35575735569000244, "step": 4378 }, { "epoch": 11.989048596851472, "grad_norm": 5.989921569824219, "learning_rate": 4.0027397260273973e-07, "log_odds_chosen": 2.434039831161499, "log_odds_ratio": -0.28280341625213623, "logits/chosen": 0.7134720683097839, "logits/rejected": 0.7577646970748901, "logps/chosen": -2.3930912017822266, "logps/rejected": -4.774199962615967, "loss": 0.7975, "nll_loss": 0.7691878080368042, "rewards/accuracies": 0.875, "rewards/chosen": -0.23930911719799042, "rewards/margins": 0.23811088502407074, "rewards/rejected": -0.47742003202438354, "step": 4379 }, { "epoch": 11.991786447638603, "grad_norm": 5.607054233551025, "learning_rate": 4.0013698630136983e-07, "log_odds_chosen": 2.6874496936798096, "log_odds_ratio": -0.15646040439605713, "logits/chosen": 1.0794789791107178, "logits/rejected": 1.1066486835479736, "logps/chosen": -1.9282526969909668, "logps/rejected": -4.42503023147583, "loss": 0.7338, "nll_loss": 0.7181966304779053, "rewards/accuracies": 1.0, "rewards/chosen": -0.1928252875804901, "rewards/margins": 0.24967777729034424, "rewards/rejected": -0.44250303506851196, "step": 4380 }, { "epoch": 11.994524298425736, "grad_norm": 5.52714204788208, "learning_rate": 4e-07, "log_odds_chosen": 2.383660316467285, "log_odds_ratio": -0.247052863240242, "logits/chosen": 0.7905654907226562, "logits/rejected": 0.7182512879371643, "logps/chosen": -1.9440109729766846, "logps/rejected": -4.138619422912598, "loss": 0.8288, "nll_loss": 0.804122269153595, "rewards/accuracies": 0.875, "rewards/chosen": -0.1944011002779007, "rewards/margins": 0.21946083009243011, "rewards/rejected": -0.4138619303703308, "step": 4381 }, { "epoch": 11.997262149212869, "grad_norm": 4.9073710441589355, "learning_rate": 3.9986301369863013e-07, "log_odds_chosen": 3.4378199577331543, "log_odds_ratio": -0.11585605144500732, "logits/chosen": 0.9250555038452148, "logits/rejected": 0.9462526440620422, "logps/chosen": -1.983961820602417, "logps/rejected": -5.261631965637207, "loss": 0.6933, "nll_loss": 0.6817090511322021, "rewards/accuracies": 1.0, "rewards/chosen": -0.19839617609977722, "rewards/margins": 0.327767014503479, "rewards/rejected": -0.5261631608009338, "step": 4382 }, { "epoch": 12.0, "grad_norm": 5.637520790100098, "learning_rate": 3.9972602739726023e-07, "log_odds_chosen": 2.510556221008301, "log_odds_ratio": -0.23076042532920837, "logits/chosen": 1.070571780204773, "logits/rejected": 1.1593061685562134, "logps/chosen": -2.8934428691864014, "logps/rejected": -5.329205513000488, "loss": 0.7904, "nll_loss": 0.7673037052154541, "rewards/accuracies": 0.875, "rewards/chosen": -0.28934428095817566, "rewards/margins": 0.243576318025589, "rewards/rejected": -0.5329205989837646, "step": 4383 }, { "epoch": 12.002737850787133, "grad_norm": 5.211965084075928, "learning_rate": 3.995890410958904e-07, "log_odds_chosen": 2.0878896713256836, "log_odds_ratio": -0.17778420448303223, "logits/chosen": 0.7679668068885803, "logits/rejected": 0.7468475103378296, "logps/chosen": -1.7808852195739746, "logps/rejected": -3.697326183319092, "loss": 0.648, "nll_loss": 0.6301913261413574, "rewards/accuracies": 1.0, "rewards/chosen": -0.17808853089809418, "rewards/margins": 0.1916441023349762, "rewards/rejected": -0.36973264813423157, "step": 4384 }, { "epoch": 12.005475701574264, "grad_norm": 6.167577743530273, "learning_rate": 3.9945205479452054e-07, "log_odds_chosen": 2.2930173873901367, "log_odds_ratio": -0.20361392199993134, "logits/chosen": 1.0160961151123047, "logits/rejected": 1.0789265632629395, "logps/chosen": -2.1628918647766113, "logps/rejected": -4.304920196533203, "loss": 0.6204, "nll_loss": 0.6000758409500122, "rewards/accuracies": 1.0, "rewards/chosen": -0.2162891924381256, "rewards/margins": 0.21420282125473022, "rewards/rejected": -0.43049201369285583, "step": 4385 }, { "epoch": 12.008213552361397, "grad_norm": 4.79391622543335, "learning_rate": 3.993150684931507e-07, "log_odds_chosen": 4.295569896697998, "log_odds_ratio": -0.20352625846862793, "logits/chosen": 1.0325357913970947, "logits/rejected": 1.0768795013427734, "logps/chosen": -2.503913640975952, "logps/rejected": -6.721076488494873, "loss": 0.7571, "nll_loss": 0.7367452383041382, "rewards/accuracies": 0.875, "rewards/chosen": -0.2503913938999176, "rewards/margins": 0.42171624302864075, "rewards/rejected": -0.6721076369285583, "step": 4386 }, { "epoch": 12.010951403148528, "grad_norm": 5.564986705780029, "learning_rate": 3.991780821917808e-07, "log_odds_chosen": 2.0834462642669678, "log_odds_ratio": -0.31388184428215027, "logits/chosen": 1.097623586654663, "logits/rejected": 1.1439906358718872, "logps/chosen": -2.4204463958740234, "logps/rejected": -4.3657965660095215, "loss": 0.6544, "nll_loss": 0.622965931892395, "rewards/accuracies": 0.875, "rewards/chosen": -0.24204464256763458, "rewards/margins": 0.1945350468158722, "rewards/rejected": -0.43657970428466797, "step": 4387 }, { "epoch": 12.013689253935661, "grad_norm": 8.76879596710205, "learning_rate": 3.9904109589041094e-07, "log_odds_chosen": 1.2451350688934326, "log_odds_ratio": -0.55316162109375, "logits/chosen": 1.0336400270462036, "logits/rejected": 1.0053362846374512, "logps/chosen": -2.81010103225708, "logps/rejected": -3.940311908721924, "loss": 0.7048, "nll_loss": 0.6494814157485962, "rewards/accuracies": 0.75, "rewards/chosen": -0.28101012110710144, "rewards/margins": 0.11302110552787781, "rewards/rejected": -0.39403119683265686, "step": 4388 }, { "epoch": 12.016427104722792, "grad_norm": 4.594333648681641, "learning_rate": 3.989041095890411e-07, "log_odds_chosen": 2.338385820388794, "log_odds_ratio": -0.17743858695030212, "logits/chosen": 0.9723728895187378, "logits/rejected": 1.0381367206573486, "logps/chosen": -2.579435348510742, "logps/rejected": -4.822790622711182, "loss": 0.6354, "nll_loss": 0.6176835894584656, "rewards/accuracies": 1.0, "rewards/chosen": -0.2579435408115387, "rewards/margins": 0.22433553636074066, "rewards/rejected": -0.48227906227111816, "step": 4389 }, { "epoch": 12.019164955509925, "grad_norm": 5.228434085845947, "learning_rate": 3.987671232876712e-07, "log_odds_chosen": 1.6561840772628784, "log_odds_ratio": -0.24273338913917542, "logits/chosen": 0.7542301416397095, "logits/rejected": 0.7885488867759705, "logps/chosen": -2.412238359451294, "logps/rejected": -3.93992280960083, "loss": 0.6935, "nll_loss": 0.6691768169403076, "rewards/accuracies": 1.0, "rewards/chosen": -0.24122382700443268, "rewards/margins": 0.15276844799518585, "rewards/rejected": -0.39399227499961853, "step": 4390 }, { "epoch": 12.021902806297057, "grad_norm": 6.645068645477295, "learning_rate": 3.9863013698630134e-07, "log_odds_chosen": 2.963381052017212, "log_odds_ratio": -0.3063991665840149, "logits/chosen": 0.7973021268844604, "logits/rejected": 0.8447974324226379, "logps/chosen": -2.924570322036743, "logps/rejected": -5.83321475982666, "loss": 0.6579, "nll_loss": 0.6272111535072327, "rewards/accuracies": 0.875, "rewards/chosen": -0.29245704412460327, "rewards/margins": 0.29086440801620483, "rewards/rejected": -0.5833215117454529, "step": 4391 }, { "epoch": 12.02464065708419, "grad_norm": 6.381728172302246, "learning_rate": 3.984931506849315e-07, "log_odds_chosen": 1.269167423248291, "log_odds_ratio": -0.5737162828445435, "logits/chosen": 0.8205976486206055, "logits/rejected": 0.7979040145874023, "logps/chosen": -2.641228675842285, "logps/rejected": -3.7914226055145264, "loss": 0.7528, "nll_loss": 0.6954599022865295, "rewards/accuracies": 0.875, "rewards/chosen": -0.2641228437423706, "rewards/margins": 0.11501942574977875, "rewards/rejected": -0.37914228439331055, "step": 4392 }, { "epoch": 12.02737850787132, "grad_norm": 8.418173789978027, "learning_rate": 3.983561643835616e-07, "log_odds_chosen": 2.2357163429260254, "log_odds_ratio": -0.436435341835022, "logits/chosen": 0.9401305913925171, "logits/rejected": 0.8780529499053955, "logps/chosen": -2.967350482940674, "logps/rejected": -5.097104072570801, "loss": 0.7344, "nll_loss": 0.6907852292060852, "rewards/accuracies": 0.875, "rewards/chosen": -0.2967350482940674, "rewards/margins": 0.212975412607193, "rewards/rejected": -0.509710431098938, "step": 4393 }, { "epoch": 12.030116358658454, "grad_norm": 4.9948577880859375, "learning_rate": 3.9821917808219175e-07, "log_odds_chosen": 1.4435913562774658, "log_odds_ratio": -0.3706074655056, "logits/chosen": 0.6775873303413391, "logits/rejected": 0.6682850122451782, "logps/chosen": -2.3919782638549805, "logps/rejected": -3.7499988079071045, "loss": 0.7275, "nll_loss": 0.6904382705688477, "rewards/accuracies": 1.0, "rewards/chosen": -0.23919783532619476, "rewards/margins": 0.13580209016799927, "rewards/rejected": -0.37499991059303284, "step": 4394 }, { "epoch": 12.032854209445585, "grad_norm": 5.823260307312012, "learning_rate": 3.980821917808219e-07, "log_odds_chosen": 2.56345796585083, "log_odds_ratio": -0.14352098107337952, "logits/chosen": 0.9805144667625427, "logits/rejected": 0.9994398951530457, "logps/chosen": -1.9279334545135498, "logps/rejected": -4.2804412841796875, "loss": 0.6155, "nll_loss": 0.6011647582054138, "rewards/accuracies": 1.0, "rewards/chosen": -0.1927933394908905, "rewards/margins": 0.2352508008480072, "rewards/rejected": -0.4280441403388977, "step": 4395 }, { "epoch": 12.035592060232718, "grad_norm": 8.509506225585938, "learning_rate": 3.9794520547945205e-07, "log_odds_chosen": 1.1728053092956543, "log_odds_ratio": -0.7174383997917175, "logits/chosen": 0.6390535235404968, "logits/rejected": 0.618062436580658, "logps/chosen": -3.0787787437438965, "logps/rejected": -4.134695053100586, "loss": 0.8292, "nll_loss": 0.7574366331100464, "rewards/accuracies": 0.875, "rewards/chosen": -0.30787786841392517, "rewards/margins": 0.10559161007404327, "rewards/rejected": -0.41346949338912964, "step": 4396 }, { "epoch": 12.038329911019849, "grad_norm": 7.8070220947265625, "learning_rate": 3.9780821917808215e-07, "log_odds_chosen": 1.4871752262115479, "log_odds_ratio": -0.6607305407524109, "logits/chosen": 0.9798903465270996, "logits/rejected": 1.0280241966247559, "logps/chosen": -2.8057045936584473, "logps/rejected": -4.210207462310791, "loss": 0.7327, "nll_loss": 0.6666760444641113, "rewards/accuracies": 0.875, "rewards/chosen": -0.28057047724723816, "rewards/margins": 0.14045023918151855, "rewards/rejected": -0.4210207164287567, "step": 4397 }, { "epoch": 12.041067761806982, "grad_norm": 6.949318885803223, "learning_rate": 3.9767123287671236e-07, "log_odds_chosen": 1.8471369743347168, "log_odds_ratio": -0.3094072937965393, "logits/chosen": 0.7352029085159302, "logits/rejected": 0.6795620322227478, "logps/chosen": -2.822176933288574, "logps/rejected": -4.583597183227539, "loss": 0.8955, "nll_loss": 0.8645334243774414, "rewards/accuracies": 0.875, "rewards/chosen": -0.28221768140792847, "rewards/margins": 0.17614203691482544, "rewards/rejected": -0.4583597183227539, "step": 4398 }, { "epoch": 12.043805612594113, "grad_norm": 5.55206823348999, "learning_rate": 3.9753424657534245e-07, "log_odds_chosen": 0.7744684815406799, "log_odds_ratio": -0.407513827085495, "logits/chosen": 0.7443338632583618, "logits/rejected": 0.7200046181678772, "logps/chosen": -1.9949426651000977, "logps/rejected": -2.6984610557556152, "loss": 0.6282, "nll_loss": 0.5874004364013672, "rewards/accuracies": 1.0, "rewards/chosen": -0.19949427247047424, "rewards/margins": 0.07035183906555176, "rewards/rejected": -0.269846111536026, "step": 4399 }, { "epoch": 12.046543463381246, "grad_norm": 5.170628547668457, "learning_rate": 3.9739726027397255e-07, "log_odds_chosen": 2.8393990993499756, "log_odds_ratio": -0.15917271375656128, "logits/chosen": 0.9554899334907532, "logits/rejected": 0.9762040376663208, "logps/chosen": -2.210836410522461, "logps/rejected": -4.951146125793457, "loss": 0.6322, "nll_loss": 0.6162492632865906, "rewards/accuracies": 1.0, "rewards/chosen": -0.2210836410522461, "rewards/margins": 0.27403098344802856, "rewards/rejected": -0.49511462450027466, "step": 4400 }, { "epoch": 12.049281314168377, "grad_norm": 5.3917059898376465, "learning_rate": 3.972602739726027e-07, "log_odds_chosen": 1.7074856758117676, "log_odds_ratio": -0.29509279131889343, "logits/chosen": 0.7300581336021423, "logits/rejected": 0.7981248497962952, "logps/chosen": -3.0296993255615234, "logps/rejected": -4.670039653778076, "loss": 0.8128, "nll_loss": 0.7832750082015991, "rewards/accuracies": 0.875, "rewards/chosen": -0.30296993255615234, "rewards/margins": 0.16403399407863617, "rewards/rejected": -0.4670039713382721, "step": 4401 }, { "epoch": 12.05201916495551, "grad_norm": 4.841877460479736, "learning_rate": 3.9712328767123286e-07, "log_odds_chosen": 2.8886661529541016, "log_odds_ratio": -0.22235243022441864, "logits/chosen": 0.9616329073905945, "logits/rejected": 0.989081859588623, "logps/chosen": -2.333954334259033, "logps/rejected": -5.119579792022705, "loss": 0.7265, "nll_loss": 0.7042455077171326, "rewards/accuracies": 0.875, "rewards/chosen": -0.23339541256427765, "rewards/margins": 0.2785625755786896, "rewards/rejected": -0.5119580030441284, "step": 4402 }, { "epoch": 12.054757015742641, "grad_norm": 7.365901470184326, "learning_rate": 3.96986301369863e-07, "log_odds_chosen": 1.6718323230743408, "log_odds_ratio": -0.4045476019382477, "logits/chosen": 0.654843807220459, "logits/rejected": 0.6247732639312744, "logps/chosen": -2.151167392730713, "logps/rejected": -3.685620069503784, "loss": 0.6685, "nll_loss": 0.6280577182769775, "rewards/accuracies": 0.875, "rewards/chosen": -0.2151167392730713, "rewards/margins": 0.1534452736377716, "rewards/rejected": -0.3685620427131653, "step": 4403 }, { "epoch": 12.057494866529774, "grad_norm": 9.963303565979004, "learning_rate": 3.968493150684931e-07, "log_odds_chosen": 2.0563018321990967, "log_odds_ratio": -0.5028834939002991, "logits/chosen": 0.8780487775802612, "logits/rejected": 0.8620296716690063, "logps/chosen": -2.388467311859131, "logps/rejected": -4.384552955627441, "loss": 0.7312, "nll_loss": 0.6809272766113281, "rewards/accuracies": 0.875, "rewards/chosen": -0.23884671926498413, "rewards/margins": 0.19960857927799225, "rewards/rejected": -0.4384553134441376, "step": 4404 }, { "epoch": 12.060232717316905, "grad_norm": 5.156576156616211, "learning_rate": 3.967123287671233e-07, "log_odds_chosen": 1.911384105682373, "log_odds_ratio": -0.2528013586997986, "logits/chosen": 0.7047362923622131, "logits/rejected": 0.7362308502197266, "logps/chosen": -2.2358787059783936, "logps/rejected": -4.034055233001709, "loss": 0.7794, "nll_loss": 0.7540949583053589, "rewards/accuracies": 1.0, "rewards/chosen": -0.22358787059783936, "rewards/margins": 0.17981764674186707, "rewards/rejected": -0.4034055173397064, "step": 4405 }, { "epoch": 12.062970568104038, "grad_norm": 6.163547515869141, "learning_rate": 3.965753424657534e-07, "log_odds_chosen": 2.2434492111206055, "log_odds_ratio": -0.25322216749191284, "logits/chosen": 0.8817455172538757, "logits/rejected": 0.9186588525772095, "logps/chosen": -2.912830114364624, "logps/rejected": -5.071820259094238, "loss": 0.8354, "nll_loss": 0.8100953698158264, "rewards/accuracies": 1.0, "rewards/chosen": -0.2912830412387848, "rewards/margins": 0.2158990204334259, "rewards/rejected": -0.5071820020675659, "step": 4406 }, { "epoch": 12.06570841889117, "grad_norm": 8.047924041748047, "learning_rate": 3.964383561643835e-07, "log_odds_chosen": 2.14387583732605, "log_odds_ratio": -0.4237460494041443, "logits/chosen": 0.8714292049407959, "logits/rejected": 0.8590148091316223, "logps/chosen": -2.2962241172790527, "logps/rejected": -4.302692890167236, "loss": 0.8072, "nll_loss": 0.7648281455039978, "rewards/accuracies": 0.875, "rewards/chosen": -0.22962239384651184, "rewards/margins": 0.20064690709114075, "rewards/rejected": -0.4302693009376526, "step": 4407 }, { "epoch": 12.068446269678303, "grad_norm": 9.685803413391113, "learning_rate": 3.9630136986301366e-07, "log_odds_chosen": 2.076303005218506, "log_odds_ratio": -0.41199469566345215, "logits/chosen": 1.1630862951278687, "logits/rejected": 1.1673707962036133, "logps/chosen": -2.9466090202331543, "logps/rejected": -4.950148582458496, "loss": 0.7974, "nll_loss": 0.7561761140823364, "rewards/accuracies": 0.75, "rewards/chosen": -0.29466089606285095, "rewards/margins": 0.20035399496555328, "rewards/rejected": -0.49501487612724304, "step": 4408 }, { "epoch": 12.071184120465434, "grad_norm": 5.7818603515625, "learning_rate": 3.961643835616438e-07, "log_odds_chosen": 0.6754441261291504, "log_odds_ratio": -0.5298779606819153, "logits/chosen": 0.802818238735199, "logits/rejected": 0.8142576217651367, "logps/chosen": -1.8120530843734741, "logps/rejected": -2.3322651386260986, "loss": 0.6394, "nll_loss": 0.5864456295967102, "rewards/accuracies": 0.75, "rewards/chosen": -0.18120528757572174, "rewards/margins": 0.052021220326423645, "rewards/rejected": -0.23322652280330658, "step": 4409 }, { "epoch": 12.073921971252567, "grad_norm": 7.4440598487854, "learning_rate": 3.9602739726027397e-07, "log_odds_chosen": 2.626197099685669, "log_odds_ratio": -0.22139288485050201, "logits/chosen": 0.6755943298339844, "logits/rejected": 0.6966205835342407, "logps/chosen": -2.573633909225464, "logps/rejected": -5.10557222366333, "loss": 0.8564, "nll_loss": 0.834309458732605, "rewards/accuracies": 1.0, "rewards/chosen": -0.2573634088039398, "rewards/margins": 0.25319385528564453, "rewards/rejected": -0.510557234287262, "step": 4410 }, { "epoch": 12.0766598220397, "grad_norm": 7.358902931213379, "learning_rate": 3.9589041095890407e-07, "log_odds_chosen": 1.4005935192108154, "log_odds_ratio": -0.6756134629249573, "logits/chosen": 1.103674292564392, "logits/rejected": 1.1468735933303833, "logps/chosen": -2.814450979232788, "logps/rejected": -4.168038368225098, "loss": 0.7889, "nll_loss": 0.7213113903999329, "rewards/accuracies": 0.75, "rewards/chosen": -0.28144511580467224, "rewards/margins": 0.13535872101783752, "rewards/rejected": -0.41680383682250977, "step": 4411 }, { "epoch": 12.07939767282683, "grad_norm": 5.218108654022217, "learning_rate": 3.9575342465753427e-07, "log_odds_chosen": 1.9887809753417969, "log_odds_ratio": -0.222461998462677, "logits/chosen": 0.7680235505104065, "logits/rejected": 0.7909941077232361, "logps/chosen": -1.8104605674743652, "logps/rejected": -3.5860958099365234, "loss": 0.6343, "nll_loss": 0.6120458841323853, "rewards/accuracies": 1.0, "rewards/chosen": -0.18104606866836548, "rewards/margins": 0.17756350338459015, "rewards/rejected": -0.35860955715179443, "step": 4412 }, { "epoch": 12.082135523613964, "grad_norm": 6.641576290130615, "learning_rate": 3.9561643835616437e-07, "log_odds_chosen": 0.9320279359817505, "log_odds_ratio": -0.4628964364528656, "logits/chosen": 0.9783000349998474, "logits/rejected": 0.939575731754303, "logps/chosen": -1.7020460367202759, "logps/rejected": -2.530017614364624, "loss": 0.6114, "nll_loss": 0.5650666356086731, "rewards/accuracies": 0.625, "rewards/chosen": -0.17020460963249207, "rewards/margins": 0.08279716968536377, "rewards/rejected": -0.25300177931785583, "step": 4413 }, { "epoch": 12.084873374401095, "grad_norm": 4.949863910675049, "learning_rate": 3.9547945205479447e-07, "log_odds_chosen": 1.2922033071517944, "log_odds_ratio": -0.3846854567527771, "logits/chosen": 0.9690518379211426, "logits/rejected": 1.0446834564208984, "logps/chosen": -2.330566883087158, "logps/rejected": -3.495333433151245, "loss": 0.6316, "nll_loss": 0.5931072235107422, "rewards/accuracies": 0.75, "rewards/chosen": -0.23305672407150269, "rewards/margins": 0.1164766401052475, "rewards/rejected": -0.349533349275589, "step": 4414 }, { "epoch": 12.087611225188228, "grad_norm": 5.464788913726807, "learning_rate": 3.953424657534246e-07, "log_odds_chosen": 0.9014280438423157, "log_odds_ratio": -0.5039928555488586, "logits/chosen": 0.755623996257782, "logits/rejected": 0.7925863862037659, "logps/chosen": -2.131471633911133, "logps/rejected": -2.974795341491699, "loss": 0.7094, "nll_loss": 0.6590263247489929, "rewards/accuracies": 0.625, "rewards/chosen": -0.21314717829227448, "rewards/margins": 0.08433237671852112, "rewards/rejected": -0.2974795699119568, "step": 4415 }, { "epoch": 12.09034907597536, "grad_norm": 4.636231422424316, "learning_rate": 3.952054794520548e-07, "log_odds_chosen": 1.7604626417160034, "log_odds_ratio": -0.2254256010055542, "logits/chosen": 0.6907973289489746, "logits/rejected": 0.7090015411376953, "logps/chosen": -1.9191601276397705, "logps/rejected": -3.5152645111083984, "loss": 0.6557, "nll_loss": 0.6331546306610107, "rewards/accuracies": 1.0, "rewards/chosen": -0.19191600382328033, "rewards/margins": 0.15961045026779175, "rewards/rejected": -0.3515264391899109, "step": 4416 }, { "epoch": 12.093086926762492, "grad_norm": 5.431897163391113, "learning_rate": 3.9506849315068493e-07, "log_odds_chosen": 1.7960166931152344, "log_odds_ratio": -0.2346765249967575, "logits/chosen": 0.8788624405860901, "logits/rejected": 0.8098501563072205, "logps/chosen": -1.6595404148101807, "logps/rejected": -3.2710108757019043, "loss": 0.714, "nll_loss": 0.690516471862793, "rewards/accuracies": 1.0, "rewards/chosen": -0.16595405340194702, "rewards/margins": 0.16114702820777893, "rewards/rejected": -0.32710111141204834, "step": 4417 }, { "epoch": 12.095824777549623, "grad_norm": 5.001114368438721, "learning_rate": 3.94931506849315e-07, "log_odds_chosen": 2.2323076725006104, "log_odds_ratio": -0.2547876536846161, "logits/chosen": 0.8973836898803711, "logits/rejected": 0.9228030443191528, "logps/chosen": -2.16225004196167, "logps/rejected": -4.263007164001465, "loss": 0.6378, "nll_loss": 0.6122789978981018, "rewards/accuracies": 1.0, "rewards/chosen": -0.21622499823570251, "rewards/margins": 0.21007570624351501, "rewards/rejected": -0.4263007342815399, "step": 4418 }, { "epoch": 12.098562628336756, "grad_norm": 5.8499674797058105, "learning_rate": 3.9479452054794523e-07, "log_odds_chosen": 2.529156446456909, "log_odds_ratio": -0.5128903388977051, "logits/chosen": 0.7289605736732483, "logits/rejected": 0.7222282886505127, "logps/chosen": -2.3323652744293213, "logps/rejected": -4.698238372802734, "loss": 0.7668, "nll_loss": 0.7155494093894958, "rewards/accuracies": 0.875, "rewards/chosen": -0.23323653638362885, "rewards/margins": 0.2365873008966446, "rewards/rejected": -0.46982383728027344, "step": 4419 }, { "epoch": 12.101300479123887, "grad_norm": 6.175076007843018, "learning_rate": 3.9465753424657533e-07, "log_odds_chosen": 2.633427143096924, "log_odds_ratio": -0.20841790735721588, "logits/chosen": 0.7293556928634644, "logits/rejected": 0.7111930251121521, "logps/chosen": -2.0613067150115967, "logps/rejected": -4.585803031921387, "loss": 0.7404, "nll_loss": 0.7195384502410889, "rewards/accuracies": 1.0, "rewards/chosen": -0.20613068342208862, "rewards/margins": 0.2524496614933014, "rewards/rejected": -0.45858034491539, "step": 4420 }, { "epoch": 12.10403832991102, "grad_norm": 5.101027488708496, "learning_rate": 3.9452054794520543e-07, "log_odds_chosen": 1.6790003776550293, "log_odds_ratio": -0.25840625166893005, "logits/chosen": 0.8414466977119446, "logits/rejected": 0.8674905300140381, "logps/chosen": -2.0219228267669678, "logps/rejected": -3.591458797454834, "loss": 0.6646, "nll_loss": 0.638776421546936, "rewards/accuracies": 0.875, "rewards/chosen": -0.2021922916173935, "rewards/margins": 0.1569536030292511, "rewards/rejected": -0.3591459095478058, "step": 4421 }, { "epoch": 12.106776180698152, "grad_norm": 6.781673908233643, "learning_rate": 3.943835616438356e-07, "log_odds_chosen": 2.789553642272949, "log_odds_ratio": -0.26550132036209106, "logits/chosen": 0.6963150501251221, "logits/rejected": 0.7012839317321777, "logps/chosen": -2.4900457859039307, "logps/rejected": -5.180422306060791, "loss": 0.6241, "nll_loss": 0.5975344181060791, "rewards/accuracies": 0.875, "rewards/chosen": -0.2490045726299286, "rewards/margins": 0.2690376341342926, "rewards/rejected": -0.5180422067642212, "step": 4422 }, { "epoch": 12.109514031485284, "grad_norm": 5.757086753845215, "learning_rate": 3.9424657534246573e-07, "log_odds_chosen": 0.8353726267814636, "log_odds_ratio": -0.5894120931625366, "logits/chosen": 0.9573286175727844, "logits/rejected": 0.9722843170166016, "logps/chosen": -2.8348865509033203, "logps/rejected": -3.567399501800537, "loss": 0.6424, "nll_loss": 0.5834282636642456, "rewards/accuracies": 0.625, "rewards/chosen": -0.2834886610507965, "rewards/margins": 0.07325129956007004, "rewards/rejected": -0.35673993825912476, "step": 4423 }, { "epoch": 12.112251882272416, "grad_norm": 5.673149585723877, "learning_rate": 3.941095890410959e-07, "log_odds_chosen": 1.1788824796676636, "log_odds_ratio": -0.3111332654953003, "logits/chosen": 0.7724194526672363, "logits/rejected": 0.7581865787506104, "logps/chosen": -2.2145638465881348, "logps/rejected": -3.259744167327881, "loss": 0.6557, "nll_loss": 0.624581515789032, "rewards/accuracies": 1.0, "rewards/chosen": -0.2214564085006714, "rewards/margins": 0.10451799631118774, "rewards/rejected": -0.32597440481185913, "step": 4424 }, { "epoch": 12.114989733059549, "grad_norm": 4.860079288482666, "learning_rate": 3.93972602739726e-07, "log_odds_chosen": 2.424572229385376, "log_odds_ratio": -0.14522488415241241, "logits/chosen": 0.9372607469558716, "logits/rejected": 0.9208564758300781, "logps/chosen": -2.336062431335449, "logps/rejected": -4.614927291870117, "loss": 0.6169, "nll_loss": 0.6023432612419128, "rewards/accuracies": 1.0, "rewards/chosen": -0.233606219291687, "rewards/margins": 0.22788651287555695, "rewards/rejected": -0.46149274706840515, "step": 4425 }, { "epoch": 12.11772758384668, "grad_norm": 5.530359268188477, "learning_rate": 3.938356164383562e-07, "log_odds_chosen": 2.026409864425659, "log_odds_ratio": -0.3094991445541382, "logits/chosen": 0.7235874533653259, "logits/rejected": 0.8271608948707581, "logps/chosen": -2.0290658473968506, "logps/rejected": -3.898793935775757, "loss": 0.7681, "nll_loss": 0.7371426820755005, "rewards/accuracies": 0.875, "rewards/chosen": -0.20290657877922058, "rewards/margins": 0.18697282671928406, "rewards/rejected": -0.389879435300827, "step": 4426 }, { "epoch": 12.120465434633813, "grad_norm": 5.261804580688477, "learning_rate": 3.936986301369863e-07, "log_odds_chosen": 0.7148914337158203, "log_odds_ratio": -0.44851598143577576, "logits/chosen": 1.0720770359039307, "logits/rejected": 1.0155103206634521, "logps/chosen": -1.9171202182769775, "logps/rejected": -2.5617198944091797, "loss": 0.6675, "nll_loss": 0.6226427555084229, "rewards/accuracies": 0.875, "rewards/chosen": -0.19171202182769775, "rewards/margins": 0.06445997953414917, "rewards/rejected": -0.2561720013618469, "step": 4427 }, { "epoch": 12.123203285420944, "grad_norm": 5.85692834854126, "learning_rate": 3.935616438356164e-07, "log_odds_chosen": 1.585780143737793, "log_odds_ratio": -0.2820378839969635, "logits/chosen": 0.6759812831878662, "logits/rejected": 0.6618329286575317, "logps/chosen": -2.020458936691284, "logps/rejected": -3.4726853370666504, "loss": 0.6721, "nll_loss": 0.6438932418823242, "rewards/accuracies": 1.0, "rewards/chosen": -0.20204590260982513, "rewards/margins": 0.14522261917591095, "rewards/rejected": -0.3472685217857361, "step": 4428 }, { "epoch": 12.125941136208077, "grad_norm": 4.891250133514404, "learning_rate": 3.934246575342466e-07, "log_odds_chosen": 2.4195563793182373, "log_odds_ratio": -0.19718675315380096, "logits/chosen": 0.9209855794906616, "logits/rejected": 0.8941515684127808, "logps/chosen": -2.1670172214508057, "logps/rejected": -4.463320732116699, "loss": 0.7062, "nll_loss": 0.6864578127861023, "rewards/accuracies": 1.0, "rewards/chosen": -0.21670173108577728, "rewards/margins": 0.22963036596775055, "rewards/rejected": -0.44633209705352783, "step": 4429 }, { "epoch": 12.128678986995208, "grad_norm": 6.351071834564209, "learning_rate": 3.932876712328767e-07, "log_odds_chosen": 1.986046314239502, "log_odds_ratio": -0.21520113945007324, "logits/chosen": 0.7971707582473755, "logits/rejected": 0.7376567125320435, "logps/chosen": -2.2026174068450928, "logps/rejected": -4.0647077560424805, "loss": 0.6319, "nll_loss": 0.6103885769844055, "rewards/accuracies": 1.0, "rewards/chosen": -0.22026173770427704, "rewards/margins": 0.18620899319648743, "rewards/rejected": -0.40647077560424805, "step": 4430 }, { "epoch": 12.131416837782341, "grad_norm": 5.931532382965088, "learning_rate": 3.9315068493150684e-07, "log_odds_chosen": 2.5839176177978516, "log_odds_ratio": -0.279890775680542, "logits/chosen": 0.9459648728370667, "logits/rejected": 0.9433670043945312, "logps/chosen": -2.2135672569274902, "logps/rejected": -4.646885871887207, "loss": 0.7722, "nll_loss": 0.7442039251327515, "rewards/accuracies": 0.875, "rewards/chosen": -0.22135674953460693, "rewards/margins": 0.24333184957504272, "rewards/rejected": -0.46468859910964966, "step": 4431 }, { "epoch": 12.134154688569472, "grad_norm": 5.595576286315918, "learning_rate": 3.9301369863013694e-07, "log_odds_chosen": 2.442136526107788, "log_odds_ratio": -0.190800279378891, "logits/chosen": 1.0225528478622437, "logits/rejected": 1.0513083934783936, "logps/chosen": -2.173414707183838, "logps/rejected": -4.464242935180664, "loss": 0.6804, "nll_loss": 0.6613225340843201, "rewards/accuracies": 1.0, "rewards/chosen": -0.21734146773815155, "rewards/margins": 0.2290828675031662, "rewards/rejected": -0.44642436504364014, "step": 4432 }, { "epoch": 12.136892539356605, "grad_norm": 5.866451263427734, "learning_rate": 3.9287671232876715e-07, "log_odds_chosen": 1.5286239385604858, "log_odds_ratio": -0.28669148683547974, "logits/chosen": 0.9038454294204712, "logits/rejected": 0.9851769208908081, "logps/chosen": -2.6876583099365234, "logps/rejected": -4.146707534790039, "loss": 0.7241, "nll_loss": 0.695424497127533, "rewards/accuracies": 1.0, "rewards/chosen": -0.2687658369541168, "rewards/margins": 0.14590492844581604, "rewards/rejected": -0.41467076539993286, "step": 4433 }, { "epoch": 12.139630390143736, "grad_norm": 5.29115104675293, "learning_rate": 3.9273972602739725e-07, "log_odds_chosen": 1.7415401935577393, "log_odds_ratio": -0.3452873229980469, "logits/chosen": 0.8316064476966858, "logits/rejected": 0.8372138738632202, "logps/chosen": -1.8497254848480225, "logps/rejected": -3.475046157836914, "loss": 0.7002, "nll_loss": 0.6656308770179749, "rewards/accuracies": 0.875, "rewards/chosen": -0.18497255444526672, "rewards/margins": 0.16253209114074707, "rewards/rejected": -0.3475046455860138, "step": 4434 }, { "epoch": 12.14236824093087, "grad_norm": 5.072933197021484, "learning_rate": 3.9260273972602735e-07, "log_odds_chosen": 0.9767937660217285, "log_odds_ratio": -0.4511534571647644, "logits/chosen": 0.6670573949813843, "logits/rejected": 0.6872082948684692, "logps/chosen": -2.193037748336792, "logps/rejected": -3.0927891731262207, "loss": 0.8109, "nll_loss": 0.7658228874206543, "rewards/accuracies": 0.875, "rewards/chosen": -0.21930377185344696, "rewards/margins": 0.08997515588998795, "rewards/rejected": -0.3092789351940155, "step": 4435 }, { "epoch": 12.145106091718002, "grad_norm": 5.9376606941223145, "learning_rate": 3.9246575342465755e-07, "log_odds_chosen": 1.7878245115280151, "log_odds_ratio": -0.26928627490997314, "logits/chosen": 0.5931728482246399, "logits/rejected": 0.6520677804946899, "logps/chosen": -2.516542434692383, "logps/rejected": -4.199759006500244, "loss": 0.8924, "nll_loss": 0.8654338121414185, "rewards/accuracies": 0.875, "rewards/chosen": -0.2516542673110962, "rewards/margins": 0.16832169890403748, "rewards/rejected": -0.4199759364128113, "step": 4436 }, { "epoch": 12.147843942505133, "grad_norm": 5.676847457885742, "learning_rate": 3.9232876712328765e-07, "log_odds_chosen": 2.193995475769043, "log_odds_ratio": -0.3321683406829834, "logits/chosen": 0.8421726226806641, "logits/rejected": 0.8897954225540161, "logps/chosen": -2.2414894104003906, "logps/rejected": -4.36779260635376, "loss": 0.7039, "nll_loss": 0.6706861257553101, "rewards/accuracies": 0.875, "rewards/chosen": -0.2241489440202713, "rewards/margins": 0.21263030171394348, "rewards/rejected": -0.436779260635376, "step": 4437 }, { "epoch": 12.150581793292266, "grad_norm": 4.982344150543213, "learning_rate": 3.921917808219178e-07, "log_odds_chosen": 1.9255248308181763, "log_odds_ratio": -0.3773287534713745, "logits/chosen": 0.9094405770301819, "logits/rejected": 0.9940695762634277, "logps/chosen": -2.998018980026245, "logps/rejected": -4.826744556427002, "loss": 0.672, "nll_loss": 0.6342551112174988, "rewards/accuracies": 0.875, "rewards/chosen": -0.29980194568634033, "rewards/margins": 0.18287256360054016, "rewards/rejected": -0.4826744794845581, "step": 4438 }, { "epoch": 12.153319644079398, "grad_norm": 5.370428085327148, "learning_rate": 3.920547945205479e-07, "log_odds_chosen": 2.0735931396484375, "log_odds_ratio": -0.18069154024124146, "logits/chosen": 0.9552267789840698, "logits/rejected": 0.9736443758010864, "logps/chosen": -2.3451037406921387, "logps/rejected": -4.3034443855285645, "loss": 0.7471, "nll_loss": 0.7290332317352295, "rewards/accuracies": 1.0, "rewards/chosen": -0.2345103919506073, "rewards/margins": 0.19583407044410706, "rewards/rejected": -0.43034443259239197, "step": 4439 }, { "epoch": 12.15605749486653, "grad_norm": 5.650258541107178, "learning_rate": 3.919178082191781e-07, "log_odds_chosen": 2.185335159301758, "log_odds_ratio": -0.3825429081916809, "logits/chosen": 0.7614016532897949, "logits/rejected": 0.7532752156257629, "logps/chosen": -1.8297717571258545, "logps/rejected": -3.8084144592285156, "loss": 0.721, "nll_loss": 0.6827127933502197, "rewards/accuracies": 0.875, "rewards/chosen": -0.18297718465328217, "rewards/margins": 0.19786429405212402, "rewards/rejected": -0.3808414936065674, "step": 4440 }, { "epoch": 12.158795345653662, "grad_norm": 6.162161827087402, "learning_rate": 3.917808219178082e-07, "log_odds_chosen": 2.6012046337127686, "log_odds_ratio": -0.22615386545658112, "logits/chosen": 0.9713039398193359, "logits/rejected": 0.9710359573364258, "logps/chosen": -2.196276903152466, "logps/rejected": -4.657329559326172, "loss": 0.7155, "nll_loss": 0.6928373575210571, "rewards/accuracies": 0.875, "rewards/chosen": -0.21962769329547882, "rewards/margins": 0.24610528349876404, "rewards/rejected": -0.46573299169540405, "step": 4441 }, { "epoch": 12.161533196440795, "grad_norm": 4.399871826171875, "learning_rate": 3.916438356164383e-07, "log_odds_chosen": 1.8087706565856934, "log_odds_ratio": -0.2958398461341858, "logits/chosen": 0.9400433897972107, "logits/rejected": 0.9527105689048767, "logps/chosen": -1.8550622463226318, "logps/rejected": -3.541268825531006, "loss": 0.5987, "nll_loss": 0.5691320896148682, "rewards/accuracies": 1.0, "rewards/chosen": -0.18550622463226318, "rewards/margins": 0.16862064599990845, "rewards/rejected": -0.35412687063217163, "step": 4442 }, { "epoch": 12.164271047227926, "grad_norm": 4.712357044219971, "learning_rate": 3.915068493150685e-07, "log_odds_chosen": 3.3504223823547363, "log_odds_ratio": -0.2121332436800003, "logits/chosen": 0.5376898646354675, "logits/rejected": 0.5266256332397461, "logps/chosen": -1.973877191543579, "logps/rejected": -5.1893181800842285, "loss": 0.7121, "nll_loss": 0.6908648014068604, "rewards/accuracies": 1.0, "rewards/chosen": -0.1973877102136612, "rewards/margins": 0.3215440809726715, "rewards/rejected": -0.5189318060874939, "step": 4443 }, { "epoch": 12.167008898015059, "grad_norm": 5.238769054412842, "learning_rate": 3.913698630136986e-07, "log_odds_chosen": 2.5898802280426025, "log_odds_ratio": -0.12340140342712402, "logits/chosen": 0.8334083557128906, "logits/rejected": 0.9199353456497192, "logps/chosen": -1.9311442375183105, "logps/rejected": -4.369309425354004, "loss": 0.5893, "nll_loss": 0.5769281387329102, "rewards/accuracies": 1.0, "rewards/chosen": -0.19311441481113434, "rewards/margins": 0.243816539645195, "rewards/rejected": -0.43693095445632935, "step": 4444 }, { "epoch": 12.16974674880219, "grad_norm": 6.011771202087402, "learning_rate": 3.9123287671232876e-07, "log_odds_chosen": 1.5672725439071655, "log_odds_ratio": -0.565173327922821, "logits/chosen": 0.7384350895881653, "logits/rejected": 0.8123253583908081, "logps/chosen": -2.5764598846435547, "logps/rejected": -4.052382469177246, "loss": 0.9005, "nll_loss": 0.8439887762069702, "rewards/accuracies": 0.75, "rewards/chosen": -0.25764599442481995, "rewards/margins": 0.14759227633476257, "rewards/rejected": -0.4052382707595825, "step": 4445 }, { "epoch": 12.172484599589323, "grad_norm": 5.8611016273498535, "learning_rate": 3.9109589041095886e-07, "log_odds_chosen": 2.8323569297790527, "log_odds_ratio": -0.1415475308895111, "logits/chosen": 1.016122817993164, "logits/rejected": 1.0234020948410034, "logps/chosen": -2.0191195011138916, "logps/rejected": -4.660923004150391, "loss": 0.6233, "nll_loss": 0.6091653108596802, "rewards/accuracies": 1.0, "rewards/chosen": -0.20191195607185364, "rewards/margins": 0.26418033242225647, "rewards/rejected": -0.4660922884941101, "step": 4446 }, { "epoch": 12.175222450376454, "grad_norm": 5.9672746658325195, "learning_rate": 3.90958904109589e-07, "log_odds_chosen": 1.4011101722717285, "log_odds_ratio": -0.3193950057029724, "logits/chosen": 0.6967728137969971, "logits/rejected": 0.6910653710365295, "logps/chosen": -2.1358954906463623, "logps/rejected": -3.4205660820007324, "loss": 0.7895, "nll_loss": 0.7575268745422363, "rewards/accuracies": 0.875, "rewards/chosen": -0.21358954906463623, "rewards/margins": 0.12846708297729492, "rewards/rejected": -0.34205663204193115, "step": 4447 }, { "epoch": 12.177960301163587, "grad_norm": 4.576272964477539, "learning_rate": 3.9082191780821917e-07, "log_odds_chosen": 1.9666352272033691, "log_odds_ratio": -0.25723370909690857, "logits/chosen": 0.7986896634101868, "logits/rejected": 0.8188700079917908, "logps/chosen": -1.7549798488616943, "logps/rejected": -3.5654172897338867, "loss": 0.6182, "nll_loss": 0.5924656391143799, "rewards/accuracies": 1.0, "rewards/chosen": -0.17549799382686615, "rewards/margins": 0.18104372918605804, "rewards/rejected": -0.3565417230129242, "step": 4448 }, { "epoch": 12.180698151950718, "grad_norm": 4.779268741607666, "learning_rate": 3.9068493150684926e-07, "log_odds_chosen": 1.3789422512054443, "log_odds_ratio": -0.3966297209262848, "logits/chosen": 0.6461586952209473, "logits/rejected": 0.7400500774383545, "logps/chosen": -2.4866485595703125, "logps/rejected": -3.768795967102051, "loss": 0.7347, "nll_loss": 0.6950761079788208, "rewards/accuracies": 0.75, "rewards/chosen": -0.24866487085819244, "rewards/margins": 0.12821471691131592, "rewards/rejected": -0.37687957286834717, "step": 4449 }, { "epoch": 12.183436002737851, "grad_norm": 5.778764724731445, "learning_rate": 3.9054794520547947e-07, "log_odds_chosen": 3.1852850914001465, "log_odds_ratio": -0.33808520436286926, "logits/chosen": 0.8537333011627197, "logits/rejected": 0.9068893194198608, "logps/chosen": -2.101823091506958, "logps/rejected": -5.131763458251953, "loss": 0.8144, "nll_loss": 0.7806122899055481, "rewards/accuracies": 0.75, "rewards/chosen": -0.210182324051857, "rewards/margins": 0.302994042634964, "rewards/rejected": -0.5131763815879822, "step": 4450 }, { "epoch": 12.186173853524982, "grad_norm": 4.52207612991333, "learning_rate": 3.9041095890410957e-07, "log_odds_chosen": 2.8259971141815186, "log_odds_ratio": -0.1571500599384308, "logits/chosen": 0.8879261016845703, "logits/rejected": 0.9440337419509888, "logps/chosen": -2.129733085632324, "logps/rejected": -4.841948509216309, "loss": 0.6016, "nll_loss": 0.5858821868896484, "rewards/accuracies": 1.0, "rewards/chosen": -0.21297331154346466, "rewards/margins": 0.2712215781211853, "rewards/rejected": -0.48419490456581116, "step": 4451 }, { "epoch": 12.188911704312115, "grad_norm": 4.403652667999268, "learning_rate": 3.902739726027397e-07, "log_odds_chosen": 3.102919340133667, "log_odds_ratio": -0.15626662969589233, "logits/chosen": 0.9933162331581116, "logits/rejected": 0.9858589172363281, "logps/chosen": -2.2293622493743896, "logps/rejected": -5.209291934967041, "loss": 0.7015, "nll_loss": 0.6859155893325806, "rewards/accuracies": 1.0, "rewards/chosen": -0.2229362279176712, "rewards/margins": 0.297993004322052, "rewards/rejected": -0.520929217338562, "step": 4452 }, { "epoch": 12.191649555099247, "grad_norm": 6.446103096008301, "learning_rate": 3.901369863013698e-07, "log_odds_chosen": 1.826666235923767, "log_odds_ratio": -0.4166640043258667, "logits/chosen": 0.9275586009025574, "logits/rejected": 0.8900603652000427, "logps/chosen": -1.6848050355911255, "logps/rejected": -3.354992151260376, "loss": 0.6188, "nll_loss": 0.5771016478538513, "rewards/accuracies": 0.875, "rewards/chosen": -0.1684805005788803, "rewards/margins": 0.16701869666576385, "rewards/rejected": -0.33549919724464417, "step": 4453 }, { "epoch": 12.19438740588638, "grad_norm": 4.939554691314697, "learning_rate": 3.8999999999999997e-07, "log_odds_chosen": 2.0822272300720215, "log_odds_ratio": -0.2790854275226593, "logits/chosen": 0.8028137683868408, "logits/rejected": 0.8369084596633911, "logps/chosen": -1.9916553497314453, "logps/rejected": -3.971917152404785, "loss": 0.7367, "nll_loss": 0.7087706923484802, "rewards/accuracies": 1.0, "rewards/chosen": -0.19916555285453796, "rewards/margins": 0.1980261504650116, "rewards/rejected": -0.3971916735172272, "step": 4454 }, { "epoch": 12.19712525667351, "grad_norm": 5.545426845550537, "learning_rate": 3.898630136986301e-07, "log_odds_chosen": 1.259263277053833, "log_odds_ratio": -0.3430560231208801, "logits/chosen": 0.9428043365478516, "logits/rejected": 0.9588695764541626, "logps/chosen": -2.1894643306732178, "logps/rejected": -3.3802051544189453, "loss": 0.6265, "nll_loss": 0.5922014117240906, "rewards/accuracies": 0.875, "rewards/chosen": -0.2189464271068573, "rewards/margins": 0.11907409131526947, "rewards/rejected": -0.33802053332328796, "step": 4455 }, { "epoch": 12.199863107460644, "grad_norm": 5.9918131828308105, "learning_rate": 3.897260273972602e-07, "log_odds_chosen": 1.5978083610534668, "log_odds_ratio": -0.3001672625541687, "logits/chosen": 0.6922751665115356, "logits/rejected": 0.6722140908241272, "logps/chosen": -1.6403427124023438, "logps/rejected": -3.0374650955200195, "loss": 0.6671, "nll_loss": 0.6370915174484253, "rewards/accuracies": 0.875, "rewards/chosen": -0.16403427720069885, "rewards/margins": 0.13971225917339325, "rewards/rejected": -0.3037465512752533, "step": 4456 }, { "epoch": 12.202600958247775, "grad_norm": 5.2177958488464355, "learning_rate": 3.8958904109589043e-07, "log_odds_chosen": 1.7966450452804565, "log_odds_ratio": -0.2979663908481598, "logits/chosen": 0.8804808855056763, "logits/rejected": 0.8801288604736328, "logps/chosen": -2.1847729682922363, "logps/rejected": -3.899050712585449, "loss": 0.6839, "nll_loss": 0.6541491150856018, "rewards/accuracies": 1.0, "rewards/chosen": -0.2184772938489914, "rewards/margins": 0.17142777144908905, "rewards/rejected": -0.38990503549575806, "step": 4457 }, { "epoch": 12.205338809034908, "grad_norm": 5.444390296936035, "learning_rate": 3.8945205479452053e-07, "log_odds_chosen": 1.8261829614639282, "log_odds_ratio": -0.2605644464492798, "logits/chosen": 0.656583845615387, "logits/rejected": 0.6719714403152466, "logps/chosen": -2.076268196105957, "logps/rejected": -3.7666521072387695, "loss": 0.6588, "nll_loss": 0.632766604423523, "rewards/accuracies": 0.75, "rewards/chosen": -0.2076268047094345, "rewards/margins": 0.16903841495513916, "rewards/rejected": -0.37666523456573486, "step": 4458 }, { "epoch": 12.208076659822039, "grad_norm": 4.784642696380615, "learning_rate": 3.893150684931507e-07, "log_odds_chosen": 2.855879783630371, "log_odds_ratio": -0.12199406325817108, "logits/chosen": 0.7520690560340881, "logits/rejected": 0.818160891532898, "logps/chosen": -1.9148731231689453, "logps/rejected": -4.586599349975586, "loss": 0.6063, "nll_loss": 0.5940683484077454, "rewards/accuracies": 1.0, "rewards/chosen": -0.19148731231689453, "rewards/margins": 0.26717257499694824, "rewards/rejected": -0.4586598873138428, "step": 4459 }, { "epoch": 12.210814510609172, "grad_norm": 6.024068355560303, "learning_rate": 3.891780821917808e-07, "log_odds_chosen": 2.1006345748901367, "log_odds_ratio": -0.49463921785354614, "logits/chosen": 0.6936997175216675, "logits/rejected": 0.7017629146575928, "logps/chosen": -2.379101276397705, "logps/rejected": -4.412712097167969, "loss": 0.7316, "nll_loss": 0.6820963025093079, "rewards/accuracies": 0.625, "rewards/chosen": -0.23791012167930603, "rewards/margins": 0.20336109399795532, "rewards/rejected": -0.44127121567726135, "step": 4460 }, { "epoch": 12.213552361396303, "grad_norm": 6.667670249938965, "learning_rate": 3.8904109589041093e-07, "log_odds_chosen": 1.23892080783844, "log_odds_ratio": -0.3574466109275818, "logits/chosen": 0.6148499250411987, "logits/rejected": 0.6532617211341858, "logps/chosen": -2.40447735786438, "logps/rejected": -3.5224905014038086, "loss": 0.7739, "nll_loss": 0.738175630569458, "rewards/accuracies": 0.875, "rewards/chosen": -0.2404477596282959, "rewards/margins": 0.11180128902196884, "rewards/rejected": -0.35224905610084534, "step": 4461 }, { "epoch": 12.216290212183436, "grad_norm": 5.0645246505737305, "learning_rate": 3.889041095890411e-07, "log_odds_chosen": 2.9331305027008057, "log_odds_ratio": -0.3123398423194885, "logits/chosen": 0.8855207562446594, "logits/rejected": 0.8635830879211426, "logps/chosen": -1.9860899448394775, "logps/rejected": -4.750011444091797, "loss": 0.7766, "nll_loss": 0.7453754544258118, "rewards/accuracies": 0.75, "rewards/chosen": -0.19860899448394775, "rewards/margins": 0.2763921916484833, "rewards/rejected": -0.47500115633010864, "step": 4462 }, { "epoch": 12.219028062970569, "grad_norm": 6.287399768829346, "learning_rate": 3.887671232876712e-07, "log_odds_chosen": 1.6908376216888428, "log_odds_ratio": -0.324888676404953, "logits/chosen": 0.7830629348754883, "logits/rejected": 0.751562237739563, "logps/chosen": -2.3803212642669678, "logps/rejected": -4.012089729309082, "loss": 0.7395, "nll_loss": 0.707047700881958, "rewards/accuracies": 0.875, "rewards/chosen": -0.23803214728832245, "rewards/margins": 0.16317687928676605, "rewards/rejected": -0.4012089967727661, "step": 4463 }, { "epoch": 12.2217659137577, "grad_norm": 5.427430629730225, "learning_rate": 3.886301369863014e-07, "log_odds_chosen": 2.0569117069244385, "log_odds_ratio": -0.49762603640556335, "logits/chosen": 0.6847789883613586, "logits/rejected": 0.7067009210586548, "logps/chosen": -2.464221477508545, "logps/rejected": -4.470266342163086, "loss": 0.7434, "nll_loss": 0.6935965418815613, "rewards/accuracies": 0.75, "rewards/chosen": -0.2464221566915512, "rewards/margins": 0.20060451328754425, "rewards/rejected": -0.44702664017677307, "step": 4464 }, { "epoch": 12.224503764544833, "grad_norm": 6.0854058265686035, "learning_rate": 3.884931506849315e-07, "log_odds_chosen": 1.6869137287139893, "log_odds_ratio": -0.23219147324562073, "logits/chosen": 0.8923126459121704, "logits/rejected": 0.9398930668830872, "logps/chosen": -1.9952442646026611, "logps/rejected": -3.5412116050720215, "loss": 0.6635, "nll_loss": 0.6403027176856995, "rewards/accuracies": 1.0, "rewards/chosen": -0.1995244324207306, "rewards/margins": 0.15459676086902618, "rewards/rejected": -0.35412120819091797, "step": 4465 }, { "epoch": 12.227241615331964, "grad_norm": 4.470058917999268, "learning_rate": 3.8835616438356164e-07, "log_odds_chosen": 2.806800365447998, "log_odds_ratio": -0.12566964328289032, "logits/chosen": 0.9645029306411743, "logits/rejected": 1.021568775177002, "logps/chosen": -2.0033843517303467, "logps/rejected": -4.677940845489502, "loss": 0.5815, "nll_loss": 0.5689294934272766, "rewards/accuracies": 1.0, "rewards/chosen": -0.2003384232521057, "rewards/margins": 0.2674556374549866, "rewards/rejected": -0.4677940607070923, "step": 4466 }, { "epoch": 12.229979466119097, "grad_norm": 5.33413553237915, "learning_rate": 3.882191780821918e-07, "log_odds_chosen": 2.6272058486938477, "log_odds_ratio": -0.15072672069072723, "logits/chosen": 0.8479868769645691, "logits/rejected": 0.8654553890228271, "logps/chosen": -2.211726188659668, "logps/rejected": -4.71238374710083, "loss": 0.676, "nll_loss": 0.6608865261077881, "rewards/accuracies": 1.0, "rewards/chosen": -0.22117264568805695, "rewards/margins": 0.25006574392318726, "rewards/rejected": -0.4712384343147278, "step": 4467 }, { "epoch": 12.232717316906228, "grad_norm": 6.245835781097412, "learning_rate": 3.880821917808219e-07, "log_odds_chosen": 3.377837896347046, "log_odds_ratio": -0.08041268587112427, "logits/chosen": 1.109609842300415, "logits/rejected": 1.1739091873168945, "logps/chosen": -2.444679021835327, "logps/rejected": -5.714020252227783, "loss": 0.6367, "nll_loss": 0.6286704540252686, "rewards/accuracies": 1.0, "rewards/chosen": -0.24446791410446167, "rewards/margins": 0.3269341289997101, "rewards/rejected": -0.5714020133018494, "step": 4468 }, { "epoch": 12.235455167693361, "grad_norm": 12.569741249084473, "learning_rate": 3.8794520547945204e-07, "log_odds_chosen": 0.6068423390388489, "log_odds_ratio": -0.588233470916748, "logits/chosen": 1.0385403633117676, "logits/rejected": 0.9712092876434326, "logps/chosen": -3.6605772972106934, "logps/rejected": -4.208565711975098, "loss": 0.7487, "nll_loss": 0.6899193525314331, "rewards/accuracies": 0.75, "rewards/chosen": -0.36605772376060486, "rewards/margins": 0.05479887127876282, "rewards/rejected": -0.42085662484169006, "step": 4469 }, { "epoch": 12.238193018480493, "grad_norm": 7.604499340057373, "learning_rate": 3.8780821917808214e-07, "log_odds_chosen": 1.008293867111206, "log_odds_ratio": -0.5983763337135315, "logits/chosen": 0.7621448636054993, "logits/rejected": 0.7672725319862366, "logps/chosen": -2.9983391761779785, "logps/rejected": -3.945988178253174, "loss": 0.7868, "nll_loss": 0.7269325256347656, "rewards/accuracies": 0.75, "rewards/chosen": -0.29983392357826233, "rewards/margins": 0.09476488828659058, "rewards/rejected": -0.3945988118648529, "step": 4470 }, { "epoch": 12.240930869267626, "grad_norm": 7.105100631713867, "learning_rate": 3.8767123287671235e-07, "log_odds_chosen": 2.2481112480163574, "log_odds_ratio": -0.29844921827316284, "logits/chosen": 0.850979208946228, "logits/rejected": 0.9316384792327881, "logps/chosen": -3.003484010696411, "logps/rejected": -5.205869674682617, "loss": 0.8271, "nll_loss": 0.7972855567932129, "rewards/accuracies": 0.875, "rewards/chosen": -0.3003484010696411, "rewards/margins": 0.220238596200943, "rewards/rejected": -0.5205869674682617, "step": 4471 }, { "epoch": 12.243668720054757, "grad_norm": 4.743283748626709, "learning_rate": 3.8753424657534244e-07, "log_odds_chosen": 2.7495791912078857, "log_odds_ratio": -0.22726354002952576, "logits/chosen": 0.8376480937004089, "logits/rejected": 0.8165212869644165, "logps/chosen": -1.8104493618011475, "logps/rejected": -4.341732978820801, "loss": 0.7467, "nll_loss": 0.7239395380020142, "rewards/accuracies": 1.0, "rewards/chosen": -0.18104493618011475, "rewards/margins": 0.25312837958335876, "rewards/rejected": -0.4341733157634735, "step": 4472 }, { "epoch": 12.24640657084189, "grad_norm": 5.716590881347656, "learning_rate": 3.873972602739726e-07, "log_odds_chosen": 2.3998310565948486, "log_odds_ratio": -0.20295768976211548, "logits/chosen": 1.0052047967910767, "logits/rejected": 0.9731760025024414, "logps/chosen": -2.5212931632995605, "logps/rejected": -4.85280704498291, "loss": 0.7527, "nll_loss": 0.7323707342147827, "rewards/accuracies": 1.0, "rewards/chosen": -0.25212931632995605, "rewards/margins": 0.2331513911485672, "rewards/rejected": -0.48528075218200684, "step": 4473 }, { "epoch": 12.24914442162902, "grad_norm": 5.364027500152588, "learning_rate": 3.8726027397260275e-07, "log_odds_chosen": 1.287209153175354, "log_odds_ratio": -0.40123671293258667, "logits/chosen": 1.0097718238830566, "logits/rejected": 1.0471241474151611, "logps/chosen": -1.8712446689605713, "logps/rejected": -3.055971622467041, "loss": 0.6647, "nll_loss": 0.624546229839325, "rewards/accuracies": 0.75, "rewards/chosen": -0.18712446093559265, "rewards/margins": 0.11847268044948578, "rewards/rejected": -0.30559712648391724, "step": 4474 }, { "epoch": 12.251882272416154, "grad_norm": 5.821450233459473, "learning_rate": 3.8712328767123285e-07, "log_odds_chosen": 1.572909951210022, "log_odds_ratio": -0.32305657863616943, "logits/chosen": 0.7069591879844666, "logits/rejected": 0.6794643998146057, "logps/chosen": -2.488786220550537, "logps/rejected": -3.9920668601989746, "loss": 0.7689, "nll_loss": 0.7365556955337524, "rewards/accuracies": 0.875, "rewards/chosen": -0.24887864291667938, "rewards/margins": 0.15032805502414703, "rewards/rejected": -0.3992066979408264, "step": 4475 }, { "epoch": 12.254620123203285, "grad_norm": 5.602487564086914, "learning_rate": 3.86986301369863e-07, "log_odds_chosen": 3.5603442192077637, "log_odds_ratio": -0.07599370181560516, "logits/chosen": 1.0565603971481323, "logits/rejected": 1.1052446365356445, "logps/chosen": -2.4968433380126953, "logps/rejected": -5.958537578582764, "loss": 0.6342, "nll_loss": 0.6265886425971985, "rewards/accuracies": 1.0, "rewards/chosen": -0.24968433380126953, "rewards/margins": 0.34616944193840027, "rewards/rejected": -0.5958537459373474, "step": 4476 }, { "epoch": 12.257357973990418, "grad_norm": 4.957798957824707, "learning_rate": 3.868493150684931e-07, "log_odds_chosen": 2.4623403549194336, "log_odds_ratio": -0.3030867576599121, "logits/chosen": 1.018763542175293, "logits/rejected": 1.039638638496399, "logps/chosen": -2.2203445434570312, "logps/rejected": -4.417375564575195, "loss": 0.7358, "nll_loss": 0.7054643630981445, "rewards/accuracies": 0.875, "rewards/chosen": -0.22203442454338074, "rewards/margins": 0.21970310807228088, "rewards/rejected": -0.4417375326156616, "step": 4477 }, { "epoch": 12.260095824777549, "grad_norm": 4.644389629364014, "learning_rate": 3.867123287671233e-07, "log_odds_chosen": 2.5734920501708984, "log_odds_ratio": -0.25814610719680786, "logits/chosen": 0.7969906330108643, "logits/rejected": 0.8161463737487793, "logps/chosen": -2.1692934036254883, "logps/rejected": -4.659956932067871, "loss": 0.6078, "nll_loss": 0.58195960521698, "rewards/accuracies": 1.0, "rewards/chosen": -0.2169293463230133, "rewards/margins": 0.2490663379430771, "rewards/rejected": -0.4659956991672516, "step": 4478 }, { "epoch": 12.262833675564682, "grad_norm": 4.973734378814697, "learning_rate": 3.865753424657534e-07, "log_odds_chosen": 2.18017315864563, "log_odds_ratio": -0.17306356132030487, "logits/chosen": 0.8773751258850098, "logits/rejected": 0.93341463804245, "logps/chosen": -1.975468397140503, "logps/rejected": -3.965460777282715, "loss": 0.6618, "nll_loss": 0.6444635391235352, "rewards/accuracies": 1.0, "rewards/chosen": -0.1975468397140503, "rewards/margins": 0.19899925589561462, "rewards/rejected": -0.3965460956096649, "step": 4479 }, { "epoch": 12.265571526351813, "grad_norm": 5.757497787475586, "learning_rate": 3.864383561643835e-07, "log_odds_chosen": 1.6471776962280273, "log_odds_ratio": -0.5020991563796997, "logits/chosen": 0.8242705464363098, "logits/rejected": 0.9832422137260437, "logps/chosen": -2.56129789352417, "logps/rejected": -4.072979927062988, "loss": 0.6569, "nll_loss": 0.6067097187042236, "rewards/accuracies": 0.875, "rewards/chosen": -0.25612983107566833, "rewards/margins": 0.15116822719573975, "rewards/rejected": -0.4072980284690857, "step": 4480 }, { "epoch": 12.268309377138946, "grad_norm": 5.786773681640625, "learning_rate": 3.863013698630137e-07, "log_odds_chosen": 2.6756398677825928, "log_odds_ratio": -0.3910747170448303, "logits/chosen": 0.8614049553871155, "logits/rejected": 0.8998491764068604, "logps/chosen": -2.481734037399292, "logps/rejected": -4.989805698394775, "loss": 0.6675, "nll_loss": 0.6283533573150635, "rewards/accuracies": 0.875, "rewards/chosen": -0.24817340075969696, "rewards/margins": 0.25080716609954834, "rewards/rejected": -0.4989805817604065, "step": 4481 }, { "epoch": 12.271047227926077, "grad_norm": 5.073740005493164, "learning_rate": 3.861643835616438e-07, "log_odds_chosen": 2.4763436317443848, "log_odds_ratio": -0.19838491082191467, "logits/chosen": 1.0585458278656006, "logits/rejected": 1.0606945753097534, "logps/chosen": -1.8632851839065552, "logps/rejected": -4.166282653808594, "loss": 0.6976, "nll_loss": 0.6777575016021729, "rewards/accuracies": 1.0, "rewards/chosen": -0.18632853031158447, "rewards/margins": 0.23029974102973938, "rewards/rejected": -0.41662824153900146, "step": 4482 }, { "epoch": 12.27378507871321, "grad_norm": 5.63710355758667, "learning_rate": 3.8602739726027396e-07, "log_odds_chosen": 3.4534969329833984, "log_odds_ratio": -0.25203606486320496, "logits/chosen": 0.9251015186309814, "logits/rejected": 0.999626636505127, "logps/chosen": -2.259261131286621, "logps/rejected": -5.629645347595215, "loss": 0.7352, "nll_loss": 0.7100258469581604, "rewards/accuracies": 0.875, "rewards/chosen": -0.22592610120773315, "rewards/margins": 0.33703845739364624, "rewards/rejected": -0.5629645586013794, "step": 4483 }, { "epoch": 12.276522929500342, "grad_norm": 6.017048358917236, "learning_rate": 3.8589041095890406e-07, "log_odds_chosen": 0.870303213596344, "log_odds_ratio": -0.4324226677417755, "logits/chosen": 0.9034743905067444, "logits/rejected": 0.9454509615898132, "logps/chosen": -3.028752326965332, "logps/rejected": -3.8315365314483643, "loss": 0.7536, "nll_loss": 0.7103664875030518, "rewards/accuracies": 0.75, "rewards/chosen": -0.30287522077560425, "rewards/margins": 0.0802784413099289, "rewards/rejected": -0.38315367698669434, "step": 4484 }, { "epoch": 12.279260780287474, "grad_norm": 5.037604331970215, "learning_rate": 3.8575342465753426e-07, "log_odds_chosen": 1.9769059419631958, "log_odds_ratio": -0.19808386266231537, "logits/chosen": 1.1792078018188477, "logits/rejected": 1.1301803588867188, "logps/chosen": -2.1043291091918945, "logps/rejected": -3.9725849628448486, "loss": 0.6673, "nll_loss": 0.6474494338035583, "rewards/accuracies": 1.0, "rewards/chosen": -0.21043291687965393, "rewards/margins": 0.18682563304901123, "rewards/rejected": -0.39725854992866516, "step": 4485 }, { "epoch": 12.281998631074606, "grad_norm": 7.878341197967529, "learning_rate": 3.8561643835616436e-07, "log_odds_chosen": 1.4574244022369385, "log_odds_ratio": -0.491784930229187, "logits/chosen": 0.7246309518814087, "logits/rejected": 0.7776395678520203, "logps/chosen": -2.207848072052002, "logps/rejected": -3.50740909576416, "loss": 0.6718, "nll_loss": 0.6225773096084595, "rewards/accuracies": 0.75, "rewards/chosen": -0.22078479826450348, "rewards/margins": 0.12995611131191254, "rewards/rejected": -0.3507409393787384, "step": 4486 }, { "epoch": 12.284736481861739, "grad_norm": 5.571208477020264, "learning_rate": 3.8547945205479446e-07, "log_odds_chosen": 2.4307467937469482, "log_odds_ratio": -0.294162392616272, "logits/chosen": 0.8420318365097046, "logits/rejected": 0.8704086542129517, "logps/chosen": -2.2891061305999756, "logps/rejected": -4.572965145111084, "loss": 0.7594, "nll_loss": 0.7299922108650208, "rewards/accuracies": 0.875, "rewards/chosen": -0.22891061007976532, "rewards/margins": 0.22838589549064636, "rewards/rejected": -0.4572965204715729, "step": 4487 }, { "epoch": 12.28747433264887, "grad_norm": 4.966530799865723, "learning_rate": 3.8534246575342467e-07, "log_odds_chosen": 1.216575026512146, "log_odds_ratio": -0.4423336386680603, "logits/chosen": 0.8709869384765625, "logits/rejected": 1.0030847787857056, "logps/chosen": -2.2305147647857666, "logps/rejected": -3.368762969970703, "loss": 0.7195, "nll_loss": 0.6752524375915527, "rewards/accuracies": 0.75, "rewards/chosen": -0.22305145859718323, "rewards/margins": 0.11382482945919037, "rewards/rejected": -0.3368763029575348, "step": 4488 }, { "epoch": 12.290212183436003, "grad_norm": 7.319636821746826, "learning_rate": 3.8520547945205477e-07, "log_odds_chosen": 1.3399721384048462, "log_odds_ratio": -0.3669145107269287, "logits/chosen": 0.7479442358016968, "logits/rejected": 0.7333271503448486, "logps/chosen": -2.713688850402832, "logps/rejected": -3.993100166320801, "loss": 0.6818, "nll_loss": 0.6451003551483154, "rewards/accuracies": 0.75, "rewards/chosen": -0.2713688910007477, "rewards/margins": 0.12794113159179688, "rewards/rejected": -0.39931002259254456, "step": 4489 }, { "epoch": 12.292950034223136, "grad_norm": 5.441120624542236, "learning_rate": 3.850684931506849e-07, "log_odds_chosen": 1.2503315210342407, "log_odds_ratio": -0.4267817735671997, "logits/chosen": 0.8356466293334961, "logits/rejected": 0.8077001571655273, "logps/chosen": -2.302107810974121, "logps/rejected": -3.399958610534668, "loss": 0.6772, "nll_loss": 0.6345018148422241, "rewards/accuracies": 0.75, "rewards/chosen": -0.2302107810974121, "rewards/margins": 0.10978510975837708, "rewards/rejected": -0.3399958610534668, "step": 4490 }, { "epoch": 12.295687885010267, "grad_norm": 5.306065559387207, "learning_rate": 3.84931506849315e-07, "log_odds_chosen": 2.673082113265991, "log_odds_ratio": -0.2207927405834198, "logits/chosen": 0.7125078439712524, "logits/rejected": 0.692762017250061, "logps/chosen": -1.7654087543487549, "logps/rejected": -4.286899089813232, "loss": 0.6581, "nll_loss": 0.6360620856285095, "rewards/accuracies": 1.0, "rewards/chosen": -0.17654085159301758, "rewards/margins": 0.2521490454673767, "rewards/rejected": -0.4286898970603943, "step": 4491 }, { "epoch": 12.2984257357974, "grad_norm": 5.833584785461426, "learning_rate": 3.847945205479452e-07, "log_odds_chosen": 0.8944716453552246, "log_odds_ratio": -0.6457727551460266, "logits/chosen": 0.6293768882751465, "logits/rejected": 0.6616542935371399, "logps/chosen": -2.579043388366699, "logps/rejected": -3.4050004482269287, "loss": 0.724, "nll_loss": 0.6594035029411316, "rewards/accuracies": 0.75, "rewards/chosen": -0.2579043507575989, "rewards/margins": 0.08259569108486176, "rewards/rejected": -0.3405000567436218, "step": 4492 }, { "epoch": 12.301163586584531, "grad_norm": 5.7591471672058105, "learning_rate": 3.846575342465753e-07, "log_odds_chosen": 1.2320090532302856, "log_odds_ratio": -0.4148731827735901, "logits/chosen": 0.6427414417266846, "logits/rejected": 0.5510663986206055, "logps/chosen": -2.8281445503234863, "logps/rejected": -4.015436172485352, "loss": 0.7649, "nll_loss": 0.7234041094779968, "rewards/accuracies": 1.0, "rewards/chosen": -0.2828144431114197, "rewards/margins": 0.11872916668653488, "rewards/rejected": -0.40154361724853516, "step": 4493 }, { "epoch": 12.303901437371664, "grad_norm": 6.021173000335693, "learning_rate": 3.845205479452054e-07, "log_odds_chosen": 1.8532006740570068, "log_odds_ratio": -0.31083613634109497, "logits/chosen": 0.8364749550819397, "logits/rejected": 0.7015659809112549, "logps/chosen": -2.0887577533721924, "logps/rejected": -3.7826929092407227, "loss": 0.7534, "nll_loss": 0.7222853899002075, "rewards/accuracies": 0.875, "rewards/chosen": -0.20887577533721924, "rewards/margins": 0.16939350962638855, "rewards/rejected": -0.3782693147659302, "step": 4494 }, { "epoch": 12.306639288158795, "grad_norm": 6.093019485473633, "learning_rate": 3.843835616438356e-07, "log_odds_chosen": 1.4371492862701416, "log_odds_ratio": -0.3659067749977112, "logits/chosen": 0.6062678098678589, "logits/rejected": 0.6623905301094055, "logps/chosen": -2.1776227951049805, "logps/rejected": -3.5155556201934814, "loss": 0.7358, "nll_loss": 0.6992477178573608, "rewards/accuracies": 0.875, "rewards/chosen": -0.217762291431427, "rewards/margins": 0.13379329442977905, "rewards/rejected": -0.35155558586120605, "step": 4495 }, { "epoch": 12.309377138945928, "grad_norm": 4.2158427238464355, "learning_rate": 3.842465753424657e-07, "log_odds_chosen": 2.5790653228759766, "log_odds_ratio": -0.18927030265331268, "logits/chosen": 0.8491230010986328, "logits/rejected": 0.9368892908096313, "logps/chosen": -1.8884470462799072, "logps/rejected": -4.256678104400635, "loss": 0.6847, "nll_loss": 0.6657320261001587, "rewards/accuracies": 1.0, "rewards/chosen": -0.1888447105884552, "rewards/margins": 0.23682314157485962, "rewards/rejected": -0.42566782236099243, "step": 4496 }, { "epoch": 12.31211498973306, "grad_norm": 5.121829986572266, "learning_rate": 3.841095890410959e-07, "log_odds_chosen": 1.4084504842758179, "log_odds_ratio": -0.3621915280818939, "logits/chosen": 0.8649185299873352, "logits/rejected": 0.8913719654083252, "logps/chosen": -2.521395683288574, "logps/rejected": -3.8745007514953613, "loss": 0.6737, "nll_loss": 0.6375207901000977, "rewards/accuracies": 0.75, "rewards/chosen": -0.2521395683288574, "rewards/margins": 0.13531051576137543, "rewards/rejected": -0.38745009899139404, "step": 4497 }, { "epoch": 12.314852840520192, "grad_norm": 6.492371559143066, "learning_rate": 3.8397260273972603e-07, "log_odds_chosen": 1.9500885009765625, "log_odds_ratio": -0.2359265685081482, "logits/chosen": 0.9145015478134155, "logits/rejected": 0.8792911767959595, "logps/chosen": -1.4954965114593506, "logps/rejected": -3.2477352619171143, "loss": 0.6344, "nll_loss": 0.6108423471450806, "rewards/accuracies": 0.875, "rewards/chosen": -0.14954964816570282, "rewards/margins": 0.17522387206554413, "rewards/rejected": -0.32477352023124695, "step": 4498 }, { "epoch": 12.317590691307323, "grad_norm": 5.4536027908325195, "learning_rate": 3.838356164383562e-07, "log_odds_chosen": 1.1014810800552368, "log_odds_ratio": -0.43717753887176514, "logits/chosen": 0.8773353099822998, "logits/rejected": 0.8557911515235901, "logps/chosen": -2.1361210346221924, "logps/rejected": -3.149963855743408, "loss": 0.7023, "nll_loss": 0.6586107611656189, "rewards/accuracies": 0.875, "rewards/chosen": -0.2136121243238449, "rewards/margins": 0.10138429701328278, "rewards/rejected": -0.3149964213371277, "step": 4499 }, { "epoch": 12.320328542094456, "grad_norm": 4.2473015785217285, "learning_rate": 3.836986301369863e-07, "log_odds_chosen": 4.015387535095215, "log_odds_ratio": -0.14854560792446136, "logits/chosen": 0.8731493949890137, "logits/rejected": 0.8633884191513062, "logps/chosen": -1.940595269203186, "logps/rejected": -5.786889553070068, "loss": 0.6069, "nll_loss": 0.5920177698135376, "rewards/accuracies": 1.0, "rewards/chosen": -0.19405952095985413, "rewards/margins": 0.3846294581890106, "rewards/rejected": -0.5786889791488647, "step": 4500 }, { "epoch": 12.323066392881588, "grad_norm": 5.207429885864258, "learning_rate": 3.835616438356164e-07, "log_odds_chosen": 2.247746706008911, "log_odds_ratio": -0.24102503061294556, "logits/chosen": 0.657721221446991, "logits/rejected": 0.6707698106765747, "logps/chosen": -1.6329723596572876, "logps/rejected": -3.7313852310180664, "loss": 0.6198, "nll_loss": 0.5956726670265198, "rewards/accuracies": 0.875, "rewards/chosen": -0.16329723596572876, "rewards/margins": 0.2098412960767746, "rewards/rejected": -0.37313854694366455, "step": 4501 }, { "epoch": 12.32580424366872, "grad_norm": 8.131893157958984, "learning_rate": 3.834246575342466e-07, "log_odds_chosen": 2.4413304328918457, "log_odds_ratio": -0.6799980401992798, "logits/chosen": 0.8908137083053589, "logits/rejected": 0.9693785309791565, "logps/chosen": -3.172208786010742, "logps/rejected": -5.556354522705078, "loss": 0.7518, "nll_loss": 0.6838216185569763, "rewards/accuracies": 0.75, "rewards/chosen": -0.31722086668014526, "rewards/margins": 0.23841455578804016, "rewards/rejected": -0.5556354522705078, "step": 4502 }, { "epoch": 12.328542094455852, "grad_norm": 6.5654802322387695, "learning_rate": 3.832876712328767e-07, "log_odds_chosen": 1.3786362409591675, "log_odds_ratio": -0.48279017210006714, "logits/chosen": 0.8997287750244141, "logits/rejected": 0.9460636377334595, "logps/chosen": -2.9724745750427246, "logps/rejected": -4.267716884613037, "loss": 0.749, "nll_loss": 0.7007113099098206, "rewards/accuracies": 0.75, "rewards/chosen": -0.2972474694252014, "rewards/margins": 0.12952423095703125, "rewards/rejected": -0.42677170038223267, "step": 4503 }, { "epoch": 12.331279945242985, "grad_norm": 4.910542964935303, "learning_rate": 3.8315068493150683e-07, "log_odds_chosen": 1.6529620885849, "log_odds_ratio": -0.3194526731967926, "logits/chosen": 0.9098619222640991, "logits/rejected": 0.9598592519760132, "logps/chosen": -2.2966723442077637, "logps/rejected": -3.8485608100891113, "loss": 0.6984, "nll_loss": 0.6664191484451294, "rewards/accuracies": 0.875, "rewards/chosen": -0.22966724634170532, "rewards/margins": 0.15518882870674133, "rewards/rejected": -0.38485607504844666, "step": 4504 }, { "epoch": 12.334017796030116, "grad_norm": 5.528773784637451, "learning_rate": 3.83013698630137e-07, "log_odds_chosen": 2.235398054122925, "log_odds_ratio": -0.18015813827514648, "logits/chosen": 0.7485073804855347, "logits/rejected": 0.8339523077011108, "logps/chosen": -2.621119976043701, "logps/rejected": -4.78541374206543, "loss": 0.8537, "nll_loss": 0.835690975189209, "rewards/accuracies": 1.0, "rewards/chosen": -0.262112021446228, "rewards/margins": 0.21642935276031494, "rewards/rejected": -0.47854137420654297, "step": 4505 }, { "epoch": 12.336755646817249, "grad_norm": Infinity, "learning_rate": 3.83013698630137e-07, "log_odds_chosen": 1.9651941061019897, "log_odds_ratio": -0.6446444988250732, "logits/chosen": 0.9711931347846985, "logits/rejected": 0.9611833691596985, "logps/chosen": -2.890462875366211, "logps/rejected": -4.757635116577148, "loss": 0.7491, "nll_loss": 0.6845983266830444, "rewards/accuracies": 0.75, "rewards/chosen": -0.2890462875366211, "rewards/margins": 0.1867172122001648, "rewards/rejected": -0.4757634997367859, "step": 4506 }, { "epoch": 12.33949349760438, "grad_norm": 5.166070938110352, "learning_rate": 3.8287671232876714e-07, "log_odds_chosen": 2.266125440597534, "log_odds_ratio": -0.2877528965473175, "logits/chosen": 0.7241382598876953, "logits/rejected": 0.6969902515411377, "logps/chosen": -2.274648904800415, "logps/rejected": -4.401586532592773, "loss": 0.7103, "nll_loss": 0.681536078453064, "rewards/accuracies": 0.75, "rewards/chosen": -0.22746489942073822, "rewards/margins": 0.21269376575946808, "rewards/rejected": -0.4401586651802063, "step": 4507 }, { "epoch": 12.342231348391513, "grad_norm": 6.108517169952393, "learning_rate": 3.8273972602739724e-07, "log_odds_chosen": 2.8904409408569336, "log_odds_ratio": -0.2393667846918106, "logits/chosen": 0.6406500339508057, "logits/rejected": 0.49181807041168213, "logps/chosen": -1.990315318107605, "logps/rejected": -4.738668441772461, "loss": 0.7628, "nll_loss": 0.7388501167297363, "rewards/accuracies": 1.0, "rewards/chosen": -0.1990315318107605, "rewards/margins": 0.27483534812927246, "rewards/rejected": -0.47386687994003296, "step": 4508 }, { "epoch": 12.344969199178644, "grad_norm": 6.969463348388672, "learning_rate": 3.8260273972602734e-07, "log_odds_chosen": 1.6631311178207397, "log_odds_ratio": -0.337014764547348, "logits/chosen": 0.9717269539833069, "logits/rejected": 1.0444256067276, "logps/chosen": -3.0306589603424072, "logps/rejected": -4.627647399902344, "loss": 0.6691, "nll_loss": 0.6354120969772339, "rewards/accuracies": 0.875, "rewards/chosen": -0.3030658960342407, "rewards/margins": 0.15969884395599365, "rewards/rejected": -0.462764710187912, "step": 4509 }, { "epoch": 12.347707049965777, "grad_norm": 7.270900726318359, "learning_rate": 3.8246575342465754e-07, "log_odds_chosen": 1.987483263015747, "log_odds_ratio": -0.5589956045150757, "logits/chosen": 0.8020816445350647, "logits/rejected": 0.7926797270774841, "logps/chosen": -2.444873094558716, "logps/rejected": -4.322090148925781, "loss": 0.7752, "nll_loss": 0.7192692160606384, "rewards/accuracies": 0.875, "rewards/chosen": -0.24448730051517487, "rewards/margins": 0.18772171437740326, "rewards/rejected": -0.4322090148925781, "step": 4510 }, { "epoch": 12.350444900752908, "grad_norm": 12.582112312316895, "learning_rate": 3.8232876712328764e-07, "log_odds_chosen": 1.058774471282959, "log_odds_ratio": -0.764504611492157, "logits/chosen": 1.117289423942566, "logits/rejected": 1.0372729301452637, "logps/chosen": -3.283482789993286, "logps/rejected": -4.26098108291626, "loss": 0.8308, "nll_loss": 0.754397988319397, "rewards/accuracies": 0.75, "rewards/chosen": -0.3283482789993286, "rewards/margins": 0.09774985909461975, "rewards/rejected": -0.42609816789627075, "step": 4511 }, { "epoch": 12.353182751540041, "grad_norm": 5.214152812957764, "learning_rate": 3.821917808219178e-07, "log_odds_chosen": 2.4555559158325195, "log_odds_ratio": -0.27921876311302185, "logits/chosen": 1.113970398902893, "logits/rejected": 1.0944530963897705, "logps/chosen": -1.7731573581695557, "logps/rejected": -4.089007377624512, "loss": 0.6772, "nll_loss": 0.6492975950241089, "rewards/accuracies": 0.875, "rewards/chosen": -0.17731572687625885, "rewards/margins": 0.23158498108386993, "rewards/rejected": -0.4089006781578064, "step": 4512 }, { "epoch": 12.355920602327172, "grad_norm": 4.617013931274414, "learning_rate": 3.8205479452054795e-07, "log_odds_chosen": 2.308328151702881, "log_odds_ratio": -0.2781118154525757, "logits/chosen": 0.744917631149292, "logits/rejected": 0.8241710662841797, "logps/chosen": -1.9136936664581299, "logps/rejected": -4.090869426727295, "loss": 0.6104, "nll_loss": 0.5825967788696289, "rewards/accuracies": 0.875, "rewards/chosen": -0.19136938452720642, "rewards/margins": 0.21771755814552307, "rewards/rejected": -0.4090869426727295, "step": 4513 }, { "epoch": 12.358658453114305, "grad_norm": 5.756022930145264, "learning_rate": 3.819178082191781e-07, "log_odds_chosen": 2.167046070098877, "log_odds_ratio": -0.17381437122821808, "logits/chosen": 1.1158716678619385, "logits/rejected": 1.1682097911834717, "logps/chosen": -2.1180858612060547, "logps/rejected": -4.146513938903809, "loss": 0.5598, "nll_loss": 0.5423778295516968, "rewards/accuracies": 1.0, "rewards/chosen": -0.21180857717990875, "rewards/margins": 0.2028428018093109, "rewards/rejected": -0.41465139389038086, "step": 4514 }, { "epoch": 12.361396303901437, "grad_norm": 4.726258277893066, "learning_rate": 3.817808219178082e-07, "log_odds_chosen": 2.2692689895629883, "log_odds_ratio": -0.2007221132516861, "logits/chosen": 0.8833463788032532, "logits/rejected": 0.8427405953407288, "logps/chosen": -1.9567550420761108, "logps/rejected": -4.056282997131348, "loss": 0.6416, "nll_loss": 0.6214988827705383, "rewards/accuracies": 1.0, "rewards/chosen": -0.19567550718784332, "rewards/margins": 0.20995281636714935, "rewards/rejected": -0.4056283235549927, "step": 4515 }, { "epoch": 12.36413415468857, "grad_norm": 4.516965389251709, "learning_rate": 3.816438356164383e-07, "log_odds_chosen": 2.6894805431365967, "log_odds_ratio": -0.13260823488235474, "logits/chosen": 0.9120092391967773, "logits/rejected": 0.905521810054779, "logps/chosen": -1.9989644289016724, "logps/rejected": -4.5236358642578125, "loss": 0.6058, "nll_loss": 0.5925290584564209, "rewards/accuracies": 1.0, "rewards/chosen": -0.199896439909935, "rewards/margins": 0.2524670958518982, "rewards/rejected": -0.4523635804653168, "step": 4516 }, { "epoch": 12.366872005475702, "grad_norm": 6.720715522766113, "learning_rate": 3.815068493150685e-07, "log_odds_chosen": 4.045217514038086, "log_odds_ratio": -0.08545716106891632, "logits/chosen": 1.1759620904922485, "logits/rejected": 1.2605345249176025, "logps/chosen": -2.0581178665161133, "logps/rejected": -5.932541847229004, "loss": 0.7195, "nll_loss": 0.7109816074371338, "rewards/accuracies": 1.0, "rewards/chosen": -0.20581179857254028, "rewards/margins": 0.38744235038757324, "rewards/rejected": -0.5932541489601135, "step": 4517 }, { "epoch": 12.369609856262834, "grad_norm": 5.6012701988220215, "learning_rate": 3.813698630136986e-07, "log_odds_chosen": 0.9009230732917786, "log_odds_ratio": -0.4211077094078064, "logits/chosen": 0.9404121041297913, "logits/rejected": 0.9213966131210327, "logps/chosen": -2.3733253479003906, "logps/rejected": -3.2206406593322754, "loss": 0.6896, "nll_loss": 0.6475099921226501, "rewards/accuracies": 0.75, "rewards/chosen": -0.2373325526714325, "rewards/margins": 0.08473151922225952, "rewards/rejected": -0.3220641016960144, "step": 4518 }, { "epoch": 12.372347707049967, "grad_norm": 6.470519542694092, "learning_rate": 3.8123287671232875e-07, "log_odds_chosen": 1.1323829889297485, "log_odds_ratio": -0.3774479329586029, "logits/chosen": 0.8896632194519043, "logits/rejected": 0.833805501461029, "logps/chosen": -1.7231431007385254, "logps/rejected": -2.7018847465515137, "loss": 0.5986, "nll_loss": 0.5608426332473755, "rewards/accuracies": 0.875, "rewards/chosen": -0.17231431603431702, "rewards/margins": 0.09787414968013763, "rewards/rejected": -0.27018845081329346, "step": 4519 }, { "epoch": 12.375085557837098, "grad_norm": 5.333268642425537, "learning_rate": 3.810958904109589e-07, "log_odds_chosen": 0.5735296010971069, "log_odds_ratio": -0.6243445873260498, "logits/chosen": 0.49551641941070557, "logits/rejected": 0.6139665246009827, "logps/chosen": -1.8810274600982666, "logps/rejected": -2.419660806655884, "loss": 0.7463, "nll_loss": 0.6838343739509583, "rewards/accuracies": 0.625, "rewards/chosen": -0.18810275197029114, "rewards/margins": 0.053863346576690674, "rewards/rejected": -0.24196608364582062, "step": 4520 }, { "epoch": 12.37782340862423, "grad_norm": 5.053901672363281, "learning_rate": 3.8095890410958906e-07, "log_odds_chosen": 1.147674798965454, "log_odds_ratio": -0.47942274808883667, "logits/chosen": 0.9472923278808594, "logits/rejected": 0.9495071768760681, "logps/chosen": -2.397826671600342, "logps/rejected": -3.508928060531616, "loss": 0.6819, "nll_loss": 0.6339612603187561, "rewards/accuracies": 0.625, "rewards/chosen": -0.2397826611995697, "rewards/margins": 0.1111101359128952, "rewards/rejected": -0.3508928120136261, "step": 4521 }, { "epoch": 12.380561259411362, "grad_norm": 5.134453773498535, "learning_rate": 3.8082191780821916e-07, "log_odds_chosen": 1.1757001876831055, "log_odds_ratio": -0.45316046476364136, "logits/chosen": 0.9401764273643494, "logits/rejected": 0.9354780912399292, "logps/chosen": -2.6164069175720215, "logps/rejected": -3.748713493347168, "loss": 0.6963, "nll_loss": 0.6510212421417236, "rewards/accuracies": 0.625, "rewards/chosen": -0.26164066791534424, "rewards/margins": 0.11323065310716629, "rewards/rejected": -0.37487131357192993, "step": 4522 }, { "epoch": 12.383299110198495, "grad_norm": 4.793206214904785, "learning_rate": 3.8068493150684925e-07, "log_odds_chosen": 3.1654860973358154, "log_odds_ratio": -0.16749706864356995, "logits/chosen": 1.145058274269104, "logits/rejected": 1.2153668403625488, "logps/chosen": -2.543342113494873, "logps/rejected": -5.62333869934082, "loss": 0.5793, "nll_loss": 0.5625389218330383, "rewards/accuracies": 0.875, "rewards/chosen": -0.2543342113494873, "rewards/margins": 0.3079996705055237, "rewards/rejected": -0.562333881855011, "step": 4523 }, { "epoch": 12.386036960985626, "grad_norm": 5.216194152832031, "learning_rate": 3.8054794520547946e-07, "log_odds_chosen": 1.5096772909164429, "log_odds_ratio": -0.30653977394104004, "logits/chosen": 0.7102129459381104, "logits/rejected": 0.735461950302124, "logps/chosen": -2.571744203567505, "logps/rejected": -3.988598585128784, "loss": 0.6665, "nll_loss": 0.6358041763305664, "rewards/accuracies": 0.875, "rewards/chosen": -0.25717443227767944, "rewards/margins": 0.14168547093868256, "rewards/rejected": -0.3988599181175232, "step": 4524 }, { "epoch": 12.388774811772759, "grad_norm": 6.858760356903076, "learning_rate": 3.8041095890410956e-07, "log_odds_chosen": 2.178300619125366, "log_odds_ratio": -0.26218685507774353, "logits/chosen": 0.7361510992050171, "logits/rejected": 0.746000349521637, "logps/chosen": -1.7421661615371704, "logps/rejected": -3.760065793991089, "loss": 0.6205, "nll_loss": 0.5942813158035278, "rewards/accuracies": 1.0, "rewards/chosen": -0.1742166131734848, "rewards/margins": 0.2017899453639984, "rewards/rejected": -0.3760065734386444, "step": 4525 }, { "epoch": 12.39151266255989, "grad_norm": 5.587344169616699, "learning_rate": 3.802739726027397e-07, "log_odds_chosen": 3.523508071899414, "log_odds_ratio": -0.17887142300605774, "logits/chosen": 0.8380041122436523, "logits/rejected": 0.8997625112533569, "logps/chosen": -1.7128820419311523, "logps/rejected": -4.984018325805664, "loss": 0.6614, "nll_loss": 0.6435176134109497, "rewards/accuracies": 1.0, "rewards/chosen": -0.17128820717334747, "rewards/margins": 0.32711365818977356, "rewards/rejected": -0.4984018802642822, "step": 4526 }, { "epoch": 12.394250513347023, "grad_norm": 5.607108116149902, "learning_rate": 3.8013698630136986e-07, "log_odds_chosen": 1.1419860124588013, "log_odds_ratio": -0.3904813528060913, "logits/chosen": 0.7186263203620911, "logits/rejected": 0.7222838997840881, "logps/chosen": -2.231299877166748, "logps/rejected": -3.2808868885040283, "loss": 0.8076, "nll_loss": 0.7685999870300293, "rewards/accuracies": 1.0, "rewards/chosen": -0.2231300175189972, "rewards/margins": 0.10495869815349579, "rewards/rejected": -0.3280887305736542, "step": 4527 }, { "epoch": 12.396988364134154, "grad_norm": 5.169353485107422, "learning_rate": 3.7999999999999996e-07, "log_odds_chosen": 2.3013906478881836, "log_odds_ratio": -0.2853715121746063, "logits/chosen": 0.7812955379486084, "logits/rejected": 0.6947141885757446, "logps/chosen": -1.4211952686309814, "logps/rejected": -3.5341694355010986, "loss": 0.6649, "nll_loss": 0.6364051699638367, "rewards/accuracies": 0.875, "rewards/chosen": -0.14211952686309814, "rewards/margins": 0.21129745244979858, "rewards/rejected": -0.35341694951057434, "step": 4528 }, { "epoch": 12.399726214921287, "grad_norm": 4.504246711730957, "learning_rate": 3.798630136986301e-07, "log_odds_chosen": 2.1066269874572754, "log_odds_ratio": -0.16053169965744019, "logits/chosen": 1.041228175163269, "logits/rejected": 1.0595324039459229, "logps/chosen": -2.009718894958496, "logps/rejected": -3.9812893867492676, "loss": 0.5978, "nll_loss": 0.5817031860351562, "rewards/accuracies": 1.0, "rewards/chosen": -0.20097190141677856, "rewards/margins": 0.19715702533721924, "rewards/rejected": -0.3981289267539978, "step": 4529 }, { "epoch": 12.402464065708418, "grad_norm": 4.745695114135742, "learning_rate": 3.797260273972602e-07, "log_odds_chosen": 2.2676992416381836, "log_odds_ratio": -0.20717273652553558, "logits/chosen": 0.805034875869751, "logits/rejected": 0.7813011407852173, "logps/chosen": -2.246169328689575, "logps/rejected": -4.408364295959473, "loss": 0.7414, "nll_loss": 0.7206571102142334, "rewards/accuracies": 1.0, "rewards/chosen": -0.2246169149875641, "rewards/margins": 0.21621952950954437, "rewards/rejected": -0.44083648920059204, "step": 4530 }, { "epoch": 12.405201916495551, "grad_norm": 5.136749267578125, "learning_rate": 3.795890410958904e-07, "log_odds_chosen": 2.1169495582580566, "log_odds_ratio": -0.2312474101781845, "logits/chosen": 0.8372834920883179, "logits/rejected": 0.8561005592346191, "logps/chosen": -1.9698843955993652, "logps/rejected": -3.971295118331909, "loss": 0.5779, "nll_loss": 0.5547906756401062, "rewards/accuracies": 1.0, "rewards/chosen": -0.19698844850063324, "rewards/margins": 0.2001410722732544, "rewards/rejected": -0.39712953567504883, "step": 4531 }, { "epoch": 12.407939767282683, "grad_norm": 6.148413181304932, "learning_rate": 3.794520547945205e-07, "log_odds_chosen": 1.7045538425445557, "log_odds_ratio": -0.28639712929725647, "logits/chosen": 0.7620174288749695, "logits/rejected": 0.6738825440406799, "logps/chosen": -1.7285921573638916, "logps/rejected": -3.2680394649505615, "loss": 0.7773, "nll_loss": 0.748673677444458, "rewards/accuracies": 1.0, "rewards/chosen": -0.17285922169685364, "rewards/margins": 0.153944730758667, "rewards/rejected": -0.326803982257843, "step": 4532 }, { "epoch": 12.410677618069816, "grad_norm": 4.880377769470215, "learning_rate": 3.7931506849315067e-07, "log_odds_chosen": 2.4675543308258057, "log_odds_ratio": -0.3900231122970581, "logits/chosen": 1.048535943031311, "logits/rejected": 1.0438194274902344, "logps/chosen": -2.1949973106384277, "logps/rejected": -4.608026027679443, "loss": 0.7071, "nll_loss": 0.6681002974510193, "rewards/accuracies": 0.875, "rewards/chosen": -0.21949973702430725, "rewards/margins": 0.24130286276340485, "rewards/rejected": -0.4608025848865509, "step": 4533 }, { "epoch": 12.413415468856947, "grad_norm": 6.04748010635376, "learning_rate": 3.791780821917808e-07, "log_odds_chosen": 0.09675147384405136, "log_odds_ratio": -0.7658134698867798, "logits/chosen": 0.9917086362838745, "logits/rejected": 1.0091798305511475, "logps/chosen": -2.524629831314087, "logps/rejected": -2.5772454738616943, "loss": 0.7262, "nll_loss": 0.6496562361717224, "rewards/accuracies": 0.75, "rewards/chosen": -0.2524629533290863, "rewards/margins": 0.005261596292257309, "rewards/rejected": -0.2577245831489563, "step": 4534 }, { "epoch": 12.41615331964408, "grad_norm": 5.241530895233154, "learning_rate": 3.790410958904109e-07, "log_odds_chosen": 2.294914722442627, "log_odds_ratio": -0.2216779589653015, "logits/chosen": 0.9872251749038696, "logits/rejected": 1.0607311725616455, "logps/chosen": -2.43411922454834, "logps/rejected": -4.603581428527832, "loss": 0.6197, "nll_loss": 0.5975269079208374, "rewards/accuracies": 1.0, "rewards/chosen": -0.24341192841529846, "rewards/margins": 0.21694622933864594, "rewards/rejected": -0.4603581130504608, "step": 4535 }, { "epoch": 12.41889117043121, "grad_norm": 5.529972553253174, "learning_rate": 3.7890410958904107e-07, "log_odds_chosen": 2.242929458618164, "log_odds_ratio": -0.2936433255672455, "logits/chosen": 0.8645338416099548, "logits/rejected": 0.8899509906768799, "logps/chosen": -1.9635682106018066, "logps/rejected": -4.0719380378723145, "loss": 0.5868, "nll_loss": 0.5573905110359192, "rewards/accuracies": 0.875, "rewards/chosen": -0.19635683298110962, "rewards/margins": 0.2108369916677475, "rewards/rejected": -0.4071938395500183, "step": 4536 }, { "epoch": 12.421629021218344, "grad_norm": 5.31204891204834, "learning_rate": 3.787671232876712e-07, "log_odds_chosen": 2.076960563659668, "log_odds_ratio": -0.32033392786979675, "logits/chosen": 0.873738169670105, "logits/rejected": 0.8951537013053894, "logps/chosen": -2.0240273475646973, "logps/rejected": -4.0127458572387695, "loss": 0.7816, "nll_loss": 0.7495664358139038, "rewards/accuracies": 0.625, "rewards/chosen": -0.2024027407169342, "rewards/margins": 0.19887185096740723, "rewards/rejected": -0.4012746214866638, "step": 4537 }, { "epoch": 12.424366872005475, "grad_norm": 5.67024040222168, "learning_rate": 3.786301369863014e-07, "log_odds_chosen": 2.801400899887085, "log_odds_ratio": -0.2694273293018341, "logits/chosen": 0.6440352201461792, "logits/rejected": 0.6855192184448242, "logps/chosen": -2.1932451725006104, "logps/rejected": -4.841344833374023, "loss": 0.7562, "nll_loss": 0.7292305827140808, "rewards/accuracies": 0.875, "rewards/chosen": -0.2193244993686676, "rewards/margins": 0.2648100256919861, "rewards/rejected": -0.4841344952583313, "step": 4538 }, { "epoch": 12.427104722792608, "grad_norm": 4.518130302429199, "learning_rate": 3.784931506849315e-07, "log_odds_chosen": 1.661759614944458, "log_odds_ratio": -0.30252397060394287, "logits/chosen": 0.5422155261039734, "logits/rejected": 0.5573809146881104, "logps/chosen": -2.320348024368286, "logps/rejected": -3.9231297969818115, "loss": 0.664, "nll_loss": 0.6337451338768005, "rewards/accuracies": 1.0, "rewards/chosen": -0.2320348024368286, "rewards/margins": 0.16027815639972687, "rewards/rejected": -0.3923129737377167, "step": 4539 }, { "epoch": 12.429842573579739, "grad_norm": 5.666649341583252, "learning_rate": 3.7835616438356163e-07, "log_odds_chosen": 1.7142083644866943, "log_odds_ratio": -0.29971614480018616, "logits/chosen": 0.8469226360321045, "logits/rejected": 0.8737984299659729, "logps/chosen": -2.171191692352295, "logps/rejected": -3.8089871406555176, "loss": 0.6647, "nll_loss": 0.6347367167472839, "rewards/accuracies": 1.0, "rewards/chosen": -0.21711915731430054, "rewards/margins": 0.16377957165241241, "rewards/rejected": -0.38089871406555176, "step": 4540 }, { "epoch": 12.432580424366872, "grad_norm": 6.98116397857666, "learning_rate": 3.782191780821918e-07, "log_odds_chosen": 1.9873673915863037, "log_odds_ratio": -0.5032397508621216, "logits/chosen": 0.7479391694068909, "logits/rejected": 0.6682770848274231, "logps/chosen": -3.043161392211914, "logps/rejected": -4.89182186126709, "loss": 0.7301, "nll_loss": 0.6797959804534912, "rewards/accuracies": 0.75, "rewards/chosen": -0.30431613326072693, "rewards/margins": 0.1848660707473755, "rewards/rejected": -0.48918217420578003, "step": 4541 }, { "epoch": 12.435318275154003, "grad_norm": 5.696784019470215, "learning_rate": 3.780821917808219e-07, "log_odds_chosen": 2.4072389602661133, "log_odds_ratio": -0.22656062245368958, "logits/chosen": 0.694837212562561, "logits/rejected": 0.683739423751831, "logps/chosen": -2.2234859466552734, "logps/rejected": -4.4722208976745605, "loss": 0.6659, "nll_loss": 0.6431964635848999, "rewards/accuracies": 1.0, "rewards/chosen": -0.22234860062599182, "rewards/margins": 0.22487352788448334, "rewards/rejected": -0.44722211360931396, "step": 4542 }, { "epoch": 12.438056125941136, "grad_norm": 4.560232162475586, "learning_rate": 3.7794520547945203e-07, "log_odds_chosen": 2.4232451915740967, "log_odds_ratio": -0.1550852209329605, "logits/chosen": 0.7190563082695007, "logits/rejected": 0.7223986387252808, "logps/chosen": -1.6292507648468018, "logps/rejected": -3.78462553024292, "loss": 0.6446, "nll_loss": 0.6290580034255981, "rewards/accuracies": 1.0, "rewards/chosen": -0.16292506456375122, "rewards/margins": 0.21553745865821838, "rewards/rejected": -0.378462553024292, "step": 4543 }, { "epoch": 12.44079397672827, "grad_norm": 4.930911540985107, "learning_rate": 3.778082191780822e-07, "log_odds_chosen": 2.0172507762908936, "log_odds_ratio": -0.2527466118335724, "logits/chosen": 0.5456258654594421, "logits/rejected": 0.6034865975379944, "logps/chosen": -1.598003625869751, "logps/rejected": -3.4190587997436523, "loss": 0.628, "nll_loss": 0.6027136445045471, "rewards/accuracies": 1.0, "rewards/chosen": -0.15980038046836853, "rewards/margins": 0.18210551142692566, "rewards/rejected": -0.3419058620929718, "step": 4544 }, { "epoch": 12.4435318275154, "grad_norm": 8.065837860107422, "learning_rate": 3.7767123287671234e-07, "log_odds_chosen": 0.8533312082290649, "log_odds_ratio": -0.5677425861358643, "logits/chosen": 0.8876254558563232, "logits/rejected": 0.8742175698280334, "logps/chosen": -3.1790473461151123, "logps/rejected": -3.9396920204162598, "loss": 0.7755, "nll_loss": 0.7186970710754395, "rewards/accuracies": 0.75, "rewards/chosen": -0.3179047405719757, "rewards/margins": 0.07606449723243713, "rewards/rejected": -0.39396923780441284, "step": 4545 }, { "epoch": 12.446269678302533, "grad_norm": 4.64310359954834, "learning_rate": 3.7753424657534243e-07, "log_odds_chosen": 2.264505386352539, "log_odds_ratio": -0.14214035868644714, "logits/chosen": 0.6152247190475464, "logits/rejected": 0.6023063659667969, "logps/chosen": -1.7093044519424438, "logps/rejected": -3.7667317390441895, "loss": 0.629, "nll_loss": 0.6147775650024414, "rewards/accuracies": 1.0, "rewards/chosen": -0.17093044519424438, "rewards/margins": 0.2057427167892456, "rewards/rejected": -0.3766731917858124, "step": 4546 }, { "epoch": 12.449007529089664, "grad_norm": 7.351363182067871, "learning_rate": 3.773972602739726e-07, "log_odds_chosen": 1.387303352355957, "log_odds_ratio": -0.41064774990081787, "logits/chosen": 0.8925803899765015, "logits/rejected": 0.8612511157989502, "logps/chosen": -2.344573974609375, "logps/rejected": -3.698674440383911, "loss": 0.6962, "nll_loss": 0.6551812291145325, "rewards/accuracies": 0.75, "rewards/chosen": -0.23445740342140198, "rewards/margins": 0.13541007041931152, "rewards/rejected": -0.3698675036430359, "step": 4547 }, { "epoch": 12.451745379876797, "grad_norm": 4.736738204956055, "learning_rate": 3.7726027397260274e-07, "log_odds_chosen": 2.0530736446380615, "log_odds_ratio": -0.24526268243789673, "logits/chosen": 0.8214752674102783, "logits/rejected": 0.8855470418930054, "logps/chosen": -2.1857352256774902, "logps/rejected": -4.097979545593262, "loss": 0.6819, "nll_loss": 0.6574140787124634, "rewards/accuracies": 1.0, "rewards/chosen": -0.21857352554798126, "rewards/margins": 0.19122442603111267, "rewards/rejected": -0.40979793667793274, "step": 4548 }, { "epoch": 12.454483230663929, "grad_norm": 5.92969274520874, "learning_rate": 3.7712328767123284e-07, "log_odds_chosen": 1.8578299283981323, "log_odds_ratio": -0.35452109575271606, "logits/chosen": 0.7987595796585083, "logits/rejected": 0.8606406450271606, "logps/chosen": -2.6314079761505127, "logps/rejected": -4.416048049926758, "loss": 0.8659, "nll_loss": 0.830464243888855, "rewards/accuracies": 0.75, "rewards/chosen": -0.26314079761505127, "rewards/margins": 0.17846399545669556, "rewards/rejected": -0.4416047930717468, "step": 4549 }, { "epoch": 12.457221081451062, "grad_norm": 5.833315372467041, "learning_rate": 3.76986301369863e-07, "log_odds_chosen": 2.1427927017211914, "log_odds_ratio": -0.2312183529138565, "logits/chosen": 0.8875021934509277, "logits/rejected": 0.9294770956039429, "logps/chosen": -2.4968690872192383, "logps/rejected": -4.522403717041016, "loss": 0.6033, "nll_loss": 0.5802140235900879, "rewards/accuracies": 1.0, "rewards/chosen": -0.24968692660331726, "rewards/margins": 0.20255345106124878, "rewards/rejected": -0.45224037766456604, "step": 4550 }, { "epoch": 12.459958932238193, "grad_norm": 5.93606424331665, "learning_rate": 3.7684931506849314e-07, "log_odds_chosen": 2.270007610321045, "log_odds_ratio": -0.24710144102573395, "logits/chosen": 1.0257806777954102, "logits/rejected": 1.1010117530822754, "logps/chosen": -2.319164276123047, "logps/rejected": -4.42207145690918, "loss": 0.6177, "nll_loss": 0.59303879737854, "rewards/accuracies": 0.875, "rewards/chosen": -0.23191644251346588, "rewards/margins": 0.21029071509838104, "rewards/rejected": -0.4422071576118469, "step": 4551 }, { "epoch": 12.462696783025326, "grad_norm": 5.057880878448486, "learning_rate": 3.767123287671233e-07, "log_odds_chosen": 1.85930597782135, "log_odds_ratio": -0.2247697114944458, "logits/chosen": 0.7410378456115723, "logits/rejected": 0.7278592586517334, "logps/chosen": -1.652938723564148, "logps/rejected": -3.280991554260254, "loss": 0.6836, "nll_loss": 0.66114342212677, "rewards/accuracies": 1.0, "rewards/chosen": -0.1652938723564148, "rewards/margins": 0.16280525922775269, "rewards/rejected": -0.32809916138648987, "step": 4552 }, { "epoch": 12.465434633812457, "grad_norm": 6.013382911682129, "learning_rate": 3.765753424657534e-07, "log_odds_chosen": 2.5735721588134766, "log_odds_ratio": -0.1423213928937912, "logits/chosen": 0.9292276501655579, "logits/rejected": 0.9516267776489258, "logps/chosen": -2.8068583011627197, "logps/rejected": -5.292322635650635, "loss": 0.657, "nll_loss": 0.6427321434020996, "rewards/accuracies": 1.0, "rewards/chosen": -0.28068581223487854, "rewards/margins": 0.24854645133018494, "rewards/rejected": -0.5292322635650635, "step": 4553 }, { "epoch": 12.46817248459959, "grad_norm": 5.13489294052124, "learning_rate": 3.7643835616438355e-07, "log_odds_chosen": 3.4104833602905273, "log_odds_ratio": -0.20314675569534302, "logits/chosen": 0.8841487169265747, "logits/rejected": 0.839044451713562, "logps/chosen": -1.926649570465088, "logps/rejected": -5.189625263214111, "loss": 0.7661, "nll_loss": 0.7457495927810669, "rewards/accuracies": 1.0, "rewards/chosen": -0.1926649510860443, "rewards/margins": 0.3262976109981537, "rewards/rejected": -0.518962562084198, "step": 4554 }, { "epoch": 12.470910335386721, "grad_norm": 8.614387512207031, "learning_rate": 3.763013698630137e-07, "log_odds_chosen": 1.7727349996566772, "log_odds_ratio": -0.5506255626678467, "logits/chosen": 0.7729557752609253, "logits/rejected": 0.7759206891059875, "logps/chosen": -2.4990320205688477, "logps/rejected": -4.118679046630859, "loss": 0.7723, "nll_loss": 0.7172162532806396, "rewards/accuracies": 0.875, "rewards/chosen": -0.24990320205688477, "rewards/margins": 0.16196472942829132, "rewards/rejected": -0.4118679165840149, "step": 4555 }, { "epoch": 12.473648186173854, "grad_norm": 8.40307331085205, "learning_rate": 3.761643835616438e-07, "log_odds_chosen": 4.407880783081055, "log_odds_ratio": -0.27147290110588074, "logits/chosen": 0.9223368167877197, "logits/rejected": 0.9402508735656738, "logps/chosen": -2.690890312194824, "logps/rejected": -6.9896931648254395, "loss": 0.6996, "nll_loss": 0.6724766492843628, "rewards/accuracies": 0.875, "rewards/chosen": -0.2690890431404114, "rewards/margins": 0.4298802614212036, "rewards/rejected": -0.698969304561615, "step": 4556 }, { "epoch": 12.476386036960985, "grad_norm": 4.514106273651123, "learning_rate": 3.7602739726027395e-07, "log_odds_chosen": 2.9162232875823975, "log_odds_ratio": -0.15882569551467896, "logits/chosen": 0.9736480116844177, "logits/rejected": 1.0257608890533447, "logps/chosen": -2.8376729488372803, "logps/rejected": -5.674861431121826, "loss": 0.6992, "nll_loss": 0.6833582520484924, "rewards/accuracies": 1.0, "rewards/chosen": -0.28376731276512146, "rewards/margins": 0.2837188243865967, "rewards/rejected": -0.5674861669540405, "step": 4557 }, { "epoch": 12.479123887748118, "grad_norm": 6.956470489501953, "learning_rate": 3.758904109589041e-07, "log_odds_chosen": 2.2936723232269287, "log_odds_ratio": -0.38232293725013733, "logits/chosen": 1.2228896617889404, "logits/rejected": 1.2454609870910645, "logps/chosen": -3.311680316925049, "logps/rejected": -5.556769847869873, "loss": 0.7154, "nll_loss": 0.6771816611289978, "rewards/accuracies": 0.875, "rewards/chosen": -0.3311680257320404, "rewards/margins": 0.22450897097587585, "rewards/rejected": -0.5556769967079163, "step": 4558 }, { "epoch": 12.48186173853525, "grad_norm": 6.1173319816589355, "learning_rate": 3.7575342465753425e-07, "log_odds_chosen": 2.0538504123687744, "log_odds_ratio": -0.3674927353858948, "logits/chosen": 0.8873727321624756, "logits/rejected": 0.9131895899772644, "logps/chosen": -2.555410861968994, "logps/rejected": -4.476479530334473, "loss": 0.7288, "nll_loss": 0.6920930743217468, "rewards/accuracies": 0.75, "rewards/chosen": -0.2555410861968994, "rewards/margins": 0.19210685789585114, "rewards/rejected": -0.44764792919158936, "step": 4559 }, { "epoch": 12.484599589322382, "grad_norm": 5.474445819854736, "learning_rate": 3.7561643835616435e-07, "log_odds_chosen": 1.419036626815796, "log_odds_ratio": -0.3472331166267395, "logits/chosen": 0.6909621953964233, "logits/rejected": 0.6348189115524292, "logps/chosen": -2.0431416034698486, "logps/rejected": -3.3632094860076904, "loss": 0.7121, "nll_loss": 0.6773809194564819, "rewards/accuracies": 0.75, "rewards/chosen": -0.20431415736675262, "rewards/margins": 0.13200680911540985, "rewards/rejected": -0.3363209664821625, "step": 4560 }, { "epoch": 12.487337440109513, "grad_norm": 5.247344970703125, "learning_rate": 3.7547945205479445e-07, "log_odds_chosen": 1.5118988752365112, "log_odds_ratio": -0.30482739210128784, "logits/chosen": 1.0550988912582397, "logits/rejected": 1.0231401920318604, "logps/chosen": -1.976335048675537, "logps/rejected": -3.366490364074707, "loss": 0.6871, "nll_loss": 0.6566469073295593, "rewards/accuracies": 1.0, "rewards/chosen": -0.1976335048675537, "rewards/margins": 0.1390155702829361, "rewards/rejected": -0.336649090051651, "step": 4561 }, { "epoch": 12.490075290896646, "grad_norm": 5.872710227966309, "learning_rate": 3.7534246575342466e-07, "log_odds_chosen": 1.2697696685791016, "log_odds_ratio": -0.35519665479660034, "logits/chosen": 0.7695423364639282, "logits/rejected": 0.8271899819374084, "logps/chosen": -1.931096076965332, "logps/rejected": -3.0691256523132324, "loss": 0.6477, "nll_loss": 0.612221896648407, "rewards/accuracies": 1.0, "rewards/chosen": -0.19310961663722992, "rewards/margins": 0.1138029471039772, "rewards/rejected": -0.3069125711917877, "step": 4562 }, { "epoch": 12.492813141683778, "grad_norm": 5.678431510925293, "learning_rate": 3.7520547945205475e-07, "log_odds_chosen": 1.1207771301269531, "log_odds_ratio": -0.3983922600746155, "logits/chosen": 0.8475914001464844, "logits/rejected": 0.848273515701294, "logps/chosen": -2.9430408477783203, "logps/rejected": -3.973757743835449, "loss": 0.7224, "nll_loss": 0.6825645565986633, "rewards/accuracies": 0.75, "rewards/chosen": -0.29430410265922546, "rewards/margins": 0.10307169705629349, "rewards/rejected": -0.39737582206726074, "step": 4563 }, { "epoch": 12.49555099247091, "grad_norm": 6.392941951751709, "learning_rate": 3.750684931506849e-07, "log_odds_chosen": 2.1188883781433105, "log_odds_ratio": -0.2457723617553711, "logits/chosen": 0.8652799129486084, "logits/rejected": 0.8391923904418945, "logps/chosen": -2.0340213775634766, "logps/rejected": -3.99503755569458, "loss": 0.6803, "nll_loss": 0.6557382345199585, "rewards/accuracies": 0.875, "rewards/chosen": -0.20340213179588318, "rewards/margins": 0.1961016207933426, "rewards/rejected": -0.3995037376880646, "step": 4564 }, { "epoch": 12.498288843258042, "grad_norm": 5.482553005218506, "learning_rate": 3.7493150684931506e-07, "log_odds_chosen": 2.248732566833496, "log_odds_ratio": -0.29132774472236633, "logits/chosen": 0.838973343372345, "logits/rejected": 0.8383139967918396, "logps/chosen": -2.4518909454345703, "logps/rejected": -4.6353912353515625, "loss": 0.7366, "nll_loss": 0.7075061798095703, "rewards/accuracies": 0.875, "rewards/chosen": -0.2451891154050827, "rewards/margins": 0.21834999322891235, "rewards/rejected": -0.46353912353515625, "step": 4565 }, { "epoch": 12.501026694045175, "grad_norm": 5.780806064605713, "learning_rate": 3.747945205479452e-07, "log_odds_chosen": 1.3612887859344482, "log_odds_ratio": -0.2997399866580963, "logits/chosen": 0.88157057762146, "logits/rejected": 0.7910133004188538, "logps/chosen": -1.8585070371627808, "logps/rejected": -3.1039440631866455, "loss": 0.7015, "nll_loss": 0.6715534925460815, "rewards/accuracies": 1.0, "rewards/chosen": -0.18585069477558136, "rewards/margins": 0.1245437040925026, "rewards/rejected": -0.31039440631866455, "step": 4566 }, { "epoch": 12.503764544832306, "grad_norm": 5.994814872741699, "learning_rate": 3.746575342465753e-07, "log_odds_chosen": 2.311652660369873, "log_odds_ratio": -0.21508626639842987, "logits/chosen": 0.9836847186088562, "logits/rejected": 1.0508358478546143, "logps/chosen": -2.6838200092315674, "logps/rejected": -4.918752670288086, "loss": 0.674, "nll_loss": 0.6525092720985413, "rewards/accuracies": 0.875, "rewards/chosen": -0.2683820128440857, "rewards/margins": 0.22349324822425842, "rewards/rejected": -0.4918752908706665, "step": 4567 }, { "epoch": 12.506502395619439, "grad_norm": 4.321448802947998, "learning_rate": 3.745205479452055e-07, "log_odds_chosen": 3.9046216011047363, "log_odds_ratio": -0.13107328116893768, "logits/chosen": 0.9872891902923584, "logits/rejected": 1.0322209596633911, "logps/chosen": -2.1303703784942627, "logps/rejected": -5.862771034240723, "loss": 0.657, "nll_loss": 0.6438827514648438, "rewards/accuracies": 1.0, "rewards/chosen": -0.21303704380989075, "rewards/margins": 0.37324002385139465, "rewards/rejected": -0.5862770676612854, "step": 4568 }, { "epoch": 12.50924024640657, "grad_norm": 5.837528705596924, "learning_rate": 3.743835616438356e-07, "log_odds_chosen": 2.730565309524536, "log_odds_ratio": -0.10783283412456512, "logits/chosen": 0.9382263422012329, "logits/rejected": 1.016235589981079, "logps/chosen": -2.3399417400360107, "logps/rejected": -4.9456281661987305, "loss": 0.7181, "nll_loss": 0.7073007822036743, "rewards/accuracies": 1.0, "rewards/chosen": -0.23399418592453003, "rewards/margins": 0.2605687081813812, "rewards/rejected": -0.49456286430358887, "step": 4569 }, { "epoch": 12.511978097193703, "grad_norm": 6.242986679077148, "learning_rate": 3.742465753424657e-07, "log_odds_chosen": 2.972287178039551, "log_odds_ratio": -0.3132331669330597, "logits/chosen": 0.7496942281723022, "logits/rejected": 0.8282346129417419, "logps/chosen": -2.006998062133789, "logps/rejected": -4.815919399261475, "loss": 0.7089, "nll_loss": 0.677569568157196, "rewards/accuracies": 1.0, "rewards/chosen": -0.2006998062133789, "rewards/margins": 0.28089216351509094, "rewards/rejected": -0.48159196972846985, "step": 4570 }, { "epoch": 12.514715947980836, "grad_norm": 5.264981746673584, "learning_rate": 3.7410958904109587e-07, "log_odds_chosen": 1.6916078329086304, "log_odds_ratio": -0.28200626373291016, "logits/chosen": 0.7759160399436951, "logits/rejected": 0.7754185199737549, "logps/chosen": -2.4512343406677246, "logps/rejected": -3.9909005165100098, "loss": 0.6646, "nll_loss": 0.6364307403564453, "rewards/accuracies": 0.875, "rewards/chosen": -0.24512341618537903, "rewards/margins": 0.15396663546562195, "rewards/rejected": -0.399090051651001, "step": 4571 }, { "epoch": 12.517453798767967, "grad_norm": 5.183577537536621, "learning_rate": 3.73972602739726e-07, "log_odds_chosen": 3.4898672103881836, "log_odds_ratio": -0.29100483655929565, "logits/chosen": 0.9099746942520142, "logits/rejected": 0.9491233825683594, "logps/chosen": -2.6844534873962402, "logps/rejected": -6.121917724609375, "loss": 0.7106, "nll_loss": 0.6815062165260315, "rewards/accuracies": 0.875, "rewards/chosen": -0.26844531297683716, "rewards/margins": 0.34374651312828064, "rewards/rejected": -0.6121918559074402, "step": 4572 }, { "epoch": 12.5201916495551, "grad_norm": 4.924615859985352, "learning_rate": 3.7383561643835617e-07, "log_odds_chosen": 3.1953911781311035, "log_odds_ratio": -0.09996744245290756, "logits/chosen": 1.0073113441467285, "logits/rejected": 1.0809261798858643, "logps/chosen": -1.8962464332580566, "logps/rejected": -4.909880638122559, "loss": 0.6137, "nll_loss": 0.6036573648452759, "rewards/accuracies": 1.0, "rewards/chosen": -0.1896246373653412, "rewards/margins": 0.30136340856552124, "rewards/rejected": -0.4909880459308624, "step": 4573 }, { "epoch": 12.522929500342231, "grad_norm": 5.657700538635254, "learning_rate": 3.7369863013698627e-07, "log_odds_chosen": 1.694056510925293, "log_odds_ratio": -0.3848607838153839, "logits/chosen": 0.9551230072975159, "logits/rejected": 0.9909828305244446, "logps/chosen": -2.7124128341674805, "logps/rejected": -4.314620018005371, "loss": 0.6974, "nll_loss": 0.6588817834854126, "rewards/accuracies": 0.875, "rewards/chosen": -0.27124130725860596, "rewards/margins": 0.1602206975221634, "rewards/rejected": -0.43146198987960815, "step": 4574 }, { "epoch": 12.525667351129364, "grad_norm": 4.7877888679504395, "learning_rate": 3.735616438356164e-07, "log_odds_chosen": 1.820665955543518, "log_odds_ratio": -0.21020358800888062, "logits/chosen": 0.6715446710586548, "logits/rejected": 0.6245763897895813, "logps/chosen": -1.9827601909637451, "logps/rejected": -3.624412775039673, "loss": 0.5964, "nll_loss": 0.5754009485244751, "rewards/accuracies": 1.0, "rewards/chosen": -0.19827601313591003, "rewards/margins": 0.16416525840759277, "rewards/rejected": -0.3624413013458252, "step": 4575 }, { "epoch": 12.528405201916495, "grad_norm": 6.999873161315918, "learning_rate": 3.7342465753424657e-07, "log_odds_chosen": 1.9356406927108765, "log_odds_ratio": -0.5259972810745239, "logits/chosen": 0.9484107494354248, "logits/rejected": 0.9299502372741699, "logps/chosen": -2.6336283683776855, "logps/rejected": -4.498147010803223, "loss": 0.6909, "nll_loss": 0.6382910013198853, "rewards/accuracies": 0.75, "rewards/chosen": -0.2633628249168396, "rewards/margins": 0.18645186722278595, "rewards/rejected": -0.44981470704078674, "step": 4576 }, { "epoch": 12.531143052703628, "grad_norm": 5.401217937469482, "learning_rate": 3.7328767123287667e-07, "log_odds_chosen": 1.985987663269043, "log_odds_ratio": -0.18026888370513916, "logits/chosen": 0.7448886632919312, "logits/rejected": 0.6870706677436829, "logps/chosen": -1.7652854919433594, "logps/rejected": -3.5898025035858154, "loss": 0.6412, "nll_loss": 0.6231353282928467, "rewards/accuracies": 1.0, "rewards/chosen": -0.17652854323387146, "rewards/margins": 0.18245171010494232, "rewards/rejected": -0.358980268239975, "step": 4577 }, { "epoch": 12.53388090349076, "grad_norm": 4.418911933898926, "learning_rate": 3.731506849315068e-07, "log_odds_chosen": 1.6979591846466064, "log_odds_ratio": -0.20398752391338348, "logits/chosen": 0.8485676050186157, "logits/rejected": 0.8910072445869446, "logps/chosen": -2.0428521633148193, "logps/rejected": -3.6097464561462402, "loss": 0.6584, "nll_loss": 0.6380484700202942, "rewards/accuracies": 1.0, "rewards/chosen": -0.20428521931171417, "rewards/margins": 0.15668943524360657, "rewards/rejected": -0.36097466945648193, "step": 4578 }, { "epoch": 12.536618754277892, "grad_norm": 6.681446075439453, "learning_rate": 3.73013698630137e-07, "log_odds_chosen": 1.4970489740371704, "log_odds_ratio": -0.33852246403694153, "logits/chosen": 0.9267407059669495, "logits/rejected": 0.9453816413879395, "logps/chosen": -2.711171865463257, "logps/rejected": -4.157262802124023, "loss": 0.6633, "nll_loss": 0.629432201385498, "rewards/accuracies": 0.875, "rewards/chosen": -0.2711172103881836, "rewards/margins": 0.14460906386375427, "rewards/rejected": -0.4157262444496155, "step": 4579 }, { "epoch": 12.539356605065024, "grad_norm": 4.89616584777832, "learning_rate": 3.7287671232876713e-07, "log_odds_chosen": 2.3119139671325684, "log_odds_ratio": -0.24458391964435577, "logits/chosen": 0.9633945226669312, "logits/rejected": 0.9410799741744995, "logps/chosen": -2.498725652694702, "logps/rejected": -4.72599983215332, "loss": 0.7737, "nll_loss": 0.7492156624794006, "rewards/accuracies": 0.875, "rewards/chosen": -0.24987256526947021, "rewards/margins": 0.22272738814353943, "rewards/rejected": -0.47259998321533203, "step": 4580 }, { "epoch": 12.542094455852157, "grad_norm": 4.59708309173584, "learning_rate": 3.7273972602739723e-07, "log_odds_chosen": 3.3770651817321777, "log_odds_ratio": -0.15258686244487762, "logits/chosen": 0.6898090839385986, "logits/rejected": 0.69315105676651, "logps/chosen": -2.161501884460449, "logps/rejected": -5.357190132141113, "loss": 0.7846, "nll_loss": 0.7693576812744141, "rewards/accuracies": 1.0, "rewards/chosen": -0.2161502093076706, "rewards/margins": 0.3195687532424927, "rewards/rejected": -0.5357189774513245, "step": 4581 }, { "epoch": 12.544832306639288, "grad_norm": 5.479652404785156, "learning_rate": 3.726027397260274e-07, "log_odds_chosen": 2.246029853820801, "log_odds_ratio": -0.2223908007144928, "logits/chosen": 0.9211438894271851, "logits/rejected": 0.9978852272033691, "logps/chosen": -2.456000566482544, "logps/rejected": -4.574319839477539, "loss": 0.6536, "nll_loss": 0.6313194036483765, "rewards/accuracies": 0.875, "rewards/chosen": -0.24560005962848663, "rewards/margins": 0.2118319272994995, "rewards/rejected": -0.45743200182914734, "step": 4582 }, { "epoch": 12.54757015742642, "grad_norm": 5.233863353729248, "learning_rate": 3.7246575342465753e-07, "log_odds_chosen": 2.0716311931610107, "log_odds_ratio": -0.20601613819599152, "logits/chosen": 0.7600136399269104, "logits/rejected": 0.7842503190040588, "logps/chosen": -2.3118650913238525, "logps/rejected": -4.2668561935424805, "loss": 0.6194, "nll_loss": 0.5987571477890015, "rewards/accuracies": 1.0, "rewards/chosen": -0.23118647933006287, "rewards/margins": 0.19549915194511414, "rewards/rejected": -0.426685631275177, "step": 4583 }, { "epoch": 12.550308008213552, "grad_norm": 5.8319411277771, "learning_rate": 3.7232876712328763e-07, "log_odds_chosen": 2.266357898712158, "log_odds_ratio": -0.23439881205558777, "logits/chosen": 0.6779203414916992, "logits/rejected": 0.8014475107192993, "logps/chosen": -2.3529796600341797, "logps/rejected": -4.494227409362793, "loss": 0.6826, "nll_loss": 0.6591598987579346, "rewards/accuracies": 1.0, "rewards/chosen": -0.23529797792434692, "rewards/margins": 0.21412476897239685, "rewards/rejected": -0.4494227468967438, "step": 4584 }, { "epoch": 12.553045859000685, "grad_norm": 6.739555835723877, "learning_rate": 3.721917808219178e-07, "log_odds_chosen": 1.1039249897003174, "log_odds_ratio": -0.32962578535079956, "logits/chosen": 0.7184659242630005, "logits/rejected": 0.6641196012496948, "logps/chosen": -2.74680233001709, "logps/rejected": -3.7819559574127197, "loss": 0.7377, "nll_loss": 0.704749345779419, "rewards/accuracies": 0.875, "rewards/chosen": -0.2746802270412445, "rewards/margins": 0.10351536422967911, "rewards/rejected": -0.3781956136226654, "step": 4585 }, { "epoch": 12.555783709787816, "grad_norm": 5.008030891418457, "learning_rate": 3.7205479452054794e-07, "log_odds_chosen": 2.8157966136932373, "log_odds_ratio": -0.18522286415100098, "logits/chosen": 0.647399365901947, "logits/rejected": 0.7104696035385132, "logps/chosen": -1.6100857257843018, "logps/rejected": -4.205630302429199, "loss": 0.578, "nll_loss": 0.5594797134399414, "rewards/accuracies": 1.0, "rewards/chosen": -0.1610085666179657, "rewards/margins": 0.2595544755458832, "rewards/rejected": -0.4205630421638489, "step": 4586 }, { "epoch": 12.558521560574949, "grad_norm": 5.187954902648926, "learning_rate": 3.719178082191781e-07, "log_odds_chosen": 1.929879903793335, "log_odds_ratio": -0.2284768968820572, "logits/chosen": 0.8443132638931274, "logits/rejected": 0.9010331630706787, "logps/chosen": -2.1918787956237793, "logps/rejected": -3.989650011062622, "loss": 0.6576, "nll_loss": 0.6347163319587708, "rewards/accuracies": 1.0, "rewards/chosen": -0.2191878855228424, "rewards/margins": 0.1797771453857422, "rewards/rejected": -0.3989650011062622, "step": 4587 }, { "epoch": 12.56125941136208, "grad_norm": 4.911180019378662, "learning_rate": 3.717808219178082e-07, "log_odds_chosen": 2.084113121032715, "log_odds_ratio": -0.27140089869499207, "logits/chosen": 0.9197292327880859, "logits/rejected": 0.921773374080658, "logps/chosen": -2.372257947921753, "logps/rejected": -4.356667995452881, "loss": 0.7128, "nll_loss": 0.6856893301010132, "rewards/accuracies": 1.0, "rewards/chosen": -0.23722580075263977, "rewards/margins": 0.19844099879264832, "rewards/rejected": -0.4356667995452881, "step": 4588 }, { "epoch": 12.563997262149213, "grad_norm": 4.368553638458252, "learning_rate": 3.7164383561643834e-07, "log_odds_chosen": 1.9776180982589722, "log_odds_ratio": -0.26783519983291626, "logits/chosen": 0.995303213596344, "logits/rejected": 1.0069313049316406, "logps/chosen": -2.044769048690796, "logps/rejected": -3.935967206954956, "loss": 0.6718, "nll_loss": 0.645051896572113, "rewards/accuracies": 1.0, "rewards/chosen": -0.20447690784931183, "rewards/margins": 0.1891198307275772, "rewards/rejected": -0.39359673857688904, "step": 4589 }, { "epoch": 12.566735112936344, "grad_norm": 7.407226085662842, "learning_rate": 3.715068493150685e-07, "log_odds_chosen": 0.43860098719596863, "log_odds_ratio": -0.5669711828231812, "logits/chosen": 1.0202088356018066, "logits/rejected": 1.01143217086792, "logps/chosen": -1.8742014169692993, "logps/rejected": -2.225770950317383, "loss": 0.6064, "nll_loss": 0.5496673583984375, "rewards/accuracies": 0.625, "rewards/chosen": -0.18742015957832336, "rewards/margins": 0.03515695035457611, "rewards/rejected": -0.22257709503173828, "step": 4590 }, { "epoch": 12.569472963723477, "grad_norm": 8.367209434509277, "learning_rate": 3.713698630136986e-07, "log_odds_chosen": 1.6883225440979004, "log_odds_ratio": -0.6454980373382568, "logits/chosen": 1.11958646774292, "logits/rejected": 1.085876703262329, "logps/chosen": -2.6770176887512207, "logps/rejected": -4.230779647827148, "loss": 0.7276, "nll_loss": 0.6630723476409912, "rewards/accuracies": 0.75, "rewards/chosen": -0.26770177483558655, "rewards/margins": 0.15537619590759277, "rewards/rejected": -0.4230779707431793, "step": 4591 }, { "epoch": 12.572210814510608, "grad_norm": 4.755762100219727, "learning_rate": 3.7123287671232874e-07, "log_odds_chosen": 2.5610907077789307, "log_odds_ratio": -0.12424063682556152, "logits/chosen": 0.9148064851760864, "logits/rejected": 0.9468272924423218, "logps/chosen": -2.482815742492676, "logps/rejected": -4.949185371398926, "loss": 0.623, "nll_loss": 0.6105784773826599, "rewards/accuracies": 1.0, "rewards/chosen": -0.2482815533876419, "rewards/margins": 0.2466369867324829, "rewards/rejected": -0.4949185252189636, "step": 4592 }, { "epoch": 12.574948665297741, "grad_norm": 5.881563186645508, "learning_rate": 3.710958904109589e-07, "log_odds_chosen": 2.8661105632781982, "log_odds_ratio": -0.2929915189743042, "logits/chosen": 1.0106583833694458, "logits/rejected": 0.9641422033309937, "logps/chosen": -1.982393503189087, "logps/rejected": -4.705787658691406, "loss": 0.7307, "nll_loss": 0.7014018297195435, "rewards/accuracies": 0.75, "rewards/chosen": -0.19823935627937317, "rewards/margins": 0.272339403629303, "rewards/rejected": -0.47057878971099854, "step": 4593 }, { "epoch": 12.577686516084874, "grad_norm": 6.20153284072876, "learning_rate": 3.7095890410958905e-07, "log_odds_chosen": 2.222320556640625, "log_odds_ratio": -0.25215429067611694, "logits/chosen": 0.8383055925369263, "logits/rejected": 0.8537101745605469, "logps/chosen": -2.410670042037964, "logps/rejected": -4.523036003112793, "loss": 0.7299, "nll_loss": 0.704682469367981, "rewards/accuracies": 0.875, "rewards/chosen": -0.24106702208518982, "rewards/margins": 0.2112366259098053, "rewards/rejected": -0.4523036479949951, "step": 4594 }, { "epoch": 12.580424366872005, "grad_norm": 5.509934902191162, "learning_rate": 3.7082191780821914e-07, "log_odds_chosen": 2.3904662132263184, "log_odds_ratio": -0.2667938768863678, "logits/chosen": 0.6228907108306885, "logits/rejected": 0.5597366094589233, "logps/chosen": -2.266890525817871, "logps/rejected": -4.540975093841553, "loss": 0.6563, "nll_loss": 0.629608154296875, "rewards/accuracies": 0.875, "rewards/chosen": -0.22668907046318054, "rewards/margins": 0.22740843892097473, "rewards/rejected": -0.4540975093841553, "step": 4595 }, { "epoch": 12.583162217659137, "grad_norm": 5.473081588745117, "learning_rate": 3.706849315068493e-07, "log_odds_chosen": 1.8945708274841309, "log_odds_ratio": -0.2689780294895172, "logits/chosen": 0.7797625064849854, "logits/rejected": 0.7705999612808228, "logps/chosen": -2.391352415084839, "logps/rejected": -4.1327972412109375, "loss": 0.7322, "nll_loss": 0.705324649810791, "rewards/accuracies": 0.875, "rewards/chosen": -0.2391352355480194, "rewards/margins": 0.17414447665214539, "rewards/rejected": -0.4132797420024872, "step": 4596 }, { "epoch": 12.58590006844627, "grad_norm": 5.638172626495361, "learning_rate": 3.7054794520547945e-07, "log_odds_chosen": 1.3223179578781128, "log_odds_ratio": -0.33226367831230164, "logits/chosen": 0.9760285019874573, "logits/rejected": 0.9289950132369995, "logps/chosen": -2.0232768058776855, "logps/rejected": -3.262165069580078, "loss": 0.6378, "nll_loss": 0.6045573353767395, "rewards/accuracies": 0.875, "rewards/chosen": -0.202327698469162, "rewards/margins": 0.12388882040977478, "rewards/rejected": -0.32621651887893677, "step": 4597 }, { "epoch": 12.588637919233403, "grad_norm": 5.308889865875244, "learning_rate": 3.7041095890410955e-07, "log_odds_chosen": 1.8057528734207153, "log_odds_ratio": -0.28047335147857666, "logits/chosen": 0.7961071133613586, "logits/rejected": 0.7438066005706787, "logps/chosen": -1.677351713180542, "logps/rejected": -3.322169303894043, "loss": 0.668, "nll_loss": 0.6399568319320679, "rewards/accuracies": 1.0, "rewards/chosen": -0.16773515939712524, "rewards/margins": 0.1644817739725113, "rewards/rejected": -0.33221691846847534, "step": 4598 }, { "epoch": 12.591375770020534, "grad_norm": 9.061616897583008, "learning_rate": 3.7027397260273975e-07, "log_odds_chosen": 1.1798758506774902, "log_odds_ratio": -0.6119173765182495, "logits/chosen": 1.0602946281433105, "logits/rejected": 1.0344712734222412, "logps/chosen": -2.744903087615967, "logps/rejected": -3.9024014472961426, "loss": 0.7314, "nll_loss": 0.670257568359375, "rewards/accuracies": 0.75, "rewards/chosen": -0.2744902968406677, "rewards/margins": 0.11574984341859818, "rewards/rejected": -0.3902401626110077, "step": 4599 }, { "epoch": 12.594113620807667, "grad_norm": 5.699807167053223, "learning_rate": 3.7013698630136985e-07, "log_odds_chosen": 1.1963214874267578, "log_odds_ratio": -0.3761288523674011, "logits/chosen": 0.8274449110031128, "logits/rejected": 0.826658308506012, "logps/chosen": -2.3659183979034424, "logps/rejected": -3.4664063453674316, "loss": 0.6527, "nll_loss": 0.615044355392456, "rewards/accuracies": 0.875, "rewards/chosen": -0.23659184575080872, "rewards/margins": 0.11004878580570221, "rewards/rejected": -0.34664061665534973, "step": 4600 }, { "epoch": 12.596851471594798, "grad_norm": 4.684084415435791, "learning_rate": 3.7e-07, "log_odds_chosen": 2.6707024574279785, "log_odds_ratio": -0.12947896122932434, "logits/chosen": 1.2401187419891357, "logits/rejected": 1.2413597106933594, "logps/chosen": -2.2680182456970215, "logps/rejected": -4.825629234313965, "loss": 0.6181, "nll_loss": 0.605191707611084, "rewards/accuracies": 1.0, "rewards/chosen": -0.2268018275499344, "rewards/margins": 0.2557610869407654, "rewards/rejected": -0.4825628995895386, "step": 4601 }, { "epoch": 12.59958932238193, "grad_norm": 6.26150369644165, "learning_rate": 3.698630136986301e-07, "log_odds_chosen": 3.0248289108276367, "log_odds_ratio": -0.12985725700855255, "logits/chosen": 1.1735422611236572, "logits/rejected": 1.2797555923461914, "logps/chosen": -1.898801565170288, "logps/rejected": -4.735082149505615, "loss": 0.5838, "nll_loss": 0.5708453059196472, "rewards/accuracies": 1.0, "rewards/chosen": -0.1898801624774933, "rewards/margins": 0.28362807631492615, "rewards/rejected": -0.47350820899009705, "step": 4602 }, { "epoch": 12.602327173169062, "grad_norm": 6.869597434997559, "learning_rate": 3.6972602739726026e-07, "log_odds_chosen": 0.8744656443595886, "log_odds_ratio": -0.6104235649108887, "logits/chosen": 0.5216862559318542, "logits/rejected": 0.5638073682785034, "logps/chosen": -2.5425145626068115, "logps/rejected": -3.3610992431640625, "loss": 0.8066, "nll_loss": 0.7455528974533081, "rewards/accuracies": 0.75, "rewards/chosen": -0.2542514503002167, "rewards/margins": 0.08185845613479614, "rewards/rejected": -0.3361099362373352, "step": 4603 }, { "epoch": 12.605065023956195, "grad_norm": 5.5022759437561035, "learning_rate": 3.695890410958904e-07, "log_odds_chosen": 1.8620059490203857, "log_odds_ratio": -0.2546234130859375, "logits/chosen": 0.7285106778144836, "logits/rejected": 0.7215936183929443, "logps/chosen": -2.5308680534362793, "logps/rejected": -4.290755271911621, "loss": 0.7741, "nll_loss": 0.7486541271209717, "rewards/accuracies": 0.875, "rewards/chosen": -0.25308677554130554, "rewards/margins": 0.17598873376846313, "rewards/rejected": -0.42907553911209106, "step": 4604 }, { "epoch": 12.607802874743326, "grad_norm": 5.315486907958984, "learning_rate": 3.694520547945205e-07, "log_odds_chosen": 1.9490631818771362, "log_odds_ratio": -0.23671400547027588, "logits/chosen": 0.6090227365493774, "logits/rejected": 0.5380667448043823, "logps/chosen": -2.495990037918091, "logps/rejected": -4.309885025024414, "loss": 0.6756, "nll_loss": 0.651935338973999, "rewards/accuracies": 1.0, "rewards/chosen": -0.24959900975227356, "rewards/margins": 0.1813894808292389, "rewards/rejected": -0.43098849058151245, "step": 4605 }, { "epoch": 12.61054072553046, "grad_norm": 8.363395690917969, "learning_rate": 3.693150684931507e-07, "log_odds_chosen": 2.9377942085266113, "log_odds_ratio": -0.15548881888389587, "logits/chosen": 0.9762435555458069, "logits/rejected": 0.9860588908195496, "logps/chosen": -2.5481929779052734, "logps/rejected": -5.390031337738037, "loss": 0.6827, "nll_loss": 0.6671865582466125, "rewards/accuracies": 1.0, "rewards/chosen": -0.2548193037509918, "rewards/margins": 0.2841838002204895, "rewards/rejected": -0.5390030741691589, "step": 4606 }, { "epoch": 12.61327857631759, "grad_norm": 6.002018451690674, "learning_rate": 3.691780821917808e-07, "log_odds_chosen": 0.9334370493888855, "log_odds_ratio": -0.4061441421508789, "logits/chosen": 0.7654293179512024, "logits/rejected": 0.620574951171875, "logps/chosen": -2.2807180881500244, "logps/rejected": -3.1439261436462402, "loss": 0.8553, "nll_loss": 0.8147109150886536, "rewards/accuracies": 0.875, "rewards/chosen": -0.22807180881500244, "rewards/margins": 0.08632081747055054, "rewards/rejected": -0.314392626285553, "step": 4607 }, { "epoch": 12.616016427104723, "grad_norm": 4.491105079650879, "learning_rate": 3.690410958904109e-07, "log_odds_chosen": 2.9489917755126953, "log_odds_ratio": -0.18963290750980377, "logits/chosen": 0.6768096089363098, "logits/rejected": 0.6179929971694946, "logps/chosen": -2.0678791999816895, "logps/rejected": -4.868596076965332, "loss": 0.6681, "nll_loss": 0.6491438150405884, "rewards/accuracies": 1.0, "rewards/chosen": -0.20678791403770447, "rewards/margins": 0.2800717055797577, "rewards/rejected": -0.48685958981513977, "step": 4608 }, { "epoch": 12.618754277891854, "grad_norm": 5.357102394104004, "learning_rate": 3.6890410958904106e-07, "log_odds_chosen": 3.2965266704559326, "log_odds_ratio": -0.23302042484283447, "logits/chosen": 0.7075852751731873, "logits/rejected": 0.7722390294075012, "logps/chosen": -1.879238486289978, "logps/rejected": -5.018945693969727, "loss": 0.7147, "nll_loss": 0.691432535648346, "rewards/accuracies": 0.875, "rewards/chosen": -0.187923863530159, "rewards/margins": 0.3139707148075104, "rewards/rejected": -0.5018945932388306, "step": 4609 }, { "epoch": 12.621492128678987, "grad_norm": 5.251101493835449, "learning_rate": 3.687671232876712e-07, "log_odds_chosen": 1.2095592021942139, "log_odds_ratio": -0.42185842990875244, "logits/chosen": 0.9322377443313599, "logits/rejected": 0.9226022362709045, "logps/chosen": -2.1468842029571533, "logps/rejected": -3.3009936809539795, "loss": 0.6768, "nll_loss": 0.6346005797386169, "rewards/accuracies": 0.875, "rewards/chosen": -0.21468842029571533, "rewards/margins": 0.1154109314084053, "rewards/rejected": -0.33009934425354004, "step": 4610 }, { "epoch": 12.624229979466119, "grad_norm": 4.602899551391602, "learning_rate": 3.6863013698630137e-07, "log_odds_chosen": 1.9608657360076904, "log_odds_ratio": -0.2383139282464981, "logits/chosen": 0.7690723538398743, "logits/rejected": 0.7437252998352051, "logps/chosen": -2.301597833633423, "logps/rejected": -4.12452507019043, "loss": 0.6387, "nll_loss": 0.6148384809494019, "rewards/accuracies": 1.0, "rewards/chosen": -0.23015978932380676, "rewards/margins": 0.18229272961616516, "rewards/rejected": -0.4124525189399719, "step": 4611 }, { "epoch": 12.626967830253252, "grad_norm": 4.693680286407471, "learning_rate": 3.6849315068493147e-07, "log_odds_chosen": 3.5533182621002197, "log_odds_ratio": -0.10497195273637772, "logits/chosen": 1.0052870512008667, "logits/rejected": 1.1078827381134033, "logps/chosen": -1.9854862689971924, "logps/rejected": -5.369932651519775, "loss": 0.6147, "nll_loss": 0.6041693091392517, "rewards/accuracies": 1.0, "rewards/chosen": -0.19854861497879028, "rewards/margins": 0.33844462037086487, "rewards/rejected": -0.5369932651519775, "step": 4612 }, { "epoch": 12.629705681040383, "grad_norm": 5.3971357345581055, "learning_rate": 3.6835616438356167e-07, "log_odds_chosen": 1.8640761375427246, "log_odds_ratio": -0.2525021731853485, "logits/chosen": 0.8549326658248901, "logits/rejected": 0.9442949295043945, "logps/chosen": -2.6396493911743164, "logps/rejected": -4.444981575012207, "loss": 0.75, "nll_loss": 0.7247509360313416, "rewards/accuracies": 1.0, "rewards/chosen": -0.2639649510383606, "rewards/margins": 0.18053323030471802, "rewards/rejected": -0.4444981813430786, "step": 4613 }, { "epoch": 12.632443531827516, "grad_norm": 5.547245502471924, "learning_rate": 3.6821917808219177e-07, "log_odds_chosen": 1.1033353805541992, "log_odds_ratio": -0.3715580403804779, "logits/chosen": 0.8378533720970154, "logits/rejected": 0.7970426082611084, "logps/chosen": -2.6706595420837402, "logps/rejected": -3.6817383766174316, "loss": 0.7321, "nll_loss": 0.6949790716171265, "rewards/accuracies": 0.75, "rewards/chosen": -0.26706594228744507, "rewards/margins": 0.1011078804731369, "rewards/rejected": -0.36817383766174316, "step": 4614 }, { "epoch": 12.635181382614647, "grad_norm": 4.526909828186035, "learning_rate": 3.6808219178082187e-07, "log_odds_chosen": 3.0400149822235107, "log_odds_ratio": -0.1592150181531906, "logits/chosen": 0.9044289588928223, "logits/rejected": 0.9821746349334717, "logps/chosen": -1.8526184558868408, "logps/rejected": -4.715000629425049, "loss": 0.6327, "nll_loss": 0.6167706847190857, "rewards/accuracies": 1.0, "rewards/chosen": -0.18526186048984528, "rewards/margins": 0.2862381935119629, "rewards/rejected": -0.47150006890296936, "step": 4615 }, { "epoch": 12.63791923340178, "grad_norm": 5.196521282196045, "learning_rate": 3.67945205479452e-07, "log_odds_chosen": 2.3129727840423584, "log_odds_ratio": -0.20353960990905762, "logits/chosen": 0.9627403020858765, "logits/rejected": 1.0286387205123901, "logps/chosen": -2.6322288513183594, "logps/rejected": -4.882647514343262, "loss": 0.7089, "nll_loss": 0.6885111927986145, "rewards/accuracies": 0.875, "rewards/chosen": -0.263222873210907, "rewards/margins": 0.22504185140132904, "rewards/rejected": -0.4882647395133972, "step": 4616 }, { "epoch": 12.640657084188911, "grad_norm": 4.766549110412598, "learning_rate": 3.6780821917808217e-07, "log_odds_chosen": 3.901400566101074, "log_odds_ratio": -0.11414645612239838, "logits/chosen": 0.9063759446144104, "logits/rejected": 0.8920292258262634, "logps/chosen": -2.610308885574341, "logps/rejected": -6.367269515991211, "loss": 0.8136, "nll_loss": 0.8021896481513977, "rewards/accuracies": 1.0, "rewards/chosen": -0.261030912399292, "rewards/margins": 0.375696063041687, "rewards/rejected": -0.636726975440979, "step": 4617 }, { "epoch": 12.643394934976044, "grad_norm": 5.608424186706543, "learning_rate": 3.676712328767123e-07, "log_odds_chosen": 2.2022147178649902, "log_odds_ratio": -0.1534096598625183, "logits/chosen": 1.012709617614746, "logits/rejected": 1.1182219982147217, "logps/chosen": -2.454920768737793, "logps/rejected": -4.5774946212768555, "loss": 0.6197, "nll_loss": 0.6043159365653992, "rewards/accuracies": 1.0, "rewards/chosen": -0.24549207091331482, "rewards/margins": 0.21225741505622864, "rewards/rejected": -0.45774948596954346, "step": 4618 }, { "epoch": 12.646132785763175, "grad_norm": 4.882852554321289, "learning_rate": 3.675342465753424e-07, "log_odds_chosen": 2.388183832168579, "log_odds_ratio": -0.23779213428497314, "logits/chosen": 0.599211573600769, "logits/rejected": 0.6361563801765442, "logps/chosen": -3.440826892852783, "logps/rejected": -5.756524085998535, "loss": 0.8377, "nll_loss": 0.8139290809631348, "rewards/accuracies": 0.875, "rewards/chosen": -0.34408271312713623, "rewards/margins": 0.23156972229480743, "rewards/rejected": -0.5756524801254272, "step": 4619 }, { "epoch": 12.648870636550308, "grad_norm": 5.212899208068848, "learning_rate": 3.6739726027397263e-07, "log_odds_chosen": 1.0912315845489502, "log_odds_ratio": -0.4138841927051544, "logits/chosen": 0.837357223033905, "logits/rejected": 0.8548545241355896, "logps/chosen": -4.279541969299316, "logps/rejected": -5.348410606384277, "loss": 0.9947, "nll_loss": 0.95329749584198, "rewards/accuracies": 0.75, "rewards/chosen": -0.42795413732528687, "rewards/margins": 0.10688689351081848, "rewards/rejected": -0.5348410606384277, "step": 4620 }, { "epoch": 12.651608487337441, "grad_norm": 9.380678176879883, "learning_rate": 3.6726027397260273e-07, "log_odds_chosen": 0.7762308120727539, "log_odds_ratio": -0.628930389881134, "logits/chosen": 1.0401532649993896, "logits/rejected": 1.0193560123443604, "logps/chosen": -3.029170513153076, "logps/rejected": -3.738433837890625, "loss": 0.8989, "nll_loss": 0.8360041379928589, "rewards/accuracies": 0.75, "rewards/chosen": -0.3029170334339142, "rewards/margins": 0.07092633843421936, "rewards/rejected": -0.37384340167045593, "step": 4621 }, { "epoch": 12.654346338124572, "grad_norm": 4.6822686195373535, "learning_rate": 3.6712328767123283e-07, "log_odds_chosen": 1.9297678470611572, "log_odds_ratio": -0.37457275390625, "logits/chosen": 0.7853136658668518, "logits/rejected": 0.864938497543335, "logps/chosen": -2.5239624977111816, "logps/rejected": -4.38882303237915, "loss": 0.7375, "nll_loss": 0.7000406384468079, "rewards/accuracies": 0.75, "rewards/chosen": -0.25239628553390503, "rewards/margins": 0.18648603558540344, "rewards/rejected": -0.4388822913169861, "step": 4622 }, { "epoch": 12.657084188911703, "grad_norm": 7.576404094696045, "learning_rate": 3.66986301369863e-07, "log_odds_chosen": 2.4287562370300293, "log_odds_ratio": -0.35660696029663086, "logits/chosen": 0.869817852973938, "logits/rejected": 0.898899257183075, "logps/chosen": -2.471165418624878, "logps/rejected": -4.847092151641846, "loss": 0.8652, "nll_loss": 0.8295400142669678, "rewards/accuracies": 0.875, "rewards/chosen": -0.24711653590202332, "rewards/margins": 0.2375926822423935, "rewards/rejected": -0.4847092032432556, "step": 4623 }, { "epoch": 12.659822039698836, "grad_norm": 4.744293212890625, "learning_rate": 3.6684931506849313e-07, "log_odds_chosen": 3.1703240871429443, "log_odds_ratio": -0.2223229706287384, "logits/chosen": 1.04762601852417, "logits/rejected": 1.0933890342712402, "logps/chosen": -2.733395576477051, "logps/rejected": -5.82858943939209, "loss": 0.7069, "nll_loss": 0.6846752166748047, "rewards/accuracies": 0.875, "rewards/chosen": -0.27333956956863403, "rewards/margins": 0.30951935052871704, "rewards/rejected": -0.5828589200973511, "step": 4624 }, { "epoch": 12.66255989048597, "grad_norm": 5.495741844177246, "learning_rate": 3.667123287671233e-07, "log_odds_chosen": 1.9656314849853516, "log_odds_ratio": -0.2677328586578369, "logits/chosen": 0.5971007943153381, "logits/rejected": 0.5843631029129028, "logps/chosen": -1.9399088621139526, "logps/rejected": -3.745712995529175, "loss": 0.6058, "nll_loss": 0.5790227055549622, "rewards/accuracies": 1.0, "rewards/chosen": -0.19399090111255646, "rewards/margins": 0.18058040738105774, "rewards/rejected": -0.3745713233947754, "step": 4625 }, { "epoch": 12.6652977412731, "grad_norm": 5.288745880126953, "learning_rate": 3.665753424657534e-07, "log_odds_chosen": 1.9596363306045532, "log_odds_ratio": -0.3160243034362793, "logits/chosen": 1.0336068868637085, "logits/rejected": 1.043599009513855, "logps/chosen": -2.469027519226074, "logps/rejected": -4.354105472564697, "loss": 0.658, "nll_loss": 0.6264436841011047, "rewards/accuracies": 0.875, "rewards/chosen": -0.24690276384353638, "rewards/margins": 0.1885078251361847, "rewards/rejected": -0.4354105591773987, "step": 4626 }, { "epoch": 12.668035592060233, "grad_norm": 6.3735671043396, "learning_rate": 3.664383561643836e-07, "log_odds_chosen": 2.7834038734436035, "log_odds_ratio": -0.1660631000995636, "logits/chosen": 1.0206892490386963, "logits/rejected": 1.0941972732543945, "logps/chosen": -2.789571762084961, "logps/rejected": -5.497597694396973, "loss": 0.7867, "nll_loss": 0.7700872421264648, "rewards/accuracies": 1.0, "rewards/chosen": -0.27895718812942505, "rewards/margins": 0.2708026170730591, "rewards/rejected": -0.5497598052024841, "step": 4627 }, { "epoch": 12.670773442847365, "grad_norm": 5.044332981109619, "learning_rate": 3.663013698630137e-07, "log_odds_chosen": 1.9489010572433472, "log_odds_ratio": -0.25411781668663025, "logits/chosen": 0.7340211868286133, "logits/rejected": 0.7347816228866577, "logps/chosen": -1.8049951791763306, "logps/rejected": -3.5954267978668213, "loss": 0.654, "nll_loss": 0.6285852789878845, "rewards/accuracies": 0.875, "rewards/chosen": -0.18049952387809753, "rewards/margins": 0.17904315888881683, "rewards/rejected": -0.35954269766807556, "step": 4628 }, { "epoch": 12.673511293634498, "grad_norm": 4.5129594802856445, "learning_rate": 3.661643835616438e-07, "log_odds_chosen": 2.219331741333008, "log_odds_ratio": -0.21028295159339905, "logits/chosen": 0.7653837203979492, "logits/rejected": 0.8118778467178345, "logps/chosen": -2.5031604766845703, "logps/rejected": -4.659730911254883, "loss": 0.8099, "nll_loss": 0.7888551354408264, "rewards/accuracies": 1.0, "rewards/chosen": -0.2503160238265991, "rewards/margins": 0.2156570702791214, "rewards/rejected": -0.4659730792045593, "step": 4629 }, { "epoch": 12.676249144421629, "grad_norm": 6.32966947555542, "learning_rate": 3.6602739726027394e-07, "log_odds_chosen": 0.9239203929901123, "log_odds_ratio": -0.5002254247665405, "logits/chosen": 0.5842390060424805, "logits/rejected": 0.5722638368606567, "logps/chosen": -2.8611903190612793, "logps/rejected": -3.703202724456787, "loss": 0.6545, "nll_loss": 0.6044429540634155, "rewards/accuracies": 0.75, "rewards/chosen": -0.2861190140247345, "rewards/margins": 0.08420126140117645, "rewards/rejected": -0.37032026052474976, "step": 4630 }, { "epoch": 12.678986995208762, "grad_norm": 5.719058513641357, "learning_rate": 3.658904109589041e-07, "log_odds_chosen": 2.3688220977783203, "log_odds_ratio": -0.2486661970615387, "logits/chosen": 0.6640966534614563, "logits/rejected": 0.7144200205802917, "logps/chosen": -2.114696502685547, "logps/rejected": -4.357663631439209, "loss": 0.6318, "nll_loss": 0.6069480776786804, "rewards/accuracies": 0.875, "rewards/chosen": -0.21146968007087708, "rewards/margins": 0.2242967188358307, "rewards/rejected": -0.435766339302063, "step": 4631 }, { "epoch": 12.681724845995893, "grad_norm": 4.904270172119141, "learning_rate": 3.6575342465753424e-07, "log_odds_chosen": 2.4924368858337402, "log_odds_ratio": -0.23997043073177338, "logits/chosen": 0.8304638862609863, "logits/rejected": 0.8581440448760986, "logps/chosen": -2.3074114322662354, "logps/rejected": -4.725185394287109, "loss": 0.6575, "nll_loss": 0.6335516571998596, "rewards/accuracies": 0.875, "rewards/chosen": -0.23074114322662354, "rewards/margins": 0.24177740514278412, "rewards/rejected": -0.47251856327056885, "step": 4632 }, { "epoch": 12.684462696783026, "grad_norm": 5.829933166503906, "learning_rate": 3.6561643835616434e-07, "log_odds_chosen": 2.152975559234619, "log_odds_ratio": -0.45891815423965454, "logits/chosen": 1.0735563039779663, "logits/rejected": 1.1444016695022583, "logps/chosen": -3.2852225303649902, "logps/rejected": -5.400213241577148, "loss": 0.6455, "nll_loss": 0.5996258854866028, "rewards/accuracies": 0.875, "rewards/chosen": -0.328522264957428, "rewards/margins": 0.21149910986423492, "rewards/rejected": -0.5400214195251465, "step": 4633 }, { "epoch": 12.687200547570157, "grad_norm": 5.166065692901611, "learning_rate": 3.6547945205479455e-07, "log_odds_chosen": 1.960617184638977, "log_odds_ratio": -0.6673923134803772, "logits/chosen": 0.7972638607025146, "logits/rejected": 0.8958659172058105, "logps/chosen": -2.4600067138671875, "logps/rejected": -4.3246259689331055, "loss": 0.6256, "nll_loss": 0.5588327050209045, "rewards/accuracies": 0.75, "rewards/chosen": -0.24600067734718323, "rewards/margins": 0.18646195530891418, "rewards/rejected": -0.4324626326560974, "step": 4634 }, { "epoch": 12.68993839835729, "grad_norm": 5.64666223526001, "learning_rate": 3.6534246575342465e-07, "log_odds_chosen": 2.142746686935425, "log_odds_ratio": -0.25832629203796387, "logits/chosen": 0.9074480533599854, "logits/rejected": 0.9415926933288574, "logps/chosen": -2.6575210094451904, "logps/rejected": -4.708273887634277, "loss": 0.8372, "nll_loss": 0.8113320469856262, "rewards/accuracies": 0.875, "rewards/chosen": -0.2657521069049835, "rewards/margins": 0.20507526397705078, "rewards/rejected": -0.4708273708820343, "step": 4635 }, { "epoch": 12.692676249144421, "grad_norm": 4.107969284057617, "learning_rate": 3.6520547945205474e-07, "log_odds_chosen": 5.334965229034424, "log_odds_ratio": -0.015105586498975754, "logits/chosen": 1.1165478229522705, "logits/rejected": 1.176077961921692, "logps/chosen": -1.9893419742584229, "logps/rejected": -7.087225914001465, "loss": 0.5757, "nll_loss": 0.5741609930992126, "rewards/accuracies": 1.0, "rewards/chosen": -0.19893419742584229, "rewards/margins": 0.5097883939743042, "rewards/rejected": -0.7087225317955017, "step": 4636 }, { "epoch": 12.695414099931554, "grad_norm": 4.532530784606934, "learning_rate": 3.6506849315068495e-07, "log_odds_chosen": 2.6253302097320557, "log_odds_ratio": -0.3311551809310913, "logits/chosen": 0.6636063456535339, "logits/rejected": 0.7091391086578369, "logps/chosen": -2.398568868637085, "logps/rejected": -4.934376239776611, "loss": 0.7806, "nll_loss": 0.7474541664123535, "rewards/accuracies": 0.875, "rewards/chosen": -0.23985689878463745, "rewards/margins": 0.2535807490348816, "rewards/rejected": -0.49343761801719666, "step": 4637 }, { "epoch": 12.698151950718685, "grad_norm": 4.89215087890625, "learning_rate": 3.6493150684931505e-07, "log_odds_chosen": 2.209235429763794, "log_odds_ratio": -0.21417108178138733, "logits/chosen": 0.9933719635009766, "logits/rejected": 1.0411723852157593, "logps/chosen": -2.067737102508545, "logps/rejected": -4.108267784118652, "loss": 0.5823, "nll_loss": 0.560924768447876, "rewards/accuracies": 0.875, "rewards/chosen": -0.20677368342876434, "rewards/margins": 0.20405313372612, "rewards/rejected": -0.41082683205604553, "step": 4638 }, { "epoch": 12.700889801505818, "grad_norm": 5.551787853240967, "learning_rate": 3.647945205479452e-07, "log_odds_chosen": 2.0318474769592285, "log_odds_ratio": -0.1950160413980484, "logits/chosen": 0.5862694382667542, "logits/rejected": 0.5720617771148682, "logps/chosen": -1.8416874408721924, "logps/rejected": -3.729961633682251, "loss": 0.6181, "nll_loss": 0.5985532402992249, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841687560081482, "rewards/margins": 0.18882742524147034, "rewards/rejected": -0.37299618124961853, "step": 4639 }, { "epoch": 12.70362765229295, "grad_norm": 4.197137355804443, "learning_rate": 3.646575342465753e-07, "log_odds_chosen": 2.743208646774292, "log_odds_ratio": -0.23342838883399963, "logits/chosen": 0.94626384973526, "logits/rejected": 0.9275575280189514, "logps/chosen": -2.2674612998962402, "logps/rejected": -4.901264190673828, "loss": 0.7102, "nll_loss": 0.6868591904640198, "rewards/accuracies": 1.0, "rewards/chosen": -0.2267461121082306, "rewards/margins": 0.2633802890777588, "rewards/rejected": -0.49012646079063416, "step": 4640 }, { "epoch": 12.706365503080082, "grad_norm": 4.9598917961120605, "learning_rate": 3.645205479452055e-07, "log_odds_chosen": 1.718016505241394, "log_odds_ratio": -0.20036281645298004, "logits/chosen": 0.7391373515129089, "logits/rejected": 0.8187581300735474, "logps/chosen": -2.15061354637146, "logps/rejected": -3.738203525543213, "loss": 0.5807, "nll_loss": 0.5607020854949951, "rewards/accuracies": 1.0, "rewards/chosen": -0.21506136655807495, "rewards/margins": 0.1587589979171753, "rewards/rejected": -0.37382036447525024, "step": 4641 }, { "epoch": 12.709103353867214, "grad_norm": 4.1435651779174805, "learning_rate": 3.643835616438356e-07, "log_odds_chosen": 3.722689151763916, "log_odds_ratio": -0.058786287903785706, "logits/chosen": 0.9309872388839722, "logits/rejected": 1.010804295539856, "logps/chosen": -1.6567950248718262, "logps/rejected": -5.125224590301514, "loss": 0.7159, "nll_loss": 0.7099823355674744, "rewards/accuracies": 1.0, "rewards/chosen": -0.16567951440811157, "rewards/margins": 0.3468429446220398, "rewards/rejected": -0.5125224590301514, "step": 4642 }, { "epoch": 12.711841204654347, "grad_norm": 6.8277435302734375, "learning_rate": 3.642465753424657e-07, "log_odds_chosen": 3.159773588180542, "log_odds_ratio": -0.19884541630744934, "logits/chosen": 0.9200596213340759, "logits/rejected": 0.9596527218818665, "logps/chosen": -3.2602643966674805, "logps/rejected": -6.31669807434082, "loss": 0.6861, "nll_loss": 0.6662565469741821, "rewards/accuracies": 1.0, "rewards/chosen": -0.32602646946907043, "rewards/margins": 0.30564334988594055, "rewards/rejected": -0.631669819355011, "step": 4643 }, { "epoch": 12.714579055441478, "grad_norm": 5.200026512145996, "learning_rate": 3.641095890410959e-07, "log_odds_chosen": 2.1627960205078125, "log_odds_ratio": -0.22095422446727753, "logits/chosen": 0.8398770093917847, "logits/rejected": 0.9299110770225525, "logps/chosen": -2.6420719623565674, "logps/rejected": -4.744289398193359, "loss": 0.7747, "nll_loss": 0.7526097893714905, "rewards/accuracies": 0.875, "rewards/chosen": -0.2642071843147278, "rewards/margins": 0.2102217674255371, "rewards/rejected": -0.4744289517402649, "step": 4644 }, { "epoch": 12.71731690622861, "grad_norm": 6.950234889984131, "learning_rate": 3.63972602739726e-07, "log_odds_chosen": 1.5058979988098145, "log_odds_ratio": -0.375484824180603, "logits/chosen": 1.1629559993743896, "logits/rejected": 1.189009428024292, "logps/chosen": -2.8788681030273438, "logps/rejected": -4.298260688781738, "loss": 0.7202, "nll_loss": 0.6826700568199158, "rewards/accuracies": 0.875, "rewards/chosen": -0.2878868281841278, "rewards/margins": 0.14193931221961975, "rewards/rejected": -0.4298260807991028, "step": 4645 }, { "epoch": 12.720054757015742, "grad_norm": 5.801889419555664, "learning_rate": 3.6383561643835616e-07, "log_odds_chosen": 2.4278616905212402, "log_odds_ratio": -0.1897929310798645, "logits/chosen": 0.7152328491210938, "logits/rejected": 0.6870353817939758, "logps/chosen": -2.519134759902954, "logps/rejected": -4.845461845397949, "loss": 0.6694, "nll_loss": 0.6503850221633911, "rewards/accuracies": 1.0, "rewards/chosen": -0.25191348791122437, "rewards/margins": 0.23263272643089294, "rewards/rejected": -0.4845461845397949, "step": 4646 }, { "epoch": 12.722792607802875, "grad_norm": 9.065105438232422, "learning_rate": 3.6369863013698626e-07, "log_odds_chosen": 1.619492530822754, "log_odds_ratio": -0.3100411295890808, "logits/chosen": 1.0181084871292114, "logits/rejected": 1.0859299898147583, "logps/chosen": -2.768605947494507, "logps/rejected": -4.327868461608887, "loss": 0.6862, "nll_loss": 0.6551504731178284, "rewards/accuracies": 1.0, "rewards/chosen": -0.2768605947494507, "rewards/margins": 0.15592627227306366, "rewards/rejected": -0.43278688192367554, "step": 4647 }, { "epoch": 12.725530458590008, "grad_norm": 5.5383782386779785, "learning_rate": 3.6356164383561646e-07, "log_odds_chosen": 1.9562921524047852, "log_odds_ratio": -0.28903764486312866, "logits/chosen": 0.8115123510360718, "logits/rejected": 0.7673623561859131, "logps/chosen": -2.239558458328247, "logps/rejected": -4.054498672485352, "loss": 0.7527, "nll_loss": 0.7238356471061707, "rewards/accuracies": 0.875, "rewards/chosen": -0.22395583987236023, "rewards/margins": 0.18149404227733612, "rewards/rejected": -0.40544986724853516, "step": 4648 }, { "epoch": 12.728268309377139, "grad_norm": 5.804716110229492, "learning_rate": 3.6342465753424656e-07, "log_odds_chosen": 1.5007081031799316, "log_odds_ratio": -0.3368246853351593, "logits/chosen": 1.0667498111724854, "logits/rejected": 1.021567463874817, "logps/chosen": -1.8255850076675415, "logps/rejected": -3.179208278656006, "loss": 0.6995, "nll_loss": 0.665866494178772, "rewards/accuracies": 0.875, "rewards/chosen": -0.18255850672721863, "rewards/margins": 0.13536232709884644, "rewards/rejected": -0.31792083382606506, "step": 4649 }, { "epoch": 12.73100616016427, "grad_norm": 5.510984420776367, "learning_rate": 3.6328767123287666e-07, "log_odds_chosen": 3.0827372074127197, "log_odds_ratio": -0.13416901230812073, "logits/chosen": 0.6970291137695312, "logits/rejected": 0.7173023223876953, "logps/chosen": -2.0582950115203857, "logps/rejected": -4.981551647186279, "loss": 0.5955, "nll_loss": 0.5820991396903992, "rewards/accuracies": 1.0, "rewards/chosen": -0.20582950115203857, "rewards/margins": 0.2923256754875183, "rewards/rejected": -0.4981551766395569, "step": 4650 }, { "epoch": 12.733744010951403, "grad_norm": 5.933116436004639, "learning_rate": 3.6315068493150687e-07, "log_odds_chosen": 1.1176478862762451, "log_odds_ratio": -0.4793546199798584, "logits/chosen": 0.7838835120201111, "logits/rejected": 0.7463288903236389, "logps/chosen": -2.0363152027130127, "logps/rejected": -3.060067892074585, "loss": 0.6894, "nll_loss": 0.6414727568626404, "rewards/accuracies": 0.875, "rewards/chosen": -0.20363155007362366, "rewards/margins": 0.10237523913383484, "rewards/rejected": -0.3060067594051361, "step": 4651 }, { "epoch": 12.736481861738536, "grad_norm": 5.273066520690918, "learning_rate": 3.6301369863013697e-07, "log_odds_chosen": 2.8992676734924316, "log_odds_ratio": -0.19410069286823273, "logits/chosen": 0.8214437961578369, "logits/rejected": 0.7714328169822693, "logps/chosen": -1.7807029485702515, "logps/rejected": -4.536972999572754, "loss": 0.6755, "nll_loss": 0.6561242341995239, "rewards/accuracies": 1.0, "rewards/chosen": -0.1780703067779541, "rewards/margins": 0.2756270468235016, "rewards/rejected": -0.4536973237991333, "step": 4652 }, { "epoch": 12.739219712525667, "grad_norm": 5.845469951629639, "learning_rate": 3.628767123287671e-07, "log_odds_chosen": 1.985016942024231, "log_odds_ratio": -0.35439497232437134, "logits/chosen": 0.9437565803527832, "logits/rejected": 0.9808393716812134, "logps/chosen": -2.2828726768493652, "logps/rejected": -4.203408241271973, "loss": 0.6935, "nll_loss": 0.6580548882484436, "rewards/accuracies": 0.875, "rewards/chosen": -0.22828729450702667, "rewards/margins": 0.19205355644226074, "rewards/rejected": -0.4203408360481262, "step": 4653 }, { "epoch": 12.7419575633128, "grad_norm": 4.806219577789307, "learning_rate": 3.627397260273972e-07, "log_odds_chosen": 4.558496475219727, "log_odds_ratio": -0.05873045697808266, "logits/chosen": 1.0684936046600342, "logits/rejected": 1.1194723844528198, "logps/chosen": -1.7774814367294312, "logps/rejected": -6.13081693649292, "loss": 0.6277, "nll_loss": 0.6218605041503906, "rewards/accuracies": 1.0, "rewards/chosen": -0.1777481585741043, "rewards/margins": 0.4353335499763489, "rewards/rejected": -0.613081693649292, "step": 4654 }, { "epoch": 12.744695414099931, "grad_norm": 5.890581130981445, "learning_rate": 3.6260273972602737e-07, "log_odds_chosen": 2.0152747631073, "log_odds_ratio": -0.448850154876709, "logits/chosen": 0.8438941836357117, "logits/rejected": 0.9195349812507629, "logps/chosen": -2.263711452484131, "logps/rejected": -4.219238758087158, "loss": 0.7139, "nll_loss": 0.6689777374267578, "rewards/accuracies": 0.75, "rewards/chosen": -0.2263711541891098, "rewards/margins": 0.1955527365207672, "rewards/rejected": -0.4219238758087158, "step": 4655 }, { "epoch": 12.747433264887064, "grad_norm": 5.218923091888428, "learning_rate": 3.624657534246575e-07, "log_odds_chosen": 3.9809587001800537, "log_odds_ratio": -0.1368785947561264, "logits/chosen": 0.7063701152801514, "logits/rejected": 0.7363325357437134, "logps/chosen": -1.775758147239685, "logps/rejected": -5.557118892669678, "loss": 0.7019, "nll_loss": 0.6882559061050415, "rewards/accuracies": 0.875, "rewards/chosen": -0.17757581174373627, "rewards/margins": 0.3781360983848572, "rewards/rejected": -0.5557118654251099, "step": 4656 }, { "epoch": 12.750171115674195, "grad_norm": 5.269175052642822, "learning_rate": 3.623287671232876e-07, "log_odds_chosen": 2.706254005432129, "log_odds_ratio": -0.3224239647388458, "logits/chosen": 0.9483503103256226, "logits/rejected": 0.9998694062232971, "logps/chosen": -1.812888264656067, "logps/rejected": -4.235954284667969, "loss": 0.6321, "nll_loss": 0.5998396873474121, "rewards/accuracies": 0.875, "rewards/chosen": -0.18128880858421326, "rewards/margins": 0.24230659008026123, "rewards/rejected": -0.4235954284667969, "step": 4657 }, { "epoch": 12.752908966461328, "grad_norm": 5.574504852294922, "learning_rate": 3.621917808219178e-07, "log_odds_chosen": 3.9000844955444336, "log_odds_ratio": -0.1530863493680954, "logits/chosen": 0.9599284529685974, "logits/rejected": 0.9480059146881104, "logps/chosen": -2.424777030944824, "logps/rejected": -6.205362319946289, "loss": 0.8186, "nll_loss": 0.8032547831535339, "rewards/accuracies": 1.0, "rewards/chosen": -0.24247771501541138, "rewards/margins": 0.378058522939682, "rewards/rejected": -0.620536208152771, "step": 4658 }, { "epoch": 12.75564681724846, "grad_norm": 4.783747673034668, "learning_rate": 3.620547945205479e-07, "log_odds_chosen": 1.8890416622161865, "log_odds_ratio": -0.23725812137126923, "logits/chosen": 0.873852550983429, "logits/rejected": 0.8035192489624023, "logps/chosen": -1.7457036972045898, "logps/rejected": -3.4469380378723145, "loss": 0.6561, "nll_loss": 0.6323962807655334, "rewards/accuracies": 1.0, "rewards/chosen": -0.17457038164138794, "rewards/margins": 0.1701233983039856, "rewards/rejected": -0.34469377994537354, "step": 4659 }, { "epoch": 12.758384668035593, "grad_norm": 5.460665225982666, "learning_rate": 3.619178082191781e-07, "log_odds_chosen": 2.784993886947632, "log_odds_ratio": -0.15421387553215027, "logits/chosen": 0.7943670749664307, "logits/rejected": 0.8445638418197632, "logps/chosen": -2.3053619861602783, "logps/rejected": -4.9234538078308105, "loss": 0.8449, "nll_loss": 0.8294823169708252, "rewards/accuracies": 1.0, "rewards/chosen": -0.23053619265556335, "rewards/margins": 0.26180920004844666, "rewards/rejected": -0.49234539270401, "step": 4660 }, { "epoch": 12.761122518822724, "grad_norm": 7.8688130378723145, "learning_rate": 3.617808219178082e-07, "log_odds_chosen": 1.853548288345337, "log_odds_ratio": -0.4268079698085785, "logits/chosen": 0.8430129289627075, "logits/rejected": 0.7028218507766724, "logps/chosen": -2.729315757751465, "logps/rejected": -4.475529670715332, "loss": 0.8264, "nll_loss": 0.7837157249450684, "rewards/accuracies": 0.875, "rewards/chosen": -0.2729315757751465, "rewards/margins": 0.1746213585138321, "rewards/rejected": -0.44755294919013977, "step": 4661 }, { "epoch": 12.763860369609857, "grad_norm": 6.022003650665283, "learning_rate": 3.6164383561643833e-07, "log_odds_chosen": 1.5067743062973022, "log_odds_ratio": -0.35189422965049744, "logits/chosen": 0.9627777934074402, "logits/rejected": 1.0167618989944458, "logps/chosen": -1.8783109188079834, "logps/rejected": -3.2777326107025146, "loss": 0.6676, "nll_loss": 0.6323971748352051, "rewards/accuracies": 0.875, "rewards/chosen": -0.1878311038017273, "rewards/margins": 0.1399421989917755, "rewards/rejected": -0.3277732729911804, "step": 4662 }, { "epoch": 12.766598220396988, "grad_norm": 5.184551239013672, "learning_rate": 3.615068493150685e-07, "log_odds_chosen": 3.48502516746521, "log_odds_ratio": -0.18379302322864532, "logits/chosen": 0.7642742395401001, "logits/rejected": 0.7259351015090942, "logps/chosen": -2.0148887634277344, "logps/rejected": -5.370790958404541, "loss": 0.6528, "nll_loss": 0.6343868970870972, "rewards/accuracies": 0.875, "rewards/chosen": -0.20148888230323792, "rewards/margins": 0.3355902433395386, "rewards/rejected": -0.5370790958404541, "step": 4663 }, { "epoch": 12.76933607118412, "grad_norm": 5.650922775268555, "learning_rate": 3.613698630136986e-07, "log_odds_chosen": 2.7341322898864746, "log_odds_ratio": -0.2868656814098358, "logits/chosen": 1.017121434211731, "logits/rejected": 1.015154242515564, "logps/chosen": -2.11100435256958, "logps/rejected": -4.772943496704102, "loss": 0.7225, "nll_loss": 0.6938199996948242, "rewards/accuracies": 0.875, "rewards/chosen": -0.21110044419765472, "rewards/margins": 0.2661939263343811, "rewards/rejected": -0.47729435563087463, "step": 4664 }, { "epoch": 12.772073921971252, "grad_norm": 6.167977809906006, "learning_rate": 3.612328767123288e-07, "log_odds_chosen": 1.4576534032821655, "log_odds_ratio": -0.44182300567626953, "logits/chosen": 0.8242286443710327, "logits/rejected": 0.8016465902328491, "logps/chosen": -2.0369515419006348, "logps/rejected": -3.414177417755127, "loss": 0.622, "nll_loss": 0.577772319316864, "rewards/accuracies": 0.875, "rewards/chosen": -0.203695148229599, "rewards/margins": 0.13772259652614594, "rewards/rejected": -0.34141775965690613, "step": 4665 }, { "epoch": 12.774811772758385, "grad_norm": 5.273536205291748, "learning_rate": 3.610958904109589e-07, "log_odds_chosen": 2.196903705596924, "log_odds_ratio": -0.19293712079524994, "logits/chosen": 0.9235357642173767, "logits/rejected": 0.9729871153831482, "logps/chosen": -1.9463329315185547, "logps/rejected": -4.0275044441223145, "loss": 0.6089, "nll_loss": 0.5895694494247437, "rewards/accuracies": 1.0, "rewards/chosen": -0.19463331997394562, "rewards/margins": 0.20811715722084045, "rewards/rejected": -0.40275049209594727, "step": 4666 }, { "epoch": 12.777549623545516, "grad_norm": 5.366629123687744, "learning_rate": 3.6095890410958904e-07, "log_odds_chosen": 1.655871868133545, "log_odds_ratio": -0.4080418348312378, "logits/chosen": 0.8402682542800903, "logits/rejected": 0.8492594361305237, "logps/chosen": -2.322072982788086, "logps/rejected": -3.8870363235473633, "loss": 0.6146, "nll_loss": 0.5738350749015808, "rewards/accuracies": 0.75, "rewards/chosen": -0.2322072982788086, "rewards/margins": 0.1564963459968567, "rewards/rejected": -0.3887036442756653, "step": 4667 }, { "epoch": 12.780287474332649, "grad_norm": 5.460969924926758, "learning_rate": 3.608219178082192e-07, "log_odds_chosen": 1.6097412109375, "log_odds_ratio": -0.27388715744018555, "logits/chosen": 0.8478554487228394, "logits/rejected": 0.9438048601150513, "logps/chosen": -2.398315668106079, "logps/rejected": -3.923671245574951, "loss": 0.7404, "nll_loss": 0.7129812836647034, "rewards/accuracies": 0.875, "rewards/chosen": -0.2398315668106079, "rewards/margins": 0.1525355726480484, "rewards/rejected": -0.3923671245574951, "step": 4668 }, { "epoch": 12.78302532511978, "grad_norm": 4.322186470031738, "learning_rate": 3.606849315068493e-07, "log_odds_chosen": 3.3248114585876465, "log_odds_ratio": -0.1604725569486618, "logits/chosen": 0.8191132545471191, "logits/rejected": 0.833990752696991, "logps/chosen": -2.031266450881958, "logps/rejected": -5.216153144836426, "loss": 0.7287, "nll_loss": 0.7126508951187134, "rewards/accuracies": 1.0, "rewards/chosen": -0.20312663912773132, "rewards/margins": 0.3184887170791626, "rewards/rejected": -0.5216153264045715, "step": 4669 }, { "epoch": 12.785763175906913, "grad_norm": 5.663464546203613, "learning_rate": 3.6054794520547944e-07, "log_odds_chosen": 2.4110584259033203, "log_odds_ratio": -0.16386380791664124, "logits/chosen": 0.8570001125335693, "logits/rejected": 0.9146357774734497, "logps/chosen": -2.324779987335205, "logps/rejected": -4.559046745300293, "loss": 0.6816, "nll_loss": 0.6652208566665649, "rewards/accuracies": 1.0, "rewards/chosen": -0.23247800767421722, "rewards/margins": 0.22342665493488312, "rewards/rejected": -0.45590466260910034, "step": 4670 }, { "epoch": 12.788501026694044, "grad_norm": 6.420121669769287, "learning_rate": 3.6041095890410954e-07, "log_odds_chosen": 3.0850584506988525, "log_odds_ratio": -0.19236692786216736, "logits/chosen": 0.8411571979522705, "logits/rejected": 0.8678702712059021, "logps/chosen": -2.1632204055786133, "logps/rejected": -5.156072616577148, "loss": 0.6917, "nll_loss": 0.6724966764450073, "rewards/accuracies": 1.0, "rewards/chosen": -0.21632201969623566, "rewards/margins": 0.29928526282310486, "rewards/rejected": -0.5156072974205017, "step": 4671 }, { "epoch": 12.791238877481177, "grad_norm": 4.97166633605957, "learning_rate": 3.6027397260273974e-07, "log_odds_chosen": 1.698946237564087, "log_odds_ratio": -0.2916475236415863, "logits/chosen": 0.7536132335662842, "logits/rejected": 0.7498140335083008, "logps/chosen": -2.694551467895508, "logps/rejected": -4.295602321624756, "loss": 0.6747, "nll_loss": 0.645561933517456, "rewards/accuracies": 0.875, "rewards/chosen": -0.2694551646709442, "rewards/margins": 0.16010507941246033, "rewards/rejected": -0.42956024408340454, "step": 4672 }, { "epoch": 12.793976728268309, "grad_norm": 4.990200042724609, "learning_rate": 3.6013698630136984e-07, "log_odds_chosen": 3.5698509216308594, "log_odds_ratio": -0.20079359412193298, "logits/chosen": 0.940839946269989, "logits/rejected": 0.9109620451927185, "logps/chosen": -2.0402660369873047, "logps/rejected": -5.475794792175293, "loss": 0.6722, "nll_loss": 0.652108371257782, "rewards/accuracies": 1.0, "rewards/chosen": -0.20402660965919495, "rewards/margins": 0.3435528874397278, "rewards/rejected": -0.5475794672966003, "step": 4673 }, { "epoch": 12.796714579055442, "grad_norm": 4.785251140594482, "learning_rate": 3.6e-07, "log_odds_chosen": 1.6246283054351807, "log_odds_ratio": -0.3257399797439575, "logits/chosen": 1.0152275562286377, "logits/rejected": 1.0038167238235474, "logps/chosen": -2.1177163124084473, "logps/rejected": -3.671391487121582, "loss": 0.7011, "nll_loss": 0.6685726642608643, "rewards/accuracies": 1.0, "rewards/chosen": -0.211771622300148, "rewards/margins": 0.15536753833293915, "rewards/rejected": -0.36713916063308716, "step": 4674 }, { "epoch": 12.799452429842574, "grad_norm": 6.2696213722229, "learning_rate": 3.5986301369863015e-07, "log_odds_chosen": 2.2641265392303467, "log_odds_ratio": -0.22249342501163483, "logits/chosen": 0.8179264068603516, "logits/rejected": 0.8360459804534912, "logps/chosen": -1.9931087493896484, "logps/rejected": -4.10462760925293, "loss": 0.6017, "nll_loss": 0.5794588923454285, "rewards/accuracies": 1.0, "rewards/chosen": -0.19931088387966156, "rewards/margins": 0.21115189790725708, "rewards/rejected": -0.41046276688575745, "step": 4675 }, { "epoch": 12.802190280629706, "grad_norm": 4.978602409362793, "learning_rate": 3.5972602739726025e-07, "log_odds_chosen": 1.5017528533935547, "log_odds_ratio": -0.3561534285545349, "logits/chosen": 0.6534737348556519, "logits/rejected": 0.6716779470443726, "logps/chosen": -2.3733558654785156, "logps/rejected": -3.7946088314056396, "loss": 0.6429, "nll_loss": 0.6072388887405396, "rewards/accuracies": 0.875, "rewards/chosen": -0.23733560740947723, "rewards/margins": 0.14212530851364136, "rewards/rejected": -0.379460871219635, "step": 4676 }, { "epoch": 12.804928131416839, "grad_norm": 5.044859886169434, "learning_rate": 3.595890410958904e-07, "log_odds_chosen": 1.0758213996887207, "log_odds_ratio": -0.3465659022331238, "logits/chosen": 0.7694783806800842, "logits/rejected": 0.7956371903419495, "logps/chosen": -1.8186860084533691, "logps/rejected": -2.787607431411743, "loss": 0.6556, "nll_loss": 0.6209635734558105, "rewards/accuracies": 1.0, "rewards/chosen": -0.18186861276626587, "rewards/margins": 0.09689214825630188, "rewards/rejected": -0.27876076102256775, "step": 4677 }, { "epoch": 12.80766598220397, "grad_norm": 5.525074481964111, "learning_rate": 3.594520547945205e-07, "log_odds_chosen": 2.72367525100708, "log_odds_ratio": -0.3263469338417053, "logits/chosen": 0.7291562557220459, "logits/rejected": 0.7891098260879517, "logps/chosen": -2.1836469173431396, "logps/rejected": -4.792177200317383, "loss": 0.686, "nll_loss": 0.6533488035202026, "rewards/accuracies": 0.875, "rewards/chosen": -0.2183646857738495, "rewards/margins": 0.26085302233695984, "rewards/rejected": -0.4792177379131317, "step": 4678 }, { "epoch": 12.810403832991103, "grad_norm": 5.063370704650879, "learning_rate": 3.593150684931507e-07, "log_odds_chosen": 2.5525903701782227, "log_odds_ratio": -0.3174273371696472, "logits/chosen": 0.9225034713745117, "logits/rejected": 0.952918291091919, "logps/chosen": -1.779714822769165, "logps/rejected": -4.00667667388916, "loss": 0.6146, "nll_loss": 0.5828558802604675, "rewards/accuracies": 0.875, "rewards/chosen": -0.1779714822769165, "rewards/margins": 0.2226961851119995, "rewards/rejected": -0.4006677269935608, "step": 4679 }, { "epoch": 12.813141683778234, "grad_norm": 5.873549461364746, "learning_rate": 3.591780821917808e-07, "log_odds_chosen": 1.937849760055542, "log_odds_ratio": -0.45738354325294495, "logits/chosen": 0.7601158618927002, "logits/rejected": 0.8158864974975586, "logps/chosen": -2.676450490951538, "logps/rejected": -4.5652756690979, "loss": 0.7187, "nll_loss": 0.6730090379714966, "rewards/accuracies": 0.625, "rewards/chosen": -0.26764506101608276, "rewards/margins": 0.18888252973556519, "rewards/rejected": -0.45652759075164795, "step": 4680 }, { "epoch": 12.815879534565367, "grad_norm": 4.576785087585449, "learning_rate": 3.5904109589041095e-07, "log_odds_chosen": 1.6881194114685059, "log_odds_ratio": -0.2626100778579712, "logits/chosen": 0.8698606491088867, "logits/rejected": 0.9004789590835571, "logps/chosen": -1.9760181903839111, "logps/rejected": -3.5305190086364746, "loss": 0.6234, "nll_loss": 0.5971295237541199, "rewards/accuracies": 1.0, "rewards/chosen": -0.1976018249988556, "rewards/margins": 0.15545004606246948, "rewards/rejected": -0.3530518710613251, "step": 4681 }, { "epoch": 12.818617385352498, "grad_norm": 5.356914043426514, "learning_rate": 3.589041095890411e-07, "log_odds_chosen": 3.0806937217712402, "log_odds_ratio": -0.14948725700378418, "logits/chosen": 0.975601077079773, "logits/rejected": 0.9048988223075867, "logps/chosen": -2.3516807556152344, "logps/rejected": -5.308925628662109, "loss": 0.762, "nll_loss": 0.7470822930335999, "rewards/accuracies": 1.0, "rewards/chosen": -0.23516808450222015, "rewards/margins": 0.2957245111465454, "rewards/rejected": -0.5308926105499268, "step": 4682 }, { "epoch": 12.821355236139631, "grad_norm": 5.203782081604004, "learning_rate": 3.587671232876712e-07, "log_odds_chosen": 1.020981788635254, "log_odds_ratio": -0.4194971024990082, "logits/chosen": 0.8986291289329529, "logits/rejected": 0.944861650466919, "logps/chosen": -2.3892030715942383, "logps/rejected": -3.3270821571350098, "loss": 0.7468, "nll_loss": 0.7048486471176147, "rewards/accuracies": 0.75, "rewards/chosen": -0.23892030119895935, "rewards/margins": 0.09378794580698013, "rewards/rejected": -0.3327082395553589, "step": 4683 }, { "epoch": 12.824093086926762, "grad_norm": 5.500260829925537, "learning_rate": 3.5863013698630136e-07, "log_odds_chosen": 3.099395751953125, "log_odds_ratio": -0.23030515015125275, "logits/chosen": 0.818463921546936, "logits/rejected": 0.7856523394584656, "logps/chosen": -1.9161932468414307, "logps/rejected": -4.817028045654297, "loss": 0.6811, "nll_loss": 0.6580213904380798, "rewards/accuracies": 0.875, "rewards/chosen": -0.19161933660507202, "rewards/margins": 0.29008349776268005, "rewards/rejected": -0.4817028045654297, "step": 4684 }, { "epoch": 12.826830937713895, "grad_norm": 4.9958720207214355, "learning_rate": 3.5849315068493146e-07, "log_odds_chosen": 2.023216724395752, "log_odds_ratio": -0.2084578573703766, "logits/chosen": 0.8075916171073914, "logits/rejected": 0.6602035164833069, "logps/chosen": -1.8493425846099854, "logps/rejected": -3.7073636054992676, "loss": 0.7847, "nll_loss": 0.7638448476791382, "rewards/accuracies": 1.0, "rewards/chosen": -0.18493427336215973, "rewards/margins": 0.18580207228660583, "rewards/rejected": -0.37073636054992676, "step": 4685 }, { "epoch": 12.829568788501026, "grad_norm": 5.3546552658081055, "learning_rate": 3.5835616438356166e-07, "log_odds_chosen": 1.740594506263733, "log_odds_ratio": -0.23167866468429565, "logits/chosen": 0.7147006392478943, "logits/rejected": 0.760977029800415, "logps/chosen": -2.4057888984680176, "logps/rejected": -4.026020050048828, "loss": 0.6505, "nll_loss": 0.6273760795593262, "rewards/accuracies": 1.0, "rewards/chosen": -0.24057888984680176, "rewards/margins": 0.16202311217784882, "rewards/rejected": -0.40260201692581177, "step": 4686 }, { "epoch": 12.83230663928816, "grad_norm": 6.048254489898682, "learning_rate": 3.5821917808219176e-07, "log_odds_chosen": 1.1436145305633545, "log_odds_ratio": -0.5600041151046753, "logits/chosen": 0.7149852514266968, "logits/rejected": 0.7084896564483643, "logps/chosen": -2.3347253799438477, "logps/rejected": -3.362126588821411, "loss": 0.6356, "nll_loss": 0.5795818567276001, "rewards/accuracies": 0.75, "rewards/chosen": -0.2334725558757782, "rewards/margins": 0.1027401015162468, "rewards/rejected": -0.3362126648426056, "step": 4687 }, { "epoch": 12.83504449007529, "grad_norm": 7.095000743865967, "learning_rate": 3.5808219178082186e-07, "log_odds_chosen": 1.9488282203674316, "log_odds_ratio": -0.2650798559188843, "logits/chosen": 1.1711223125457764, "logits/rejected": 1.1326723098754883, "logps/chosen": -2.557797908782959, "logps/rejected": -4.41952657699585, "loss": 0.6513, "nll_loss": 0.6247676014900208, "rewards/accuracies": 1.0, "rewards/chosen": -0.25577977299690247, "rewards/margins": 0.18617290258407593, "rewards/rejected": -0.4419527053833008, "step": 4688 }, { "epoch": 12.837782340862423, "grad_norm": 7.586130142211914, "learning_rate": 3.5794520547945206e-07, "log_odds_chosen": 4.973309516906738, "log_odds_ratio": -0.18710847198963165, "logits/chosen": 1.0376499891281128, "logits/rejected": 1.0884262323379517, "logps/chosen": -2.844156265258789, "logps/rejected": -7.692948341369629, "loss": 0.7605, "nll_loss": 0.7418311834335327, "rewards/accuracies": 0.875, "rewards/chosen": -0.284415602684021, "rewards/margins": 0.4848792552947998, "rewards/rejected": -0.7692948579788208, "step": 4689 }, { "epoch": 12.840520191649555, "grad_norm": 7.625100612640381, "learning_rate": 3.5780821917808216e-07, "log_odds_chosen": 2.244339942932129, "log_odds_ratio": -0.25009432435035706, "logits/chosen": 1.0411202907562256, "logits/rejected": 0.975200355052948, "logps/chosen": -2.052309513092041, "logps/rejected": -4.1256256103515625, "loss": 0.7517, "nll_loss": 0.7266767621040344, "rewards/accuracies": 0.875, "rewards/chosen": -0.2052309662103653, "rewards/margins": 0.2073315978050232, "rewards/rejected": -0.41256260871887207, "step": 4690 }, { "epoch": 12.843258042436688, "grad_norm": 4.54398250579834, "learning_rate": 3.576712328767123e-07, "log_odds_chosen": 2.471653938293457, "log_odds_ratio": -0.23596295714378357, "logits/chosen": 0.72361159324646, "logits/rejected": 0.8313113451004028, "logps/chosen": -1.9625988006591797, "logps/rejected": -4.293508052825928, "loss": 0.7243, "nll_loss": 0.7006716132164001, "rewards/accuracies": 0.875, "rewards/chosen": -0.19625988602638245, "rewards/margins": 0.23309093713760376, "rewards/rejected": -0.4293507933616638, "step": 4691 }, { "epoch": 12.845995893223819, "grad_norm": 6.951075553894043, "learning_rate": 3.575342465753424e-07, "log_odds_chosen": 3.8749849796295166, "log_odds_ratio": -0.17360341548919678, "logits/chosen": 1.0894134044647217, "logits/rejected": 1.1611706018447876, "logps/chosen": -2.243781566619873, "logps/rejected": -5.93444299697876, "loss": 0.8646, "nll_loss": 0.8471924662590027, "rewards/accuracies": 0.875, "rewards/chosen": -0.22437816858291626, "rewards/margins": 0.36906614899635315, "rewards/rejected": -0.593444287776947, "step": 4692 }, { "epoch": 12.848733744010952, "grad_norm": 7.0046586990356445, "learning_rate": 3.573972602739726e-07, "log_odds_chosen": 1.6815452575683594, "log_odds_ratio": -0.33275726437568665, "logits/chosen": 0.9828104972839355, "logits/rejected": 0.917289137840271, "logps/chosen": -2.0841760635375977, "logps/rejected": -3.6299848556518555, "loss": 0.6998, "nll_loss": 0.6665661334991455, "rewards/accuracies": 0.875, "rewards/chosen": -0.2084175944328308, "rewards/margins": 0.15458089113235474, "rewards/rejected": -0.36299848556518555, "step": 4693 }, { "epoch": 12.851471594798083, "grad_norm": 8.274375915527344, "learning_rate": 3.572602739726027e-07, "log_odds_chosen": 1.4120392799377441, "log_odds_ratio": -0.652024507522583, "logits/chosen": 0.9220988750457764, "logits/rejected": 0.827600359916687, "logps/chosen": -2.60986328125, "logps/rejected": -3.8993053436279297, "loss": 0.7675, "nll_loss": 0.7023391127586365, "rewards/accuracies": 0.625, "rewards/chosen": -0.260986328125, "rewards/margins": 0.12894420325756073, "rewards/rejected": -0.3899305462837219, "step": 4694 }, { "epoch": 12.854209445585216, "grad_norm": 5.897411823272705, "learning_rate": 3.571232876712328e-07, "log_odds_chosen": 3.15855073928833, "log_odds_ratio": -0.25793832540512085, "logits/chosen": 0.8939045667648315, "logits/rejected": 0.9784754514694214, "logps/chosen": -2.1902213096618652, "logps/rejected": -5.203551769256592, "loss": 0.6903, "nll_loss": 0.6645498871803284, "rewards/accuracies": 0.875, "rewards/chosen": -0.21902213990688324, "rewards/margins": 0.30133306980133057, "rewards/rejected": -0.520355224609375, "step": 4695 }, { "epoch": 12.856947296372347, "grad_norm": 5.899514675140381, "learning_rate": 3.56986301369863e-07, "log_odds_chosen": 2.4057724475860596, "log_odds_ratio": -0.2029980719089508, "logits/chosen": 0.7830241918563843, "logits/rejected": 0.8118159174919128, "logps/chosen": -2.415647506713867, "logps/rejected": -4.723862171173096, "loss": 0.7314, "nll_loss": 0.7110508680343628, "rewards/accuracies": 1.0, "rewards/chosen": -0.2415647655725479, "rewards/margins": 0.23082147538661957, "rewards/rejected": -0.4723862409591675, "step": 4696 }, { "epoch": 12.85968514715948, "grad_norm": 6.024284362792969, "learning_rate": 3.568493150684931e-07, "log_odds_chosen": 2.5864949226379395, "log_odds_ratio": -0.2890262007713318, "logits/chosen": 0.737673282623291, "logits/rejected": 0.7388815879821777, "logps/chosen": -1.6907744407653809, "logps/rejected": -4.140160083770752, "loss": 0.6359, "nll_loss": 0.606968879699707, "rewards/accuracies": 0.875, "rewards/chosen": -0.16907745599746704, "rewards/margins": 0.24493855237960815, "rewards/rejected": -0.4140159785747528, "step": 4697 }, { "epoch": 12.862422997946611, "grad_norm": 6.247210502624512, "learning_rate": 3.567123287671233e-07, "log_odds_chosen": 1.0435154438018799, "log_odds_ratio": -0.4294314384460449, "logits/chosen": 0.6832151412963867, "logits/rejected": 0.6721988320350647, "logps/chosen": -2.355196714401245, "logps/rejected": -3.315443277359009, "loss": 0.7118, "nll_loss": 0.6688531041145325, "rewards/accuracies": 0.875, "rewards/chosen": -0.235519677400589, "rewards/margins": 0.09602466225624084, "rewards/rejected": -0.33154433965682983, "step": 4698 }, { "epoch": 12.865160848733744, "grad_norm": 5.243535041809082, "learning_rate": 3.5657534246575337e-07, "log_odds_chosen": 2.2677371501922607, "log_odds_ratio": -0.38203197717666626, "logits/chosen": 0.691663384437561, "logits/rejected": 0.7025864720344543, "logps/chosen": -2.129481554031372, "logps/rejected": -4.349699974060059, "loss": 0.6474, "nll_loss": 0.6091915369033813, "rewards/accuracies": 0.875, "rewards/chosen": -0.21294817328453064, "rewards/margins": 0.22202184796333313, "rewards/rejected": -0.43497002124786377, "step": 4699 }, { "epoch": 12.867898699520875, "grad_norm": 5.880860328674316, "learning_rate": 3.564383561643836e-07, "log_odds_chosen": 2.3635940551757812, "log_odds_ratio": -0.20878081023693085, "logits/chosen": 0.7776751518249512, "logits/rejected": 0.7919594049453735, "logps/chosen": -2.368021249771118, "logps/rejected": -4.631947994232178, "loss": 0.786, "nll_loss": 0.7651040554046631, "rewards/accuracies": 0.875, "rewards/chosen": -0.2368021309375763, "rewards/margins": 0.22639265656471252, "rewards/rejected": -0.4631947875022888, "step": 4700 }, { "epoch": 12.870636550308008, "grad_norm": 5.613768100738525, "learning_rate": 3.563013698630137e-07, "log_odds_chosen": 2.145045757293701, "log_odds_ratio": -0.2508203387260437, "logits/chosen": 0.8999820351600647, "logits/rejected": 0.9430435299873352, "logps/chosen": -2.311734914779663, "logps/rejected": -4.335033416748047, "loss": 0.6569, "nll_loss": 0.6317914724349976, "rewards/accuracies": 0.875, "rewards/chosen": -0.23117350041866302, "rewards/margins": 0.2023298591375351, "rewards/rejected": -0.43350332975387573, "step": 4701 }, { "epoch": 12.873374401095141, "grad_norm": 5.091819763183594, "learning_rate": 3.561643835616438e-07, "log_odds_chosen": 1.6777650117874146, "log_odds_ratio": -0.24074675142765045, "logits/chosen": 0.7138086557388306, "logits/rejected": 0.684234619140625, "logps/chosen": -1.9781184196472168, "logps/rejected": -3.5082039833068848, "loss": 0.7254, "nll_loss": 0.7013176083564758, "rewards/accuracies": 1.0, "rewards/chosen": -0.19781184196472168, "rewards/margins": 0.1530085802078247, "rewards/rejected": -0.350820392370224, "step": 4702 }, { "epoch": 12.876112251882272, "grad_norm": 4.809392929077148, "learning_rate": 3.56027397260274e-07, "log_odds_chosen": 2.2235946655273438, "log_odds_ratio": -0.20786939561367035, "logits/chosen": 0.7106760740280151, "logits/rejected": 0.7441940307617188, "logps/chosen": -2.2250871658325195, "logps/rejected": -4.306799411773682, "loss": 0.7124, "nll_loss": 0.6915856003761292, "rewards/accuracies": 1.0, "rewards/chosen": -0.22250869870185852, "rewards/margins": 0.20817124843597412, "rewards/rejected": -0.43067997694015503, "step": 4703 }, { "epoch": 12.878850102669405, "grad_norm": 5.973328590393066, "learning_rate": 3.558904109589041e-07, "log_odds_chosen": 0.8557954430580139, "log_odds_ratio": -0.5424292087554932, "logits/chosen": 0.8595609664916992, "logits/rejected": 0.8483288288116455, "logps/chosen": -1.8220937252044678, "logps/rejected": -2.5328235626220703, "loss": 0.7032, "nll_loss": 0.6489853858947754, "rewards/accuracies": 0.75, "rewards/chosen": -0.18220938742160797, "rewards/margins": 0.07107295840978622, "rewards/rejected": -0.2532823383808136, "step": 4704 }, { "epoch": 12.881587953456537, "grad_norm": 6.196476459503174, "learning_rate": 3.5575342465753423e-07, "log_odds_chosen": 2.1011674404144287, "log_odds_ratio": -0.23746272921562195, "logits/chosen": 1.0010782480239868, "logits/rejected": 1.0819445848464966, "logps/chosen": -2.5838773250579834, "logps/rejected": -4.602329254150391, "loss": 0.6033, "nll_loss": 0.5795798301696777, "rewards/accuracies": 1.0, "rewards/chosen": -0.2583877742290497, "rewards/margins": 0.2018451988697052, "rewards/rejected": -0.4602329432964325, "step": 4705 }, { "epoch": 12.88432580424367, "grad_norm": 4.369016170501709, "learning_rate": 3.556164383561644e-07, "log_odds_chosen": 2.521120071411133, "log_odds_ratio": -0.23485049605369568, "logits/chosen": 0.5871208906173706, "logits/rejected": 0.6603841781616211, "logps/chosen": -1.867084264755249, "logps/rejected": -4.219812393188477, "loss": 0.7375, "nll_loss": 0.7140254378318787, "rewards/accuracies": 0.875, "rewards/chosen": -0.18670843541622162, "rewards/margins": 0.2352728247642517, "rewards/rejected": -0.4219812750816345, "step": 4706 }, { "epoch": 12.8870636550308, "grad_norm": 7.141397953033447, "learning_rate": 3.5547945205479454e-07, "log_odds_chosen": 1.880784273147583, "log_odds_ratio": -0.3500117361545563, "logits/chosen": 0.9053062200546265, "logits/rejected": 0.9122742414474487, "logps/chosen": -3.2036356925964355, "logps/rejected": -4.9961395263671875, "loss": 0.8246, "nll_loss": 0.7895797491073608, "rewards/accuracies": 0.875, "rewards/chosen": -0.3203635811805725, "rewards/margins": 0.17925035953521729, "rewards/rejected": -0.4996139407157898, "step": 4707 }, { "epoch": 12.889801505817934, "grad_norm": 5.832777976989746, "learning_rate": 3.5534246575342464e-07, "log_odds_chosen": 0.41734546422958374, "log_odds_ratio": -0.5525590181350708, "logits/chosen": 0.9949736595153809, "logits/rejected": 0.960639476776123, "logps/chosen": -1.775496006011963, "logps/rejected": -2.118387222290039, "loss": 0.6699, "nll_loss": 0.6146513819694519, "rewards/accuracies": 0.75, "rewards/chosen": -0.17754961550235748, "rewards/margins": 0.034289129078388214, "rewards/rejected": -0.2118387520313263, "step": 4708 }, { "epoch": 12.892539356605065, "grad_norm": 5.420888900756836, "learning_rate": 3.5520547945205473e-07, "log_odds_chosen": 3.0193541049957275, "log_odds_ratio": -0.46854183077812195, "logits/chosen": 0.755090594291687, "logits/rejected": 0.6802979707717896, "logps/chosen": -2.6882803440093994, "logps/rejected": -5.6233391761779785, "loss": 0.8206, "nll_loss": 0.7737897038459778, "rewards/accuracies": 0.875, "rewards/chosen": -0.26882803440093994, "rewards/margins": 0.2935059070587158, "rewards/rejected": -0.5623339414596558, "step": 4709 }, { "epoch": 12.895277207392198, "grad_norm": 5.470292091369629, "learning_rate": 3.5506849315068494e-07, "log_odds_chosen": 2.0705275535583496, "log_odds_ratio": -0.2785457968711853, "logits/chosen": 0.51889967918396, "logits/rejected": 0.48359811305999756, "logps/chosen": -1.7731157541275024, "logps/rejected": -3.704191207885742, "loss": 0.6997, "nll_loss": 0.671838641166687, "rewards/accuracies": 0.875, "rewards/chosen": -0.17731156945228577, "rewards/margins": 0.19310754537582397, "rewards/rejected": -0.37041914463043213, "step": 4710 }, { "epoch": 12.898015058179329, "grad_norm": 6.206537246704102, "learning_rate": 3.5493150684931504e-07, "log_odds_chosen": 2.1625404357910156, "log_odds_ratio": -0.3002643883228302, "logits/chosen": 0.7564374208450317, "logits/rejected": 0.7889404296875, "logps/chosen": -2.0480268001556396, "logps/rejected": -4.089622497558594, "loss": 0.6658, "nll_loss": 0.6358091831207275, "rewards/accuracies": 1.0, "rewards/chosen": -0.20480269193649292, "rewards/margins": 0.20415958762168884, "rewards/rejected": -0.4089622497558594, "step": 4711 }, { "epoch": 12.900752908966462, "grad_norm": 6.378556251525879, "learning_rate": 3.547945205479452e-07, "log_odds_chosen": 1.3541017770767212, "log_odds_ratio": -0.5346561074256897, "logits/chosen": 0.7625205516815186, "logits/rejected": 0.7956035733222961, "logps/chosen": -2.738680362701416, "logps/rejected": -3.9761343002319336, "loss": 0.7666, "nll_loss": 0.7131364941596985, "rewards/accuracies": 0.625, "rewards/chosen": -0.27386799454689026, "rewards/margins": 0.12374541163444519, "rewards/rejected": -0.39761340618133545, "step": 4712 }, { "epoch": 12.903490759753593, "grad_norm": 5.799249172210693, "learning_rate": 3.5465753424657534e-07, "log_odds_chosen": 1.7973167896270752, "log_odds_ratio": -0.22742429375648499, "logits/chosen": 0.8431229591369629, "logits/rejected": 0.8303098678588867, "logps/chosen": -2.1776540279388428, "logps/rejected": -3.8082799911499023, "loss": 0.7196, "nll_loss": 0.6969054937362671, "rewards/accuracies": 1.0, "rewards/chosen": -0.21776539087295532, "rewards/margins": 0.16306260228157043, "rewards/rejected": -0.38082802295684814, "step": 4713 }, { "epoch": 12.906228610540726, "grad_norm": 5.872676372528076, "learning_rate": 3.545205479452055e-07, "log_odds_chosen": 2.553513526916504, "log_odds_ratio": -0.23401296138763428, "logits/chosen": 0.6822950839996338, "logits/rejected": 0.6579446792602539, "logps/chosen": -2.001485824584961, "logps/rejected": -4.406805992126465, "loss": 0.6689, "nll_loss": 0.6454863548278809, "rewards/accuracies": 1.0, "rewards/chosen": -0.2001485973596573, "rewards/margins": 0.2405320256948471, "rewards/rejected": -0.4406806230545044, "step": 4714 }, { "epoch": 12.908966461327857, "grad_norm": 5.776073932647705, "learning_rate": 3.543835616438356e-07, "log_odds_chosen": 2.4818077087402344, "log_odds_ratio": -0.2715957760810852, "logits/chosen": 0.7759077548980713, "logits/rejected": 0.8118332624435425, "logps/chosen": -2.279712200164795, "logps/rejected": -4.683453559875488, "loss": 0.7746, "nll_loss": 0.7474126815795898, "rewards/accuracies": 1.0, "rewards/chosen": -0.22797125577926636, "rewards/margins": 0.24037408828735352, "rewards/rejected": -0.4683453142642975, "step": 4715 }, { "epoch": 12.91170431211499, "grad_norm": 7.392397403717041, "learning_rate": 3.542465753424657e-07, "log_odds_chosen": 2.8975017070770264, "log_odds_ratio": -0.3541111648082733, "logits/chosen": 0.8002023100852966, "logits/rejected": 0.783108651638031, "logps/chosen": -2.411306858062744, "logps/rejected": -5.128859519958496, "loss": 0.8205, "nll_loss": 0.7850411534309387, "rewards/accuracies": 0.875, "rewards/chosen": -0.24113067984580994, "rewards/margins": 0.2717552185058594, "rewards/rejected": -0.5128859281539917, "step": 4716 }, { "epoch": 12.914442162902121, "grad_norm": 6.167550086975098, "learning_rate": 3.541095890410959e-07, "log_odds_chosen": 1.9406064748764038, "log_odds_ratio": -0.28643202781677246, "logits/chosen": 0.6553363800048828, "logits/rejected": 0.6105498671531677, "logps/chosen": -1.7907068729400635, "logps/rejected": -3.5781478881835938, "loss": 0.6782, "nll_loss": 0.6496019959449768, "rewards/accuracies": 1.0, "rewards/chosen": -0.17907068133354187, "rewards/margins": 0.17874407768249512, "rewards/rejected": -0.357814759016037, "step": 4717 }, { "epoch": 12.917180013689254, "grad_norm": 5.660332202911377, "learning_rate": 3.53972602739726e-07, "log_odds_chosen": 1.329837441444397, "log_odds_ratio": -0.41202205419540405, "logits/chosen": 1.0129215717315674, "logits/rejected": 0.9841030240058899, "logps/chosen": -2.122344493865967, "logps/rejected": -3.339073657989502, "loss": 0.6962, "nll_loss": 0.6549660563468933, "rewards/accuracies": 0.75, "rewards/chosen": -0.2122344672679901, "rewards/margins": 0.12167289853096008, "rewards/rejected": -0.3339073657989502, "step": 4718 }, { "epoch": 12.919917864476385, "grad_norm": 5.236647605895996, "learning_rate": 3.5383561643835615e-07, "log_odds_chosen": 2.000558376312256, "log_odds_ratio": -0.3922663927078247, "logits/chosen": 0.6427484154701233, "logits/rejected": 0.6536290645599365, "logps/chosen": -2.549548625946045, "logps/rejected": -4.523217678070068, "loss": 0.7174, "nll_loss": 0.6781696677207947, "rewards/accuracies": 0.75, "rewards/chosen": -0.25495484471321106, "rewards/margins": 0.19736690819263458, "rewards/rejected": -0.45232176780700684, "step": 4719 }, { "epoch": 12.922655715263518, "grad_norm": 6.227966785430908, "learning_rate": 3.536986301369863e-07, "log_odds_chosen": 1.7058968544006348, "log_odds_ratio": -0.38449835777282715, "logits/chosen": 0.6793015003204346, "logits/rejected": 0.721997857093811, "logps/chosen": -2.72491455078125, "logps/rejected": -4.36185359954834, "loss": 0.7173, "nll_loss": 0.6788792014122009, "rewards/accuracies": 0.875, "rewards/chosen": -0.272491455078125, "rewards/margins": 0.16369391977787018, "rewards/rejected": -0.436185359954834, "step": 4720 }, { "epoch": 12.92539356605065, "grad_norm": 6.07023811340332, "learning_rate": 3.5356164383561645e-07, "log_odds_chosen": 2.553433895111084, "log_odds_ratio": -0.1888996958732605, "logits/chosen": 0.9847105741500854, "logits/rejected": 1.015528678894043, "logps/chosen": -2.2221503257751465, "logps/rejected": -4.6522321701049805, "loss": 0.692, "nll_loss": 0.673130989074707, "rewards/accuracies": 1.0, "rewards/chosen": -0.22221502661705017, "rewards/margins": 0.24300819635391235, "rewards/rejected": -0.4652232229709625, "step": 4721 }, { "epoch": 12.928131416837783, "grad_norm": 5.890531539916992, "learning_rate": 3.5342465753424655e-07, "log_odds_chosen": 1.7894177436828613, "log_odds_ratio": -0.24546799063682556, "logits/chosen": 0.6771570444107056, "logits/rejected": 0.7199637293815613, "logps/chosen": -1.8975210189819336, "logps/rejected": -3.5695619583129883, "loss": 0.6489, "nll_loss": 0.6243263483047485, "rewards/accuracies": 0.875, "rewards/chosen": -0.18975210189819336, "rewards/margins": 0.1672040969133377, "rewards/rejected": -0.35695621371269226, "step": 4722 }, { "epoch": 12.930869267624914, "grad_norm": 6.498327255249023, "learning_rate": 3.5328767123287665e-07, "log_odds_chosen": 1.8486171960830688, "log_odds_ratio": -0.29130467772483826, "logits/chosen": 0.8997821807861328, "logits/rejected": 0.9815422296524048, "logps/chosen": -3.187244415283203, "logps/rejected": -4.997434139251709, "loss": 0.7354, "nll_loss": 0.7062416672706604, "rewards/accuracies": 0.875, "rewards/chosen": -0.31872445344924927, "rewards/margins": 0.18101897835731506, "rewards/rejected": -0.4997434616088867, "step": 4723 }, { "epoch": 12.933607118412047, "grad_norm": 9.413350105285645, "learning_rate": 3.5315068493150686e-07, "log_odds_chosen": 1.0881186723709106, "log_odds_ratio": -0.708277702331543, "logits/chosen": 0.8093849420547485, "logits/rejected": 0.8511077761650085, "logps/chosen": -3.311016082763672, "logps/rejected": -4.339135646820068, "loss": 0.9334, "nll_loss": 0.8625998497009277, "rewards/accuracies": 0.75, "rewards/chosen": -0.3311016261577606, "rewards/margins": 0.10281192511320114, "rewards/rejected": -0.43391358852386475, "step": 4724 }, { "epoch": 12.936344969199178, "grad_norm": 5.512854099273682, "learning_rate": 3.5301369863013696e-07, "log_odds_chosen": 1.276789665222168, "log_odds_ratio": -0.4612843990325928, "logits/chosen": 0.8361226916313171, "logits/rejected": 0.8709147572517395, "logps/chosen": -1.8117775917053223, "logps/rejected": -3.0143561363220215, "loss": 0.5802, "nll_loss": 0.5341147184371948, "rewards/accuracies": 0.75, "rewards/chosen": -0.1811777502298355, "rewards/margins": 0.12025785446166992, "rewards/rejected": -0.30143558979034424, "step": 4725 }, { "epoch": 12.93908281998631, "grad_norm": 5.203879356384277, "learning_rate": 3.528767123287671e-07, "log_odds_chosen": 3.290834426879883, "log_odds_ratio": -0.12209690362215042, "logits/chosen": 0.7305765151977539, "logits/rejected": 0.7573512196540833, "logps/chosen": -2.531080722808838, "logps/rejected": -5.724771976470947, "loss": 0.6558, "nll_loss": 0.6435486674308777, "rewards/accuracies": 1.0, "rewards/chosen": -0.25310805439949036, "rewards/margins": 0.31936919689178467, "rewards/rejected": -0.5724772214889526, "step": 4726 }, { "epoch": 12.941820670773442, "grad_norm": 5.152643203735352, "learning_rate": 3.5273972602739726e-07, "log_odds_chosen": 2.17849063873291, "log_odds_ratio": -0.23979458212852478, "logits/chosen": 0.9538794755935669, "logits/rejected": 0.995754599571228, "logps/chosen": -1.976701021194458, "logps/rejected": -4.0548601150512695, "loss": 0.64, "nll_loss": 0.6160035729408264, "rewards/accuracies": 0.875, "rewards/chosen": -0.197670117020607, "rewards/margins": 0.20781588554382324, "rewards/rejected": -0.40548598766326904, "step": 4727 }, { "epoch": 12.944558521560575, "grad_norm": 7.057816505432129, "learning_rate": 3.526027397260274e-07, "log_odds_chosen": 2.354708671569824, "log_odds_ratio": -0.395557701587677, "logits/chosen": 0.8091785907745361, "logits/rejected": 0.7793042659759521, "logps/chosen": -2.779407024383545, "logps/rejected": -5.029367923736572, "loss": 0.8183, "nll_loss": 0.7787068486213684, "rewards/accuracies": 0.75, "rewards/chosen": -0.2779407203197479, "rewards/margins": 0.22499607503414154, "rewards/rejected": -0.5029367804527283, "step": 4728 }, { "epoch": 12.947296372347708, "grad_norm": 4.662196159362793, "learning_rate": 3.524657534246575e-07, "log_odds_chosen": 2.3541760444641113, "log_odds_ratio": -0.14884594082832336, "logits/chosen": 0.917339563369751, "logits/rejected": 0.934407114982605, "logps/chosen": -2.042297840118408, "logps/rejected": -4.2245635986328125, "loss": 0.6109, "nll_loss": 0.5960507392883301, "rewards/accuracies": 1.0, "rewards/chosen": -0.20422980189323425, "rewards/margins": 0.21822655200958252, "rewards/rejected": -0.42245638370513916, "step": 4729 }, { "epoch": 12.950034223134839, "grad_norm": 5.796079158782959, "learning_rate": 3.523287671232876e-07, "log_odds_chosen": 3.1416895389556885, "log_odds_ratio": -0.12215238809585571, "logits/chosen": 0.7783542275428772, "logits/rejected": 0.8049976229667664, "logps/chosen": -2.576061964035034, "logps/rejected": -5.5690741539001465, "loss": 0.7285, "nll_loss": 0.7162367701530457, "rewards/accuracies": 1.0, "rewards/chosen": -0.2576062083244324, "rewards/margins": 0.2993012070655823, "rewards/rejected": -0.5569074153900146, "step": 4730 }, { "epoch": 12.952772073921972, "grad_norm": 6.216515064239502, "learning_rate": 3.521917808219178e-07, "log_odds_chosen": 1.265733242034912, "log_odds_ratio": -0.3391752541065216, "logits/chosen": 0.873935878276825, "logits/rejected": 0.7673786282539368, "logps/chosen": -2.927069664001465, "logps/rejected": -4.092006206512451, "loss": 0.7453, "nll_loss": 0.7114283442497253, "rewards/accuracies": 0.875, "rewards/chosen": -0.2927069664001465, "rewards/margins": 0.11649364978075027, "rewards/rejected": -0.40920060873031616, "step": 4731 }, { "epoch": 12.955509924709103, "grad_norm": 5.827545642852783, "learning_rate": 3.520547945205479e-07, "log_odds_chosen": 2.5647785663604736, "log_odds_ratio": -0.2460433542728424, "logits/chosen": 0.7511253952980042, "logits/rejected": 0.7659875154495239, "logps/chosen": -2.363758087158203, "logps/rejected": -4.830045223236084, "loss": 0.7612, "nll_loss": 0.7365546226501465, "rewards/accuracies": 1.0, "rewards/chosen": -0.2363758087158203, "rewards/margins": 0.24662870168685913, "rewards/rejected": -0.48300454020500183, "step": 4732 }, { "epoch": 12.958247775496236, "grad_norm": 8.084256172180176, "learning_rate": 3.5191780821917807e-07, "log_odds_chosen": 3.0202550888061523, "log_odds_ratio": -0.2663869261741638, "logits/chosen": 0.8940718173980713, "logits/rejected": 0.9791666269302368, "logps/chosen": -3.0819928646087646, "logps/rejected": -6.0146613121032715, "loss": 0.7959, "nll_loss": 0.7692182064056396, "rewards/accuracies": 0.75, "rewards/chosen": -0.30819928646087646, "rewards/margins": 0.29326683282852173, "rewards/rejected": -0.601466178894043, "step": 4733 }, { "epoch": 12.960985626283367, "grad_norm": 5.254901885986328, "learning_rate": 3.517808219178082e-07, "log_odds_chosen": 1.5639058351516724, "log_odds_ratio": -0.3718734383583069, "logits/chosen": 0.7865937352180481, "logits/rejected": 0.8615140914916992, "logps/chosen": -1.9616084098815918, "logps/rejected": -3.396883249282837, "loss": 0.6523, "nll_loss": 0.6151247024536133, "rewards/accuracies": 0.875, "rewards/chosen": -0.19616085290908813, "rewards/margins": 0.14352746307849884, "rewards/rejected": -0.3396883010864258, "step": 4734 }, { "epoch": 12.9637234770705, "grad_norm": 5.900447368621826, "learning_rate": 3.5164383561643837e-07, "log_odds_chosen": 1.3087060451507568, "log_odds_ratio": -0.4276134669780731, "logits/chosen": 0.7250852584838867, "logits/rejected": 0.6541332006454468, "logps/chosen": -1.73256254196167, "logps/rejected": -2.9483132362365723, "loss": 0.6514, "nll_loss": 0.6086146831512451, "rewards/accuracies": 0.75, "rewards/chosen": -0.17325624823570251, "rewards/margins": 0.12157505005598068, "rewards/rejected": -0.2948313057422638, "step": 4735 }, { "epoch": 12.966461327857632, "grad_norm": 5.51589822769165, "learning_rate": 3.5150684931506847e-07, "log_odds_chosen": 1.4070727825164795, "log_odds_ratio": -0.32978829741477966, "logits/chosen": 0.6674051880836487, "logits/rejected": 0.6416312456130981, "logps/chosen": -2.1862833499908447, "logps/rejected": -3.459804058074951, "loss": 0.7919, "nll_loss": 0.7589490413665771, "rewards/accuracies": 1.0, "rewards/chosen": -0.21862834692001343, "rewards/margins": 0.1273520588874817, "rewards/rejected": -0.3459804058074951, "step": 4736 }, { "epoch": 12.969199178644764, "grad_norm": 6.665643215179443, "learning_rate": 3.513698630136986e-07, "log_odds_chosen": 3.4629297256469727, "log_odds_ratio": -0.06970614194869995, "logits/chosen": 1.2014238834381104, "logits/rejected": 1.2745404243469238, "logps/chosen": -1.8410956859588623, "logps/rejected": -5.0486016273498535, "loss": 0.5642, "nll_loss": 0.5572168827056885, "rewards/accuracies": 1.0, "rewards/chosen": -0.18410956859588623, "rewards/margins": 0.3207505941390991, "rewards/rejected": -0.5048601627349854, "step": 4737 }, { "epoch": 12.971937029431896, "grad_norm": 6.725864887237549, "learning_rate": 3.512328767123288e-07, "log_odds_chosen": 1.9528629779815674, "log_odds_ratio": -0.2710750997066498, "logits/chosen": 1.065811038017273, "logits/rejected": 1.0286527872085571, "logps/chosen": -1.5082383155822754, "logps/rejected": -3.2939608097076416, "loss": 0.5294, "nll_loss": 0.5023373961448669, "rewards/accuracies": 1.0, "rewards/chosen": -0.15082383155822754, "rewards/margins": 0.17857226729393005, "rewards/rejected": -0.3293960988521576, "step": 4738 }, { "epoch": 12.974674880219029, "grad_norm": 4.1283698081970215, "learning_rate": 3.5109589041095887e-07, "log_odds_chosen": 2.307919979095459, "log_odds_ratio": -0.17876997590065002, "logits/chosen": 0.9712607264518738, "logits/rejected": 1.0219262838363647, "logps/chosen": -1.9614496231079102, "logps/rejected": -4.088614463806152, "loss": 0.5845, "nll_loss": 0.5666005611419678, "rewards/accuracies": 0.875, "rewards/chosen": -0.1961449682712555, "rewards/margins": 0.2127164602279663, "rewards/rejected": -0.4088614583015442, "step": 4739 }, { "epoch": 12.97741273100616, "grad_norm": 5.568068504333496, "learning_rate": 3.50958904109589e-07, "log_odds_chosen": 0.9316509366035461, "log_odds_ratio": -0.3673759400844574, "logits/chosen": 0.9527286887168884, "logits/rejected": 0.9454135894775391, "logps/chosen": -1.4090756177902222, "logps/rejected": -2.1699447631835938, "loss": 0.5385, "nll_loss": 0.5017328262329102, "rewards/accuracies": 0.875, "rewards/chosen": -0.14090757071971893, "rewards/margins": 0.07608690857887268, "rewards/rejected": -0.21699446439743042, "step": 4740 }, { "epoch": 12.980150581793293, "grad_norm": 5.456722736358643, "learning_rate": 3.508219178082192e-07, "log_odds_chosen": 1.730553150177002, "log_odds_ratio": -0.3529737889766693, "logits/chosen": 1.018249750137329, "logits/rejected": 1.0514109134674072, "logps/chosen": -2.269831657409668, "logps/rejected": -3.9138689041137695, "loss": 0.6419, "nll_loss": 0.6066135168075562, "rewards/accuracies": 0.75, "rewards/chosen": -0.2269831895828247, "rewards/margins": 0.16440372169017792, "rewards/rejected": -0.39138689637184143, "step": 4741 }, { "epoch": 12.982888432580424, "grad_norm": 5.652493476867676, "learning_rate": 3.506849315068493e-07, "log_odds_chosen": 2.7716495990753174, "log_odds_ratio": -0.21510638296604156, "logits/chosen": 0.879833459854126, "logits/rejected": 0.9457173347473145, "logps/chosen": -2.3971548080444336, "logps/rejected": -5.095187187194824, "loss": 0.8486, "nll_loss": 0.8271138668060303, "rewards/accuracies": 1.0, "rewards/chosen": -0.23971545696258545, "rewards/margins": 0.2698032557964325, "rewards/rejected": -0.5095187425613403, "step": 4742 }, { "epoch": 12.985626283367557, "grad_norm": 7.904626369476318, "learning_rate": 3.5054794520547943e-07, "log_odds_chosen": 2.4153518676757812, "log_odds_ratio": -0.3459870219230652, "logits/chosen": 0.5469579696655273, "logits/rejected": 0.4954223930835724, "logps/chosen": -2.2557663917541504, "logps/rejected": -4.572159767150879, "loss": 0.8676, "nll_loss": 0.832952618598938, "rewards/accuracies": 0.875, "rewards/chosen": -0.22557665407657623, "rewards/margins": 0.2316393256187439, "rewards/rejected": -0.4572159945964813, "step": 4743 }, { "epoch": 12.988364134154688, "grad_norm": 5.106321334838867, "learning_rate": 3.504109589041096e-07, "log_odds_chosen": 2.8659167289733887, "log_odds_ratio": -0.15045912563800812, "logits/chosen": 0.91698157787323, "logits/rejected": 0.8632373809814453, "logps/chosen": -2.192416191101074, "logps/rejected": -4.9463605880737305, "loss": 0.6228, "nll_loss": 0.6077236533164978, "rewards/accuracies": 1.0, "rewards/chosen": -0.21924161911010742, "rewards/margins": 0.2753944993019104, "rewards/rejected": -0.49463608860969543, "step": 4744 }, { "epoch": 12.991101984941821, "grad_norm": 5.891036033630371, "learning_rate": 3.5027397260273973e-07, "log_odds_chosen": 1.5153483152389526, "log_odds_ratio": -0.29884451627731323, "logits/chosen": 0.9273166060447693, "logits/rejected": 0.8744492530822754, "logps/chosen": -1.274543285369873, "logps/rejected": -2.584409236907959, "loss": 0.5492, "nll_loss": 0.519318163394928, "rewards/accuracies": 1.0, "rewards/chosen": -0.12745434045791626, "rewards/margins": 0.13098658621311188, "rewards/rejected": -0.25844094157218933, "step": 4745 }, { "epoch": 12.993839835728952, "grad_norm": 5.173647880554199, "learning_rate": 3.5013698630136983e-07, "log_odds_chosen": 1.0291540622711182, "log_odds_ratio": -0.35742056369781494, "logits/chosen": 0.943253219127655, "logits/rejected": 0.955661416053772, "logps/chosen": -2.043527364730835, "logps/rejected": -2.9265642166137695, "loss": 0.6117, "nll_loss": 0.5759256482124329, "rewards/accuracies": 0.875, "rewards/chosen": -0.2043527364730835, "rewards/margins": 0.08830370754003525, "rewards/rejected": -0.29265642166137695, "step": 4746 }, { "epoch": 12.996577686516085, "grad_norm": 4.8697991371154785, "learning_rate": 3.5e-07, "log_odds_chosen": 2.7860074043273926, "log_odds_ratio": -0.19714674353599548, "logits/chosen": 0.7522177696228027, "logits/rejected": 0.7376989722251892, "logps/chosen": -1.8145109415054321, "logps/rejected": -4.41687536239624, "loss": 0.6667, "nll_loss": 0.646952211856842, "rewards/accuracies": 1.0, "rewards/chosen": -0.18145109713077545, "rewards/margins": 0.2602364718914032, "rewards/rejected": -0.44168761372566223, "step": 4747 }, { "epoch": 12.999315537303216, "grad_norm": 8.137750625610352, "learning_rate": 3.4986301369863014e-07, "log_odds_chosen": 0.8962283134460449, "log_odds_ratio": -0.5208398103713989, "logits/chosen": 0.927289605140686, "logits/rejected": 0.9301344156265259, "logps/chosen": -2.749803066253662, "logps/rejected": -3.5544707775115967, "loss": 0.6547, "nll_loss": 0.6026365756988525, "rewards/accuracies": 0.875, "rewards/chosen": -0.2749803066253662, "rewards/margins": 0.08046678453683853, "rewards/rejected": -0.35544708371162415, "step": 4748 }, { "epoch": 13.00205338809035, "grad_norm": 5.383620738983154, "learning_rate": 3.4972602739726024e-07, "log_odds_chosen": 3.025848865509033, "log_odds_ratio": -0.1534569263458252, "logits/chosen": 0.781351625919342, "logits/rejected": 0.7529494762420654, "logps/chosen": -2.105782985687256, "logps/rejected": -4.976954460144043, "loss": 0.8103, "nll_loss": 0.794959306716919, "rewards/accuracies": 1.0, "rewards/chosen": -0.2105783224105835, "rewards/margins": 0.2871171236038208, "rewards/rejected": -0.4976954162120819, "step": 4749 }, { "epoch": 13.00479123887748, "grad_norm": 7.990303039550781, "learning_rate": 3.495890410958904e-07, "log_odds_chosen": 1.3937947750091553, "log_odds_ratio": -0.30139702558517456, "logits/chosen": 0.8568620681762695, "logits/rejected": 0.9388848543167114, "logps/chosen": -2.4960741996765137, "logps/rejected": -3.790432929992676, "loss": 0.6408, "nll_loss": 0.6107029914855957, "rewards/accuracies": 1.0, "rewards/chosen": -0.24960742890834808, "rewards/margins": 0.12943583726882935, "rewards/rejected": -0.379043310880661, "step": 4750 }, { "epoch": 13.007529089664613, "grad_norm": 5.480325222015381, "learning_rate": 3.4945205479452054e-07, "log_odds_chosen": 1.6957542896270752, "log_odds_ratio": -0.25604546070098877, "logits/chosen": 0.8067437410354614, "logits/rejected": 0.8298635482788086, "logps/chosen": -2.2282698154449463, "logps/rejected": -3.8435654640197754, "loss": 0.685, "nll_loss": 0.6593976616859436, "rewards/accuracies": 0.875, "rewards/chosen": -0.2228269875049591, "rewards/margins": 0.16152958571910858, "rewards/rejected": -0.3843565583229065, "step": 4751 }, { "epoch": 13.010266940451745, "grad_norm": 6.845708847045898, "learning_rate": 3.493150684931507e-07, "log_odds_chosen": 1.0115219354629517, "log_odds_ratio": -0.35715585947036743, "logits/chosen": 0.8402008414268494, "logits/rejected": 0.8417865037918091, "logps/chosen": -2.335374116897583, "logps/rejected": -3.2798564434051514, "loss": 0.7843, "nll_loss": 0.7485713362693787, "rewards/accuracies": 1.0, "rewards/chosen": -0.23353740572929382, "rewards/margins": 0.09444823861122131, "rewards/rejected": -0.32798564434051514, "step": 4752 }, { "epoch": 13.013004791238878, "grad_norm": 4.9901323318481445, "learning_rate": 3.491780821917808e-07, "log_odds_chosen": 1.9200464487075806, "log_odds_ratio": -0.2942654490470886, "logits/chosen": 0.7208452820777893, "logits/rejected": 0.7414423227310181, "logps/chosen": -1.8733755350112915, "logps/rejected": -3.654423713684082, "loss": 0.5674, "nll_loss": 0.5380168557167053, "rewards/accuracies": 0.875, "rewards/chosen": -0.18733756244182587, "rewards/margins": 0.17810481786727905, "rewards/rejected": -0.3654423654079437, "step": 4753 }, { "epoch": 13.015742642026009, "grad_norm": 5.399178504943848, "learning_rate": 3.4904109589041094e-07, "log_odds_chosen": 2.6638131141662598, "log_odds_ratio": -0.10457414388656616, "logits/chosen": 0.9208916425704956, "logits/rejected": 0.9827355146408081, "logps/chosen": -2.238070487976074, "logps/rejected": -4.7652387619018555, "loss": 0.6857, "nll_loss": 0.6752424836158752, "rewards/accuracies": 1.0, "rewards/chosen": -0.22380705177783966, "rewards/margins": 0.2527168393135071, "rewards/rejected": -0.47652387619018555, "step": 4754 }, { "epoch": 13.018480492813142, "grad_norm": 6.160069942474365, "learning_rate": 3.489041095890411e-07, "log_odds_chosen": 2.443296432495117, "log_odds_ratio": -0.28380870819091797, "logits/chosen": 0.630512535572052, "logits/rejected": 0.5919181108474731, "logps/chosen": -1.7287964820861816, "logps/rejected": -4.048020839691162, "loss": 0.859, "nll_loss": 0.8305700421333313, "rewards/accuracies": 0.875, "rewards/chosen": -0.1728796511888504, "rewards/margins": 0.2319224327802658, "rewards/rejected": -0.4048020839691162, "step": 4755 }, { "epoch": 13.021218343600275, "grad_norm": 5.353577613830566, "learning_rate": 3.487671232876712e-07, "log_odds_chosen": 2.646808385848999, "log_odds_ratio": -0.10901230573654175, "logits/chosen": 0.9848130941390991, "logits/rejected": 1.0696948766708374, "logps/chosen": -2.172407627105713, "logps/rejected": -4.651890754699707, "loss": 0.5452, "nll_loss": 0.5342492461204529, "rewards/accuracies": 1.0, "rewards/chosen": -0.21724076569080353, "rewards/margins": 0.2479483038187027, "rewards/rejected": -0.4651890993118286, "step": 4756 }, { "epoch": 13.023956194387406, "grad_norm": 4.763069152832031, "learning_rate": 3.4863013698630135e-07, "log_odds_chosen": 2.8486838340759277, "log_odds_ratio": -0.20515842735767365, "logits/chosen": 0.8152016997337341, "logits/rejected": 0.8737167119979858, "logps/chosen": -2.4457006454467773, "logps/rejected": -5.18349552154541, "loss": 0.707, "nll_loss": 0.6864923238754272, "rewards/accuracies": 1.0, "rewards/chosen": -0.2445700615644455, "rewards/margins": 0.2737794816493988, "rewards/rejected": -0.5183495879173279, "step": 4757 }, { "epoch": 13.026694045174539, "grad_norm": 5.183945655822754, "learning_rate": 3.484931506849315e-07, "log_odds_chosen": 4.282285690307617, "log_odds_ratio": -0.16815432906150818, "logits/chosen": 0.7606316804885864, "logits/rejected": 0.8116050362586975, "logps/chosen": -1.6234219074249268, "logps/rejected": -5.7076616287231445, "loss": 0.7092, "nll_loss": 0.6924158930778503, "rewards/accuracies": 0.875, "rewards/chosen": -0.16234219074249268, "rewards/margins": 0.4084240198135376, "rewards/rejected": -0.5707662105560303, "step": 4758 }, { "epoch": 13.02943189596167, "grad_norm": 5.562800407409668, "learning_rate": 3.4835616438356165e-07, "log_odds_chosen": 2.187563896179199, "log_odds_ratio": -0.227705717086792, "logits/chosen": 0.7267721891403198, "logits/rejected": 0.7484426498413086, "logps/chosen": -1.9204427003860474, "logps/rejected": -3.8873369693756104, "loss": 0.5999, "nll_loss": 0.5770971775054932, "rewards/accuracies": 1.0, "rewards/chosen": -0.19204425811767578, "rewards/margins": 0.1966894119977951, "rewards/rejected": -0.38873371481895447, "step": 4759 }, { "epoch": 13.032169746748803, "grad_norm": 6.058023452758789, "learning_rate": 3.4821917808219175e-07, "log_odds_chosen": 1.8783775568008423, "log_odds_ratio": -0.2754405736923218, "logits/chosen": 0.6267406940460205, "logits/rejected": 0.6181344985961914, "logps/chosen": -1.9619321823120117, "logps/rejected": -3.724210739135742, "loss": 0.7838, "nll_loss": 0.7562780380249023, "rewards/accuracies": 1.0, "rewards/chosen": -0.19619323313236237, "rewards/margins": 0.176227867603302, "rewards/rejected": -0.3724210858345032, "step": 4760 }, { "epoch": 13.034907597535934, "grad_norm": 5.281550884246826, "learning_rate": 3.480821917808219e-07, "log_odds_chosen": 3.3688135147094727, "log_odds_ratio": -0.13180765509605408, "logits/chosen": 0.7776385545730591, "logits/rejected": 0.7446034550666809, "logps/chosen": -1.7702158689498901, "logps/rejected": -4.912669658660889, "loss": 0.6383, "nll_loss": 0.6251009702682495, "rewards/accuracies": 1.0, "rewards/chosen": -0.1770215928554535, "rewards/margins": 0.3142453730106354, "rewards/rejected": -0.49126699566841125, "step": 4761 }, { "epoch": 13.037645448323067, "grad_norm": 5.016962051391602, "learning_rate": 3.4794520547945205e-07, "log_odds_chosen": 2.934453248977661, "log_odds_ratio": -0.14623279869556427, "logits/chosen": 0.6634647846221924, "logits/rejected": 0.6162752509117126, "logps/chosen": -2.095395088195801, "logps/rejected": -4.898369789123535, "loss": 0.7221, "nll_loss": 0.7074365019798279, "rewards/accuracies": 1.0, "rewards/chosen": -0.2095395028591156, "rewards/margins": 0.2802974581718445, "rewards/rejected": -0.48983699083328247, "step": 4762 }, { "epoch": 13.040383299110198, "grad_norm": 4.523342609405518, "learning_rate": 3.4780821917808215e-07, "log_odds_chosen": 2.0716233253479004, "log_odds_ratio": -0.20980969071388245, "logits/chosen": 0.7411509156227112, "logits/rejected": 0.7409709095954895, "logps/chosen": -2.125343084335327, "logps/rejected": -4.081568717956543, "loss": 0.657, "nll_loss": 0.6359760165214539, "rewards/accuracies": 1.0, "rewards/chosen": -0.2125343233346939, "rewards/margins": 0.1956225335597992, "rewards/rejected": -0.4081568717956543, "step": 4763 }, { "epoch": 13.043121149897331, "grad_norm": 4.445913791656494, "learning_rate": 3.476712328767123e-07, "log_odds_chosen": 2.415933132171631, "log_odds_ratio": -0.2588626444339752, "logits/chosen": 0.9887554049491882, "logits/rejected": 1.0178216695785522, "logps/chosen": -1.6194543838500977, "logps/rejected": -3.813497304916382, "loss": 0.7382, "nll_loss": 0.7123581767082214, "rewards/accuracies": 0.875, "rewards/chosen": -0.1619454324245453, "rewards/margins": 0.21940430998802185, "rewards/rejected": -0.38134974241256714, "step": 4764 }, { "epoch": 13.045859000684462, "grad_norm": 6.604310512542725, "learning_rate": 3.4753424657534246e-07, "log_odds_chosen": 2.567629337310791, "log_odds_ratio": -0.20537300407886505, "logits/chosen": 0.7928215265274048, "logits/rejected": 0.7622661590576172, "logps/chosen": -2.330549716949463, "logps/rejected": -4.803893089294434, "loss": 0.849, "nll_loss": 0.8284617066383362, "rewards/accuracies": 1.0, "rewards/chosen": -0.2330549657344818, "rewards/margins": 0.24733436107635498, "rewards/rejected": -0.4803893566131592, "step": 4765 }, { "epoch": 13.048596851471595, "grad_norm": 5.412745475769043, "learning_rate": 3.473972602739726e-07, "log_odds_chosen": 1.5501081943511963, "log_odds_ratio": -0.31126075983047485, "logits/chosen": 0.9161791801452637, "logits/rejected": 0.9493799209594727, "logps/chosen": -2.095564842224121, "logps/rejected": -3.5020017623901367, "loss": 0.6686, "nll_loss": 0.6374796032905579, "rewards/accuracies": 0.875, "rewards/chosen": -0.2095564603805542, "rewards/margins": 0.14064371585845947, "rewards/rejected": -0.35020020604133606, "step": 4766 }, { "epoch": 13.051334702258726, "grad_norm": 5.101344585418701, "learning_rate": 3.472602739726027e-07, "log_odds_chosen": 1.8600225448608398, "log_odds_ratio": -0.4665941894054413, "logits/chosen": 0.6839902400970459, "logits/rejected": 0.7204605340957642, "logps/chosen": -2.306565761566162, "logps/rejected": -4.010894775390625, "loss": 0.7052, "nll_loss": 0.6585884094238281, "rewards/accuracies": 0.875, "rewards/chosen": -0.23065657913684845, "rewards/margins": 0.17043286561965942, "rewards/rejected": -0.4010894298553467, "step": 4767 }, { "epoch": 13.05407255304586, "grad_norm": 5.320840358734131, "learning_rate": 3.4712328767123286e-07, "log_odds_chosen": 1.9634443521499634, "log_odds_ratio": -0.25392359495162964, "logits/chosen": 0.8464927077293396, "logits/rejected": 0.9043059349060059, "logps/chosen": -2.5702297687530518, "logps/rejected": -4.466475486755371, "loss": 0.8503, "nll_loss": 0.8249136805534363, "rewards/accuracies": 1.0, "rewards/chosen": -0.2570229768753052, "rewards/margins": 0.18962454795837402, "rewards/rejected": -0.4466475248336792, "step": 4768 }, { "epoch": 13.05681040383299, "grad_norm": 4.850307464599609, "learning_rate": 3.46986301369863e-07, "log_odds_chosen": 1.423783779144287, "log_odds_ratio": -0.3874332308769226, "logits/chosen": 0.9943647980690002, "logits/rejected": 1.0010299682617188, "logps/chosen": -2.1688361167907715, "logps/rejected": -3.48244309425354, "loss": 0.6516, "nll_loss": 0.6129035353660583, "rewards/accuracies": 0.875, "rewards/chosen": -0.2168836146593094, "rewards/margins": 0.13136067986488342, "rewards/rejected": -0.348244309425354, "step": 4769 }, { "epoch": 13.059548254620124, "grad_norm": 6.105327606201172, "learning_rate": 3.468493150684931e-07, "log_odds_chosen": 2.328892469406128, "log_odds_ratio": -0.26849812269210815, "logits/chosen": 0.7347936630249023, "logits/rejected": 0.8233713507652283, "logps/chosen": -2.2749686241149902, "logps/rejected": -4.520899772644043, "loss": 0.75, "nll_loss": 0.7231659889221191, "rewards/accuracies": 0.875, "rewards/chosen": -0.22749686241149902, "rewards/margins": 0.2245931327342987, "rewards/rejected": -0.45208996534347534, "step": 4770 }, { "epoch": 13.062286105407255, "grad_norm": 6.831900119781494, "learning_rate": 3.4671232876712326e-07, "log_odds_chosen": 3.0331368446350098, "log_odds_ratio": -0.47518277168273926, "logits/chosen": 0.9712921977043152, "logits/rejected": 0.9797122478485107, "logps/chosen": -2.9182536602020264, "logps/rejected": -5.846133708953857, "loss": 0.7918, "nll_loss": 0.7442841529846191, "rewards/accuracies": 0.625, "rewards/chosen": -0.29182538390159607, "rewards/margins": 0.292788028717041, "rewards/rejected": -0.5846133828163147, "step": 4771 }, { "epoch": 13.065023956194388, "grad_norm": 4.942351818084717, "learning_rate": 3.465753424657534e-07, "log_odds_chosen": 1.8377463817596436, "log_odds_ratio": -0.23541511595249176, "logits/chosen": 1.0257388353347778, "logits/rejected": 0.9915309548377991, "logps/chosen": -1.7017021179199219, "logps/rejected": -3.397891044616699, "loss": 0.6134, "nll_loss": 0.5898212790489197, "rewards/accuracies": 1.0, "rewards/chosen": -0.17017021775245667, "rewards/margins": 0.1696188896894455, "rewards/rejected": -0.33978912234306335, "step": 4772 }, { "epoch": 13.067761806981519, "grad_norm": 4.862150192260742, "learning_rate": 3.4643835616438357e-07, "log_odds_chosen": 2.9046359062194824, "log_odds_ratio": -0.12363488972187042, "logits/chosen": 0.7260265350341797, "logits/rejected": 0.6828550100326538, "logps/chosen": -1.4555280208587646, "logps/rejected": -4.085491180419922, "loss": 0.6333, "nll_loss": 0.620914101600647, "rewards/accuracies": 1.0, "rewards/chosen": -0.14555279910564423, "rewards/margins": 0.2629963159561157, "rewards/rejected": -0.40854915976524353, "step": 4773 }, { "epoch": 13.070499657768652, "grad_norm": 5.608672142028809, "learning_rate": 3.4630136986301367e-07, "log_odds_chosen": 1.7100210189819336, "log_odds_ratio": -0.4550401568412781, "logits/chosen": 0.6506115198135376, "logits/rejected": 0.7364029884338379, "logps/chosen": -2.3514819145202637, "logps/rejected": -3.972651243209839, "loss": 0.656, "nll_loss": 0.6105265021324158, "rewards/accuracies": 0.875, "rewards/chosen": -0.23514820635318756, "rewards/margins": 0.16211694478988647, "rewards/rejected": -0.39726513624191284, "step": 4774 }, { "epoch": 13.073237508555783, "grad_norm": 9.490652084350586, "learning_rate": 3.4616438356164387e-07, "log_odds_chosen": 0.6447738409042358, "log_odds_ratio": -0.6236461400985718, "logits/chosen": 0.8433090448379517, "logits/rejected": 0.8796042203903198, "logps/chosen": -2.892871379852295, "logps/rejected": -3.417466163635254, "loss": 0.6768, "nll_loss": 0.6143978834152222, "rewards/accuracies": 0.75, "rewards/chosen": -0.28928714990615845, "rewards/margins": 0.05245945230126381, "rewards/rejected": -0.34174659848213196, "step": 4775 }, { "epoch": 13.075975359342916, "grad_norm": 7.1601362228393555, "learning_rate": 3.4602739726027397e-07, "log_odds_chosen": 0.9236422181129456, "log_odds_ratio": -0.4913980960845947, "logits/chosen": 0.7763644456863403, "logits/rejected": 0.7352156639099121, "logps/chosen": -2.6762337684631348, "logps/rejected": -3.5231828689575195, "loss": 0.639, "nll_loss": 0.5898935794830322, "rewards/accuracies": 0.75, "rewards/chosen": -0.2676233947277069, "rewards/margins": 0.08469490706920624, "rewards/rejected": -0.35231831669807434, "step": 4776 }, { "epoch": 13.078713210130047, "grad_norm": 5.169609069824219, "learning_rate": 3.4589041095890407e-07, "log_odds_chosen": 1.463173747062683, "log_odds_ratio": -0.38624173402786255, "logits/chosen": 0.8534786105155945, "logits/rejected": 0.9232312440872192, "logps/chosen": -2.1641974449157715, "logps/rejected": -3.4190878868103027, "loss": 0.6313, "nll_loss": 0.592644214630127, "rewards/accuracies": 0.75, "rewards/chosen": -0.2164197564125061, "rewards/margins": 0.12548905611038208, "rewards/rejected": -0.3419088125228882, "step": 4777 }, { "epoch": 13.08145106091718, "grad_norm": 5.137088775634766, "learning_rate": 3.457534246575342e-07, "log_odds_chosen": 2.5859274864196777, "log_odds_ratio": -0.19378530979156494, "logits/chosen": 1.0130727291107178, "logits/rejected": 1.0674318075180054, "logps/chosen": -2.2720108032226562, "logps/rejected": -4.718973159790039, "loss": 0.689, "nll_loss": 0.6695855855941772, "rewards/accuracies": 1.0, "rewards/chosen": -0.22720110416412354, "rewards/margins": 0.2446962296962738, "rewards/rejected": -0.47189730405807495, "step": 4778 }, { "epoch": 13.084188911704311, "grad_norm": 5.162903308868408, "learning_rate": 3.456164383561644e-07, "log_odds_chosen": 1.459335446357727, "log_odds_ratio": -0.2603471875190735, "logits/chosen": 0.8445819020271301, "logits/rejected": 0.8469635844230652, "logps/chosen": -1.5256208181381226, "logps/rejected": -2.81561541557312, "loss": 0.5936, "nll_loss": 0.5675501227378845, "rewards/accuracies": 1.0, "rewards/chosen": -0.15256208181381226, "rewards/margins": 0.12899945676326752, "rewards/rejected": -0.28156155347824097, "step": 4779 }, { "epoch": 13.086926762491444, "grad_norm": 6.3042449951171875, "learning_rate": 3.454794520547945e-07, "log_odds_chosen": 1.6112078428268433, "log_odds_ratio": -0.28765982389450073, "logits/chosen": 0.8303987979888916, "logits/rejected": 0.7402074337005615, "logps/chosen": -1.9013547897338867, "logps/rejected": -3.365622043609619, "loss": 0.6633, "nll_loss": 0.6345553994178772, "rewards/accuracies": 1.0, "rewards/chosen": -0.19013547897338867, "rewards/margins": 0.1464267373085022, "rewards/rejected": -0.33656221628189087, "step": 4780 }, { "epoch": 13.089664613278575, "grad_norm": 6.993435859680176, "learning_rate": 3.453424657534246e-07, "log_odds_chosen": 1.8828731775283813, "log_odds_ratio": -0.26406794786453247, "logits/chosen": 0.8862658143043518, "logits/rejected": 0.905102550983429, "logps/chosen": -2.5681941509246826, "logps/rejected": -4.35432243347168, "loss": 0.6289, "nll_loss": 0.6025081872940063, "rewards/accuracies": 1.0, "rewards/chosen": -0.2568194270133972, "rewards/margins": 0.1786128282546997, "rewards/rejected": -0.4354322552680969, "step": 4781 }, { "epoch": 13.092402464065708, "grad_norm": 5.758163928985596, "learning_rate": 3.4520547945205483e-07, "log_odds_chosen": 2.0272562503814697, "log_odds_ratio": -0.2942861318588257, "logits/chosen": 0.690185010433197, "logits/rejected": 0.720975935459137, "logps/chosen": -2.461777687072754, "logps/rejected": -4.368234634399414, "loss": 0.6788, "nll_loss": 0.6494203209877014, "rewards/accuracies": 0.875, "rewards/chosen": -0.2461777925491333, "rewards/margins": 0.1906457245349884, "rewards/rejected": -0.4368234872817993, "step": 4782 }, { "epoch": 13.095140314852841, "grad_norm": 5.104135513305664, "learning_rate": 3.4506849315068493e-07, "log_odds_chosen": 2.26719069480896, "log_odds_ratio": -0.21352365612983704, "logits/chosen": 0.6806355118751526, "logits/rejected": 0.6318073272705078, "logps/chosen": -1.9240145683288574, "logps/rejected": -4.000718116760254, "loss": 0.6754, "nll_loss": 0.654045581817627, "rewards/accuracies": 0.875, "rewards/chosen": -0.1924014687538147, "rewards/margins": 0.20767036080360413, "rewards/rejected": -0.40007179975509644, "step": 4783 }, { "epoch": 13.097878165639973, "grad_norm": 4.858240604400635, "learning_rate": 3.4493150684931503e-07, "log_odds_chosen": 1.8280274868011475, "log_odds_ratio": -0.3092346787452698, "logits/chosen": 0.6289752721786499, "logits/rejected": 0.6453538537025452, "logps/chosen": -2.2029333114624023, "logps/rejected": -3.9384350776672363, "loss": 0.6828, "nll_loss": 0.6518828272819519, "rewards/accuracies": 0.75, "rewards/chosen": -0.220293328166008, "rewards/margins": 0.17355018854141235, "rewards/rejected": -0.39384353160858154, "step": 4784 }, { "epoch": 13.100616016427105, "grad_norm": 4.635016441345215, "learning_rate": 3.447945205479452e-07, "log_odds_chosen": 2.875206470489502, "log_odds_ratio": -0.18433088064193726, "logits/chosen": 0.7309474349021912, "logits/rejected": 0.7377289533615112, "logps/chosen": -1.8767080307006836, "logps/rejected": -4.619733810424805, "loss": 0.7064, "nll_loss": 0.6879847645759583, "rewards/accuracies": 1.0, "rewards/chosen": -0.18767079710960388, "rewards/margins": 0.27430260181427, "rewards/rejected": -0.4619733691215515, "step": 4785 }, { "epoch": 13.103353867214237, "grad_norm": 5.175690174102783, "learning_rate": 3.4465753424657533e-07, "log_odds_chosen": 2.0939557552337646, "log_odds_ratio": -0.2537338435649872, "logits/chosen": 0.9547722339630127, "logits/rejected": 1.0654723644256592, "logps/chosen": -2.5093822479248047, "logps/rejected": -4.52900505065918, "loss": 0.7218, "nll_loss": 0.6964290738105774, "rewards/accuracies": 0.875, "rewards/chosen": -0.2509382367134094, "rewards/margins": 0.20196226239204407, "rewards/rejected": -0.4529004991054535, "step": 4786 }, { "epoch": 13.10609171800137, "grad_norm": 5.291319847106934, "learning_rate": 3.445205479452055e-07, "log_odds_chosen": 1.818987488746643, "log_odds_ratio": -0.2578714191913605, "logits/chosen": 0.9668764472007751, "logits/rejected": 0.9922999143600464, "logps/chosen": -1.6309289932250977, "logps/rejected": -3.2877328395843506, "loss": 0.6561, "nll_loss": 0.6303055286407471, "rewards/accuracies": 1.0, "rewards/chosen": -0.16309291124343872, "rewards/margins": 0.16568036377429962, "rewards/rejected": -0.32877328991889954, "step": 4787 }, { "epoch": 13.1088295687885, "grad_norm": 5.347402095794678, "learning_rate": 3.443835616438356e-07, "log_odds_chosen": 2.594386100769043, "log_odds_ratio": -0.4093366861343384, "logits/chosen": 0.8249033689498901, "logits/rejected": 0.810982346534729, "logps/chosen": -2.6946425437927246, "logps/rejected": -5.133180141448975, "loss": 0.7216, "nll_loss": 0.680694580078125, "rewards/accuracies": 0.875, "rewards/chosen": -0.26946428418159485, "rewards/margins": 0.24385377764701843, "rewards/rejected": -0.5133180618286133, "step": 4788 }, { "epoch": 13.111567419575634, "grad_norm": Infinity, "learning_rate": 3.443835616438356e-07, "log_odds_chosen": 0.5993391275405884, "log_odds_ratio": -0.8680381774902344, "logits/chosen": 1.0184555053710938, "logits/rejected": 1.0756011009216309, "logps/chosen": -3.392770290374756, "logps/rejected": -3.9151968955993652, "loss": 0.7317, "nll_loss": 0.6448885798454285, "rewards/accuracies": 0.625, "rewards/chosen": -0.3392770290374756, "rewards/margins": 0.052242666482925415, "rewards/rejected": -0.3915197253227234, "step": 4789 }, { "epoch": 13.114305270362765, "grad_norm": 5.179281711578369, "learning_rate": 3.4424657534246574e-07, "log_odds_chosen": 2.5763838291168213, "log_odds_ratio": -0.13742564618587494, "logits/chosen": 0.671831488609314, "logits/rejected": 0.627753734588623, "logps/chosen": -1.385716199874878, "logps/rejected": -3.5898337364196777, "loss": 0.5937, "nll_loss": 0.5799748301506042, "rewards/accuracies": 1.0, "rewards/chosen": -0.1385716199874878, "rewards/margins": 0.2204117476940155, "rewards/rejected": -0.3589833676815033, "step": 4790 }, { "epoch": 13.117043121149898, "grad_norm": 7.622625350952148, "learning_rate": 3.441095890410959e-07, "log_odds_chosen": 1.7468748092651367, "log_odds_ratio": -0.5699987411499023, "logits/chosen": 0.9077839255332947, "logits/rejected": 0.9033915996551514, "logps/chosen": -2.776235342025757, "logps/rejected": -4.388162612915039, "loss": 0.695, "nll_loss": 0.6379696726799011, "rewards/accuracies": 0.75, "rewards/chosen": -0.27762356400489807, "rewards/margins": 0.16119273006916046, "rewards/rejected": -0.43881624937057495, "step": 4791 }, { "epoch": 13.119780971937029, "grad_norm": 5.15522575378418, "learning_rate": 3.43972602739726e-07, "log_odds_chosen": 1.408075213432312, "log_odds_ratio": -0.6045607328414917, "logits/chosen": 0.7869957685470581, "logits/rejected": 0.8233226537704468, "logps/chosen": -2.3718812465667725, "logps/rejected": -3.747112274169922, "loss": 0.6699, "nll_loss": 0.6094381213188171, "rewards/accuracies": 0.75, "rewards/chosen": -0.23718813061714172, "rewards/margins": 0.1375230997800827, "rewards/rejected": -0.37471121549606323, "step": 4792 }, { "epoch": 13.122518822724162, "grad_norm": 5.154094219207764, "learning_rate": 3.4383561643835614e-07, "log_odds_chosen": 2.2396068572998047, "log_odds_ratio": -0.29346925020217896, "logits/chosen": 0.8470664024353027, "logits/rejected": 0.8292504549026489, "logps/chosen": -2.0753045082092285, "logps/rejected": -4.157486915588379, "loss": 0.6597, "nll_loss": 0.6303569078445435, "rewards/accuracies": 0.875, "rewards/chosen": -0.2075304388999939, "rewards/margins": 0.2082182615995407, "rewards/rejected": -0.4157487154006958, "step": 4793 }, { "epoch": 13.125256673511293, "grad_norm": 8.686846733093262, "learning_rate": 3.436986301369863e-07, "log_odds_chosen": 2.4826555252075195, "log_odds_ratio": -0.5349580645561218, "logits/chosen": 1.0830732583999634, "logits/rejected": 1.0592265129089355, "logps/chosen": -2.5889947414398193, "logps/rejected": -4.957067489624023, "loss": 0.6617, "nll_loss": 0.6082096099853516, "rewards/accuracies": 0.875, "rewards/chosen": -0.2588994801044464, "rewards/margins": 0.23680733144283295, "rewards/rejected": -0.49570679664611816, "step": 4794 }, { "epoch": 13.127994524298426, "grad_norm": 6.880042552947998, "learning_rate": 3.4356164383561644e-07, "log_odds_chosen": 0.6982683539390564, "log_odds_ratio": -0.4576287269592285, "logits/chosen": 0.8752198815345764, "logits/rejected": 0.8297266960144043, "logps/chosen": -2.4550890922546387, "logps/rejected": -3.069579601287842, "loss": 0.6974, "nll_loss": 0.6516302824020386, "rewards/accuracies": 0.875, "rewards/chosen": -0.24550893902778625, "rewards/margins": 0.06144902482628822, "rewards/rejected": -0.3069579601287842, "step": 4795 }, { "epoch": 13.130732375085557, "grad_norm": 5.101177215576172, "learning_rate": 3.4342465753424654e-07, "log_odds_chosen": 2.724532127380371, "log_odds_ratio": -0.22803090512752533, "logits/chosen": 0.6572216749191284, "logits/rejected": 0.6467998027801514, "logps/chosen": -2.6601967811584473, "logps/rejected": -5.300689220428467, "loss": 0.638, "nll_loss": 0.6152265667915344, "rewards/accuracies": 0.75, "rewards/chosen": -0.26601967215538025, "rewards/margins": 0.2640492916107178, "rewards/rejected": -0.5300689339637756, "step": 4796 }, { "epoch": 13.13347022587269, "grad_norm": 5.510645389556885, "learning_rate": 3.432876712328767e-07, "log_odds_chosen": 1.6189396381378174, "log_odds_ratio": -0.3303437829017639, "logits/chosen": 1.0481066703796387, "logits/rejected": 1.1238398551940918, "logps/chosen": -2.740567684173584, "logps/rejected": -4.286835193634033, "loss": 0.7148, "nll_loss": 0.6817762851715088, "rewards/accuracies": 1.0, "rewards/chosen": -0.2740567922592163, "rewards/margins": 0.1546267569065094, "rewards/rejected": -0.4286835193634033, "step": 4797 }, { "epoch": 13.136208076659821, "grad_norm": 5.692996978759766, "learning_rate": 3.4315068493150685e-07, "log_odds_chosen": 4.00777530670166, "log_odds_ratio": -0.14377403259277344, "logits/chosen": 0.9780821800231934, "logits/rejected": 1.0301768779754639, "logps/chosen": -1.8568296432495117, "logps/rejected": -5.689935207366943, "loss": 0.6368, "nll_loss": 0.622431218624115, "rewards/accuracies": 1.0, "rewards/chosen": -0.1856829673051834, "rewards/margins": 0.38331058621406555, "rewards/rejected": -0.5689935684204102, "step": 4798 }, { "epoch": 13.138945927446954, "grad_norm": 5.882728099822998, "learning_rate": 3.4301369863013695e-07, "log_odds_chosen": 1.431254506111145, "log_odds_ratio": -0.5349859595298767, "logits/chosen": 0.807280421257019, "logits/rejected": 0.838448166847229, "logps/chosen": -2.631139039993286, "logps/rejected": -4.02537727355957, "loss": 0.6716, "nll_loss": 0.6180878281593323, "rewards/accuracies": 0.625, "rewards/chosen": -0.26311391592025757, "rewards/margins": 0.13942384719848633, "rewards/rejected": -0.4025377333164215, "step": 4799 }, { "epoch": 13.141683778234086, "grad_norm": 5.099698066711426, "learning_rate": 3.428767123287671e-07, "log_odds_chosen": 1.5607631206512451, "log_odds_ratio": -0.32006362080574036, "logits/chosen": 0.9665305018424988, "logits/rejected": 1.0216667652130127, "logps/chosen": -1.788156509399414, "logps/rejected": -3.223491668701172, "loss": 0.6063, "nll_loss": 0.5743095874786377, "rewards/accuracies": 0.875, "rewards/chosen": -0.17881566286087036, "rewards/margins": 0.14353351294994354, "rewards/rejected": -0.3223491609096527, "step": 4800 }, { "epoch": 13.144421629021219, "grad_norm": 4.275941371917725, "learning_rate": 3.4273972602739725e-07, "log_odds_chosen": 2.038337230682373, "log_odds_ratio": -0.20861971378326416, "logits/chosen": 0.8608390688896179, "logits/rejected": 0.8480806350708008, "logps/chosen": -1.7530509233474731, "logps/rejected": -3.6153862476348877, "loss": 0.5942, "nll_loss": 0.573330819606781, "rewards/accuracies": 1.0, "rewards/chosen": -0.1753050833940506, "rewards/margins": 0.1862335503101349, "rewards/rejected": -0.3615386486053467, "step": 4801 }, { "epoch": 13.14715947980835, "grad_norm": 5.642663955688477, "learning_rate": 3.426027397260274e-07, "log_odds_chosen": 2.0049242973327637, "log_odds_ratio": -0.21578256785869598, "logits/chosen": 0.9321728348731995, "logits/rejected": 0.929844081401825, "logps/chosen": -1.7714639902114868, "logps/rejected": -3.595780611038208, "loss": 0.5712, "nll_loss": 0.549578845500946, "rewards/accuracies": 1.0, "rewards/chosen": -0.17714640498161316, "rewards/margins": 0.1824316531419754, "rewards/rejected": -0.35957807302474976, "step": 4802 }, { "epoch": 13.149897330595483, "grad_norm": 4.749226093292236, "learning_rate": 3.424657534246575e-07, "log_odds_chosen": 2.058978319168091, "log_odds_ratio": -0.27957868576049805, "logits/chosen": 0.8326422572135925, "logits/rejected": 0.9106797575950623, "logps/chosen": -1.9090754985809326, "logps/rejected": -3.8726296424865723, "loss": 0.6216, "nll_loss": 0.5936128497123718, "rewards/accuracies": 1.0, "rewards/chosen": -0.1909075528383255, "rewards/margins": 0.19635538756847382, "rewards/rejected": -0.3872629404067993, "step": 4803 }, { "epoch": 13.152635181382614, "grad_norm": 4.744678974151611, "learning_rate": 3.4232876712328765e-07, "log_odds_chosen": 2.9428741931915283, "log_odds_ratio": -0.1338593065738678, "logits/chosen": 0.957038402557373, "logits/rejected": 0.9197607636451721, "logps/chosen": -1.8702867031097412, "logps/rejected": -4.651930809020996, "loss": 0.7781, "nll_loss": 0.76468425989151, "rewards/accuracies": 1.0, "rewards/chosen": -0.1870286762714386, "rewards/margins": 0.2781643867492676, "rewards/rejected": -0.4651930630207062, "step": 4804 }, { "epoch": 13.155373032169747, "grad_norm": 5.814322471618652, "learning_rate": 3.421917808219178e-07, "log_odds_chosen": 2.911532402038574, "log_odds_ratio": -0.21888627111911774, "logits/chosen": 1.0194268226623535, "logits/rejected": 1.0590453147888184, "logps/chosen": -2.524277925491333, "logps/rejected": -5.370542526245117, "loss": 0.644, "nll_loss": 0.6220967769622803, "rewards/accuracies": 0.875, "rewards/chosen": -0.2524278163909912, "rewards/margins": 0.28462645411491394, "rewards/rejected": -0.5370542407035828, "step": 4805 }, { "epoch": 13.158110882956878, "grad_norm": 5.345872402191162, "learning_rate": 3.420547945205479e-07, "log_odds_chosen": 1.260208010673523, "log_odds_ratio": -0.3610542416572571, "logits/chosen": 0.9267878532409668, "logits/rejected": 0.9921880960464478, "logps/chosen": -2.5927677154541016, "logps/rejected": -3.7866079807281494, "loss": 0.6038, "nll_loss": 0.5677053928375244, "rewards/accuracies": 1.0, "rewards/chosen": -0.25927677750587463, "rewards/margins": 0.1193840429186821, "rewards/rejected": -0.37866082787513733, "step": 4806 }, { "epoch": 13.160848733744011, "grad_norm": 4.9288554191589355, "learning_rate": 3.419178082191781e-07, "log_odds_chosen": 3.0705366134643555, "log_odds_ratio": -0.2316749095916748, "logits/chosen": 0.5355561971664429, "logits/rejected": 0.5173124074935913, "logps/chosen": -1.8206143379211426, "logps/rejected": -4.7209978103637695, "loss": 0.5835, "nll_loss": 0.5603784322738647, "rewards/accuracies": 0.875, "rewards/chosen": -0.18206141889095306, "rewards/margins": 0.29003840684890747, "rewards/rejected": -0.47209981083869934, "step": 4807 }, { "epoch": 13.163586584531142, "grad_norm": 6.462345600128174, "learning_rate": 3.417808219178082e-07, "log_odds_chosen": 0.23558001220226288, "log_odds_ratio": -0.6099940538406372, "logits/chosen": 0.9354833364486694, "logits/rejected": 0.9087175130844116, "logps/chosen": -1.7774158716201782, "logps/rejected": -1.9722201824188232, "loss": 0.7786, "nll_loss": 0.7176392078399658, "rewards/accuracies": 0.5, "rewards/chosen": -0.17774158716201782, "rewards/margins": 0.019480450078845024, "rewards/rejected": -0.1972220242023468, "step": 4808 }, { "epoch": 13.166324435318275, "grad_norm": 4.709517955780029, "learning_rate": 3.4164383561643836e-07, "log_odds_chosen": 1.5273590087890625, "log_odds_ratio": -0.34328997135162354, "logits/chosen": 1.0244395732879639, "logits/rejected": 1.054282307624817, "logps/chosen": -2.159864902496338, "logps/rejected": -3.5817997455596924, "loss": 0.6773, "nll_loss": 0.6430044174194336, "rewards/accuracies": 0.875, "rewards/chosen": -0.2159864902496338, "rewards/margins": 0.1421934813261032, "rewards/rejected": -0.3581799864768982, "step": 4809 }, { "epoch": 13.169062286105408, "grad_norm": 6.088039875030518, "learning_rate": 3.4150684931506846e-07, "log_odds_chosen": 1.8598049879074097, "log_odds_ratio": -0.32039201259613037, "logits/chosen": 0.7427774667739868, "logits/rejected": 0.7129881381988525, "logps/chosen": -1.899792194366455, "logps/rejected": -3.661930561065674, "loss": 0.5814, "nll_loss": 0.5493389368057251, "rewards/accuracies": 0.75, "rewards/chosen": -0.18997922539710999, "rewards/margins": 0.1762138307094574, "rewards/rejected": -0.3661930561065674, "step": 4810 }, { "epoch": 13.17180013689254, "grad_norm": 8.72723388671875, "learning_rate": 3.413698630136986e-07, "log_odds_chosen": 2.349135398864746, "log_odds_ratio": -0.4103245437145233, "logits/chosen": 0.8818612694740295, "logits/rejected": 0.8386766910552979, "logps/chosen": -2.7562737464904785, "logps/rejected": -5.00670862197876, "loss": 0.7922, "nll_loss": 0.7511971592903137, "rewards/accuracies": 0.75, "rewards/chosen": -0.27562740445137024, "rewards/margins": 0.22504350543022156, "rewards/rejected": -0.500670850276947, "step": 4811 }, { "epoch": 13.174537987679672, "grad_norm": 5.228219509124756, "learning_rate": 3.4123287671232876e-07, "log_odds_chosen": 2.3747267723083496, "log_odds_ratio": -0.26614078879356384, "logits/chosen": 0.7469927668571472, "logits/rejected": 0.6741524934768677, "logps/chosen": -1.4446407556533813, "logps/rejected": -3.6541757583618164, "loss": 0.6676, "nll_loss": 0.6409518122673035, "rewards/accuracies": 0.875, "rewards/chosen": -0.14446407556533813, "rewards/margins": 0.22095349431037903, "rewards/rejected": -0.36541759967803955, "step": 4812 }, { "epoch": 13.177275838466803, "grad_norm": 5.785517692565918, "learning_rate": 3.4109589041095886e-07, "log_odds_chosen": 2.1674606800079346, "log_odds_ratio": -0.28990015387535095, "logits/chosen": 0.9413838386535645, "logits/rejected": 0.9551399946212769, "logps/chosen": -2.262986898422241, "logps/rejected": -4.299156188964844, "loss": 0.7574, "nll_loss": 0.728446900844574, "rewards/accuracies": 0.875, "rewards/chosen": -0.22629868984222412, "rewards/margins": 0.2036169469356537, "rewards/rejected": -0.4299156665802002, "step": 4813 }, { "epoch": 13.180013689253936, "grad_norm": 5.568059921264648, "learning_rate": 3.4095890410958907e-07, "log_odds_chosen": 1.001265287399292, "log_odds_ratio": -0.4660448729991913, "logits/chosen": 1.0324879884719849, "logits/rejected": 1.0613068342208862, "logps/chosen": -1.9883723258972168, "logps/rejected": -2.917778730392456, "loss": 0.6702, "nll_loss": 0.6236131191253662, "rewards/accuracies": 0.75, "rewards/chosen": -0.19883722066879272, "rewards/margins": 0.09294067323207855, "rewards/rejected": -0.2917778789997101, "step": 4814 }, { "epoch": 13.182751540041068, "grad_norm": 5.604564666748047, "learning_rate": 3.4082191780821917e-07, "log_odds_chosen": 0.6566210389137268, "log_odds_ratio": -0.4871147871017456, "logits/chosen": 0.8092858791351318, "logits/rejected": 0.7253297567367554, "logps/chosen": -1.9587337970733643, "logps/rejected": -2.5304315090179443, "loss": 0.7101, "nll_loss": 0.661429762840271, "rewards/accuracies": 0.75, "rewards/chosen": -0.19587337970733643, "rewards/margins": 0.057169780135154724, "rewards/rejected": -0.25304314494132996, "step": 4815 }, { "epoch": 13.1854893908282, "grad_norm": 5.2390031814575195, "learning_rate": 3.406849315068493e-07, "log_odds_chosen": 2.784942626953125, "log_odds_ratio": -0.14220523834228516, "logits/chosen": 0.8663507103919983, "logits/rejected": 0.9334684014320374, "logps/chosen": -1.7045338153839111, "logps/rejected": -4.283698081970215, "loss": 0.6052, "nll_loss": 0.5909647345542908, "rewards/accuracies": 1.0, "rewards/chosen": -0.17045339941978455, "rewards/margins": 0.2579163908958435, "rewards/rejected": -0.42836979031562805, "step": 4816 }, { "epoch": 13.188227241615332, "grad_norm": 5.527275085449219, "learning_rate": 3.405479452054794e-07, "log_odds_chosen": 1.871207356452942, "log_odds_ratio": -0.3212028443813324, "logits/chosen": 0.7262759804725647, "logits/rejected": 0.7809332013130188, "logps/chosen": -2.461158275604248, "logps/rejected": -4.2086286544799805, "loss": 0.7678, "nll_loss": 0.7356541752815247, "rewards/accuracies": 0.875, "rewards/chosen": -0.24611583352088928, "rewards/margins": 0.1747470200061798, "rewards/rejected": -0.4208628535270691, "step": 4817 }, { "epoch": 13.190965092402465, "grad_norm": 6.3872199058532715, "learning_rate": 3.4041095890410957e-07, "log_odds_chosen": 3.689924955368042, "log_odds_ratio": -0.1792300045490265, "logits/chosen": 0.7770296335220337, "logits/rejected": 0.8195233345031738, "logps/chosen": -2.0777816772460938, "logps/rejected": -5.646049499511719, "loss": 0.7005, "nll_loss": 0.6825332045555115, "rewards/accuracies": 1.0, "rewards/chosen": -0.2077781856060028, "rewards/margins": 0.3568267822265625, "rewards/rejected": -0.5646049976348877, "step": 4818 }, { "epoch": 13.193702943189596, "grad_norm": 5.056344509124756, "learning_rate": 3.402739726027397e-07, "log_odds_chosen": 4.418719291687012, "log_odds_ratio": -0.09911756962537766, "logits/chosen": 0.9538188576698303, "logits/rejected": 1.0150322914123535, "logps/chosen": -3.268428325653076, "logps/rejected": -7.485903263092041, "loss": 0.9388, "nll_loss": 0.9289291501045227, "rewards/accuracies": 1.0, "rewards/chosen": -0.32684287428855896, "rewards/margins": 0.42174750566482544, "rewards/rejected": -0.748590350151062, "step": 4819 }, { "epoch": 13.196440793976729, "grad_norm": 7.265458583831787, "learning_rate": 3.401369863013698e-07, "log_odds_chosen": 2.03969669342041, "log_odds_ratio": -0.4359629452228546, "logits/chosen": 0.933815598487854, "logits/rejected": 0.9321390390396118, "logps/chosen": -2.0921428203582764, "logps/rejected": -4.0165228843688965, "loss": 0.6909, "nll_loss": 0.6472963094711304, "rewards/accuracies": 0.75, "rewards/chosen": -0.20921427011489868, "rewards/margins": 0.19243799149990082, "rewards/rejected": -0.4016522467136383, "step": 4820 }, { "epoch": 13.19917864476386, "grad_norm": 6.093865394592285, "learning_rate": 3.4000000000000003e-07, "log_odds_chosen": 3.880897045135498, "log_odds_ratio": -0.042471516877412796, "logits/chosen": 1.0629695653915405, "logits/rejected": 1.107100009918213, "logps/chosen": -2.211221694946289, "logps/rejected": -5.955292224884033, "loss": 0.6217, "nll_loss": 0.6174471974372864, "rewards/accuracies": 1.0, "rewards/chosen": -0.221122145652771, "rewards/margins": 0.3744070827960968, "rewards/rejected": -0.5955291986465454, "step": 4821 }, { "epoch": 13.201916495550993, "grad_norm": 8.327030181884766, "learning_rate": 3.398630136986301e-07, "log_odds_chosen": 2.5081210136413574, "log_odds_ratio": -0.39186301827430725, "logits/chosen": 0.9421581029891968, "logits/rejected": 1.0449142456054688, "logps/chosen": -2.641911029815674, "logps/rejected": -5.077936172485352, "loss": 0.7438, "nll_loss": 0.7046035528182983, "rewards/accuracies": 0.75, "rewards/chosen": -0.2641911208629608, "rewards/margins": 0.2436024695634842, "rewards/rejected": -0.5077935457229614, "step": 4822 }, { "epoch": 13.204654346338124, "grad_norm": 6.540102481842041, "learning_rate": 3.397260273972602e-07, "log_odds_chosen": 1.1748114824295044, "log_odds_ratio": -0.4768194854259491, "logits/chosen": 1.0317232608795166, "logits/rejected": 0.9761956930160522, "logps/chosen": -2.6792104244232178, "logps/rejected": -3.7854809761047363, "loss": 0.7327, "nll_loss": 0.6849851012229919, "rewards/accuracies": 0.875, "rewards/chosen": -0.2679210305213928, "rewards/margins": 0.11062707006931305, "rewards/rejected": -0.3785480856895447, "step": 4823 }, { "epoch": 13.207392197125257, "grad_norm": 4.6989264488220215, "learning_rate": 3.395890410958904e-07, "log_odds_chosen": 2.8058972358703613, "log_odds_ratio": -0.1670600175857544, "logits/chosen": 0.8998318314552307, "logits/rejected": 0.8465563654899597, "logps/chosen": -1.6901249885559082, "logps/rejected": -4.289239406585693, "loss": 0.6275, "nll_loss": 0.6107498407363892, "rewards/accuracies": 1.0, "rewards/chosen": -0.16901251673698425, "rewards/margins": 0.2599114179611206, "rewards/rejected": -0.42892390489578247, "step": 4824 }, { "epoch": 13.210130047912388, "grad_norm": 5.788471221923828, "learning_rate": 3.3945205479452053e-07, "log_odds_chosen": 1.9124948978424072, "log_odds_ratio": -0.23042550683021545, "logits/chosen": 0.8337477445602417, "logits/rejected": 0.9067800045013428, "logps/chosen": -2.203700304031372, "logps/rejected": -4.017083168029785, "loss": 0.6396, "nll_loss": 0.6165618896484375, "rewards/accuracies": 1.0, "rewards/chosen": -0.22037002444267273, "rewards/margins": 0.18133828043937683, "rewards/rejected": -0.40170830488204956, "step": 4825 }, { "epoch": 13.212867898699521, "grad_norm": 5.700066566467285, "learning_rate": 3.393150684931507e-07, "log_odds_chosen": 3.6074981689453125, "log_odds_ratio": -0.08017370104789734, "logits/chosen": 0.7373648285865784, "logits/rejected": 0.7572054862976074, "logps/chosen": -1.3614908456802368, "logps/rejected": -4.66044807434082, "loss": 0.4867, "nll_loss": 0.478682279586792, "rewards/accuracies": 1.0, "rewards/chosen": -0.1361490935087204, "rewards/margins": 0.3298957347869873, "rewards/rejected": -0.4660448431968689, "step": 4826 }, { "epoch": 13.215605749486652, "grad_norm": 4.868828296661377, "learning_rate": 3.391780821917808e-07, "log_odds_chosen": 3.068995952606201, "log_odds_ratio": -0.09434632211923599, "logits/chosen": 1.0115264654159546, "logits/rejected": 1.052660346031189, "logps/chosen": -1.641963243484497, "logps/rejected": -4.500404357910156, "loss": 0.601, "nll_loss": 0.5915464162826538, "rewards/accuracies": 1.0, "rewards/chosen": -0.16419632732868195, "rewards/margins": 0.285844087600708, "rewards/rejected": -0.45004042983055115, "step": 4827 }, { "epoch": 13.218343600273785, "grad_norm": 6.122102737426758, "learning_rate": 3.39041095890411e-07, "log_odds_chosen": 2.3422164916992188, "log_odds_ratio": -0.17301224172115326, "logits/chosen": 1.06703782081604, "logits/rejected": 1.143620491027832, "logps/chosen": -2.4013359546661377, "logps/rejected": -4.642453193664551, "loss": 0.7134, "nll_loss": 0.6960645318031311, "rewards/accuracies": 1.0, "rewards/chosen": -0.2401336133480072, "rewards/margins": 0.22411170601844788, "rewards/rejected": -0.4642453193664551, "step": 4828 }, { "epoch": 13.221081451060916, "grad_norm": 6.148950576782227, "learning_rate": 3.389041095890411e-07, "log_odds_chosen": 1.325655221939087, "log_odds_ratio": -0.42619210481643677, "logits/chosen": 0.7414988279342651, "logits/rejected": 0.6471396684646606, "logps/chosen": -2.0399389266967773, "logps/rejected": -3.294513702392578, "loss": 0.6946, "nll_loss": 0.6519871354103088, "rewards/accuracies": 0.875, "rewards/chosen": -0.20399388670921326, "rewards/margins": 0.12545748054981232, "rewards/rejected": -0.32945138216018677, "step": 4829 }, { "epoch": 13.22381930184805, "grad_norm": 5.081315994262695, "learning_rate": 3.387671232876712e-07, "log_odds_chosen": 2.283493757247925, "log_odds_ratio": -0.1992517113685608, "logits/chosen": 0.6596479415893555, "logits/rejected": 0.62923264503479, "logps/chosen": -2.039787769317627, "logps/rejected": -4.161496162414551, "loss": 0.6137, "nll_loss": 0.593805193901062, "rewards/accuracies": 1.0, "rewards/chosen": -0.2039787769317627, "rewards/margins": 0.21217085421085358, "rewards/rejected": -0.4161496162414551, "step": 4830 }, { "epoch": 13.22655715263518, "grad_norm": 6.45013952255249, "learning_rate": 3.3863013698630134e-07, "log_odds_chosen": 0.35039421916007996, "log_odds_ratio": -0.5729750394821167, "logits/chosen": 0.8046308755874634, "logits/rejected": 0.7903900742530823, "logps/chosen": -2.445871591567993, "logps/rejected": -2.762376070022583, "loss": 0.7342, "nll_loss": 0.6768760085105896, "rewards/accuracies": 0.75, "rewards/chosen": -0.24458715319633484, "rewards/margins": 0.03165045380592346, "rewards/rejected": -0.2762376070022583, "step": 4831 }, { "epoch": 13.229295003422314, "grad_norm": 6.0105814933776855, "learning_rate": 3.384931506849315e-07, "log_odds_chosen": 0.5481719970703125, "log_odds_ratio": -0.634688138961792, "logits/chosen": 0.8416948914527893, "logits/rejected": 0.9141440987586975, "logps/chosen": -2.758883476257324, "logps/rejected": -3.2488272190093994, "loss": 0.6668, "nll_loss": 0.6033044457435608, "rewards/accuracies": 0.75, "rewards/chosen": -0.2758883535861969, "rewards/margins": 0.048994362354278564, "rewards/rejected": -0.32488271594047546, "step": 4832 }, { "epoch": 13.232032854209445, "grad_norm": 5.042108535766602, "learning_rate": 3.3835616438356164e-07, "log_odds_chosen": 3.421721935272217, "log_odds_ratio": -0.16580913960933685, "logits/chosen": 1.1141619682312012, "logits/rejected": 1.1662328243255615, "logps/chosen": -2.2256689071655273, "logps/rejected": -5.495140075683594, "loss": 0.6627, "nll_loss": 0.6461191773414612, "rewards/accuracies": 1.0, "rewards/chosen": -0.2225668877363205, "rewards/margins": 0.3269471526145935, "rewards/rejected": -0.5495139956474304, "step": 4833 }, { "epoch": 13.234770704996578, "grad_norm": 6.552184581756592, "learning_rate": 3.3821917808219174e-07, "log_odds_chosen": 2.9410510063171387, "log_odds_ratio": -0.292157381772995, "logits/chosen": 0.6741356253623962, "logits/rejected": 0.5889524221420288, "logps/chosen": -2.1858129501342773, "logps/rejected": -5.022681713104248, "loss": 0.7427, "nll_loss": 0.7134436368942261, "rewards/accuracies": 1.0, "rewards/chosen": -0.21858128905296326, "rewards/margins": 0.28368690609931946, "rewards/rejected": -0.5022681951522827, "step": 4834 }, { "epoch": 13.23750855578371, "grad_norm": 5.726476192474365, "learning_rate": 3.3808219178082194e-07, "log_odds_chosen": 1.6313714981079102, "log_odds_ratio": -0.37897780537605286, "logits/chosen": 0.5299288630485535, "logits/rejected": 0.5424725413322449, "logps/chosen": -2.4046146869659424, "logps/rejected": -3.94757080078125, "loss": 0.7117, "nll_loss": 0.6737527251243591, "rewards/accuracies": 0.75, "rewards/chosen": -0.24046145379543304, "rewards/margins": 0.1542956531047821, "rewards/rejected": -0.39475709199905396, "step": 4835 }, { "epoch": 13.240246406570842, "grad_norm": 7.8035783767700195, "learning_rate": 3.3794520547945204e-07, "log_odds_chosen": 3.4741463661193848, "log_odds_ratio": -0.21438901126384735, "logits/chosen": 1.0070031881332397, "logits/rejected": 0.9920791387557983, "logps/chosen": -3.7400929927825928, "logps/rejected": -7.142614364624023, "loss": 0.9016, "nll_loss": 0.8801156282424927, "rewards/accuracies": 1.0, "rewards/chosen": -0.37400931119918823, "rewards/margins": 0.3402521014213562, "rewards/rejected": -0.7142614126205444, "step": 4836 }, { "epoch": 13.242984257357975, "grad_norm": 5.249125957489014, "learning_rate": 3.3780821917808214e-07, "log_odds_chosen": 2.2430479526519775, "log_odds_ratio": -0.19590936601161957, "logits/chosen": 0.874778687953949, "logits/rejected": 0.9216553568840027, "logps/chosen": -2.0309839248657227, "logps/rejected": -4.125965118408203, "loss": 0.6734, "nll_loss": 0.6537818908691406, "rewards/accuracies": 1.0, "rewards/chosen": -0.20309840142726898, "rewards/margins": 0.20949813723564148, "rewards/rejected": -0.41259652376174927, "step": 4837 }, { "epoch": 13.245722108145106, "grad_norm": 6.548923492431641, "learning_rate": 3.376712328767123e-07, "log_odds_chosen": 2.488888740539551, "log_odds_ratio": -0.2683975398540497, "logits/chosen": 0.9159259796142578, "logits/rejected": 0.9245074391365051, "logps/chosen": -2.960751533508301, "logps/rejected": -5.361361026763916, "loss": 0.7956, "nll_loss": 0.7687822580337524, "rewards/accuracies": 0.875, "rewards/chosen": -0.29607513546943665, "rewards/margins": 0.2400609701871872, "rewards/rejected": -0.5361360907554626, "step": 4838 }, { "epoch": 13.248459958932239, "grad_norm": 7.470399856567383, "learning_rate": 3.3753424657534245e-07, "log_odds_chosen": 3.093278646469116, "log_odds_ratio": -0.2750774621963501, "logits/chosen": 1.1250925064086914, "logits/rejected": 1.1189217567443848, "logps/chosen": -2.6788692474365234, "logps/rejected": -5.695204734802246, "loss": 0.7061, "nll_loss": 0.6786050796508789, "rewards/accuracies": 0.875, "rewards/chosen": -0.2678869366645813, "rewards/margins": 0.3016335070133209, "rewards/rejected": -0.5695204734802246, "step": 4839 }, { "epoch": 13.25119780971937, "grad_norm": 5.38200044631958, "learning_rate": 3.373972602739726e-07, "log_odds_chosen": 2.298880100250244, "log_odds_ratio": -0.15283720195293427, "logits/chosen": 1.0556674003601074, "logits/rejected": 1.1287821531295776, "logps/chosen": -2.1781158447265625, "logps/rejected": -4.306475639343262, "loss": 0.6239, "nll_loss": 0.6086165904998779, "rewards/accuracies": 1.0, "rewards/chosen": -0.21781159937381744, "rewards/margins": 0.21283596754074097, "rewards/rejected": -0.4306475818157196, "step": 4840 }, { "epoch": 13.253935660506503, "grad_norm": 6.7335052490234375, "learning_rate": 3.372602739726027e-07, "log_odds_chosen": 2.481788396835327, "log_odds_ratio": -0.2516207695007324, "logits/chosen": 0.7470805644989014, "logits/rejected": 0.7434396743774414, "logps/chosen": -2.6853442192077637, "logps/rejected": -5.057253837585449, "loss": 0.7157, "nll_loss": 0.6905497312545776, "rewards/accuracies": 0.875, "rewards/chosen": -0.26853442192077637, "rewards/margins": 0.23719091713428497, "rewards/rejected": -0.5057253241539001, "step": 4841 }, { "epoch": 13.256673511293634, "grad_norm": 5.343530654907227, "learning_rate": 3.371232876712329e-07, "log_odds_chosen": 1.1463576555252075, "log_odds_ratio": -0.3739928901195526, "logits/chosen": 1.146411657333374, "logits/rejected": 1.1395081281661987, "logps/chosen": -2.1727943420410156, "logps/rejected": -3.23758864402771, "loss": 0.6496, "nll_loss": 0.6122433543205261, "rewards/accuracies": 0.875, "rewards/chosen": -0.21727943420410156, "rewards/margins": 0.10647940635681152, "rewards/rejected": -0.3237588405609131, "step": 4842 }, { "epoch": 13.259411362080767, "grad_norm": 4.8253068923950195, "learning_rate": 3.36986301369863e-07, "log_odds_chosen": 3.0191116333007812, "log_odds_ratio": -0.21845398843288422, "logits/chosen": 0.8438523411750793, "logits/rejected": 0.9125286340713501, "logps/chosen": -2.2099661827087402, "logps/rejected": -5.137391090393066, "loss": 0.8024, "nll_loss": 0.7805813550949097, "rewards/accuracies": 1.0, "rewards/chosen": -0.22099661827087402, "rewards/margins": 0.2927425503730774, "rewards/rejected": -0.5137391686439514, "step": 4843 }, { "epoch": 13.262149212867898, "grad_norm": 5.230648994445801, "learning_rate": 3.368493150684931e-07, "log_odds_chosen": 3.269676685333252, "log_odds_ratio": -0.14568525552749634, "logits/chosen": 0.8474076390266418, "logits/rejected": 0.8214202523231506, "logps/chosen": -1.6660573482513428, "logps/rejected": -4.662655353546143, "loss": 0.7604, "nll_loss": 0.7458038330078125, "rewards/accuracies": 1.0, "rewards/chosen": -0.16660574078559875, "rewards/margins": 0.299659788608551, "rewards/rejected": -0.46626555919647217, "step": 4844 }, { "epoch": 13.264887063655031, "grad_norm": 4.275208473205566, "learning_rate": 3.367123287671233e-07, "log_odds_chosen": 3.3515264987945557, "log_odds_ratio": -0.1494414359331131, "logits/chosen": 0.8626422882080078, "logits/rejected": 0.8827004432678223, "logps/chosen": -1.8791868686676025, "logps/rejected": -5.042364120483398, "loss": 0.6443, "nll_loss": 0.6293821334838867, "rewards/accuracies": 1.0, "rewards/chosen": -0.18791869282722473, "rewards/margins": 0.31631773710250854, "rewards/rejected": -0.5042364001274109, "step": 4845 }, { "epoch": 13.267624914442163, "grad_norm": 5.73556661605835, "learning_rate": 3.365753424657534e-07, "log_odds_chosen": 1.2023346424102783, "log_odds_ratio": -0.474971741437912, "logits/chosen": 1.018921136856079, "logits/rejected": 1.0236660242080688, "logps/chosen": -3.0694289207458496, "logps/rejected": -4.183727264404297, "loss": 0.6869, "nll_loss": 0.6393963098526001, "rewards/accuracies": 0.75, "rewards/chosen": -0.306942880153656, "rewards/margins": 0.11142988502979279, "rewards/rejected": -0.4183727502822876, "step": 4846 }, { "epoch": 13.270362765229295, "grad_norm": 5.145096778869629, "learning_rate": 3.3643835616438356e-07, "log_odds_chosen": 1.4135196208953857, "log_odds_ratio": -0.3367282450199127, "logits/chosen": 0.698056697845459, "logits/rejected": 0.735248327255249, "logps/chosen": -2.349452495574951, "logps/rejected": -3.683131217956543, "loss": 0.7106, "nll_loss": 0.6769691705703735, "rewards/accuracies": 0.875, "rewards/chosen": -0.23494523763656616, "rewards/margins": 0.13336791098117828, "rewards/rejected": -0.36831313371658325, "step": 4847 }, { "epoch": 13.273100616016427, "grad_norm": 4.862466335296631, "learning_rate": 3.3630136986301366e-07, "log_odds_chosen": 2.462405204772949, "log_odds_ratio": -0.21867981553077698, "logits/chosen": 0.7603460550308228, "logits/rejected": 0.8201457858085632, "logps/chosen": -2.4104630947113037, "logps/rejected": -4.713696002960205, "loss": 0.7611, "nll_loss": 0.7391852140426636, "rewards/accuracies": 1.0, "rewards/chosen": -0.24104630947113037, "rewards/margins": 0.23032326996326447, "rewards/rejected": -0.47136959433555603, "step": 4848 }, { "epoch": 13.27583846680356, "grad_norm": 5.7318196296691895, "learning_rate": 3.3616438356164386e-07, "log_odds_chosen": 1.9322068691253662, "log_odds_ratio": -0.24553319811820984, "logits/chosen": 0.9833452701568604, "logits/rejected": 0.9978066086769104, "logps/chosen": -2.2799019813537598, "logps/rejected": -4.097405910491943, "loss": 0.5845, "nll_loss": 0.5599339008331299, "rewards/accuracies": 1.0, "rewards/chosen": -0.22799018025398254, "rewards/margins": 0.18175041675567627, "rewards/rejected": -0.4097405970096588, "step": 4849 }, { "epoch": 13.27857631759069, "grad_norm": 5.655622482299805, "learning_rate": 3.3602739726027396e-07, "log_odds_chosen": 1.2776858806610107, "log_odds_ratio": -0.37531161308288574, "logits/chosen": 0.6303423643112183, "logits/rejected": 0.6179483532905579, "logps/chosen": -2.090223789215088, "logps/rejected": -3.2969956398010254, "loss": 0.6045, "nll_loss": 0.566951334476471, "rewards/accuracies": 0.875, "rewards/chosen": -0.2090224027633667, "rewards/margins": 0.1206771731376648, "rewards/rejected": -0.3296995759010315, "step": 4850 }, { "epoch": 13.281314168377824, "grad_norm": 5.2629852294921875, "learning_rate": 3.3589041095890406e-07, "log_odds_chosen": 2.8620262145996094, "log_odds_ratio": -0.35897591710090637, "logits/chosen": 0.6455891132354736, "logits/rejected": 0.6843904852867126, "logps/chosen": -2.0670528411865234, "logps/rejected": -4.809942245483398, "loss": 0.6631, "nll_loss": 0.6272017955780029, "rewards/accuracies": 0.875, "rewards/chosen": -0.20670530200004578, "rewards/margins": 0.27428892254829407, "rewards/rejected": -0.48099422454833984, "step": 4851 }, { "epoch": 13.284052019164955, "grad_norm": 7.295691967010498, "learning_rate": 3.3575342465753426e-07, "log_odds_chosen": 1.5259398221969604, "log_odds_ratio": -0.5623388886451721, "logits/chosen": 0.9490694403648376, "logits/rejected": 0.9335137605667114, "logps/chosen": -2.396888494491577, "logps/rejected": -3.8111422061920166, "loss": 0.8051, "nll_loss": 0.7489156723022461, "rewards/accuracies": 0.75, "rewards/chosen": -0.23968884348869324, "rewards/margins": 0.14142538607120514, "rewards/rejected": -0.38111424446105957, "step": 4852 }, { "epoch": 13.286789869952088, "grad_norm": 6.2276716232299805, "learning_rate": 3.3561643835616436e-07, "log_odds_chosen": 2.2896995544433594, "log_odds_ratio": -0.23323409259319305, "logits/chosen": 1.0323266983032227, "logits/rejected": 1.1265738010406494, "logps/chosen": -3.014106273651123, "logps/rejected": -5.228731632232666, "loss": 0.6432, "nll_loss": 0.6198388338088989, "rewards/accuracies": 0.875, "rewards/chosen": -0.30141061544418335, "rewards/margins": 0.22146253287792206, "rewards/rejected": -0.5228731632232666, "step": 4853 }, { "epoch": 13.289527720739219, "grad_norm": 7.019647121429443, "learning_rate": 3.354794520547945e-07, "log_odds_chosen": 3.6375787258148193, "log_odds_ratio": -0.18891602754592896, "logits/chosen": 1.082213044166565, "logits/rejected": 1.1374976634979248, "logps/chosen": -2.6530656814575195, "logps/rejected": -6.185135841369629, "loss": 0.7939, "nll_loss": 0.7749743461608887, "rewards/accuracies": 0.875, "rewards/chosen": -0.26530659198760986, "rewards/margins": 0.3532070517539978, "rewards/rejected": -0.6185135841369629, "step": 4854 }, { "epoch": 13.292265571526352, "grad_norm": 5.528489589691162, "learning_rate": 3.353424657534246e-07, "log_odds_chosen": 3.0330777168273926, "log_odds_ratio": -0.22408664226531982, "logits/chosen": 0.7919991612434387, "logits/rejected": 0.8368430733680725, "logps/chosen": -2.3753600120544434, "logps/rejected": -5.299752235412598, "loss": 0.6912, "nll_loss": 0.6687670350074768, "rewards/accuracies": 1.0, "rewards/chosen": -0.2375360131263733, "rewards/margins": 0.2924392521381378, "rewards/rejected": -0.5299752950668335, "step": 4855 }, { "epoch": 13.295003422313483, "grad_norm": 5.435190200805664, "learning_rate": 3.352054794520548e-07, "log_odds_chosen": 3.6961615085601807, "log_odds_ratio": -0.11994633823633194, "logits/chosen": 0.9792293310165405, "logits/rejected": 0.9607248306274414, "logps/chosen": -1.815140962600708, "logps/rejected": -5.332755088806152, "loss": 0.692, "nll_loss": 0.6799881458282471, "rewards/accuracies": 1.0, "rewards/chosen": -0.18151411414146423, "rewards/margins": 0.3517614006996155, "rewards/rejected": -0.5332754850387573, "step": 4856 }, { "epoch": 13.297741273100616, "grad_norm": 5.535460472106934, "learning_rate": 3.350684931506849e-07, "log_odds_chosen": 4.343334197998047, "log_odds_ratio": -0.029317690059542656, "logits/chosen": 1.0302538871765137, "logits/rejected": 1.1219451427459717, "logps/chosen": -2.458806037902832, "logps/rejected": -6.618339538574219, "loss": 0.8083, "nll_loss": 0.8054176568984985, "rewards/accuracies": 1.0, "rewards/chosen": -0.2458806037902832, "rewards/margins": 0.4159533381462097, "rewards/rejected": -0.6618339419364929, "step": 4857 }, { "epoch": 13.300479123887747, "grad_norm": 6.979022026062012, "learning_rate": 3.34931506849315e-07, "log_odds_chosen": 1.5801652669906616, "log_odds_ratio": -0.3937951624393463, "logits/chosen": 0.8719134330749512, "logits/rejected": 0.8826443552970886, "logps/chosen": -2.394925355911255, "logps/rejected": -3.88508939743042, "loss": 0.7305, "nll_loss": 0.6910934448242188, "rewards/accuracies": 0.875, "rewards/chosen": -0.2394925355911255, "rewards/margins": 0.14901642501354218, "rewards/rejected": -0.38850894570350647, "step": 4858 }, { "epoch": 13.30321697467488, "grad_norm": 4.787155628204346, "learning_rate": 3.347945205479452e-07, "log_odds_chosen": 2.0378527641296387, "log_odds_ratio": -0.2165507972240448, "logits/chosen": 1.1307435035705566, "logits/rejected": 1.186744213104248, "logps/chosen": -2.4192819595336914, "logps/rejected": -4.293766975402832, "loss": 0.6126, "nll_loss": 0.5909731984138489, "rewards/accuracies": 1.0, "rewards/chosen": -0.24192821979522705, "rewards/margins": 0.18744851648807526, "rewards/rejected": -0.4293767213821411, "step": 4859 }, { "epoch": 13.305954825462011, "grad_norm": 5.267174243927002, "learning_rate": 3.346575342465753e-07, "log_odds_chosen": 1.7849620580673218, "log_odds_ratio": -0.279822438955307, "logits/chosen": 0.7911041975021362, "logits/rejected": 0.7128261923789978, "logps/chosen": -2.08686900138855, "logps/rejected": -3.73675537109375, "loss": 0.728, "nll_loss": 0.700046718120575, "rewards/accuracies": 0.875, "rewards/chosen": -0.2086869180202484, "rewards/margins": 0.16498863697052002, "rewards/rejected": -0.37367552518844604, "step": 4860 }, { "epoch": 13.308692676249144, "grad_norm": 5.151487350463867, "learning_rate": 3.345205479452055e-07, "log_odds_chosen": 2.3097753524780273, "log_odds_ratio": -0.21289241313934326, "logits/chosen": 1.1143001317977905, "logits/rejected": 1.141536831855774, "logps/chosen": -2.30151629447937, "logps/rejected": -4.526742935180664, "loss": 0.6481, "nll_loss": 0.626807451248169, "rewards/accuracies": 0.875, "rewards/chosen": -0.23015162348747253, "rewards/margins": 0.22252264618873596, "rewards/rejected": -0.4526742696762085, "step": 4861 }, { "epoch": 13.311430527036277, "grad_norm": 6.059231758117676, "learning_rate": 3.343835616438356e-07, "log_odds_chosen": 2.853619337081909, "log_odds_ratio": -0.26518791913986206, "logits/chosen": 0.8864511847496033, "logits/rejected": 0.9553533792495728, "logps/chosen": -2.420668125152588, "logps/rejected": -5.126539707183838, "loss": 0.6779, "nll_loss": 0.6513857841491699, "rewards/accuracies": 0.875, "rewards/chosen": -0.24206683039665222, "rewards/margins": 0.27058714628219604, "rewards/rejected": -0.5126539468765259, "step": 4862 }, { "epoch": 13.314168377823409, "grad_norm": 7.060525417327881, "learning_rate": 3.342465753424658e-07, "log_odds_chosen": 3.471433162689209, "log_odds_ratio": -0.38134026527404785, "logits/chosen": 1.0028162002563477, "logits/rejected": 1.0157448053359985, "logps/chosen": -2.661119222640991, "logps/rejected": -6.026029586791992, "loss": 0.7868, "nll_loss": 0.7487013339996338, "rewards/accuracies": 0.875, "rewards/chosen": -0.26611194014549255, "rewards/margins": 0.33649104833602905, "rewards/rejected": -0.602603018283844, "step": 4863 }, { "epoch": 13.316906228610542, "grad_norm": 6.57725191116333, "learning_rate": 3.341095890410959e-07, "log_odds_chosen": 2.563284397125244, "log_odds_ratio": -0.2866622507572174, "logits/chosen": 0.9568719863891602, "logits/rejected": 0.934932291507721, "logps/chosen": -2.3908610343933105, "logps/rejected": -4.855958938598633, "loss": 0.805, "nll_loss": 0.7763333320617676, "rewards/accuracies": 1.0, "rewards/chosen": -0.2390861064195633, "rewards/margins": 0.24650979042053223, "rewards/rejected": -0.4855958819389343, "step": 4864 }, { "epoch": 13.319644079397673, "grad_norm": 6.157006740570068, "learning_rate": 3.33972602739726e-07, "log_odds_chosen": 1.1423779726028442, "log_odds_ratio": -0.5363478064537048, "logits/chosen": 0.8789008259773254, "logits/rejected": 0.8374574184417725, "logps/chosen": -2.9898524284362793, "logps/rejected": -4.065550804138184, "loss": 0.8625, "nll_loss": 0.80882328748703, "rewards/accuracies": 0.75, "rewards/chosen": -0.29898524284362793, "rewards/margins": 0.10756983608007431, "rewards/rejected": -0.4065551161766052, "step": 4865 }, { "epoch": 13.322381930184806, "grad_norm": 6.227217197418213, "learning_rate": 3.338356164383562e-07, "log_odds_chosen": 1.0677893161773682, "log_odds_ratio": -0.5537356734275818, "logits/chosen": 0.8253211975097656, "logits/rejected": 0.8849424719810486, "logps/chosen": -2.4814653396606445, "logps/rejected": -3.477365493774414, "loss": 0.7147, "nll_loss": 0.6593315005302429, "rewards/accuracies": 0.75, "rewards/chosen": -0.24814654886722565, "rewards/margins": 0.09959003329277039, "rewards/rejected": -0.34773653745651245, "step": 4866 }, { "epoch": 13.325119780971937, "grad_norm": 6.616859436035156, "learning_rate": 3.336986301369863e-07, "log_odds_chosen": 0.6956597566604614, "log_odds_ratio": -0.50022292137146, "logits/chosen": 0.804617702960968, "logits/rejected": 0.845042884349823, "logps/chosen": -3.137160301208496, "logps/rejected": -3.7933390140533447, "loss": 0.7421, "nll_loss": 0.6921273469924927, "rewards/accuracies": 0.75, "rewards/chosen": -0.3137160539627075, "rewards/margins": 0.0656178817152977, "rewards/rejected": -0.3793339133262634, "step": 4867 }, { "epoch": 13.32785763175907, "grad_norm": 4.599228382110596, "learning_rate": 3.3356164383561643e-07, "log_odds_chosen": 2.5989179611206055, "log_odds_ratio": -0.2230910211801529, "logits/chosen": 0.711184561252594, "logits/rejected": 0.7364316582679749, "logps/chosen": -1.5180566310882568, "logps/rejected": -3.876155138015747, "loss": 0.6222, "nll_loss": 0.5999310612678528, "rewards/accuracies": 1.0, "rewards/chosen": -0.15180566906929016, "rewards/margins": 0.2358098328113556, "rewards/rejected": -0.38761553168296814, "step": 4868 }, { "epoch": 13.330595482546201, "grad_norm": 5.098674774169922, "learning_rate": 3.3342465753424653e-07, "log_odds_chosen": 2.085336446762085, "log_odds_ratio": -0.2209983468055725, "logits/chosen": 0.6291464567184448, "logits/rejected": 0.6436858177185059, "logps/chosen": -1.745339274406433, "logps/rejected": -3.627807378768921, "loss": 0.5754, "nll_loss": 0.5533077716827393, "rewards/accuracies": 1.0, "rewards/chosen": -0.1745339184999466, "rewards/margins": 0.18824680149555206, "rewards/rejected": -0.36278074979782104, "step": 4869 }, { "epoch": 13.333333333333334, "grad_norm": 7.207891941070557, "learning_rate": 3.332876712328767e-07, "log_odds_chosen": 2.1594643592834473, "log_odds_ratio": -0.4613600969314575, "logits/chosen": 0.82229083776474, "logits/rejected": 0.8326465487480164, "logps/chosen": -2.643423080444336, "logps/rejected": -4.7567138671875, "loss": 0.6993, "nll_loss": 0.6531198024749756, "rewards/accuracies": 0.75, "rewards/chosen": -0.264342337846756, "rewards/margins": 0.21132907271385193, "rewards/rejected": -0.4756714105606079, "step": 4870 }, { "epoch": 13.336071184120465, "grad_norm": 4.654568195343018, "learning_rate": 3.3315068493150684e-07, "log_odds_chosen": 2.74021053314209, "log_odds_ratio": -0.14152273535728455, "logits/chosen": 1.0443848371505737, "logits/rejected": 1.0294960737228394, "logps/chosen": -2.5236752033233643, "logps/rejected": -5.135058879852295, "loss": 0.6394, "nll_loss": 0.6252095103263855, "rewards/accuracies": 1.0, "rewards/chosen": -0.2523675262928009, "rewards/margins": 0.261138379573822, "rewards/rejected": -0.5135059356689453, "step": 4871 }, { "epoch": 13.338809034907598, "grad_norm": 5.639859199523926, "learning_rate": 3.3301369863013694e-07, "log_odds_chosen": 1.2623015642166138, "log_odds_ratio": -0.3910832405090332, "logits/chosen": 0.7425755262374878, "logits/rejected": 0.7677242159843445, "logps/chosen": -2.0194358825683594, "logps/rejected": -3.183393955230713, "loss": 0.6016, "nll_loss": 0.5624425411224365, "rewards/accuracies": 0.875, "rewards/chosen": -0.20194359123706818, "rewards/margins": 0.11639580875635147, "rewards/rejected": -0.31833943724632263, "step": 4872 }, { "epoch": 13.34154688569473, "grad_norm": 4.785380840301514, "learning_rate": 3.3287671232876714e-07, "log_odds_chosen": 1.7987914085388184, "log_odds_ratio": -0.21272294223308563, "logits/chosen": 0.861579954624176, "logits/rejected": 0.8792352080345154, "logps/chosen": -2.1075901985168457, "logps/rejected": -3.789750099182129, "loss": 0.6604, "nll_loss": 0.6390866637229919, "rewards/accuracies": 1.0, "rewards/chosen": -0.2107590138912201, "rewards/margins": 0.1682160198688507, "rewards/rejected": -0.3789750337600708, "step": 4873 }, { "epoch": 13.344284736481862, "grad_norm": 5.231635093688965, "learning_rate": 3.3273972602739724e-07, "log_odds_chosen": 2.397676944732666, "log_odds_ratio": -0.2931136190891266, "logits/chosen": 0.6030806303024292, "logits/rejected": 0.6517754793167114, "logps/chosen": -1.4816029071807861, "logps/rejected": -3.6962685585021973, "loss": 0.6019, "nll_loss": 0.5725888609886169, "rewards/accuracies": 0.875, "rewards/chosen": -0.14816029369831085, "rewards/margins": 0.2214665561914444, "rewards/rejected": -0.36962684988975525, "step": 4874 }, { "epoch": 13.347022587268993, "grad_norm": 4.328283309936523, "learning_rate": 3.326027397260274e-07, "log_odds_chosen": 2.806790828704834, "log_odds_ratio": -0.15517596900463104, "logits/chosen": 0.9567019939422607, "logits/rejected": 1.0036497116088867, "logps/chosen": -1.990891695022583, "logps/rejected": -4.623828411102295, "loss": 0.5701, "nll_loss": 0.5546289682388306, "rewards/accuracies": 1.0, "rewards/chosen": -0.1990891546010971, "rewards/margins": 0.26329371333122253, "rewards/rejected": -0.46238285303115845, "step": 4875 }, { "epoch": 13.349760438056126, "grad_norm": 6.720550060272217, "learning_rate": 3.3246575342465754e-07, "log_odds_chosen": 1.1124863624572754, "log_odds_ratio": -0.4132406413555145, "logits/chosen": 0.980915904045105, "logits/rejected": 0.9526224136352539, "logps/chosen": -3.218003273010254, "logps/rejected": -4.249444961547852, "loss": 0.7298, "nll_loss": 0.6884575486183167, "rewards/accuracies": 0.75, "rewards/chosen": -0.3218003511428833, "rewards/margins": 0.10314414650201797, "rewards/rejected": -0.42494451999664307, "step": 4876 }, { "epoch": 13.352498288843258, "grad_norm": 5.2004523277282715, "learning_rate": 3.3232876712328764e-07, "log_odds_chosen": 2.0908443927764893, "log_odds_ratio": -0.194726824760437, "logits/chosen": 0.8808444738388062, "logits/rejected": 0.8171460032463074, "logps/chosen": -2.474029064178467, "logps/rejected": -4.441766262054443, "loss": 0.7882, "nll_loss": 0.7687460780143738, "rewards/accuracies": 1.0, "rewards/chosen": -0.24740290641784668, "rewards/margins": 0.1967737227678299, "rewards/rejected": -0.44417664408683777, "step": 4877 }, { "epoch": 13.35523613963039, "grad_norm": 5.534481048583984, "learning_rate": 3.321917808219178e-07, "log_odds_chosen": 2.4502205848693848, "log_odds_ratio": -0.1707744300365448, "logits/chosen": 0.7008254528045654, "logits/rejected": 0.722466230392456, "logps/chosen": -2.0413010120391846, "logps/rejected": -4.3086347579956055, "loss": 0.6334, "nll_loss": 0.6162739992141724, "rewards/accuracies": 1.0, "rewards/chosen": -0.2041301131248474, "rewards/margins": 0.22673338651657104, "rewards/rejected": -0.43086349964141846, "step": 4878 }, { "epoch": 13.357973990417522, "grad_norm": 5.665250778198242, "learning_rate": 3.320547945205479e-07, "log_odds_chosen": 2.6439528465270996, "log_odds_ratio": -0.33973655104637146, "logits/chosen": 0.5212438702583313, "logits/rejected": 0.6239335536956787, "logps/chosen": -2.176119804382324, "logps/rejected": -4.736202716827393, "loss": 0.6538, "nll_loss": 0.6198552846908569, "rewards/accuracies": 0.875, "rewards/chosen": -0.21761196851730347, "rewards/margins": 0.2560082972049713, "rewards/rejected": -0.47362029552459717, "step": 4879 }, { "epoch": 13.360711841204655, "grad_norm": 7.70318078994751, "learning_rate": 3.319178082191781e-07, "log_odds_chosen": 1.5327211618423462, "log_odds_ratio": -0.329830139875412, "logits/chosen": 1.1183018684387207, "logits/rejected": 1.0714479684829712, "logps/chosen": -2.885256052017212, "logps/rejected": -4.358809471130371, "loss": 0.789, "nll_loss": 0.7560302019119263, "rewards/accuracies": 1.0, "rewards/chosen": -0.28852561116218567, "rewards/margins": 0.1473553627729416, "rewards/rejected": -0.43588095903396606, "step": 4880 }, { "epoch": 13.363449691991786, "grad_norm": 5.716253757476807, "learning_rate": 3.317808219178082e-07, "log_odds_chosen": 1.5014183521270752, "log_odds_ratio": -0.47847780585289, "logits/chosen": 0.7015912532806396, "logits/rejected": 0.768260657787323, "logps/chosen": -2.983226776123047, "logps/rejected": -4.428876876831055, "loss": 0.8498, "nll_loss": 0.8019837141036987, "rewards/accuracies": 0.75, "rewards/chosen": -0.2983226776123047, "rewards/margins": 0.14456504583358765, "rewards/rejected": -0.44288772344589233, "step": 4881 }, { "epoch": 13.366187542778919, "grad_norm": 5.312529563903809, "learning_rate": 3.3164383561643835e-07, "log_odds_chosen": 1.7788662910461426, "log_odds_ratio": -0.31151700019836426, "logits/chosen": 0.7658401727676392, "logits/rejected": 0.680426836013794, "logps/chosen": -3.0066421031951904, "logps/rejected": -4.709710597991943, "loss": 0.8099, "nll_loss": 0.7787332534790039, "rewards/accuracies": 0.875, "rewards/chosen": -0.30066418647766113, "rewards/margins": 0.17030684649944305, "rewards/rejected": -0.4709710478782654, "step": 4882 }, { "epoch": 13.36892539356605, "grad_norm": 5.2537007331848145, "learning_rate": 3.315068493150685e-07, "log_odds_chosen": 1.7872788906097412, "log_odds_ratio": -0.25163739919662476, "logits/chosen": 0.7748504877090454, "logits/rejected": 0.7295078039169312, "logps/chosen": -2.5062167644500732, "logps/rejected": -4.173774242401123, "loss": 0.6455, "nll_loss": 0.6203548312187195, "rewards/accuracies": 1.0, "rewards/chosen": -0.2506216764450073, "rewards/margins": 0.16675575077533722, "rewards/rejected": -0.41737741231918335, "step": 4883 }, { "epoch": 13.371663244353183, "grad_norm": 6.102927207946777, "learning_rate": 3.313698630136986e-07, "log_odds_chosen": 2.1788978576660156, "log_odds_ratio": -0.323098361492157, "logits/chosen": 0.7008056640625, "logits/rejected": 0.6582680940628052, "logps/chosen": -2.6976828575134277, "logps/rejected": -4.819647789001465, "loss": 0.8627, "nll_loss": 0.8303619623184204, "rewards/accuracies": 0.875, "rewards/chosen": -0.26976829767227173, "rewards/margins": 0.2121964991092682, "rewards/rejected": -0.4819648265838623, "step": 4884 }, { "epoch": 13.374401095140314, "grad_norm": 5.673065185546875, "learning_rate": 3.3123287671232875e-07, "log_odds_chosen": 1.650693416595459, "log_odds_ratio": -0.29618972539901733, "logits/chosen": 0.9834587574005127, "logits/rejected": 0.944417417049408, "logps/chosen": -2.0394341945648193, "logps/rejected": -3.5953774452209473, "loss": 0.7099, "nll_loss": 0.680290162563324, "rewards/accuracies": 1.0, "rewards/chosen": -0.2039434164762497, "rewards/margins": 0.1555943489074707, "rewards/rejected": -0.3595377802848816, "step": 4885 }, { "epoch": 13.377138945927447, "grad_norm": 5.357364177703857, "learning_rate": 3.3109589041095885e-07, "log_odds_chosen": 1.3596887588500977, "log_odds_ratio": -0.2978418171405792, "logits/chosen": 0.7760306000709534, "logits/rejected": 0.7817909121513367, "logps/chosen": -2.309574604034424, "logps/rejected": -3.6084489822387695, "loss": 0.7029, "nll_loss": 0.6731041073799133, "rewards/accuracies": 0.875, "rewards/chosen": -0.23095744848251343, "rewards/margins": 0.1298874467611313, "rewards/rejected": -0.3608449101448059, "step": 4886 }, { "epoch": 13.37987679671458, "grad_norm": 4.751715183258057, "learning_rate": 3.3095890410958906e-07, "log_odds_chosen": 3.931215524673462, "log_odds_ratio": -0.06694361567497253, "logits/chosen": 0.874711811542511, "logits/rejected": 0.8998421430587769, "logps/chosen": -2.413165330886841, "logps/rejected": -6.158700942993164, "loss": 0.6097, "nll_loss": 0.6030037999153137, "rewards/accuracies": 1.0, "rewards/chosen": -0.241316556930542, "rewards/margins": 0.3745536506175995, "rewards/rejected": -0.6158701777458191, "step": 4887 }, { "epoch": 13.382614647501711, "grad_norm": 6.059805393218994, "learning_rate": 3.3082191780821916e-07, "log_odds_chosen": 2.819218635559082, "log_odds_ratio": -0.28635698556900024, "logits/chosen": 0.9655083417892456, "logits/rejected": 0.9944053888320923, "logps/chosen": -2.710759401321411, "logps/rejected": -5.454126358032227, "loss": 0.7739, "nll_loss": 0.7452723383903503, "rewards/accuracies": 0.875, "rewards/chosen": -0.27107593417167664, "rewards/margins": 0.27433669567108154, "rewards/rejected": -0.5454126596450806, "step": 4888 }, { "epoch": 13.385352498288844, "grad_norm": 5.499622821807861, "learning_rate": 3.306849315068493e-07, "log_odds_chosen": 3.2090163230895996, "log_odds_ratio": -0.11707541346549988, "logits/chosen": 0.9563206434249878, "logits/rejected": 1.012718915939331, "logps/chosen": -2.4306750297546387, "logps/rejected": -5.489146709442139, "loss": 0.6822, "nll_loss": 0.6704561710357666, "rewards/accuracies": 1.0, "rewards/chosen": -0.24306748807430267, "rewards/margins": 0.3058471977710724, "rewards/rejected": -0.5489147305488586, "step": 4889 }, { "epoch": 13.388090349075975, "grad_norm": 5.274240970611572, "learning_rate": 3.3054794520547946e-07, "log_odds_chosen": 2.0202126502990723, "log_odds_ratio": -0.3208807706832886, "logits/chosen": 0.9803473353385925, "logits/rejected": 1.0515398979187012, "logps/chosen": -2.443472146987915, "logps/rejected": -4.3992156982421875, "loss": 0.7088, "nll_loss": 0.676708996295929, "rewards/accuracies": 0.75, "rewards/chosen": -0.2443472295999527, "rewards/margins": 0.1955743134021759, "rewards/rejected": -0.4399215579032898, "step": 4890 }, { "epoch": 13.390828199863108, "grad_norm": 5.557748794555664, "learning_rate": 3.3041095890410956e-07, "log_odds_chosen": 1.9264404773712158, "log_odds_ratio": -0.34446489810943604, "logits/chosen": 0.8191383481025696, "logits/rejected": 0.8413667678833008, "logps/chosen": -2.7643399238586426, "logps/rejected": -4.577191352844238, "loss": 0.705, "nll_loss": 0.6706016659736633, "rewards/accuracies": 0.875, "rewards/chosen": -0.2764340043067932, "rewards/margins": 0.18128514289855957, "rewards/rejected": -0.4577191472053528, "step": 4891 }, { "epoch": 13.39356605065024, "grad_norm": 4.734990119934082, "learning_rate": 3.302739726027397e-07, "log_odds_chosen": 3.1035261154174805, "log_odds_ratio": -0.23400326073169708, "logits/chosen": 0.7741048336029053, "logits/rejected": 0.7937819957733154, "logps/chosen": -2.231628894805908, "logps/rejected": -5.196167945861816, "loss": 0.758, "nll_loss": 0.7346117496490479, "rewards/accuracies": 0.875, "rewards/chosen": -0.22316290438175201, "rewards/margins": 0.29645389318466187, "rewards/rejected": -0.5196167826652527, "step": 4892 }, { "epoch": 13.396303901437372, "grad_norm": 5.158606052398682, "learning_rate": 3.301369863013698e-07, "log_odds_chosen": 3.300955295562744, "log_odds_ratio": -0.08575510233640671, "logits/chosen": 0.9299171566963196, "logits/rejected": 1.045752763748169, "logps/chosen": -2.1918957233428955, "logps/rejected": -5.303863525390625, "loss": 0.778, "nll_loss": 0.7693817615509033, "rewards/accuracies": 1.0, "rewards/chosen": -0.21918955445289612, "rewards/margins": 0.31119677424430847, "rewards/rejected": -0.5303863286972046, "step": 4893 }, { "epoch": 13.399041752224504, "grad_norm": 9.62338638305664, "learning_rate": 3.3e-07, "log_odds_chosen": 0.671914279460907, "log_odds_ratio": -0.7806535959243774, "logits/chosen": 0.8155663013458252, "logits/rejected": 0.8098505139350891, "logps/chosen": -3.720498561859131, "logps/rejected": -4.352211952209473, "loss": 0.8542, "nll_loss": 0.7760857939720154, "rewards/accuracies": 0.75, "rewards/chosen": -0.37204986810684204, "rewards/margins": 0.06317135691642761, "rewards/rejected": -0.43522122502326965, "step": 4894 }, { "epoch": 13.401779603011637, "grad_norm": 6.350490570068359, "learning_rate": 3.298630136986301e-07, "log_odds_chosen": 2.0140457153320312, "log_odds_ratio": -0.31056517362594604, "logits/chosen": 0.5340325236320496, "logits/rejected": 0.5307731628417969, "logps/chosen": -2.0740554332733154, "logps/rejected": -3.966752529144287, "loss": 0.6875, "nll_loss": 0.6563974022865295, "rewards/accuracies": 0.875, "rewards/chosen": -0.20740553736686707, "rewards/margins": 0.18926972150802612, "rewards/rejected": -0.3966752588748932, "step": 4895 }, { "epoch": 13.404517453798768, "grad_norm": 5.628235816955566, "learning_rate": 3.2972602739726027e-07, "log_odds_chosen": 1.774655818939209, "log_odds_ratio": -0.292798787355423, "logits/chosen": 0.8709257245063782, "logits/rejected": 0.851064920425415, "logps/chosen": -1.6049280166625977, "logps/rejected": -3.2474801540374756, "loss": 0.6243, "nll_loss": 0.5949745178222656, "rewards/accuracies": 1.0, "rewards/chosen": -0.16049280762672424, "rewards/margins": 0.16425520181655884, "rewards/rejected": -0.3247480094432831, "step": 4896 }, { "epoch": 13.4072553045859, "grad_norm": 6.005814075469971, "learning_rate": 3.295890410958904e-07, "log_odds_chosen": 1.8519600629806519, "log_odds_ratio": -0.4759851396083832, "logits/chosen": 1.152475357055664, "logits/rejected": 1.1558070182800293, "logps/chosen": -2.290653944015503, "logps/rejected": -4.0479960441589355, "loss": 0.6875, "nll_loss": 0.6398589015007019, "rewards/accuracies": 0.625, "rewards/chosen": -0.22906538844108582, "rewards/margins": 0.17573422193527222, "rewards/rejected": -0.40479961037635803, "step": 4897 }, { "epoch": 13.409993155373032, "grad_norm": 5.096158027648926, "learning_rate": 3.294520547945205e-07, "log_odds_chosen": 4.950582504272461, "log_odds_ratio": -0.09882130473852158, "logits/chosen": 0.8808086514472961, "logits/rejected": 0.9317456483840942, "logps/chosen": -2.2714502811431885, "logps/rejected": -7.109386444091797, "loss": 0.6765, "nll_loss": 0.6665685772895813, "rewards/accuracies": 1.0, "rewards/chosen": -0.2271450161933899, "rewards/margins": 0.4837936758995056, "rewards/rejected": -0.7109386920928955, "step": 4898 }, { "epoch": 13.412731006160165, "grad_norm": 10.186278343200684, "learning_rate": 3.2931506849315067e-07, "log_odds_chosen": 0.6790962815284729, "log_odds_ratio": -0.8122308850288391, "logits/chosen": 0.7874770164489746, "logits/rejected": 0.7442749738693237, "logps/chosen": -2.7042036056518555, "logps/rejected": -3.306919574737549, "loss": 0.7461, "nll_loss": 0.6648999452590942, "rewards/accuracies": 0.625, "rewards/chosen": -0.2704203426837921, "rewards/margins": 0.06027162820100784, "rewards/rejected": -0.33069199323654175, "step": 4899 }, { "epoch": 13.415468856947296, "grad_norm": 5.225533485412598, "learning_rate": 3.2917808219178077e-07, "log_odds_chosen": 3.9100050926208496, "log_odds_ratio": -0.10942388325929642, "logits/chosen": 0.9599028825759888, "logits/rejected": 1.0001040697097778, "logps/chosen": -1.8827309608459473, "logps/rejected": -5.551477432250977, "loss": 0.6532, "nll_loss": 0.6422327756881714, "rewards/accuracies": 1.0, "rewards/chosen": -0.1882731169462204, "rewards/margins": 0.36687466502189636, "rewards/rejected": -0.5551477670669556, "step": 4900 }, { "epoch": 13.418206707734429, "grad_norm": 5.697085380554199, "learning_rate": 3.29041095890411e-07, "log_odds_chosen": 2.4782371520996094, "log_odds_ratio": -0.16370604932308197, "logits/chosen": 0.7291540503501892, "logits/rejected": 0.7020116448402405, "logps/chosen": -1.4282052516937256, "logps/rejected": -3.6441428661346436, "loss": 0.5921, "nll_loss": 0.575760006904602, "rewards/accuracies": 1.0, "rewards/chosen": -0.14282052218914032, "rewards/margins": 0.22159379720687866, "rewards/rejected": -0.3644143044948578, "step": 4901 }, { "epoch": 13.42094455852156, "grad_norm": 5.84132719039917, "learning_rate": 3.289041095890411e-07, "log_odds_chosen": 2.827465295791626, "log_odds_ratio": -0.39536523818969727, "logits/chosen": 0.7941818237304688, "logits/rejected": 0.8022785782814026, "logps/chosen": -1.7862515449523926, "logps/rejected": -4.540886878967285, "loss": 0.7191, "nll_loss": 0.6795998811721802, "rewards/accuracies": 0.75, "rewards/chosen": -0.1786251664161682, "rewards/margins": 0.2754635214805603, "rewards/rejected": -0.4540886878967285, "step": 4902 }, { "epoch": 13.423682409308693, "grad_norm": 6.493135452270508, "learning_rate": 3.287671232876712e-07, "log_odds_chosen": 2.3860793113708496, "log_odds_ratio": -0.2781432867050171, "logits/chosen": 0.7433367371559143, "logits/rejected": 0.7284103035926819, "logps/chosen": -2.4009757041931152, "logps/rejected": -4.6893792152404785, "loss": 0.729, "nll_loss": 0.701181948184967, "rewards/accuracies": 0.875, "rewards/chosen": -0.24009758234024048, "rewards/margins": 0.22884033620357513, "rewards/rejected": -0.4689379334449768, "step": 4903 }, { "epoch": 13.426420260095824, "grad_norm": 9.447375297546387, "learning_rate": 3.286301369863014e-07, "log_odds_chosen": 3.6928539276123047, "log_odds_ratio": -0.0661255493760109, "logits/chosen": 0.7987778186798096, "logits/rejected": 0.8148910999298096, "logps/chosen": -2.393291711807251, "logps/rejected": -5.9274749755859375, "loss": 0.7381, "nll_loss": 0.7315148115158081, "rewards/accuracies": 1.0, "rewards/chosen": -0.23932917416095734, "rewards/margins": 0.35341838002204895, "rewards/rejected": -0.5927475094795227, "step": 4904 }, { "epoch": 13.429158110882957, "grad_norm": 5.28309440612793, "learning_rate": 3.284931506849315e-07, "log_odds_chosen": 1.8584935665130615, "log_odds_ratio": -0.31194978952407837, "logits/chosen": 1.1401352882385254, "logits/rejected": 1.1330249309539795, "logps/chosen": -2.207775115966797, "logps/rejected": -3.9681479930877686, "loss": 0.6948, "nll_loss": 0.6635842323303223, "rewards/accuracies": 0.875, "rewards/chosen": -0.2207775115966797, "rewards/margins": 0.17603729665279388, "rewards/rejected": -0.39681482315063477, "step": 4905 }, { "epoch": 13.431895961670088, "grad_norm": 5.452115535736084, "learning_rate": 3.2835616438356163e-07, "log_odds_chosen": 1.4115586280822754, "log_odds_ratio": -0.41103535890579224, "logits/chosen": 0.6761811971664429, "logits/rejected": 0.7275506258010864, "logps/chosen": -1.8791522979736328, "logps/rejected": -3.1528751850128174, "loss": 0.5821, "nll_loss": 0.5410277843475342, "rewards/accuracies": 0.75, "rewards/chosen": -0.18791523575782776, "rewards/margins": 0.12737229466438293, "rewards/rejected": -0.3152875304222107, "step": 4906 }, { "epoch": 13.434633812457221, "grad_norm": 6.6097588539123535, "learning_rate": 3.2821917808219173e-07, "log_odds_chosen": 1.9930511713027954, "log_odds_ratio": -0.28741908073425293, "logits/chosen": 0.85955810546875, "logits/rejected": 0.8484175205230713, "logps/chosen": -1.8651516437530518, "logps/rejected": -3.7309017181396484, "loss": 0.6424, "nll_loss": 0.6136881113052368, "rewards/accuracies": 0.875, "rewards/chosen": -0.18651515245437622, "rewards/margins": 0.1865750253200531, "rewards/rejected": -0.3730901777744293, "step": 4907 }, { "epoch": 13.437371663244353, "grad_norm": 5.5292792320251465, "learning_rate": 3.2808219178082193e-07, "log_odds_chosen": 1.7861149311065674, "log_odds_ratio": -0.22547011077404022, "logits/chosen": 0.8832659125328064, "logits/rejected": 0.9459371566772461, "logps/chosen": -2.460775852203369, "logps/rejected": -4.130068778991699, "loss": 0.6946, "nll_loss": 0.6720583438873291, "rewards/accuracies": 1.0, "rewards/chosen": -0.24607756733894348, "rewards/margins": 0.16692928969860077, "rewards/rejected": -0.41300687193870544, "step": 4908 }, { "epoch": 13.440109514031485, "grad_norm": 5.870518207550049, "learning_rate": 3.2794520547945203e-07, "log_odds_chosen": 1.430849313735962, "log_odds_ratio": -0.28814712166786194, "logits/chosen": 0.9390772581100464, "logits/rejected": 0.9546242952346802, "logps/chosen": -2.134096622467041, "logps/rejected": -3.435143232345581, "loss": 0.6859, "nll_loss": 0.6570720672607422, "rewards/accuracies": 1.0, "rewards/chosen": -0.2134096622467041, "rewards/margins": 0.1301046758890152, "rewards/rejected": -0.3435143530368805, "step": 4909 }, { "epoch": 13.442847364818617, "grad_norm": 5.5337324142456055, "learning_rate": 3.2780821917808213e-07, "log_odds_chosen": 2.4725937843322754, "log_odds_ratio": -0.16923055052757263, "logits/chosen": 0.6071062684059143, "logits/rejected": 0.6300883889198303, "logps/chosen": -1.6684006452560425, "logps/rejected": -3.9533958435058594, "loss": 0.5375, "nll_loss": 0.5206022262573242, "rewards/accuracies": 1.0, "rewards/chosen": -0.1668400764465332, "rewards/margins": 0.22849951684474945, "rewards/rejected": -0.39533960819244385, "step": 4910 }, { "epoch": 13.44558521560575, "grad_norm": 4.825159072875977, "learning_rate": 3.2767123287671234e-07, "log_odds_chosen": 1.9629490375518799, "log_odds_ratio": -0.22801733016967773, "logits/chosen": 0.9603818655014038, "logits/rejected": 0.9305005073547363, "logps/chosen": -1.8609665632247925, "logps/rejected": -3.670725107192993, "loss": 0.6095, "nll_loss": 0.5867241621017456, "rewards/accuracies": 1.0, "rewards/chosen": -0.186096653342247, "rewards/margins": 0.18097586929798126, "rewards/rejected": -0.36707255244255066, "step": 4911 }, { "epoch": 13.44832306639288, "grad_norm": 5.333054065704346, "learning_rate": 3.2753424657534244e-07, "log_odds_chosen": 1.71649968624115, "log_odds_ratio": -0.24859122931957245, "logits/chosen": 0.7593198418617249, "logits/rejected": 0.8035797476768494, "logps/chosen": -1.9176666736602783, "logps/rejected": -3.448129177093506, "loss": 0.5526, "nll_loss": 0.527726411819458, "rewards/accuracies": 1.0, "rewards/chosen": -0.1917666643857956, "rewards/margins": 0.15304625034332275, "rewards/rejected": -0.34481292963027954, "step": 4912 }, { "epoch": 13.451060917180014, "grad_norm": 5.536218643188477, "learning_rate": 3.273972602739726e-07, "log_odds_chosen": 2.9754395484924316, "log_odds_ratio": -0.10400402545928955, "logits/chosen": 0.6194500923156738, "logits/rejected": 0.660535454750061, "logps/chosen": -2.1501963138580322, "logps/rejected": -4.978084564208984, "loss": 0.5561, "nll_loss": 0.5457322001457214, "rewards/accuracies": 1.0, "rewards/chosen": -0.21501964330673218, "rewards/margins": 0.28278884291648865, "rewards/rejected": -0.49780845642089844, "step": 4913 }, { "epoch": 13.453798767967147, "grad_norm": 4.950290203094482, "learning_rate": 3.2726027397260274e-07, "log_odds_chosen": 2.1881766319274902, "log_odds_ratio": -0.20899838209152222, "logits/chosen": 0.7848923206329346, "logits/rejected": 0.756238579750061, "logps/chosen": -1.8606027364730835, "logps/rejected": -3.8955564498901367, "loss": 0.8035, "nll_loss": 0.7825608849525452, "rewards/accuracies": 1.0, "rewards/chosen": -0.18606027960777283, "rewards/margins": 0.20349538326263428, "rewards/rejected": -0.3895556628704071, "step": 4914 }, { "epoch": 13.456536618754278, "grad_norm": 8.20202922821045, "learning_rate": 3.271232876712329e-07, "log_odds_chosen": 0.5293052792549133, "log_odds_ratio": -0.5978338718414307, "logits/chosen": 0.9460446834564209, "logits/rejected": 1.0405395030975342, "logps/chosen": -2.906595468521118, "logps/rejected": -3.377495765686035, "loss": 0.7257, "nll_loss": 0.6658931970596313, "rewards/accuracies": 0.75, "rewards/chosen": -0.2906595468521118, "rewards/margins": 0.04709003120660782, "rewards/rejected": -0.3377496004104614, "step": 4915 }, { "epoch": 13.45927446954141, "grad_norm": 5.432215690612793, "learning_rate": 3.26986301369863e-07, "log_odds_chosen": 2.131070375442505, "log_odds_ratio": -0.183425173163414, "logits/chosen": 0.8252475261688232, "logits/rejected": 0.8503978848457336, "logps/chosen": -1.8455860614776611, "logps/rejected": -3.828927516937256, "loss": 0.6024, "nll_loss": 0.5840592384338379, "rewards/accuracies": 1.0, "rewards/chosen": -0.18455860018730164, "rewards/margins": 0.19833414256572723, "rewards/rejected": -0.38289278745651245, "step": 4916 }, { "epoch": 13.462012320328542, "grad_norm": 5.339135646820068, "learning_rate": 3.268493150684931e-07, "log_odds_chosen": 4.0425238609313965, "log_odds_ratio": -0.10205048322677612, "logits/chosen": 0.8512198328971863, "logits/rejected": 0.9063864946365356, "logps/chosen": -1.8731520175933838, "logps/rejected": -5.687991619110107, "loss": 0.6111, "nll_loss": 0.600856363773346, "rewards/accuracies": 1.0, "rewards/chosen": -0.1873151957988739, "rewards/margins": 0.38148394227027893, "rewards/rejected": -0.5687991380691528, "step": 4917 }, { "epoch": 13.464750171115675, "grad_norm": 5.187985420227051, "learning_rate": 3.267123287671233e-07, "log_odds_chosen": 1.513985514640808, "log_odds_ratio": -0.43924444913864136, "logits/chosen": 1.0048186779022217, "logits/rejected": 1.0162692070007324, "logps/chosen": -2.4148049354553223, "logps/rejected": -3.867279052734375, "loss": 0.6896, "nll_loss": 0.6456457376480103, "rewards/accuracies": 0.75, "rewards/chosen": -0.2414805144071579, "rewards/margins": 0.1452474147081375, "rewards/rejected": -0.3867279291152954, "step": 4918 }, { "epoch": 13.467488021902806, "grad_norm": 5.370485782623291, "learning_rate": 3.265753424657534e-07, "log_odds_chosen": 1.5260438919067383, "log_odds_ratio": -0.23657628893852234, "logits/chosen": 0.8649954795837402, "logits/rejected": 0.8982830047607422, "logps/chosen": -2.15817928314209, "logps/rejected": -3.551191806793213, "loss": 0.667, "nll_loss": 0.6433703899383545, "rewards/accuracies": 1.0, "rewards/chosen": -0.21581792831420898, "rewards/margins": 0.13930124044418335, "rewards/rejected": -0.35511916875839233, "step": 4919 }, { "epoch": 13.470225872689939, "grad_norm": 4.861704349517822, "learning_rate": 3.2643835616438355e-07, "log_odds_chosen": 1.1861568689346313, "log_odds_ratio": -0.3304039239883423, "logits/chosen": 0.7420738935470581, "logits/rejected": 0.8159563541412354, "logps/chosen": -1.7405025959014893, "logps/rejected": -2.793452262878418, "loss": 0.6371, "nll_loss": 0.6040238738059998, "rewards/accuracies": 1.0, "rewards/chosen": -0.17405027151107788, "rewards/margins": 0.10529496520757675, "rewards/rejected": -0.27934524416923523, "step": 4920 }, { "epoch": 13.47296372347707, "grad_norm": 5.7623467445373535, "learning_rate": 3.263013698630137e-07, "log_odds_chosen": 4.093822479248047, "log_odds_ratio": -0.0881008580327034, "logits/chosen": 1.04075288772583, "logits/rejected": 1.0829859972000122, "logps/chosen": -1.9720489978790283, "logps/rejected": -5.927631378173828, "loss": 0.6033, "nll_loss": 0.5945274829864502, "rewards/accuracies": 1.0, "rewards/chosen": -0.19720490276813507, "rewards/margins": 0.39555823802948, "rewards/rejected": -0.5927631258964539, "step": 4921 }, { "epoch": 13.475701574264203, "grad_norm": 5.4340128898620605, "learning_rate": 3.2616438356164385e-07, "log_odds_chosen": 2.3817973136901855, "log_odds_ratio": -0.15657754242420197, "logits/chosen": 0.5652887225151062, "logits/rejected": 0.4916890859603882, "logps/chosen": -2.1142663955688477, "logps/rejected": -4.372575283050537, "loss": 0.7337, "nll_loss": 0.7180842757225037, "rewards/accuracies": 1.0, "rewards/chosen": -0.21142661571502686, "rewards/margins": 0.22583088278770447, "rewards/rejected": -0.4372575283050537, "step": 4922 }, { "epoch": 13.478439425051334, "grad_norm": 8.657330513000488, "learning_rate": 3.2602739726027395e-07, "log_odds_chosen": 2.381300449371338, "log_odds_ratio": -0.3583720922470093, "logits/chosen": 1.2087169885635376, "logits/rejected": 1.2956277132034302, "logps/chosen": -2.4951653480529785, "logps/rejected": -4.80665397644043, "loss": 0.7063, "nll_loss": 0.6704574823379517, "rewards/accuracies": 0.875, "rewards/chosen": -0.2495165467262268, "rewards/margins": 0.2311488687992096, "rewards/rejected": -0.4806654155254364, "step": 4923 }, { "epoch": 13.481177275838467, "grad_norm": 5.0510687828063965, "learning_rate": 3.2589041095890405e-07, "log_odds_chosen": 2.562519073486328, "log_odds_ratio": -0.1662333607673645, "logits/chosen": 0.6815633177757263, "logits/rejected": 0.6971286535263062, "logps/chosen": -1.9626004695892334, "logps/rejected": -4.388696193695068, "loss": 0.6507, "nll_loss": 0.6341259479522705, "rewards/accuracies": 1.0, "rewards/chosen": -0.19626004993915558, "rewards/margins": 0.24260957539081573, "rewards/rejected": -0.4388696253299713, "step": 4924 }, { "epoch": 13.483915126625599, "grad_norm": 5.354010105133057, "learning_rate": 3.2575342465753425e-07, "log_odds_chosen": 1.8635571002960205, "log_odds_ratio": -0.2998560070991516, "logits/chosen": 0.9166292548179626, "logits/rejected": 0.9342761039733887, "logps/chosen": -1.9903862476348877, "logps/rejected": -3.757570743560791, "loss": 0.6968, "nll_loss": 0.6668450832366943, "rewards/accuracies": 0.75, "rewards/chosen": -0.19903862476348877, "rewards/margins": 0.17671844363212585, "rewards/rejected": -0.375757098197937, "step": 4925 }, { "epoch": 13.486652977412732, "grad_norm": 4.925257682800293, "learning_rate": 3.2561643835616435e-07, "log_odds_chosen": 2.186495542526245, "log_odds_ratio": -0.2941451072692871, "logits/chosen": 0.6852832436561584, "logits/rejected": 0.7185211181640625, "logps/chosen": -2.0005760192871094, "logps/rejected": -4.038031101226807, "loss": 0.788, "nll_loss": 0.7585547566413879, "rewards/accuracies": 0.875, "rewards/chosen": -0.20005759596824646, "rewards/margins": 0.203745499253273, "rewards/rejected": -0.4038030803203583, "step": 4926 }, { "epoch": 13.489390828199863, "grad_norm": 6.049318313598633, "learning_rate": 3.254794520547945e-07, "log_odds_chosen": 1.1513729095458984, "log_odds_ratio": -0.3866361379623413, "logits/chosen": 0.8848947882652283, "logits/rejected": 0.8267748951911926, "logps/chosen": -1.9243743419647217, "logps/rejected": -2.917177677154541, "loss": 0.679, "nll_loss": 0.6403627395629883, "rewards/accuracies": 0.875, "rewards/chosen": -0.19243744015693665, "rewards/margins": 0.09928033500909805, "rewards/rejected": -0.2917177677154541, "step": 4927 }, { "epoch": 13.492128678986996, "grad_norm": 6.589008808135986, "learning_rate": 3.2534246575342466e-07, "log_odds_chosen": 2.768120527267456, "log_odds_ratio": -0.16173553466796875, "logits/chosen": 0.9607435464859009, "logits/rejected": 0.9202748537063599, "logps/chosen": -2.345163106918335, "logps/rejected": -4.9455413818359375, "loss": 0.6968, "nll_loss": 0.6806476712226868, "rewards/accuracies": 1.0, "rewards/chosen": -0.23451630771160126, "rewards/margins": 0.2600378394126892, "rewards/rejected": -0.49455416202545166, "step": 4928 }, { "epoch": 13.494866529774127, "grad_norm": 4.834415912628174, "learning_rate": 3.252054794520548e-07, "log_odds_chosen": 2.401963710784912, "log_odds_ratio": -0.2226564586162567, "logits/chosen": 0.9143657684326172, "logits/rejected": 0.9124858975410461, "logps/chosen": -1.5496071577072144, "logps/rejected": -3.741940975189209, "loss": 0.5861, "nll_loss": 0.5637909173965454, "rewards/accuracies": 1.0, "rewards/chosen": -0.1549607217311859, "rewards/margins": 0.21923337876796722, "rewards/rejected": -0.37419408559799194, "step": 4929 }, { "epoch": 13.49760438056126, "grad_norm": 9.086695671081543, "learning_rate": 3.250684931506849e-07, "log_odds_chosen": 0.9577178955078125, "log_odds_ratio": -0.7159345746040344, "logits/chosen": 0.9726301431655884, "logits/rejected": 1.1130387783050537, "logps/chosen": -3.3845551013946533, "logps/rejected": -4.301318168640137, "loss": 0.7582, "nll_loss": 0.6866235136985779, "rewards/accuracies": 0.875, "rewards/chosen": -0.33845552802085876, "rewards/margins": 0.09167627990245819, "rewards/rejected": -0.43013179302215576, "step": 4930 }, { "epoch": 13.500342231348391, "grad_norm": 6.86394739151001, "learning_rate": 3.24931506849315e-07, "log_odds_chosen": 1.8666582107543945, "log_odds_ratio": -0.35529106855392456, "logits/chosen": 0.8241959810256958, "logits/rejected": 0.9023270010948181, "logps/chosen": -2.8530941009521484, "logps/rejected": -4.648040294647217, "loss": 0.819, "nll_loss": 0.7835052013397217, "rewards/accuracies": 0.75, "rewards/chosen": -0.285309374332428, "rewards/margins": 0.17949466407299042, "rewards/rejected": -0.4648040533065796, "step": 4931 }, { "epoch": 13.503080082135524, "grad_norm": 4.556541919708252, "learning_rate": 3.247945205479452e-07, "log_odds_chosen": 2.982368230819702, "log_odds_ratio": -0.2634667754173279, "logits/chosen": 1.1712177991867065, "logits/rejected": 1.1318471431732178, "logps/chosen": -1.8264669179916382, "logps/rejected": -4.65869665145874, "loss": 0.5686, "nll_loss": 0.5422213673591614, "rewards/accuracies": 1.0, "rewards/chosen": -0.18264669179916382, "rewards/margins": 0.2832229733467102, "rewards/rejected": -0.465869665145874, "step": 4932 }, { "epoch": 13.505817932922655, "grad_norm": 5.239351272583008, "learning_rate": 3.246575342465753e-07, "log_odds_chosen": 1.7641175985336304, "log_odds_ratio": -0.25453487038612366, "logits/chosen": 0.6640472412109375, "logits/rejected": 0.6609704494476318, "logps/chosen": -2.138073682785034, "logps/rejected": -3.6932578086853027, "loss": 0.6161, "nll_loss": 0.5906875133514404, "rewards/accuracies": 1.0, "rewards/chosen": -0.2138073742389679, "rewards/margins": 0.15551838278770447, "rewards/rejected": -0.36932575702667236, "step": 4933 }, { "epoch": 13.508555783709788, "grad_norm": 4.595739364624023, "learning_rate": 3.2452054794520546e-07, "log_odds_chosen": 2.4426956176757812, "log_odds_ratio": -0.1644660234451294, "logits/chosen": 0.7445269227027893, "logits/rejected": 0.7715709209442139, "logps/chosen": -2.2054288387298584, "logps/rejected": -4.528697967529297, "loss": 0.6507, "nll_loss": 0.6342961192131042, "rewards/accuracies": 1.0, "rewards/chosen": -0.22054287791252136, "rewards/margins": 0.23232696950435638, "rewards/rejected": -0.45286983251571655, "step": 4934 }, { "epoch": 13.51129363449692, "grad_norm": 5.392508506774902, "learning_rate": 3.243835616438356e-07, "log_odds_chosen": 3.1386542320251465, "log_odds_ratio": -0.22957491874694824, "logits/chosen": 0.7864710092544556, "logits/rejected": 0.8200103640556335, "logps/chosen": -3.014596462249756, "logps/rejected": -6.087545394897461, "loss": 0.6949, "nll_loss": 0.6719743609428406, "rewards/accuracies": 1.0, "rewards/chosen": -0.3014596700668335, "rewards/margins": 0.3072948753833771, "rewards/rejected": -0.6087545156478882, "step": 4935 }, { "epoch": 13.514031485284052, "grad_norm": 5.705800533294678, "learning_rate": 3.2424657534246577e-07, "log_odds_chosen": 3.2356772422790527, "log_odds_ratio": -0.19504252076148987, "logits/chosen": 0.8092552423477173, "logits/rejected": 0.8545101881027222, "logps/chosen": -2.096316337585449, "logps/rejected": -5.1628007888793945, "loss": 0.6167, "nll_loss": 0.5972157716751099, "rewards/accuracies": 1.0, "rewards/chosen": -0.20963165163993835, "rewards/margins": 0.3066484332084656, "rewards/rejected": -0.5162800550460815, "step": 4936 }, { "epoch": 13.516769336071183, "grad_norm": 4.626126766204834, "learning_rate": 3.2410958904109587e-07, "log_odds_chosen": 1.9591295719146729, "log_odds_ratio": -0.17085246741771698, "logits/chosen": 0.980619490146637, "logits/rejected": 1.014723539352417, "logps/chosen": -3.0030601024627686, "logps/rejected": -4.856723785400391, "loss": 0.7307, "nll_loss": 0.7136033773422241, "rewards/accuracies": 1.0, "rewards/chosen": -0.3003060221672058, "rewards/margins": 0.18536631762981415, "rewards/rejected": -0.48567235469818115, "step": 4937 }, { "epoch": 13.519507186858316, "grad_norm": 5.02241325378418, "learning_rate": 3.2397260273972597e-07, "log_odds_chosen": 2.002497673034668, "log_odds_ratio": -0.27036699652671814, "logits/chosen": 0.8237853646278381, "logits/rejected": 0.8654642105102539, "logps/chosen": -2.383559226989746, "logps/rejected": -4.296690464019775, "loss": 0.6521, "nll_loss": 0.6250291466712952, "rewards/accuracies": 1.0, "rewards/chosen": -0.23835593461990356, "rewards/margins": 0.19131311774253845, "rewards/rejected": -0.4296690821647644, "step": 4938 }, { "epoch": 13.522245037645447, "grad_norm": 6.199919700622559, "learning_rate": 3.2383561643835617e-07, "log_odds_chosen": 1.203178882598877, "log_odds_ratio": -0.449003666639328, "logits/chosen": 0.8035985231399536, "logits/rejected": 0.8083080053329468, "logps/chosen": -2.5725533962249756, "logps/rejected": -3.703881025314331, "loss": 0.7459, "nll_loss": 0.7010212540626526, "rewards/accuracies": 0.875, "rewards/chosen": -0.25725534558296204, "rewards/margins": 0.11313274502754211, "rewards/rejected": -0.37038809061050415, "step": 4939 }, { "epoch": 13.52498288843258, "grad_norm": 5.333446502685547, "learning_rate": 3.2369863013698627e-07, "log_odds_chosen": 2.5351152420043945, "log_odds_ratio": -0.14328047633171082, "logits/chosen": 0.9727380275726318, "logits/rejected": 0.9452704191207886, "logps/chosen": -1.9621143341064453, "logps/rejected": -4.358778476715088, "loss": 0.5857, "nll_loss": 0.5713673233985901, "rewards/accuracies": 1.0, "rewards/chosen": -0.19621142745018005, "rewards/margins": 0.2396664023399353, "rewards/rejected": -0.43587782979011536, "step": 4940 }, { "epoch": 13.527720739219713, "grad_norm": 6.904790878295898, "learning_rate": 3.235616438356164e-07, "log_odds_chosen": 1.7506060600280762, "log_odds_ratio": -0.3488699793815613, "logits/chosen": 0.8023990392684937, "logits/rejected": 0.8170548677444458, "logps/chosen": -2.043954849243164, "logps/rejected": -3.6497085094451904, "loss": 0.6556, "nll_loss": 0.6207262873649597, "rewards/accuracies": 0.75, "rewards/chosen": -0.20439550280570984, "rewards/margins": 0.16057534515857697, "rewards/rejected": -0.364970862865448, "step": 4941 }, { "epoch": 13.530458590006845, "grad_norm": 5.209219932556152, "learning_rate": 3.234246575342466e-07, "log_odds_chosen": 2.7195303440093994, "log_odds_ratio": -0.21602380275726318, "logits/chosen": 0.9401595592498779, "logits/rejected": 1.0039746761322021, "logps/chosen": -2.3117432594299316, "logps/rejected": -4.9444379806518555, "loss": 0.6995, "nll_loss": 0.6779172420501709, "rewards/accuracies": 0.875, "rewards/chosen": -0.2311743199825287, "rewards/margins": 0.26326948404312134, "rewards/rejected": -0.49444380402565, "step": 4942 }, { "epoch": 13.533196440793978, "grad_norm": 5.205095291137695, "learning_rate": 3.2328767123287673e-07, "log_odds_chosen": 1.9450669288635254, "log_odds_ratio": -0.21765495836734772, "logits/chosen": 0.8245558738708496, "logits/rejected": 0.8298885822296143, "logps/chosen": -2.210125207901001, "logps/rejected": -4.045129776000977, "loss": 0.6352, "nll_loss": 0.6133939623832703, "rewards/accuracies": 1.0, "rewards/chosen": -0.22101251780986786, "rewards/margins": 0.18350045382976532, "rewards/rejected": -0.40451300144195557, "step": 4943 }, { "epoch": 13.535934291581109, "grad_norm": 6.64583158493042, "learning_rate": 3.2315068493150683e-07, "log_odds_chosen": 1.9207148551940918, "log_odds_ratio": -0.26913687586784363, "logits/chosen": 0.9964911937713623, "logits/rejected": 0.9742976427078247, "logps/chosen": -2.7851004600524902, "logps/rejected": -4.638611316680908, "loss": 0.7264, "nll_loss": 0.6994666457176208, "rewards/accuracies": 1.0, "rewards/chosen": -0.27851006388664246, "rewards/margins": 0.18535107374191284, "rewards/rejected": -0.4638611078262329, "step": 4944 }, { "epoch": 13.538672142368242, "grad_norm": 5.689730167388916, "learning_rate": 3.23013698630137e-07, "log_odds_chosen": 2.2468252182006836, "log_odds_ratio": -0.31816428899765015, "logits/chosen": 0.857968807220459, "logits/rejected": 0.8962356448173523, "logps/chosen": -2.2624263763427734, "logps/rejected": -4.446629524230957, "loss": 0.6787, "nll_loss": 0.6468701362609863, "rewards/accuracies": 1.0, "rewards/chosen": -0.22624263167381287, "rewards/margins": 0.21842029690742493, "rewards/rejected": -0.4446629285812378, "step": 4945 }, { "epoch": 13.541409993155373, "grad_norm": 4.922756195068359, "learning_rate": 3.2287671232876713e-07, "log_odds_chosen": 2.9165680408477783, "log_odds_ratio": -0.11461695283651352, "logits/chosen": 0.9478403925895691, "logits/rejected": 0.9635862112045288, "logps/chosen": -2.1097824573516846, "logps/rejected": -4.849147319793701, "loss": 0.6072, "nll_loss": 0.5957193374633789, "rewards/accuracies": 1.0, "rewards/chosen": -0.21097823977470398, "rewards/margins": 0.27393651008605957, "rewards/rejected": -0.48491474986076355, "step": 4946 }, { "epoch": 13.544147843942506, "grad_norm": 7.903217792510986, "learning_rate": 3.2273972602739723e-07, "log_odds_chosen": 0.9241494536399841, "log_odds_ratio": -0.6104356646537781, "logits/chosen": 1.0089970827102661, "logits/rejected": 1.0195484161376953, "logps/chosen": -2.8884973526000977, "logps/rejected": -3.7607662677764893, "loss": 0.6628, "nll_loss": 0.6017512083053589, "rewards/accuracies": 0.75, "rewards/chosen": -0.28884971141815186, "rewards/margins": 0.08722691237926483, "rewards/rejected": -0.3760766088962555, "step": 4947 }, { "epoch": 13.546885694729637, "grad_norm": 5.115761756896973, "learning_rate": 3.226027397260274e-07, "log_odds_chosen": 3.1061062812805176, "log_odds_ratio": -0.12092045694589615, "logits/chosen": 0.9971137046813965, "logits/rejected": 0.9503961205482483, "logps/chosen": -2.4798665046691895, "logps/rejected": -5.477374076843262, "loss": 0.8391, "nll_loss": 0.827033281326294, "rewards/accuracies": 1.0, "rewards/chosen": -0.24798665940761566, "rewards/margins": 0.29975077509880066, "rewards/rejected": -0.5477374792098999, "step": 4948 }, { "epoch": 13.54962354551677, "grad_norm": 4.948269844055176, "learning_rate": 3.2246575342465753e-07, "log_odds_chosen": 1.7272762060165405, "log_odds_ratio": -0.2654268145561218, "logits/chosen": 0.7789151072502136, "logits/rejected": 0.8354110717773438, "logps/chosen": -2.136279582977295, "logps/rejected": -3.7665810585021973, "loss": 0.6625, "nll_loss": 0.6359293460845947, "rewards/accuracies": 1.0, "rewards/chosen": -0.21362794935703278, "rewards/margins": 0.16303014755249023, "rewards/rejected": -0.3766581416130066, "step": 4949 }, { "epoch": 13.552361396303901, "grad_norm": 6.217525959014893, "learning_rate": 3.2232876712328763e-07, "log_odds_chosen": 1.576632022857666, "log_odds_ratio": -0.2984314262866974, "logits/chosen": 0.6292895674705505, "logits/rejected": 0.6392335891723633, "logps/chosen": -2.136976718902588, "logps/rejected": -3.5631263256073, "loss": 0.714, "nll_loss": 0.6841709017753601, "rewards/accuracies": 0.875, "rewards/chosen": -0.2136976718902588, "rewards/margins": 0.14261497557163239, "rewards/rejected": -0.35631266236305237, "step": 4950 }, { "epoch": 13.555099247091034, "grad_norm": 5.15066385269165, "learning_rate": 3.221917808219178e-07, "log_odds_chosen": 1.5030165910720825, "log_odds_ratio": -0.23372209072113037, "logits/chosen": 0.8153736591339111, "logits/rejected": 0.8256027698516846, "logps/chosen": -1.9809467792510986, "logps/rejected": -3.3549489974975586, "loss": 0.7123, "nll_loss": 0.6889368295669556, "rewards/accuracies": 1.0, "rewards/chosen": -0.1980946809053421, "rewards/margins": 0.13740022480487823, "rewards/rejected": -0.33549490571022034, "step": 4951 }, { "epoch": 13.557837097878165, "grad_norm": 4.20636510848999, "learning_rate": 3.2205479452054794e-07, "log_odds_chosen": 5.080771446228027, "log_odds_ratio": -0.03275405615568161, "logits/chosen": 0.9381860494613647, "logits/rejected": 0.9118101596832275, "logps/chosen": -2.255601644515991, "logps/rejected": -7.166625499725342, "loss": 0.6719, "nll_loss": 0.6686520576477051, "rewards/accuracies": 1.0, "rewards/chosen": -0.22556017339229584, "rewards/margins": 0.4911023676395416, "rewards/rejected": -0.7166625261306763, "step": 4952 }, { "epoch": 13.560574948665298, "grad_norm": 7.561925411224365, "learning_rate": 3.219178082191781e-07, "log_odds_chosen": 1.114464282989502, "log_odds_ratio": -0.5758053064346313, "logits/chosen": 1.0442242622375488, "logits/rejected": 1.01778244972229, "logps/chosen": -2.7756524085998535, "logps/rejected": -3.821272373199463, "loss": 0.7655, "nll_loss": 0.707878589630127, "rewards/accuracies": 0.875, "rewards/chosen": -0.27756524085998535, "rewards/margins": 0.10456197708845139, "rewards/rejected": -0.3821272552013397, "step": 4953 }, { "epoch": 13.56331279945243, "grad_norm": 6.418466091156006, "learning_rate": 3.217808219178082e-07, "log_odds_chosen": 2.508350133895874, "log_odds_ratio": -0.19388271868228912, "logits/chosen": 0.7456825971603394, "logits/rejected": 0.726518988609314, "logps/chosen": -2.2441115379333496, "logps/rejected": -4.535841941833496, "loss": 0.8271, "nll_loss": 0.807727575302124, "rewards/accuracies": 1.0, "rewards/chosen": -0.22441115975379944, "rewards/margins": 0.22917306423187256, "rewards/rejected": -0.4535842537879944, "step": 4954 }, { "epoch": 13.566050650239562, "grad_norm": 5.850602626800537, "learning_rate": 3.2164383561643834e-07, "log_odds_chosen": 1.5569407939910889, "log_odds_ratio": -0.2576473653316498, "logits/chosen": 0.8333292603492737, "logits/rejected": 0.7938652634620667, "logps/chosen": -1.8676811456680298, "logps/rejected": -3.2838239669799805, "loss": 0.6728, "nll_loss": 0.6470544934272766, "rewards/accuracies": 1.0, "rewards/chosen": -0.18676812946796417, "rewards/margins": 0.14161428809165955, "rewards/rejected": -0.3283824026584625, "step": 4955 }, { "epoch": 13.568788501026694, "grad_norm": 5.14963960647583, "learning_rate": 3.215068493150685e-07, "log_odds_chosen": 1.4208792448043823, "log_odds_ratio": -0.4346192181110382, "logits/chosen": 1.034699559211731, "logits/rejected": 1.0616662502288818, "logps/chosen": -2.071772336959839, "logps/rejected": -3.4310386180877686, "loss": 0.6464, "nll_loss": 0.6029490828514099, "rewards/accuracies": 0.75, "rewards/chosen": -0.20717723667621613, "rewards/margins": 0.13592661917209625, "rewards/rejected": -0.34310388565063477, "step": 4956 }, { "epoch": 13.571526351813826, "grad_norm": 10.14084243774414, "learning_rate": 3.213698630136986e-07, "log_odds_chosen": 0.595871090888977, "log_odds_ratio": -0.6552983522415161, "logits/chosen": 0.8009711503982544, "logits/rejected": 0.7640287280082703, "logps/chosen": -2.46439790725708, "logps/rejected": -2.961204767227173, "loss": 0.7345, "nll_loss": 0.6689244508743286, "rewards/accuracies": 0.75, "rewards/chosen": -0.24643976986408234, "rewards/margins": 0.04968070983886719, "rewards/rejected": -0.29612046480178833, "step": 4957 }, { "epoch": 13.574264202600958, "grad_norm": 6.3514533042907715, "learning_rate": 3.2123287671232874e-07, "log_odds_chosen": 3.1045165061950684, "log_odds_ratio": -0.19785766303539276, "logits/chosen": 0.9587020874023438, "logits/rejected": 1.0753525495529175, "logps/chosen": -2.635225296020508, "logps/rejected": -5.635549545288086, "loss": 0.6305, "nll_loss": 0.6107114553451538, "rewards/accuracies": 1.0, "rewards/chosen": -0.26352250576019287, "rewards/margins": 0.30003246665000916, "rewards/rejected": -0.5635550022125244, "step": 4958 }, { "epoch": 13.57700205338809, "grad_norm": 6.8313117027282715, "learning_rate": 3.210958904109589e-07, "log_odds_chosen": 0.9790657162666321, "log_odds_ratio": -0.47020092606544495, "logits/chosen": 0.7749989032745361, "logits/rejected": 0.7879316806793213, "logps/chosen": -2.2394087314605713, "logps/rejected": -3.1468279361724854, "loss": 0.7181, "nll_loss": 0.6710301041603088, "rewards/accuracies": 0.75, "rewards/chosen": -0.2239408940076828, "rewards/margins": 0.09074191004037857, "rewards/rejected": -0.31468281149864197, "step": 4959 }, { "epoch": 13.579739904175222, "grad_norm": 5.580508708953857, "learning_rate": 3.2095890410958905e-07, "log_odds_chosen": 1.1656367778778076, "log_odds_ratio": -0.32483792304992676, "logits/chosen": 0.8138331770896912, "logits/rejected": 0.7564674615859985, "logps/chosen": -1.6099274158477783, "logps/rejected": -2.6347713470458984, "loss": 0.6788, "nll_loss": 0.6463130116462708, "rewards/accuracies": 1.0, "rewards/chosen": -0.16099274158477783, "rewards/margins": 0.10248441249132156, "rewards/rejected": -0.2634771466255188, "step": 4960 }, { "epoch": 13.582477754962355, "grad_norm": 5.716150760650635, "learning_rate": 3.2082191780821915e-07, "log_odds_chosen": 1.2376419305801392, "log_odds_ratio": -0.35070598125457764, "logits/chosen": 0.9997091293334961, "logits/rejected": 1.0217382907867432, "logps/chosen": -1.8681026697158813, "logps/rejected": -2.994817018508911, "loss": 0.6015, "nll_loss": 0.5664215087890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.18681026995182037, "rewards/margins": 0.11267144232988358, "rewards/rejected": -0.29948171973228455, "step": 4961 }, { "epoch": 13.585215605749486, "grad_norm": 5.524420261383057, "learning_rate": 3.206849315068493e-07, "log_odds_chosen": 2.470705986022949, "log_odds_ratio": -0.27453669905662537, "logits/chosen": 0.7317177653312683, "logits/rejected": 0.777890682220459, "logps/chosen": -2.570565700531006, "logps/rejected": -4.938727378845215, "loss": 0.6399, "nll_loss": 0.6124935746192932, "rewards/accuracies": 0.875, "rewards/chosen": -0.2570565938949585, "rewards/margins": 0.2368161827325821, "rewards/rejected": -0.4938727617263794, "step": 4962 }, { "epoch": 13.587953456536619, "grad_norm": 5.826864719390869, "learning_rate": 3.2054794520547945e-07, "log_odds_chosen": 3.0445642471313477, "log_odds_ratio": -0.2166413515806198, "logits/chosen": 0.6315916776657104, "logits/rejected": 0.6844148635864258, "logps/chosen": -1.9320274591445923, "logps/rejected": -4.8475494384765625, "loss": 0.7689, "nll_loss": 0.747280478477478, "rewards/accuracies": 1.0, "rewards/chosen": -0.19320273399353027, "rewards/margins": 0.29155224561691284, "rewards/rejected": -0.4847549796104431, "step": 4963 }, { "epoch": 13.59069130732375, "grad_norm": 4.969758033752441, "learning_rate": 3.2041095890410955e-07, "log_odds_chosen": 4.055275917053223, "log_odds_ratio": -0.15503579378128052, "logits/chosen": 0.7277237772941589, "logits/rejected": 0.7366971373558044, "logps/chosen": -2.2060861587524414, "logps/rejected": -6.135349273681641, "loss": 0.7011, "nll_loss": 0.6856009364128113, "rewards/accuracies": 0.875, "rewards/chosen": -0.2206086367368698, "rewards/margins": 0.39292630553245544, "rewards/rejected": -0.6135349273681641, "step": 4964 }, { "epoch": 13.593429158110883, "grad_norm": 5.373331069946289, "learning_rate": 3.202739726027397e-07, "log_odds_chosen": 2.1862428188323975, "log_odds_ratio": -0.1996133029460907, "logits/chosen": 0.7150271534919739, "logits/rejected": 0.731894850730896, "logps/chosen": -2.146786689758301, "logps/rejected": -4.217911720275879, "loss": 0.6334, "nll_loss": 0.6134365797042847, "rewards/accuracies": 1.0, "rewards/chosen": -0.21467867493629456, "rewards/margins": 0.20711252093315125, "rewards/rejected": -0.4217911958694458, "step": 4965 }, { "epoch": 13.596167008898014, "grad_norm": 6.047661304473877, "learning_rate": 3.2013698630136985e-07, "log_odds_chosen": 1.450342059135437, "log_odds_ratio": -0.42043471336364746, "logits/chosen": 1.1276812553405762, "logits/rejected": 1.165907382965088, "logps/chosen": -2.6003756523132324, "logps/rejected": -3.9789633750915527, "loss": 0.7493, "nll_loss": 0.7072320580482483, "rewards/accuracies": 0.625, "rewards/chosen": -0.2600375711917877, "rewards/margins": 0.1378587931394577, "rewards/rejected": -0.3978963792324066, "step": 4966 }, { "epoch": 13.598904859685147, "grad_norm": 5.091878890991211, "learning_rate": 3.2e-07, "log_odds_chosen": 1.7643016576766968, "log_odds_ratio": -0.282755970954895, "logits/chosen": 0.8572995662689209, "logits/rejected": 0.9090979099273682, "logps/chosen": -2.1428062915802, "logps/rejected": -3.8062002658843994, "loss": 0.7116, "nll_loss": 0.6833667755126953, "rewards/accuracies": 0.875, "rewards/chosen": -0.2142806351184845, "rewards/margins": 0.16633938252925873, "rewards/rejected": -0.3806200325489044, "step": 4967 }, { "epoch": 13.60164271047228, "grad_norm": 6.230623245239258, "learning_rate": 3.198630136986301e-07, "log_odds_chosen": 1.0764102935791016, "log_odds_ratio": -0.48978134989738464, "logits/chosen": 0.7171792387962341, "logits/rejected": 0.7355822324752808, "logps/chosen": -2.7232141494750977, "logps/rejected": -3.7055084705352783, "loss": 0.7399, "nll_loss": 0.6909064054489136, "rewards/accuracies": 0.75, "rewards/chosen": -0.2723214328289032, "rewards/margins": 0.09822940826416016, "rewards/rejected": -0.37055087089538574, "step": 4968 }, { "epoch": 13.604380561259411, "grad_norm": 7.055217266082764, "learning_rate": 3.1972602739726026e-07, "log_odds_chosen": 2.0713906288146973, "log_odds_ratio": -0.5313354134559631, "logits/chosen": 0.9233747124671936, "logits/rejected": 0.9807842969894409, "logps/chosen": -3.100937604904175, "logps/rejected": -5.109279632568359, "loss": 0.7596, "nll_loss": 0.7065154314041138, "rewards/accuracies": 0.875, "rewards/chosen": -0.3100937604904175, "rewards/margins": 0.20083419978618622, "rewards/rejected": -0.5109279751777649, "step": 4969 }, { "epoch": 13.607118412046544, "grad_norm": 5.808156967163086, "learning_rate": 3.195890410958904e-07, "log_odds_chosen": 1.3833599090576172, "log_odds_ratio": -0.30428338050842285, "logits/chosen": 0.788865327835083, "logits/rejected": 0.8058786392211914, "logps/chosen": -1.9002636671066284, "logps/rejected": -3.1641130447387695, "loss": 0.5816, "nll_loss": 0.5511306524276733, "rewards/accuracies": 1.0, "rewards/chosen": -0.19002637267112732, "rewards/margins": 0.1263849437236786, "rewards/rejected": -0.3164113163948059, "step": 4970 }, { "epoch": 13.609856262833675, "grad_norm": 5.469506740570068, "learning_rate": 3.194520547945205e-07, "log_odds_chosen": 2.488603353500366, "log_odds_ratio": -0.3307952284812927, "logits/chosen": 0.8906970024108887, "logits/rejected": 0.8262154459953308, "logps/chosen": -1.7818822860717773, "logps/rejected": -4.182518482208252, "loss": 0.688, "nll_loss": 0.6548933982849121, "rewards/accuracies": 0.875, "rewards/chosen": -0.17818821966648102, "rewards/margins": 0.2400636225938797, "rewards/rejected": -0.4182518720626831, "step": 4971 }, { "epoch": 13.612594113620808, "grad_norm": 6.308000564575195, "learning_rate": 3.1931506849315066e-07, "log_odds_chosen": 2.417919158935547, "log_odds_ratio": -0.1804289072751999, "logits/chosen": 0.4909959137439728, "logits/rejected": 0.4766544699668884, "logps/chosen": -1.6449949741363525, "logps/rejected": -3.890244245529175, "loss": 0.5741, "nll_loss": 0.5560243129730225, "rewards/accuracies": 1.0, "rewards/chosen": -0.16449949145317078, "rewards/margins": 0.22452494502067566, "rewards/rejected": -0.38902443647384644, "step": 4972 }, { "epoch": 13.61533196440794, "grad_norm": 5.084789752960205, "learning_rate": 3.191780821917808e-07, "log_odds_chosen": 3.1352791786193848, "log_odds_ratio": -0.14469268918037415, "logits/chosen": 0.887581467628479, "logits/rejected": 0.952099084854126, "logps/chosen": -2.5631396770477295, "logps/rejected": -5.582857131958008, "loss": 0.6892, "nll_loss": 0.6747080683708191, "rewards/accuracies": 1.0, "rewards/chosen": -0.2563139796257019, "rewards/margins": 0.30197179317474365, "rewards/rejected": -0.5582857131958008, "step": 4973 }, { "epoch": 13.618069815195073, "grad_norm": 4.775389194488525, "learning_rate": 3.1904109589041097e-07, "log_odds_chosen": 1.8930699825286865, "log_odds_ratio": -0.3434634804725647, "logits/chosen": 0.8180595636367798, "logits/rejected": 0.8183128833770752, "logps/chosen": -1.9626109600067139, "logps/rejected": -3.7439956665039062, "loss": 0.6407, "nll_loss": 0.6063120365142822, "rewards/accuracies": 0.75, "rewards/chosen": -0.19626107811927795, "rewards/margins": 0.17813849449157715, "rewards/rejected": -0.3743995428085327, "step": 4974 }, { "epoch": 13.620807665982204, "grad_norm": 5.104455471038818, "learning_rate": 3.1890410958904106e-07, "log_odds_chosen": 3.4641671180725098, "log_odds_ratio": -0.1694353222846985, "logits/chosen": 0.9957526326179504, "logits/rejected": 1.005326271057129, "logps/chosen": -2.812750816345215, "logps/rejected": -6.209205627441406, "loss": 0.6791, "nll_loss": 0.662189245223999, "rewards/accuracies": 1.0, "rewards/chosen": -0.28127509355545044, "rewards/margins": 0.3396454453468323, "rewards/rejected": -0.6209205389022827, "step": 4975 }, { "epoch": 13.623545516769337, "grad_norm": 6.208702564239502, "learning_rate": 3.187671232876712e-07, "log_odds_chosen": 1.4339003562927246, "log_odds_ratio": -0.48969870805740356, "logits/chosen": 0.8948898911476135, "logits/rejected": 0.914556622505188, "logps/chosen": -2.3188509941101074, "logps/rejected": -3.6532914638519287, "loss": 0.6601, "nll_loss": 0.6110975742340088, "rewards/accuracies": 0.875, "rewards/chosen": -0.23188507556915283, "rewards/margins": 0.13344407081604004, "rewards/rejected": -0.36532920598983765, "step": 4976 }, { "epoch": 13.626283367556468, "grad_norm": 5.767729759216309, "learning_rate": 3.1863013698630137e-07, "log_odds_chosen": 2.15557861328125, "log_odds_ratio": -0.22232569754123688, "logits/chosen": 0.5884127020835876, "logits/rejected": 0.5456587076187134, "logps/chosen": -2.016922950744629, "logps/rejected": -4.058933258056641, "loss": 0.6434, "nll_loss": 0.6211844682693481, "rewards/accuracies": 1.0, "rewards/chosen": -0.20169228315353394, "rewards/margins": 0.20420104265213013, "rewards/rejected": -0.40589332580566406, "step": 4977 }, { "epoch": 13.6290212183436, "grad_norm": 6.621519565582275, "learning_rate": 3.1849315068493147e-07, "log_odds_chosen": 2.1062111854553223, "log_odds_ratio": -0.3237169682979584, "logits/chosen": 0.8258429765701294, "logits/rejected": 0.8579593896865845, "logps/chosen": -2.5518908500671387, "logps/rejected": -4.591710090637207, "loss": 0.6929, "nll_loss": 0.660576581954956, "rewards/accuracies": 1.0, "rewards/chosen": -0.25518909096717834, "rewards/margins": 0.20398195087909698, "rewards/rejected": -0.4591710567474365, "step": 4978 }, { "epoch": 13.631759069130732, "grad_norm": 7.764988422393799, "learning_rate": 3.183561643835616e-07, "log_odds_chosen": 1.389373540878296, "log_odds_ratio": -0.3234093487262726, "logits/chosen": 0.9117955565452576, "logits/rejected": 0.8749962449073792, "logps/chosen": -2.0356945991516113, "logps/rejected": -3.3058629035949707, "loss": 0.6252, "nll_loss": 0.5928596258163452, "rewards/accuracies": 0.875, "rewards/chosen": -0.2035694718360901, "rewards/margins": 0.1270168125629425, "rewards/rejected": -0.330586314201355, "step": 4979 }, { "epoch": 13.634496919917865, "grad_norm": 5.609292507171631, "learning_rate": 3.1821917808219177e-07, "log_odds_chosen": 1.6236448287963867, "log_odds_ratio": -0.2750709056854248, "logits/chosen": 0.9450316429138184, "logits/rejected": 1.006128191947937, "logps/chosen": -2.003486156463623, "logps/rejected": -3.444089889526367, "loss": 0.65, "nll_loss": 0.6224565505981445, "rewards/accuracies": 1.0, "rewards/chosen": -0.2003486156463623, "rewards/margins": 0.1440604031085968, "rewards/rejected": -0.3444089889526367, "step": 4980 }, { "epoch": 13.637234770704996, "grad_norm": 6.276493072509766, "learning_rate": 3.180821917808219e-07, "log_odds_chosen": 1.0350620746612549, "log_odds_ratio": -0.8269721865653992, "logits/chosen": 0.7253366708755493, "logits/rejected": 0.7674118280410767, "logps/chosen": -2.6559205055236816, "logps/rejected": -3.6198368072509766, "loss": 0.8173, "nll_loss": 0.7346464395523071, "rewards/accuracies": 0.625, "rewards/chosen": -0.2655920386314392, "rewards/margins": 0.09639164805412292, "rewards/rejected": -0.36198368668556213, "step": 4981 }, { "epoch": 13.639972621492129, "grad_norm": 5.495395660400391, "learning_rate": 3.17945205479452e-07, "log_odds_chosen": 2.4732649326324463, "log_odds_ratio": -0.23551523685455322, "logits/chosen": 0.8935959339141846, "logits/rejected": 0.8986089825630188, "logps/chosen": -2.1664841175079346, "logps/rejected": -4.525153160095215, "loss": 0.7134, "nll_loss": 0.6898897886276245, "rewards/accuracies": 1.0, "rewards/chosen": -0.2166483998298645, "rewards/margins": 0.23586690425872803, "rewards/rejected": -0.4525153338909149, "step": 4982 }, { "epoch": 13.64271047227926, "grad_norm": 6.134496212005615, "learning_rate": 3.1780821917808223e-07, "log_odds_chosen": 1.6857759952545166, "log_odds_ratio": -0.37364381551742554, "logits/chosen": 0.9087967872619629, "logits/rejected": 1.012873649597168, "logps/chosen": -2.7351794242858887, "logps/rejected": -4.318968296051025, "loss": 0.6337, "nll_loss": 0.5963695049285889, "rewards/accuracies": 0.75, "rewards/chosen": -0.2735179364681244, "rewards/margins": 0.15837888419628143, "rewards/rejected": -0.43189680576324463, "step": 4983 }, { "epoch": 13.645448323066393, "grad_norm": 5.079023361206055, "learning_rate": 3.1767123287671233e-07, "log_odds_chosen": 2.274845600128174, "log_odds_ratio": -0.21861615777015686, "logits/chosen": 0.8686050176620483, "logits/rejected": 0.8557799458503723, "logps/chosen": -2.3444080352783203, "logps/rejected": -4.462579727172852, "loss": 0.6821, "nll_loss": 0.660286545753479, "rewards/accuracies": 1.0, "rewards/chosen": -0.23444080352783203, "rewards/margins": 0.21181720495224, "rewards/rejected": -0.44625797867774963, "step": 4984 }, { "epoch": 13.648186173853524, "grad_norm": 8.239397048950195, "learning_rate": 3.175342465753424e-07, "log_odds_chosen": 1.7293438911437988, "log_odds_ratio": -0.3134937882423401, "logits/chosen": 1.189826488494873, "logits/rejected": 1.24370539188385, "logps/chosen": -3.2387208938598633, "logps/rejected": -4.893918991088867, "loss": 0.772, "nll_loss": 0.7406038045883179, "rewards/accuracies": 0.875, "rewards/chosen": -0.32387208938598633, "rewards/margins": 0.1655198484659195, "rewards/rejected": -0.48939192295074463, "step": 4985 }, { "epoch": 13.650924024640657, "grad_norm": 5.814456939697266, "learning_rate": 3.173972602739726e-07, "log_odds_chosen": 1.8188961744308472, "log_odds_ratio": -0.367519736289978, "logits/chosen": 0.6551706790924072, "logits/rejected": 0.6413390040397644, "logps/chosen": -2.163252830505371, "logps/rejected": -3.89884614944458, "loss": 0.6356, "nll_loss": 0.5988712906837463, "rewards/accuracies": 0.875, "rewards/chosen": -0.2163252830505371, "rewards/margins": 0.17355933785438538, "rewards/rejected": -0.3898845911026001, "step": 4986 }, { "epoch": 13.653661875427789, "grad_norm": 5.420213222503662, "learning_rate": 3.1726027397260273e-07, "log_odds_chosen": 1.7654565572738647, "log_odds_ratio": -0.42682185769081116, "logits/chosen": 1.0561646223068237, "logits/rejected": 1.0790462493896484, "logps/chosen": -2.2381863594055176, "logps/rejected": -3.9181265830993652, "loss": 0.6366, "nll_loss": 0.5939325094223022, "rewards/accuracies": 0.75, "rewards/chosen": -0.22381865978240967, "rewards/margins": 0.16799399256706238, "rewards/rejected": -0.39181262254714966, "step": 4987 }, { "epoch": 13.656399726214921, "grad_norm": 5.393800735473633, "learning_rate": 3.171232876712329e-07, "log_odds_chosen": 3.5583906173706055, "log_odds_ratio": -0.2597479224205017, "logits/chosen": 0.7383869886398315, "logits/rejected": 0.7008450627326965, "logps/chosen": -2.21376895904541, "logps/rejected": -5.64548921585083, "loss": 0.6367, "nll_loss": 0.6107439398765564, "rewards/accuracies": 0.875, "rewards/chosen": -0.22137689590454102, "rewards/margins": 0.34317201375961304, "rewards/rejected": -0.564548909664154, "step": 4988 }, { "epoch": 13.659137577002053, "grad_norm": 4.775329113006592, "learning_rate": 3.16986301369863e-07, "log_odds_chosen": 1.8545000553131104, "log_odds_ratio": -0.24610085785388947, "logits/chosen": 0.5779268145561218, "logits/rejected": 0.6653937101364136, "logps/chosen": -2.2435948848724365, "logps/rejected": -3.9449868202209473, "loss": 0.6266, "nll_loss": 0.6019812822341919, "rewards/accuracies": 1.0, "rewards/chosen": -0.22435946762561798, "rewards/margins": 0.17013923823833466, "rewards/rejected": -0.39449870586395264, "step": 4989 }, { "epoch": 13.661875427789186, "grad_norm": 4.750021457672119, "learning_rate": 3.168493150684932e-07, "log_odds_chosen": 2.40818452835083, "log_odds_ratio": -0.19226691126823425, "logits/chosen": 0.7659632563591003, "logits/rejected": 0.7689363360404968, "logps/chosen": -1.9552972316741943, "logps/rejected": -4.154985427856445, "loss": 0.6753, "nll_loss": 0.6560536026954651, "rewards/accuracies": 1.0, "rewards/chosen": -0.1955297291278839, "rewards/margins": 0.21996884047985077, "rewards/rejected": -0.4154985547065735, "step": 4990 }, { "epoch": 13.664613278576317, "grad_norm": 4.784526348114014, "learning_rate": 3.167123287671233e-07, "log_odds_chosen": 1.9294788837432861, "log_odds_ratio": -0.3259906768798828, "logits/chosen": 0.8675055503845215, "logits/rejected": 0.9684567451477051, "logps/chosen": -2.2181529998779297, "logps/rejected": -4.044992923736572, "loss": 0.6717, "nll_loss": 0.6391178965568542, "rewards/accuracies": 0.875, "rewards/chosen": -0.221815288066864, "rewards/margins": 0.1826840043067932, "rewards/rejected": -0.4044992923736572, "step": 4991 }, { "epoch": 13.66735112936345, "grad_norm": 6.033519268035889, "learning_rate": 3.165753424657534e-07, "log_odds_chosen": 1.8657103776931763, "log_odds_ratio": -0.22843337059020996, "logits/chosen": 0.8154749870300293, "logits/rejected": 0.8404499292373657, "logps/chosen": -2.228853940963745, "logps/rejected": -3.993023633956909, "loss": 0.6743, "nll_loss": 0.651417076587677, "rewards/accuracies": 1.0, "rewards/chosen": -0.222885400056839, "rewards/margins": 0.17641696333885193, "rewards/rejected": -0.3993023633956909, "step": 4992 }, { "epoch": 13.670088980150581, "grad_norm": 5.878998279571533, "learning_rate": 3.1643835616438354e-07, "log_odds_chosen": 1.0036327838897705, "log_odds_ratio": -0.43389248847961426, "logits/chosen": 0.904579222202301, "logits/rejected": 0.9341180324554443, "logps/chosen": -1.7697018384933472, "logps/rejected": -2.6865010261535645, "loss": 0.5826, "nll_loss": 0.5391857624053955, "rewards/accuracies": 0.625, "rewards/chosen": -0.1769701987504959, "rewards/margins": 0.09167990833520889, "rewards/rejected": -0.2686501145362854, "step": 4993 }, { "epoch": 13.672826830937714, "grad_norm": 6.940813064575195, "learning_rate": 3.163013698630137e-07, "log_odds_chosen": 4.016894340515137, "log_odds_ratio": -0.10579618066549301, "logits/chosen": 0.8430723547935486, "logits/rejected": 0.8473829030990601, "logps/chosen": -2.0482561588287354, "logps/rejected": -5.883835315704346, "loss": 0.6376, "nll_loss": 0.6270459890365601, "rewards/accuracies": 1.0, "rewards/chosen": -0.20482562482357025, "rewards/margins": 0.38355791568756104, "rewards/rejected": -0.5883835554122925, "step": 4994 }, { "epoch": 13.675564681724847, "grad_norm": 4.9284234046936035, "learning_rate": 3.1616438356164384e-07, "log_odds_chosen": 1.549681544303894, "log_odds_ratio": -0.31161901354789734, "logits/chosen": 0.9931716918945312, "logits/rejected": 0.956437349319458, "logps/chosen": -2.5571482181549072, "logps/rejected": -4.027153968811035, "loss": 0.6882, "nll_loss": 0.6570749282836914, "rewards/accuracies": 0.875, "rewards/chosen": -0.2557148337364197, "rewards/margins": 0.14700058102607727, "rewards/rejected": -0.40271541476249695, "step": 4995 }, { "epoch": 13.678302532511978, "grad_norm": 4.467111110687256, "learning_rate": 3.1602739726027394e-07, "log_odds_chosen": 3.1545908451080322, "log_odds_ratio": -0.23543597757816315, "logits/chosen": 0.986782431602478, "logits/rejected": 1.0212585926055908, "logps/chosen": -2.42565655708313, "logps/rejected": -5.515817642211914, "loss": 0.7172, "nll_loss": 0.6936392188072205, "rewards/accuracies": 1.0, "rewards/chosen": -0.24256564676761627, "rewards/margins": 0.3090161383152008, "rewards/rejected": -0.5515817999839783, "step": 4996 }, { "epoch": 13.681040383299111, "grad_norm": 6.2957658767700195, "learning_rate": 3.158904109589041e-07, "log_odds_chosen": 1.3064062595367432, "log_odds_ratio": -0.5588728189468384, "logits/chosen": 0.7070415019989014, "logits/rejected": 0.7379512190818787, "logps/chosen": -2.398860216140747, "logps/rejected": -3.683539628982544, "loss": 0.7479, "nll_loss": 0.6920110583305359, "rewards/accuracies": 0.625, "rewards/chosen": -0.23988603055477142, "rewards/margins": 0.12846790254116058, "rewards/rejected": -0.368353933095932, "step": 4997 }, { "epoch": 13.683778234086242, "grad_norm": 4.8927507400512695, "learning_rate": 3.1575342465753424e-07, "log_odds_chosen": 2.3943064212799072, "log_odds_ratio": -0.2049715518951416, "logits/chosen": 0.6120996475219727, "logits/rejected": 0.6770172119140625, "logps/chosen": -2.095363140106201, "logps/rejected": -4.361891269683838, "loss": 0.6826, "nll_loss": 0.6621280908584595, "rewards/accuracies": 0.875, "rewards/chosen": -0.2095363438129425, "rewards/margins": 0.2266528159379959, "rewards/rejected": -0.43618911504745483, "step": 4998 }, { "epoch": 13.686516084873375, "grad_norm": 5.910754203796387, "learning_rate": 3.1561643835616434e-07, "log_odds_chosen": 2.068514823913574, "log_odds_ratio": -0.19788210093975067, "logits/chosen": 0.8274850845336914, "logits/rejected": 0.8542735576629639, "logps/chosen": -2.5186758041381836, "logps/rejected": -4.497523307800293, "loss": 0.6746, "nll_loss": 0.6547954678535461, "rewards/accuracies": 1.0, "rewards/chosen": -0.2518675923347473, "rewards/margins": 0.19788476824760437, "rewards/rejected": -0.4497523605823517, "step": 4999 }, { "epoch": 13.689253935660506, "grad_norm": 5.128564357757568, "learning_rate": 3.154794520547945e-07, "log_odds_chosen": 1.614943265914917, "log_odds_ratio": -0.4129212498664856, "logits/chosen": 0.7410727143287659, "logits/rejected": 0.7700827717781067, "logps/chosen": -2.2054145336151123, "logps/rejected": -3.6741185188293457, "loss": 0.6605, "nll_loss": 0.6191977858543396, "rewards/accuracies": 0.75, "rewards/chosen": -0.22054144740104675, "rewards/margins": 0.14687037467956543, "rewards/rejected": -0.36741185188293457, "step": 5000 }, { "epoch": 13.69199178644764, "grad_norm": 5.693969249725342, "learning_rate": 3.1534246575342465e-07, "log_odds_chosen": 2.414548397064209, "log_odds_ratio": -0.2708059251308441, "logits/chosen": 0.9664888381958008, "logits/rejected": 0.930006742477417, "logps/chosen": -1.7997376918792725, "logps/rejected": -4.014739990234375, "loss": 0.6288, "nll_loss": 0.6017524600028992, "rewards/accuracies": 0.875, "rewards/chosen": -0.1799737811088562, "rewards/margins": 0.2215002179145813, "rewards/rejected": -0.4014739990234375, "step": 5001 }, { "epoch": 13.69472963723477, "grad_norm": 5.080500602722168, "learning_rate": 3.152054794520548e-07, "log_odds_chosen": 1.717274785041809, "log_odds_ratio": -0.36419904232025146, "logits/chosen": 0.9190526604652405, "logits/rejected": 0.9165946245193481, "logps/chosen": -2.133592128753662, "logps/rejected": -3.780966281890869, "loss": 0.7479, "nll_loss": 0.7114548087120056, "rewards/accuracies": 0.875, "rewards/chosen": -0.21335919201374054, "rewards/margins": 0.16473744809627533, "rewards/rejected": -0.37809664011001587, "step": 5002 }, { "epoch": 13.697467488021903, "grad_norm": 5.085547924041748, "learning_rate": 3.150684931506849e-07, "log_odds_chosen": 2.083247184753418, "log_odds_ratio": -0.21160423755645752, "logits/chosen": 0.7973203659057617, "logits/rejected": 0.8321040868759155, "logps/chosen": -2.2871928215026855, "logps/rejected": -4.261689186096191, "loss": 0.5992, "nll_loss": 0.5780169367790222, "rewards/accuracies": 1.0, "rewards/chosen": -0.22871927917003632, "rewards/margins": 0.1974496692419052, "rewards/rejected": -0.42616894841194153, "step": 5003 }, { "epoch": 13.700205338809035, "grad_norm": 5.34670352935791, "learning_rate": 3.1493150684931505e-07, "log_odds_chosen": 2.5136003494262695, "log_odds_ratio": -0.2196538746356964, "logits/chosen": 0.673984169960022, "logits/rejected": 0.6028996109962463, "logps/chosen": -1.893316388130188, "logps/rejected": -4.28156852722168, "loss": 0.6818, "nll_loss": 0.6598182320594788, "rewards/accuracies": 1.0, "rewards/chosen": -0.18933165073394775, "rewards/margins": 0.2388252168893814, "rewards/rejected": -0.42815688252449036, "step": 5004 }, { "epoch": 13.702943189596168, "grad_norm": 5.96221923828125, "learning_rate": 3.147945205479452e-07, "log_odds_chosen": 1.2122021913528442, "log_odds_ratio": -0.32021957635879517, "logits/chosen": 0.9198600649833679, "logits/rejected": 0.8979232907295227, "logps/chosen": -2.7588467597961426, "logps/rejected": -3.9238016605377197, "loss": 0.7505, "nll_loss": 0.7184773087501526, "rewards/accuracies": 0.875, "rewards/chosen": -0.2758846879005432, "rewards/margins": 0.11649546772241592, "rewards/rejected": -0.39238014817237854, "step": 5005 }, { "epoch": 13.705681040383299, "grad_norm": 5.211341381072998, "learning_rate": 3.146575342465753e-07, "log_odds_chosen": 0.718388557434082, "log_odds_ratio": -0.504591166973114, "logits/chosen": 0.937725305557251, "logits/rejected": 1.008916974067688, "logps/chosen": -2.643681049346924, "logps/rejected": -3.3257603645324707, "loss": 0.7522, "nll_loss": 0.7017471790313721, "rewards/accuracies": 0.625, "rewards/chosen": -0.26436811685562134, "rewards/margins": 0.06820794194936752, "rewards/rejected": -0.33257603645324707, "step": 5006 }, { "epoch": 13.708418891170432, "grad_norm": 11.386613845825195, "learning_rate": 3.1452054794520545e-07, "log_odds_chosen": 2.5489585399627686, "log_odds_ratio": -0.5452380776405334, "logits/chosen": 0.9798365831375122, "logits/rejected": 0.9106416702270508, "logps/chosen": -2.6501851081848145, "logps/rejected": -5.062044143676758, "loss": 0.7323, "nll_loss": 0.6777679920196533, "rewards/accuracies": 0.625, "rewards/chosen": -0.265018492937088, "rewards/margins": 0.24118590354919434, "rewards/rejected": -0.50620436668396, "step": 5007 }, { "epoch": 13.711156741957563, "grad_norm": 5.341933727264404, "learning_rate": 3.143835616438356e-07, "log_odds_chosen": 1.9426015615463257, "log_odds_ratio": -0.2805559039115906, "logits/chosen": 0.6674289703369141, "logits/rejected": 0.6223315000534058, "logps/chosen": -2.1103575229644775, "logps/rejected": -3.951275587081909, "loss": 0.6971, "nll_loss": 0.6690543293952942, "rewards/accuracies": 1.0, "rewards/chosen": -0.21103575825691223, "rewards/margins": 0.18409180641174316, "rewards/rejected": -0.3951275646686554, "step": 5008 }, { "epoch": 13.713894592744696, "grad_norm": 6.439633369445801, "learning_rate": 3.1424657534246576e-07, "log_odds_chosen": 2.381162166595459, "log_odds_ratio": -0.26560452580451965, "logits/chosen": 0.9738925099372864, "logits/rejected": 0.8954877853393555, "logps/chosen": -2.4019293785095215, "logps/rejected": -4.615389823913574, "loss": 0.6708, "nll_loss": 0.6442441940307617, "rewards/accuracies": 0.875, "rewards/chosen": -0.24019291996955872, "rewards/margins": 0.22134602069854736, "rewards/rejected": -0.46153897047042847, "step": 5009 }, { "epoch": 13.716632443531827, "grad_norm": 6.731269836425781, "learning_rate": 3.1410958904109586e-07, "log_odds_chosen": 3.0102851390838623, "log_odds_ratio": -0.464867502450943, "logits/chosen": 0.8325722813606262, "logits/rejected": 0.79673832654953, "logps/chosen": -2.1554479598999023, "logps/rejected": -4.983920097351074, "loss": 0.6975, "nll_loss": 0.6510610580444336, "rewards/accuracies": 0.875, "rewards/chosen": -0.21554480493068695, "rewards/margins": 0.28284722566604614, "rewards/rejected": -0.4983920454978943, "step": 5010 }, { "epoch": 13.71937029431896, "grad_norm": 4.8992109298706055, "learning_rate": 3.13972602739726e-07, "log_odds_chosen": 1.8261934518814087, "log_odds_ratio": -0.2264992892742157, "logits/chosen": 0.9063025712966919, "logits/rejected": 0.8816655278205872, "logps/chosen": -1.8078434467315674, "logps/rejected": -3.4819960594177246, "loss": 0.6502, "nll_loss": 0.6275375485420227, "rewards/accuracies": 1.0, "rewards/chosen": -0.18078435957431793, "rewards/margins": 0.16741526126861572, "rewards/rejected": -0.34819960594177246, "step": 5011 }, { "epoch": 13.722108145106091, "grad_norm": 6.076082706451416, "learning_rate": 3.1383561643835616e-07, "log_odds_chosen": 1.733602523803711, "log_odds_ratio": -0.2429555505514145, "logits/chosen": 0.8462960720062256, "logits/rejected": 0.865249514579773, "logps/chosen": -2.160677909851074, "logps/rejected": -3.788221597671509, "loss": 0.6628, "nll_loss": 0.6385005712509155, "rewards/accuracies": 1.0, "rewards/chosen": -0.21606780588626862, "rewards/margins": 0.1627543568611145, "rewards/rejected": -0.3788221478462219, "step": 5012 }, { "epoch": 13.724845995893224, "grad_norm": 5.801656246185303, "learning_rate": 3.1369863013698626e-07, "log_odds_chosen": 1.8176831007003784, "log_odds_ratio": -0.26608026027679443, "logits/chosen": 0.9337563514709473, "logits/rejected": 0.9847217798233032, "logps/chosen": -1.7665562629699707, "logps/rejected": -3.406667947769165, "loss": 0.799, "nll_loss": 0.7724111676216125, "rewards/accuracies": 1.0, "rewards/chosen": -0.1766556203365326, "rewards/margins": 0.164011150598526, "rewards/rejected": -0.3406667709350586, "step": 5013 }, { "epoch": 13.727583846680355, "grad_norm": 6.122660160064697, "learning_rate": 3.1356164383561647e-07, "log_odds_chosen": 1.3862115144729614, "log_odds_ratio": -0.3842831254005432, "logits/chosen": 0.7736804485321045, "logits/rejected": 0.8141211867332458, "logps/chosen": -1.8438094854354858, "logps/rejected": -3.1466450691223145, "loss": 0.5642, "nll_loss": 0.5257766842842102, "rewards/accuracies": 0.75, "rewards/chosen": -0.18438094854354858, "rewards/margins": 0.13028354942798615, "rewards/rejected": -0.31466448307037354, "step": 5014 }, { "epoch": 13.730321697467488, "grad_norm": 6.724706649780273, "learning_rate": 3.1342465753424657e-07, "log_odds_chosen": 1.4550039768218994, "log_odds_ratio": -0.26347294449806213, "logits/chosen": 0.680324375629425, "logits/rejected": 0.7585428953170776, "logps/chosen": -3.294947624206543, "logps/rejected": -4.665945053100586, "loss": 0.8035, "nll_loss": 0.7771629095077515, "rewards/accuracies": 0.875, "rewards/chosen": -0.32949474453926086, "rewards/margins": 0.13709977269172668, "rewards/rejected": -0.46659451723098755, "step": 5015 }, { "epoch": 13.73305954825462, "grad_norm": 5.334458827972412, "learning_rate": 3.132876712328767e-07, "log_odds_chosen": 2.677611827850342, "log_odds_ratio": -0.3774653673171997, "logits/chosen": 0.9370238780975342, "logits/rejected": 0.9271891117095947, "logps/chosen": -2.5332252979278564, "logps/rejected": -5.175609588623047, "loss": 0.7391, "nll_loss": 0.7013239860534668, "rewards/accuracies": 0.875, "rewards/chosen": -0.2533225417137146, "rewards/margins": 0.2642384171485901, "rewards/rejected": -0.5175609588623047, "step": 5016 }, { "epoch": 13.735797399041752, "grad_norm": 5.2780537605285645, "learning_rate": 3.131506849315068e-07, "log_odds_chosen": 2.650721788406372, "log_odds_ratio": -0.28568723797798157, "logits/chosen": 0.9785943627357483, "logits/rejected": 1.0134193897247314, "logps/chosen": -2.2533035278320312, "logps/rejected": -4.78087043762207, "loss": 0.6288, "nll_loss": 0.6002638339996338, "rewards/accuracies": 0.875, "rewards/chosen": -0.22533035278320312, "rewards/margins": 0.25275668501853943, "rewards/rejected": -0.47808700799942017, "step": 5017 }, { "epoch": 13.738535249828884, "grad_norm": 4.704178333282471, "learning_rate": 3.1301369863013697e-07, "log_odds_chosen": 2.3737730979919434, "log_odds_ratio": -0.16610166430473328, "logits/chosen": 1.017982840538025, "logits/rejected": 1.0730547904968262, "logps/chosen": -2.1550445556640625, "logps/rejected": -4.411579132080078, "loss": 0.6255, "nll_loss": 0.6088407039642334, "rewards/accuracies": 1.0, "rewards/chosen": -0.2155044674873352, "rewards/margins": 0.22565343976020813, "rewards/rejected": -0.44115790724754333, "step": 5018 }, { "epoch": 13.741273100616016, "grad_norm": 6.610931396484375, "learning_rate": 3.128767123287671e-07, "log_odds_chosen": 1.121236801147461, "log_odds_ratio": -0.38759079575538635, "logits/chosen": 0.6014612317085266, "logits/rejected": 0.6222211122512817, "logps/chosen": -2.0901713371276855, "logps/rejected": -3.101262092590332, "loss": 0.6381, "nll_loss": 0.5993008613586426, "rewards/accuracies": 0.75, "rewards/chosen": -0.20901712775230408, "rewards/margins": 0.1011090874671936, "rewards/rejected": -0.31012624502182007, "step": 5019 }, { "epoch": 13.744010951403148, "grad_norm": 6.144450664520264, "learning_rate": 3.127397260273972e-07, "log_odds_chosen": 2.7245261669158936, "log_odds_ratio": -0.23333527147769928, "logits/chosen": 0.9672828912734985, "logits/rejected": 0.9482086896896362, "logps/chosen": -2.438889503479004, "logps/rejected": -5.059863090515137, "loss": 0.6197, "nll_loss": 0.5964105725288391, "rewards/accuracies": 0.875, "rewards/chosen": -0.2438889741897583, "rewards/margins": 0.2620972990989685, "rewards/rejected": -0.505986213684082, "step": 5020 }, { "epoch": 13.74674880219028, "grad_norm": 4.697731971740723, "learning_rate": 3.126027397260274e-07, "log_odds_chosen": 3.067173480987549, "log_odds_ratio": -0.13705304265022278, "logits/chosen": 0.8842685222625732, "logits/rejected": 0.9207022190093994, "logps/chosen": -1.8937382698059082, "logps/rejected": -4.606644153594971, "loss": 0.5529, "nll_loss": 0.5391783118247986, "rewards/accuracies": 1.0, "rewards/chosen": -0.18937382102012634, "rewards/margins": 0.2712906002998352, "rewards/rejected": -0.46066442131996155, "step": 5021 }, { "epoch": 13.749486652977414, "grad_norm": 6.158238410949707, "learning_rate": 3.124657534246575e-07, "log_odds_chosen": 2.461522102355957, "log_odds_ratio": -0.2809871733188629, "logits/chosen": 0.9262682199478149, "logits/rejected": 0.9154872298240662, "logps/chosen": -2.5929179191589355, "logps/rejected": -4.950192928314209, "loss": 0.7093, "nll_loss": 0.681242823600769, "rewards/accuracies": 0.875, "rewards/chosen": -0.25929176807403564, "rewards/margins": 0.23572751879692078, "rewards/rejected": -0.4950193166732788, "step": 5022 }, { "epoch": 13.752224503764545, "grad_norm": 5.452850818634033, "learning_rate": 3.123287671232877e-07, "log_odds_chosen": 2.908890962600708, "log_odds_ratio": -0.2839428186416626, "logits/chosen": 0.686067521572113, "logits/rejected": 0.6728916168212891, "logps/chosen": -2.6621510982513428, "logps/rejected": -5.480268478393555, "loss": 0.6773, "nll_loss": 0.6488687992095947, "rewards/accuracies": 0.875, "rewards/chosen": -0.26621511578559875, "rewards/margins": 0.2818117141723633, "rewards/rejected": -0.5480268597602844, "step": 5023 }, { "epoch": 13.754962354551678, "grad_norm": 5.911134243011475, "learning_rate": 3.121917808219178e-07, "log_odds_chosen": 1.197786808013916, "log_odds_ratio": -0.2772321403026581, "logits/chosen": 0.9052577018737793, "logits/rejected": 0.8133406639099121, "logps/chosen": -1.6079798936843872, "logps/rejected": -2.63449764251709, "loss": 0.5613, "nll_loss": 0.533585250377655, "rewards/accuracies": 1.0, "rewards/chosen": -0.16079799830913544, "rewards/margins": 0.10265178978443146, "rewards/rejected": -0.2634497880935669, "step": 5024 }, { "epoch": 13.757700205338809, "grad_norm": 5.5709004402160645, "learning_rate": 3.1205479452054793e-07, "log_odds_chosen": 1.4867209196090698, "log_odds_ratio": -0.31513580679893494, "logits/chosen": 0.8560892343521118, "logits/rejected": 0.8131834268569946, "logps/chosen": -1.6925400495529175, "logps/rejected": -3.0226001739501953, "loss": 0.6729, "nll_loss": 0.6413583755493164, "rewards/accuracies": 0.875, "rewards/chosen": -0.16925400495529175, "rewards/margins": 0.1330060213804245, "rewards/rejected": -0.30226001143455505, "step": 5025 }, { "epoch": 13.760438056125942, "grad_norm": 5.348262786865234, "learning_rate": 3.119178082191781e-07, "log_odds_chosen": 1.518631935119629, "log_odds_ratio": -0.2598423659801483, "logits/chosen": 0.5809040665626526, "logits/rejected": 0.5729666948318481, "logps/chosen": -1.6696279048919678, "logps/rejected": -2.9979159832000732, "loss": 0.587, "nll_loss": 0.5610011219978333, "rewards/accuracies": 1.0, "rewards/chosen": -0.16696278750896454, "rewards/margins": 0.13282880187034607, "rewards/rejected": -0.2997916042804718, "step": 5026 }, { "epoch": 13.763175906913073, "grad_norm": 6.53223991394043, "learning_rate": 3.117808219178082e-07, "log_odds_chosen": 2.82177996635437, "log_odds_ratio": -0.20687207579612732, "logits/chosen": 0.6558986902236938, "logits/rejected": 0.7571564316749573, "logps/chosen": -2.1735143661499023, "logps/rejected": -4.903517246246338, "loss": 0.6692, "nll_loss": 0.6484839916229248, "rewards/accuracies": 0.875, "rewards/chosen": -0.21735143661499023, "rewards/margins": 0.2730003297328949, "rewards/rejected": -0.49035173654556274, "step": 5027 }, { "epoch": 13.765913757700206, "grad_norm": 4.603126525878906, "learning_rate": 3.116438356164384e-07, "log_odds_chosen": 3.1831350326538086, "log_odds_ratio": -0.22804434597492218, "logits/chosen": 0.8705402612686157, "logits/rejected": 0.9074381589889526, "logps/chosen": -2.5810370445251465, "logps/rejected": -5.715946197509766, "loss": 0.6765, "nll_loss": 0.6536658406257629, "rewards/accuracies": 0.875, "rewards/chosen": -0.25810372829437256, "rewards/margins": 0.31349092721939087, "rewards/rejected": -0.5715946555137634, "step": 5028 }, { "epoch": 13.768651608487337, "grad_norm": 6.397650241851807, "learning_rate": 3.115068493150685e-07, "log_odds_chosen": 2.396930694580078, "log_odds_ratio": -0.33104994893074036, "logits/chosen": 0.8865525126457214, "logits/rejected": 0.9067752361297607, "logps/chosen": -2.88130784034729, "logps/rejected": -5.230195045471191, "loss": 0.9271, "nll_loss": 0.8939457535743713, "rewards/accuracies": 0.875, "rewards/chosen": -0.2881307899951935, "rewards/margins": 0.2348886877298355, "rewards/rejected": -0.5230194330215454, "step": 5029 }, { "epoch": 13.77138945927447, "grad_norm": 4.67949914932251, "learning_rate": 3.1136986301369863e-07, "log_odds_chosen": 3.037140369415283, "log_odds_ratio": -0.13491766154766083, "logits/chosen": 1.0286681652069092, "logits/rejected": 1.031572699546814, "logps/chosen": -2.318880081176758, "logps/rejected": -5.2017741203308105, "loss": 0.6244, "nll_loss": 0.6109322905540466, "rewards/accuracies": 1.0, "rewards/chosen": -0.2318880259990692, "rewards/margins": 0.28828945755958557, "rewards/rejected": -0.5201774835586548, "step": 5030 }, { "epoch": 13.774127310061601, "grad_norm": 5.032785415649414, "learning_rate": 3.1123287671232873e-07, "log_odds_chosen": 2.5047521591186523, "log_odds_ratio": -0.2969928979873657, "logits/chosen": 0.949971079826355, "logits/rejected": 0.9867852330207825, "logps/chosen": -2.1883647441864014, "logps/rejected": -4.612273216247559, "loss": 0.7341, "nll_loss": 0.7044239640235901, "rewards/accuracies": 0.875, "rewards/chosen": -0.2188364863395691, "rewards/margins": 0.24239084124565125, "rewards/rejected": -0.46122732758522034, "step": 5031 }, { "epoch": 13.776865160848734, "grad_norm": 7.332086086273193, "learning_rate": 3.110958904109589e-07, "log_odds_chosen": 2.4436800479888916, "log_odds_ratio": -0.2007359266281128, "logits/chosen": 1.0945082902908325, "logits/rejected": 1.138168454170227, "logps/chosen": -2.827467918395996, "logps/rejected": -5.165783882141113, "loss": 0.6354, "nll_loss": 0.6153519749641418, "rewards/accuracies": 1.0, "rewards/chosen": -0.2827467918395996, "rewards/margins": 0.23383161425590515, "rewards/rejected": -0.5165783762931824, "step": 5032 }, { "epoch": 13.779603011635865, "grad_norm": 6.139428615570068, "learning_rate": 3.1095890410958904e-07, "log_odds_chosen": 2.4010355472564697, "log_odds_ratio": -0.2694515883922577, "logits/chosen": 0.7523053288459778, "logits/rejected": 0.8011336326599121, "logps/chosen": -2.611168384552002, "logps/rejected": -4.919981002807617, "loss": 0.7442, "nll_loss": 0.7172122001647949, "rewards/accuracies": 0.875, "rewards/chosen": -0.2611168622970581, "rewards/margins": 0.2308812290430069, "rewards/rejected": -0.4919980764389038, "step": 5033 }, { "epoch": 13.782340862422998, "grad_norm": 7.731971740722656, "learning_rate": 3.1082191780821914e-07, "log_odds_chosen": 1.4322848320007324, "log_odds_ratio": -0.36166101694107056, "logits/chosen": 0.7403008341789246, "logits/rejected": 0.7199885845184326, "logps/chosen": -2.642509937286377, "logps/rejected": -3.9550838470458984, "loss": 0.7625, "nll_loss": 0.7262974977493286, "rewards/accuracies": 0.875, "rewards/chosen": -0.2642509937286377, "rewards/margins": 0.13125737011432648, "rewards/rejected": -0.395508348941803, "step": 5034 }, { "epoch": 13.78507871321013, "grad_norm": 4.813602447509766, "learning_rate": 3.1068493150684934e-07, "log_odds_chosen": 2.0233962535858154, "log_odds_ratio": -0.1715848296880722, "logits/chosen": 0.537516713142395, "logits/rejected": 0.5927085876464844, "logps/chosen": -1.4176019430160522, "logps/rejected": -3.1658713817596436, "loss": 0.576, "nll_loss": 0.5588573813438416, "rewards/accuracies": 1.0, "rewards/chosen": -0.1417602002620697, "rewards/margins": 0.1748269498348236, "rewards/rejected": -0.3165871500968933, "step": 5035 }, { "epoch": 13.787816563997263, "grad_norm": 8.213071823120117, "learning_rate": 3.1054794520547944e-07, "log_odds_chosen": 0.6121854186058044, "log_odds_ratio": -0.7812641263008118, "logits/chosen": 0.81390780210495, "logits/rejected": 0.8977718353271484, "logps/chosen": -2.8799734115600586, "logps/rejected": -3.4497809410095215, "loss": 0.8491, "nll_loss": 0.7709341049194336, "rewards/accuracies": 0.75, "rewards/chosen": -0.28799736499786377, "rewards/margins": 0.05698079243302345, "rewards/rejected": -0.34497812390327454, "step": 5036 }, { "epoch": 13.790554414784394, "grad_norm": 6.126132011413574, "learning_rate": 3.1041095890410954e-07, "log_odds_chosen": 2.2376389503479004, "log_odds_ratio": -0.23677605390548706, "logits/chosen": 0.6392014026641846, "logits/rejected": 0.703363299369812, "logps/chosen": -2.313037872314453, "logps/rejected": -4.4244537353515625, "loss": 0.6963, "nll_loss": 0.6726272106170654, "rewards/accuracies": 1.0, "rewards/chosen": -0.23130379617214203, "rewards/margins": 0.21114160120487213, "rewards/rejected": -0.4424453675746918, "step": 5037 }, { "epoch": 13.793292265571527, "grad_norm": 5.107632637023926, "learning_rate": 3.102739726027397e-07, "log_odds_chosen": 2.869964361190796, "log_odds_ratio": -0.21600626409053802, "logits/chosen": 0.9372779726982117, "logits/rejected": 1.0370614528656006, "logps/chosen": -2.646775722503662, "logps/rejected": -5.4466166496276855, "loss": 0.7327, "nll_loss": 0.7111476063728333, "rewards/accuracies": 1.0, "rewards/chosen": -0.26467758417129517, "rewards/margins": 0.27998411655426025, "rewards/rejected": -0.5446617007255554, "step": 5038 }, { "epoch": 13.796030116358658, "grad_norm": 5.371434211730957, "learning_rate": 3.1013698630136984e-07, "log_odds_chosen": 1.3628268241882324, "log_odds_ratio": -0.30008137226104736, "logits/chosen": 0.7570444345474243, "logits/rejected": 0.8290407061576843, "logps/chosen": -1.9986668825149536, "logps/rejected": -3.243335723876953, "loss": 0.6224, "nll_loss": 0.5924189686775208, "rewards/accuracies": 1.0, "rewards/chosen": -0.19986669719219208, "rewards/margins": 0.12446688860654831, "rewards/rejected": -0.3243335783481598, "step": 5039 }, { "epoch": 13.79876796714579, "grad_norm": 5.8581671714782715, "learning_rate": 3.1e-07, "log_odds_chosen": 1.382533311843872, "log_odds_ratio": -0.2816616892814636, "logits/chosen": 1.0140511989593506, "logits/rejected": 1.1013104915618896, "logps/chosen": -2.359942674636841, "logps/rejected": -3.641564130783081, "loss": 0.5755, "nll_loss": 0.5473066568374634, "rewards/accuracies": 1.0, "rewards/chosen": -0.23599427938461304, "rewards/margins": 0.12816214561462402, "rewards/rejected": -0.36415642499923706, "step": 5040 }, { "epoch": 13.801505817932922, "grad_norm": 6.743419170379639, "learning_rate": 3.098630136986301e-07, "log_odds_chosen": 0.15591134130954742, "log_odds_ratio": -0.9120571613311768, "logits/chosen": 1.0531822443008423, "logits/rejected": 1.0568091869354248, "logps/chosen": -2.832508087158203, "logps/rejected": -2.9427316188812256, "loss": 0.8422, "nll_loss": 0.7509765028953552, "rewards/accuracies": 0.625, "rewards/chosen": -0.2832508385181427, "rewards/margins": 0.011022321879863739, "rewards/rejected": -0.29427313804626465, "step": 5041 }, { "epoch": 13.804243668720055, "grad_norm": 4.641788482666016, "learning_rate": 3.097260273972603e-07, "log_odds_chosen": 1.9170681238174438, "log_odds_ratio": -0.23182880878448486, "logits/chosen": 0.969258189201355, "logits/rejected": 1.0304139852523804, "logps/chosen": -2.2576217651367188, "logps/rejected": -4.056225299835205, "loss": 0.5858, "nll_loss": 0.5626256465911865, "rewards/accuracies": 1.0, "rewards/chosen": -0.22576218843460083, "rewards/margins": 0.17986033856868744, "rewards/rejected": -0.40562254190444946, "step": 5042 }, { "epoch": 13.806981519507186, "grad_norm": 6.899226665496826, "learning_rate": 3.095890410958904e-07, "log_odds_chosen": 1.296264886856079, "log_odds_ratio": -0.38433122634887695, "logits/chosen": 1.016788125038147, "logits/rejected": 1.0374705791473389, "logps/chosen": -2.760906219482422, "logps/rejected": -4.01936149597168, "loss": 0.7837, "nll_loss": 0.7453030943870544, "rewards/accuracies": 0.875, "rewards/chosen": -0.2760906219482422, "rewards/margins": 0.1258455216884613, "rewards/rejected": -0.4019361734390259, "step": 5043 }, { "epoch": 13.809719370294319, "grad_norm": 4.849369049072266, "learning_rate": 3.094520547945205e-07, "log_odds_chosen": 2.451582193374634, "log_odds_ratio": -0.16444256901741028, "logits/chosen": 1.0222169160842896, "logits/rejected": 1.0380175113677979, "logps/chosen": -2.1272170543670654, "logps/rejected": -4.420572757720947, "loss": 0.6237, "nll_loss": 0.6072326898574829, "rewards/accuracies": 1.0, "rewards/chosen": -0.21272173523902893, "rewards/margins": 0.22933557629585266, "rewards/rejected": -0.4420572817325592, "step": 5044 }, { "epoch": 13.81245722108145, "grad_norm": 6.296987056732178, "learning_rate": 3.0931506849315065e-07, "log_odds_chosen": 2.2099108695983887, "log_odds_ratio": -0.24352523684501648, "logits/chosen": 0.6808708906173706, "logits/rejected": 0.6695537567138672, "logps/chosen": -3.248382568359375, "logps/rejected": -5.395726203918457, "loss": 0.7203, "nll_loss": 0.6959246397018433, "rewards/accuracies": 0.875, "rewards/chosen": -0.324838250875473, "rewards/margins": 0.21473439037799835, "rewards/rejected": -0.5395726561546326, "step": 5045 }, { "epoch": 13.815195071868583, "grad_norm": 5.750594139099121, "learning_rate": 3.091780821917808e-07, "log_odds_chosen": 2.1705214977264404, "log_odds_ratio": -0.23131787776947021, "logits/chosen": 0.670129656791687, "logits/rejected": 0.7165096402168274, "logps/chosen": -2.930025100708008, "logps/rejected": -5.011256217956543, "loss": 0.8485, "nll_loss": 0.8253253698348999, "rewards/accuracies": 0.875, "rewards/chosen": -0.29300254583358765, "rewards/margins": 0.20812305808067322, "rewards/rejected": -0.5011255741119385, "step": 5046 }, { "epoch": 13.817932922655714, "grad_norm": 7.771468162536621, "learning_rate": 3.0904109589041096e-07, "log_odds_chosen": 2.2456421852111816, "log_odds_ratio": -0.542648434638977, "logits/chosen": 0.961229145526886, "logits/rejected": 0.9323288202285767, "logps/chosen": -2.323493719100952, "logps/rejected": -4.380126953125, "loss": 0.6379, "nll_loss": 0.5836057662963867, "rewards/accuracies": 0.875, "rewards/chosen": -0.23234936594963074, "rewards/margins": 0.2056632786989212, "rewards/rejected": -0.43801265954971313, "step": 5047 }, { "epoch": 13.820670773442847, "grad_norm": 4.963918685913086, "learning_rate": 3.0890410958904105e-07, "log_odds_chosen": 2.4609060287475586, "log_odds_ratio": -0.17029733955860138, "logits/chosen": 0.5935124158859253, "logits/rejected": 0.5800938010215759, "logps/chosen": -2.2914085388183594, "logps/rejected": -4.614229202270508, "loss": 0.6444, "nll_loss": 0.6273434162139893, "rewards/accuracies": 1.0, "rewards/chosen": -0.22914086282253265, "rewards/margins": 0.23228205740451813, "rewards/rejected": -0.4614229202270508, "step": 5048 }, { "epoch": 13.82340862422998, "grad_norm": 5.167130947113037, "learning_rate": 3.0876712328767126e-07, "log_odds_chosen": 3.3358030319213867, "log_odds_ratio": -0.12947717308998108, "logits/chosen": 0.8804628849029541, "logits/rejected": 0.8756919503211975, "logps/chosen": -2.353325605392456, "logps/rejected": -5.582429885864258, "loss": 0.688, "nll_loss": 0.6750525236129761, "rewards/accuracies": 1.0, "rewards/chosen": -0.23533256351947784, "rewards/margins": 0.3229103982448578, "rewards/rejected": -0.5582430362701416, "step": 5049 }, { "epoch": 13.826146475017111, "grad_norm": 13.5154447555542, "learning_rate": 3.0863013698630136e-07, "log_odds_chosen": -0.14328116178512573, "log_odds_ratio": -0.9057353138923645, "logits/chosen": 0.6521285772323608, "logits/rejected": 0.6036297082901001, "logps/chosen": -2.9578468799591064, "logps/rejected": -2.7924704551696777, "loss": 0.7204, "nll_loss": 0.629810631275177, "rewards/accuracies": 0.5, "rewards/chosen": -0.29578468203544617, "rewards/margins": -0.016537634655833244, "rewards/rejected": -0.2792470455169678, "step": 5050 }, { "epoch": 13.828884325804244, "grad_norm": 5.442882061004639, "learning_rate": 3.0849315068493146e-07, "log_odds_chosen": 2.254159927368164, "log_odds_ratio": -0.18771886825561523, "logits/chosen": 0.864392101764679, "logits/rejected": 0.848440408706665, "logps/chosen": -1.9120135307312012, "logps/rejected": -3.996255874633789, "loss": 0.6337, "nll_loss": 0.6149771213531494, "rewards/accuracies": 1.0, "rewards/chosen": -0.1912013441324234, "rewards/margins": 0.20842424035072327, "rewards/rejected": -0.39962559938430786, "step": 5051 }, { "epoch": 13.831622176591376, "grad_norm": 5.715734004974365, "learning_rate": 3.0835616438356166e-07, "log_odds_chosen": 2.6416780948638916, "log_odds_ratio": -0.17265252768993378, "logits/chosen": 0.8523013591766357, "logits/rejected": 0.8751526474952698, "logps/chosen": -2.0860469341278076, "logps/rejected": -4.5941081047058105, "loss": 0.6496, "nll_loss": 0.6323209404945374, "rewards/accuracies": 1.0, "rewards/chosen": -0.20860469341278076, "rewards/margins": 0.25080612301826477, "rewards/rejected": -0.45941078662872314, "step": 5052 }, { "epoch": 13.834360027378509, "grad_norm": 7.573765277862549, "learning_rate": 3.0821917808219176e-07, "log_odds_chosen": 2.0595309734344482, "log_odds_ratio": -0.4385112524032593, "logits/chosen": 0.6997728943824768, "logits/rejected": 0.7026007175445557, "logps/chosen": -2.913918972015381, "logps/rejected": -4.911403179168701, "loss": 0.8125, "nll_loss": 0.7686972618103027, "rewards/accuracies": 0.75, "rewards/chosen": -0.29139190912246704, "rewards/margins": 0.19974841177463531, "rewards/rejected": -0.49114030599594116, "step": 5053 }, { "epoch": 13.83709787816564, "grad_norm": 5.078672885894775, "learning_rate": 3.080821917808219e-07, "log_odds_chosen": 3.855132818222046, "log_odds_ratio": -0.21066372096538544, "logits/chosen": 0.8305670022964478, "logits/rejected": 0.8377072811126709, "logps/chosen": -2.30112886428833, "logps/rejected": -6.034273147583008, "loss": 0.7051, "nll_loss": 0.6840483546257019, "rewards/accuracies": 0.875, "rewards/chosen": -0.23011288046836853, "rewards/margins": 0.37331438064575195, "rewards/rejected": -0.6034272909164429, "step": 5054 }, { "epoch": 13.839835728952773, "grad_norm": 7.185070514678955, "learning_rate": 3.07945205479452e-07, "log_odds_chosen": 2.368192434310913, "log_odds_ratio": -0.22341781854629517, "logits/chosen": 1.0373069047927856, "logits/rejected": 1.0675307512283325, "logps/chosen": -2.6656622886657715, "logps/rejected": -4.967706680297852, "loss": 0.7136, "nll_loss": 0.6912226676940918, "rewards/accuracies": 0.875, "rewards/chosen": -0.2665662169456482, "rewards/margins": 0.2302044779062271, "rewards/rejected": -0.4967706799507141, "step": 5055 }, { "epoch": 13.842573579739904, "grad_norm": 4.777774333953857, "learning_rate": 3.078082191780822e-07, "log_odds_chosen": 2.2784183025360107, "log_odds_ratio": -0.16684213280677795, "logits/chosen": 1.1416077613830566, "logits/rejected": 1.2116676568984985, "logps/chosen": -2.169787883758545, "logps/rejected": -4.3495893478393555, "loss": 0.6127, "nll_loss": 0.5959867835044861, "rewards/accuracies": 1.0, "rewards/chosen": -0.21697880327701569, "rewards/margins": 0.21798014640808105, "rewards/rejected": -0.43495893478393555, "step": 5056 }, { "epoch": 13.845311430527037, "grad_norm": 5.560182571411133, "learning_rate": 3.076712328767123e-07, "log_odds_chosen": 2.466623306274414, "log_odds_ratio": -0.18795573711395264, "logits/chosen": 0.6857976913452148, "logits/rejected": 0.736801028251648, "logps/chosen": -1.9882738590240479, "logps/rejected": -4.335022926330566, "loss": 0.6633, "nll_loss": 0.6445506811141968, "rewards/accuracies": 1.0, "rewards/chosen": -0.19882738590240479, "rewards/margins": 0.23467490077018738, "rewards/rejected": -0.43350228667259216, "step": 5057 }, { "epoch": 13.848049281314168, "grad_norm": 4.563839435577393, "learning_rate": 3.075342465753424e-07, "log_odds_chosen": 3.1005148887634277, "log_odds_ratio": -0.22766217589378357, "logits/chosen": 0.9368031024932861, "logits/rejected": 0.9664339423179626, "logps/chosen": -2.3555221557617188, "logps/rejected": -5.389571189880371, "loss": 0.81, "nll_loss": 0.7872370481491089, "rewards/accuracies": 0.875, "rewards/chosen": -0.23555219173431396, "rewards/margins": 0.30340489745140076, "rewards/rejected": -0.5389571189880371, "step": 5058 }, { "epoch": 13.850787132101301, "grad_norm": 6.930627346038818, "learning_rate": 3.073972602739726e-07, "log_odds_chosen": 3.1635901927948, "log_odds_ratio": -0.2015010118484497, "logits/chosen": 1.0942274332046509, "logits/rejected": 1.0684936046600342, "logps/chosen": -2.737062454223633, "logps/rejected": -5.814178943634033, "loss": 0.827, "nll_loss": 0.8068966865539551, "rewards/accuracies": 0.875, "rewards/chosen": -0.27370625734329224, "rewards/margins": 0.3077116310596466, "rewards/rejected": -0.5814179182052612, "step": 5059 }, { "epoch": 13.853524982888432, "grad_norm": 7.215912342071533, "learning_rate": 3.072602739726027e-07, "log_odds_chosen": 1.9034593105316162, "log_odds_ratio": -0.3377978205680847, "logits/chosen": 0.9021660685539246, "logits/rejected": 0.9292710423469543, "logps/chosen": -2.7836647033691406, "logps/rejected": -4.621081829071045, "loss": 0.744, "nll_loss": 0.710176944732666, "rewards/accuracies": 0.875, "rewards/chosen": -0.27836644649505615, "rewards/margins": 0.1837417483329773, "rewards/rejected": -0.46210819482803345, "step": 5060 }, { "epoch": 13.856262833675565, "grad_norm": 4.82382869720459, "learning_rate": 3.0712328767123287e-07, "log_odds_chosen": 1.9299246072769165, "log_odds_ratio": -0.25885945558547974, "logits/chosen": 0.864682674407959, "logits/rejected": 0.9450652599334717, "logps/chosen": -2.233030080795288, "logps/rejected": -4.054481029510498, "loss": 0.7895, "nll_loss": 0.7636353373527527, "rewards/accuracies": 0.875, "rewards/chosen": -0.22330300509929657, "rewards/margins": 0.18214508891105652, "rewards/rejected": -0.4054480791091919, "step": 5061 }, { "epoch": 13.859000684462696, "grad_norm": 7.043272972106934, "learning_rate": 3.0698630136986297e-07, "log_odds_chosen": 1.5411369800567627, "log_odds_ratio": -0.30905258655548096, "logits/chosen": 0.8970378637313843, "logits/rejected": 0.8686524629592896, "logps/chosen": -2.424989700317383, "logps/rejected": -3.8550426959991455, "loss": 0.6923, "nll_loss": 0.6613930463790894, "rewards/accuracies": 0.875, "rewards/chosen": -0.2424989491701126, "rewards/margins": 0.1430053412914276, "rewards/rejected": -0.3855043053627014, "step": 5062 }, { "epoch": 13.86173853524983, "grad_norm": 5.288506984710693, "learning_rate": 3.068493150684932e-07, "log_odds_chosen": 2.5760889053344727, "log_odds_ratio": -0.1576882004737854, "logits/chosen": 0.8095852136611938, "logits/rejected": 0.9020370841026306, "logps/chosen": -1.807400107383728, "logps/rejected": -4.169589042663574, "loss": 0.6218, "nll_loss": 0.6060625910758972, "rewards/accuracies": 1.0, "rewards/chosen": -0.18074001371860504, "rewards/margins": 0.2362188696861267, "rewards/rejected": -0.41695886850357056, "step": 5063 }, { "epoch": 13.86447638603696, "grad_norm": 6.256679058074951, "learning_rate": 3.067123287671233e-07, "log_odds_chosen": 1.9406263828277588, "log_odds_ratio": -0.2900637686252594, "logits/chosen": 0.7354889512062073, "logits/rejected": 0.6935622692108154, "logps/chosen": -2.3938422203063965, "logps/rejected": -4.243088722229004, "loss": 0.6175, "nll_loss": 0.588528573513031, "rewards/accuracies": 1.0, "rewards/chosen": -0.2393842190504074, "rewards/margins": 0.1849246621131897, "rewards/rejected": -0.4243088960647583, "step": 5064 }, { "epoch": 13.867214236824093, "grad_norm": 6.212708473205566, "learning_rate": 3.065753424657534e-07, "log_odds_chosen": 0.8174124956130981, "log_odds_ratio": -0.47343599796295166, "logits/chosen": 0.8336907625198364, "logits/rejected": 0.8428618907928467, "logps/chosen": -1.8510873317718506, "logps/rejected": -2.632567882537842, "loss": 0.5928, "nll_loss": 0.5454403162002563, "rewards/accuracies": 0.625, "rewards/chosen": -0.1851087361574173, "rewards/margins": 0.07814804464578629, "rewards/rejected": -0.2632567882537842, "step": 5065 }, { "epoch": 13.869952087611225, "grad_norm": 4.81398868560791, "learning_rate": 3.064383561643836e-07, "log_odds_chosen": 2.028857946395874, "log_odds_ratio": -0.25129127502441406, "logits/chosen": 1.0561211109161377, "logits/rejected": 1.072089672088623, "logps/chosen": -1.6691476106643677, "logps/rejected": -3.5194485187530518, "loss": 0.5571, "nll_loss": 0.5320026874542236, "rewards/accuracies": 1.0, "rewards/chosen": -0.16691476106643677, "rewards/margins": 0.18503008782863617, "rewards/rejected": -0.35194486379623413, "step": 5066 }, { "epoch": 13.872689938398358, "grad_norm": 7.182231903076172, "learning_rate": 3.063013698630137e-07, "log_odds_chosen": 1.2059937715530396, "log_odds_ratio": -0.5360353589057922, "logits/chosen": 0.6590427756309509, "logits/rejected": 0.7064984440803528, "logps/chosen": -2.442713737487793, "logps/rejected": -3.5356926918029785, "loss": 0.775, "nll_loss": 0.7214217782020569, "rewards/accuracies": 0.875, "rewards/chosen": -0.2442713975906372, "rewards/margins": 0.10929786413908005, "rewards/rejected": -0.35356926918029785, "step": 5067 }, { "epoch": 13.875427789185489, "grad_norm": 5.531180381774902, "learning_rate": 3.0616438356164383e-07, "log_odds_chosen": 2.524139404296875, "log_odds_ratio": -0.15566575527191162, "logits/chosen": 0.7343506813049316, "logits/rejected": 0.7930577993392944, "logps/chosen": -2.06158709526062, "logps/rejected": -4.355480194091797, "loss": 0.6643, "nll_loss": 0.6486912965774536, "rewards/accuracies": 1.0, "rewards/chosen": -0.20615869760513306, "rewards/margins": 0.22938935458660126, "rewards/rejected": -0.4355480968952179, "step": 5068 }, { "epoch": 13.878165639972622, "grad_norm": 5.944504261016846, "learning_rate": 3.0602739726027393e-07, "log_odds_chosen": 2.8039979934692383, "log_odds_ratio": -0.2802281975746155, "logits/chosen": 1.0191587209701538, "logits/rejected": 1.1162776947021484, "logps/chosen": -4.028906345367432, "logps/rejected": -6.701844215393066, "loss": 0.9026, "nll_loss": 0.8746204376220703, "rewards/accuracies": 0.75, "rewards/chosen": -0.4028906226158142, "rewards/margins": 0.2672938406467438, "rewards/rejected": -0.6701844930648804, "step": 5069 }, { "epoch": 13.880903490759753, "grad_norm": 5.2465362548828125, "learning_rate": 3.0589041095890414e-07, "log_odds_chosen": 1.8075555562973022, "log_odds_ratio": -0.3237539529800415, "logits/chosen": 0.5607014894485474, "logits/rejected": 0.6304880380630493, "logps/chosen": -2.2631096839904785, "logps/rejected": -3.962540864944458, "loss": 0.7158, "nll_loss": 0.6834201812744141, "rewards/accuracies": 0.875, "rewards/chosen": -0.22631098330020905, "rewards/margins": 0.16994310915470123, "rewards/rejected": -0.39625412225723267, "step": 5070 }, { "epoch": 13.883641341546886, "grad_norm": 5.837467670440674, "learning_rate": 3.0575342465753423e-07, "log_odds_chosen": 1.8475983142852783, "log_odds_ratio": -0.1961548626422882, "logits/chosen": 0.9280799627304077, "logits/rejected": 0.8641625046730042, "logps/chosen": -1.8018096685409546, "logps/rejected": -3.49088191986084, "loss": 0.6194, "nll_loss": 0.5998131036758423, "rewards/accuracies": 1.0, "rewards/chosen": -0.18018098175525665, "rewards/margins": 0.16890722513198853, "rewards/rejected": -0.34908822178840637, "step": 5071 }, { "epoch": 13.886379192334019, "grad_norm": 7.885420322418213, "learning_rate": 3.0561643835616433e-07, "log_odds_chosen": 3.719273090362549, "log_odds_ratio": -0.21028195321559906, "logits/chosen": 0.9096207618713379, "logits/rejected": 0.949462354183197, "logps/chosen": -2.2377395629882812, "logps/rejected": -5.785740375518799, "loss": 0.5889, "nll_loss": 0.5678903460502625, "rewards/accuracies": 0.875, "rewards/chosen": -0.22377395629882812, "rewards/margins": 0.35480010509490967, "rewards/rejected": -0.5785740613937378, "step": 5072 }, { "epoch": 13.88911704312115, "grad_norm": 5.666604518890381, "learning_rate": 3.0547945205479454e-07, "log_odds_chosen": 2.401827335357666, "log_odds_ratio": -0.21654458343982697, "logits/chosen": 0.9779994487762451, "logits/rejected": 0.9991112947463989, "logps/chosen": -2.3519535064697266, "logps/rejected": -4.644220352172852, "loss": 0.6469, "nll_loss": 0.6252143979072571, "rewards/accuracies": 0.875, "rewards/chosen": -0.2351953536272049, "rewards/margins": 0.2292267233133316, "rewards/rejected": -0.4644220471382141, "step": 5073 }, { "epoch": 13.891854893908281, "grad_norm": 4.715537071228027, "learning_rate": 3.0534246575342464e-07, "log_odds_chosen": 1.973463773727417, "log_odds_ratio": -0.34422463178634644, "logits/chosen": 0.8376219868659973, "logits/rejected": 0.915505051612854, "logps/chosen": -2.3160347938537598, "logps/rejected": -4.153264999389648, "loss": 0.645, "nll_loss": 0.6106254458427429, "rewards/accuracies": 0.875, "rewards/chosen": -0.2316034734249115, "rewards/margins": 0.18372303247451782, "rewards/rejected": -0.4153265058994293, "step": 5074 }, { "epoch": 13.894592744695414, "grad_norm": 5.331954002380371, "learning_rate": 3.052054794520548e-07, "log_odds_chosen": 1.646311640739441, "log_odds_ratio": -0.32250428199768066, "logits/chosen": 0.8753893375396729, "logits/rejected": 0.9364610910415649, "logps/chosen": -2.510891914367676, "logps/rejected": -4.083459377288818, "loss": 0.6739, "nll_loss": 0.6416131854057312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2510891854763031, "rewards/margins": 0.15725676715373993, "rewards/rejected": -0.40834593772888184, "step": 5075 }, { "epoch": 13.897330595482547, "grad_norm": 6.366796016693115, "learning_rate": 3.050684931506849e-07, "log_odds_chosen": 2.4653480052948, "log_odds_ratio": -0.32988494634628296, "logits/chosen": 0.8912115693092346, "logits/rejected": 0.9229145050048828, "logps/chosen": -1.7474281787872314, "logps/rejected": -4.102547645568848, "loss": 0.6723, "nll_loss": 0.6393016576766968, "rewards/accuracies": 0.75, "rewards/chosen": -0.17474281787872314, "rewards/margins": 0.23551195859909058, "rewards/rejected": -0.4102547764778137, "step": 5076 }, { "epoch": 13.900068446269678, "grad_norm": 5.323324680328369, "learning_rate": 3.049315068493151e-07, "log_odds_chosen": 2.5215861797332764, "log_odds_ratio": -0.22155578434467316, "logits/chosen": 1.109696865081787, "logits/rejected": 1.0968400239944458, "logps/chosen": -2.1538619995117188, "logps/rejected": -4.575650215148926, "loss": 0.6564, "nll_loss": 0.6342095136642456, "rewards/accuracies": 1.0, "rewards/chosen": -0.21538622677326202, "rewards/margins": 0.2421787977218628, "rewards/rejected": -0.4575650095939636, "step": 5077 }, { "epoch": 13.902806297056811, "grad_norm": 7.6070637702941895, "learning_rate": 3.047945205479452e-07, "log_odds_chosen": 3.0235931873321533, "log_odds_ratio": -0.30335965752601624, "logits/chosen": 1.214296817779541, "logits/rejected": 1.278327465057373, "logps/chosen": -2.659857749938965, "logps/rejected": -5.528252601623535, "loss": 0.7196, "nll_loss": 0.6893041133880615, "rewards/accuracies": 0.875, "rewards/chosen": -0.26598578691482544, "rewards/margins": 0.2868395447731018, "rewards/rejected": -0.5528253316879272, "step": 5078 }, { "epoch": 13.905544147843942, "grad_norm": 5.848971843719482, "learning_rate": 3.046575342465753e-07, "log_odds_chosen": 1.189752459526062, "log_odds_ratio": -0.33583343029022217, "logits/chosen": 0.8402031660079956, "logits/rejected": 0.8314714431762695, "logps/chosen": -1.8909454345703125, "logps/rejected": -2.970489025115967, "loss": 0.5594, "nll_loss": 0.5257859826087952, "rewards/accuracies": 1.0, "rewards/chosen": -0.18909454345703125, "rewards/margins": 0.10795437544584274, "rewards/rejected": -0.2970489263534546, "step": 5079 }, { "epoch": 13.908281998631075, "grad_norm": 5.99009895324707, "learning_rate": 3.045205479452055e-07, "log_odds_chosen": 2.4537010192871094, "log_odds_ratio": -0.20452819764614105, "logits/chosen": 0.5959365367889404, "logits/rejected": 0.5524388551712036, "logps/chosen": -1.6828317642211914, "logps/rejected": -3.9706685543060303, "loss": 0.6514, "nll_loss": 0.6309115290641785, "rewards/accuracies": 1.0, "rewards/chosen": -0.16828317940235138, "rewards/margins": 0.22878369688987732, "rewards/rejected": -0.3970668911933899, "step": 5080 }, { "epoch": 13.911019849418206, "grad_norm": 5.596327781677246, "learning_rate": 3.043835616438356e-07, "log_odds_chosen": 1.8791956901550293, "log_odds_ratio": -0.2971728444099426, "logits/chosen": 0.6173964738845825, "logits/rejected": 0.7035090923309326, "logps/chosen": -2.167442560195923, "logps/rejected": -3.910858154296875, "loss": 0.5874, "nll_loss": 0.5577121376991272, "rewards/accuracies": 0.875, "rewards/chosen": -0.21674425899982452, "rewards/margins": 0.1743415743112564, "rewards/rejected": -0.39108583331108093, "step": 5081 }, { "epoch": 13.91375770020534, "grad_norm": 5.628825664520264, "learning_rate": 3.0424657534246575e-07, "log_odds_chosen": 2.0079448223114014, "log_odds_ratio": -0.30478888750076294, "logits/chosen": 0.9330465197563171, "logits/rejected": 0.9249318838119507, "logps/chosen": -1.869450569152832, "logps/rejected": -3.7442309856414795, "loss": 0.6225, "nll_loss": 0.5919733047485352, "rewards/accuracies": 1.0, "rewards/chosen": -0.18694505095481873, "rewards/margins": 0.18747805058956146, "rewards/rejected": -0.3744231164455414, "step": 5082 }, { "epoch": 13.91649555099247, "grad_norm": 4.851633071899414, "learning_rate": 3.041095890410959e-07, "log_odds_chosen": 2.716428756713867, "log_odds_ratio": -0.1471286118030548, "logits/chosen": 0.7893164157867432, "logits/rejected": 0.7595047950744629, "logps/chosen": -2.202817916870117, "logps/rejected": -4.789306640625, "loss": 0.606, "nll_loss": 0.5913066864013672, "rewards/accuracies": 1.0, "rewards/chosen": -0.22028179466724396, "rewards/margins": 0.25864890217781067, "rewards/rejected": -0.47893068194389343, "step": 5083 }, { "epoch": 13.919233401779604, "grad_norm": 6.2681097984313965, "learning_rate": 3.03972602739726e-07, "log_odds_chosen": 0.9600709676742554, "log_odds_ratio": -0.4199758470058441, "logits/chosen": 0.6977899074554443, "logits/rejected": 0.6721141934394836, "logps/chosen": -2.5496346950531006, "logps/rejected": -3.467482566833496, "loss": 0.7122, "nll_loss": 0.6702409982681274, "rewards/accuracies": 0.875, "rewards/chosen": -0.2549634873867035, "rewards/margins": 0.09178479015827179, "rewards/rejected": -0.3467482328414917, "step": 5084 }, { "epoch": 13.921971252566735, "grad_norm": 6.961152076721191, "learning_rate": 3.0383561643835615e-07, "log_odds_chosen": 2.1760268211364746, "log_odds_ratio": -0.38763150572776794, "logits/chosen": 0.8786017894744873, "logits/rejected": 0.9222500324249268, "logps/chosen": -2.2889175415039062, "logps/rejected": -4.316653728485107, "loss": 0.6144, "nll_loss": 0.5756412148475647, "rewards/accuracies": 0.875, "rewards/chosen": -0.2288917601108551, "rewards/margins": 0.20277363061904907, "rewards/rejected": -0.4316653609275818, "step": 5085 }, { "epoch": 13.924709103353868, "grad_norm": 8.901338577270508, "learning_rate": 3.0369863013698625e-07, "log_odds_chosen": 2.8106226921081543, "log_odds_ratio": -0.6786725521087646, "logits/chosen": 1.0925898551940918, "logits/rejected": 1.1057018041610718, "logps/chosen": -3.0420048236846924, "logps/rejected": -5.754531383514404, "loss": 0.7925, "nll_loss": 0.7246119976043701, "rewards/accuracies": 0.75, "rewards/chosen": -0.3042004704475403, "rewards/margins": 0.2712526321411133, "rewards/rejected": -0.5754531621932983, "step": 5086 }, { "epoch": 13.927446954140999, "grad_norm": 6.956662178039551, "learning_rate": 3.0356164383561646e-07, "log_odds_chosen": 1.896173357963562, "log_odds_ratio": -0.25373807549476624, "logits/chosen": 1.0094349384307861, "logits/rejected": 1.0248361825942993, "logps/chosen": -2.733458995819092, "logps/rejected": -4.483603000640869, "loss": 0.6713, "nll_loss": 0.6458953022956848, "rewards/accuracies": 0.875, "rewards/chosen": -0.2733458876609802, "rewards/margins": 0.1750144064426422, "rewards/rejected": -0.44836026430130005, "step": 5087 }, { "epoch": 13.930184804928132, "grad_norm": 5.457456111907959, "learning_rate": 3.0342465753424656e-07, "log_odds_chosen": 1.4422507286071777, "log_odds_ratio": -0.61285799741745, "logits/chosen": 0.8871164321899414, "logits/rejected": 0.867412269115448, "logps/chosen": -2.4291794300079346, "logps/rejected": -3.8276357650756836, "loss": 0.7882, "nll_loss": 0.7269372940063477, "rewards/accuracies": 0.75, "rewards/chosen": -0.2429179549217224, "rewards/margins": 0.139845609664917, "rewards/rejected": -0.3827635645866394, "step": 5088 }, { "epoch": 13.932922655715263, "grad_norm": 7.086468696594238, "learning_rate": 3.032876712328767e-07, "log_odds_chosen": 1.7908644676208496, "log_odds_ratio": -0.42300575971603394, "logits/chosen": 1.0857651233673096, "logits/rejected": 1.0630979537963867, "logps/chosen": -2.106051206588745, "logps/rejected": -3.7048463821411133, "loss": 0.6201, "nll_loss": 0.5777629613876343, "rewards/accuracies": 0.875, "rewards/chosen": -0.21060511469841003, "rewards/margins": 0.15987950563430786, "rewards/rejected": -0.3704846501350403, "step": 5089 }, { "epoch": 13.935660506502396, "grad_norm": 4.6425557136535645, "learning_rate": 3.0315068493150686e-07, "log_odds_chosen": 1.8033411502838135, "log_odds_ratio": -0.25042322278022766, "logits/chosen": 0.7980850338935852, "logits/rejected": 0.80457603931427, "logps/chosen": -1.5423640012741089, "logps/rejected": -3.1430320739746094, "loss": 0.543, "nll_loss": 0.5179804563522339, "rewards/accuracies": 1.0, "rewards/chosen": -0.15423640608787537, "rewards/margins": 0.16006682813167572, "rewards/rejected": -0.3143032491207123, "step": 5090 }, { "epoch": 13.938398357289527, "grad_norm": 5.637869358062744, "learning_rate": 3.0301369863013696e-07, "log_odds_chosen": 3.171624183654785, "log_odds_ratio": -0.18316689133644104, "logits/chosen": 0.8341221213340759, "logits/rejected": 0.7288432121276855, "logps/chosen": -2.039090156555176, "logps/rejected": -5.092988014221191, "loss": 0.7086, "nll_loss": 0.6902360916137695, "rewards/accuracies": 1.0, "rewards/chosen": -0.2039090096950531, "rewards/margins": 0.3053898215293884, "rewards/rejected": -0.5092988014221191, "step": 5091 }, { "epoch": 13.94113620807666, "grad_norm": 5.841121196746826, "learning_rate": 3.028767123287671e-07, "log_odds_chosen": 1.5576908588409424, "log_odds_ratio": -0.24461093544960022, "logits/chosen": 0.9611737728118896, "logits/rejected": 0.9573374390602112, "logps/chosen": -2.525020122528076, "logps/rejected": -3.984706401824951, "loss": 0.6481, "nll_loss": 0.6236000061035156, "rewards/accuracies": 1.0, "rewards/chosen": -0.2525020241737366, "rewards/margins": 0.14596864581108093, "rewards/rejected": -0.3984706997871399, "step": 5092 }, { "epoch": 13.943874058863791, "grad_norm": 4.79789924621582, "learning_rate": 3.027397260273972e-07, "log_odds_chosen": 1.907468557357788, "log_odds_ratio": -0.24323274195194244, "logits/chosen": 0.6678478121757507, "logits/rejected": 0.7152442336082458, "logps/chosen": -2.0705387592315674, "logps/rejected": -3.8273510932922363, "loss": 0.6653, "nll_loss": 0.640953779220581, "rewards/accuracies": 1.0, "rewards/chosen": -0.20705386996269226, "rewards/margins": 0.17568127810955048, "rewards/rejected": -0.38273513317108154, "step": 5093 }, { "epoch": 13.946611909650924, "grad_norm": 5.997085094451904, "learning_rate": 3.026027397260274e-07, "log_odds_chosen": 1.1236796379089355, "log_odds_ratio": -0.5484721660614014, "logits/chosen": 0.5363354682922363, "logits/rejected": 0.5418205261230469, "logps/chosen": -2.478248119354248, "logps/rejected": -3.455456018447876, "loss": 0.685, "nll_loss": 0.630127489566803, "rewards/accuracies": 0.75, "rewards/chosen": -0.24782481789588928, "rewards/margins": 0.09772080183029175, "rewards/rejected": -0.34554561972618103, "step": 5094 }, { "epoch": 13.949349760438055, "grad_norm": 5.179007530212402, "learning_rate": 3.024657534246575e-07, "log_odds_chosen": 2.2436130046844482, "log_odds_ratio": -0.37022092938423157, "logits/chosen": 0.7236878275871277, "logits/rejected": 0.7586846947669983, "logps/chosen": -2.1531972885131836, "logps/rejected": -4.326019763946533, "loss": 0.6087, "nll_loss": 0.5716423392295837, "rewards/accuracies": 0.75, "rewards/chosen": -0.21531972289085388, "rewards/margins": 0.217282235622406, "rewards/rejected": -0.4326019585132599, "step": 5095 }, { "epoch": 13.952087611225188, "grad_norm": 5.400421619415283, "learning_rate": 3.0232876712328767e-07, "log_odds_chosen": 1.9551750421524048, "log_odds_ratio": -0.2805049419403076, "logits/chosen": 0.9510992765426636, "logits/rejected": 1.0134943723678589, "logps/chosen": -2.42252254486084, "logps/rejected": -4.272431373596191, "loss": 0.6552, "nll_loss": 0.6271377205848694, "rewards/accuracies": 0.875, "rewards/chosen": -0.24225224554538727, "rewards/margins": 0.18499088287353516, "rewards/rejected": -0.42724311351776123, "step": 5096 }, { "epoch": 13.95482546201232, "grad_norm": 7.351171970367432, "learning_rate": 3.021917808219178e-07, "log_odds_chosen": 2.396801471710205, "log_odds_ratio": -0.4558574855327606, "logits/chosen": 0.9507479667663574, "logits/rejected": 1.0320971012115479, "logps/chosen": -3.1829004287719727, "logps/rejected": -5.525346279144287, "loss": 0.7288, "nll_loss": 0.6832275390625, "rewards/accuracies": 0.875, "rewards/chosen": -0.3182900547981262, "rewards/margins": 0.23424461483955383, "rewards/rejected": -0.5525346398353577, "step": 5097 }, { "epoch": 13.957563312799453, "grad_norm": 4.581596374511719, "learning_rate": 3.020547945205479e-07, "log_odds_chosen": 3.6463398933410645, "log_odds_ratio": -0.12916994094848633, "logits/chosen": 0.7593536376953125, "logits/rejected": 0.7545632719993591, "logps/chosen": -1.9954240322113037, "logps/rejected": -5.461849212646484, "loss": 0.6538, "nll_loss": 0.6409242749214172, "rewards/accuracies": 1.0, "rewards/chosen": -0.19954241812229156, "rewards/margins": 0.34664249420166016, "rewards/rejected": -0.5461848974227905, "step": 5098 }, { "epoch": 13.960301163586585, "grad_norm": 5.17673397064209, "learning_rate": 3.0191780821917807e-07, "log_odds_chosen": 1.7679176330566406, "log_odds_ratio": -0.2981296479701996, "logits/chosen": 0.566430926322937, "logits/rejected": 0.6319213509559631, "logps/chosen": -2.0108251571655273, "logps/rejected": -3.6454358100891113, "loss": 0.8027, "nll_loss": 0.7729204893112183, "rewards/accuracies": 1.0, "rewards/chosen": -0.2010825276374817, "rewards/margins": 0.16346105933189392, "rewards/rejected": -0.3645435869693756, "step": 5099 }, { "epoch": 13.963039014373717, "grad_norm": 7.623741626739502, "learning_rate": 3.0178082191780817e-07, "log_odds_chosen": 1.9435826539993286, "log_odds_ratio": -0.4781344532966614, "logits/chosen": 0.6803039312362671, "logits/rejected": 0.7366609573364258, "logps/chosen": -2.866442918777466, "logps/rejected": -4.684844970703125, "loss": 0.7879, "nll_loss": 0.7400796413421631, "rewards/accuracies": 0.75, "rewards/chosen": -0.28664430975914, "rewards/margins": 0.181840181350708, "rewards/rejected": -0.46848446130752563, "step": 5100 }, { "epoch": 13.965776865160848, "grad_norm": 5.427071571350098, "learning_rate": 3.0164383561643837e-07, "log_odds_chosen": 2.161708116531372, "log_odds_ratio": -0.19647003710269928, "logits/chosen": 1.189905047416687, "logits/rejected": 1.182563066482544, "logps/chosen": -1.915244698524475, "logps/rejected": -3.89054012298584, "loss": 0.5913, "nll_loss": 0.5716631412506104, "rewards/accuracies": 0.875, "rewards/chosen": -0.191524475812912, "rewards/margins": 0.19752955436706543, "rewards/rejected": -0.3890540301799774, "step": 5101 }, { "epoch": 13.96851471594798, "grad_norm": 6.507933139801025, "learning_rate": 3.0150684931506847e-07, "log_odds_chosen": 3.621927499771118, "log_odds_ratio": -0.17470210790634155, "logits/chosen": 0.8085294961929321, "logits/rejected": 0.8656190037727356, "logps/chosen": -1.8228784799575806, "logps/rejected": -5.255856990814209, "loss": 0.736, "nll_loss": 0.7184915542602539, "rewards/accuracies": 1.0, "rewards/chosen": -0.18228784203529358, "rewards/margins": 0.3432978391647339, "rewards/rejected": -0.5255857110023499, "step": 5102 }, { "epoch": 13.971252566735114, "grad_norm": 4.880943775177002, "learning_rate": 3.013698630136986e-07, "log_odds_chosen": 2.406404495239258, "log_odds_ratio": -0.2902939021587372, "logits/chosen": 0.761877179145813, "logits/rejected": 0.7842514514923096, "logps/chosen": -2.3589582443237305, "logps/rejected": -4.685177803039551, "loss": 0.7669, "nll_loss": 0.7378362417221069, "rewards/accuracies": 0.875, "rewards/chosen": -0.2358958125114441, "rewards/margins": 0.2326219379901886, "rewards/rejected": -0.4685177505016327, "step": 5103 }, { "epoch": 13.973990417522245, "grad_norm": 6.267062187194824, "learning_rate": 3.012328767123288e-07, "log_odds_chosen": 1.5310306549072266, "log_odds_ratio": -0.26295819878578186, "logits/chosen": 0.85331130027771, "logits/rejected": 0.7722485065460205, "logps/chosen": -2.0573482513427734, "logps/rejected": -3.479702949523926, "loss": 0.7483, "nll_loss": 0.7220306992530823, "rewards/accuracies": 1.0, "rewards/chosen": -0.20573483407497406, "rewards/margins": 0.14223547279834747, "rewards/rejected": -0.34797030687332153, "step": 5104 }, { "epoch": 13.976728268309378, "grad_norm": 5.039016246795654, "learning_rate": 3.010958904109589e-07, "log_odds_chosen": 1.8004448413848877, "log_odds_ratio": -0.28923049569129944, "logits/chosen": 0.7169225811958313, "logits/rejected": 0.7734599709510803, "logps/chosen": -2.1403701305389404, "logps/rejected": -3.8452441692352295, "loss": 0.7489, "nll_loss": 0.7199694514274597, "rewards/accuracies": 0.875, "rewards/chosen": -0.2140370011329651, "rewards/margins": 0.17048737406730652, "rewards/rejected": -0.384524405002594, "step": 5105 }, { "epoch": 13.979466119096509, "grad_norm": 5.075798034667969, "learning_rate": 3.0095890410958903e-07, "log_odds_chosen": 2.709258556365967, "log_odds_ratio": -0.2911543548107147, "logits/chosen": 0.7586288452148438, "logits/rejected": 0.767051100730896, "logps/chosen": -1.6204485893249512, "logps/rejected": -4.140300750732422, "loss": 0.5984, "nll_loss": 0.5692916512489319, "rewards/accuracies": 0.75, "rewards/chosen": -0.16204486787319183, "rewards/margins": 0.25198525190353394, "rewards/rejected": -0.4140300750732422, "step": 5106 }, { "epoch": 13.982203969883642, "grad_norm": 5.0319108963012695, "learning_rate": 3.0082191780821913e-07, "log_odds_chosen": 3.471531867980957, "log_odds_ratio": -0.20040515065193176, "logits/chosen": 0.770244836807251, "logits/rejected": 0.7679805159568787, "logps/chosen": -1.7876914739608765, "logps/rejected": -5.0976033210754395, "loss": 0.7225, "nll_loss": 0.7024310827255249, "rewards/accuracies": 0.875, "rewards/chosen": -0.17876914143562317, "rewards/margins": 0.3309912085533142, "rewards/rejected": -0.5097603797912598, "step": 5107 }, { "epoch": 13.984941820670773, "grad_norm": 5.900009632110596, "learning_rate": 3.0068493150684933e-07, "log_odds_chosen": 1.1027541160583496, "log_odds_ratio": -0.442322313785553, "logits/chosen": 0.9627089500427246, "logits/rejected": 0.974295973777771, "logps/chosen": -2.7450273036956787, "logps/rejected": -3.7948341369628906, "loss": 0.7346, "nll_loss": 0.690371036529541, "rewards/accuracies": 0.75, "rewards/chosen": -0.2745027542114258, "rewards/margins": 0.10498065501451492, "rewards/rejected": -0.3794834315776825, "step": 5108 }, { "epoch": 13.987679671457906, "grad_norm": 4.845203399658203, "learning_rate": 3.0054794520547943e-07, "log_odds_chosen": 1.7020373344421387, "log_odds_ratio": -0.2956730127334595, "logits/chosen": 0.9873751401901245, "logits/rejected": 1.032982349395752, "logps/chosen": -2.5351200103759766, "logps/rejected": -4.136995792388916, "loss": 0.6864, "nll_loss": 0.6568160057067871, "rewards/accuracies": 0.875, "rewards/chosen": -0.25351202487945557, "rewards/margins": 0.16018760204315186, "rewards/rejected": -0.41369959712028503, "step": 5109 }, { "epoch": 13.990417522245037, "grad_norm": 5.472701549530029, "learning_rate": 3.004109589041096e-07, "log_odds_chosen": 2.693999767303467, "log_odds_ratio": -0.24530276656150818, "logits/chosen": 0.8772755861282349, "logits/rejected": 0.9542025327682495, "logps/chosen": -1.7374498844146729, "logps/rejected": -4.23648738861084, "loss": 0.5796, "nll_loss": 0.555045485496521, "rewards/accuracies": 0.875, "rewards/chosen": -0.17374499142169952, "rewards/margins": 0.24990376830101013, "rewards/rejected": -0.42364877462387085, "step": 5110 }, { "epoch": 13.99315537303217, "grad_norm": 6.15730094909668, "learning_rate": 3.0027397260273974e-07, "log_odds_chosen": 1.9325060844421387, "log_odds_ratio": -0.28098177909851074, "logits/chosen": 1.05189049243927, "logits/rejected": 1.0518724918365479, "logps/chosen": -1.879496693611145, "logps/rejected": -3.662954807281494, "loss": 0.6018, "nll_loss": 0.5737097859382629, "rewards/accuracies": 0.875, "rewards/chosen": -0.18794967234134674, "rewards/margins": 0.17834581434726715, "rewards/rejected": -0.3662954568862915, "step": 5111 }, { "epoch": 13.995893223819301, "grad_norm": 5.142602443695068, "learning_rate": 3.0013698630136983e-07, "log_odds_chosen": 1.9417139291763306, "log_odds_ratio": -0.25240758061408997, "logits/chosen": 0.7089602947235107, "logits/rejected": 0.7410315275192261, "logps/chosen": -2.0526528358459473, "logps/rejected": -3.885181188583374, "loss": 0.5891, "nll_loss": 0.5638118982315063, "rewards/accuracies": 0.875, "rewards/chosen": -0.20526528358459473, "rewards/margins": 0.18325282633304596, "rewards/rejected": -0.3885181248188019, "step": 5112 }, { "epoch": 13.998631074606434, "grad_norm": 7.1059136390686035, "learning_rate": 3e-07, "log_odds_chosen": 1.7877848148345947, "log_odds_ratio": -0.5705621242523193, "logits/chosen": 0.9161779880523682, "logits/rejected": 0.9774309992790222, "logps/chosen": -2.569286823272705, "logps/rejected": -4.212954044342041, "loss": 0.6947, "nll_loss": 0.6376116871833801, "rewards/accuracies": 0.875, "rewards/chosen": -0.2569286525249481, "rewards/margins": 0.1643667370080948, "rewards/rejected": -0.4212954044342041, "step": 5113 }, { "epoch": 14.001368925393566, "grad_norm": 4.509196758270264, "learning_rate": 2.998630136986301e-07, "log_odds_chosen": 2.8693389892578125, "log_odds_ratio": -0.20246757566928864, "logits/chosen": 0.7949938774108887, "logits/rejected": 0.7054332494735718, "logps/chosen": -1.6362271308898926, "logps/rejected": -4.30702018737793, "loss": 0.7496, "nll_loss": 0.7293042540550232, "rewards/accuracies": 0.875, "rewards/chosen": -0.16362270712852478, "rewards/margins": 0.26707929372787476, "rewards/rejected": -0.43070200085639954, "step": 5114 }, { "epoch": 14.004106776180699, "grad_norm": 6.18174409866333, "learning_rate": 2.997260273972603e-07, "log_odds_chosen": 3.7505407333374023, "log_odds_ratio": -0.2680668830871582, "logits/chosen": 0.7733717560768127, "logits/rejected": 0.8181755542755127, "logps/chosen": -2.2395176887512207, "logps/rejected": -5.830868721008301, "loss": 0.7043, "nll_loss": 0.6774953603744507, "rewards/accuracies": 1.0, "rewards/chosen": -0.2239517718553543, "rewards/margins": 0.35913509130477905, "rewards/rejected": -0.5830869078636169, "step": 5115 }, { "epoch": 14.00684462696783, "grad_norm": 4.822121620178223, "learning_rate": 2.995890410958904e-07, "log_odds_chosen": 2.25197696685791, "log_odds_ratio": -0.16475620865821838, "logits/chosen": 0.7669918537139893, "logits/rejected": 0.8349982500076294, "logps/chosen": -1.5331213474273682, "logps/rejected": -3.526458263397217, "loss": 0.558, "nll_loss": 0.5415090322494507, "rewards/accuracies": 1.0, "rewards/chosen": -0.15331214666366577, "rewards/margins": 0.19933368265628815, "rewards/rejected": -0.3526458144187927, "step": 5116 }, { "epoch": 14.009582477754963, "grad_norm": 5.653781890869141, "learning_rate": 2.994520547945205e-07, "log_odds_chosen": 1.027646541595459, "log_odds_ratio": -0.3860030770301819, "logits/chosen": 0.7724131941795349, "logits/rejected": 0.7324956059455872, "logps/chosen": -1.7208753824234009, "logps/rejected": -2.623194932937622, "loss": 0.5981, "nll_loss": 0.559522271156311, "rewards/accuracies": 1.0, "rewards/chosen": -0.17208752036094666, "rewards/margins": 0.09023197740316391, "rewards/rejected": -0.26231950521469116, "step": 5117 }, { "epoch": 14.012320328542094, "grad_norm": 6.030174255371094, "learning_rate": 2.993150684931507e-07, "log_odds_chosen": 2.4983773231506348, "log_odds_ratio": -0.235262930393219, "logits/chosen": 0.6393414735794067, "logits/rejected": 0.6587456464767456, "logps/chosen": -1.8032417297363281, "logps/rejected": -4.153879165649414, "loss": 0.7087, "nll_loss": 0.6851367354393005, "rewards/accuracies": 1.0, "rewards/chosen": -0.18032419681549072, "rewards/margins": 0.23506370186805725, "rewards/rejected": -0.415387898683548, "step": 5118 }, { "epoch": 14.015058179329227, "grad_norm": 5.94247579574585, "learning_rate": 2.991780821917808e-07, "log_odds_chosen": 1.113147497177124, "log_odds_ratio": -0.4145391285419464, "logits/chosen": 0.9185736179351807, "logits/rejected": 1.0020514726638794, "logps/chosen": -2.5708842277526855, "logps/rejected": -3.662686824798584, "loss": 0.6815, "nll_loss": 0.6400894522666931, "rewards/accuracies": 0.75, "rewards/chosen": -0.25708845257759094, "rewards/margins": 0.1091802716255188, "rewards/rejected": -0.36626872420310974, "step": 5119 }, { "epoch": 14.017796030116358, "grad_norm": 4.670079231262207, "learning_rate": 2.9904109589041095e-07, "log_odds_chosen": 2.218971014022827, "log_odds_ratio": -0.1701190024614334, "logits/chosen": 0.7432715892791748, "logits/rejected": 0.7581204175949097, "logps/chosen": -2.1360812187194824, "logps/rejected": -4.232542037963867, "loss": 0.6834, "nll_loss": 0.6664189696311951, "rewards/accuracies": 1.0, "rewards/chosen": -0.21360811591148376, "rewards/margins": 0.2096461057662964, "rewards/rejected": -0.42325422167778015, "step": 5120 }, { "epoch": 14.020533880903491, "grad_norm": 8.367412567138672, "learning_rate": 2.989041095890411e-07, "log_odds_chosen": 2.4583592414855957, "log_odds_ratio": -0.5053510665893555, "logits/chosen": 0.8520764112472534, "logits/rejected": 0.8278766870498657, "logps/chosen": -3.145069122314453, "logps/rejected": -5.502867698669434, "loss": 0.8032, "nll_loss": 0.7526247501373291, "rewards/accuracies": 0.875, "rewards/chosen": -0.3145068883895874, "rewards/margins": 0.23577985167503357, "rewards/rejected": -0.5502867698669434, "step": 5121 }, { "epoch": 14.023271731690622, "grad_norm": 5.013728141784668, "learning_rate": 2.9876712328767125e-07, "log_odds_chosen": 2.0684542655944824, "log_odds_ratio": -0.2546401917934418, "logits/chosen": 0.6424825191497803, "logits/rejected": 0.7468171715736389, "logps/chosen": -2.625515937805176, "logps/rejected": -4.602075576782227, "loss": 0.6453, "nll_loss": 0.6198104023933411, "rewards/accuracies": 0.875, "rewards/chosen": -0.26255160570144653, "rewards/margins": 0.19765599071979523, "rewards/rejected": -0.46020758152008057, "step": 5122 }, { "epoch": 14.026009582477755, "grad_norm": 6.835273265838623, "learning_rate": 2.9863013698630135e-07, "log_odds_chosen": 1.8819878101348877, "log_odds_ratio": -0.2776884436607361, "logits/chosen": 0.7491087317466736, "logits/rejected": 0.7474331855773926, "logps/chosen": -2.0572986602783203, "logps/rejected": -3.8476767539978027, "loss": 0.7082, "nll_loss": 0.6804476976394653, "rewards/accuracies": 1.0, "rewards/chosen": -0.2057298719882965, "rewards/margins": 0.17903777956962585, "rewards/rejected": -0.38476765155792236, "step": 5123 }, { "epoch": 14.028747433264886, "grad_norm": 5.550124168395996, "learning_rate": 2.9849315068493145e-07, "log_odds_chosen": 3.0187087059020996, "log_odds_ratio": -0.13208884000778198, "logits/chosen": 0.6913922429084778, "logits/rejected": 0.7049753665924072, "logps/chosen": -2.0345685482025146, "logps/rejected": -4.915997505187988, "loss": 0.6164, "nll_loss": 0.6032097339630127, "rewards/accuracies": 1.0, "rewards/chosen": -0.20345687866210938, "rewards/margins": 0.2881428897380829, "rewards/rejected": -0.49159979820251465, "step": 5124 }, { "epoch": 14.03148528405202, "grad_norm": 5.964295387268066, "learning_rate": 2.9835616438356165e-07, "log_odds_chosen": 1.9112627506256104, "log_odds_ratio": -0.2701283395290375, "logits/chosen": 0.8983574509620667, "logits/rejected": 0.9077906012535095, "logps/chosen": -2.1485097408294678, "logps/rejected": -3.9086387157440186, "loss": 0.6522, "nll_loss": 0.6251723766326904, "rewards/accuracies": 1.0, "rewards/chosen": -0.21485097706317902, "rewards/margins": 0.17601285874843597, "rewards/rejected": -0.390863835811615, "step": 5125 }, { "epoch": 14.03422313483915, "grad_norm": 5.812141418457031, "learning_rate": 2.9821917808219175e-07, "log_odds_chosen": 1.60194993019104, "log_odds_ratio": -0.3903040885925293, "logits/chosen": 1.1637088060379028, "logits/rejected": 1.1752851009368896, "logps/chosen": -2.498533248901367, "logps/rejected": -4.025815486907959, "loss": 0.6968, "nll_loss": 0.6577243804931641, "rewards/accuracies": 0.875, "rewards/chosen": -0.24985331296920776, "rewards/margins": 0.15272822976112366, "rewards/rejected": -0.4025815725326538, "step": 5126 }, { "epoch": 14.036960985626283, "grad_norm": 6.389123916625977, "learning_rate": 2.980821917808219e-07, "log_odds_chosen": 2.1882524490356445, "log_odds_ratio": -0.2379073053598404, "logits/chosen": 0.7466228604316711, "logits/rejected": 0.7838799953460693, "logps/chosen": -2.321971893310547, "logps/rejected": -4.408105373382568, "loss": 0.6223, "nll_loss": 0.5984711050987244, "rewards/accuracies": 1.0, "rewards/chosen": -0.23219719529151917, "rewards/margins": 0.2086133360862732, "rewards/rejected": -0.44081053137779236, "step": 5127 }, { "epoch": 14.039698836413416, "grad_norm": 5.455290794372559, "learning_rate": 2.9794520547945206e-07, "log_odds_chosen": 2.5914435386657715, "log_odds_ratio": -0.17667505145072937, "logits/chosen": 1.0804452896118164, "logits/rejected": 1.13185453414917, "logps/chosen": -2.809276819229126, "logps/rejected": -5.292356014251709, "loss": 0.6605, "nll_loss": 0.6428167819976807, "rewards/accuracies": 1.0, "rewards/chosen": -0.2809276878833771, "rewards/margins": 0.2483079433441162, "rewards/rejected": -0.5292356014251709, "step": 5128 }, { "epoch": 14.042436687200547, "grad_norm": 4.670607566833496, "learning_rate": 2.978082191780822e-07, "log_odds_chosen": 2.831409215927124, "log_odds_ratio": -0.17116807401180267, "logits/chosen": 0.9518994092941284, "logits/rejected": 1.0113656520843506, "logps/chosen": -2.0032734870910645, "logps/rejected": -4.6933207511901855, "loss": 0.6226, "nll_loss": 0.605499804019928, "rewards/accuracies": 1.0, "rewards/chosen": -0.20032736659049988, "rewards/margins": 0.269004762172699, "rewards/rejected": -0.46933209896087646, "step": 5129 }, { "epoch": 14.04517453798768, "grad_norm": 5.552496910095215, "learning_rate": 2.976712328767123e-07, "log_odds_chosen": 1.421138048171997, "log_odds_ratio": -0.322130411863327, "logits/chosen": 0.736417829990387, "logits/rejected": 0.6924102306365967, "logps/chosen": -1.6187019348144531, "logps/rejected": -2.920057535171509, "loss": 0.6119, "nll_loss": 0.5797288417816162, "rewards/accuracies": 0.875, "rewards/chosen": -0.16187019646167755, "rewards/margins": 0.13013553619384766, "rewards/rejected": -0.2920057475566864, "step": 5130 }, { "epoch": 14.047912388774812, "grad_norm": 4.640994071960449, "learning_rate": 2.975342465753424e-07, "log_odds_chosen": 2.734607696533203, "log_odds_ratio": -0.24236518144607544, "logits/chosen": 0.7796874046325684, "logits/rejected": 0.7834880352020264, "logps/chosen": -2.09784197807312, "logps/rejected": -4.734341144561768, "loss": 0.5693, "nll_loss": 0.5450147390365601, "rewards/accuracies": 0.875, "rewards/chosen": -0.20978419482707977, "rewards/margins": 0.26364991068840027, "rewards/rejected": -0.47343409061431885, "step": 5131 }, { "epoch": 14.050650239561945, "grad_norm": 4.7848286628723145, "learning_rate": 2.973972602739726e-07, "log_odds_chosen": 3.5757527351379395, "log_odds_ratio": -0.13869166374206543, "logits/chosen": 0.9549556374549866, "logits/rejected": 0.9997439384460449, "logps/chosen": -1.8787927627563477, "logps/rejected": -5.299262046813965, "loss": 0.7125, "nll_loss": 0.6986120939254761, "rewards/accuracies": 1.0, "rewards/chosen": -0.1878792941570282, "rewards/margins": 0.34204691648483276, "rewards/rejected": -0.5299261808395386, "step": 5132 }, { "epoch": 14.053388090349076, "grad_norm": 6.560344696044922, "learning_rate": 2.972602739726027e-07, "log_odds_chosen": 1.8971855640411377, "log_odds_ratio": -0.31073057651519775, "logits/chosen": 0.9079045653343201, "logits/rejected": 0.8924630880355835, "logps/chosen": -2.2536463737487793, "logps/rejected": -4.006309509277344, "loss": 0.6318, "nll_loss": 0.6006989479064941, "rewards/accuracies": 0.875, "rewards/chosen": -0.22536464035511017, "rewards/margins": 0.1752663552761078, "rewards/rejected": -0.40063101053237915, "step": 5133 }, { "epoch": 14.056125941136209, "grad_norm": 6.006748199462891, "learning_rate": 2.9712328767123286e-07, "log_odds_chosen": 2.9593348503112793, "log_odds_ratio": -0.22557957470417023, "logits/chosen": 1.1645545959472656, "logits/rejected": 1.251314401626587, "logps/chosen": -3.202517032623291, "logps/rejected": -6.1068925857543945, "loss": 0.7375, "nll_loss": 0.7149173617362976, "rewards/accuracies": 1.0, "rewards/chosen": -0.3202517032623291, "rewards/margins": 0.29043757915496826, "rewards/rejected": -0.6106892824172974, "step": 5134 }, { "epoch": 14.05886379192334, "grad_norm": 5.728139877319336, "learning_rate": 2.96986301369863e-07, "log_odds_chosen": 2.2706985473632812, "log_odds_ratio": -0.26242947578430176, "logits/chosen": 0.7870712280273438, "logits/rejected": 0.7224366068840027, "logps/chosen": -1.707423210144043, "logps/rejected": -3.768752336502075, "loss": 0.5578, "nll_loss": 0.5315799713134766, "rewards/accuracies": 1.0, "rewards/chosen": -0.17074231803417206, "rewards/margins": 0.2061329334974289, "rewards/rejected": -0.37687522172927856, "step": 5135 }, { "epoch": 14.061601642710473, "grad_norm": 5.535368919372559, "learning_rate": 2.9684931506849317e-07, "log_odds_chosen": 1.65720796585083, "log_odds_ratio": -0.3322370946407318, "logits/chosen": 1.0903493165969849, "logits/rejected": 1.0863158702850342, "logps/chosen": -1.931341290473938, "logps/rejected": -3.483323574066162, "loss": 0.6613, "nll_loss": 0.6281143426895142, "rewards/accuracies": 1.0, "rewards/chosen": -0.1931341141462326, "rewards/margins": 0.15519821643829346, "rewards/rejected": -0.34833231568336487, "step": 5136 }, { "epoch": 14.064339493497604, "grad_norm": 5.766473293304443, "learning_rate": 2.9671232876712327e-07, "log_odds_chosen": 4.504340171813965, "log_odds_ratio": -0.10397237539291382, "logits/chosen": 0.734237015247345, "logits/rejected": 0.7621878385543823, "logps/chosen": -2.6889309883117676, "logps/rejected": -7.108384609222412, "loss": 0.7658, "nll_loss": 0.7554355263710022, "rewards/accuracies": 1.0, "rewards/chosen": -0.2688930928707123, "rewards/margins": 0.4419454038143158, "rewards/rejected": -0.7108385562896729, "step": 5137 }, { "epoch": 14.067077344284737, "grad_norm": 6.389935493469238, "learning_rate": 2.9657534246575336e-07, "log_odds_chosen": 2.9609179496765137, "log_odds_ratio": -0.20689600706100464, "logits/chosen": 0.6876653432846069, "logits/rejected": 0.7263476252555847, "logps/chosen": -1.672943353652954, "logps/rejected": -4.451690673828125, "loss": 0.5826, "nll_loss": 0.5618810653686523, "rewards/accuracies": 1.0, "rewards/chosen": -0.16729435324668884, "rewards/margins": 0.27787476778030396, "rewards/rejected": -0.4451690912246704, "step": 5138 }, { "epoch": 14.069815195071868, "grad_norm": 5.281982421875, "learning_rate": 2.9643835616438357e-07, "log_odds_chosen": 1.7370803356170654, "log_odds_ratio": -0.2592214047908783, "logits/chosen": 0.9140567779541016, "logits/rejected": 1.0098533630371094, "logps/chosen": -2.414231061935425, "logps/rejected": -4.065005302429199, "loss": 0.7655, "nll_loss": 0.7395659685134888, "rewards/accuracies": 0.875, "rewards/chosen": -0.2414231151342392, "rewards/margins": 0.16507741808891296, "rewards/rejected": -0.40650051832199097, "step": 5139 }, { "epoch": 14.072553045859001, "grad_norm": 6.765572547912598, "learning_rate": 2.9630136986301367e-07, "log_odds_chosen": 1.2031968832015991, "log_odds_ratio": -0.5171715021133423, "logits/chosen": 0.9232171177864075, "logits/rejected": 0.9269677400588989, "logps/chosen": -3.247041702270508, "logps/rejected": -4.387208461761475, "loss": 0.7763, "nll_loss": 0.724591851234436, "rewards/accuracies": 0.625, "rewards/chosen": -0.3247041702270508, "rewards/margins": 0.11401668936014175, "rewards/rejected": -0.43872085213661194, "step": 5140 }, { "epoch": 14.075290896646132, "grad_norm": 6.188265323638916, "learning_rate": 2.961643835616438e-07, "log_odds_chosen": 3.3950014114379883, "log_odds_ratio": -0.1145656630396843, "logits/chosen": 1.0755419731140137, "logits/rejected": 1.1841548681259155, "logps/chosen": -2.733637809753418, "logps/rejected": -6.050907135009766, "loss": 0.6346, "nll_loss": 0.6231848001480103, "rewards/accuracies": 1.0, "rewards/chosen": -0.27336379885673523, "rewards/margins": 0.33172690868377686, "rewards/rejected": -0.6050907373428345, "step": 5141 }, { "epoch": 14.078028747433265, "grad_norm": 4.805602073669434, "learning_rate": 2.9602739726027397e-07, "log_odds_chosen": 3.1822216510772705, "log_odds_ratio": -0.1753038763999939, "logits/chosen": 1.0422027111053467, "logits/rejected": 1.09003484249115, "logps/chosen": -2.7776551246643066, "logps/rejected": -5.866725444793701, "loss": 0.6748, "nll_loss": 0.6572418808937073, "rewards/accuracies": 1.0, "rewards/chosen": -0.27776551246643066, "rewards/margins": 0.30890703201293945, "rewards/rejected": -0.5866725444793701, "step": 5142 }, { "epoch": 14.080766598220396, "grad_norm": 4.853468418121338, "learning_rate": 2.958904109589041e-07, "log_odds_chosen": 1.1413754224777222, "log_odds_ratio": -0.46943387389183044, "logits/chosen": 0.8312762975692749, "logits/rejected": 0.8238149881362915, "logps/chosen": -2.4043118953704834, "logps/rejected": -3.459094524383545, "loss": 0.7382, "nll_loss": 0.6912526488304138, "rewards/accuracies": 0.875, "rewards/chosen": -0.24043118953704834, "rewards/margins": 0.10547823458909988, "rewards/rejected": -0.3459094166755676, "step": 5143 }, { "epoch": 14.08350444900753, "grad_norm": 4.9302544593811035, "learning_rate": 2.957534246575342e-07, "log_odds_chosen": 3.692378520965576, "log_odds_ratio": -0.19584007561206818, "logits/chosen": 0.8211082220077515, "logits/rejected": 0.7788220643997192, "logps/chosen": -1.76878821849823, "logps/rejected": -5.289237976074219, "loss": 0.646, "nll_loss": 0.6264297962188721, "rewards/accuracies": 1.0, "rewards/chosen": -0.17687882483005524, "rewards/margins": 0.3520449995994568, "rewards/rejected": -0.5289238691329956, "step": 5144 }, { "epoch": 14.08624229979466, "grad_norm": 6.441702365875244, "learning_rate": 2.956164383561643e-07, "log_odds_chosen": 2.39530611038208, "log_odds_ratio": -0.27203038334846497, "logits/chosen": 0.6908483505249023, "logits/rejected": 0.7776907682418823, "logps/chosen": -2.883298397064209, "logps/rejected": -5.168390274047852, "loss": 0.6173, "nll_loss": 0.5900508165359497, "rewards/accuracies": 1.0, "rewards/chosen": -0.2883298099040985, "rewards/margins": 0.22850920259952545, "rewards/rejected": -0.5168390274047852, "step": 5145 }, { "epoch": 14.088980150581794, "grad_norm": 4.826483249664307, "learning_rate": 2.9547945205479453e-07, "log_odds_chosen": 2.190585136413574, "log_odds_ratio": -0.23434294760227203, "logits/chosen": 0.920549750328064, "logits/rejected": 0.9573074579238892, "logps/chosen": -2.0880377292633057, "logps/rejected": -4.162637710571289, "loss": 0.5962, "nll_loss": 0.5727502107620239, "rewards/accuracies": 1.0, "rewards/chosen": -0.20880377292633057, "rewards/margins": 0.20746000111103058, "rewards/rejected": -0.41626378893852234, "step": 5146 }, { "epoch": 14.091718001368925, "grad_norm": 5.262073040008545, "learning_rate": 2.9534246575342463e-07, "log_odds_chosen": 1.5397495031356812, "log_odds_ratio": -0.2637826204299927, "logits/chosen": 0.7714396715164185, "logits/rejected": 0.8313553333282471, "logps/chosen": -2.691990613937378, "logps/rejected": -4.0924072265625, "loss": 0.6492, "nll_loss": 0.6227997541427612, "rewards/accuracies": 0.875, "rewards/chosen": -0.26919907331466675, "rewards/margins": 0.14004163444042206, "rewards/rejected": -0.4092406928539276, "step": 5147 }, { "epoch": 14.094455852156058, "grad_norm": 9.152057647705078, "learning_rate": 2.952054794520548e-07, "log_odds_chosen": 1.8875203132629395, "log_odds_ratio": -0.6455685496330261, "logits/chosen": 1.0602006912231445, "logits/rejected": 0.9698549509048462, "logps/chosen": -2.265740394592285, "logps/rejected": -4.077297687530518, "loss": 0.7468, "nll_loss": 0.6822843551635742, "rewards/accuracies": 0.75, "rewards/chosen": -0.22657403349876404, "rewards/margins": 0.181155726313591, "rewards/rejected": -0.40772977471351624, "step": 5148 }, { "epoch": 14.097193702943189, "grad_norm": 6.349147319793701, "learning_rate": 2.9506849315068493e-07, "log_odds_chosen": 1.7724480628967285, "log_odds_ratio": -0.2817224860191345, "logits/chosen": 1.0187783241271973, "logits/rejected": 0.987216591835022, "logps/chosen": -1.6975021362304688, "logps/rejected": -3.3036136627197266, "loss": 0.5751, "nll_loss": 0.5469657778739929, "rewards/accuracies": 1.0, "rewards/chosen": -0.16975021362304688, "rewards/margins": 0.16061116755008698, "rewards/rejected": -0.33036136627197266, "step": 5149 }, { "epoch": 14.099931553730322, "grad_norm": 8.829095840454102, "learning_rate": 2.949315068493151e-07, "log_odds_chosen": 2.1792123317718506, "log_odds_ratio": -0.5291286706924438, "logits/chosen": 0.8167858123779297, "logits/rejected": 0.7908703684806824, "logps/chosen": -2.5745763778686523, "logps/rejected": -4.7002482414245605, "loss": 0.7163, "nll_loss": 0.663337230682373, "rewards/accuracies": 0.75, "rewards/chosen": -0.2574576437473297, "rewards/margins": 0.21256715059280396, "rewards/rejected": -0.47002482414245605, "step": 5150 }, { "epoch": 14.102669404517453, "grad_norm": 5.476446628570557, "learning_rate": 2.947945205479452e-07, "log_odds_chosen": 1.3068366050720215, "log_odds_ratio": -0.37371546030044556, "logits/chosen": 0.7361992597579956, "logits/rejected": 0.7232584953308105, "logps/chosen": -1.7306721210479736, "logps/rejected": -2.953519582748413, "loss": 0.562, "nll_loss": 0.5246610641479492, "rewards/accuracies": 0.75, "rewards/chosen": -0.17306722700595856, "rewards/margins": 0.12228474020957947, "rewards/rejected": -0.2953519821166992, "step": 5151 }, { "epoch": 14.105407255304586, "grad_norm": 8.06281852722168, "learning_rate": 2.9465753424657534e-07, "log_odds_chosen": 1.5177323818206787, "log_odds_ratio": -0.496079683303833, "logits/chosen": 0.9536420106887817, "logits/rejected": 0.9984735250473022, "logps/chosen": -2.4938201904296875, "logps/rejected": -3.9490115642547607, "loss": 0.7061, "nll_loss": 0.6565208435058594, "rewards/accuracies": 0.75, "rewards/chosen": -0.24938201904296875, "rewards/margins": 0.14551913738250732, "rewards/rejected": -0.3949011564254761, "step": 5152 }, { "epoch": 14.108145106091717, "grad_norm": 4.946279048919678, "learning_rate": 2.945205479452055e-07, "log_odds_chosen": 3.2145721912384033, "log_odds_ratio": -0.11037378013134003, "logits/chosen": 1.0025478601455688, "logits/rejected": 1.0657343864440918, "logps/chosen": -1.9967621564865112, "logps/rejected": -5.051517486572266, "loss": 0.693, "nll_loss": 0.6819618940353394, "rewards/accuracies": 1.0, "rewards/chosen": -0.19967621564865112, "rewards/margins": 0.30547553300857544, "rewards/rejected": -0.5051518082618713, "step": 5153 }, { "epoch": 14.11088295687885, "grad_norm": 5.5700883865356445, "learning_rate": 2.943835616438356e-07, "log_odds_chosen": 1.2526535987854004, "log_odds_ratio": -0.35578611493110657, "logits/chosen": 0.856464684009552, "logits/rejected": 0.8060495853424072, "logps/chosen": -1.5450599193572998, "logps/rejected": -2.660017967224121, "loss": 0.5937, "nll_loss": 0.5580817461013794, "rewards/accuracies": 1.0, "rewards/chosen": -0.15450599789619446, "rewards/margins": 0.11149580031633377, "rewards/rejected": -0.26600182056427, "step": 5154 }, { "epoch": 14.113620807665983, "grad_norm": 4.746334075927734, "learning_rate": 2.9424657534246574e-07, "log_odds_chosen": 1.981497049331665, "log_odds_ratio": -0.24930578470230103, "logits/chosen": 1.1082333326339722, "logits/rejected": 1.1168076992034912, "logps/chosen": -2.207592010498047, "logps/rejected": -4.056537628173828, "loss": 0.689, "nll_loss": 0.6640945076942444, "rewards/accuracies": 1.0, "rewards/chosen": -0.22075921297073364, "rewards/margins": 0.18489456176757812, "rewards/rejected": -0.4056537449359894, "step": 5155 }, { "epoch": 14.116358658453114, "grad_norm": 5.395142078399658, "learning_rate": 2.941095890410959e-07, "log_odds_chosen": 1.2706842422485352, "log_odds_ratio": -0.38561975955963135, "logits/chosen": 0.8232451677322388, "logits/rejected": 0.8713623285293579, "logps/chosen": -1.7574467658996582, "logps/rejected": -2.8795595169067383, "loss": 0.6407, "nll_loss": 0.6021374464035034, "rewards/accuracies": 0.75, "rewards/chosen": -0.1757446676492691, "rewards/margins": 0.11221130192279816, "rewards/rejected": -0.28795596957206726, "step": 5156 }, { "epoch": 14.119096509240247, "grad_norm": 6.169058322906494, "learning_rate": 2.9397260273972604e-07, "log_odds_chosen": 2.064755439758301, "log_odds_ratio": -0.20635342597961426, "logits/chosen": 0.9559640884399414, "logits/rejected": 1.0050678253173828, "logps/chosen": -2.16524338722229, "logps/rejected": -4.124678134918213, "loss": 0.6104, "nll_loss": 0.5897181034088135, "rewards/accuracies": 1.0, "rewards/chosen": -0.21652433276176453, "rewards/margins": 0.19594348967075348, "rewards/rejected": -0.4124678373336792, "step": 5157 }, { "epoch": 14.121834360027378, "grad_norm": 6.379405498504639, "learning_rate": 2.9383561643835614e-07, "log_odds_chosen": 1.7606465816497803, "log_odds_ratio": -0.26406824588775635, "logits/chosen": 0.6483593583106995, "logits/rejected": 0.6599488258361816, "logps/chosen": -2.2200498580932617, "logps/rejected": -3.8547511100769043, "loss": 0.6792, "nll_loss": 0.6527705192565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.2220049798488617, "rewards/margins": 0.16347014904022217, "rewards/rejected": -0.38547515869140625, "step": 5158 }, { "epoch": 14.124572210814511, "grad_norm": 5.416629791259766, "learning_rate": 2.936986301369863e-07, "log_odds_chosen": 2.8989927768707275, "log_odds_ratio": -0.3359438478946686, "logits/chosen": 0.8576366901397705, "logits/rejected": 0.8472239971160889, "logps/chosen": -2.1964237689971924, "logps/rejected": -5.021982669830322, "loss": 0.7472, "nll_loss": 0.713623046875, "rewards/accuracies": 0.875, "rewards/chosen": -0.21964238584041595, "rewards/margins": 0.2825559377670288, "rewards/rejected": -0.502198338508606, "step": 5159 }, { "epoch": 14.127310061601642, "grad_norm": 6.504067897796631, "learning_rate": 2.9356164383561645e-07, "log_odds_chosen": 1.9117015600204468, "log_odds_ratio": -0.28160420060157776, "logits/chosen": 0.8416482210159302, "logits/rejected": 0.8493471145629883, "logps/chosen": -2.8168678283691406, "logps/rejected": -4.61793327331543, "loss": 0.6971, "nll_loss": 0.6689552664756775, "rewards/accuracies": 0.875, "rewards/chosen": -0.28168678283691406, "rewards/margins": 0.18010658025741577, "rewards/rejected": -0.46179336309432983, "step": 5160 }, { "epoch": 14.130047912388775, "grad_norm": 7.535162925720215, "learning_rate": 2.9342465753424654e-07, "log_odds_chosen": 2.451671600341797, "log_odds_ratio": -0.6243826150894165, "logits/chosen": 0.7282463908195496, "logits/rejected": 0.7805160880088806, "logps/chosen": -2.4751789569854736, "logps/rejected": -4.747987747192383, "loss": 0.7402, "nll_loss": 0.6777840852737427, "rewards/accuracies": 0.625, "rewards/chosen": -0.2475179135799408, "rewards/margins": 0.2272808998823166, "rewards/rejected": -0.4747988283634186, "step": 5161 }, { "epoch": 14.132785763175907, "grad_norm": 4.808192253112793, "learning_rate": 2.932876712328767e-07, "log_odds_chosen": 1.3880629539489746, "log_odds_ratio": -0.3077932596206665, "logits/chosen": 1.0622828006744385, "logits/rejected": 1.0792274475097656, "logps/chosen": -2.1488759517669678, "logps/rejected": -3.428384780883789, "loss": 0.509, "nll_loss": 0.4781833589076996, "rewards/accuracies": 1.0, "rewards/chosen": -0.2148876041173935, "rewards/margins": 0.12795087695121765, "rewards/rejected": -0.34283846616744995, "step": 5162 }, { "epoch": 14.13552361396304, "grad_norm": 5.625088691711426, "learning_rate": 2.9315068493150685e-07, "log_odds_chosen": 1.7617912292480469, "log_odds_ratio": -0.28995242714881897, "logits/chosen": 0.7973383665084839, "logits/rejected": 0.7234511971473694, "logps/chosen": -1.7640717029571533, "logps/rejected": -3.3895351886749268, "loss": 0.7026, "nll_loss": 0.6736457347869873, "rewards/accuracies": 1.0, "rewards/chosen": -0.17640715837478638, "rewards/margins": 0.16254635155200958, "rewards/rejected": -0.33895355463027954, "step": 5163 }, { "epoch": 14.13826146475017, "grad_norm": 5.340157985687256, "learning_rate": 2.9301369863013695e-07, "log_odds_chosen": 1.3009777069091797, "log_odds_ratio": -0.6395959854125977, "logits/chosen": 0.7128491997718811, "logits/rejected": 0.7588704824447632, "logps/chosen": -2.590640068054199, "logps/rejected": -3.8221163749694824, "loss": 0.6497, "nll_loss": 0.5857703685760498, "rewards/accuracies": 0.75, "rewards/chosen": -0.2590639889240265, "rewards/margins": 0.1231476366519928, "rewards/rejected": -0.3822116255760193, "step": 5164 }, { "epoch": 14.140999315537304, "grad_norm": 5.286808013916016, "learning_rate": 2.928767123287671e-07, "log_odds_chosen": 2.0473780632019043, "log_odds_ratio": -0.21321064233779907, "logits/chosen": 0.7819699048995972, "logits/rejected": 0.8458616733551025, "logps/chosen": -2.0848121643066406, "logps/rejected": -3.9416816234588623, "loss": 0.6339, "nll_loss": 0.6126083135604858, "rewards/accuracies": 1.0, "rewards/chosen": -0.20848120748996735, "rewards/margins": 0.18568697571754456, "rewards/rejected": -0.3941681683063507, "step": 5165 }, { "epoch": 14.143737166324435, "grad_norm": 5.6034369468688965, "learning_rate": 2.9273972602739725e-07, "log_odds_chosen": 0.9554901123046875, "log_odds_ratio": -0.6311238408088684, "logits/chosen": 0.8110589981079102, "logits/rejected": 0.8745258450508118, "logps/chosen": -2.4292666912078857, "logps/rejected": -3.3548359870910645, "loss": 0.7498, "nll_loss": 0.6867042779922485, "rewards/accuracies": 0.75, "rewards/chosen": -0.242926687002182, "rewards/margins": 0.0925569161772728, "rewards/rejected": -0.3354836106300354, "step": 5166 }, { "epoch": 14.146475017111568, "grad_norm": 5.769305229187012, "learning_rate": 2.926027397260274e-07, "log_odds_chosen": 2.0787534713745117, "log_odds_ratio": -0.28068479895591736, "logits/chosen": 0.7446410059928894, "logits/rejected": 0.7894378304481506, "logps/chosen": -2.244694948196411, "logps/rejected": -4.1971588134765625, "loss": 0.6213, "nll_loss": 0.5932143330574036, "rewards/accuracies": 0.75, "rewards/chosen": -0.22446949779987335, "rewards/margins": 0.1952463686466217, "rewards/rejected": -0.4197158217430115, "step": 5167 }, { "epoch": 14.149212867898699, "grad_norm": 5.815262794494629, "learning_rate": 2.924657534246575e-07, "log_odds_chosen": 2.8604140281677246, "log_odds_ratio": -0.20298725366592407, "logits/chosen": 0.8487101793289185, "logits/rejected": 0.8622941970825195, "logps/chosen": -2.5277185440063477, "logps/rejected": -5.264410972595215, "loss": 0.7551, "nll_loss": 0.7348036170005798, "rewards/accuracies": 0.875, "rewards/chosen": -0.25277185440063477, "rewards/margins": 0.27366921305656433, "rewards/rejected": -0.5264410972595215, "step": 5168 }, { "epoch": 14.151950718685832, "grad_norm": 6.040086269378662, "learning_rate": 2.9232876712328766e-07, "log_odds_chosen": 2.8027048110961914, "log_odds_ratio": -0.30646005272865295, "logits/chosen": 1.0319480895996094, "logits/rejected": 1.122957468032837, "logps/chosen": -2.4234120845794678, "logps/rejected": -5.071361064910889, "loss": 0.6446, "nll_loss": 0.6139109134674072, "rewards/accuracies": 0.875, "rewards/chosen": -0.24234120547771454, "rewards/margins": 0.2647949159145355, "rewards/rejected": -0.5071361064910889, "step": 5169 }, { "epoch": 14.154688569472963, "grad_norm": 6.212743759155273, "learning_rate": 2.921917808219178e-07, "log_odds_chosen": 0.5156140327453613, "log_odds_ratio": -0.7211084961891174, "logits/chosen": 0.70766282081604, "logits/rejected": 0.7226496934890747, "logps/chosen": -2.343634605407715, "logps/rejected": -2.7670493125915527, "loss": 0.6749, "nll_loss": 0.6027758121490479, "rewards/accuracies": 0.75, "rewards/chosen": -0.23436346650123596, "rewards/margins": 0.042341481894254684, "rewards/rejected": -0.27670493721961975, "step": 5170 }, { "epoch": 14.157426420260096, "grad_norm": 5.346175670623779, "learning_rate": 2.920547945205479e-07, "log_odds_chosen": 1.0626838207244873, "log_odds_ratio": -0.46308276057243347, "logits/chosen": 0.7876902222633362, "logits/rejected": 0.7643950581550598, "logps/chosen": -1.5703954696655273, "logps/rejected": -2.543022871017456, "loss": 0.6034, "nll_loss": 0.5570741891860962, "rewards/accuracies": 0.75, "rewards/chosen": -0.15703953802585602, "rewards/margins": 0.09726274013519287, "rewards/rejected": -0.2543022930622101, "step": 5171 }, { "epoch": 14.160164271047227, "grad_norm": 6.1398396492004395, "learning_rate": 2.9191780821917806e-07, "log_odds_chosen": 2.691380500793457, "log_odds_ratio": -0.24116723239421844, "logits/chosen": 0.7833456993103027, "logits/rejected": 0.6758158206939697, "logps/chosen": -1.8859719038009644, "logps/rejected": -4.440563201904297, "loss": 0.6213, "nll_loss": 0.5971361994743347, "rewards/accuracies": 0.875, "rewards/chosen": -0.1885972023010254, "rewards/margins": 0.25545912981033325, "rewards/rejected": -0.44405627250671387, "step": 5172 }, { "epoch": 14.16290212183436, "grad_norm": 6.50056791305542, "learning_rate": 2.917808219178082e-07, "log_odds_chosen": 1.5700187683105469, "log_odds_ratio": -0.3915879726409912, "logits/chosen": 0.721286416053772, "logits/rejected": 0.7865923047065735, "logps/chosen": -1.9516031742095947, "logps/rejected": -3.3709259033203125, "loss": 0.615, "nll_loss": 0.5758848786354065, "rewards/accuracies": 0.875, "rewards/chosen": -0.19516031444072723, "rewards/margins": 0.14193227887153625, "rewards/rejected": -0.3370926082134247, "step": 5173 }, { "epoch": 14.165639972621491, "grad_norm": 6.068900108337402, "learning_rate": 2.9164383561643836e-07, "log_odds_chosen": 3.028925657272339, "log_odds_ratio": -0.303341805934906, "logits/chosen": 0.8509689569473267, "logits/rejected": 0.9324856996536255, "logps/chosen": -2.7179031372070312, "logps/rejected": -5.683856010437012, "loss": 0.6607, "nll_loss": 0.6303958892822266, "rewards/accuracies": 0.75, "rewards/chosen": -0.2717902958393097, "rewards/margins": 0.2965952455997467, "rewards/rejected": -0.5683856010437012, "step": 5174 }, { "epoch": 14.168377823408624, "grad_norm": 5.08824348449707, "learning_rate": 2.9150684931506846e-07, "log_odds_chosen": 2.315196990966797, "log_odds_ratio": -0.2524075508117676, "logits/chosen": 0.9051228165626526, "logits/rejected": 0.9182493090629578, "logps/chosen": -2.4635231494903564, "logps/rejected": -4.689634799957275, "loss": 0.6944, "nll_loss": 0.6692023277282715, "rewards/accuracies": 1.0, "rewards/chosen": -0.24635231494903564, "rewards/margins": 0.2226111888885498, "rewards/rejected": -0.46896350383758545, "step": 5175 }, { "epoch": 14.171115674195756, "grad_norm": 5.1898112297058105, "learning_rate": 2.913698630136986e-07, "log_odds_chosen": 2.495474338531494, "log_odds_ratio": -0.18283063173294067, "logits/chosen": 0.712226390838623, "logits/rejected": 0.7127885222434998, "logps/chosen": -1.9739160537719727, "logps/rejected": -4.323051452636719, "loss": 0.6606, "nll_loss": 0.6423161625862122, "rewards/accuracies": 1.0, "rewards/chosen": -0.1973915994167328, "rewards/margins": 0.23491358757019043, "rewards/rejected": -0.4323051869869232, "step": 5176 }, { "epoch": 14.173853524982889, "grad_norm": 9.878580093383789, "learning_rate": 2.9123287671232877e-07, "log_odds_chosen": 1.0130995512008667, "log_odds_ratio": -0.7221143841743469, "logits/chosen": 1.0135760307312012, "logits/rejected": 1.0683603286743164, "logps/chosen": -3.5024116039276123, "logps/rejected": -4.382050514221191, "loss": 0.7711, "nll_loss": 0.6988518834114075, "rewards/accuracies": 0.875, "rewards/chosen": -0.35024118423461914, "rewards/margins": 0.08796392381191254, "rewards/rejected": -0.4382050633430481, "step": 5177 }, { "epoch": 14.17659137577002, "grad_norm": 7.242558002471924, "learning_rate": 2.9109589041095887e-07, "log_odds_chosen": 2.3493361473083496, "log_odds_ratio": -0.28923818469047546, "logits/chosen": 0.8199212551116943, "logits/rejected": 0.8377491235733032, "logps/chosen": -3.0657999515533447, "logps/rejected": -5.295922756195068, "loss": 0.7518, "nll_loss": 0.7229139804840088, "rewards/accuracies": 0.75, "rewards/chosen": -0.30657997727394104, "rewards/margins": 0.2230122983455658, "rewards/rejected": -0.5295922756195068, "step": 5178 }, { "epoch": 14.179329226557153, "grad_norm": 5.215257167816162, "learning_rate": 2.90958904109589e-07, "log_odds_chosen": 1.7260223627090454, "log_odds_ratio": -0.27476775646209717, "logits/chosen": 0.8873928785324097, "logits/rejected": 0.896030604839325, "logps/chosen": -2.649813652038574, "logps/rejected": -4.267570972442627, "loss": 0.7159, "nll_loss": 0.6883773803710938, "rewards/accuracies": 1.0, "rewards/chosen": -0.26498138904571533, "rewards/margins": 0.16177573800086975, "rewards/rejected": -0.4267570972442627, "step": 5179 }, { "epoch": 14.182067077344286, "grad_norm": 7.44412899017334, "learning_rate": 2.9082191780821917e-07, "log_odds_chosen": 2.525881290435791, "log_odds_ratio": -0.30713367462158203, "logits/chosen": 1.048833966255188, "logits/rejected": 1.0205769538879395, "logps/chosen": -1.944579005241394, "logps/rejected": -4.301973342895508, "loss": 0.7039, "nll_loss": 0.6732252836227417, "rewards/accuracies": 0.875, "rewards/chosen": -0.19445788860321045, "rewards/margins": 0.23573943972587585, "rewards/rejected": -0.4301973283290863, "step": 5180 }, { "epoch": 14.184804928131417, "grad_norm": 4.103282451629639, "learning_rate": 2.906849315068493e-07, "log_odds_chosen": 2.4904346466064453, "log_odds_ratio": -0.1187112107872963, "logits/chosen": 0.8409394025802612, "logits/rejected": 0.842988133430481, "logps/chosen": -1.6852898597717285, "logps/rejected": -3.9502432346343994, "loss": 0.6553, "nll_loss": 0.6434656381607056, "rewards/accuracies": 1.0, "rewards/chosen": -0.1685289889574051, "rewards/margins": 0.22649532556533813, "rewards/rejected": -0.39502429962158203, "step": 5181 }, { "epoch": 14.18754277891855, "grad_norm": 4.635128974914551, "learning_rate": 2.905479452054794e-07, "log_odds_chosen": 1.8819828033447266, "log_odds_ratio": -0.18494091928005219, "logits/chosen": 0.9360607266426086, "logits/rejected": 0.9379333853721619, "logps/chosen": -1.9992496967315674, "logps/rejected": -3.6758174896240234, "loss": 0.6179, "nll_loss": 0.5994143486022949, "rewards/accuracies": 1.0, "rewards/chosen": -0.19992496073246002, "rewards/margins": 0.1676567941904068, "rewards/rejected": -0.3675817847251892, "step": 5182 }, { "epoch": 14.190280629705681, "grad_norm": 4.700196743011475, "learning_rate": 2.9041095890410957e-07, "log_odds_chosen": 2.0253918170928955, "log_odds_ratio": -0.29938459396362305, "logits/chosen": 0.9547070860862732, "logits/rejected": 0.9951381087303162, "logps/chosen": -1.6957248449325562, "logps/rejected": -3.556175708770752, "loss": 0.5718, "nll_loss": 0.5418450832366943, "rewards/accuracies": 0.875, "rewards/chosen": -0.16957250237464905, "rewards/margins": 0.1860450804233551, "rewards/rejected": -0.35561755299568176, "step": 5183 }, { "epoch": 14.193018480492814, "grad_norm": 7.029287815093994, "learning_rate": 2.902739726027397e-07, "log_odds_chosen": 1.514464020729065, "log_odds_ratio": -0.36644837260246277, "logits/chosen": 0.8782913088798523, "logits/rejected": 0.9662759900093079, "logps/chosen": -2.679084539413452, "logps/rejected": -4.124098777770996, "loss": 0.6696, "nll_loss": 0.6329506635665894, "rewards/accuracies": 0.875, "rewards/chosen": -0.2679084539413452, "rewards/margins": 0.1445014476776123, "rewards/rejected": -0.4124099016189575, "step": 5184 }, { "epoch": 14.195756331279945, "grad_norm": 5.614260673522949, "learning_rate": 2.901369863013698e-07, "log_odds_chosen": 3.177528142929077, "log_odds_ratio": -0.286160409450531, "logits/chosen": 1.081713318824768, "logits/rejected": 1.0916461944580078, "logps/chosen": -2.5205166339874268, "logps/rejected": -5.594119071960449, "loss": 0.7219, "nll_loss": 0.693259060382843, "rewards/accuracies": 0.875, "rewards/chosen": -0.2520516514778137, "rewards/margins": 0.30736029148101807, "rewards/rejected": -0.5594119429588318, "step": 5185 }, { "epoch": 14.198494182067078, "grad_norm": 5.279280662536621, "learning_rate": 2.9e-07, "log_odds_chosen": 1.461471676826477, "log_odds_ratio": -0.453088641166687, "logits/chosen": 0.7089775204658508, "logits/rejected": 0.7304380536079407, "logps/chosen": -2.9415624141693115, "logps/rejected": -4.346570014953613, "loss": 0.8726, "nll_loss": 0.8273394703865051, "rewards/accuracies": 0.75, "rewards/chosen": -0.2941562533378601, "rewards/margins": 0.1405007690191269, "rewards/rejected": -0.4346570074558258, "step": 5186 }, { "epoch": 14.20123203285421, "grad_norm": 4.487062454223633, "learning_rate": 2.8986301369863013e-07, "log_odds_chosen": 3.2098307609558105, "log_odds_ratio": -0.13274848461151123, "logits/chosen": 0.8270809650421143, "logits/rejected": 0.8613175749778748, "logps/chosen": -2.053107261657715, "logps/rejected": -5.103774547576904, "loss": 0.6041, "nll_loss": 0.5908530950546265, "rewards/accuracies": 1.0, "rewards/chosen": -0.20531070232391357, "rewards/margins": 0.3050667941570282, "rewards/rejected": -0.5103775262832642, "step": 5187 }, { "epoch": 14.203969883641342, "grad_norm": 4.717026233673096, "learning_rate": 2.897260273972603e-07, "log_odds_chosen": 3.001567840576172, "log_odds_ratio": -0.12863312661647797, "logits/chosen": 1.0810959339141846, "logits/rejected": 1.079154133796692, "logps/chosen": -2.014073371887207, "logps/rejected": -4.846312522888184, "loss": 0.5831, "nll_loss": 0.5702441334724426, "rewards/accuracies": 1.0, "rewards/chosen": -0.2014073133468628, "rewards/margins": 0.2832239270210266, "rewards/rejected": -0.4846312701702118, "step": 5188 }, { "epoch": 14.206707734428473, "grad_norm": 10.756033897399902, "learning_rate": 2.895890410958904e-07, "log_odds_chosen": 0.5159252882003784, "log_odds_ratio": -0.6040871739387512, "logits/chosen": 0.8716057538986206, "logits/rejected": 0.7305173277854919, "logps/chosen": -2.403794050216675, "logps/rejected": -2.817288398742676, "loss": 0.8859, "nll_loss": 0.8254468441009521, "rewards/accuracies": 0.875, "rewards/chosen": -0.24037937819957733, "rewards/margins": 0.041349440813064575, "rewards/rejected": -0.2817288041114807, "step": 5189 }, { "epoch": 14.209445585215606, "grad_norm": 5.881359100341797, "learning_rate": 2.894520547945206e-07, "log_odds_chosen": 2.9653587341308594, "log_odds_ratio": -0.2122849076986313, "logits/chosen": 0.8749755620956421, "logits/rejected": 0.9740272760391235, "logps/chosen": -2.531055212020874, "logps/rejected": -5.382620811462402, "loss": 0.7816, "nll_loss": 0.7603354454040527, "rewards/accuracies": 0.875, "rewards/chosen": -0.2531055212020874, "rewards/margins": 0.2851565480232239, "rewards/rejected": -0.5382620692253113, "step": 5190 }, { "epoch": 14.212183436002737, "grad_norm": 6.201278209686279, "learning_rate": 2.893150684931507e-07, "log_odds_chosen": 2.828329563140869, "log_odds_ratio": -0.12469536066055298, "logits/chosen": 0.9441532492637634, "logits/rejected": 1.0529545545578003, "logps/chosen": -2.2424440383911133, "logps/rejected": -4.941822052001953, "loss": 0.6986, "nll_loss": 0.6861460208892822, "rewards/accuracies": 1.0, "rewards/chosen": -0.22424441576004028, "rewards/margins": 0.26993781328201294, "rewards/rejected": -0.4941822290420532, "step": 5191 }, { "epoch": 14.21492128678987, "grad_norm": 5.868082523345947, "learning_rate": 2.891780821917808e-07, "log_odds_chosen": 2.2102184295654297, "log_odds_ratio": -0.2648893892765045, "logits/chosen": 0.6559478640556335, "logits/rejected": 0.6699773073196411, "logps/chosen": -1.8471578359603882, "logps/rejected": -3.8911983966827393, "loss": 0.6089, "nll_loss": 0.582385778427124, "rewards/accuracies": 1.0, "rewards/chosen": -0.18471579253673553, "rewards/margins": 0.2044040709733963, "rewards/rejected": -0.38911986351013184, "step": 5192 }, { "epoch": 14.217659137577002, "grad_norm": 5.872912883758545, "learning_rate": 2.8904109589041093e-07, "log_odds_chosen": 1.4902071952819824, "log_odds_ratio": -0.28203919529914856, "logits/chosen": 0.7498723268508911, "logits/rejected": 0.8302506804466248, "logps/chosen": -2.3287699222564697, "logps/rejected": -3.7161340713500977, "loss": 0.7328, "nll_loss": 0.7045470476150513, "rewards/accuracies": 0.875, "rewards/chosen": -0.2328770011663437, "rewards/margins": 0.13873639702796936, "rewards/rejected": -0.37161341309547424, "step": 5193 }, { "epoch": 14.220396988364135, "grad_norm": 5.542586326599121, "learning_rate": 2.889041095890411e-07, "log_odds_chosen": 2.0476784706115723, "log_odds_ratio": -0.2500532865524292, "logits/chosen": 0.9119974374771118, "logits/rejected": 0.9910352230072021, "logps/chosen": -2.165419340133667, "logps/rejected": -4.044832229614258, "loss": 0.7472, "nll_loss": 0.7221605777740479, "rewards/accuracies": 1.0, "rewards/chosen": -0.21654194593429565, "rewards/margins": 0.1879412829875946, "rewards/rejected": -0.40448319911956787, "step": 5194 }, { "epoch": 14.223134839151266, "grad_norm": 5.134397983551025, "learning_rate": 2.8876712328767124e-07, "log_odds_chosen": 1.915202021598816, "log_odds_ratio": -0.2509763240814209, "logits/chosen": 0.8364197015762329, "logits/rejected": 0.8240734338760376, "logps/chosen": -2.492312431335449, "logps/rejected": -4.287982940673828, "loss": 0.6686, "nll_loss": 0.6435120701789856, "rewards/accuracies": 0.875, "rewards/chosen": -0.2492312490940094, "rewards/margins": 0.17956702411174774, "rewards/rejected": -0.42879825830459595, "step": 5195 }, { "epoch": 14.225872689938399, "grad_norm": 6.237504482269287, "learning_rate": 2.8863013698630134e-07, "log_odds_chosen": 3.850990056991577, "log_odds_ratio": -0.12168408930301666, "logits/chosen": 0.9885401725769043, "logits/rejected": 0.9959461688995361, "logps/chosen": -3.118858814239502, "logps/rejected": -6.916844367980957, "loss": 0.9227, "nll_loss": 0.9104898571968079, "rewards/accuracies": 1.0, "rewards/chosen": -0.31188589334487915, "rewards/margins": 0.3797984719276428, "rewards/rejected": -0.691684365272522, "step": 5196 }, { "epoch": 14.22861054072553, "grad_norm": 6.607944011688232, "learning_rate": 2.8849315068493154e-07, "log_odds_chosen": 1.848003625869751, "log_odds_ratio": -0.37304380536079407, "logits/chosen": 0.5242235660552979, "logits/rejected": 0.5550936460494995, "logps/chosen": -2.230323314666748, "logps/rejected": -4.010629653930664, "loss": 0.7495, "nll_loss": 0.7121543288230896, "rewards/accuracies": 0.75, "rewards/chosen": -0.22303231060504913, "rewards/margins": 0.17803062498569489, "rewards/rejected": -0.401062935590744, "step": 5197 }, { "epoch": 14.231348391512663, "grad_norm": 5.117560386657715, "learning_rate": 2.8835616438356164e-07, "log_odds_chosen": 2.685263156890869, "log_odds_ratio": -0.1003502756357193, "logits/chosen": 0.8243475556373596, "logits/rejected": 0.8405299186706543, "logps/chosen": -2.307333469390869, "logps/rejected": -4.849116325378418, "loss": 0.5996, "nll_loss": 0.5895699858665466, "rewards/accuracies": 1.0, "rewards/chosen": -0.23073336482048035, "rewards/margins": 0.2541782855987549, "rewards/rejected": -0.48491165041923523, "step": 5198 }, { "epoch": 14.234086242299794, "grad_norm": 5.307605743408203, "learning_rate": 2.8821917808219174e-07, "log_odds_chosen": 1.5059499740600586, "log_odds_ratio": -0.32372063398361206, "logits/chosen": 0.8512855768203735, "logits/rejected": 0.8823370337486267, "logps/chosen": -1.705741286277771, "logps/rejected": -3.091254234313965, "loss": 0.6662, "nll_loss": 0.6338505148887634, "rewards/accuracies": 1.0, "rewards/chosen": -0.1705741286277771, "rewards/margins": 0.13855129480361938, "rewards/rejected": -0.3091254234313965, "step": 5199 }, { "epoch": 14.236824093086927, "grad_norm": 4.901832103729248, "learning_rate": 2.880821917808219e-07, "log_odds_chosen": 2.779397964477539, "log_odds_ratio": -0.1345871537923813, "logits/chosen": 0.976317286491394, "logits/rejected": 1.036490797996521, "logps/chosen": -2.8541336059570312, "logps/rejected": -5.543636322021484, "loss": 0.837, "nll_loss": 0.8235622644424438, "rewards/accuracies": 1.0, "rewards/chosen": -0.28541335463523865, "rewards/margins": 0.26895028352737427, "rewards/rejected": -0.5543636083602905, "step": 5200 }, { "epoch": 14.239561943874058, "grad_norm": 5.504915714263916, "learning_rate": 2.8794520547945205e-07, "log_odds_chosen": 2.1970653533935547, "log_odds_ratio": -0.17124386131763458, "logits/chosen": 0.7968760132789612, "logits/rejected": 0.8529345989227295, "logps/chosen": -2.531149387359619, "logps/rejected": -4.608275413513184, "loss": 0.6434, "nll_loss": 0.6262534856796265, "rewards/accuracies": 1.0, "rewards/chosen": -0.2531149387359619, "rewards/margins": 0.20771266520023346, "rewards/rejected": -0.46082761883735657, "step": 5201 }, { "epoch": 14.242299794661191, "grad_norm": 5.47009801864624, "learning_rate": 2.878082191780822e-07, "log_odds_chosen": 1.6824870109558105, "log_odds_ratio": -0.23181132972240448, "logits/chosen": 0.8173332214355469, "logits/rejected": 0.7491360306739807, "logps/chosen": -1.7066301107406616, "logps/rejected": -3.214354991912842, "loss": 0.5654, "nll_loss": 0.5422605276107788, "rewards/accuracies": 1.0, "rewards/chosen": -0.1706630140542984, "rewards/margins": 0.15077248215675354, "rewards/rejected": -0.32143551111221313, "step": 5202 }, { "epoch": 14.245037645448322, "grad_norm": 5.635694980621338, "learning_rate": 2.876712328767123e-07, "log_odds_chosen": 2.3537516593933105, "log_odds_ratio": -0.2044304609298706, "logits/chosen": 0.8601060509681702, "logits/rejected": 0.8629582524299622, "logps/chosen": -2.496732234954834, "logps/rejected": -4.706295013427734, "loss": 0.7285, "nll_loss": 0.7080479860305786, "rewards/accuracies": 1.0, "rewards/chosen": -0.24967321753501892, "rewards/margins": 0.22095629572868347, "rewards/rejected": -0.4706295430660248, "step": 5203 }, { "epoch": 14.247775496235455, "grad_norm": 8.749119758605957, "learning_rate": 2.875342465753425e-07, "log_odds_chosen": 2.365021228790283, "log_odds_ratio": -0.3747316896915436, "logits/chosen": 0.6689571738243103, "logits/rejected": 0.647960364818573, "logps/chosen": -2.390745162963867, "logps/rejected": -4.589035511016846, "loss": 0.6992, "nll_loss": 0.6617380976676941, "rewards/accuracies": 0.75, "rewards/chosen": -0.23907452821731567, "rewards/margins": 0.21982906758785248, "rewards/rejected": -0.45890355110168457, "step": 5204 }, { "epoch": 14.250513347022586, "grad_norm": 4.677429676055908, "learning_rate": 2.873972602739726e-07, "log_odds_chosen": 2.4005813598632812, "log_odds_ratio": -0.1718754917383194, "logits/chosen": 0.8999335765838623, "logits/rejected": 0.9291035532951355, "logps/chosen": -2.173990249633789, "logps/rejected": -4.463332176208496, "loss": 0.583, "nll_loss": 0.5658552050590515, "rewards/accuracies": 1.0, "rewards/chosen": -0.21739903092384338, "rewards/margins": 0.22893419861793518, "rewards/rejected": -0.44633322954177856, "step": 5205 }, { "epoch": 14.25325119780972, "grad_norm": 4.979928970336914, "learning_rate": 2.872602739726027e-07, "log_odds_chosen": 2.3810038566589355, "log_odds_ratio": -0.1439014971256256, "logits/chosen": 0.8692582845687866, "logits/rejected": 0.8740418553352356, "logps/chosen": -1.9220985174179077, "logps/rejected": -4.080264568328857, "loss": 0.7604, "nll_loss": 0.7459986209869385, "rewards/accuracies": 1.0, "rewards/chosen": -0.192209854722023, "rewards/margins": 0.21581661701202393, "rewards/rejected": -0.40802645683288574, "step": 5206 }, { "epoch": 14.255989048596852, "grad_norm": 4.863177299499512, "learning_rate": 2.8712328767123285e-07, "log_odds_chosen": 2.6075210571289062, "log_odds_ratio": -0.17826926708221436, "logits/chosen": 0.6907784342765808, "logits/rejected": 0.7572838664054871, "logps/chosen": -1.9582796096801758, "logps/rejected": -4.364793300628662, "loss": 0.6973, "nll_loss": 0.6794353127479553, "rewards/accuracies": 1.0, "rewards/chosen": -0.19582796096801758, "rewards/margins": 0.24065139889717102, "rewards/rejected": -0.4364793598651886, "step": 5207 }, { "epoch": 14.258726899383984, "grad_norm": 5.807261943817139, "learning_rate": 2.86986301369863e-07, "log_odds_chosen": 1.869629144668579, "log_odds_ratio": -0.41309618949890137, "logits/chosen": 1.1286022663116455, "logits/rejected": 1.1306467056274414, "logps/chosen": -2.0213429927825928, "logps/rejected": -3.786278247833252, "loss": 0.533, "nll_loss": 0.4916805922985077, "rewards/accuracies": 0.75, "rewards/chosen": -0.20213431119918823, "rewards/margins": 0.1764935404062271, "rewards/rejected": -0.37862783670425415, "step": 5208 }, { "epoch": 14.261464750171116, "grad_norm": 5.141949653625488, "learning_rate": 2.8684931506849316e-07, "log_odds_chosen": 3.237994909286499, "log_odds_ratio": -0.2489730417728424, "logits/chosen": 0.8091838359832764, "logits/rejected": 0.8201591968536377, "logps/chosen": -2.196666717529297, "logps/rejected": -5.2613205909729, "loss": 0.7274, "nll_loss": 0.702539324760437, "rewards/accuracies": 0.75, "rewards/chosen": -0.21966668963432312, "rewards/margins": 0.30646538734436035, "rewards/rejected": -0.5261320471763611, "step": 5209 }, { "epoch": 14.264202600958248, "grad_norm": 6.471392631530762, "learning_rate": 2.8671232876712326e-07, "log_odds_chosen": 1.2616682052612305, "log_odds_ratio": -0.34936976432800293, "logits/chosen": 0.663521409034729, "logits/rejected": 0.6779361367225647, "logps/chosen": -2.195014476776123, "logps/rejected": -3.3001506328582764, "loss": 0.7256, "nll_loss": 0.6906384229660034, "rewards/accuracies": 0.875, "rewards/chosen": -0.21950143575668335, "rewards/margins": 0.11051362752914429, "rewards/rejected": -0.33001506328582764, "step": 5210 }, { "epoch": 14.26694045174538, "grad_norm": 6.6906914710998535, "learning_rate": 2.865753424657534e-07, "log_odds_chosen": 0.9008004665374756, "log_odds_ratio": -0.463763564825058, "logits/chosen": 0.6672355532646179, "logits/rejected": 0.6993165612220764, "logps/chosen": -1.9376126527786255, "logps/rejected": -2.7374911308288574, "loss": 0.604, "nll_loss": 0.5576366186141968, "rewards/accuracies": 0.875, "rewards/chosen": -0.19376125931739807, "rewards/margins": 0.07998784631490707, "rewards/rejected": -0.27374911308288574, "step": 5211 }, { "epoch": 14.269678302532512, "grad_norm": 5.307746887207031, "learning_rate": 2.8643835616438356e-07, "log_odds_chosen": 3.8319122791290283, "log_odds_ratio": -0.09545345604419708, "logits/chosen": 0.8603050112724304, "logits/rejected": 0.9308614730834961, "logps/chosen": -1.9269850254058838, "logps/rejected": -5.510996341705322, "loss": 0.6808, "nll_loss": 0.6712864637374878, "rewards/accuracies": 1.0, "rewards/chosen": -0.19269849359989166, "rewards/margins": 0.35840117931365967, "rewards/rejected": -0.5510996580123901, "step": 5212 }, { "epoch": 14.272416153319645, "grad_norm": 6.548095703125, "learning_rate": 2.8630136986301366e-07, "log_odds_chosen": 1.6886231899261475, "log_odds_ratio": -0.3621872663497925, "logits/chosen": 0.9376609325408936, "logits/rejected": 0.9799286723136902, "logps/chosen": -2.7851626873016357, "logps/rejected": -4.423832893371582, "loss": 0.8034, "nll_loss": 0.7671696543693542, "rewards/accuracies": 0.875, "rewards/chosen": -0.2785162925720215, "rewards/margins": 0.1638670265674591, "rewards/rejected": -0.4423832893371582, "step": 5213 }, { "epoch": 14.275154004106776, "grad_norm": 6.632957935333252, "learning_rate": 2.861643835616438e-07, "log_odds_chosen": 2.9403443336486816, "log_odds_ratio": -0.2735331058502197, "logits/chosen": 0.7493081092834473, "logits/rejected": 0.811852216720581, "logps/chosen": -2.229410171508789, "logps/rejected": -5.0767388343811035, "loss": 0.748, "nll_loss": 0.7206348776817322, "rewards/accuracies": 0.875, "rewards/chosen": -0.22294102609157562, "rewards/margins": 0.284732848405838, "rewards/rejected": -0.5076738595962524, "step": 5214 }, { "epoch": 14.277891854893909, "grad_norm": 6.959826469421387, "learning_rate": 2.8602739726027396e-07, "log_odds_chosen": 1.4587082862854004, "log_odds_ratio": -0.29509299993515015, "logits/chosen": 0.9318733215332031, "logits/rejected": 1.0065698623657227, "logps/chosen": -2.834050416946411, "logps/rejected": -4.248312950134277, "loss": 0.7653, "nll_loss": 0.7357622981071472, "rewards/accuracies": 0.875, "rewards/chosen": -0.283405065536499, "rewards/margins": 0.1414262354373932, "rewards/rejected": -0.4248312711715698, "step": 5215 }, { "epoch": 14.28062970568104, "grad_norm": 5.657863140106201, "learning_rate": 2.858904109589041e-07, "log_odds_chosen": 2.1406893730163574, "log_odds_ratio": -0.24577751755714417, "logits/chosen": 0.7043275833129883, "logits/rejected": 0.7442646622657776, "logps/chosen": -2.309894561767578, "logps/rejected": -4.367088794708252, "loss": 0.6983, "nll_loss": 0.6737255454063416, "rewards/accuracies": 1.0, "rewards/chosen": -0.230989471077919, "rewards/margins": 0.20571939647197723, "rewards/rejected": -0.43670886754989624, "step": 5216 }, { "epoch": 14.283367556468173, "grad_norm": 6.464633941650391, "learning_rate": 2.857534246575342e-07, "log_odds_chosen": 0.5568806529045105, "log_odds_ratio": -0.49125730991363525, "logits/chosen": 0.8831185102462769, "logits/rejected": 0.9027528762817383, "logps/chosen": -2.242382764816284, "logps/rejected": -2.7354331016540527, "loss": 0.6653, "nll_loss": 0.6162170171737671, "rewards/accuracies": 0.75, "rewards/chosen": -0.2242382913827896, "rewards/margins": 0.0493050143122673, "rewards/rejected": -0.2735432982444763, "step": 5217 }, { "epoch": 14.286105407255304, "grad_norm": 6.280089378356934, "learning_rate": 2.8561643835616437e-07, "log_odds_chosen": 2.10994815826416, "log_odds_ratio": -0.4111533761024475, "logits/chosen": 0.788205623626709, "logits/rejected": 0.8963079452514648, "logps/chosen": -2.6665568351745605, "logps/rejected": -4.693140029907227, "loss": 0.8226, "nll_loss": 0.7814875841140747, "rewards/accuracies": 0.75, "rewards/chosen": -0.26665568351745605, "rewards/margins": 0.20265835523605347, "rewards/rejected": -0.4693140387535095, "step": 5218 }, { "epoch": 14.288843258042437, "grad_norm": 5.820461273193359, "learning_rate": 2.854794520547945e-07, "log_odds_chosen": 1.8729848861694336, "log_odds_ratio": -0.25037920475006104, "logits/chosen": 0.7263168692588806, "logits/rejected": 0.7130488157272339, "logps/chosen": -2.038620948791504, "logps/rejected": -3.8056116104125977, "loss": 0.6458, "nll_loss": 0.6207741498947144, "rewards/accuracies": 1.0, "rewards/chosen": -0.20386210083961487, "rewards/margins": 0.17669905722141266, "rewards/rejected": -0.3805611729621887, "step": 5219 }, { "epoch": 14.291581108829568, "grad_norm": 4.82666015625, "learning_rate": 2.853424657534246e-07, "log_odds_chosen": 2.4937000274658203, "log_odds_ratio": -0.1827281415462494, "logits/chosen": 0.7624039649963379, "logits/rejected": 0.7833992838859558, "logps/chosen": -1.9950839281082153, "logps/rejected": -4.3598809242248535, "loss": 0.6677, "nll_loss": 0.6493884921073914, "rewards/accuracies": 1.0, "rewards/chosen": -0.199508398771286, "rewards/margins": 0.23647969961166382, "rewards/rejected": -0.4359881281852722, "step": 5220 }, { "epoch": 14.294318959616701, "grad_norm": 5.362864017486572, "learning_rate": 2.852054794520548e-07, "log_odds_chosen": 1.4451851844787598, "log_odds_ratio": -0.31245213747024536, "logits/chosen": 0.8290215134620667, "logits/rejected": 0.9052255749702454, "logps/chosen": -1.949050784111023, "logps/rejected": -3.304924488067627, "loss": 0.7039, "nll_loss": 0.6726254820823669, "rewards/accuracies": 0.875, "rewards/chosen": -0.19490507245063782, "rewards/margins": 0.13558737933635712, "rewards/rejected": -0.33049246668815613, "step": 5221 }, { "epoch": 14.297056810403832, "grad_norm": 5.1979475021362305, "learning_rate": 2.850684931506849e-07, "log_odds_chosen": 1.9739630222320557, "log_odds_ratio": -0.21847721934318542, "logits/chosen": 0.7198337912559509, "logits/rejected": 0.7354487180709839, "logps/chosen": -2.204033136367798, "logps/rejected": -4.051486492156982, "loss": 0.611, "nll_loss": 0.5891207456588745, "rewards/accuracies": 1.0, "rewards/chosen": -0.2204032987356186, "rewards/margins": 0.18474534153938293, "rewards/rejected": -0.4051486849784851, "step": 5222 }, { "epoch": 14.299794661190965, "grad_norm": 5.5163726806640625, "learning_rate": 2.849315068493151e-07, "log_odds_chosen": 2.6771721839904785, "log_odds_ratio": -0.17944756150245667, "logits/chosen": 1.028304100036621, "logits/rejected": 1.1153912544250488, "logps/chosen": -1.6684931516647339, "logps/rejected": -4.168707370758057, "loss": 0.5897, "nll_loss": 0.5717402100563049, "rewards/accuracies": 0.875, "rewards/chosen": -0.1668493151664734, "rewards/margins": 0.25002142786979675, "rewards/rejected": -0.41687077283859253, "step": 5223 }, { "epoch": 14.302532511978097, "grad_norm": 5.975193500518799, "learning_rate": 2.8479452054794517e-07, "log_odds_chosen": 1.6082419157028198, "log_odds_ratio": -0.23656974732875824, "logits/chosen": 0.783266007900238, "logits/rejected": 0.8070952296257019, "logps/chosen": -1.9071323871612549, "logps/rejected": -3.3814282417297363, "loss": 0.5483, "nll_loss": 0.5246780514717102, "rewards/accuracies": 1.0, "rewards/chosen": -0.19071322679519653, "rewards/margins": 0.14742958545684814, "rewards/rejected": -0.33814284205436707, "step": 5224 }, { "epoch": 14.30527036276523, "grad_norm": 4.57717752456665, "learning_rate": 2.846575342465753e-07, "log_odds_chosen": 1.9153882265090942, "log_odds_ratio": -0.2595367133617401, "logits/chosen": 0.8793466091156006, "logits/rejected": 0.8829981088638306, "logps/chosen": -1.8892544507980347, "logps/rejected": -3.6714837551116943, "loss": 0.668, "nll_loss": 0.6420378684997559, "rewards/accuracies": 1.0, "rewards/chosen": -0.18892544507980347, "rewards/margins": 0.1782229095697403, "rewards/rejected": -0.36714836955070496, "step": 5225 }, { "epoch": 14.30800821355236, "grad_norm": 7.5189595222473145, "learning_rate": 2.845205479452055e-07, "log_odds_chosen": 1.5487112998962402, "log_odds_ratio": -0.32566797733306885, "logits/chosen": 0.9735236167907715, "logits/rejected": 0.9672778248786926, "logps/chosen": -2.4107251167297363, "logps/rejected": -3.875386953353882, "loss": 0.721, "nll_loss": 0.6884123086929321, "rewards/accuracies": 0.875, "rewards/chosen": -0.24107250571250916, "rewards/margins": 0.1464662253856659, "rewards/rejected": -0.38753873109817505, "step": 5226 }, { "epoch": 14.310746064339494, "grad_norm": 6.999859809875488, "learning_rate": 2.843835616438356e-07, "log_odds_chosen": 2.6050353050231934, "log_odds_ratio": -0.12515072524547577, "logits/chosen": 1.003089189529419, "logits/rejected": 1.0450835227966309, "logps/chosen": -2.334430456161499, "logps/rejected": -4.828656196594238, "loss": 0.6602, "nll_loss": 0.6476694941520691, "rewards/accuracies": 1.0, "rewards/chosen": -0.23344305157661438, "rewards/margins": 0.2494225949048996, "rewards/rejected": -0.48286566138267517, "step": 5227 }, { "epoch": 14.313483915126625, "grad_norm": 5.7265191078186035, "learning_rate": 2.842465753424658e-07, "log_odds_chosen": 1.9745978116989136, "log_odds_ratio": -0.27950453758239746, "logits/chosen": 1.0065183639526367, "logits/rejected": 1.0175933837890625, "logps/chosen": -1.7788889408111572, "logps/rejected": -3.6313185691833496, "loss": 0.653, "nll_loss": 0.6250343322753906, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778889000415802, "rewards/margins": 0.18524295091629028, "rewards/rejected": -0.3631318509578705, "step": 5228 }, { "epoch": 14.316221765913758, "grad_norm": 6.31678581237793, "learning_rate": 2.841095890410959e-07, "log_odds_chosen": 1.4683700799942017, "log_odds_ratio": -0.24509665369987488, "logits/chosen": 1.1147996187210083, "logits/rejected": 1.143506407737732, "logps/chosen": -2.279754161834717, "logps/rejected": -3.589230537414551, "loss": 0.5908, "nll_loss": 0.5662983655929565, "rewards/accuracies": 1.0, "rewards/chosen": -0.22797539830207825, "rewards/margins": 0.13094766438007355, "rewards/rejected": -0.358923077583313, "step": 5229 }, { "epoch": 14.318959616700889, "grad_norm": 5.659498691558838, "learning_rate": 2.8397260273972603e-07, "log_odds_chosen": 1.7371407747268677, "log_odds_ratio": -0.26559656858444214, "logits/chosen": 0.9175515174865723, "logits/rejected": 0.9645379781723022, "logps/chosen": -2.382913589477539, "logps/rejected": -4.004185676574707, "loss": 0.7575, "nll_loss": 0.7309072017669678, "rewards/accuracies": 0.875, "rewards/chosen": -0.23829138278961182, "rewards/margins": 0.16212719678878784, "rewards/rejected": -0.40041857957839966, "step": 5230 }, { "epoch": 14.321697467488022, "grad_norm": 5.60014533996582, "learning_rate": 2.8383561643835613e-07, "log_odds_chosen": 2.219514846801758, "log_odds_ratio": -0.5169953107833862, "logits/chosen": 0.7374563217163086, "logits/rejected": 0.709352433681488, "logps/chosen": -2.6377804279327393, "logps/rejected": -4.715087890625, "loss": 0.6458, "nll_loss": 0.5941289067268372, "rewards/accuracies": 0.875, "rewards/chosen": -0.26377803087234497, "rewards/margins": 0.20773078501224518, "rewards/rejected": -0.47150880098342896, "step": 5231 }, { "epoch": 14.324435318275153, "grad_norm": 6.64133882522583, "learning_rate": 2.836986301369863e-07, "log_odds_chosen": 0.9664797782897949, "log_odds_ratio": -0.49732083082199097, "logits/chosen": 0.9647414684295654, "logits/rejected": 0.980790913105011, "logps/chosen": -3.2014901638031006, "logps/rejected": -4.1176838874816895, "loss": 0.756, "nll_loss": 0.7062935829162598, "rewards/accuracies": 0.875, "rewards/chosen": -0.3201490044593811, "rewards/margins": 0.09161935746669769, "rewards/rejected": -0.41176837682724, "step": 5232 }, { "epoch": 14.327173169062286, "grad_norm": 6.557675838470459, "learning_rate": 2.8356164383561644e-07, "log_odds_chosen": 1.942121982574463, "log_odds_ratio": -0.26924005150794983, "logits/chosen": 1.0330976247787476, "logits/rejected": 1.074162244796753, "logps/chosen": -1.5259602069854736, "logps/rejected": -3.217435598373413, "loss": 0.6097, "nll_loss": 0.5828191637992859, "rewards/accuracies": 0.875, "rewards/chosen": -0.15259602665901184, "rewards/margins": 0.1691475510597229, "rewards/rejected": -0.32174354791641235, "step": 5233 }, { "epoch": 14.329911019849419, "grad_norm": 5.027763366699219, "learning_rate": 2.8342465753424653e-07, "log_odds_chosen": 2.001589059829712, "log_odds_ratio": -0.2045053243637085, "logits/chosen": 0.7969387173652649, "logits/rejected": 0.8561984896659851, "logps/chosen": -2.32562255859375, "logps/rejected": -4.2341437339782715, "loss": 0.7104, "nll_loss": 0.6899507641792297, "rewards/accuracies": 0.875, "rewards/chosen": -0.23256227374076843, "rewards/margins": 0.19085213541984558, "rewards/rejected": -0.423414409160614, "step": 5234 }, { "epoch": 14.33264887063655, "grad_norm": 5.186654567718506, "learning_rate": 2.8328767123287674e-07, "log_odds_chosen": 1.3149971961975098, "log_odds_ratio": -0.3134399354457855, "logits/chosen": 0.9275000691413879, "logits/rejected": 0.8539735674858093, "logps/chosen": -1.7477669715881348, "logps/rejected": -2.877276659011841, "loss": 0.6968, "nll_loss": 0.6654707193374634, "rewards/accuracies": 0.875, "rewards/chosen": -0.17477668821811676, "rewards/margins": 0.11295096576213837, "rewards/rejected": -0.2877276539802551, "step": 5235 }, { "epoch": 14.335386721423683, "grad_norm": 6.7116007804870605, "learning_rate": 2.8315068493150684e-07, "log_odds_chosen": 1.999119758605957, "log_odds_ratio": -0.3478250503540039, "logits/chosen": 0.9725914001464844, "logits/rejected": 1.0276082754135132, "logps/chosen": -2.4447412490844727, "logps/rejected": -4.390259265899658, "loss": 0.6885, "nll_loss": 0.6537105441093445, "rewards/accuracies": 0.875, "rewards/chosen": -0.2444741129875183, "rewards/margins": 0.19455181062221527, "rewards/rejected": -0.4390259385108948, "step": 5236 }, { "epoch": 14.338124572210814, "grad_norm": 9.075387954711914, "learning_rate": 2.83013698630137e-07, "log_odds_chosen": 0.8633861541748047, "log_odds_ratio": -1.0122485160827637, "logits/chosen": 0.685695230960846, "logits/rejected": 0.678547739982605, "logps/chosen": -2.8288815021514893, "logps/rejected": -3.6128177642822266, "loss": 0.8003, "nll_loss": 0.6991101503372192, "rewards/accuracies": 0.625, "rewards/chosen": -0.28288814425468445, "rewards/margins": 0.07839362323284149, "rewards/rejected": -0.36128178238868713, "step": 5237 }, { "epoch": 14.340862422997947, "grad_norm": 7.581986427307129, "learning_rate": 2.828767123287671e-07, "log_odds_chosen": 2.6934304237365723, "log_odds_ratio": -0.25975480675697327, "logits/chosen": 1.0179977416992188, "logits/rejected": 1.1022998094558716, "logps/chosen": -2.365631103515625, "logps/rejected": -4.979267597198486, "loss": 0.6, "nll_loss": 0.5740591287612915, "rewards/accuracies": 0.875, "rewards/chosen": -0.23656311631202698, "rewards/margins": 0.2613636553287506, "rewards/rejected": -0.4979268014431, "step": 5238 }, { "epoch": 14.343600273785079, "grad_norm": 7.312911510467529, "learning_rate": 2.8273972602739724e-07, "log_odds_chosen": 1.3194063901901245, "log_odds_ratio": -0.5484702587127686, "logits/chosen": 0.7429434061050415, "logits/rejected": 0.7046382427215576, "logps/chosen": -2.0653347969055176, "logps/rejected": -3.244251251220703, "loss": 0.6835, "nll_loss": 0.6286181807518005, "rewards/accuracies": 0.875, "rewards/chosen": -0.20653347671031952, "rewards/margins": 0.11789166927337646, "rewards/rejected": -0.3244251608848572, "step": 5239 }, { "epoch": 14.346338124572211, "grad_norm": 5.0991010665893555, "learning_rate": 2.826027397260274e-07, "log_odds_chosen": 4.266240119934082, "log_odds_ratio": -0.1296817809343338, "logits/chosen": 1.0005220174789429, "logits/rejected": 1.0470417737960815, "logps/chosen": -2.661048650741577, "logps/rejected": -6.818723678588867, "loss": 0.6647, "nll_loss": 0.651726484298706, "rewards/accuracies": 1.0, "rewards/chosen": -0.26610487699508667, "rewards/margins": 0.41576746106147766, "rewards/rejected": -0.6818723678588867, "step": 5240 }, { "epoch": 14.349075975359343, "grad_norm": 4.807572841644287, "learning_rate": 2.824657534246575e-07, "log_odds_chosen": 2.513913869857788, "log_odds_ratio": -0.24239268898963928, "logits/chosen": 0.9577353596687317, "logits/rejected": 1.0225094556808472, "logps/chosen": -2.809558629989624, "logps/rejected": -5.237807750701904, "loss": 0.6971, "nll_loss": 0.6728344559669495, "rewards/accuracies": 1.0, "rewards/chosen": -0.28095588088035583, "rewards/margins": 0.2428249567747116, "rewards/rejected": -0.5237808227539062, "step": 5241 }, { "epoch": 14.351813826146476, "grad_norm": 5.369956016540527, "learning_rate": 2.823287671232877e-07, "log_odds_chosen": 2.324435234069824, "log_odds_ratio": -0.3339391350746155, "logits/chosen": 0.4937061667442322, "logits/rejected": 0.5493127703666687, "logps/chosen": -2.134620189666748, "logps/rejected": -4.344801425933838, "loss": 0.7005, "nll_loss": 0.6670792102813721, "rewards/accuracies": 0.75, "rewards/chosen": -0.21346203982830048, "rewards/margins": 0.22101813554763794, "rewards/rejected": -0.43448013067245483, "step": 5242 }, { "epoch": 14.354551676933607, "grad_norm": 5.433910369873047, "learning_rate": 2.821917808219178e-07, "log_odds_chosen": 3.012873411178589, "log_odds_ratio": -0.12022262811660767, "logits/chosen": 0.7871842384338379, "logits/rejected": 0.776586651802063, "logps/chosen": -1.3089174032211304, "logps/rejected": -3.9940578937530518, "loss": 0.5491, "nll_loss": 0.5370302200317383, "rewards/accuracies": 1.0, "rewards/chosen": -0.13089174032211304, "rewards/margins": 0.26851409673690796, "rewards/rejected": -0.3994058072566986, "step": 5243 }, { "epoch": 14.35728952772074, "grad_norm": 6.253722190856934, "learning_rate": 2.820547945205479e-07, "log_odds_chosen": 2.7541184425354004, "log_odds_ratio": -0.2506646513938904, "logits/chosen": 0.8124704957008362, "logits/rejected": 0.8422680497169495, "logps/chosen": -3.2749412059783936, "logps/rejected": -6.01297664642334, "loss": 0.7439, "nll_loss": 0.7188767194747925, "rewards/accuracies": 0.875, "rewards/chosen": -0.32749414443969727, "rewards/margins": 0.2738035321235657, "rewards/rejected": -0.6012976765632629, "step": 5244 }, { "epoch": 14.360027378507871, "grad_norm": 5.918256759643555, "learning_rate": 2.8191780821917805e-07, "log_odds_chosen": 1.5306826829910278, "log_odds_ratio": -0.2751239538192749, "logits/chosen": 0.8215656876564026, "logits/rejected": 0.7462756037712097, "logps/chosen": -2.456359624862671, "logps/rejected": -3.8960537910461426, "loss": 0.7461, "nll_loss": 0.7185899615287781, "rewards/accuracies": 1.0, "rewards/chosen": -0.2456359714269638, "rewards/margins": 0.14396944642066956, "rewards/rejected": -0.38960540294647217, "step": 5245 }, { "epoch": 14.362765229295004, "grad_norm": 4.31155252456665, "learning_rate": 2.817808219178082e-07, "log_odds_chosen": 3.443835973739624, "log_odds_ratio": -0.09427490830421448, "logits/chosen": 1.1839100122451782, "logits/rejected": 1.211126446723938, "logps/chosen": -1.9698665142059326, "logps/rejected": -5.243764877319336, "loss": 0.6126, "nll_loss": 0.6031520366668701, "rewards/accuracies": 1.0, "rewards/chosen": -0.19698666036128998, "rewards/margins": 0.3273898661136627, "rewards/rejected": -0.5243765115737915, "step": 5246 }, { "epoch": 14.365503080082135, "grad_norm": 5.24147891998291, "learning_rate": 2.8164383561643835e-07, "log_odds_chosen": 1.8317174911499023, "log_odds_ratio": -0.3368943929672241, "logits/chosen": 0.955032467842102, "logits/rejected": 0.9771484136581421, "logps/chosen": -2.0068321228027344, "logps/rejected": -3.7415835857391357, "loss": 0.6499, "nll_loss": 0.616256058216095, "rewards/accuracies": 1.0, "rewards/chosen": -0.20068322122097015, "rewards/margins": 0.17347514629364014, "rewards/rejected": -0.3741583526134491, "step": 5247 }, { "epoch": 14.368240930869268, "grad_norm": 7.749539852142334, "learning_rate": 2.8150684931506845e-07, "log_odds_chosen": 1.9383291006088257, "log_odds_ratio": -0.3542151153087616, "logits/chosen": 1.0844441652297974, "logits/rejected": 1.1092702150344849, "logps/chosen": -2.943964719772339, "logps/rejected": -4.784188747406006, "loss": 0.8039, "nll_loss": 0.7684641480445862, "rewards/accuracies": 0.875, "rewards/chosen": -0.2943964898586273, "rewards/margins": 0.18402236700057983, "rewards/rejected": -0.47841888666152954, "step": 5248 }, { "epoch": 14.3709787816564, "grad_norm": 7.588202953338623, "learning_rate": 2.8136986301369866e-07, "log_odds_chosen": 1.4992215633392334, "log_odds_ratio": -0.3154417872428894, "logits/chosen": 1.0248594284057617, "logits/rejected": 0.946315348148346, "logps/chosen": -2.603224754333496, "logps/rejected": -3.9883882999420166, "loss": 0.7549, "nll_loss": 0.7233642339706421, "rewards/accuracies": 0.875, "rewards/chosen": -0.2603224813938141, "rewards/margins": 0.13851633667945862, "rewards/rejected": -0.3988388180732727, "step": 5249 }, { "epoch": 14.373716632443532, "grad_norm": 6.3945136070251465, "learning_rate": 2.8123287671232876e-07, "log_odds_chosen": 2.0718817710876465, "log_odds_ratio": -0.5405663251876831, "logits/chosen": 0.873478889465332, "logits/rejected": 0.8360402584075928, "logps/chosen": -2.8529653549194336, "logps/rejected": -4.826823711395264, "loss": 0.7112, "nll_loss": 0.6571722030639648, "rewards/accuracies": 0.875, "rewards/chosen": -0.28529655933380127, "rewards/margins": 0.19738580286502838, "rewards/rejected": -0.48268234729766846, "step": 5250 }, { "epoch": 14.376454483230663, "grad_norm": 5.633581638336182, "learning_rate": 2.8109589041095886e-07, "log_odds_chosen": 2.1841087341308594, "log_odds_ratio": -0.14058783650398254, "logits/chosen": 0.6519171595573425, "logits/rejected": 0.6526551246643066, "logps/chosen": -1.6582331657409668, "logps/rejected": -3.6189117431640625, "loss": 0.6325, "nll_loss": 0.6184172630310059, "rewards/accuracies": 1.0, "rewards/chosen": -0.1658233106136322, "rewards/margins": 0.19606786966323853, "rewards/rejected": -0.3618912100791931, "step": 5251 }, { "epoch": 14.379192334017796, "grad_norm": 4.880949020385742, "learning_rate": 2.80958904109589e-07, "log_odds_chosen": 2.9304094314575195, "log_odds_ratio": -0.19038140773773193, "logits/chosen": 0.7900129556655884, "logits/rejected": 0.8437194228172302, "logps/chosen": -1.6356658935546875, "logps/rejected": -4.401603698730469, "loss": 0.6815, "nll_loss": 0.6624318361282349, "rewards/accuracies": 1.0, "rewards/chosen": -0.16356660425662994, "rewards/margins": 0.27659380435943604, "rewards/rejected": -0.4401603937149048, "step": 5252 }, { "epoch": 14.381930184804927, "grad_norm": 8.51491928100586, "learning_rate": 2.8082191780821916e-07, "log_odds_chosen": 1.4386613368988037, "log_odds_ratio": -0.4129404127597809, "logits/chosen": 0.9110265970230103, "logits/rejected": 0.8952218294143677, "logps/chosen": -2.595427989959717, "logps/rejected": -3.9068987369537354, "loss": 0.7361, "nll_loss": 0.6948422193527222, "rewards/accuracies": 0.75, "rewards/chosen": -0.2595427930355072, "rewards/margins": 0.1311470866203308, "rewards/rejected": -0.3906898498535156, "step": 5253 }, { "epoch": 14.38466803559206, "grad_norm": 6.22127628326416, "learning_rate": 2.806849315068493e-07, "log_odds_chosen": 1.5083367824554443, "log_odds_ratio": -0.3176414370536804, "logits/chosen": 0.8721834421157837, "logits/rejected": 0.8706804513931274, "logps/chosen": -1.5534071922302246, "logps/rejected": -2.886298179626465, "loss": 0.6647, "nll_loss": 0.6329716444015503, "rewards/accuracies": 0.875, "rewards/chosen": -0.15534071624279022, "rewards/margins": 0.13328911364078522, "rewards/rejected": -0.28862982988357544, "step": 5254 }, { "epoch": 14.387405886379192, "grad_norm": 5.98236608505249, "learning_rate": 2.805479452054794e-07, "log_odds_chosen": 1.9060349464416504, "log_odds_ratio": -0.21520663797855377, "logits/chosen": 1.1177239418029785, "logits/rejected": 1.180969476699829, "logps/chosen": -2.256195545196533, "logps/rejected": -4.051304817199707, "loss": 0.6517, "nll_loss": 0.6301969289779663, "rewards/accuracies": 1.0, "rewards/chosen": -0.2256195843219757, "rewards/margins": 0.1795109361410141, "rewards/rejected": -0.4051305055618286, "step": 5255 }, { "epoch": 14.390143737166325, "grad_norm": 5.408984184265137, "learning_rate": 2.804109589041096e-07, "log_odds_chosen": 1.5231382846832275, "log_odds_ratio": -0.3577653467655182, "logits/chosen": 0.8189660310745239, "logits/rejected": 0.8291890025138855, "logps/chosen": -2.484222173690796, "logps/rejected": -3.9039623737335205, "loss": 0.7227, "nll_loss": 0.6869161128997803, "rewards/accuracies": 0.875, "rewards/chosen": -0.24842222034931183, "rewards/margins": 0.14197401702404022, "rewards/rejected": -0.39039623737335205, "step": 5256 }, { "epoch": 14.392881587953456, "grad_norm": 5.233195781707764, "learning_rate": 2.802739726027397e-07, "log_odds_chosen": 1.9776957035064697, "log_odds_ratio": -0.2569429874420166, "logits/chosen": 0.8856459259986877, "logits/rejected": 0.9506962299346924, "logps/chosen": -3.45379638671875, "logps/rejected": -5.33394718170166, "loss": 0.7438, "nll_loss": 0.7180875539779663, "rewards/accuracies": 0.875, "rewards/chosen": -0.34537965059280396, "rewards/margins": 0.18801507353782654, "rewards/rejected": -0.5333947539329529, "step": 5257 }, { "epoch": 14.395619438740589, "grad_norm": 5.227592945098877, "learning_rate": 2.801369863013698e-07, "log_odds_chosen": 2.752298355102539, "log_odds_ratio": -0.18868999183177948, "logits/chosen": 0.8449302911758423, "logits/rejected": 0.8988775014877319, "logps/chosen": -2.5330910682678223, "logps/rejected": -5.184085369110107, "loss": 0.6919, "nll_loss": 0.6730765700340271, "rewards/accuracies": 1.0, "rewards/chosen": -0.25330910086631775, "rewards/margins": 0.2650994062423706, "rewards/rejected": -0.5184085369110107, "step": 5258 }, { "epoch": 14.39835728952772, "grad_norm": 6.485958576202393, "learning_rate": 2.8e-07, "log_odds_chosen": 2.604186773300171, "log_odds_ratio": -0.285947322845459, "logits/chosen": 1.2345706224441528, "logits/rejected": 1.2721623182296753, "logps/chosen": -2.043933629989624, "logps/rejected": -4.437139511108398, "loss": 0.6362, "nll_loss": 0.6076474189758301, "rewards/accuracies": 0.875, "rewards/chosen": -0.20439335703849792, "rewards/margins": 0.23932062089443207, "rewards/rejected": -0.4437139630317688, "step": 5259 }, { "epoch": 14.401095140314853, "grad_norm": 4.669106483459473, "learning_rate": 2.798630136986301e-07, "log_odds_chosen": 3.2555527687072754, "log_odds_ratio": -0.17569246888160706, "logits/chosen": 0.9375767111778259, "logits/rejected": 1.0015240907669067, "logps/chosen": -2.4051575660705566, "logps/rejected": -5.570580005645752, "loss": 0.7822, "nll_loss": 0.7646665573120117, "rewards/accuracies": 1.0, "rewards/chosen": -0.24051573872566223, "rewards/margins": 0.31654223799705505, "rewards/rejected": -0.5570580363273621, "step": 5260 }, { "epoch": 14.403832991101986, "grad_norm": 5.521905899047852, "learning_rate": 2.7972602739726027e-07, "log_odds_chosen": 2.5912857055664062, "log_odds_ratio": -0.2036728709936142, "logits/chosen": 0.8604373335838318, "logits/rejected": 0.8971480131149292, "logps/chosen": -2.1471095085144043, "logps/rejected": -4.638309478759766, "loss": 0.6283, "nll_loss": 0.6079020500183105, "rewards/accuracies": 1.0, "rewards/chosen": -0.21471093595027924, "rewards/margins": 0.24911999702453613, "rewards/rejected": -0.4638309180736542, "step": 5261 }, { "epoch": 14.406570841889117, "grad_norm": 5.6651835441589355, "learning_rate": 2.7958904109589037e-07, "log_odds_chosen": 2.70033597946167, "log_odds_ratio": -0.3075169622898102, "logits/chosen": 0.7089998722076416, "logits/rejected": 0.7927075624465942, "logps/chosen": -2.133788585662842, "logps/rejected": -4.632016181945801, "loss": 0.6692, "nll_loss": 0.6384766101837158, "rewards/accuracies": 0.875, "rewards/chosen": -0.2133788764476776, "rewards/margins": 0.2498227208852768, "rewards/rejected": -0.4632016122341156, "step": 5262 }, { "epoch": 14.40930869267625, "grad_norm": 5.162413597106934, "learning_rate": 2.794520547945206e-07, "log_odds_chosen": 2.1572062969207764, "log_odds_ratio": -0.28596293926239014, "logits/chosen": 0.9560520648956299, "logits/rejected": 0.968830943107605, "logps/chosen": -2.1631174087524414, "logps/rejected": -4.21254825592041, "loss": 0.5686, "nll_loss": 0.5399594306945801, "rewards/accuracies": 0.875, "rewards/chosen": -0.2163117378950119, "rewards/margins": 0.20494309067726135, "rewards/rejected": -0.42125481367111206, "step": 5263 }, { "epoch": 14.412046543463381, "grad_norm": 6.34428071975708, "learning_rate": 2.793150684931507e-07, "log_odds_chosen": 1.4748146533966064, "log_odds_ratio": -0.4641415476799011, "logits/chosen": 0.8756495714187622, "logits/rejected": 0.8346444368362427, "logps/chosen": -2.5026493072509766, "logps/rejected": -3.92256236076355, "loss": 0.7111, "nll_loss": 0.6647323369979858, "rewards/accuracies": 0.75, "rewards/chosen": -0.2502649128437042, "rewards/margins": 0.14199134707450867, "rewards/rejected": -0.3922562599182129, "step": 5264 }, { "epoch": 14.414784394250514, "grad_norm": 5.173528671264648, "learning_rate": 2.7917808219178077e-07, "log_odds_chosen": 1.9769060611724854, "log_odds_ratio": -0.1860990673303604, "logits/chosen": 0.9021866917610168, "logits/rejected": 0.8681857585906982, "logps/chosen": -1.8612239360809326, "logps/rejected": -3.6786303520202637, "loss": 0.5843, "nll_loss": 0.5656869411468506, "rewards/accuracies": 1.0, "rewards/chosen": -0.18612238764762878, "rewards/margins": 0.1817406713962555, "rewards/rejected": -0.3678630590438843, "step": 5265 }, { "epoch": 14.417522245037645, "grad_norm": 5.671846389770508, "learning_rate": 2.79041095890411e-07, "log_odds_chosen": 2.974646806716919, "log_odds_ratio": -0.12494058907032013, "logits/chosen": 1.057248592376709, "logits/rejected": 1.0691723823547363, "logps/chosen": -2.9412779808044434, "logps/rejected": -5.837370872497559, "loss": 0.6566, "nll_loss": 0.6441173553466797, "rewards/accuracies": 1.0, "rewards/chosen": -0.29412776231765747, "rewards/margins": 0.28960931301116943, "rewards/rejected": -0.5837370753288269, "step": 5266 }, { "epoch": 14.420260095824778, "grad_norm": 7.09104061126709, "learning_rate": 2.789041095890411e-07, "log_odds_chosen": 2.9051992893218994, "log_odds_ratio": -0.30268430709838867, "logits/chosen": 0.8010149002075195, "logits/rejected": 0.8295709490776062, "logps/chosen": -1.947921872138977, "logps/rejected": -4.673587799072266, "loss": 0.6047, "nll_loss": 0.5743959546089172, "rewards/accuracies": 0.875, "rewards/chosen": -0.19479219615459442, "rewards/margins": 0.2725665867328644, "rewards/rejected": -0.4673587679862976, "step": 5267 }, { "epoch": 14.42299794661191, "grad_norm": 6.0300703048706055, "learning_rate": 2.7876712328767123e-07, "log_odds_chosen": 1.3458412885665894, "log_odds_ratio": -0.36424827575683594, "logits/chosen": 0.6404143571853638, "logits/rejected": 0.6686233878135681, "logps/chosen": -2.4129691123962402, "logps/rejected": -3.711652994155884, "loss": 0.7035, "nll_loss": 0.6671205759048462, "rewards/accuracies": 1.0, "rewards/chosen": -0.2412969172000885, "rewards/margins": 0.12986840307712555, "rewards/rejected": -0.37116530537605286, "step": 5268 }, { "epoch": 14.425735797399042, "grad_norm": 11.487886428833008, "learning_rate": 2.7863013698630133e-07, "log_odds_chosen": 1.3036420345306396, "log_odds_ratio": -0.41432636976242065, "logits/chosen": 0.8467381000518799, "logits/rejected": 0.7827674746513367, "logps/chosen": -2.9740798473358154, "logps/rejected": -4.233175277709961, "loss": 0.8153, "nll_loss": 0.7738480567932129, "rewards/accuracies": 0.75, "rewards/chosen": -0.29740798473358154, "rewards/margins": 0.12590959668159485, "rewards/rejected": -0.4233176112174988, "step": 5269 }, { "epoch": 14.428473648186174, "grad_norm": 4.528703689575195, "learning_rate": 2.7849315068493153e-07, "log_odds_chosen": 2.255526304244995, "log_odds_ratio": -0.24625656008720398, "logits/chosen": 0.8274056911468506, "logits/rejected": 0.8935289978981018, "logps/chosen": -2.038116931915283, "logps/rejected": -4.167740821838379, "loss": 0.5941, "nll_loss": 0.5694729089736938, "rewards/accuracies": 0.875, "rewards/chosen": -0.20381170511245728, "rewards/margins": 0.21296238899230957, "rewards/rejected": -0.41677409410476685, "step": 5270 }, { "epoch": 14.431211498973306, "grad_norm": 8.469411849975586, "learning_rate": 2.7835616438356163e-07, "log_odds_chosen": 0.7502658367156982, "log_odds_ratio": -0.6353162527084351, "logits/chosen": 0.9180061221122742, "logits/rejected": 0.8047323822975159, "logps/chosen": -2.1338796615600586, "logps/rejected": -2.7515928745269775, "loss": 0.6503, "nll_loss": 0.5867374539375305, "rewards/accuracies": 0.625, "rewards/chosen": -0.21338796615600586, "rewards/margins": 0.06177133694291115, "rewards/rejected": -0.2751592993736267, "step": 5271 }, { "epoch": 14.433949349760438, "grad_norm": 4.716984748840332, "learning_rate": 2.7821917808219173e-07, "log_odds_chosen": 3.7829694747924805, "log_odds_ratio": -0.08131960034370422, "logits/chosen": 0.897510826587677, "logits/rejected": 0.9428852200508118, "logps/chosen": -2.3993935585021973, "logps/rejected": -6.026503086090088, "loss": 0.6043, "nll_loss": 0.596185028553009, "rewards/accuracies": 1.0, "rewards/chosen": -0.2399393618106842, "rewards/margins": 0.36271095275878906, "rewards/rejected": -0.6026502847671509, "step": 5272 }, { "epoch": 14.43668720054757, "grad_norm": 5.23940896987915, "learning_rate": 2.7808219178082194e-07, "log_odds_chosen": 2.7655069828033447, "log_odds_ratio": -0.10737328231334686, "logits/chosen": 0.952710747718811, "logits/rejected": 1.003204345703125, "logps/chosen": -2.250908374786377, "logps/rejected": -4.8874831199646, "loss": 0.642, "nll_loss": 0.6312916874885559, "rewards/accuracies": 1.0, "rewards/chosen": -0.22509083151817322, "rewards/margins": 0.26365748047828674, "rewards/rejected": -0.48874834179878235, "step": 5273 }, { "epoch": 14.439425051334702, "grad_norm": 6.562053680419922, "learning_rate": 2.7794520547945204e-07, "log_odds_chosen": 2.050943613052368, "log_odds_ratio": -0.27597400546073914, "logits/chosen": 0.743348240852356, "logits/rejected": 0.7015958428382874, "logps/chosen": -2.161179542541504, "logps/rejected": -4.062453746795654, "loss": 0.7412, "nll_loss": 0.7136350274085999, "rewards/accuracies": 0.875, "rewards/chosen": -0.2161179631948471, "rewards/margins": 0.1901274472475052, "rewards/rejected": -0.4062454104423523, "step": 5274 }, { "epoch": 14.442162902121835, "grad_norm": 5.544283866882324, "learning_rate": 2.778082191780822e-07, "log_odds_chosen": 3.2250730991363525, "log_odds_ratio": -0.07490034401416779, "logits/chosen": 0.8802920579910278, "logits/rejected": 0.856205940246582, "logps/chosen": -2.6189115047454834, "logps/rejected": -5.709218502044678, "loss": 0.737, "nll_loss": 0.7295275330543518, "rewards/accuracies": 1.0, "rewards/chosen": -0.2618911564350128, "rewards/margins": 0.30903077125549316, "rewards/rejected": -0.5709218978881836, "step": 5275 }, { "epoch": 14.444900752908966, "grad_norm": 7.045131683349609, "learning_rate": 2.776712328767123e-07, "log_odds_chosen": 1.1308969259262085, "log_odds_ratio": -0.5030330419540405, "logits/chosen": 0.8544726371765137, "logits/rejected": 0.8484017848968506, "logps/chosen": -2.5427050590515137, "logps/rejected": -3.5746943950653076, "loss": 0.6231, "nll_loss": 0.5727624893188477, "rewards/accuracies": 0.625, "rewards/chosen": -0.2542705237865448, "rewards/margins": 0.10319895297288895, "rewards/rejected": -0.35746943950653076, "step": 5276 }, { "epoch": 14.447638603696099, "grad_norm": 5.356829643249512, "learning_rate": 2.775342465753425e-07, "log_odds_chosen": 2.5140626430511475, "log_odds_ratio": -0.186796173453331, "logits/chosen": 1.0356615781784058, "logits/rejected": 1.088970422744751, "logps/chosen": -2.512880325317383, "logps/rejected": -4.914402008056641, "loss": 0.6865, "nll_loss": 0.6678193211555481, "rewards/accuracies": 1.0, "rewards/chosen": -0.2512880265712738, "rewards/margins": 0.24015220999717712, "rewards/rejected": -0.4914402365684509, "step": 5277 }, { "epoch": 14.45037645448323, "grad_norm": 6.0381760597229, "learning_rate": 2.773972602739726e-07, "log_odds_chosen": 3.206653118133545, "log_odds_ratio": -0.16310113668441772, "logits/chosen": 1.014458417892456, "logits/rejected": 1.023760437965393, "logps/chosen": -2.5596752166748047, "logps/rejected": -5.67848014831543, "loss": 0.8001, "nll_loss": 0.7837404012680054, "rewards/accuracies": 1.0, "rewards/chosen": -0.25596749782562256, "rewards/margins": 0.31188055872917175, "rewards/rejected": -0.5678480863571167, "step": 5278 }, { "epoch": 14.453114305270363, "grad_norm": 5.13228178024292, "learning_rate": 2.772602739726027e-07, "log_odds_chosen": 2.196700096130371, "log_odds_ratio": -0.28294092416763306, "logits/chosen": 1.0739774703979492, "logits/rejected": 1.1048583984375, "logps/chosen": -1.8577021360397339, "logps/rejected": -3.8730766773223877, "loss": 0.6443, "nll_loss": 0.6160070300102234, "rewards/accuracies": 0.75, "rewards/chosen": -0.18577024340629578, "rewards/margins": 0.20153741538524628, "rewards/rejected": -0.38730764389038086, "step": 5279 }, { "epoch": 14.455852156057494, "grad_norm": 5.2732133865356445, "learning_rate": 2.771232876712329e-07, "log_odds_chosen": 2.0817341804504395, "log_odds_ratio": -0.2832096219062805, "logits/chosen": 0.6031749844551086, "logits/rejected": 0.5757851600646973, "logps/chosen": -2.249741315841675, "logps/rejected": -4.199222564697266, "loss": 0.7421, "nll_loss": 0.7137330770492554, "rewards/accuracies": 0.875, "rewards/chosen": -0.224974125623703, "rewards/margins": 0.19494816660881042, "rewards/rejected": -0.4199222922325134, "step": 5280 }, { "epoch": 14.458590006844627, "grad_norm": 5.4569830894470215, "learning_rate": 2.76986301369863e-07, "log_odds_chosen": 3.2173876762390137, "log_odds_ratio": -0.17762590944766998, "logits/chosen": 0.8844740390777588, "logits/rejected": 0.8219834566116333, "logps/chosen": -1.9503388404846191, "logps/rejected": -5.04490852355957, "loss": 0.6542, "nll_loss": 0.6364257335662842, "rewards/accuracies": 1.0, "rewards/chosen": -0.19503389298915863, "rewards/margins": 0.3094569444656372, "rewards/rejected": -0.504490852355957, "step": 5281 }, { "epoch": 14.461327857631758, "grad_norm": 5.46932315826416, "learning_rate": 2.7684931506849315e-07, "log_odds_chosen": 3.131599187850952, "log_odds_ratio": -0.19207188487052917, "logits/chosen": 0.7651383876800537, "logits/rejected": 0.814053475856781, "logps/chosen": -1.9844214916229248, "logps/rejected": -4.967013835906982, "loss": 0.6017, "nll_loss": 0.5825396180152893, "rewards/accuracies": 1.0, "rewards/chosen": -0.19844216108322144, "rewards/margins": 0.2982592284679413, "rewards/rejected": -0.4967013895511627, "step": 5282 }, { "epoch": 14.464065708418891, "grad_norm": 5.559873104095459, "learning_rate": 2.7671232876712325e-07, "log_odds_chosen": 2.9981021881103516, "log_odds_ratio": -0.132292702794075, "logits/chosen": 0.6746141314506531, "logits/rejected": 0.6925615072250366, "logps/chosen": -1.7357470989227295, "logps/rejected": -4.540971755981445, "loss": 0.5333, "nll_loss": 0.5200487971305847, "rewards/accuracies": 1.0, "rewards/chosen": -0.17357473075389862, "rewards/margins": 0.2805224657058716, "rewards/rejected": -0.454097181558609, "step": 5283 }, { "epoch": 14.466803559206022, "grad_norm": 5.607818126678467, "learning_rate": 2.7657534246575345e-07, "log_odds_chosen": 1.7977795600891113, "log_odds_ratio": -0.31720128655433655, "logits/chosen": 0.8136105537414551, "logits/rejected": 0.889717698097229, "logps/chosen": -2.1311964988708496, "logps/rejected": -3.8153438568115234, "loss": 0.6151, "nll_loss": 0.5833646655082703, "rewards/accuracies": 0.875, "rewards/chosen": -0.21311964094638824, "rewards/margins": 0.16841474175453186, "rewards/rejected": -0.3815343976020813, "step": 5284 }, { "epoch": 14.469541409993155, "grad_norm": 6.127387523651123, "learning_rate": 2.7643835616438355e-07, "log_odds_chosen": 1.6212456226348877, "log_odds_ratio": -0.2931867837905884, "logits/chosen": 0.8074172735214233, "logits/rejected": 0.8314986824989319, "logps/chosen": -2.176801919937134, "logps/rejected": -3.690532684326172, "loss": 0.8433, "nll_loss": 0.8139979839324951, "rewards/accuracies": 0.875, "rewards/chosen": -0.2176801860332489, "rewards/margins": 0.15137310326099396, "rewards/rejected": -0.36905327439308167, "step": 5285 }, { "epoch": 14.472279260780287, "grad_norm": 4.713351726531982, "learning_rate": 2.7630136986301365e-07, "log_odds_chosen": 3.344180107116699, "log_odds_ratio": -0.25555768609046936, "logits/chosen": 1.0548871755599976, "logits/rejected": 1.0548624992370605, "logps/chosen": -2.1252951622009277, "logps/rejected": -5.391456604003906, "loss": 0.612, "nll_loss": 0.5864638090133667, "rewards/accuracies": 0.875, "rewards/chosen": -0.2125295102596283, "rewards/margins": 0.32661616802215576, "rewards/rejected": -0.5391457080841064, "step": 5286 }, { "epoch": 14.47501711156742, "grad_norm": 6.089685916900635, "learning_rate": 2.7616438356164385e-07, "log_odds_chosen": 1.625069499015808, "log_odds_ratio": -0.39828038215637207, "logits/chosen": 1.0960841178894043, "logits/rejected": 1.1400524377822876, "logps/chosen": -2.809900999069214, "logps/rejected": -4.380211353302002, "loss": 0.7088, "nll_loss": 0.6689627170562744, "rewards/accuracies": 0.75, "rewards/chosen": -0.2809900939464569, "rewards/margins": 0.15703104436397552, "rewards/rejected": -0.43802112340927124, "step": 5287 }, { "epoch": 14.477754962354553, "grad_norm": 5.098086357116699, "learning_rate": 2.7602739726027395e-07, "log_odds_chosen": 3.5570971965789795, "log_odds_ratio": -0.16909795999526978, "logits/chosen": 0.8659594058990479, "logits/rejected": 0.8915223479270935, "logps/chosen": -2.334707260131836, "logps/rejected": -5.788334369659424, "loss": 0.7146, "nll_loss": 0.697731614112854, "rewards/accuracies": 1.0, "rewards/chosen": -0.23347075283527374, "rewards/margins": 0.34536272287368774, "rewards/rejected": -0.5788334608078003, "step": 5288 }, { "epoch": 14.480492813141684, "grad_norm": 5.016388416290283, "learning_rate": 2.758904109589041e-07, "log_odds_chosen": 2.036982536315918, "log_odds_ratio": -0.19181951880455017, "logits/chosen": 1.142323613166809, "logits/rejected": 1.147118330001831, "logps/chosen": -1.686816692352295, "logps/rejected": -3.545318841934204, "loss": 0.4944, "nll_loss": 0.47524452209472656, "rewards/accuracies": 1.0, "rewards/chosen": -0.16868168115615845, "rewards/margins": 0.18585021793842316, "rewards/rejected": -0.3545318841934204, "step": 5289 }, { "epoch": 14.483230663928817, "grad_norm": 7.315682411193848, "learning_rate": 2.7575342465753426e-07, "log_odds_chosen": 2.138988971710205, "log_odds_ratio": -0.2799401581287384, "logits/chosen": 0.9641164541244507, "logits/rejected": 1.0262084007263184, "logps/chosen": -2.7506461143493652, "logps/rejected": -4.819752216339111, "loss": 0.6842, "nll_loss": 0.6562474966049194, "rewards/accuracies": 0.875, "rewards/chosen": -0.275064617395401, "rewards/margins": 0.2069106251001358, "rewards/rejected": -0.481975257396698, "step": 5290 }, { "epoch": 14.485968514715948, "grad_norm": 5.928196907043457, "learning_rate": 2.7561643835616436e-07, "log_odds_chosen": 2.351912498474121, "log_odds_ratio": -0.3052470088005066, "logits/chosen": 0.7831965684890747, "logits/rejected": 0.7824036478996277, "logps/chosen": -2.3035945892333984, "logps/rejected": -4.565109729766846, "loss": 0.6569, "nll_loss": 0.6263518929481506, "rewards/accuracies": 0.875, "rewards/chosen": -0.23035944998264313, "rewards/margins": 0.22615155577659607, "rewards/rejected": -0.4565110206604004, "step": 5291 }, { "epoch": 14.48870636550308, "grad_norm": 6.157619476318359, "learning_rate": 2.754794520547945e-07, "log_odds_chosen": 1.8651517629623413, "log_odds_ratio": -0.350776731967926, "logits/chosen": 0.9079044461250305, "logits/rejected": 0.9987471699714661, "logps/chosen": -2.7217204570770264, "logps/rejected": -4.4883832931518555, "loss": 0.6492, "nll_loss": 0.6141318082809448, "rewards/accuracies": 0.75, "rewards/chosen": -0.2721720337867737, "rewards/margins": 0.1766662746667862, "rewards/rejected": -0.44883835315704346, "step": 5292 }, { "epoch": 14.491444216290212, "grad_norm": 6.137630939483643, "learning_rate": 2.753424657534246e-07, "log_odds_chosen": 1.9430391788482666, "log_odds_ratio": -0.4472005367279053, "logits/chosen": 0.9200491905212402, "logits/rejected": 0.8545264601707458, "logps/chosen": -1.9985475540161133, "logps/rejected": -3.8056702613830566, "loss": 0.6882, "nll_loss": 0.6435098648071289, "rewards/accuracies": 0.875, "rewards/chosen": -0.1998547464609146, "rewards/margins": 0.1807122677564621, "rewards/rejected": -0.3805670142173767, "step": 5293 }, { "epoch": 14.494182067077345, "grad_norm": 5.047677516937256, "learning_rate": 2.752054794520548e-07, "log_odds_chosen": 3.281850814819336, "log_odds_ratio": -0.15583762526512146, "logits/chosen": 0.6731137633323669, "logits/rejected": 0.6129676699638367, "logps/chosen": -2.613030195236206, "logps/rejected": -5.8061299324035645, "loss": 0.8267, "nll_loss": 0.8111420273780823, "rewards/accuracies": 0.875, "rewards/chosen": -0.26130300760269165, "rewards/margins": 0.3193100094795227, "rewards/rejected": -0.5806130170822144, "step": 5294 }, { "epoch": 14.496919917864476, "grad_norm": 5.03503942489624, "learning_rate": 2.750684931506849e-07, "log_odds_chosen": 2.33132004737854, "log_odds_ratio": -0.18217048048973083, "logits/chosen": 0.6411918997764587, "logits/rejected": 0.7018373012542725, "logps/chosen": -2.1936631202697754, "logps/rejected": -4.386379241943359, "loss": 0.6973, "nll_loss": 0.6790808439254761, "rewards/accuracies": 1.0, "rewards/chosen": -0.21936634182929993, "rewards/margins": 0.21927158534526825, "rewards/rejected": -0.43863794207572937, "step": 5295 }, { "epoch": 14.499657768651609, "grad_norm": 5.6701884269714355, "learning_rate": 2.7493150684931506e-07, "log_odds_chosen": 1.3424631357192993, "log_odds_ratio": -0.28906726837158203, "logits/chosen": 0.8689904808998108, "logits/rejected": 0.8598986268043518, "logps/chosen": -1.831995964050293, "logps/rejected": -3.0456137657165527, "loss": 0.5602, "nll_loss": 0.5312500596046448, "rewards/accuracies": 1.0, "rewards/chosen": -0.18319961428642273, "rewards/margins": 0.12136177718639374, "rewards/rejected": -0.3045613765716553, "step": 5296 }, { "epoch": 14.50239561943874, "grad_norm": 6.605988502502441, "learning_rate": 2.747945205479452e-07, "log_odds_chosen": 2.115189790725708, "log_odds_ratio": -0.4623565673828125, "logits/chosen": 0.791856050491333, "logits/rejected": 0.8310856819152832, "logps/chosen": -2.406787395477295, "logps/rejected": -4.383758068084717, "loss": 0.7666, "nll_loss": 0.7203956842422485, "rewards/accuracies": 0.875, "rewards/chosen": -0.24067875742912292, "rewards/margins": 0.19769704341888428, "rewards/rejected": -0.4383758008480072, "step": 5297 }, { "epoch": 14.505133470225873, "grad_norm": 7.260969161987305, "learning_rate": 2.746575342465753e-07, "log_odds_chosen": 2.0196590423583984, "log_odds_ratio": -0.3847217261791229, "logits/chosen": 0.6722964644432068, "logits/rejected": 0.6190014481544495, "logps/chosen": -2.2794790267944336, "logps/rejected": -4.148216247558594, "loss": 0.755, "nll_loss": 0.7165148258209229, "rewards/accuracies": 0.875, "rewards/chosen": -0.2279479205608368, "rewards/margins": 0.18687370419502258, "rewards/rejected": -0.4148216247558594, "step": 5298 }, { "epoch": 14.507871321013004, "grad_norm": 4.849092960357666, "learning_rate": 2.7452054794520547e-07, "log_odds_chosen": 1.6592345237731934, "log_odds_ratio": -0.23793412744998932, "logits/chosen": 0.8754759430885315, "logits/rejected": 0.8837682604789734, "logps/chosen": -2.9873204231262207, "logps/rejected": -4.50814151763916, "loss": 0.6878, "nll_loss": 0.6639701724052429, "rewards/accuracies": 1.0, "rewards/chosen": -0.2987320125102997, "rewards/margins": 0.15208211541175842, "rewards/rejected": -0.4508141279220581, "step": 5299 }, { "epoch": 14.510609171800137, "grad_norm": 5.265275001525879, "learning_rate": 2.7438356164383557e-07, "log_odds_chosen": 1.693963885307312, "log_odds_ratio": -0.2621448040008545, "logits/chosen": 0.7998042702674866, "logits/rejected": 0.8346419334411621, "logps/chosen": -2.670408248901367, "logps/rejected": -4.296405792236328, "loss": 0.7918, "nll_loss": 0.7656322121620178, "rewards/accuracies": 0.875, "rewards/chosen": -0.26704084873199463, "rewards/margins": 0.16259974241256714, "rewards/rejected": -0.42964059114456177, "step": 5300 }, { "epoch": 14.513347022587268, "grad_norm": 4.739964962005615, "learning_rate": 2.7424657534246577e-07, "log_odds_chosen": 1.6801729202270508, "log_odds_ratio": -0.30370035767555237, "logits/chosen": 0.818953275680542, "logits/rejected": 0.9032158255577087, "logps/chosen": -2.145966053009033, "logps/rejected": -3.7661333084106445, "loss": 0.5803, "nll_loss": 0.5499166250228882, "rewards/accuracies": 0.75, "rewards/chosen": -0.21459659934043884, "rewards/margins": 0.16201676428318024, "rewards/rejected": -0.3766133487224579, "step": 5301 }, { "epoch": 14.516084873374401, "grad_norm": 6.983745574951172, "learning_rate": 2.7410958904109587e-07, "log_odds_chosen": 1.6274347305297852, "log_odds_ratio": -0.2777338922023773, "logits/chosen": 0.666077733039856, "logits/rejected": 0.7003815174102783, "logps/chosen": -2.150115728378296, "logps/rejected": -3.675877094268799, "loss": 0.7002, "nll_loss": 0.6723970770835876, "rewards/accuracies": 0.875, "rewards/chosen": -0.2150115668773651, "rewards/margins": 0.15257614850997925, "rewards/rejected": -0.367587685585022, "step": 5302 }, { "epoch": 14.518822724161533, "grad_norm": 10.756234169006348, "learning_rate": 2.73972602739726e-07, "log_odds_chosen": 1.5150495767593384, "log_odds_ratio": -0.27754268050193787, "logits/chosen": 0.7582777738571167, "logits/rejected": 0.8085752129554749, "logps/chosen": -2.2759523391723633, "logps/rejected": -3.6725473403930664, "loss": 0.5649, "nll_loss": 0.5371944904327393, "rewards/accuracies": 1.0, "rewards/chosen": -0.2275952398777008, "rewards/margins": 0.13965949416160583, "rewards/rejected": -0.36725473403930664, "step": 5303 }, { "epoch": 14.521560574948666, "grad_norm": 5.983609676361084, "learning_rate": 2.738356164383562e-07, "log_odds_chosen": 1.7181613445281982, "log_odds_ratio": -0.20282655954360962, "logits/chosen": 0.8818904757499695, "logits/rejected": 0.937557578086853, "logps/chosen": -2.152400493621826, "logps/rejected": -3.7735769748687744, "loss": 0.6542, "nll_loss": 0.6339559555053711, "rewards/accuracies": 1.0, "rewards/chosen": -0.21524006128311157, "rewards/margins": 0.1621176302433014, "rewards/rejected": -0.37735772132873535, "step": 5304 }, { "epoch": 14.524298425735797, "grad_norm": 5.3025712966918945, "learning_rate": 2.7369863013698627e-07, "log_odds_chosen": 2.226635694503784, "log_odds_ratio": -0.17880548536777496, "logits/chosen": 0.7453287243843079, "logits/rejected": 0.7457610368728638, "logps/chosen": -2.0879337787628174, "logps/rejected": -4.172770977020264, "loss": 0.5617, "nll_loss": 0.5438514351844788, "rewards/accuracies": 1.0, "rewards/chosen": -0.20879337191581726, "rewards/margins": 0.2084837555885315, "rewards/rejected": -0.41727709770202637, "step": 5305 }, { "epoch": 14.52703627652293, "grad_norm": 6.9061760902404785, "learning_rate": 2.735616438356164e-07, "log_odds_chosen": 1.8573886156082153, "log_odds_ratio": -0.32055312395095825, "logits/chosen": 0.8796791434288025, "logits/rejected": 0.9098140597343445, "logps/chosen": -2.643181324005127, "logps/rejected": -4.456740379333496, "loss": 0.734, "nll_loss": 0.7019836902618408, "rewards/accuracies": 0.875, "rewards/chosen": -0.26431816816329956, "rewards/margins": 0.18135592341423035, "rewards/rejected": -0.4456740617752075, "step": 5306 }, { "epoch": 14.529774127310061, "grad_norm": 5.912571430206299, "learning_rate": 2.734246575342465e-07, "log_odds_chosen": 2.359135627746582, "log_odds_ratio": -0.1450468897819519, "logits/chosen": 0.9133056402206421, "logits/rejected": 0.9649905562400818, "logps/chosen": -2.5861902236938477, "logps/rejected": -4.868488788604736, "loss": 0.6838, "nll_loss": 0.6693257689476013, "rewards/accuracies": 1.0, "rewards/chosen": -0.2586190104484558, "rewards/margins": 0.2282298505306244, "rewards/rejected": -0.4868488907814026, "step": 5307 }, { "epoch": 14.532511978097194, "grad_norm": 7.797422885894775, "learning_rate": 2.7328767123287673e-07, "log_odds_chosen": 2.3131425380706787, "log_odds_ratio": -0.5463523864746094, "logits/chosen": 0.8057337403297424, "logits/rejected": 0.772664487361908, "logps/chosen": -2.7520084381103516, "logps/rejected": -4.9890456199646, "loss": 0.7604, "nll_loss": 0.7057539820671082, "rewards/accuracies": 0.625, "rewards/chosen": -0.27520087361335754, "rewards/margins": 0.22370374202728271, "rewards/rejected": -0.49890458583831787, "step": 5308 }, { "epoch": 14.535249828884325, "grad_norm": 4.741346836090088, "learning_rate": 2.7315068493150683e-07, "log_odds_chosen": 2.569666862487793, "log_odds_ratio": -0.24654319882392883, "logits/chosen": 0.8798955678939819, "logits/rejected": 0.8860456943511963, "logps/chosen": -2.1025896072387695, "logps/rejected": -4.58210563659668, "loss": 0.6255, "nll_loss": 0.6008667945861816, "rewards/accuracies": 0.875, "rewards/chosen": -0.21025896072387695, "rewards/margins": 0.24795159697532654, "rewards/rejected": -0.4582105576992035, "step": 5309 }, { "epoch": 14.537987679671458, "grad_norm": 6.267358779907227, "learning_rate": 2.73013698630137e-07, "log_odds_chosen": 1.935387134552002, "log_odds_ratio": -0.3382655382156372, "logits/chosen": 0.8737452626228333, "logits/rejected": 0.7941009998321533, "logps/chosen": -2.875751495361328, "logps/rejected": -4.740312099456787, "loss": 0.7496, "nll_loss": 0.7158069610595703, "rewards/accuracies": 0.75, "rewards/chosen": -0.2875751852989197, "rewards/margins": 0.18645602464675903, "rewards/rejected": -0.4740311801433563, "step": 5310 }, { "epoch": 14.54072553045859, "grad_norm": 5.889821529388428, "learning_rate": 2.7287671232876713e-07, "log_odds_chosen": 2.6012632846832275, "log_odds_ratio": -0.39146241545677185, "logits/chosen": 0.7786194682121277, "logits/rejected": 0.7826774716377258, "logps/chosen": -2.1096909046173096, "logps/rejected": -4.608643531799316, "loss": 0.602, "nll_loss": 0.5629008412361145, "rewards/accuracies": 0.75, "rewards/chosen": -0.21096909046173096, "rewards/margins": 0.24989527463912964, "rewards/rejected": -0.4608643651008606, "step": 5311 }, { "epoch": 14.543463381245722, "grad_norm": 9.919326782226562, "learning_rate": 2.7273972602739723e-07, "log_odds_chosen": 3.9977312088012695, "log_odds_ratio": -0.2818189561367035, "logits/chosen": 0.7845146656036377, "logits/rejected": 0.7073110342025757, "logps/chosen": -2.40531325340271, "logps/rejected": -6.273324012756348, "loss": 0.8648, "nll_loss": 0.836661159992218, "rewards/accuracies": 0.875, "rewards/chosen": -0.2405313104391098, "rewards/margins": 0.3868011236190796, "rewards/rejected": -0.6273324489593506, "step": 5312 }, { "epoch": 14.546201232032853, "grad_norm": 4.592273235321045, "learning_rate": 2.726027397260274e-07, "log_odds_chosen": 2.573091745376587, "log_odds_ratio": -0.12010356038808823, "logits/chosen": 0.7481197118759155, "logits/rejected": 0.7897520661354065, "logps/chosen": -2.440404176712036, "logps/rejected": -4.8986358642578125, "loss": 0.7214, "nll_loss": 0.7093825340270996, "rewards/accuracies": 1.0, "rewards/chosen": -0.24404042959213257, "rewards/margins": 0.2458232045173645, "rewards/rejected": -0.48986363410949707, "step": 5313 }, { "epoch": 14.548939082819986, "grad_norm": 6.706703186035156, "learning_rate": 2.724657534246575e-07, "log_odds_chosen": 1.9164754152297974, "log_odds_ratio": -0.6205762624740601, "logits/chosen": 0.8830047845840454, "logits/rejected": 0.909827709197998, "logps/chosen": -2.906568765640259, "logps/rejected": -4.722033500671387, "loss": 0.7797, "nll_loss": 0.7176409959793091, "rewards/accuracies": 0.875, "rewards/chosen": -0.2906568944454193, "rewards/margins": 0.18154647946357727, "rewards/rejected": -0.4722033739089966, "step": 5314 }, { "epoch": 14.55167693360712, "grad_norm": 5.162522792816162, "learning_rate": 2.723287671232877e-07, "log_odds_chosen": 2.1944470405578613, "log_odds_ratio": -0.17857415974140167, "logits/chosen": 0.6061108112335205, "logits/rejected": 0.611730694770813, "logps/chosen": -1.7973941564559937, "logps/rejected": -3.814241886138916, "loss": 0.5084, "nll_loss": 0.49054789543151855, "rewards/accuracies": 1.0, "rewards/chosen": -0.17973941564559937, "rewards/margins": 0.20168477296829224, "rewards/rejected": -0.3814241886138916, "step": 5315 }, { "epoch": 14.55441478439425, "grad_norm": 5.3880157470703125, "learning_rate": 2.721917808219178e-07, "log_odds_chosen": 2.502354383468628, "log_odds_ratio": -0.22151459753513336, "logits/chosen": 0.837653636932373, "logits/rejected": 0.9263117909431458, "logps/chosen": -2.8094520568847656, "logps/rejected": -5.257530212402344, "loss": 0.8768, "nll_loss": 0.8546098470687866, "rewards/accuracies": 0.875, "rewards/chosen": -0.28094518184661865, "rewards/margins": 0.24480783939361572, "rewards/rejected": -0.5257530212402344, "step": 5316 }, { "epoch": 14.557152635181383, "grad_norm": 11.090192794799805, "learning_rate": 2.7205479452054794e-07, "log_odds_chosen": 2.002777099609375, "log_odds_ratio": -0.251360684633255, "logits/chosen": 0.8976271748542786, "logits/rejected": 0.893291711807251, "logps/chosen": -2.7424471378326416, "logps/rejected": -4.634852886199951, "loss": 0.7033, "nll_loss": 0.678166925907135, "rewards/accuracies": 0.875, "rewards/chosen": -0.2742446959018707, "rewards/margins": 0.18924058973789215, "rewards/rejected": -0.4634853005409241, "step": 5317 }, { "epoch": 14.559890485968515, "grad_norm": 4.93467378616333, "learning_rate": 2.719178082191781e-07, "log_odds_chosen": 2.8144962787628174, "log_odds_ratio": -0.1561155468225479, "logits/chosen": 0.8242335915565491, "logits/rejected": 0.8984805345535278, "logps/chosen": -2.4766948223114014, "logps/rejected": -5.187661647796631, "loss": 0.657, "nll_loss": 0.6414113640785217, "rewards/accuracies": 1.0, "rewards/chosen": -0.24766948819160461, "rewards/margins": 0.27109667658805847, "rewards/rejected": -0.5187661647796631, "step": 5318 }, { "epoch": 14.562628336755647, "grad_norm": 5.748514175415039, "learning_rate": 2.717808219178082e-07, "log_odds_chosen": 1.967987060546875, "log_odds_ratio": -0.317839652299881, "logits/chosen": 0.9593427181243896, "logits/rejected": 0.9422122240066528, "logps/chosen": -2.297518253326416, "logps/rejected": -4.202546119689941, "loss": 0.6593, "nll_loss": 0.627480685710907, "rewards/accuracies": 0.75, "rewards/chosen": -0.2297517955303192, "rewards/margins": 0.1905028223991394, "rewards/rejected": -0.420254647731781, "step": 5319 }, { "epoch": 14.565366187542779, "grad_norm": 4.886826992034912, "learning_rate": 2.7164383561643834e-07, "log_odds_chosen": 1.582838773727417, "log_odds_ratio": -0.2527446150779724, "logits/chosen": 0.6097649931907654, "logits/rejected": 0.6200417876243591, "logps/chosen": -2.020087242126465, "logps/rejected": -3.4844493865966797, "loss": 0.6381, "nll_loss": 0.6128662824630737, "rewards/accuracies": 1.0, "rewards/chosen": -0.20200872421264648, "rewards/margins": 0.14643622934818268, "rewards/rejected": -0.34844493865966797, "step": 5320 }, { "epoch": 14.568104038329912, "grad_norm": 4.7537031173706055, "learning_rate": 2.7150684931506844e-07, "log_odds_chosen": 2.5706915855407715, "log_odds_ratio": -0.2074877917766571, "logits/chosen": 0.7647756934165955, "logits/rejected": 0.7342515587806702, "logps/chosen": -2.3717052936553955, "logps/rejected": -4.843780040740967, "loss": 0.6769, "nll_loss": 0.656141459941864, "rewards/accuracies": 0.875, "rewards/chosen": -0.23717054724693298, "rewards/margins": 0.24720749258995056, "rewards/rejected": -0.48437803983688354, "step": 5321 }, { "epoch": 14.570841889117043, "grad_norm": 5.888593673706055, "learning_rate": 2.7136986301369865e-07, "log_odds_chosen": 1.5538411140441895, "log_odds_ratio": -0.2382899522781372, "logits/chosen": 0.7932324409484863, "logits/rejected": 0.8212940096855164, "logps/chosen": -1.693377137184143, "logps/rejected": -3.079592704772949, "loss": 0.6552, "nll_loss": 0.6313899755477905, "rewards/accuracies": 1.0, "rewards/chosen": -0.16933771967887878, "rewards/margins": 0.13862156867980957, "rewards/rejected": -0.30795928835868835, "step": 5322 }, { "epoch": 14.573579739904176, "grad_norm": 4.851619720458984, "learning_rate": 2.7123287671232875e-07, "log_odds_chosen": 3.1851096153259277, "log_odds_ratio": -0.11667422205209732, "logits/chosen": 0.8928098082542419, "logits/rejected": 0.978305459022522, "logps/chosen": -1.878021001815796, "logps/rejected": -4.85143518447876, "loss": 0.744, "nll_loss": 0.732379674911499, "rewards/accuracies": 1.0, "rewards/chosen": -0.18780210614204407, "rewards/margins": 0.29734140634536743, "rewards/rejected": -0.4851435422897339, "step": 5323 }, { "epoch": 14.576317590691307, "grad_norm": 4.8343610763549805, "learning_rate": 2.710958904109589e-07, "log_odds_chosen": 2.902492046356201, "log_odds_ratio": -0.15423664450645447, "logits/chosen": 0.7390692234039307, "logits/rejected": 0.7793212532997131, "logps/chosen": -1.3843815326690674, "logps/rejected": -3.9937148094177246, "loss": 0.5614, "nll_loss": 0.5459983944892883, "rewards/accuracies": 1.0, "rewards/chosen": -0.1384381502866745, "rewards/margins": 0.2609333395957947, "rewards/rejected": -0.39937150478363037, "step": 5324 }, { "epoch": 14.57905544147844, "grad_norm": 9.546680450439453, "learning_rate": 2.7095890410958905e-07, "log_odds_chosen": 0.6766879558563232, "log_odds_ratio": -0.6505738496780396, "logits/chosen": 0.8865447640419006, "logits/rejected": 0.8423405885696411, "logps/chosen": -2.2524101734161377, "logps/rejected": -2.814894914627075, "loss": 0.6492, "nll_loss": 0.5841329097747803, "rewards/accuracies": 0.875, "rewards/chosen": -0.22524100542068481, "rewards/margins": 0.056248486042022705, "rewards/rejected": -0.2814894914627075, "step": 5325 }, { "epoch": 14.581793292265571, "grad_norm": 5.637298583984375, "learning_rate": 2.7082191780821915e-07, "log_odds_chosen": 1.883197546005249, "log_odds_ratio": -0.2820476293563843, "logits/chosen": 1.0407121181488037, "logits/rejected": 1.120154619216919, "logps/chosen": -2.5712881088256836, "logps/rejected": -4.369421482086182, "loss": 0.6457, "nll_loss": 0.6175183653831482, "rewards/accuracies": 0.875, "rewards/chosen": -0.2571288049221039, "rewards/margins": 0.17981332540512085, "rewards/rejected": -0.43694213032722473, "step": 5326 }, { "epoch": 14.584531143052704, "grad_norm": 5.249849796295166, "learning_rate": 2.706849315068493e-07, "log_odds_chosen": 1.3183887004852295, "log_odds_ratio": -0.3637109696865082, "logits/chosen": 0.7803105115890503, "logits/rejected": 0.8767368197441101, "logps/chosen": -2.140883684158325, "logps/rejected": -3.2473278045654297, "loss": 0.7155, "nll_loss": 0.6791596412658691, "rewards/accuracies": 0.875, "rewards/chosen": -0.21408836543560028, "rewards/margins": 0.11064444482326508, "rewards/rejected": -0.32473281025886536, "step": 5327 }, { "epoch": 14.587268993839835, "grad_norm": 4.2255539894104, "learning_rate": 2.7054794520547945e-07, "log_odds_chosen": 3.5528206825256348, "log_odds_ratio": -0.06822246313095093, "logits/chosen": 0.7653865218162537, "logits/rejected": 0.8488388657569885, "logps/chosen": -2.069035768508911, "logps/rejected": -5.42836856842041, "loss": 0.723, "nll_loss": 0.7161688208580017, "rewards/accuracies": 1.0, "rewards/chosen": -0.2069035917520523, "rewards/margins": 0.33593323826789856, "rewards/rejected": -0.5428367853164673, "step": 5328 }, { "epoch": 14.590006844626968, "grad_norm": 4.610962390899658, "learning_rate": 2.704109589041096e-07, "log_odds_chosen": 3.764230251312256, "log_odds_ratio": -0.17245574295520782, "logits/chosen": 0.8042960166931152, "logits/rejected": 0.810743510723114, "logps/chosen": -2.0605735778808594, "logps/rejected": -5.6893486976623535, "loss": 0.6299, "nll_loss": 0.6126651763916016, "rewards/accuracies": 1.0, "rewards/chosen": -0.2060573548078537, "rewards/margins": 0.3628775477409363, "rewards/rejected": -0.5689349174499512, "step": 5329 }, { "epoch": 14.5927446954141, "grad_norm": 5.84597110748291, "learning_rate": 2.702739726027397e-07, "log_odds_chosen": 1.959202766418457, "log_odds_ratio": -0.42668616771698, "logits/chosen": 0.8537077903747559, "logits/rejected": 0.8262380361557007, "logps/chosen": -2.301541566848755, "logps/rejected": -4.154087066650391, "loss": 0.6278, "nll_loss": 0.5851749181747437, "rewards/accuracies": 0.875, "rewards/chosen": -0.2301541566848755, "rewards/margins": 0.18525457382202148, "rewards/rejected": -0.415408730506897, "step": 5330 }, { "epoch": 14.595482546201232, "grad_norm": 4.897413730621338, "learning_rate": 2.701369863013698e-07, "log_odds_chosen": 1.627324104309082, "log_odds_ratio": -0.27360740303993225, "logits/chosen": 0.8208063840866089, "logits/rejected": 0.8554930090904236, "logps/chosen": -1.4789025783538818, "logps/rejected": -2.8975605964660645, "loss": 0.4972, "nll_loss": 0.4698023498058319, "rewards/accuracies": 0.875, "rewards/chosen": -0.14789025485515594, "rewards/margins": 0.1418658047914505, "rewards/rejected": -0.28975605964660645, "step": 5331 }, { "epoch": 14.598220396988363, "grad_norm": 5.713990211486816, "learning_rate": 2.7e-07, "log_odds_chosen": 2.466913938522339, "log_odds_ratio": -0.22876369953155518, "logits/chosen": 0.9954599142074585, "logits/rejected": 1.031428575515747, "logps/chosen": -2.4702258110046387, "logps/rejected": -4.722207546234131, "loss": 0.7406, "nll_loss": 0.7177119851112366, "rewards/accuracies": 0.875, "rewards/chosen": -0.2470225691795349, "rewards/margins": 0.22519820928573608, "rewards/rejected": -0.4722208082675934, "step": 5332 }, { "epoch": 14.600958247775496, "grad_norm": 5.824213981628418, "learning_rate": 2.698630136986301e-07, "log_odds_chosen": 1.364998459815979, "log_odds_ratio": -0.282757043838501, "logits/chosen": 1.069164514541626, "logits/rejected": 0.9252328872680664, "logps/chosen": -1.599375605583191, "logps/rejected": -2.789125442504883, "loss": 0.5843, "nll_loss": 0.5559949278831482, "rewards/accuracies": 1.0, "rewards/chosen": -0.15993757545948029, "rewards/margins": 0.118974968791008, "rewards/rejected": -0.2789125442504883, "step": 5333 }, { "epoch": 14.603696098562628, "grad_norm": 6.539890766143799, "learning_rate": 2.6972602739726026e-07, "log_odds_chosen": 1.7715836763381958, "log_odds_ratio": -0.3095235526561737, "logits/chosen": 0.934650719165802, "logits/rejected": 1.005678415298462, "logps/chosen": -2.2648043632507324, "logps/rejected": -3.9416329860687256, "loss": 0.6077, "nll_loss": 0.576725423336029, "rewards/accuracies": 0.875, "rewards/chosen": -0.2264804244041443, "rewards/margins": 0.16768288612365723, "rewards/rejected": -0.3941633105278015, "step": 5334 }, { "epoch": 14.60643394934976, "grad_norm": 4.513796806335449, "learning_rate": 2.695890410958904e-07, "log_odds_chosen": 2.6235766410827637, "log_odds_ratio": -0.1735086441040039, "logits/chosen": 0.7743277549743652, "logits/rejected": 0.8139594793319702, "logps/chosen": -2.3772635459899902, "logps/rejected": -4.901553630828857, "loss": 0.6388, "nll_loss": 0.6214451193809509, "rewards/accuracies": 1.0, "rewards/chosen": -0.2377263605594635, "rewards/margins": 0.2524290084838867, "rewards/rejected": -0.4901553988456726, "step": 5335 }, { "epoch": 14.609171800136892, "grad_norm": 5.004255294799805, "learning_rate": 2.6945205479452056e-07, "log_odds_chosen": 2.547466278076172, "log_odds_ratio": -0.17700476944446564, "logits/chosen": 0.79944908618927, "logits/rejected": 0.81687992811203, "logps/chosen": -1.5511302947998047, "logps/rejected": -3.8491029739379883, "loss": 0.509, "nll_loss": 0.4913092851638794, "rewards/accuracies": 0.875, "rewards/chosen": -0.15511304140090942, "rewards/margins": 0.22979725897312164, "rewards/rejected": -0.38491031527519226, "step": 5336 }, { "epoch": 14.611909650924025, "grad_norm": 4.8687663078308105, "learning_rate": 2.6931506849315066e-07, "log_odds_chosen": 2.144115447998047, "log_odds_ratio": -0.2998999357223511, "logits/chosen": 0.7909021377563477, "logits/rejected": 0.8121187686920166, "logps/chosen": -2.2235920429229736, "logps/rejected": -4.28661584854126, "loss": 0.6445, "nll_loss": 0.6145309805870056, "rewards/accuracies": 0.875, "rewards/chosen": -0.22235921025276184, "rewards/margins": 0.20630237460136414, "rewards/rejected": -0.428661584854126, "step": 5337 }, { "epoch": 14.614647501711158, "grad_norm": 6.229548931121826, "learning_rate": 2.6917808219178076e-07, "log_odds_chosen": 2.414687156677246, "log_odds_ratio": -0.27481284737586975, "logits/chosen": 1.0217616558074951, "logits/rejected": 1.0939284563064575, "logps/chosen": -2.8715639114379883, "logps/rejected": -5.194650650024414, "loss": 0.752, "nll_loss": 0.7244868278503418, "rewards/accuracies": 0.875, "rewards/chosen": -0.28715643286705017, "rewards/margins": 0.23230865597724915, "rewards/rejected": -0.5194650888442993, "step": 5338 }, { "epoch": 14.617385352498289, "grad_norm": 5.774407863616943, "learning_rate": 2.6904109589041097e-07, "log_odds_chosen": 1.0732121467590332, "log_odds_ratio": -0.3764782249927521, "logits/chosen": 0.8533675670623779, "logits/rejected": 0.9320322275161743, "logps/chosen": -1.975032091140747, "logps/rejected": -2.918955087661743, "loss": 0.6378, "nll_loss": 0.6001920700073242, "rewards/accuracies": 0.875, "rewards/chosen": -0.1975031942129135, "rewards/margins": 0.09439229965209961, "rewards/rejected": -0.2918955087661743, "step": 5339 }, { "epoch": 14.62012320328542, "grad_norm": 6.607381820678711, "learning_rate": 2.6890410958904107e-07, "log_odds_chosen": 1.7928922176361084, "log_odds_ratio": -0.5424584150314331, "logits/chosen": 0.729749858379364, "logits/rejected": 0.7314713001251221, "logps/chosen": -2.46535062789917, "logps/rejected": -4.142647743225098, "loss": 0.7422, "nll_loss": 0.6879370212554932, "rewards/accuracies": 0.875, "rewards/chosen": -0.2465350478887558, "rewards/margins": 0.16772973537445068, "rewards/rejected": -0.4142647385597229, "step": 5340 }, { "epoch": 14.622861054072553, "grad_norm": 6.446445941925049, "learning_rate": 2.687671232876712e-07, "log_odds_chosen": 4.147652626037598, "log_odds_ratio": -0.3127329349517822, "logits/chosen": 1.053537130355835, "logits/rejected": 1.0914255380630493, "logps/chosen": -2.6054534912109375, "logps/rejected": -6.7269721031188965, "loss": 0.7274, "nll_loss": 0.6961298584938049, "rewards/accuracies": 0.875, "rewards/chosen": -0.26054537296295166, "rewards/margins": 0.41215187311172485, "rewards/rejected": -0.6726971864700317, "step": 5341 }, { "epoch": 14.625598904859686, "grad_norm": 5.900031089782715, "learning_rate": 2.6863013698630137e-07, "log_odds_chosen": 2.2954695224761963, "log_odds_ratio": -0.14525407552719116, "logits/chosen": 0.9386669397354126, "logits/rejected": 0.8272875547409058, "logps/chosen": -2.1224396228790283, "logps/rejected": -4.287518501281738, "loss": 0.69, "nll_loss": 0.6754619479179382, "rewards/accuracies": 1.0, "rewards/chosen": -0.2122439593076706, "rewards/margins": 0.21650785207748413, "rewards/rejected": -0.4287518262863159, "step": 5342 }, { "epoch": 14.628336755646817, "grad_norm": 5.7189459800720215, "learning_rate": 2.684931506849315e-07, "log_odds_chosen": 2.8538079261779785, "log_odds_ratio": -0.21953082084655762, "logits/chosen": 1.0939083099365234, "logits/rejected": 1.147298812866211, "logps/chosen": -2.3747920989990234, "logps/rejected": -5.159150123596191, "loss": 0.7412, "nll_loss": 0.7192603349685669, "rewards/accuracies": 0.875, "rewards/chosen": -0.23747923970222473, "rewards/margins": 0.27843573689460754, "rewards/rejected": -0.5159149765968323, "step": 5343 }, { "epoch": 14.63107460643395, "grad_norm": 5.749507904052734, "learning_rate": 2.683561643835616e-07, "log_odds_chosen": 1.6927859783172607, "log_odds_ratio": -0.37543871998786926, "logits/chosen": 0.8434768319129944, "logits/rejected": 0.8877490758895874, "logps/chosen": -1.9432570934295654, "logps/rejected": -3.362381935119629, "loss": 0.5567, "nll_loss": 0.5191537737846375, "rewards/accuracies": 0.875, "rewards/chosen": -0.19432571530342102, "rewards/margins": 0.1419125199317932, "rewards/rejected": -0.33623823523521423, "step": 5344 }, { "epoch": 14.633812457221081, "grad_norm": 6.548123359680176, "learning_rate": 2.682191780821917e-07, "log_odds_chosen": 2.37294340133667, "log_odds_ratio": -0.20492276549339294, "logits/chosen": 0.9045608043670654, "logits/rejected": 0.9144113063812256, "logps/chosen": -2.7325279712677, "logps/rejected": -5.009826183319092, "loss": 0.7977, "nll_loss": 0.7772385478019714, "rewards/accuracies": 1.0, "rewards/chosen": -0.27325281500816345, "rewards/margins": 0.22772979736328125, "rewards/rejected": -0.5009825825691223, "step": 5345 }, { "epoch": 14.636550308008214, "grad_norm": 6.854866027832031, "learning_rate": 2.680821917808219e-07, "log_odds_chosen": 4.171530723571777, "log_odds_ratio": -0.09466706216335297, "logits/chosen": 0.9342145919799805, "logits/rejected": 0.9880244135856628, "logps/chosen": -2.3046576976776123, "logps/rejected": -6.35939884185791, "loss": 0.7695, "nll_loss": 0.7600134611129761, "rewards/accuracies": 1.0, "rewards/chosen": -0.23046578466892242, "rewards/margins": 0.40547406673431396, "rewards/rejected": -0.6359398365020752, "step": 5346 }, { "epoch": 14.639288158795345, "grad_norm": 7.957022190093994, "learning_rate": 2.67945205479452e-07, "log_odds_chosen": 0.9015921354293823, "log_odds_ratio": -0.5566385984420776, "logits/chosen": 1.088592529296875, "logits/rejected": 1.1292164325714111, "logps/chosen": -2.3923182487487793, "logps/rejected": -3.2047781944274902, "loss": 0.67, "nll_loss": 0.6143375039100647, "rewards/accuracies": 0.625, "rewards/chosen": -0.23923182487487793, "rewards/margins": 0.08124600350856781, "rewards/rejected": -0.32047781348228455, "step": 5347 }, { "epoch": 14.642026009582478, "grad_norm": 6.482845783233643, "learning_rate": 2.678082191780822e-07, "log_odds_chosen": 2.2924063205718994, "log_odds_ratio": -0.24562117457389832, "logits/chosen": 0.9371333122253418, "logits/rejected": 0.9358992576599121, "logps/chosen": -2.3018879890441895, "logps/rejected": -4.478218078613281, "loss": 0.6756, "nll_loss": 0.6510591506958008, "rewards/accuracies": 1.0, "rewards/chosen": -0.23018884658813477, "rewards/margins": 0.2176329642534256, "rewards/rejected": -0.44782179594039917, "step": 5348 }, { "epoch": 14.64476386036961, "grad_norm": 4.771328926086426, "learning_rate": 2.6767123287671233e-07, "log_odds_chosen": 0.7206523418426514, "log_odds_ratio": -0.48136091232299805, "logits/chosen": 0.8004050254821777, "logits/rejected": 0.8240123391151428, "logps/chosen": -2.5720252990722656, "logps/rejected": -3.2366018295288086, "loss": 0.6609, "nll_loss": 0.6128100156784058, "rewards/accuracies": 0.75, "rewards/chosen": -0.25720250606536865, "rewards/margins": 0.06645768135786057, "rewards/rejected": -0.3236601948738098, "step": 5349 }, { "epoch": 14.647501711156742, "grad_norm": 4.8080267906188965, "learning_rate": 2.675342465753425e-07, "log_odds_chosen": 3.6030020713806152, "log_odds_ratio": -0.11105522513389587, "logits/chosen": 0.7697146534919739, "logits/rejected": 0.7785613536834717, "logps/chosen": -2.1673946380615234, "logps/rejected": -5.633197784423828, "loss": 0.5886, "nll_loss": 0.5775025486946106, "rewards/accuracies": 1.0, "rewards/chosen": -0.2167394608259201, "rewards/margins": 0.3465803563594818, "rewards/rejected": -0.5633198618888855, "step": 5350 }, { "epoch": 14.650239561943874, "grad_norm": 5.431736469268799, "learning_rate": 2.673972602739726e-07, "log_odds_chosen": 2.1572208404541016, "log_odds_ratio": -0.21855774521827698, "logits/chosen": 0.7782889008522034, "logits/rejected": 0.7964038252830505, "logps/chosen": -2.4939050674438477, "logps/rejected": -4.574020862579346, "loss": 0.7239, "nll_loss": 0.7020679712295532, "rewards/accuracies": 1.0, "rewards/chosen": -0.24939048290252686, "rewards/margins": 0.20801159739494324, "rewards/rejected": -0.4574021100997925, "step": 5351 }, { "epoch": 14.652977412731007, "grad_norm": 6.268148899078369, "learning_rate": 2.672602739726027e-07, "log_odds_chosen": 1.018023133277893, "log_odds_ratio": -0.3919883668422699, "logits/chosen": 0.8410962820053101, "logits/rejected": 0.904455304145813, "logps/chosen": -2.6709206104278564, "logps/rejected": -3.597536087036133, "loss": 0.6752, "nll_loss": 0.6360490322113037, "rewards/accuracies": 0.875, "rewards/chosen": -0.2670920789241791, "rewards/margins": 0.0926615372300148, "rewards/rejected": -0.3597536087036133, "step": 5352 }, { "epoch": 14.655715263518138, "grad_norm": 5.871987819671631, "learning_rate": 2.671232876712329e-07, "log_odds_chosen": 0.8600527048110962, "log_odds_ratio": -0.5403236746788025, "logits/chosen": 0.7466543912887573, "logits/rejected": 0.6976364254951477, "logps/chosen": -2.170851945877075, "logps/rejected": -2.957674264907837, "loss": 0.7232, "nll_loss": 0.6691296100616455, "rewards/accuracies": 0.75, "rewards/chosen": -0.21708518266677856, "rewards/margins": 0.07868224382400513, "rewards/rejected": -0.2957674264907837, "step": 5353 }, { "epoch": 14.65845311430527, "grad_norm": 9.830772399902344, "learning_rate": 2.66986301369863e-07, "log_odds_chosen": 1.8279087543487549, "log_odds_ratio": -0.3926880955696106, "logits/chosen": 0.7309879064559937, "logits/rejected": 0.7241722345352173, "logps/chosen": -2.7118723392486572, "logps/rejected": -4.4292755126953125, "loss": 0.8173, "nll_loss": 0.7780259847640991, "rewards/accuracies": 0.875, "rewards/chosen": -0.2711872458457947, "rewards/margins": 0.17174032330513, "rewards/rejected": -0.4429275393486023, "step": 5354 }, { "epoch": 14.661190965092402, "grad_norm": 5.1372294425964355, "learning_rate": 2.6684931506849314e-07, "log_odds_chosen": 1.8149287700653076, "log_odds_ratio": -0.32944339513778687, "logits/chosen": 0.7481395602226257, "logits/rejected": 0.761398196220398, "logps/chosen": -1.631690263748169, "logps/rejected": -3.2715630531311035, "loss": 0.6109, "nll_loss": 0.5779469013214111, "rewards/accuracies": 0.875, "rewards/chosen": -0.1631690412759781, "rewards/margins": 0.16398726403713226, "rewards/rejected": -0.32715630531311035, "step": 5355 }, { "epoch": 14.663928815879535, "grad_norm": 6.845120906829834, "learning_rate": 2.667123287671233e-07, "log_odds_chosen": 1.2167500257492065, "log_odds_ratio": -0.3577803671360016, "logits/chosen": 0.5880244374275208, "logits/rejected": 0.5655478835105896, "logps/chosen": -2.3796072006225586, "logps/rejected": -3.4845774173736572, "loss": 0.6967, "nll_loss": 0.660879909992218, "rewards/accuracies": 0.75, "rewards/chosen": -0.23796072602272034, "rewards/margins": 0.11049701273441315, "rewards/rejected": -0.3484577536582947, "step": 5356 }, { "epoch": 14.666666666666666, "grad_norm": 5.945518970489502, "learning_rate": 2.6657534246575344e-07, "log_odds_chosen": 2.5712430477142334, "log_odds_ratio": -0.2198338657617569, "logits/chosen": 0.9645117521286011, "logits/rejected": 1.0001671314239502, "logps/chosen": -2.691458225250244, "logps/rejected": -5.192526817321777, "loss": 0.7151, "nll_loss": 0.6931408643722534, "rewards/accuracies": 1.0, "rewards/chosen": -0.2691458463668823, "rewards/margins": 0.2501068413257599, "rewards/rejected": -0.5192526578903198, "step": 5357 }, { "epoch": 14.669404517453799, "grad_norm": 4.987655162811279, "learning_rate": 2.6643835616438354e-07, "log_odds_chosen": 1.96846604347229, "log_odds_ratio": -0.34234923124313354, "logits/chosen": 0.7622352838516235, "logits/rejected": 0.7587956190109253, "logps/chosen": -2.222994565963745, "logps/rejected": -4.081411838531494, "loss": 0.5956, "nll_loss": 0.5613382458686829, "rewards/accuracies": 0.75, "rewards/chosen": -0.2222994565963745, "rewards/margins": 0.18584170937538147, "rewards/rejected": -0.40814119577407837, "step": 5358 }, { "epoch": 14.67214236824093, "grad_norm": 5.168966293334961, "learning_rate": 2.663013698630137e-07, "log_odds_chosen": 2.5875191688537598, "log_odds_ratio": -0.2891753911972046, "logits/chosen": 0.7239742875099182, "logits/rejected": 0.7644045948982239, "logps/chosen": -2.4893651008605957, "logps/rejected": -5.004706382751465, "loss": 0.7404, "nll_loss": 0.7115143537521362, "rewards/accuracies": 0.875, "rewards/chosen": -0.2489365041255951, "rewards/margins": 0.251534104347229, "rewards/rejected": -0.5004706382751465, "step": 5359 }, { "epoch": 14.674880219028063, "grad_norm": 5.469573497772217, "learning_rate": 2.6616438356164384e-07, "log_odds_chosen": 2.3073298931121826, "log_odds_ratio": -0.21217484772205353, "logits/chosen": 0.7320568561553955, "logits/rejected": 0.7583060264587402, "logps/chosen": -1.8990488052368164, "logps/rejected": -4.047645568847656, "loss": 0.6791, "nll_loss": 0.6578922867774963, "rewards/accuracies": 1.0, "rewards/chosen": -0.18990489840507507, "rewards/margins": 0.21485967934131622, "rewards/rejected": -0.4047645926475525, "step": 5360 }, { "epoch": 14.677618069815194, "grad_norm": 9.907182693481445, "learning_rate": 2.6602739726027394e-07, "log_odds_chosen": 2.5142595767974854, "log_odds_ratio": -0.4269520342350006, "logits/chosen": 0.7585995197296143, "logits/rejected": 0.7867993116378784, "logps/chosen": -2.423396587371826, "logps/rejected": -4.8679351806640625, "loss": 0.7043, "nll_loss": 0.661653995513916, "rewards/accuracies": 0.875, "rewards/chosen": -0.24233965575695038, "rewards/margins": 0.24445384740829468, "rewards/rejected": -0.48679354786872864, "step": 5361 }, { "epoch": 14.680355920602327, "grad_norm": 6.3029069900512695, "learning_rate": 2.658904109589041e-07, "log_odds_chosen": 2.256756544113159, "log_odds_ratio": -0.1907896101474762, "logits/chosen": 0.7023502588272095, "logits/rejected": 0.7094371318817139, "logps/chosen": -2.436534881591797, "logps/rejected": -4.524388313293457, "loss": 0.6147, "nll_loss": 0.595634400844574, "rewards/accuracies": 1.0, "rewards/chosen": -0.24365349113941193, "rewards/margins": 0.20878534018993378, "rewards/rejected": -0.4524388313293457, "step": 5362 }, { "epoch": 14.683093771389458, "grad_norm": 5.686381816864014, "learning_rate": 2.6575342465753425e-07, "log_odds_chosen": 1.3367071151733398, "log_odds_ratio": -0.3383964002132416, "logits/chosen": 0.8884412050247192, "logits/rejected": 0.9405790567398071, "logps/chosen": -2.1746883392333984, "logps/rejected": -3.4196577072143555, "loss": 0.5758, "nll_loss": 0.5419636964797974, "rewards/accuracies": 0.875, "rewards/chosen": -0.21746884286403656, "rewards/margins": 0.12449690699577332, "rewards/rejected": -0.34196576476097107, "step": 5363 }, { "epoch": 14.685831622176591, "grad_norm": 5.150101184844971, "learning_rate": 2.656164383561644e-07, "log_odds_chosen": 2.5206475257873535, "log_odds_ratio": -0.24799704551696777, "logits/chosen": 1.068895697593689, "logits/rejected": 0.975437581539154, "logps/chosen": -2.1976866722106934, "logps/rejected": -4.605866432189941, "loss": 0.6509, "nll_loss": 0.6260872483253479, "rewards/accuracies": 1.0, "rewards/chosen": -0.21976865828037262, "rewards/margins": 0.24081800878047943, "rewards/rejected": -0.46058666706085205, "step": 5364 }, { "epoch": 14.688569472963724, "grad_norm": 5.094153881072998, "learning_rate": 2.654794520547945e-07, "log_odds_chosen": 2.5341639518737793, "log_odds_ratio": -0.46436285972595215, "logits/chosen": 0.7273756861686707, "logits/rejected": 0.7886481285095215, "logps/chosen": -2.085958480834961, "logps/rejected": -4.356109619140625, "loss": 0.7169, "nll_loss": 0.6705102920532227, "rewards/accuracies": 0.875, "rewards/chosen": -0.2085958570241928, "rewards/margins": 0.22701512277126312, "rewards/rejected": -0.4356110095977783, "step": 5365 }, { "epoch": 14.691307323750856, "grad_norm": 5.954479217529297, "learning_rate": 2.6534246575342465e-07, "log_odds_chosen": 2.8577332496643066, "log_odds_ratio": -0.1434003859758377, "logits/chosen": 0.6918030977249146, "logits/rejected": 0.673424243927002, "logps/chosen": -1.9712200164794922, "logps/rejected": -4.69489049911499, "loss": 0.6117, "nll_loss": 0.5973191261291504, "rewards/accuracies": 1.0, "rewards/chosen": -0.1971220225095749, "rewards/margins": 0.27236703038215637, "rewards/rejected": -0.46948903799057007, "step": 5366 }, { "epoch": 14.694045174537987, "grad_norm": 5.681075572967529, "learning_rate": 2.652054794520548e-07, "log_odds_chosen": 2.0741052627563477, "log_odds_ratio": -0.25889497995376587, "logits/chosen": 0.981256902217865, "logits/rejected": 1.0118752717971802, "logps/chosen": -1.840592622756958, "logps/rejected": -3.7604408264160156, "loss": 0.6163, "nll_loss": 0.5904403924942017, "rewards/accuracies": 1.0, "rewards/chosen": -0.1840592622756958, "rewards/margins": 0.19198483228683472, "rewards/rejected": -0.3760440945625305, "step": 5367 }, { "epoch": 14.69678302532512, "grad_norm": 4.732837200164795, "learning_rate": 2.650684931506849e-07, "log_odds_chosen": 2.609684467315674, "log_odds_ratio": -0.15589122474193573, "logits/chosen": 0.8452746868133545, "logits/rejected": 0.8962602615356445, "logps/chosen": -1.955244779586792, "logps/rejected": -4.400508880615234, "loss": 0.6359, "nll_loss": 0.6203094124794006, "rewards/accuracies": 1.0, "rewards/chosen": -0.19552448391914368, "rewards/margins": 0.2445264458656311, "rewards/rejected": -0.4400508999824524, "step": 5368 }, { "epoch": 14.699520876112253, "grad_norm": 6.541799068450928, "learning_rate": 2.6493150684931505e-07, "log_odds_chosen": 0.832798182964325, "log_odds_ratio": -0.44347280263900757, "logits/chosen": 0.9350264668464661, "logits/rejected": 1.0406875610351562, "logps/chosen": -3.0625364780426025, "logps/rejected": -3.8739898204803467, "loss": 0.741, "nll_loss": 0.6966506838798523, "rewards/accuracies": 0.625, "rewards/chosen": -0.30625367164611816, "rewards/margins": 0.08114534616470337, "rewards/rejected": -0.38739901781082153, "step": 5369 }, { "epoch": 14.702258726899384, "grad_norm": 4.748234272003174, "learning_rate": 2.647945205479452e-07, "log_odds_chosen": 3.312694549560547, "log_odds_ratio": -0.15643204748630524, "logits/chosen": 1.0715696811676025, "logits/rejected": 1.11247718334198, "logps/chosen": -2.065736770629883, "logps/rejected": -5.202133655548096, "loss": 0.6072, "nll_loss": 0.5916035771369934, "rewards/accuracies": 1.0, "rewards/chosen": -0.20657366514205933, "rewards/margins": 0.31363970041275024, "rewards/rejected": -0.5202133655548096, "step": 5370 }, { "epoch": 14.704996577686517, "grad_norm": 4.988943576812744, "learning_rate": 2.6465753424657536e-07, "log_odds_chosen": 1.4483628273010254, "log_odds_ratio": -0.3283192217350006, "logits/chosen": 0.8735827207565308, "logits/rejected": 0.8794422149658203, "logps/chosen": -2.4946963787078857, "logps/rejected": -3.85670804977417, "loss": 0.6761, "nll_loss": 0.6432854533195496, "rewards/accuracies": 0.875, "rewards/chosen": -0.24946963787078857, "rewards/margins": 0.1362011879682541, "rewards/rejected": -0.38567081093788147, "step": 5371 }, { "epoch": 14.707734428473648, "grad_norm": 4.96566104888916, "learning_rate": 2.6452054794520546e-07, "log_odds_chosen": 2.4344053268432617, "log_odds_ratio": -0.1637086272239685, "logits/chosen": 0.7005252242088318, "logits/rejected": 0.6549938321113586, "logps/chosen": -1.832166314125061, "logps/rejected": -4.082630157470703, "loss": 0.6353, "nll_loss": 0.6189587116241455, "rewards/accuracies": 1.0, "rewards/chosen": -0.1832166165113449, "rewards/margins": 0.22504639625549316, "rewards/rejected": -0.40826302766799927, "step": 5372 }, { "epoch": 14.710472279260781, "grad_norm": 5.378352642059326, "learning_rate": 2.643835616438356e-07, "log_odds_chosen": 1.8138269186019897, "log_odds_ratio": -0.3132230043411255, "logits/chosen": 0.9386230111122131, "logits/rejected": 0.9674373269081116, "logps/chosen": -2.198533773422241, "logps/rejected": -3.9342589378356934, "loss": 0.6001, "nll_loss": 0.5688192248344421, "rewards/accuracies": 0.875, "rewards/chosen": -0.21985338628292084, "rewards/margins": 0.17357252538204193, "rewards/rejected": -0.3934258818626404, "step": 5373 }, { "epoch": 14.713210130047912, "grad_norm": 5.808071613311768, "learning_rate": 2.6424657534246576e-07, "log_odds_chosen": 2.4123637676239014, "log_odds_ratio": -0.37099945545196533, "logits/chosen": 0.6536428332328796, "logits/rejected": 0.7526544332504272, "logps/chosen": -2.5942506790161133, "logps/rejected": -4.907297611236572, "loss": 0.7932, "nll_loss": 0.7560738325119019, "rewards/accuracies": 0.75, "rewards/chosen": -0.2594250440597534, "rewards/margins": 0.23130470514297485, "rewards/rejected": -0.49072974920272827, "step": 5374 }, { "epoch": 14.715947980835045, "grad_norm": 5.151019096374512, "learning_rate": 2.6410958904109586e-07, "log_odds_chosen": 2.885545015335083, "log_odds_ratio": -0.17955856025218964, "logits/chosen": 0.9653605222702026, "logits/rejected": 0.9630237221717834, "logps/chosen": -2.092458724975586, "logps/rejected": -4.853719711303711, "loss": 0.6834, "nll_loss": 0.6654061675071716, "rewards/accuracies": 1.0, "rewards/chosen": -0.20924586057662964, "rewards/margins": 0.27612608671188354, "rewards/rejected": -0.48537197709083557, "step": 5375 }, { "epoch": 14.718685831622176, "grad_norm": 5.637526035308838, "learning_rate": 2.63972602739726e-07, "log_odds_chosen": 1.9107890129089355, "log_odds_ratio": -0.27108529210090637, "logits/chosen": 0.7055057287216187, "logits/rejected": 0.6918363571166992, "logps/chosen": -2.0972423553466797, "logps/rejected": -3.9072771072387695, "loss": 0.7082, "nll_loss": 0.6810584664344788, "rewards/accuracies": 1.0, "rewards/chosen": -0.20972424745559692, "rewards/margins": 0.18100348114967346, "rewards/rejected": -0.390727698802948, "step": 5376 }, { "epoch": 14.72142368240931, "grad_norm": 5.047316074371338, "learning_rate": 2.6383561643835616e-07, "log_odds_chosen": 2.3158183097839355, "log_odds_ratio": -0.2306804358959198, "logits/chosen": 0.6515021920204163, "logits/rejected": 0.6513555645942688, "logps/chosen": -1.95291268825531, "logps/rejected": -4.133410453796387, "loss": 0.6832, "nll_loss": 0.6601455807685852, "rewards/accuracies": 0.875, "rewards/chosen": -0.19529126584529877, "rewards/margins": 0.2180497944355011, "rewards/rejected": -0.41334104537963867, "step": 5377 }, { "epoch": 14.72416153319644, "grad_norm": 6.003580570220947, "learning_rate": 2.6369863013698626e-07, "log_odds_chosen": 2.2251739501953125, "log_odds_ratio": -0.30569297075271606, "logits/chosen": 0.6836012601852417, "logits/rejected": 0.719007134437561, "logps/chosen": -2.0459349155426025, "logps/rejected": -4.148689270019531, "loss": 0.7404, "nll_loss": 0.7097843289375305, "rewards/accuracies": 1.0, "rewards/chosen": -0.2045935094356537, "rewards/margins": 0.21027539670467377, "rewards/rejected": -0.41486889123916626, "step": 5378 }, { "epoch": 14.726899383983573, "grad_norm": 6.493971347808838, "learning_rate": 2.635616438356164e-07, "log_odds_chosen": 1.2457906007766724, "log_odds_ratio": -0.35788699984550476, "logits/chosen": 0.6993792653083801, "logits/rejected": 0.7077643871307373, "logps/chosen": -2.460075855255127, "logps/rejected": -3.5855493545532227, "loss": 0.5819, "nll_loss": 0.5460973381996155, "rewards/accuracies": 0.875, "rewards/chosen": -0.24600759148597717, "rewards/margins": 0.11254734545946121, "rewards/rejected": -0.3585549592971802, "step": 5379 }, { "epoch": 14.729637234770705, "grad_norm": 6.256384372711182, "learning_rate": 2.6342465753424657e-07, "log_odds_chosen": 1.4644289016723633, "log_odds_ratio": -0.4480821192264557, "logits/chosen": 0.915557861328125, "logits/rejected": 0.8856440782546997, "logps/chosen": -2.3911173343658447, "logps/rejected": -3.7583465576171875, "loss": 0.7848, "nll_loss": 0.739980936050415, "rewards/accuracies": 0.75, "rewards/chosen": -0.2391117364168167, "rewards/margins": 0.13672290742397308, "rewards/rejected": -0.3758346438407898, "step": 5380 }, { "epoch": 14.732375085557837, "grad_norm": 7.714416027069092, "learning_rate": 2.632876712328767e-07, "log_odds_chosen": 2.1715948581695557, "log_odds_ratio": -0.24889767169952393, "logits/chosen": 1.0776067972183228, "logits/rejected": 1.1936589479446411, "logps/chosen": -2.9677515029907227, "logps/rejected": -5.061584949493408, "loss": 0.7045, "nll_loss": 0.6795884966850281, "rewards/accuracies": 0.875, "rewards/chosen": -0.29677513241767883, "rewards/margins": 0.20938332378864288, "rewards/rejected": -0.5061584711074829, "step": 5381 }, { "epoch": 14.735112936344969, "grad_norm": 6.234890460968018, "learning_rate": 2.631506849315068e-07, "log_odds_chosen": 2.519287109375, "log_odds_ratio": -0.20391854643821716, "logits/chosen": 0.9062918424606323, "logits/rejected": 0.9551769495010376, "logps/chosen": -2.1073901653289795, "logps/rejected": -4.509252548217773, "loss": 0.6394, "nll_loss": 0.6190284490585327, "rewards/accuracies": 1.0, "rewards/chosen": -0.21073901653289795, "rewards/margins": 0.24018621444702148, "rewards/rejected": -0.45092523097991943, "step": 5382 }, { "epoch": 14.737850787132102, "grad_norm": 5.048222541809082, "learning_rate": 2.6301369863013697e-07, "log_odds_chosen": 2.1494414806365967, "log_odds_ratio": -0.23875105381011963, "logits/chosen": 0.773022472858429, "logits/rejected": 0.7895461320877075, "logps/chosen": -2.138256788253784, "logps/rejected": -4.179209232330322, "loss": 0.7269, "nll_loss": 0.7030650973320007, "rewards/accuracies": 0.875, "rewards/chosen": -0.21382567286491394, "rewards/margins": 0.20409530401229858, "rewards/rejected": -0.41792094707489014, "step": 5383 }, { "epoch": 14.740588637919233, "grad_norm": 6.445687770843506, "learning_rate": 2.628767123287671e-07, "log_odds_chosen": 2.7173542976379395, "log_odds_ratio": -0.2945857644081116, "logits/chosen": 0.7844773530960083, "logits/rejected": 0.8122851848602295, "logps/chosen": -2.477083921432495, "logps/rejected": -5.051637649536133, "loss": 0.7348, "nll_loss": 0.7053523659706116, "rewards/accuracies": 0.875, "rewards/chosen": -0.24770838022232056, "rewards/margins": 0.25745540857315063, "rewards/rejected": -0.5051637887954712, "step": 5384 }, { "epoch": 14.743326488706366, "grad_norm": 5.640596866607666, "learning_rate": 2.627397260273972e-07, "log_odds_chosen": 2.262176990509033, "log_odds_ratio": -0.2016461193561554, "logits/chosen": 0.8406567573547363, "logits/rejected": 0.8563414812088013, "logps/chosen": -1.851593255996704, "logps/rejected": -3.9076485633850098, "loss": 0.5964, "nll_loss": 0.5762794613838196, "rewards/accuracies": 1.0, "rewards/chosen": -0.1851593255996704, "rewards/margins": 0.20560553669929504, "rewards/rejected": -0.39076486229896545, "step": 5385 }, { "epoch": 14.746064339493497, "grad_norm": 6.502933502197266, "learning_rate": 2.626027397260274e-07, "log_odds_chosen": 2.824207305908203, "log_odds_ratio": -0.20131650567054749, "logits/chosen": 0.505763053894043, "logits/rejected": 0.4511704742908478, "logps/chosen": -1.4521207809448242, "logps/rejected": -4.067911148071289, "loss": 0.7076, "nll_loss": 0.6874609589576721, "rewards/accuracies": 1.0, "rewards/chosen": -0.1452120840549469, "rewards/margins": 0.26157906651496887, "rewards/rejected": -0.4067911207675934, "step": 5386 }, { "epoch": 14.74880219028063, "grad_norm": 4.990322589874268, "learning_rate": 2.624657534246575e-07, "log_odds_chosen": 1.9303131103515625, "log_odds_ratio": -0.24918146431446075, "logits/chosen": 0.6694779396057129, "logits/rejected": 0.6753947734832764, "logps/chosen": -2.1180739402770996, "logps/rejected": -3.9526290893554688, "loss": 0.7047, "nll_loss": 0.6797667741775513, "rewards/accuracies": 0.875, "rewards/chosen": -0.21180739998817444, "rewards/margins": 0.18345552682876587, "rewards/rejected": -0.3952629566192627, "step": 5387 }, { "epoch": 14.751540041067761, "grad_norm": 5.750461578369141, "learning_rate": 2.623287671232877e-07, "log_odds_chosen": 2.1717636585235596, "log_odds_ratio": -0.28666526079177856, "logits/chosen": 0.7030324935913086, "logits/rejected": 0.6859650611877441, "logps/chosen": -1.904776692390442, "logps/rejected": -3.955599308013916, "loss": 0.5749, "nll_loss": 0.5462294816970825, "rewards/accuracies": 1.0, "rewards/chosen": -0.1904776692390442, "rewards/margins": 0.2050822377204895, "rewards/rejected": -0.3955599367618561, "step": 5388 }, { "epoch": 14.754277891854894, "grad_norm": 4.822018623352051, "learning_rate": 2.621917808219178e-07, "log_odds_chosen": 3.054352045059204, "log_odds_ratio": -0.30473989248275757, "logits/chosen": 0.8657059073448181, "logits/rejected": 0.972896933555603, "logps/chosen": -2.466439723968506, "logps/rejected": -5.455440521240234, "loss": 0.6641, "nll_loss": 0.6336178779602051, "rewards/accuracies": 0.75, "rewards/chosen": -0.24664399027824402, "rewards/margins": 0.2989000380039215, "rewards/rejected": -0.5455440282821655, "step": 5389 }, { "epoch": 14.757015742642025, "grad_norm": 5.9038405418396, "learning_rate": 2.6205479452054793e-07, "log_odds_chosen": 3.067540168762207, "log_odds_ratio": -0.1642424315214157, "logits/chosen": 0.7274073362350464, "logits/rejected": 0.6904662847518921, "logps/chosen": -2.2433457374572754, "logps/rejected": -5.16467809677124, "loss": 0.7083, "nll_loss": 0.6918596625328064, "rewards/accuracies": 1.0, "rewards/chosen": -0.22433459758758545, "rewards/margins": 0.29213324189186096, "rewards/rejected": -0.516467809677124, "step": 5390 }, { "epoch": 14.759753593429158, "grad_norm": 4.939763069152832, "learning_rate": 2.619178082191781e-07, "log_odds_chosen": 3.276297092437744, "log_odds_ratio": -0.14989157021045685, "logits/chosen": 0.9156441688537598, "logits/rejected": 0.9887936115264893, "logps/chosen": -2.1070539951324463, "logps/rejected": -5.239701747894287, "loss": 0.6998, "nll_loss": 0.6848032474517822, "rewards/accuracies": 1.0, "rewards/chosen": -0.21070541441440582, "rewards/margins": 0.31326478719711304, "rewards/rejected": -0.5239701867103577, "step": 5391 }, { "epoch": 14.762491444216291, "grad_norm": 5.556890487670898, "learning_rate": 2.617808219178082e-07, "log_odds_chosen": 2.0413689613342285, "log_odds_ratio": -0.2680453062057495, "logits/chosen": 0.9382517337799072, "logits/rejected": 0.9514880180358887, "logps/chosen": -1.7836215496063232, "logps/rejected": -3.642242908477783, "loss": 0.6116, "nll_loss": 0.5848197937011719, "rewards/accuracies": 1.0, "rewards/chosen": -0.1783621609210968, "rewards/margins": 0.18586215376853943, "rewards/rejected": -0.36422431468963623, "step": 5392 }, { "epoch": 14.765229295003422, "grad_norm": 4.976710796356201, "learning_rate": 2.6164383561643833e-07, "log_odds_chosen": 1.2899296283721924, "log_odds_ratio": -0.41382816433906555, "logits/chosen": 1.0397639274597168, "logits/rejected": 1.072808027267456, "logps/chosen": -2.2930595874786377, "logps/rejected": -3.423706531524658, "loss": 0.6416, "nll_loss": 0.6002400517463684, "rewards/accuracies": 0.75, "rewards/chosen": -0.22930596768856049, "rewards/margins": 0.11306467652320862, "rewards/rejected": -0.3423706293106079, "step": 5393 }, { "epoch": 14.767967145790553, "grad_norm": 6.013468265533447, "learning_rate": 2.615068493150685e-07, "log_odds_chosen": 3.2239553928375244, "log_odds_ratio": -0.18084998428821564, "logits/chosen": 0.9054321050643921, "logits/rejected": 0.9157956838607788, "logps/chosen": -2.674281120300293, "logps/rejected": -5.7499799728393555, "loss": 0.7747, "nll_loss": 0.7566614747047424, "rewards/accuracies": 1.0, "rewards/chosen": -0.26742812991142273, "rewards/margins": 0.30756986141204834, "rewards/rejected": -0.5749979615211487, "step": 5394 }, { "epoch": 14.770704996577686, "grad_norm": 5.04480504989624, "learning_rate": 2.6136986301369864e-07, "log_odds_chosen": 2.4137682914733887, "log_odds_ratio": -0.30642277002334595, "logits/chosen": 0.843880295753479, "logits/rejected": 0.8679193258285522, "logps/chosen": -2.0674290657043457, "logps/rejected": -4.363892078399658, "loss": 0.655, "nll_loss": 0.6243235468864441, "rewards/accuracies": 0.875, "rewards/chosen": -0.20674291253089905, "rewards/margins": 0.22964632511138916, "rewards/rejected": -0.4363892078399658, "step": 5395 }, { "epoch": 14.77344284736482, "grad_norm": 6.444547176361084, "learning_rate": 2.6123287671232874e-07, "log_odds_chosen": 2.099395751953125, "log_odds_ratio": -0.3349219858646393, "logits/chosen": 0.551349401473999, "logits/rejected": 0.5205332040786743, "logps/chosen": -2.4921693801879883, "logps/rejected": -4.472455978393555, "loss": 0.6894, "nll_loss": 0.6559135317802429, "rewards/accuracies": 0.875, "rewards/chosen": -0.2492169439792633, "rewards/margins": 0.19802866876125336, "rewards/rejected": -0.44724559783935547, "step": 5396 }, { "epoch": 14.77618069815195, "grad_norm": 5.747513294219971, "learning_rate": 2.6109589041095894e-07, "log_odds_chosen": 1.3900952339172363, "log_odds_ratio": -0.34617525339126587, "logits/chosen": 0.9176176190376282, "logits/rejected": 0.9423211216926575, "logps/chosen": -2.0523006916046143, "logps/rejected": -3.3475193977355957, "loss": 0.6749, "nll_loss": 0.6402591466903687, "rewards/accuracies": 1.0, "rewards/chosen": -0.20523007214069366, "rewards/margins": 0.12952187657356262, "rewards/rejected": -0.3347519338130951, "step": 5397 }, { "epoch": 14.778918548939084, "grad_norm": 5.988826274871826, "learning_rate": 2.6095890410958904e-07, "log_odds_chosen": 2.650313377380371, "log_odds_ratio": -0.29181569814682007, "logits/chosen": 0.8544344902038574, "logits/rejected": 0.7786205410957336, "logps/chosen": -2.2295243740081787, "logps/rejected": -4.751594543457031, "loss": 0.7044, "nll_loss": 0.6751875877380371, "rewards/accuracies": 0.875, "rewards/chosen": -0.2229524403810501, "rewards/margins": 0.2522070109844208, "rewards/rejected": -0.4751594662666321, "step": 5398 }, { "epoch": 14.781656399726215, "grad_norm": 4.678533554077148, "learning_rate": 2.6082191780821914e-07, "log_odds_chosen": 3.355354070663452, "log_odds_ratio": -0.20674866437911987, "logits/chosen": 0.7838892936706543, "logits/rejected": 0.8021095395088196, "logps/chosen": -1.6699621677398682, "logps/rejected": -4.823980808258057, "loss": 0.6637, "nll_loss": 0.6430641412734985, "rewards/accuracies": 0.875, "rewards/chosen": -0.16699621081352234, "rewards/margins": 0.3154018819332123, "rewards/rejected": -0.48239806294441223, "step": 5399 }, { "epoch": 14.784394250513348, "grad_norm": 9.932600021362305, "learning_rate": 2.606849315068493e-07, "log_odds_chosen": 0.35114210844039917, "log_odds_ratio": -0.9426628351211548, "logits/chosen": 0.8392477631568909, "logits/rejected": 0.8827968835830688, "logps/chosen": -3.337498903274536, "logps/rejected": -3.586512327194214, "loss": 0.7929, "nll_loss": 0.6986219882965088, "rewards/accuracies": 0.625, "rewards/chosen": -0.3337498903274536, "rewards/margins": 0.02490134909749031, "rewards/rejected": -0.35865122079849243, "step": 5400 }, { "epoch": 14.787132101300479, "grad_norm": 6.816246032714844, "learning_rate": 2.6054794520547944e-07, "log_odds_chosen": 1.7508602142333984, "log_odds_ratio": -0.6320816278457642, "logits/chosen": 0.911761999130249, "logits/rejected": 1.0611239671707153, "logps/chosen": -2.5047812461853027, "logps/rejected": -4.196528434753418, "loss": 0.6649, "nll_loss": 0.6016595959663391, "rewards/accuracies": 0.75, "rewards/chosen": -0.2504781484603882, "rewards/margins": 0.16917473077774048, "rewards/rejected": -0.4196528196334839, "step": 5401 }, { "epoch": 14.789869952087612, "grad_norm": 5.751650810241699, "learning_rate": 2.604109589041096e-07, "log_odds_chosen": 2.222604513168335, "log_odds_ratio": -0.2889997065067291, "logits/chosen": 0.8566291332244873, "logits/rejected": 0.8509914875030518, "logps/chosen": -2.6431946754455566, "logps/rejected": -4.7609357833862305, "loss": 0.6318, "nll_loss": 0.602851390838623, "rewards/accuracies": 0.875, "rewards/chosen": -0.2643194794654846, "rewards/margins": 0.2117740958929062, "rewards/rejected": -0.476093590259552, "step": 5402 }, { "epoch": 14.792607802874743, "grad_norm": 6.509545803070068, "learning_rate": 2.602739726027397e-07, "log_odds_chosen": 1.2028977870941162, "log_odds_ratio": -0.5135685205459595, "logits/chosen": 0.6988434791564941, "logits/rejected": 0.7328633069992065, "logps/chosen": -2.76277232170105, "logps/rejected": -3.8967692852020264, "loss": 0.6768, "nll_loss": 0.6254080533981323, "rewards/accuracies": 0.875, "rewards/chosen": -0.27627724409103394, "rewards/margins": 0.1133996993303299, "rewards/rejected": -0.38967692852020264, "step": 5403 }, { "epoch": 14.795345653661876, "grad_norm": 5.866238594055176, "learning_rate": 2.601369863013699e-07, "log_odds_chosen": 1.5487414598464966, "log_odds_ratio": -0.26903313398361206, "logits/chosen": 0.8108009696006775, "logits/rejected": 0.770386278629303, "logps/chosen": -2.069870948791504, "logps/rejected": -3.521455764770508, "loss": 0.7387, "nll_loss": 0.7118451595306396, "rewards/accuracies": 1.0, "rewards/chosen": -0.20698708295822144, "rewards/margins": 0.14515849947929382, "rewards/rejected": -0.35214558243751526, "step": 5404 }, { "epoch": 14.798083504449007, "grad_norm": 6.017904281616211, "learning_rate": 2.6e-07, "log_odds_chosen": 2.0111377239227295, "log_odds_ratio": -0.25604256987571716, "logits/chosen": 0.831882119178772, "logits/rejected": 0.8204512000083923, "logps/chosen": -1.943419337272644, "logps/rejected": -3.679253578186035, "loss": 0.6495, "nll_loss": 0.623866617679596, "rewards/accuracies": 0.875, "rewards/chosen": -0.19434192776679993, "rewards/margins": 0.17358343303203583, "rewards/rejected": -0.36792537569999695, "step": 5405 }, { "epoch": 14.80082135523614, "grad_norm": 5.483667373657227, "learning_rate": 2.598630136986301e-07, "log_odds_chosen": 1.38637375831604, "log_odds_ratio": -0.3259708285331726, "logits/chosen": 0.8225364685058594, "logits/rejected": 0.7680121660232544, "logps/chosen": -1.5932531356811523, "logps/rejected": -2.8292043209075928, "loss": 0.5486, "nll_loss": 0.5160208940505981, "rewards/accuracies": 0.875, "rewards/chosen": -0.15932531654834747, "rewards/margins": 0.12359512597322464, "rewards/rejected": -0.2829204499721527, "step": 5406 }, { "epoch": 14.803559206023271, "grad_norm": 5.739438056945801, "learning_rate": 2.5972602739726025e-07, "log_odds_chosen": 1.3406460285186768, "log_odds_ratio": -0.26145225763320923, "logits/chosen": 0.7667409777641296, "logits/rejected": 0.7600244879722595, "logps/chosen": -1.9183872938156128, "logps/rejected": -3.1417534351348877, "loss": 0.5828, "nll_loss": 0.5566675066947937, "rewards/accuracies": 1.0, "rewards/chosen": -0.19183874130249023, "rewards/margins": 0.12233661860227585, "rewards/rejected": -0.3141753375530243, "step": 5407 }, { "epoch": 14.806297056810404, "grad_norm": 7.027749538421631, "learning_rate": 2.595890410958904e-07, "log_odds_chosen": 1.4556585550308228, "log_odds_ratio": -0.38748571276664734, "logits/chosen": 0.828389048576355, "logits/rejected": 0.787085235118866, "logps/chosen": -1.6120314598083496, "logps/rejected": -2.958831310272217, "loss": 0.6185, "nll_loss": 0.5797609686851501, "rewards/accuracies": 0.75, "rewards/chosen": -0.16120314598083496, "rewards/margins": 0.13467997312545776, "rewards/rejected": -0.2958831191062927, "step": 5408 }, { "epoch": 14.809034907597535, "grad_norm": 6.54554557800293, "learning_rate": 2.5945205479452055e-07, "log_odds_chosen": 2.6705586910247803, "log_odds_ratio": -0.17442412674427032, "logits/chosen": 0.9952998161315918, "logits/rejected": 0.9538973569869995, "logps/chosen": -2.2900824546813965, "logps/rejected": -4.8079071044921875, "loss": 0.7043, "nll_loss": 0.6868469715118408, "rewards/accuracies": 1.0, "rewards/chosen": -0.22900822758674622, "rewards/margins": 0.25178250670433044, "rewards/rejected": -0.48079073429107666, "step": 5409 }, { "epoch": 14.811772758384668, "grad_norm": 9.11236572265625, "learning_rate": 2.5931506849315065e-07, "log_odds_chosen": 1.4383679628372192, "log_odds_ratio": -0.5261122584342957, "logits/chosen": 1.2088630199432373, "logits/rejected": 1.1805349588394165, "logps/chosen": -2.563467264175415, "logps/rejected": -3.7022218704223633, "loss": 0.7223, "nll_loss": 0.669731616973877, "rewards/accuracies": 0.75, "rewards/chosen": -0.256346732378006, "rewards/margins": 0.11387542635202408, "rewards/rejected": -0.37022218108177185, "step": 5410 }, { "epoch": 14.8145106091718, "grad_norm": 5.971323013305664, "learning_rate": 2.5917808219178086e-07, "log_odds_chosen": 2.1229872703552246, "log_odds_ratio": -0.17302319407463074, "logits/chosen": 0.9188312292098999, "logits/rejected": 1.00393545627594, "logps/chosen": -2.1240663528442383, "logps/rejected": -4.092280387878418, "loss": 0.6542, "nll_loss": 0.6368743777275085, "rewards/accuracies": 1.0, "rewards/chosen": -0.21240663528442383, "rewards/margins": 0.196821391582489, "rewards/rejected": -0.40922802686691284, "step": 5411 }, { "epoch": 14.817248459958932, "grad_norm": 5.0011491775512695, "learning_rate": 2.5904109589041096e-07, "log_odds_chosen": 3.455564022064209, "log_odds_ratio": -0.11746114492416382, "logits/chosen": 0.54585200548172, "logits/rejected": 0.5357832908630371, "logps/chosen": -1.3549662828445435, "logps/rejected": -4.520682334899902, "loss": 0.7972, "nll_loss": 0.7854070067405701, "rewards/accuracies": 1.0, "rewards/chosen": -0.13549664616584778, "rewards/margins": 0.3165716528892517, "rewards/rejected": -0.4520682692527771, "step": 5412 }, { "epoch": 14.819986310746064, "grad_norm": 5.248895645141602, "learning_rate": 2.5890410958904106e-07, "log_odds_chosen": 2.1558632850646973, "log_odds_ratio": -0.15004141628742218, "logits/chosen": 1.058652400970459, "logits/rejected": 1.1072230339050293, "logps/chosen": -2.204334020614624, "logps/rejected": -4.21727180480957, "loss": 0.6074, "nll_loss": 0.5924239158630371, "rewards/accuracies": 1.0, "rewards/chosen": -0.22043338418006897, "rewards/margins": 0.20129381120204926, "rewards/rejected": -0.4217272102832794, "step": 5413 }, { "epoch": 14.822724161533197, "grad_norm": 6.03975772857666, "learning_rate": 2.587671232876712e-07, "log_odds_chosen": 1.7382864952087402, "log_odds_ratio": -0.2402987778186798, "logits/chosen": 0.8299786448478699, "logits/rejected": 0.8546007871627808, "logps/chosen": -2.1965322494506836, "logps/rejected": -3.8057363033294678, "loss": 0.5865, "nll_loss": 0.5624672770500183, "rewards/accuracies": 0.875, "rewards/chosen": -0.21965321898460388, "rewards/margins": 0.1609204262495041, "rewards/rejected": -0.3805736303329468, "step": 5414 }, { "epoch": 14.825462012320328, "grad_norm": 5.762875080108643, "learning_rate": 2.5863013698630136e-07, "log_odds_chosen": 2.760829448699951, "log_odds_ratio": -0.31976884603500366, "logits/chosen": 1.1340916156768799, "logits/rejected": 1.0840696096420288, "logps/chosen": -2.08491587638855, "logps/rejected": -4.738198280334473, "loss": 0.5741, "nll_loss": 0.5420922040939331, "rewards/accuracies": 0.875, "rewards/chosen": -0.20849159359931946, "rewards/margins": 0.2653282880783081, "rewards/rejected": -0.4738198518753052, "step": 5415 }, { "epoch": 14.82819986310746, "grad_norm": 5.403860569000244, "learning_rate": 2.584931506849315e-07, "log_odds_chosen": 2.122901439666748, "log_odds_ratio": -0.3674596846103668, "logits/chosen": 0.6615580916404724, "logits/rejected": 0.6617195010185242, "logps/chosen": -1.7741420269012451, "logps/rejected": -3.77606463432312, "loss": 0.6109, "nll_loss": 0.5741919875144958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1774141937494278, "rewards/margins": 0.20019227266311646, "rewards/rejected": -0.37760651111602783, "step": 5416 }, { "epoch": 14.830937713894592, "grad_norm": 7.0940022468566895, "learning_rate": 2.583561643835616e-07, "log_odds_chosen": 2.6460962295532227, "log_odds_ratio": -0.35755109786987305, "logits/chosen": 0.9213899374008179, "logits/rejected": 0.9208681583404541, "logps/chosen": -2.32358980178833, "logps/rejected": -4.786880970001221, "loss": 0.7421, "nll_loss": 0.7063209414482117, "rewards/accuracies": 0.75, "rewards/chosen": -0.23235899209976196, "rewards/margins": 0.24632909893989563, "rewards/rejected": -0.4786880910396576, "step": 5417 }, { "epoch": 14.833675564681725, "grad_norm": 5.133569240570068, "learning_rate": 2.582191780821918e-07, "log_odds_chosen": 3.215052604675293, "log_odds_ratio": -0.24064287543296814, "logits/chosen": 0.9470210075378418, "logits/rejected": 1.0216214656829834, "logps/chosen": -2.213226795196533, "logps/rejected": -5.238785743713379, "loss": 0.6301, "nll_loss": 0.6060026288032532, "rewards/accuracies": 0.875, "rewards/chosen": -0.2213226854801178, "rewards/margins": 0.3025558888912201, "rewards/rejected": -0.5238785743713379, "step": 5418 }, { "epoch": 14.836413415468858, "grad_norm": 5.774510860443115, "learning_rate": 2.580821917808219e-07, "log_odds_chosen": 4.210710525512695, "log_odds_ratio": -0.1915685087442398, "logits/chosen": 0.8519127368927002, "logits/rejected": 0.8726685047149658, "logps/chosen": -2.7204253673553467, "logps/rejected": -6.8196892738342285, "loss": 0.7149, "nll_loss": 0.6957077980041504, "rewards/accuracies": 0.875, "rewards/chosen": -0.27204254269599915, "rewards/margins": 0.4099263846874237, "rewards/rejected": -0.6819689273834229, "step": 5419 }, { "epoch": 14.839151266255989, "grad_norm": 5.848110675811768, "learning_rate": 2.57945205479452e-07, "log_odds_chosen": 1.6351221799850464, "log_odds_ratio": -0.3526964783668518, "logits/chosen": 0.8382447957992554, "logits/rejected": 0.8232677578926086, "logps/chosen": -2.1624836921691895, "logps/rejected": -3.6936120986938477, "loss": 0.6709, "nll_loss": 0.6356750726699829, "rewards/accuracies": 0.875, "rewards/chosen": -0.21624836325645447, "rewards/margins": 0.15311285853385925, "rewards/rejected": -0.3693612217903137, "step": 5420 }, { "epoch": 14.841889117043122, "grad_norm": 5.391059398651123, "learning_rate": 2.5780821917808217e-07, "log_odds_chosen": 2.1450655460357666, "log_odds_ratio": -0.374451220035553, "logits/chosen": 0.852853536605835, "logits/rejected": 0.8556787967681885, "logps/chosen": -1.765442132949829, "logps/rejected": -3.7862751483917236, "loss": 0.6157, "nll_loss": 0.5782800912857056, "rewards/accuracies": 0.75, "rewards/chosen": -0.1765442192554474, "rewards/margins": 0.2020832896232605, "rewards/rejected": -0.3786275088787079, "step": 5421 }, { "epoch": 14.844626967830253, "grad_norm": 5.9820876121521, "learning_rate": 2.576712328767123e-07, "log_odds_chosen": 1.5217506885528564, "log_odds_ratio": -0.20997655391693115, "logits/chosen": 0.7671594619750977, "logits/rejected": 0.7929887175559998, "logps/chosen": -2.346707582473755, "logps/rejected": -3.76847505569458, "loss": 0.6475, "nll_loss": 0.6265220046043396, "rewards/accuracies": 1.0, "rewards/chosen": -0.2346707582473755, "rewards/margins": 0.14217671751976013, "rewards/rejected": -0.3768474757671356, "step": 5422 }, { "epoch": 14.847364818617386, "grad_norm": 8.893003463745117, "learning_rate": 2.5753424657534247e-07, "log_odds_chosen": 2.4071836471557617, "log_odds_ratio": -0.33628830313682556, "logits/chosen": 0.964989185333252, "logits/rejected": 0.9615238904953003, "logps/chosen": -2.1287102699279785, "logps/rejected": -4.438716888427734, "loss": 0.6549, "nll_loss": 0.6212727427482605, "rewards/accuracies": 0.875, "rewards/chosen": -0.2128710299730301, "rewards/margins": 0.2310006320476532, "rewards/rejected": -0.4438716769218445, "step": 5423 }, { "epoch": 14.850102669404517, "grad_norm": 5.547897815704346, "learning_rate": 2.5739726027397257e-07, "log_odds_chosen": 1.5666406154632568, "log_odds_ratio": -0.2599981427192688, "logits/chosen": 0.7135680913925171, "logits/rejected": 0.7334257364273071, "logps/chosen": -2.0711255073547363, "logps/rejected": -3.486009359359741, "loss": 0.6542, "nll_loss": 0.628193199634552, "rewards/accuracies": 1.0, "rewards/chosen": -0.20711255073547363, "rewards/margins": 0.14148840308189392, "rewards/rejected": -0.34860095381736755, "step": 5424 }, { "epoch": 14.85284052019165, "grad_norm": 6.729323387145996, "learning_rate": 2.572602739726027e-07, "log_odds_chosen": 2.6267499923706055, "log_odds_ratio": -0.22158940136432648, "logits/chosen": 0.9334813356399536, "logits/rejected": 0.9682461619377136, "logps/chosen": -2.7129831314086914, "logps/rejected": -5.234842777252197, "loss": 0.7408, "nll_loss": 0.7186681032180786, "rewards/accuracies": 1.0, "rewards/chosen": -0.2712983191013336, "rewards/margins": 0.25218597054481506, "rewards/rejected": -0.5234842896461487, "step": 5425 }, { "epoch": 14.855578370978781, "grad_norm": 5.0638508796691895, "learning_rate": 2.571232876712329e-07, "log_odds_chosen": 0.8128519058227539, "log_odds_ratio": -0.5187351703643799, "logits/chosen": 0.8494994640350342, "logits/rejected": 0.8698699474334717, "logps/chosen": -2.3161134719848633, "logps/rejected": -3.079207181930542, "loss": 0.6188, "nll_loss": 0.566897988319397, "rewards/accuracies": 0.75, "rewards/chosen": -0.23161137104034424, "rewards/margins": 0.07630938291549683, "rewards/rejected": -0.3079207241535187, "step": 5426 }, { "epoch": 14.858316221765914, "grad_norm": 4.893418312072754, "learning_rate": 2.56986301369863e-07, "log_odds_chosen": 2.360456943511963, "log_odds_ratio": -0.1719277799129486, "logits/chosen": 0.7478927969932556, "logits/rejected": 0.7995452880859375, "logps/chosen": -2.0187108516693115, "logps/rejected": -4.221297740936279, "loss": 0.7449, "nll_loss": 0.7276642322540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.2018710970878601, "rewards/margins": 0.2202587127685547, "rewards/rejected": -0.4221297800540924, "step": 5427 }, { "epoch": 14.861054072553046, "grad_norm": 7.588561534881592, "learning_rate": 2.568493150684932e-07, "log_odds_chosen": 2.4514055252075195, "log_odds_ratio": -0.2730979323387146, "logits/chosen": 0.8242647647857666, "logits/rejected": 0.8332035541534424, "logps/chosen": -2.709482192993164, "logps/rejected": -5.058826446533203, "loss": 0.7151, "nll_loss": 0.6877578496932983, "rewards/accuracies": 0.875, "rewards/chosen": -0.27094826102256775, "rewards/margins": 0.23493440449237823, "rewards/rejected": -0.5058826804161072, "step": 5428 }, { "epoch": 14.863791923340179, "grad_norm": 5.063017845153809, "learning_rate": 2.567123287671233e-07, "log_odds_chosen": 1.6830861568450928, "log_odds_ratio": -0.32431739568710327, "logits/chosen": 0.8095060586929321, "logits/rejected": 0.8895066380500793, "logps/chosen": -2.1021604537963867, "logps/rejected": -3.714146852493286, "loss": 0.619, "nll_loss": 0.5865553021430969, "rewards/accuracies": 0.875, "rewards/chosen": -0.21021604537963867, "rewards/margins": 0.16119861602783203, "rewards/rejected": -0.3714146614074707, "step": 5429 }, { "epoch": 14.86652977412731, "grad_norm": 5.404983997344971, "learning_rate": 2.5657534246575343e-07, "log_odds_chosen": 3.201432228088379, "log_odds_ratio": -0.19029292464256287, "logits/chosen": 0.838092565536499, "logits/rejected": 0.8067501783370972, "logps/chosen": -2.0999999046325684, "logps/rejected": -5.184255599975586, "loss": 0.6707, "nll_loss": 0.6517001986503601, "rewards/accuracies": 1.0, "rewards/chosen": -0.20999999344348907, "rewards/margins": 0.3084256052970886, "rewards/rejected": -0.5184255838394165, "step": 5430 }, { "epoch": 14.869267624914443, "grad_norm": 4.707502841949463, "learning_rate": 2.5643835616438353e-07, "log_odds_chosen": 1.860649585723877, "log_odds_ratio": -0.26521506905555725, "logits/chosen": 0.7675817608833313, "logits/rejected": 0.7983373999595642, "logps/chosen": -2.2560877799987793, "logps/rejected": -4.032038688659668, "loss": 0.6106, "nll_loss": 0.5841102004051208, "rewards/accuracies": 1.0, "rewards/chosen": -0.22560879588127136, "rewards/margins": 0.17759506404399872, "rewards/rejected": -0.4032038748264313, "step": 5431 }, { "epoch": 14.872005475701574, "grad_norm": 6.334266662597656, "learning_rate": 2.563013698630137e-07, "log_odds_chosen": 1.3754667043685913, "log_odds_ratio": -0.5099939107894897, "logits/chosen": 0.9493231177330017, "logits/rejected": 0.9321553707122803, "logps/chosen": -2.388681650161743, "logps/rejected": -3.669950246810913, "loss": 0.6683, "nll_loss": 0.6172916293144226, "rewards/accuracies": 0.625, "rewards/chosen": -0.23886817693710327, "rewards/margins": 0.1281268447637558, "rewards/rejected": -0.36699503660202026, "step": 5432 }, { "epoch": 14.874743326488707, "grad_norm": 5.818120956420898, "learning_rate": 2.5616438356164383e-07, "log_odds_chosen": 2.7177295684814453, "log_odds_ratio": -0.15883898735046387, "logits/chosen": 0.9007103443145752, "logits/rejected": 0.9812453985214233, "logps/chosen": -1.914170742034912, "logps/rejected": -4.487037658691406, "loss": 0.6086, "nll_loss": 0.592688798904419, "rewards/accuracies": 1.0, "rewards/chosen": -0.19141708314418793, "rewards/margins": 0.2572866976261139, "rewards/rejected": -0.448703795671463, "step": 5433 }, { "epoch": 14.877481177275838, "grad_norm": 5.592845916748047, "learning_rate": 2.5602739726027393e-07, "log_odds_chosen": 2.354367256164551, "log_odds_ratio": -0.15400618314743042, "logits/chosen": 0.91233229637146, "logits/rejected": 0.9882593154907227, "logps/chosen": -2.592498779296875, "logps/rejected": -4.869917392730713, "loss": 0.7763, "nll_loss": 0.7609003782272339, "rewards/accuracies": 1.0, "rewards/chosen": -0.25924986600875854, "rewards/margins": 0.22774185240268707, "rewards/rejected": -0.4869917333126068, "step": 5434 }, { "epoch": 14.880219028062971, "grad_norm": 5.857880115509033, "learning_rate": 2.5589041095890414e-07, "log_odds_chosen": 2.4114856719970703, "log_odds_ratio": -0.2369876503944397, "logits/chosen": 1.0579622983932495, "logits/rejected": 1.1308926343917847, "logps/chosen": -2.729687213897705, "logps/rejected": -5.088209629058838, "loss": 0.6879, "nll_loss": 0.6642162799835205, "rewards/accuracies": 0.875, "rewards/chosen": -0.27296873927116394, "rewards/margins": 0.23585225641727448, "rewards/rejected": -0.5088210105895996, "step": 5435 }, { "epoch": 14.882956878850102, "grad_norm": 5.872844696044922, "learning_rate": 2.5575342465753424e-07, "log_odds_chosen": 1.6332826614379883, "log_odds_ratio": -0.31606513261795044, "logits/chosen": 0.8379335999488831, "logits/rejected": 0.7712070345878601, "logps/chosen": -1.6292014122009277, "logps/rejected": -3.1350085735321045, "loss": 0.6329, "nll_loss": 0.6012454032897949, "rewards/accuracies": 1.0, "rewards/chosen": -0.16292016208171844, "rewards/margins": 0.15058068931102753, "rewards/rejected": -0.31350085139274597, "step": 5436 }, { "epoch": 14.885694729637235, "grad_norm": 5.811275005340576, "learning_rate": 2.556164383561644e-07, "log_odds_chosen": 2.6034152507781982, "log_odds_ratio": -0.30360981822013855, "logits/chosen": 0.8936434984207153, "logits/rejected": 0.8849445581436157, "logps/chosen": -2.4940004348754883, "logps/rejected": -4.94779109954834, "loss": 0.614, "nll_loss": 0.5836439728736877, "rewards/accuracies": 0.875, "rewards/chosen": -0.2494000792503357, "rewards/margins": 0.2453790009021759, "rewards/rejected": -0.4947790801525116, "step": 5437 }, { "epoch": 14.888432580424366, "grad_norm": 5.084546089172363, "learning_rate": 2.554794520547945e-07, "log_odds_chosen": 2.736151695251465, "log_odds_ratio": -0.22777242958545685, "logits/chosen": 0.8703038692474365, "logits/rejected": 0.8833857774734497, "logps/chosen": -2.1310930252075195, "logps/rejected": -4.756885051727295, "loss": 0.6675, "nll_loss": 0.6447287797927856, "rewards/accuracies": 1.0, "rewards/chosen": -0.21310929954051971, "rewards/margins": 0.26257920265197754, "rewards/rejected": -0.47568851709365845, "step": 5438 }, { "epoch": 14.8911704312115, "grad_norm": 5.649483680725098, "learning_rate": 2.5534246575342464e-07, "log_odds_chosen": 1.7488536834716797, "log_odds_ratio": -0.27922722697257996, "logits/chosen": 0.6220497488975525, "logits/rejected": 0.5868353247642517, "logps/chosen": -1.626206636428833, "logps/rejected": -3.2025938034057617, "loss": 0.6097, "nll_loss": 0.5817821621894836, "rewards/accuracies": 1.0, "rewards/chosen": -0.1626206636428833, "rewards/margins": 0.15763872861862183, "rewards/rejected": -0.3202593922615051, "step": 5439 }, { "epoch": 14.89390828199863, "grad_norm": 5.610661506652832, "learning_rate": 2.552054794520548e-07, "log_odds_chosen": 1.986741065979004, "log_odds_ratio": -0.4052451550960541, "logits/chosen": 1.0234766006469727, "logits/rejected": 1.0002624988555908, "logps/chosen": -3.2174763679504395, "logps/rejected": -5.16570520401001, "loss": 0.6984, "nll_loss": 0.6578812599182129, "rewards/accuracies": 0.875, "rewards/chosen": -0.32174766063690186, "rewards/margins": 0.19482287764549255, "rewards/rejected": -0.5165705680847168, "step": 5440 }, { "epoch": 14.896646132785763, "grad_norm": 4.9017229080200195, "learning_rate": 2.550684931506849e-07, "log_odds_chosen": 2.8395836353302, "log_odds_ratio": -0.21952399611473083, "logits/chosen": 0.6438006162643433, "logits/rejected": 0.7525763511657715, "logps/chosen": -2.0532007217407227, "logps/rejected": -4.749484539031982, "loss": 0.7159, "nll_loss": 0.6939558386802673, "rewards/accuracies": 1.0, "rewards/chosen": -0.2053200602531433, "rewards/margins": 0.2696284055709839, "rewards/rejected": -0.4749484360218048, "step": 5441 }, { "epoch": 14.899383983572895, "grad_norm": 5.816033363342285, "learning_rate": 2.549315068493151e-07, "log_odds_chosen": 0.878454864025116, "log_odds_ratio": -0.6238317489624023, "logits/chosen": 0.9037206172943115, "logits/rejected": 0.9619311094284058, "logps/chosen": -2.4873502254486084, "logps/rejected": -3.3282508850097656, "loss": 0.8082, "nll_loss": 0.745805561542511, "rewards/accuracies": 0.5, "rewards/chosen": -0.24873504042625427, "rewards/margins": 0.08409006148576736, "rewards/rejected": -0.33282509446144104, "step": 5442 }, { "epoch": 14.902121834360027, "grad_norm": 5.952820777893066, "learning_rate": 2.547945205479452e-07, "log_odds_chosen": 2.3906404972076416, "log_odds_ratio": -0.2872733473777771, "logits/chosen": 0.8749887347221375, "logits/rejected": 0.922333300113678, "logps/chosen": -2.0865631103515625, "logps/rejected": -4.395245552062988, "loss": 0.6146, "nll_loss": 0.5858967304229736, "rewards/accuracies": 0.875, "rewards/chosen": -0.20865631103515625, "rewards/margins": 0.23086830973625183, "rewards/rejected": -0.4395245909690857, "step": 5443 }, { "epoch": 14.904859685147159, "grad_norm": 7.132083415985107, "learning_rate": 2.5465753424657535e-07, "log_odds_chosen": 2.659332036972046, "log_odds_ratio": -0.24581101536750793, "logits/chosen": 1.0765395164489746, "logits/rejected": 1.0762338638305664, "logps/chosen": -3.1952192783355713, "logps/rejected": -5.790310382843018, "loss": 0.8359, "nll_loss": 0.8112860918045044, "rewards/accuracies": 0.875, "rewards/chosen": -0.3195219039916992, "rewards/margins": 0.2595090866088867, "rewards/rejected": -0.5790311098098755, "step": 5444 }, { "epoch": 14.907597535934292, "grad_norm": 6.152289390563965, "learning_rate": 2.5452054794520545e-07, "log_odds_chosen": 1.7589908838272095, "log_odds_ratio": -0.42545539140701294, "logits/chosen": 0.7901751399040222, "logits/rejected": 0.7832086682319641, "logps/chosen": -2.907679557800293, "logps/rejected": -4.6231489181518555, "loss": 0.8573, "nll_loss": 0.8147136569023132, "rewards/accuracies": 0.875, "rewards/chosen": -0.29076793789863586, "rewards/margins": 0.17154695093631744, "rewards/rejected": -0.4623149037361145, "step": 5445 }, { "epoch": 14.910335386721425, "grad_norm": 6.863082408905029, "learning_rate": 2.543835616438356e-07, "log_odds_chosen": 3.0562047958374023, "log_odds_ratio": -0.21959508955478668, "logits/chosen": 0.7780592441558838, "logits/rejected": 0.7179710865020752, "logps/chosen": -2.4016640186309814, "logps/rejected": -5.3151421546936035, "loss": 0.7503, "nll_loss": 0.7282957434654236, "rewards/accuracies": 0.875, "rewards/chosen": -0.24016639590263367, "rewards/margins": 0.291347861289978, "rewards/rejected": -0.5315142273902893, "step": 5446 }, { "epoch": 14.913073237508556, "grad_norm": 6.443781852722168, "learning_rate": 2.5424657534246575e-07, "log_odds_chosen": 4.407295227050781, "log_odds_ratio": -0.12096893787384033, "logits/chosen": 0.8010287284851074, "logits/rejected": 0.789539635181427, "logps/chosen": -1.4524418115615845, "logps/rejected": -5.611553192138672, "loss": 0.6765, "nll_loss": 0.6644459962844849, "rewards/accuracies": 1.0, "rewards/chosen": -0.14524418115615845, "rewards/margins": 0.41591113805770874, "rewards/rejected": -0.5611553192138672, "step": 5447 }, { "epoch": 14.915811088295689, "grad_norm": 9.423713684082031, "learning_rate": 2.5410958904109585e-07, "log_odds_chosen": 1.4682550430297852, "log_odds_ratio": -0.7401964068412781, "logits/chosen": 1.0052536725997925, "logits/rejected": 0.9693004488945007, "logps/chosen": -2.4043331146240234, "logps/rejected": -3.7323079109191895, "loss": 0.7146, "nll_loss": 0.6405801773071289, "rewards/accuracies": 0.625, "rewards/chosen": -0.24043330550193787, "rewards/margins": 0.1327974796295166, "rewards/rejected": -0.37323078513145447, "step": 5448 }, { "epoch": 14.91854893908282, "grad_norm": 5.250479698181152, "learning_rate": 2.5397260273972605e-07, "log_odds_chosen": 2.5468521118164062, "log_odds_ratio": -0.17839089035987854, "logits/chosen": 0.8942732214927673, "logits/rejected": 0.9759625196456909, "logps/chosen": -1.8876014947891235, "logps/rejected": -4.231235027313232, "loss": 0.6287, "nll_loss": 0.6108700037002563, "rewards/accuracies": 1.0, "rewards/chosen": -0.1887601613998413, "rewards/margins": 0.2343633770942688, "rewards/rejected": -0.4231235384941101, "step": 5449 }, { "epoch": 14.921286789869953, "grad_norm": 7.1054205894470215, "learning_rate": 2.5383561643835615e-07, "log_odds_chosen": 1.1992237567901611, "log_odds_ratio": -0.3788941502571106, "logits/chosen": 0.8326655030250549, "logits/rejected": 0.8635517954826355, "logps/chosen": -2.4664013385772705, "logps/rejected": -3.6230430603027344, "loss": 0.5977, "nll_loss": 0.559857964515686, "rewards/accuracies": 0.875, "rewards/chosen": -0.246640145778656, "rewards/margins": 0.11566417664289474, "rewards/rejected": -0.36230432987213135, "step": 5450 }, { "epoch": 14.924024640657084, "grad_norm": 4.950046062469482, "learning_rate": 2.536986301369863e-07, "log_odds_chosen": 1.7550575733184814, "log_odds_ratio": -0.3315195143222809, "logits/chosen": 0.9105151891708374, "logits/rejected": 0.9369253516197205, "logps/chosen": -2.4135890007019043, "logps/rejected": -4.0938334465026855, "loss": 0.6006, "nll_loss": 0.5674545764923096, "rewards/accuracies": 0.875, "rewards/chosen": -0.2413589209318161, "rewards/margins": 0.1680244356393814, "rewards/rejected": -0.4093833565711975, "step": 5451 }, { "epoch": 14.926762491444217, "grad_norm": 6.492047309875488, "learning_rate": 2.535616438356164e-07, "log_odds_chosen": 2.1933305263519287, "log_odds_ratio": -0.29156333208084106, "logits/chosen": 0.9983981251716614, "logits/rejected": 0.9906818866729736, "logps/chosen": -2.0483803749084473, "logps/rejected": -4.139101028442383, "loss": 0.7313, "nll_loss": 0.7021246552467346, "rewards/accuracies": 0.875, "rewards/chosen": -0.20483802258968353, "rewards/margins": 0.2090720534324646, "rewards/rejected": -0.4139100909233093, "step": 5452 }, { "epoch": 14.929500342231348, "grad_norm": 5.595330715179443, "learning_rate": 2.5342465753424656e-07, "log_odds_chosen": 2.38468599319458, "log_odds_ratio": -0.1959528625011444, "logits/chosen": 0.9536807537078857, "logits/rejected": 1.027014970779419, "logps/chosen": -2.060941457748413, "logps/rejected": -4.323853492736816, "loss": 0.5175, "nll_loss": 0.49789005517959595, "rewards/accuracies": 1.0, "rewards/chosen": -0.2060941457748413, "rewards/margins": 0.22629114985466003, "rewards/rejected": -0.43238532543182373, "step": 5453 }, { "epoch": 14.932238193018481, "grad_norm": 6.201740741729736, "learning_rate": 2.532876712328767e-07, "log_odds_chosen": 2.8081586360931396, "log_odds_ratio": -0.3268781304359436, "logits/chosen": 0.9872127175331116, "logits/rejected": 1.0237644910812378, "logps/chosen": -2.5030980110168457, "logps/rejected": -5.250778675079346, "loss": 0.5862, "nll_loss": 0.5535581707954407, "rewards/accuracies": 0.875, "rewards/chosen": -0.2503097653388977, "rewards/margins": 0.2747681140899658, "rewards/rejected": -0.5250778794288635, "step": 5454 }, { "epoch": 14.934976043805612, "grad_norm": 8.388989448547363, "learning_rate": 2.531506849315068e-07, "log_odds_chosen": 2.368698835372925, "log_odds_ratio": -0.3554791212081909, "logits/chosen": 0.79026198387146, "logits/rejected": 0.7183116674423218, "logps/chosen": -2.6283257007598877, "logps/rejected": -4.911915302276611, "loss": 0.7505, "nll_loss": 0.7149132490158081, "rewards/accuracies": 0.75, "rewards/chosen": -0.2628325819969177, "rewards/margins": 0.2283589392900467, "rewards/rejected": -0.4911915361881256, "step": 5455 }, { "epoch": 14.937713894592745, "grad_norm": 9.452272415161133, "learning_rate": 2.53013698630137e-07, "log_odds_chosen": 0.8097784519195557, "log_odds_ratio": -0.6208024621009827, "logits/chosen": 0.927640438079834, "logits/rejected": 0.8878082633018494, "logps/chosen": -2.9914278984069824, "logps/rejected": -3.7426486015319824, "loss": 0.7759, "nll_loss": 0.7138334512710571, "rewards/accuracies": 0.625, "rewards/chosen": -0.2991427779197693, "rewards/margins": 0.07512210309505463, "rewards/rejected": -0.3742648959159851, "step": 5456 }, { "epoch": 14.940451745379876, "grad_norm": 5.547165870666504, "learning_rate": 2.528767123287671e-07, "log_odds_chosen": 2.857844352722168, "log_odds_ratio": -0.190163716673851, "logits/chosen": 0.9109695553779602, "logits/rejected": 0.9674886465072632, "logps/chosen": -1.9065251350402832, "logps/rejected": -4.531986236572266, "loss": 0.6257, "nll_loss": 0.6066765189170837, "rewards/accuracies": 1.0, "rewards/chosen": -0.1906524896621704, "rewards/margins": 0.262546181678772, "rewards/rejected": -0.4531986713409424, "step": 5457 }, { "epoch": 14.94318959616701, "grad_norm": 5.530040264129639, "learning_rate": 2.527397260273972e-07, "log_odds_chosen": 1.7122023105621338, "log_odds_ratio": -0.221598282456398, "logits/chosen": 0.8152600526809692, "logits/rejected": 0.8136476278305054, "logps/chosen": -1.5778396129608154, "logps/rejected": -3.083508014678955, "loss": 0.5112, "nll_loss": 0.48904138803482056, "rewards/accuracies": 1.0, "rewards/chosen": -0.15778397023677826, "rewards/margins": 0.15056684613227844, "rewards/rejected": -0.3083508014678955, "step": 5458 }, { "epoch": 14.94592744695414, "grad_norm": 6.2089433670043945, "learning_rate": 2.5260273972602736e-07, "log_odds_chosen": 0.865883469581604, "log_odds_ratio": -0.41066721081733704, "logits/chosen": 0.6376131772994995, "logits/rejected": 0.630864143371582, "logps/chosen": -2.2164206504821777, "logps/rejected": -2.9919939041137695, "loss": 0.6082, "nll_loss": 0.5670948028564453, "rewards/accuracies": 0.875, "rewards/chosen": -0.22164204716682434, "rewards/margins": 0.07755733281373978, "rewards/rejected": -0.2991994023323059, "step": 5459 }, { "epoch": 14.948665297741274, "grad_norm": 5.148991584777832, "learning_rate": 2.524657534246575e-07, "log_odds_chosen": 2.914565086364746, "log_odds_ratio": -0.11937543004751205, "logits/chosen": 0.9316145181655884, "logits/rejected": 0.9591997861862183, "logps/chosen": -1.6406224966049194, "logps/rejected": -4.283865451812744, "loss": 0.6223, "nll_loss": 0.6104106307029724, "rewards/accuracies": 1.0, "rewards/chosen": -0.1640622466802597, "rewards/margins": 0.2643243074417114, "rewards/rejected": -0.42838653922080994, "step": 5460 }, { "epoch": 14.951403148528405, "grad_norm": 5.028526782989502, "learning_rate": 2.5232876712328767e-07, "log_odds_chosen": 2.337935447692871, "log_odds_ratio": -0.21211522817611694, "logits/chosen": 0.6800335049629211, "logits/rejected": 0.6561825275421143, "logps/chosen": -2.254807710647583, "logps/rejected": -4.498208045959473, "loss": 0.7121, "nll_loss": 0.6908406615257263, "rewards/accuracies": 1.0, "rewards/chosen": -0.22548078000545502, "rewards/margins": 0.22434002161026, "rewards/rejected": -0.4498208165168762, "step": 5461 }, { "epoch": 14.954140999315538, "grad_norm": 5.468724727630615, "learning_rate": 2.5219178082191777e-07, "log_odds_chosen": 2.0771453380584717, "log_odds_ratio": -0.24782153964042664, "logits/chosen": 0.8314419984817505, "logits/rejected": 0.8733503818511963, "logps/chosen": -2.0563278198242188, "logps/rejected": -4.03225564956665, "loss": 0.6128, "nll_loss": 0.5879830718040466, "rewards/accuracies": 0.875, "rewards/chosen": -0.2056327760219574, "rewards/margins": 0.19759276509284973, "rewards/rejected": -0.4032255709171295, "step": 5462 }, { "epoch": 14.956878850102669, "grad_norm": 10.334769248962402, "learning_rate": 2.5205479452054797e-07, "log_odds_chosen": 2.3087825775146484, "log_odds_ratio": -0.6545014381408691, "logits/chosen": 1.0685908794403076, "logits/rejected": 1.0873408317565918, "logps/chosen": -3.414262294769287, "logps/rejected": -5.607278823852539, "loss": 0.8143, "nll_loss": 0.7488300800323486, "rewards/accuracies": 0.875, "rewards/chosen": -0.3414262533187866, "rewards/margins": 0.21930159628391266, "rewards/rejected": -0.5607278347015381, "step": 5463 }, { "epoch": 14.959616700889802, "grad_norm": 5.049564838409424, "learning_rate": 2.5191780821917807e-07, "log_odds_chosen": 1.4023782014846802, "log_odds_ratio": -0.29604801535606384, "logits/chosen": 0.9622806310653687, "logits/rejected": 0.972865104675293, "logps/chosen": -2.0984132289886475, "logps/rejected": -3.3994035720825195, "loss": 0.6583, "nll_loss": 0.6286453008651733, "rewards/accuracies": 0.875, "rewards/chosen": -0.20984134078025818, "rewards/margins": 0.13009902834892273, "rewards/rejected": -0.3399403393268585, "step": 5464 }, { "epoch": 14.962354551676933, "grad_norm": 5.850058078765869, "learning_rate": 2.5178082191780817e-07, "log_odds_chosen": 2.9915318489074707, "log_odds_ratio": -0.26797908544540405, "logits/chosen": 0.9709512591362, "logits/rejected": 0.914829671382904, "logps/chosen": -1.6631669998168945, "logps/rejected": -4.463400840759277, "loss": 0.7382, "nll_loss": 0.7113785147666931, "rewards/accuracies": 0.875, "rewards/chosen": -0.1663166880607605, "rewards/margins": 0.28002336621284485, "rewards/rejected": -0.44634008407592773, "step": 5465 }, { "epoch": 14.965092402464066, "grad_norm": 6.438877105712891, "learning_rate": 2.516438356164384e-07, "log_odds_chosen": 2.0968079566955566, "log_odds_ratio": -0.46446603536605835, "logits/chosen": 0.8734140396118164, "logits/rejected": 0.8648564219474792, "logps/chosen": -2.434152841567993, "logps/rejected": -4.376289367675781, "loss": 0.6602, "nll_loss": 0.6137078404426575, "rewards/accuracies": 0.875, "rewards/chosen": -0.24341526627540588, "rewards/margins": 0.19421370327472687, "rewards/rejected": -0.43762895464897156, "step": 5466 }, { "epoch": 14.967830253251197, "grad_norm": 5.3809990882873535, "learning_rate": 2.515068493150685e-07, "log_odds_chosen": 2.2130377292633057, "log_odds_ratio": -0.16802287101745605, "logits/chosen": 0.8368756175041199, "logits/rejected": 0.8274916410446167, "logps/chosen": -1.7180290222167969, "logps/rejected": -3.749784469604492, "loss": 0.5761, "nll_loss": 0.5592567920684814, "rewards/accuracies": 1.0, "rewards/chosen": -0.17180289328098297, "rewards/margins": 0.20317554473876953, "rewards/rejected": -0.3749784231185913, "step": 5467 }, { "epoch": 14.97056810403833, "grad_norm": 4.978186130523682, "learning_rate": 2.5136986301369863e-07, "log_odds_chosen": 3.5987050533294678, "log_odds_ratio": -0.15095745027065277, "logits/chosen": 0.8290566802024841, "logits/rejected": 0.8545264601707458, "logps/chosen": -1.8656022548675537, "logps/rejected": -5.249558925628662, "loss": 0.622, "nll_loss": 0.6069227457046509, "rewards/accuracies": 1.0, "rewards/chosen": -0.18656021356582642, "rewards/margins": 0.3383956551551819, "rewards/rejected": -0.5249558687210083, "step": 5468 }, { "epoch": 14.973305954825461, "grad_norm": 5.478302001953125, "learning_rate": 2.512328767123287e-07, "log_odds_chosen": 2.988253116607666, "log_odds_ratio": -0.11413060128688812, "logits/chosen": 0.7436494827270508, "logits/rejected": 0.8002490997314453, "logps/chosen": -2.1274030208587646, "logps/rejected": -4.951852321624756, "loss": 0.6902, "nll_loss": 0.6788297295570374, "rewards/accuracies": 1.0, "rewards/chosen": -0.21274031698703766, "rewards/margins": 0.28244495391845703, "rewards/rejected": -0.4951852560043335, "step": 5469 }, { "epoch": 14.976043805612594, "grad_norm": 6.438102722167969, "learning_rate": 2.5109589041095893e-07, "log_odds_chosen": 2.3251209259033203, "log_odds_ratio": -0.4590180516242981, "logits/chosen": 0.8159775733947754, "logits/rejected": 0.8490266799926758, "logps/chosen": -2.677196979522705, "logps/rejected": -4.826021194458008, "loss": 0.7757, "nll_loss": 0.7297673225402832, "rewards/accuracies": 0.625, "rewards/chosen": -0.26771971583366394, "rewards/margins": 0.21488238871097565, "rewards/rejected": -0.4826021194458008, "step": 5470 }, { "epoch": 14.978781656399725, "grad_norm": 6.877730369567871, "learning_rate": 2.5095890410958903e-07, "log_odds_chosen": 1.0346359014511108, "log_odds_ratio": -0.3830533027648926, "logits/chosen": 0.8014805912971497, "logits/rejected": 0.7763773202896118, "logps/chosen": -2.627971649169922, "logps/rejected": -3.596996784210205, "loss": 0.6871, "nll_loss": 0.6487993597984314, "rewards/accuracies": 0.875, "rewards/chosen": -0.26279717683792114, "rewards/margins": 0.0969025269150734, "rewards/rejected": -0.35969969630241394, "step": 5471 }, { "epoch": 14.981519507186858, "grad_norm": 6.86460542678833, "learning_rate": 2.5082191780821913e-07, "log_odds_chosen": 1.341130018234253, "log_odds_ratio": -0.4931710362434387, "logits/chosen": 0.6412016153335571, "logits/rejected": 0.7231357097625732, "logps/chosen": -2.335880756378174, "logps/rejected": -3.543856143951416, "loss": 0.6589, "nll_loss": 0.6095573306083679, "rewards/accuracies": 0.875, "rewards/chosen": -0.2335880696773529, "rewards/margins": 0.1207975521683693, "rewards/rejected": -0.3543856143951416, "step": 5472 }, { "epoch": 14.984257357973991, "grad_norm": 4.948017597198486, "learning_rate": 2.5068493150684933e-07, "log_odds_chosen": 1.9607822895050049, "log_odds_ratio": -0.282332181930542, "logits/chosen": 0.6849493384361267, "logits/rejected": 0.6190627813339233, "logps/chosen": -2.0464751720428467, "logps/rejected": -3.9058966636657715, "loss": 0.6507, "nll_loss": 0.6224656105041504, "rewards/accuracies": 0.875, "rewards/chosen": -0.20464752614498138, "rewards/margins": 0.185942143201828, "rewards/rejected": -0.3905896544456482, "step": 5473 }, { "epoch": 14.986995208761122, "grad_norm": 5.703585147857666, "learning_rate": 2.5054794520547943e-07, "log_odds_chosen": 3.0951144695281982, "log_odds_ratio": -0.14262506365776062, "logits/chosen": 0.5956624150276184, "logits/rejected": 0.6369994878768921, "logps/chosen": -2.270432472229004, "logps/rejected": -5.195539474487305, "loss": 0.6102, "nll_loss": 0.5959193110466003, "rewards/accuracies": 1.0, "rewards/chosen": -0.2270432412624359, "rewards/margins": 0.2925107777118683, "rewards/rejected": -0.5195540189743042, "step": 5474 }, { "epoch": 14.989733059548255, "grad_norm": 5.408166408538818, "learning_rate": 2.504109589041096e-07, "log_odds_chosen": 1.4354684352874756, "log_odds_ratio": -0.3615794777870178, "logits/chosen": 0.8074439764022827, "logits/rejected": 0.8980876207351685, "logps/chosen": -2.608825206756592, "logps/rejected": -4.010466575622559, "loss": 0.6728, "nll_loss": 0.6366364359855652, "rewards/accuracies": 0.75, "rewards/chosen": -0.26088252663612366, "rewards/margins": 0.14016415178775787, "rewards/rejected": -0.4010466933250427, "step": 5475 }, { "epoch": 14.992470910335387, "grad_norm": 6.589406490325928, "learning_rate": 2.502739726027397e-07, "log_odds_chosen": 1.7457096576690674, "log_odds_ratio": -0.46153098344802856, "logits/chosen": 0.9549242854118347, "logits/rejected": 0.9106547236442566, "logps/chosen": -1.983397126197815, "logps/rejected": -3.549452781677246, "loss": 0.7046, "nll_loss": 0.6584653854370117, "rewards/accuracies": 0.875, "rewards/chosen": -0.19833971560001373, "rewards/margins": 0.1566055417060852, "rewards/rejected": -0.35494524240493774, "step": 5476 }, { "epoch": 14.99520876112252, "grad_norm": 6.138692378997803, "learning_rate": 2.501369863013699e-07, "log_odds_chosen": 0.9427992105484009, "log_odds_ratio": -0.4405769407749176, "logits/chosen": 0.7682211399078369, "logits/rejected": 0.699446439743042, "logps/chosen": -2.2264695167541504, "logps/rejected": -3.0261545181274414, "loss": 0.6311, "nll_loss": 0.5870399475097656, "rewards/accuracies": 0.875, "rewards/chosen": -0.22264696657657623, "rewards/margins": 0.07996849715709686, "rewards/rejected": -0.3026154637336731, "step": 5477 }, { "epoch": 14.99794661190965, "grad_norm": 5.85692024230957, "learning_rate": 2.5e-07, "log_odds_chosen": 1.5395845174789429, "log_odds_ratio": -0.4152125418186188, "logits/chosen": 0.7896416783332825, "logits/rejected": 0.687951385974884, "logps/chosen": -2.4892361164093018, "logps/rejected": -3.9179177284240723, "loss": 0.7274, "nll_loss": 0.6859065294265747, "rewards/accuracies": 0.75, "rewards/chosen": -0.2489236295223236, "rewards/margins": 0.14286814630031586, "rewards/rejected": -0.39179176092147827, "step": 5478 }, { "epoch": 15.000684462696784, "grad_norm": 4.987766265869141, "learning_rate": 2.4986301369863014e-07, "log_odds_chosen": 2.9882705211639404, "log_odds_ratio": -0.20000901818275452, "logits/chosen": 0.7434175610542297, "logits/rejected": 0.7446637153625488, "logps/chosen": -1.722031593322754, "logps/rejected": -4.5428466796875, "loss": 0.617, "nll_loss": 0.5969592928886414, "rewards/accuracies": 1.0, "rewards/chosen": -0.1722031682729721, "rewards/margins": 0.2820815443992615, "rewards/rejected": -0.4542847275733948, "step": 5479 }, { "epoch": 15.003422313483915, "grad_norm": 5.837924957275391, "learning_rate": 2.4972602739726024e-07, "log_odds_chosen": 1.3592422008514404, "log_odds_ratio": -0.5854213237762451, "logits/chosen": 0.6863939166069031, "logits/rejected": 0.7796091437339783, "logps/chosen": -2.6387319564819336, "logps/rejected": -3.8707315921783447, "loss": 0.6622, "nll_loss": 0.6036370992660522, "rewards/accuracies": 0.625, "rewards/chosen": -0.26387321949005127, "rewards/margins": 0.12319996953010559, "rewards/rejected": -0.3870731592178345, "step": 5480 }, { "epoch": 15.006160164271048, "grad_norm": 5.558077812194824, "learning_rate": 2.495890410958904e-07, "log_odds_chosen": 3.524294853210449, "log_odds_ratio": -0.0868084728717804, "logits/chosen": 0.845995306968689, "logits/rejected": 0.873245358467102, "logps/chosen": -2.4131407737731934, "logps/rejected": -5.771312713623047, "loss": 0.7341, "nll_loss": 0.72544264793396, "rewards/accuracies": 1.0, "rewards/chosen": -0.2413140833377838, "rewards/margins": 0.33581721782684326, "rewards/rejected": -0.5771312713623047, "step": 5481 }, { "epoch": 15.008898015058179, "grad_norm": 4.589591026306152, "learning_rate": 2.4945205479452054e-07, "log_odds_chosen": 2.634077310562134, "log_odds_ratio": -0.3298889994621277, "logits/chosen": 0.8122516870498657, "logits/rejected": 0.781273365020752, "logps/chosen": -2.092724323272705, "logps/rejected": -4.648747444152832, "loss": 0.6794, "nll_loss": 0.6464471817016602, "rewards/accuracies": 0.875, "rewards/chosen": -0.20927241444587708, "rewards/margins": 0.25560232996940613, "rewards/rejected": -0.4648747444152832, "step": 5482 }, { "epoch": 15.011635865845312, "grad_norm": 5.200397968292236, "learning_rate": 2.493150684931507e-07, "log_odds_chosen": 2.437617063522339, "log_odds_ratio": -0.13627304136753082, "logits/chosen": 0.9160346388816833, "logits/rejected": 0.9555341601371765, "logps/chosen": -2.1390483379364014, "logps/rejected": -4.443641185760498, "loss": 0.6154, "nll_loss": 0.6018085479736328, "rewards/accuracies": 1.0, "rewards/chosen": -0.21390484273433685, "rewards/margins": 0.2304593324661255, "rewards/rejected": -0.44436413049697876, "step": 5483 }, { "epoch": 15.014373716632443, "grad_norm": 6.828906059265137, "learning_rate": 2.491780821917808e-07, "log_odds_chosen": 2.4813663959503174, "log_odds_ratio": -0.34792962670326233, "logits/chosen": 0.9694907665252686, "logits/rejected": 1.08633291721344, "logps/chosen": -2.33721923828125, "logps/rejected": -4.643760681152344, "loss": 0.6401, "nll_loss": 0.6052583456039429, "rewards/accuracies": 0.875, "rewards/chosen": -0.23372195661067963, "rewards/margins": 0.23065416514873505, "rewards/rejected": -0.4643760919570923, "step": 5484 }, { "epoch": 15.017111567419576, "grad_norm": 4.506753921508789, "learning_rate": 2.4904109589041095e-07, "log_odds_chosen": 2.994690179824829, "log_odds_ratio": -0.09733764827251434, "logits/chosen": 0.7817524671554565, "logits/rejected": 0.8315153121948242, "logps/chosen": -1.7721799612045288, "logps/rejected": -4.478724002838135, "loss": 0.72, "nll_loss": 0.7102615833282471, "rewards/accuracies": 1.0, "rewards/chosen": -0.1772179901599884, "rewards/margins": 0.27065443992614746, "rewards/rejected": -0.44787243008613586, "step": 5485 }, { "epoch": 15.019849418206707, "grad_norm": 9.338467597961426, "learning_rate": 2.489041095890411e-07, "log_odds_chosen": 0.977816641330719, "log_odds_ratio": -0.6516863703727722, "logits/chosen": 0.7802422046661377, "logits/rejected": 0.7864569425582886, "logps/chosen": -3.2570812702178955, "logps/rejected": -4.187495231628418, "loss": 0.9758, "nll_loss": 0.910667896270752, "rewards/accuracies": 0.75, "rewards/chosen": -0.32570815086364746, "rewards/margins": 0.09304137527942657, "rewards/rejected": -0.41874951124191284, "step": 5486 }, { "epoch": 15.02258726899384, "grad_norm": 6.374089241027832, "learning_rate": 2.487671232876712e-07, "log_odds_chosen": 1.4399158954620361, "log_odds_ratio": -0.4368734657764435, "logits/chosen": 0.6810266375541687, "logits/rejected": 0.7111603617668152, "logps/chosen": -2.4555864334106445, "logps/rejected": -3.8607354164123535, "loss": 0.7156, "nll_loss": 0.6718647480010986, "rewards/accuracies": 0.75, "rewards/chosen": -0.24555861949920654, "rewards/margins": 0.14051492512226105, "rewards/rejected": -0.38607358932495117, "step": 5487 }, { "epoch": 15.025325119780971, "grad_norm": 4.8619489669799805, "learning_rate": 2.4863013698630135e-07, "log_odds_chosen": 2.114867925643921, "log_odds_ratio": -0.33532387018203735, "logits/chosen": 0.6542277932167053, "logits/rejected": 0.7391834855079651, "logps/chosen": -1.946724772453308, "logps/rejected": -3.957782506942749, "loss": 0.561, "nll_loss": 0.5274183750152588, "rewards/accuracies": 0.875, "rewards/chosen": -0.19467249512672424, "rewards/margins": 0.20110578835010529, "rewards/rejected": -0.39577826857566833, "step": 5488 }, { "epoch": 15.028062970568104, "grad_norm": 5.329531192779541, "learning_rate": 2.484931506849315e-07, "log_odds_chosen": 1.3817609548568726, "log_odds_ratio": -0.41566821932792664, "logits/chosen": 0.8608773946762085, "logits/rejected": 0.821448564529419, "logps/chosen": -1.5714726448059082, "logps/rejected": -2.8538284301757812, "loss": 0.6828, "nll_loss": 0.641189455986023, "rewards/accuracies": 0.875, "rewards/chosen": -0.15714725852012634, "rewards/margins": 0.1282355636358261, "rewards/rejected": -0.28538280725479126, "step": 5489 }, { "epoch": 15.030800821355236, "grad_norm": 4.4158220291137695, "learning_rate": 2.4835616438356165e-07, "log_odds_chosen": 3.550431489944458, "log_odds_ratio": -0.162079319357872, "logits/chosen": 0.7477801442146301, "logits/rejected": 0.7092704176902771, "logps/chosen": -1.6453748941421509, "logps/rejected": -4.960221767425537, "loss": 0.5839, "nll_loss": 0.5677170753479004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1645374894142151, "rewards/margins": 0.33148473501205444, "rewards/rejected": -0.49602219462394714, "step": 5490 }, { "epoch": 15.033538672142368, "grad_norm": 5.348583698272705, "learning_rate": 2.482191780821918e-07, "log_odds_chosen": 2.480376958847046, "log_odds_ratio": -0.20759323239326477, "logits/chosen": 0.7948718070983887, "logits/rejected": 0.8442625403404236, "logps/chosen": -2.014106273651123, "logps/rejected": -4.335341930389404, "loss": 0.6742, "nll_loss": 0.6534059047698975, "rewards/accuracies": 1.0, "rewards/chosen": -0.20141065120697021, "rewards/margins": 0.23212355375289917, "rewards/rejected": -0.4335342049598694, "step": 5491 }, { "epoch": 15.0362765229295, "grad_norm": 5.137892246246338, "learning_rate": 2.480821917808219e-07, "log_odds_chosen": 2.5376482009887695, "log_odds_ratio": -0.20116709172725677, "logits/chosen": 0.7868147492408752, "logits/rejected": 0.8329951763153076, "logps/chosen": -2.048165798187256, "logps/rejected": -4.4261651039123535, "loss": 0.6893, "nll_loss": 0.6691766381263733, "rewards/accuracies": 1.0, "rewards/chosen": -0.2048165500164032, "rewards/margins": 0.23779994249343872, "rewards/rejected": -0.44261646270751953, "step": 5492 }, { "epoch": 15.039014373716633, "grad_norm": 6.106042385101318, "learning_rate": 2.4794520547945206e-07, "log_odds_chosen": 1.4369397163391113, "log_odds_ratio": -0.4787617027759552, "logits/chosen": 0.93230140209198, "logits/rejected": 1.0167019367218018, "logps/chosen": -2.223231315612793, "logps/rejected": -3.568798542022705, "loss": 0.6431, "nll_loss": 0.595262885093689, "rewards/accuracies": 0.625, "rewards/chosen": -0.22232313454151154, "rewards/margins": 0.13455672562122345, "rewards/rejected": -0.356879860162735, "step": 5493 }, { "epoch": 15.041752224503764, "grad_norm": 5.048480987548828, "learning_rate": 2.4780821917808216e-07, "log_odds_chosen": 1.6120890378952026, "log_odds_ratio": -0.35803771018981934, "logits/chosen": 0.6055964231491089, "logits/rejected": 0.6464568376541138, "logps/chosen": -1.8470573425292969, "logps/rejected": -3.298187255859375, "loss": 0.6024, "nll_loss": 0.5665829181671143, "rewards/accuracies": 0.75, "rewards/chosen": -0.1847057193517685, "rewards/margins": 0.1451129913330078, "rewards/rejected": -0.3298187255859375, "step": 5494 }, { "epoch": 15.044490075290897, "grad_norm": 5.146686553955078, "learning_rate": 2.476712328767123e-07, "log_odds_chosen": 1.7075707912445068, "log_odds_ratio": -0.2283955067396164, "logits/chosen": 0.8895966410636902, "logits/rejected": 0.9034574627876282, "logps/chosen": -1.784576177597046, "logps/rejected": -3.3597660064697266, "loss": 0.5256, "nll_loss": 0.5027165412902832, "rewards/accuracies": 1.0, "rewards/chosen": -0.1784576177597046, "rewards/margins": 0.15751898288726807, "rewards/rejected": -0.33597660064697266, "step": 5495 }, { "epoch": 15.047227926078028, "grad_norm": 5.861409664154053, "learning_rate": 2.4753424657534246e-07, "log_odds_chosen": 2.3324756622314453, "log_odds_ratio": -0.19648906588554382, "logits/chosen": 0.9146848320960999, "logits/rejected": 1.002974510192871, "logps/chosen": -2.7517080307006836, "logps/rejected": -4.961709022521973, "loss": 0.7062, "nll_loss": 0.6865338683128357, "rewards/accuracies": 1.0, "rewards/chosen": -0.27517080307006836, "rewards/margins": 0.22100010514259338, "rewards/rejected": -0.49617090821266174, "step": 5496 }, { "epoch": 15.049965776865161, "grad_norm": 6.360289573669434, "learning_rate": 2.473972602739726e-07, "log_odds_chosen": 1.5387349128723145, "log_odds_ratio": -0.2886638045310974, "logits/chosen": 1.1526529788970947, "logits/rejected": 1.160447120666504, "logps/chosen": -1.569077968597412, "logps/rejected": -2.9263222217559814, "loss": 0.5312, "nll_loss": 0.5023822784423828, "rewards/accuracies": 0.875, "rewards/chosen": -0.1569077968597412, "rewards/margins": 0.13572444021701813, "rewards/rejected": -0.29263225197792053, "step": 5497 }, { "epoch": 15.052703627652292, "grad_norm": 6.212764739990234, "learning_rate": 2.4726027397260277e-07, "log_odds_chosen": 2.1996591091156006, "log_odds_ratio": -0.21208862960338593, "logits/chosen": 0.7228773832321167, "logits/rejected": 0.6513442397117615, "logps/chosen": -2.2240734100341797, "logps/rejected": -4.3029398918151855, "loss": 0.7929, "nll_loss": 0.7716598510742188, "rewards/accuracies": 1.0, "rewards/chosen": -0.22240734100341797, "rewards/margins": 0.20788662135601044, "rewards/rejected": -0.4302939772605896, "step": 5498 }, { "epoch": 15.055441478439425, "grad_norm": 5.084736347198486, "learning_rate": 2.4712328767123286e-07, "log_odds_chosen": 2.174753189086914, "log_odds_ratio": -0.20349496603012085, "logits/chosen": 0.8224164843559265, "logits/rejected": 0.892329752445221, "logps/chosen": -2.0312564373016357, "logps/rejected": -4.038907051086426, "loss": 0.7625, "nll_loss": 0.7421756982803345, "rewards/accuracies": 1.0, "rewards/chosen": -0.20312562584877014, "rewards/margins": 0.20076507329940796, "rewards/rejected": -0.4038907289505005, "step": 5499 }, { "epoch": 15.058179329226558, "grad_norm": 5.560783386230469, "learning_rate": 2.46986301369863e-07, "log_odds_chosen": 3.0987071990966797, "log_odds_ratio": -0.19628416001796722, "logits/chosen": 0.5768315196037292, "logits/rejected": 0.6024351119995117, "logps/chosen": -2.520479917526245, "logps/rejected": -5.4956583976745605, "loss": 0.747, "nll_loss": 0.7273281216621399, "rewards/accuracies": 1.0, "rewards/chosen": -0.2520480155944824, "rewards/margins": 0.2975178360939026, "rewards/rejected": -0.549565851688385, "step": 5500 }, { "epoch": 15.06091718001369, "grad_norm": 6.1612958908081055, "learning_rate": 2.468493150684931e-07, "log_odds_chosen": 3.7398271560668945, "log_odds_ratio": -0.3167644143104553, "logits/chosen": 0.8162314891815186, "logits/rejected": 0.8708188533782959, "logps/chosen": -2.258789300918579, "logps/rejected": -5.900076866149902, "loss": 0.7124, "nll_loss": 0.680725634098053, "rewards/accuracies": 0.875, "rewards/chosen": -0.22587892413139343, "rewards/margins": 0.3641287386417389, "rewards/rejected": -0.5900076627731323, "step": 5501 }, { "epoch": 15.063655030800822, "grad_norm": 5.452856063842773, "learning_rate": 2.4671232876712327e-07, "log_odds_chosen": 1.7590041160583496, "log_odds_ratio": -0.22308234870433807, "logits/chosen": 0.7712078094482422, "logits/rejected": 0.830321192741394, "logps/chosen": -2.021669864654541, "logps/rejected": -3.629413366317749, "loss": 0.6224, "nll_loss": 0.6000645160675049, "rewards/accuracies": 1.0, "rewards/chosen": -0.20216698944568634, "rewards/margins": 0.1607743501663208, "rewards/rejected": -0.36294132471084595, "step": 5502 }, { "epoch": 15.066392881587953, "grad_norm": 5.168088436126709, "learning_rate": 2.465753424657534e-07, "log_odds_chosen": 3.554211378097534, "log_odds_ratio": -0.07445685565471649, "logits/chosen": 1.0314831733703613, "logits/rejected": 1.0785608291625977, "logps/chosen": -2.7146172523498535, "logps/rejected": -6.134446144104004, "loss": 0.7015, "nll_loss": 0.694037914276123, "rewards/accuracies": 1.0, "rewards/chosen": -0.27146172523498535, "rewards/margins": 0.341982901096344, "rewards/rejected": -0.6134446263313293, "step": 5503 }, { "epoch": 15.069130732375086, "grad_norm": 5.334762096405029, "learning_rate": 2.4643835616438357e-07, "log_odds_chosen": 2.841973304748535, "log_odds_ratio": -0.1954634189605713, "logits/chosen": 0.9966921806335449, "logits/rejected": 0.9220473170280457, "logps/chosen": -2.120335817337036, "logps/rejected": -4.834101676940918, "loss": 0.6273, "nll_loss": 0.6077966094017029, "rewards/accuracies": 1.0, "rewards/chosen": -0.21203358471393585, "rewards/margins": 0.2713765799999237, "rewards/rejected": -0.48341014981269836, "step": 5504 }, { "epoch": 15.071868583162217, "grad_norm": 5.831600666046143, "learning_rate": 2.4630136986301367e-07, "log_odds_chosen": 1.7195854187011719, "log_odds_ratio": -0.33150720596313477, "logits/chosen": 1.2000433206558228, "logits/rejected": 1.2434788942337036, "logps/chosen": -3.066223382949829, "logps/rejected": -4.680267333984375, "loss": 0.6529, "nll_loss": 0.61976158618927, "rewards/accuracies": 0.875, "rewards/chosen": -0.30662235617637634, "rewards/margins": 0.16140441596508026, "rewards/rejected": -0.4680267572402954, "step": 5505 }, { "epoch": 15.07460643394935, "grad_norm": 4.493821144104004, "learning_rate": 2.461643835616438e-07, "log_odds_chosen": 4.163623332977295, "log_odds_ratio": -0.10260384529829025, "logits/chosen": 0.6062373518943787, "logits/rejected": 0.6440010070800781, "logps/chosen": -1.264804720878601, "logps/rejected": -5.07403039932251, "loss": 0.5946, "nll_loss": 0.5843387246131897, "rewards/accuracies": 1.0, "rewards/chosen": -0.12648046016693115, "rewards/margins": 0.3809226155281067, "rewards/rejected": -0.5074030756950378, "step": 5506 }, { "epoch": 15.077344284736482, "grad_norm": 7.8839874267578125, "learning_rate": 2.46027397260274e-07, "log_odds_chosen": 0.8634188175201416, "log_odds_ratio": -0.5837033987045288, "logits/chosen": 0.9334315061569214, "logits/rejected": 0.9379321336746216, "logps/chosen": -2.4698662757873535, "logps/rejected": -3.2535550594329834, "loss": 0.6709, "nll_loss": 0.6125562787055969, "rewards/accuracies": 0.75, "rewards/chosen": -0.24698661267757416, "rewards/margins": 0.0783688873052597, "rewards/rejected": -0.32535552978515625, "step": 5507 }, { "epoch": 15.080082135523615, "grad_norm": 12.241780281066895, "learning_rate": 2.458904109589041e-07, "log_odds_chosen": 3.9932830333709717, "log_odds_ratio": -0.0754256621003151, "logits/chosen": 0.845228910446167, "logits/rejected": 0.8953640460968018, "logps/chosen": -2.785271644592285, "logps/rejected": -6.651684761047363, "loss": 0.7701, "nll_loss": 0.7625874280929565, "rewards/accuracies": 1.0, "rewards/chosen": -0.278527170419693, "rewards/margins": 0.386641263961792, "rewards/rejected": -0.6651684641838074, "step": 5508 }, { "epoch": 15.082819986310746, "grad_norm": 6.873237133026123, "learning_rate": 2.4575342465753423e-07, "log_odds_chosen": 1.6241121292114258, "log_odds_ratio": -0.25882914662361145, "logits/chosen": 0.9864309430122375, "logits/rejected": 0.873965859413147, "logps/chosen": -2.553544521331787, "logps/rejected": -4.065428733825684, "loss": 0.6344, "nll_loss": 0.608527660369873, "rewards/accuracies": 1.0, "rewards/chosen": -0.2553544342517853, "rewards/margins": 0.1511884480714798, "rewards/rejected": -0.40654289722442627, "step": 5509 }, { "epoch": 15.085557837097879, "grad_norm": 5.509394645690918, "learning_rate": 2.456164383561644e-07, "log_odds_chosen": 2.660306453704834, "log_odds_ratio": -0.17434529960155487, "logits/chosen": 0.7952964305877686, "logits/rejected": 0.7968006134033203, "logps/chosen": -2.5619399547576904, "logps/rejected": -5.115534782409668, "loss": 0.6514, "nll_loss": 0.633928656578064, "rewards/accuracies": 1.0, "rewards/chosen": -0.25619399547576904, "rewards/margins": 0.2553594708442688, "rewards/rejected": -0.5115534663200378, "step": 5510 }, { "epoch": 15.08829568788501, "grad_norm": 4.702223300933838, "learning_rate": 2.4547945205479453e-07, "log_odds_chosen": 1.7950987815856934, "log_odds_ratio": -0.2591155767440796, "logits/chosen": 0.791289746761322, "logits/rejected": 0.7787763476371765, "logps/chosen": -2.3591151237487793, "logps/rejected": -4.049903392791748, "loss": 0.6052, "nll_loss": 0.5792862176895142, "rewards/accuracies": 1.0, "rewards/chosen": -0.2359115034341812, "rewards/margins": 0.16907882690429688, "rewards/rejected": -0.4049903154373169, "step": 5511 }, { "epoch": 15.091033538672143, "grad_norm": 5.4391255378723145, "learning_rate": 2.4534246575342463e-07, "log_odds_chosen": 2.4954795837402344, "log_odds_ratio": -0.1924598515033722, "logits/chosen": 0.7316588759422302, "logits/rejected": 0.7526840567588806, "logps/chosen": -1.6967833042144775, "logps/rejected": -4.007778167724609, "loss": 0.59, "nll_loss": 0.5707327127456665, "rewards/accuracies": 1.0, "rewards/chosen": -0.16967833042144775, "rewards/margins": 0.231099471449852, "rewards/rejected": -0.40077781677246094, "step": 5512 }, { "epoch": 15.093771389459274, "grad_norm": 5.049811363220215, "learning_rate": 2.452054794520548e-07, "log_odds_chosen": 3.0689549446105957, "log_odds_ratio": -0.1704929769039154, "logits/chosen": 0.7643560171127319, "logits/rejected": 0.7585222721099854, "logps/chosen": -2.3207554817199707, "logps/rejected": -5.2633161544799805, "loss": 0.6145, "nll_loss": 0.5974622964859009, "rewards/accuracies": 0.875, "rewards/chosen": -0.2320755422115326, "rewards/margins": 0.2942560911178589, "rewards/rejected": -0.5263316631317139, "step": 5513 }, { "epoch": 15.096509240246407, "grad_norm": 6.030389308929443, "learning_rate": 2.4506849315068493e-07, "log_odds_chosen": 2.013129472732544, "log_odds_ratio": -0.2982068955898285, "logits/chosen": 0.7535926699638367, "logits/rejected": 0.742820143699646, "logps/chosen": -1.620295524597168, "logps/rejected": -3.505394220352173, "loss": 0.5955, "nll_loss": 0.5656954646110535, "rewards/accuracies": 1.0, "rewards/chosen": -0.16202956438064575, "rewards/margins": 0.18850985169410706, "rewards/rejected": -0.3505394160747528, "step": 5514 }, { "epoch": 15.099247091033538, "grad_norm": 5.479750156402588, "learning_rate": 2.4493150684931503e-07, "log_odds_chosen": 3.95276141166687, "log_odds_ratio": -0.07332644611597061, "logits/chosen": 0.7928391695022583, "logits/rejected": 0.8208787441253662, "logps/chosen": -2.2741315364837646, "logps/rejected": -6.065021991729736, "loss": 0.6499, "nll_loss": 0.6425445675849915, "rewards/accuracies": 1.0, "rewards/chosen": -0.22741316258907318, "rewards/margins": 0.3790890872478485, "rewards/rejected": -0.6065022945404053, "step": 5515 }, { "epoch": 15.101984941820671, "grad_norm": 6.010155200958252, "learning_rate": 2.447945205479452e-07, "log_odds_chosen": 2.815737724304199, "log_odds_ratio": -0.299056738615036, "logits/chosen": 0.9069526791572571, "logits/rejected": 0.9563097953796387, "logps/chosen": -2.46346378326416, "logps/rejected": -5.201963424682617, "loss": 0.7615, "nll_loss": 0.7315647006034851, "rewards/accuracies": 0.875, "rewards/chosen": -0.2463463693857193, "rewards/margins": 0.2738499939441681, "rewards/rejected": -0.5201963782310486, "step": 5516 }, { "epoch": 15.104722792607802, "grad_norm": 8.169658660888672, "learning_rate": 2.4465753424657534e-07, "log_odds_chosen": 1.7533204555511475, "log_odds_ratio": -0.42910587787628174, "logits/chosen": 1.017659306526184, "logits/rejected": 1.0553092956542969, "logps/chosen": -2.9972665309906006, "logps/rejected": -4.633950710296631, "loss": 0.7377, "nll_loss": 0.6948245167732239, "rewards/accuracies": 0.75, "rewards/chosen": -0.299726665019989, "rewards/margins": 0.1636684238910675, "rewards/rejected": -0.46339505910873413, "step": 5517 }, { "epoch": 15.107460643394935, "grad_norm": 4.86779260635376, "learning_rate": 2.445205479452055e-07, "log_odds_chosen": 1.910998821258545, "log_odds_ratio": -0.25928276777267456, "logits/chosen": 0.7670307755470276, "logits/rejected": 0.6985811591148376, "logps/chosen": -1.812847375869751, "logps/rejected": -3.6050705909729004, "loss": 0.6644, "nll_loss": 0.6384294033050537, "rewards/accuracies": 0.875, "rewards/chosen": -0.18128474056720734, "rewards/margins": 0.17922231554985046, "rewards/rejected": -0.360507071018219, "step": 5518 }, { "epoch": 15.110198494182066, "grad_norm": 4.8990349769592285, "learning_rate": 2.443835616438356e-07, "log_odds_chosen": 2.211198329925537, "log_odds_ratio": -0.21728035807609558, "logits/chosen": 0.6186002492904663, "logits/rejected": 0.6476767063140869, "logps/chosen": -2.13657808303833, "logps/rejected": -4.225836753845215, "loss": 0.6702, "nll_loss": 0.6484247446060181, "rewards/accuracies": 1.0, "rewards/chosen": -0.21365781128406525, "rewards/margins": 0.20892590284347534, "rewards/rejected": -0.4225836992263794, "step": 5519 }, { "epoch": 15.1129363449692, "grad_norm": 4.869511127471924, "learning_rate": 2.4424657534246574e-07, "log_odds_chosen": 1.8566254377365112, "log_odds_ratio": -0.2716837227344513, "logits/chosen": 0.7887314558029175, "logits/rejected": 0.8377839922904968, "logps/chosen": -2.0357139110565186, "logps/rejected": -3.7668190002441406, "loss": 0.6701, "nll_loss": 0.642920196056366, "rewards/accuracies": 0.875, "rewards/chosen": -0.2035713791847229, "rewards/margins": 0.17311052978038788, "rewards/rejected": -0.376681923866272, "step": 5520 }, { "epoch": 15.11567419575633, "grad_norm": 4.688965320587158, "learning_rate": 2.441095890410959e-07, "log_odds_chosen": 2.7012572288513184, "log_odds_ratio": -0.1387680470943451, "logits/chosen": 1.0131901502609253, "logits/rejected": 0.9999744892120361, "logps/chosen": -1.7216557264328003, "logps/rejected": -4.235751152038574, "loss": 0.6035, "nll_loss": 0.589624285697937, "rewards/accuracies": 1.0, "rewards/chosen": -0.17216557264328003, "rewards/margins": 0.25140953063964844, "rewards/rejected": -0.42357510328292847, "step": 5521 }, { "epoch": 15.118412046543463, "grad_norm": 5.788275241851807, "learning_rate": 2.4397260273972604e-07, "log_odds_chosen": 0.8662071228027344, "log_odds_ratio": -0.4009895324707031, "logits/chosen": 0.9783904552459717, "logits/rejected": 0.8805687427520752, "logps/chosen": -1.634738564491272, "logps/rejected": -2.360797643661499, "loss": 0.6234, "nll_loss": 0.5832861065864563, "rewards/accuracies": 0.75, "rewards/chosen": -0.16347385942935944, "rewards/margins": 0.0726059079170227, "rewards/rejected": -0.23607978224754333, "step": 5522 }, { "epoch": 15.121149897330595, "grad_norm": 6.267362594604492, "learning_rate": 2.4383561643835614e-07, "log_odds_chosen": 3.2786436080932617, "log_odds_ratio": -0.2985988259315491, "logits/chosen": 0.7617312669754028, "logits/rejected": 0.767288327217102, "logps/chosen": -2.4867568016052246, "logps/rejected": -5.633831024169922, "loss": 0.7132, "nll_loss": 0.6833249926567078, "rewards/accuracies": 0.875, "rewards/chosen": -0.24867567420005798, "rewards/margins": 0.314707487821579, "rewards/rejected": -0.5633831024169922, "step": 5523 }, { "epoch": 15.123887748117728, "grad_norm": 4.870014667510986, "learning_rate": 2.436986301369863e-07, "log_odds_chosen": 1.6988084316253662, "log_odds_ratio": -0.20542873442173004, "logits/chosen": 1.0207531452178955, "logits/rejected": 1.020371437072754, "logps/chosen": -2.3371410369873047, "logps/rejected": -3.9340784549713135, "loss": 0.6892, "nll_loss": 0.6686338186264038, "rewards/accuracies": 1.0, "rewards/chosen": -0.23371413350105286, "rewards/margins": 0.15969368815422058, "rewards/rejected": -0.39340782165527344, "step": 5524 }, { "epoch": 15.126625598904859, "grad_norm": 4.8588690757751465, "learning_rate": 2.435616438356164e-07, "log_odds_chosen": 2.698756694793701, "log_odds_ratio": -0.12190095335245132, "logits/chosen": 0.8627983331680298, "logits/rejected": 0.9123340249061584, "logps/chosen": -1.8558549880981445, "logps/rejected": -4.316107273101807, "loss": 0.5661, "nll_loss": 0.5539453029632568, "rewards/accuracies": 1.0, "rewards/chosen": -0.18558551371097565, "rewards/margins": 0.2460252344608307, "rewards/rejected": -0.43161073327064514, "step": 5525 }, { "epoch": 15.129363449691992, "grad_norm": 6.262192249298096, "learning_rate": 2.4342465753424655e-07, "log_odds_chosen": 1.7282295227050781, "log_odds_ratio": -0.2874535322189331, "logits/chosen": 0.8820412158966064, "logits/rejected": 0.9326350688934326, "logps/chosen": -1.9358717203140259, "logps/rejected": -3.5463144779205322, "loss": 0.5391, "nll_loss": 0.5103530287742615, "rewards/accuracies": 0.875, "rewards/chosen": -0.19358716905117035, "rewards/margins": 0.16104426980018616, "rewards/rejected": -0.3546314537525177, "step": 5526 }, { "epoch": 15.132101300479125, "grad_norm": 5.458600044250488, "learning_rate": 2.432876712328767e-07, "log_odds_chosen": 3.3361082077026367, "log_odds_ratio": -0.13615165650844574, "logits/chosen": 0.9909824132919312, "logits/rejected": 1.0680416822433472, "logps/chosen": -2.321519136428833, "logps/rejected": -5.467991828918457, "loss": 0.7395, "nll_loss": 0.7258509993553162, "rewards/accuracies": 1.0, "rewards/chosen": -0.23215192556381226, "rewards/margins": 0.31464725732803345, "rewards/rejected": -0.5467991828918457, "step": 5527 }, { "epoch": 15.134839151266256, "grad_norm": 5.276974201202393, "learning_rate": 2.4315068493150685e-07, "log_odds_chosen": 2.648078680038452, "log_odds_ratio": -0.28869739174842834, "logits/chosen": 1.1974513530731201, "logits/rejected": 1.1103743314743042, "logps/chosen": -1.6142147779464722, "logps/rejected": -4.078516960144043, "loss": 0.6057, "nll_loss": 0.5768171548843384, "rewards/accuracies": 0.875, "rewards/chosen": -0.16142147779464722, "rewards/margins": 0.2464302033185959, "rewards/rejected": -0.4078516662120819, "step": 5528 }, { "epoch": 15.137577002053389, "grad_norm": 5.850637912750244, "learning_rate": 2.43013698630137e-07, "log_odds_chosen": 2.159227132797241, "log_odds_ratio": -0.30736833810806274, "logits/chosen": 0.8494001626968384, "logits/rejected": 0.8292931914329529, "logps/chosen": -2.437013864517212, "logps/rejected": -4.48760986328125, "loss": 0.7717, "nll_loss": 0.7409592866897583, "rewards/accuracies": 0.75, "rewards/chosen": -0.24370139837265015, "rewards/margins": 0.20505955815315247, "rewards/rejected": -0.4487609565258026, "step": 5529 }, { "epoch": 15.14031485284052, "grad_norm": 5.118168830871582, "learning_rate": 2.428767123287671e-07, "log_odds_chosen": 2.983734607696533, "log_odds_ratio": -0.2776032090187073, "logits/chosen": 0.9507029056549072, "logits/rejected": 0.9420560598373413, "logps/chosen": -2.4235410690307617, "logps/rejected": -5.322829723358154, "loss": 0.7404, "nll_loss": 0.7126296162605286, "rewards/accuracies": 0.875, "rewards/chosen": -0.2423541098833084, "rewards/margins": 0.2899288833141327, "rewards/rejected": -0.5322830080986023, "step": 5530 }, { "epoch": 15.143052703627653, "grad_norm": 8.381291389465332, "learning_rate": 2.4273972602739725e-07, "log_odds_chosen": 1.3398059606552124, "log_odds_ratio": -0.6833296418190002, "logits/chosen": 0.7997984886169434, "logits/rejected": 0.8162920475006104, "logps/chosen": -2.7615256309509277, "logps/rejected": -3.9605979919433594, "loss": 0.645, "nll_loss": 0.5766663551330566, "rewards/accuracies": 0.75, "rewards/chosen": -0.2761525809764862, "rewards/margins": 0.11990721523761749, "rewards/rejected": -0.3960598111152649, "step": 5531 }, { "epoch": 15.145790554414784, "grad_norm": 5.657228946685791, "learning_rate": 2.4260273972602735e-07, "log_odds_chosen": 2.6406850814819336, "log_odds_ratio": -0.1806846261024475, "logits/chosen": 0.9278774261474609, "logits/rejected": 0.9873579740524292, "logps/chosen": -2.61600923538208, "logps/rejected": -5.142573356628418, "loss": 0.6204, "nll_loss": 0.6023054122924805, "rewards/accuracies": 1.0, "rewards/chosen": -0.26160091161727905, "rewards/margins": 0.25265640020370483, "rewards/rejected": -0.5142573714256287, "step": 5532 }, { "epoch": 15.148528405201917, "grad_norm": 4.687076568603516, "learning_rate": 2.424657534246575e-07, "log_odds_chosen": 1.305551290512085, "log_odds_ratio": -0.37356793880462646, "logits/chosen": 0.8918941020965576, "logits/rejected": 0.9253535866737366, "logps/chosen": -1.9709440469741821, "logps/rejected": -3.1928200721740723, "loss": 0.6509, "nll_loss": 0.6135330200195312, "rewards/accuracies": 0.75, "rewards/chosen": -0.1970943957567215, "rewards/margins": 0.12218759208917618, "rewards/rejected": -0.31928199529647827, "step": 5533 }, { "epoch": 15.151266255989048, "grad_norm": 6.727692127227783, "learning_rate": 2.4232876712328766e-07, "log_odds_chosen": 3.238217353820801, "log_odds_ratio": -0.1527237892150879, "logits/chosen": 0.9243718385696411, "logits/rejected": 0.9758413434028625, "logps/chosen": -1.7792655229568481, "logps/rejected": -4.835531234741211, "loss": 0.7243, "nll_loss": 0.7089970111846924, "rewards/accuracies": 1.0, "rewards/chosen": -0.17792655527591705, "rewards/margins": 0.3056265711784363, "rewards/rejected": -0.4835531413555145, "step": 5534 }, { "epoch": 15.154004106776181, "grad_norm": 5.65268611907959, "learning_rate": 2.421917808219178e-07, "log_odds_chosen": 2.7084338665008545, "log_odds_ratio": -0.1332138478755951, "logits/chosen": 0.9398033618927002, "logits/rejected": 1.0222138166427612, "logps/chosen": -1.9469517469406128, "logps/rejected": -4.463242530822754, "loss": 0.6067, "nll_loss": 0.5933818221092224, "rewards/accuracies": 1.0, "rewards/chosen": -0.19469517469406128, "rewards/margins": 0.251629114151001, "rewards/rejected": -0.44632428884506226, "step": 5535 }, { "epoch": 15.156741957563312, "grad_norm": 4.725836277008057, "learning_rate": 2.4205479452054796e-07, "log_odds_chosen": 2.3603053092956543, "log_odds_ratio": -0.2281198650598526, "logits/chosen": 0.9527186155319214, "logits/rejected": 0.875836968421936, "logps/chosen": -1.9387637376785278, "logps/rejected": -4.154919147491455, "loss": 0.6588, "nll_loss": 0.6359575986862183, "rewards/accuracies": 1.0, "rewards/chosen": -0.19387638568878174, "rewards/margins": 0.2216155230998993, "rewards/rejected": -0.41549190878868103, "step": 5536 }, { "epoch": 15.159479808350445, "grad_norm": 6.714807033538818, "learning_rate": 2.4191780821917806e-07, "log_odds_chosen": 0.8335573673248291, "log_odds_ratio": -0.4010424017906189, "logits/chosen": 0.8396333456039429, "logits/rejected": 0.738525927066803, "logps/chosen": -3.3155877590179443, "logps/rejected": -4.056989669799805, "loss": 0.6947, "nll_loss": 0.654548168182373, "rewards/accuracies": 1.0, "rewards/chosen": -0.3315587639808655, "rewards/margins": 0.07414016127586365, "rewards/rejected": -0.4056989252567291, "step": 5537 }, { "epoch": 15.162217659137577, "grad_norm": 5.628910064697266, "learning_rate": 2.417808219178082e-07, "log_odds_chosen": 1.3963454961776733, "log_odds_ratio": -0.3814927637577057, "logits/chosen": 0.8056789040565491, "logits/rejected": 0.8448119163513184, "logps/chosen": -2.314661741256714, "logps/rejected": -3.6553192138671875, "loss": 0.6283, "nll_loss": 0.5901416540145874, "rewards/accuracies": 0.75, "rewards/chosen": -0.2314661741256714, "rewards/margins": 0.13406574726104736, "rewards/rejected": -0.36553192138671875, "step": 5538 }, { "epoch": 15.16495550992471, "grad_norm": 8.670825004577637, "learning_rate": 2.416438356164383e-07, "log_odds_chosen": 1.067193865776062, "log_odds_ratio": -0.6776083707809448, "logits/chosen": 0.8768267631530762, "logits/rejected": 0.9326606392860413, "logps/chosen": -3.232325553894043, "logps/rejected": -4.27062463760376, "loss": 0.7462, "nll_loss": 0.6784501671791077, "rewards/accuracies": 0.75, "rewards/chosen": -0.32323259115219116, "rewards/margins": 0.10382988303899765, "rewards/rejected": -0.427062451839447, "step": 5539 }, { "epoch": 15.16769336071184, "grad_norm": 6.394906997680664, "learning_rate": 2.4150684931506846e-07, "log_odds_chosen": 2.0640718936920166, "log_odds_ratio": -0.32767224311828613, "logits/chosen": 0.8327846527099609, "logits/rejected": 0.8665635585784912, "logps/chosen": -1.8999204635620117, "logps/rejected": -3.774099349975586, "loss": 0.5548, "nll_loss": 0.5220613479614258, "rewards/accuracies": 0.875, "rewards/chosen": -0.1899920403957367, "rewards/margins": 0.1874178946018219, "rewards/rejected": -0.3774099349975586, "step": 5540 }, { "epoch": 15.170431211498974, "grad_norm": 7.75304651260376, "learning_rate": 2.413698630136986e-07, "log_odds_chosen": 1.6296448707580566, "log_odds_ratio": -0.4441812038421631, "logits/chosen": 0.5326479077339172, "logits/rejected": 0.5360978245735168, "logps/chosen": -2.304055690765381, "logps/rejected": -3.834730625152588, "loss": 0.6479, "nll_loss": 0.6034757494926453, "rewards/accuracies": 0.875, "rewards/chosen": -0.23040556907653809, "rewards/margins": 0.15306749939918518, "rewards/rejected": -0.38347306847572327, "step": 5541 }, { "epoch": 15.173169062286105, "grad_norm": 6.356451034545898, "learning_rate": 2.4123287671232877e-07, "log_odds_chosen": 2.271013021469116, "log_odds_ratio": -0.3509829342365265, "logits/chosen": 1.033651351928711, "logits/rejected": 1.0617377758026123, "logps/chosen": -2.7794437408447266, "logps/rejected": -4.992430210113525, "loss": 0.7733, "nll_loss": 0.7382077574729919, "rewards/accuracies": 0.875, "rewards/chosen": -0.2779443860054016, "rewards/margins": 0.22129862010478973, "rewards/rejected": -0.49924302101135254, "step": 5542 }, { "epoch": 15.175906913073238, "grad_norm": 11.203495025634766, "learning_rate": 2.410958904109589e-07, "log_odds_chosen": 1.2487404346466064, "log_odds_ratio": -0.8427348136901855, "logits/chosen": 0.8655766248703003, "logits/rejected": 0.9122718572616577, "logps/chosen": -3.1837143898010254, "logps/rejected": -4.394482135772705, "loss": 0.8225, "nll_loss": 0.7381933927536011, "rewards/accuracies": 0.5, "rewards/chosen": -0.31837141513824463, "rewards/margins": 0.1210767924785614, "rewards/rejected": -0.4394482374191284, "step": 5543 }, { "epoch": 15.178644763860369, "grad_norm": 5.3864521980285645, "learning_rate": 2.40958904109589e-07, "log_odds_chosen": 2.780200242996216, "log_odds_ratio": -0.3280961513519287, "logits/chosen": 0.9737313985824585, "logits/rejected": 1.0143178701400757, "logps/chosen": -2.2872283458709717, "logps/rejected": -4.987814903259277, "loss": 0.6992, "nll_loss": 0.6664166450500488, "rewards/accuracies": 0.75, "rewards/chosen": -0.22872282564640045, "rewards/margins": 0.27005869150161743, "rewards/rejected": -0.4987815022468567, "step": 5544 }, { "epoch": 15.181382614647502, "grad_norm": 5.58628511428833, "learning_rate": 2.4082191780821917e-07, "log_odds_chosen": 1.6396434307098389, "log_odds_ratio": -0.33369261026382446, "logits/chosen": 0.9105031490325928, "logits/rejected": 0.959991455078125, "logps/chosen": -2.6495842933654785, "logps/rejected": -4.22084903717041, "loss": 0.6132, "nll_loss": 0.5797983407974243, "rewards/accuracies": 0.75, "rewards/chosen": -0.2649584412574768, "rewards/margins": 0.15712645649909973, "rewards/rejected": -0.42208486795425415, "step": 5545 }, { "epoch": 15.184120465434633, "grad_norm": 4.847257137298584, "learning_rate": 2.4068493150684927e-07, "log_odds_chosen": 2.675088882446289, "log_odds_ratio": -0.3123127222061157, "logits/chosen": 0.7964025735855103, "logits/rejected": 0.8399559855461121, "logps/chosen": -1.9756786823272705, "logps/rejected": -4.575892448425293, "loss": 0.6096, "nll_loss": 0.5783319473266602, "rewards/accuracies": 0.875, "rewards/chosen": -0.197567880153656, "rewards/margins": 0.2600213587284088, "rewards/rejected": -0.4575892686843872, "step": 5546 }, { "epoch": 15.186858316221766, "grad_norm": 7.051913738250732, "learning_rate": 2.405479452054794e-07, "log_odds_chosen": 2.1145920753479004, "log_odds_ratio": -0.3330557346343994, "logits/chosen": 1.0414196252822876, "logits/rejected": 1.0837578773498535, "logps/chosen": -2.449835777282715, "logps/rejected": -4.475815773010254, "loss": 0.709, "nll_loss": 0.6756975054740906, "rewards/accuracies": 0.875, "rewards/chosen": -0.24498358368873596, "rewards/margins": 0.2025979906320572, "rewards/rejected": -0.44758155941963196, "step": 5547 }, { "epoch": 15.189596167008897, "grad_norm": 5.519481182098389, "learning_rate": 2.404109589041096e-07, "log_odds_chosen": 2.0952165126800537, "log_odds_ratio": -0.25441113114356995, "logits/chosen": 0.9012854695320129, "logits/rejected": 0.8443701267242432, "logps/chosen": -1.9971299171447754, "logps/rejected": -3.982924461364746, "loss": 0.7228, "nll_loss": 0.6973907947540283, "rewards/accuracies": 0.875, "rewards/chosen": -0.19971299171447754, "rewards/margins": 0.19857944548130035, "rewards/rejected": -0.3982924520969391, "step": 5548 }, { "epoch": 15.19233401779603, "grad_norm": 5.57747220993042, "learning_rate": 2.4027397260273973e-07, "log_odds_chosen": 2.2973430156707764, "log_odds_ratio": -0.2160244584083557, "logits/chosen": 0.8989810347557068, "logits/rejected": 0.9551925659179688, "logps/chosen": -2.3988678455352783, "logps/rejected": -4.617588520050049, "loss": 0.5659, "nll_loss": 0.5442554950714111, "rewards/accuracies": 1.0, "rewards/chosen": -0.2398867905139923, "rewards/margins": 0.22187210619449615, "rewards/rejected": -0.4617588520050049, "step": 5549 }, { "epoch": 15.195071868583161, "grad_norm": 5.3038716316223145, "learning_rate": 2.401369863013699e-07, "log_odds_chosen": 1.0854885578155518, "log_odds_ratio": -0.3337211608886719, "logits/chosen": 0.6674998998641968, "logits/rejected": 0.7203511595726013, "logps/chosen": -1.7928340435028076, "logps/rejected": -2.7136013507843018, "loss": 0.569, "nll_loss": 0.5356253981590271, "rewards/accuracies": 1.0, "rewards/chosen": -0.17928341031074524, "rewards/margins": 0.09207674860954285, "rewards/rejected": -0.2713601589202881, "step": 5550 }, { "epoch": 15.197809719370294, "grad_norm": 6.0915327072143555, "learning_rate": 2.4e-07, "log_odds_chosen": 2.200996160507202, "log_odds_ratio": -0.2493324726819992, "logits/chosen": 0.7973617315292358, "logits/rejected": 0.7990863919258118, "logps/chosen": -1.8526051044464111, "logps/rejected": -3.9319801330566406, "loss": 0.5892, "nll_loss": 0.5642267465591431, "rewards/accuracies": 1.0, "rewards/chosen": -0.18526050448417664, "rewards/margins": 0.20793747901916504, "rewards/rejected": -0.39319801330566406, "step": 5551 }, { "epoch": 15.200547570157426, "grad_norm": 5.750964641571045, "learning_rate": 2.3986301369863013e-07, "log_odds_chosen": 2.9795258045196533, "log_odds_ratio": -0.17013169825077057, "logits/chosen": 0.7051566243171692, "logits/rejected": 0.6720372438430786, "logps/chosen": -2.0082502365112305, "logps/rejected": -4.855032444000244, "loss": 0.6911, "nll_loss": 0.6741106510162354, "rewards/accuracies": 1.0, "rewards/chosen": -0.200825035572052, "rewards/margins": 0.284678190946579, "rewards/rejected": -0.4855031967163086, "step": 5552 }, { "epoch": 15.203285420944558, "grad_norm": 4.4387688636779785, "learning_rate": 2.3972602739726023e-07, "log_odds_chosen": 2.0474984645843506, "log_odds_ratio": -0.22150152921676636, "logits/chosen": 0.7381490468978882, "logits/rejected": 0.6994010806083679, "logps/chosen": -1.7669785022735596, "logps/rejected": -3.6504874229431152, "loss": 0.6291, "nll_loss": 0.6069017648696899, "rewards/accuracies": 1.0, "rewards/chosen": -0.17669785022735596, "rewards/margins": 0.18835091590881348, "rewards/rejected": -0.36504873633384705, "step": 5553 }, { "epoch": 15.206023271731691, "grad_norm": 4.9461750984191895, "learning_rate": 2.395890410958904e-07, "log_odds_chosen": 2.06003475189209, "log_odds_ratio": -0.24265670776367188, "logits/chosen": 0.9644757509231567, "logits/rejected": 0.9925499558448792, "logps/chosen": -1.8158389329910278, "logps/rejected": -3.6984267234802246, "loss": 0.6906, "nll_loss": 0.6663107872009277, "rewards/accuracies": 1.0, "rewards/chosen": -0.18158389627933502, "rewards/margins": 0.18825878202915192, "rewards/rejected": -0.36984264850616455, "step": 5554 }, { "epoch": 15.208761122518823, "grad_norm": 5.365074157714844, "learning_rate": 2.3945205479452053e-07, "log_odds_chosen": 3.592177391052246, "log_odds_ratio": -0.19111299514770508, "logits/chosen": 0.9836134910583496, "logits/rejected": 1.0166231393814087, "logps/chosen": -2.626455545425415, "logps/rejected": -6.157260894775391, "loss": 0.6573, "nll_loss": 0.6381582617759705, "rewards/accuracies": 0.875, "rewards/chosen": -0.26264557242393494, "rewards/margins": 0.35308051109313965, "rewards/rejected": -0.615726113319397, "step": 5555 }, { "epoch": 15.211498973305956, "grad_norm": 5.138327598571777, "learning_rate": 2.393150684931507e-07, "log_odds_chosen": 3.7441630363464355, "log_odds_ratio": -0.10879360139369965, "logits/chosen": 0.9991714954376221, "logits/rejected": 0.9827974438667297, "logps/chosen": -2.5772266387939453, "logps/rejected": -6.198753356933594, "loss": 0.693, "nll_loss": 0.6821190714836121, "rewards/accuracies": 1.0, "rewards/chosen": -0.2577226758003235, "rewards/margins": 0.36215266585350037, "rewards/rejected": -0.6198753714561462, "step": 5556 }, { "epoch": 15.214236824093087, "grad_norm": 5.75608491897583, "learning_rate": 2.3917808219178084e-07, "log_odds_chosen": 2.8105599880218506, "log_odds_ratio": -0.13101644814014435, "logits/chosen": 1.0668160915374756, "logits/rejected": 1.0698084831237793, "logps/chosen": -1.8736824989318848, "logps/rejected": -4.521792411804199, "loss": 0.5937, "nll_loss": 0.5806401968002319, "rewards/accuracies": 1.0, "rewards/chosen": -0.187368243932724, "rewards/margins": 0.2648110091686249, "rewards/rejected": -0.4521792531013489, "step": 5557 }, { "epoch": 15.21697467488022, "grad_norm": 6.1819257736206055, "learning_rate": 2.3904109589041094e-07, "log_odds_chosen": 1.368683099746704, "log_odds_ratio": -0.4292834401130676, "logits/chosen": 1.0051093101501465, "logits/rejected": 0.975334644317627, "logps/chosen": -2.3435137271881104, "logps/rejected": -3.6184000968933105, "loss": 0.7305, "nll_loss": 0.6875398755073547, "rewards/accuracies": 0.875, "rewards/chosen": -0.23435136675834656, "rewards/margins": 0.1274886429309845, "rewards/rejected": -0.36184000968933105, "step": 5558 }, { "epoch": 15.219712525667351, "grad_norm": 6.862893104553223, "learning_rate": 2.389041095890411e-07, "log_odds_chosen": 1.8606719970703125, "log_odds_ratio": -0.4076724648475647, "logits/chosen": 0.7511968612670898, "logits/rejected": 0.6983509063720703, "logps/chosen": -2.292661190032959, "logps/rejected": -4.072566032409668, "loss": 0.6345, "nll_loss": 0.593754231929779, "rewards/accuracies": 0.625, "rewards/chosen": -0.22926613688468933, "rewards/margins": 0.17799043655395508, "rewards/rejected": -0.4072565734386444, "step": 5559 }, { "epoch": 15.222450376454484, "grad_norm": 6.220983028411865, "learning_rate": 2.3876712328767124e-07, "log_odds_chosen": 2.0704848766326904, "log_odds_ratio": -0.29124608635902405, "logits/chosen": 0.7873265147209167, "logits/rejected": 0.7803311347961426, "logps/chosen": -2.0901682376861572, "logps/rejected": -4.026371955871582, "loss": 0.758, "nll_loss": 0.7288482189178467, "rewards/accuracies": 0.875, "rewards/chosen": -0.2090168297290802, "rewards/margins": 0.19362042844295502, "rewards/rejected": -0.402637243270874, "step": 5560 }, { "epoch": 15.225188227241615, "grad_norm": 9.327864646911621, "learning_rate": 2.3863013698630134e-07, "log_odds_chosen": 2.2455244064331055, "log_odds_ratio": -0.3974905014038086, "logits/chosen": 0.9864574074745178, "logits/rejected": 0.9918717741966248, "logps/chosen": -2.5680999755859375, "logps/rejected": -4.696655750274658, "loss": 0.6808, "nll_loss": 0.6410648822784424, "rewards/accuracies": 0.75, "rewards/chosen": -0.2568100094795227, "rewards/margins": 0.21285556256771088, "rewards/rejected": -0.4696655571460724, "step": 5561 }, { "epoch": 15.227926078028748, "grad_norm": 5.596108436584473, "learning_rate": 2.384931506849315e-07, "log_odds_chosen": 1.8435062170028687, "log_odds_ratio": -0.3494607210159302, "logits/chosen": 0.850860595703125, "logits/rejected": 0.8924014568328857, "logps/chosen": -1.8772239685058594, "logps/rejected": -3.6295061111450195, "loss": 0.5286, "nll_loss": 0.49370276927948, "rewards/accuracies": 0.75, "rewards/chosen": -0.18772239983081818, "rewards/margins": 0.17522820830345154, "rewards/rejected": -0.3629506230354309, "step": 5562 }, { "epoch": 15.23066392881588, "grad_norm": 7.321225166320801, "learning_rate": 2.3835616438356162e-07, "log_odds_chosen": 1.8724650144577026, "log_odds_ratio": -0.5729706287384033, "logits/chosen": 0.7444705367088318, "logits/rejected": 0.6486055850982666, "logps/chosen": -2.8431248664855957, "logps/rejected": -4.59442138671875, "loss": 0.7597, "nll_loss": 0.7023714780807495, "rewards/accuracies": 0.75, "rewards/chosen": -0.28431248664855957, "rewards/margins": 0.17512962222099304, "rewards/rejected": -0.459442138671875, "step": 5563 }, { "epoch": 15.233401779603012, "grad_norm": 5.88264274597168, "learning_rate": 2.3821917808219177e-07, "log_odds_chosen": 2.7838919162750244, "log_odds_ratio": -0.15640103816986084, "logits/chosen": 0.9141150712966919, "logits/rejected": 0.9212965965270996, "logps/chosen": -2.1456377506256104, "logps/rejected": -4.760919570922852, "loss": 0.6076, "nll_loss": 0.5919638872146606, "rewards/accuracies": 1.0, "rewards/chosen": -0.21456378698349, "rewards/margins": 0.2615281939506531, "rewards/rejected": -0.47609198093414307, "step": 5564 }, { "epoch": 15.236139630390143, "grad_norm": 5.07717752456665, "learning_rate": 2.380821917808219e-07, "log_odds_chosen": 2.623682975769043, "log_odds_ratio": -0.12860199809074402, "logits/chosen": 0.640731692314148, "logits/rejected": 0.6727526187896729, "logps/chosen": -2.155289649963379, "logps/rejected": -4.615447521209717, "loss": 0.7216, "nll_loss": 0.7087055444717407, "rewards/accuracies": 1.0, "rewards/chosen": -0.2155289351940155, "rewards/margins": 0.24601584672927856, "rewards/rejected": -0.46154478192329407, "step": 5565 }, { "epoch": 15.238877481177276, "grad_norm": 5.566354274749756, "learning_rate": 2.3794520547945205e-07, "log_odds_chosen": 1.8258001804351807, "log_odds_ratio": -0.32856523990631104, "logits/chosen": 0.9081723690032959, "logits/rejected": 0.9644001126289368, "logps/chosen": -2.4348809719085693, "logps/rejected": -4.208856105804443, "loss": 0.606, "nll_loss": 0.5731120705604553, "rewards/accuracies": 0.75, "rewards/chosen": -0.2434881031513214, "rewards/margins": 0.17739754915237427, "rewards/rejected": -0.4208856225013733, "step": 5566 }, { "epoch": 15.241615331964407, "grad_norm": 7.533950328826904, "learning_rate": 2.378082191780822e-07, "log_odds_chosen": 1.831274151802063, "log_odds_ratio": -0.33141258358955383, "logits/chosen": 1.0040414333343506, "logits/rejected": 1.0652446746826172, "logps/chosen": -2.4188408851623535, "logps/rejected": -4.137276649475098, "loss": 0.6936, "nll_loss": 0.6604562997817993, "rewards/accuracies": 0.875, "rewards/chosen": -0.24188408255577087, "rewards/margins": 0.17184360325336456, "rewards/rejected": -0.41372770071029663, "step": 5567 }, { "epoch": 15.24435318275154, "grad_norm": 5.5710225105285645, "learning_rate": 2.3767123287671233e-07, "log_odds_chosen": 3.6764016151428223, "log_odds_ratio": -0.140876904129982, "logits/chosen": 0.8219244480133057, "logits/rejected": 0.872428297996521, "logps/chosen": -3.0336217880249023, "logps/rejected": -6.618033409118652, "loss": 0.7507, "nll_loss": 0.7366049289703369, "rewards/accuracies": 1.0, "rewards/chosen": -0.3033621907234192, "rewards/margins": 0.35844120383262634, "rewards/rejected": -0.6618033647537231, "step": 5568 }, { "epoch": 15.247091033538672, "grad_norm": 6.140903472900391, "learning_rate": 2.3753424657534245e-07, "log_odds_chosen": 1.426256537437439, "log_odds_ratio": -0.36866217851638794, "logits/chosen": 1.0432543754577637, "logits/rejected": 1.109120488166809, "logps/chosen": -2.655320882797241, "logps/rejected": -3.977384090423584, "loss": 0.7042, "nll_loss": 0.6673410534858704, "rewards/accuracies": 0.875, "rewards/chosen": -0.26553210616111755, "rewards/margins": 0.13220633566379547, "rewards/rejected": -0.39773839712142944, "step": 5569 }, { "epoch": 15.249828884325805, "grad_norm": 6.282379150390625, "learning_rate": 2.3739726027397258e-07, "log_odds_chosen": 2.316171169281006, "log_odds_ratio": -0.19320809841156006, "logits/chosen": 0.9692559242248535, "logits/rejected": 1.0697684288024902, "logps/chosen": -2.715630054473877, "logps/rejected": -4.952960968017578, "loss": 0.888, "nll_loss": 0.8686593770980835, "rewards/accuracies": 1.0, "rewards/chosen": -0.27156299352645874, "rewards/margins": 0.2237331122159958, "rewards/rejected": -0.4952961206436157, "step": 5570 }, { "epoch": 15.252566735112936, "grad_norm": 4.220134735107422, "learning_rate": 2.3726027397260273e-07, "log_odds_chosen": 2.7347264289855957, "log_odds_ratio": -0.1928161084651947, "logits/chosen": 0.769186794757843, "logits/rejected": 0.844516932964325, "logps/chosen": -2.3441009521484375, "logps/rejected": -4.9964470863342285, "loss": 0.6168, "nll_loss": 0.597524881362915, "rewards/accuracies": 1.0, "rewards/chosen": -0.2344100922346115, "rewards/margins": 0.2652346193790436, "rewards/rejected": -0.4996446967124939, "step": 5571 }, { "epoch": 15.255304585900069, "grad_norm": 4.712632179260254, "learning_rate": 2.3712328767123285e-07, "log_odds_chosen": 2.2078826427459717, "log_odds_ratio": -0.24501411616802216, "logits/chosen": 0.9571301937103271, "logits/rejected": 0.9955796003341675, "logps/chosen": -2.037594795227051, "logps/rejected": -4.084644317626953, "loss": 0.6071, "nll_loss": 0.5825504660606384, "rewards/accuracies": 1.0, "rewards/chosen": -0.20375949144363403, "rewards/margins": 0.20470494031906128, "rewards/rejected": -0.4084644317626953, "step": 5572 }, { "epoch": 15.2580424366872, "grad_norm": 5.501998424530029, "learning_rate": 2.36986301369863e-07, "log_odds_chosen": 2.2083945274353027, "log_odds_ratio": -0.17940077185630798, "logits/chosen": 1.064380407333374, "logits/rejected": 1.1138335466384888, "logps/chosen": -2.3903942108154297, "logps/rejected": -4.494677543640137, "loss": 0.6964, "nll_loss": 0.6784852743148804, "rewards/accuracies": 1.0, "rewards/chosen": -0.23903942108154297, "rewards/margins": 0.21042831242084503, "rewards/rejected": -0.4494677186012268, "step": 5573 }, { "epoch": 15.260780287474333, "grad_norm": 5.296698570251465, "learning_rate": 2.3684931506849316e-07, "log_odds_chosen": 2.8488080501556396, "log_odds_ratio": -0.1988135576248169, "logits/chosen": 0.8313575387001038, "logits/rejected": 0.8363719582557678, "logps/chosen": -2.2989742755889893, "logps/rejected": -5.027595520019531, "loss": 0.6172, "nll_loss": 0.5973535180091858, "rewards/accuracies": 1.0, "rewards/chosen": -0.22989743947982788, "rewards/margins": 0.27286213636398315, "rewards/rejected": -0.502759575843811, "step": 5574 }, { "epoch": 15.263518138261464, "grad_norm": 8.784172058105469, "learning_rate": 2.3671232876712326e-07, "log_odds_chosen": 0.8881545662879944, "log_odds_ratio": -0.4036915898323059, "logits/chosen": 0.5756093263626099, "logits/rejected": 0.5016007423400879, "logps/chosen": -2.064173460006714, "logps/rejected": -2.826594591140747, "loss": 0.7226, "nll_loss": 0.6822535991668701, "rewards/accuracies": 0.875, "rewards/chosen": -0.20641733705997467, "rewards/margins": 0.0762421190738678, "rewards/rejected": -0.28265947103500366, "step": 5575 }, { "epoch": 15.266255989048597, "grad_norm": 7.072922229766846, "learning_rate": 2.365753424657534e-07, "log_odds_chosen": 1.0199449062347412, "log_odds_ratio": -0.44868817925453186, "logits/chosen": 0.8604536056518555, "logits/rejected": 0.738528847694397, "logps/chosen": -1.5250110626220703, "logps/rejected": -2.439554214477539, "loss": 0.6646, "nll_loss": 0.6197592616081238, "rewards/accuracies": 0.625, "rewards/chosen": -0.15250109136104584, "rewards/margins": 0.09145432710647583, "rewards/rejected": -0.24395543336868286, "step": 5576 }, { "epoch": 15.268993839835728, "grad_norm": 5.416445255279541, "learning_rate": 2.3643835616438354e-07, "log_odds_chosen": 1.1483545303344727, "log_odds_ratio": -0.27748605608940125, "logits/chosen": 0.6615475416183472, "logits/rejected": 0.706312894821167, "logps/chosen": -1.7872788906097412, "logps/rejected": -2.7806663513183594, "loss": 0.6014, "nll_loss": 0.5736725330352783, "rewards/accuracies": 1.0, "rewards/chosen": -0.1787278950214386, "rewards/margins": 0.09933874011039734, "rewards/rejected": -0.27806663513183594, "step": 5577 }, { "epoch": 15.271731690622861, "grad_norm": 5.9921135902404785, "learning_rate": 2.363013698630137e-07, "log_odds_chosen": 1.1533524990081787, "log_odds_ratio": -0.41360020637512207, "logits/chosen": 0.7255049347877502, "logits/rejected": 0.7523932456970215, "logps/chosen": -2.150719404220581, "logps/rejected": -3.1895852088928223, "loss": 0.5964, "nll_loss": 0.5550362467765808, "rewards/accuracies": 0.875, "rewards/chosen": -0.2150719165802002, "rewards/margins": 0.10388658195734024, "rewards/rejected": -0.31895849108695984, "step": 5578 }, { "epoch": 15.274469541409992, "grad_norm": 5.538167476654053, "learning_rate": 2.3616438356164384e-07, "log_odds_chosen": 2.182736873626709, "log_odds_ratio": -0.15699920058250427, "logits/chosen": 1.0921926498413086, "logits/rejected": 1.1403309106826782, "logps/chosen": -1.971534252166748, "logps/rejected": -4.001775741577148, "loss": 0.6039, "nll_loss": 0.5882309079170227, "rewards/accuracies": 1.0, "rewards/chosen": -0.19715344905853271, "rewards/margins": 0.20302410423755646, "rewards/rejected": -0.400177538394928, "step": 5579 }, { "epoch": 15.277207392197125, "grad_norm": 5.911759853363037, "learning_rate": 2.3602739726027397e-07, "log_odds_chosen": 2.8360111713409424, "log_odds_ratio": -0.16183805465698242, "logits/chosen": 0.9777536392211914, "logits/rejected": 1.0823136568069458, "logps/chosen": -2.310326099395752, "logps/rejected": -4.999629020690918, "loss": 0.7232, "nll_loss": 0.7070551514625549, "rewards/accuracies": 0.875, "rewards/chosen": -0.2310326099395752, "rewards/margins": 0.2689303159713745, "rewards/rejected": -0.4999629259109497, "step": 5580 }, { "epoch": 15.279945242984258, "grad_norm": 4.965200901031494, "learning_rate": 2.3589041095890412e-07, "log_odds_chosen": 4.354804515838623, "log_odds_ratio": -0.18182185292243958, "logits/chosen": 0.7690014839172363, "logits/rejected": 0.7959486246109009, "logps/chosen": -1.7439520359039307, "logps/rejected": -5.936354160308838, "loss": 0.6837, "nll_loss": 0.6655316948890686, "rewards/accuracies": 1.0, "rewards/chosen": -0.17439520359039307, "rewards/margins": 0.419240266084671, "rewards/rejected": -0.5936354994773865, "step": 5581 }, { "epoch": 15.28268309377139, "grad_norm": 5.9778547286987305, "learning_rate": 2.3575342465753422e-07, "log_odds_chosen": 1.2388134002685547, "log_odds_ratio": -0.37930765748023987, "logits/chosen": 0.826196014881134, "logits/rejected": 0.7876905798912048, "logps/chosen": -1.6159101724624634, "logps/rejected": -2.759549617767334, "loss": 0.5793, "nll_loss": 0.5413900017738342, "rewards/accuracies": 0.875, "rewards/chosen": -0.16159102320671082, "rewards/margins": 0.11436392366886139, "rewards/rejected": -0.2759549617767334, "step": 5582 }, { "epoch": 15.285420944558522, "grad_norm": 5.39400053024292, "learning_rate": 2.3561643835616437e-07, "log_odds_chosen": 2.531247138977051, "log_odds_ratio": -0.23289529979228973, "logits/chosen": 0.8211783170700073, "logits/rejected": 0.9018949866294861, "logps/chosen": -2.167759418487549, "logps/rejected": -4.590584754943848, "loss": 0.6178, "nll_loss": 0.5945079326629639, "rewards/accuracies": 1.0, "rewards/chosen": -0.21677595376968384, "rewards/margins": 0.24228249490261078, "rewards/rejected": -0.4590584635734558, "step": 5583 }, { "epoch": 15.288158795345653, "grad_norm": 5.388333797454834, "learning_rate": 2.354794520547945e-07, "log_odds_chosen": 2.626211643218994, "log_odds_ratio": -0.3040441572666168, "logits/chosen": 0.9145406484603882, "logits/rejected": 0.9217479228973389, "logps/chosen": -1.9049545526504517, "logps/rejected": -4.404211044311523, "loss": 0.6954, "nll_loss": 0.6650158166885376, "rewards/accuracies": 0.875, "rewards/chosen": -0.19049547612667084, "rewards/margins": 0.2499256134033203, "rewards/rejected": -0.44042110443115234, "step": 5584 }, { "epoch": 15.290896646132786, "grad_norm": 4.820343017578125, "learning_rate": 2.3534246575342465e-07, "log_odds_chosen": 2.1254591941833496, "log_odds_ratio": -0.22750034928321838, "logits/chosen": 1.1177139282226562, "logits/rejected": 1.1610201597213745, "logps/chosen": -2.6291112899780273, "logps/rejected": -4.635353088378906, "loss": 0.6841, "nll_loss": 0.661305844783783, "rewards/accuracies": 1.0, "rewards/chosen": -0.2629111409187317, "rewards/margins": 0.20062419772148132, "rewards/rejected": -0.463535338640213, "step": 5585 }, { "epoch": 15.293634496919918, "grad_norm": 9.833544731140137, "learning_rate": 2.352054794520548e-07, "log_odds_chosen": 1.4549256563186646, "log_odds_ratio": -0.5522394180297852, "logits/chosen": 0.9538716673851013, "logits/rejected": 0.9262630343437195, "logps/chosen": -3.304758071899414, "logps/rejected": -4.669832229614258, "loss": 0.6646, "nll_loss": 0.6093766093254089, "rewards/accuracies": 0.625, "rewards/chosen": -0.3304758071899414, "rewards/margins": 0.13650739192962646, "rewards/rejected": -0.46698319911956787, "step": 5586 }, { "epoch": 15.29637234770705, "grad_norm": 5.758293628692627, "learning_rate": 2.3506849315068492e-07, "log_odds_chosen": 1.790764570236206, "log_odds_ratio": -0.41464221477508545, "logits/chosen": 0.8207700252532959, "logits/rejected": 0.8655418753623962, "logps/chosen": -2.242673397064209, "logps/rejected": -3.9745936393737793, "loss": 0.6017, "nll_loss": 0.5602225661277771, "rewards/accuracies": 0.75, "rewards/chosen": -0.2242673635482788, "rewards/margins": 0.17319199442863464, "rewards/rejected": -0.39745932817459106, "step": 5587 }, { "epoch": 15.299110198494182, "grad_norm": 6.279335021972656, "learning_rate": 2.3493150684931508e-07, "log_odds_chosen": 1.5167423486709595, "log_odds_ratio": -0.3844325840473175, "logits/chosen": 0.8407695889472961, "logits/rejected": 0.7870709896087646, "logps/chosen": -2.5415163040161133, "logps/rejected": -4.001971244812012, "loss": 0.6688, "nll_loss": 0.6303632259368896, "rewards/accuracies": 0.75, "rewards/chosen": -0.25415167212486267, "rewards/margins": 0.14604546129703522, "rewards/rejected": -0.4001971483230591, "step": 5588 }, { "epoch": 15.301848049281315, "grad_norm": 10.14643383026123, "learning_rate": 2.3479452054794518e-07, "log_odds_chosen": 1.6856610774993896, "log_odds_ratio": -0.6167090535163879, "logits/chosen": 0.6434974074363708, "logits/rejected": 0.697962760925293, "logps/chosen": -3.5313563346862793, "logps/rejected": -5.087158203125, "loss": 0.8441, "nll_loss": 0.7823964357376099, "rewards/accuracies": 0.75, "rewards/chosen": -0.3531356155872345, "rewards/margins": 0.15558022260665894, "rewards/rejected": -0.5087158679962158, "step": 5589 }, { "epoch": 15.304585900068446, "grad_norm": 5.759764194488525, "learning_rate": 2.3465753424657533e-07, "log_odds_chosen": 1.8327066898345947, "log_odds_ratio": -0.2825711667537689, "logits/chosen": 0.8867987394332886, "logits/rejected": 0.9166700839996338, "logps/chosen": -2.464519500732422, "logps/rejected": -4.218837738037109, "loss": 0.6602, "nll_loss": 0.631981372833252, "rewards/accuracies": 0.875, "rewards/chosen": -0.2464519739151001, "rewards/margins": 0.17543181777000427, "rewards/rejected": -0.421883761882782, "step": 5590 }, { "epoch": 15.307323750855579, "grad_norm": 6.740490436553955, "learning_rate": 2.3452054794520548e-07, "log_odds_chosen": 1.896780252456665, "log_odds_ratio": -0.4057878851890564, "logits/chosen": 0.6169593930244446, "logits/rejected": 0.6753469705581665, "logps/chosen": -2.288137197494507, "logps/rejected": -4.098935127258301, "loss": 0.7108, "nll_loss": 0.6701905131340027, "rewards/accuracies": 0.75, "rewards/chosen": -0.22881372272968292, "rewards/margins": 0.18107977509498596, "rewards/rejected": -0.4098935127258301, "step": 5591 }, { "epoch": 15.31006160164271, "grad_norm": 5.616278648376465, "learning_rate": 2.343835616438356e-07, "log_odds_chosen": 1.5745329856872559, "log_odds_ratio": -0.2898064851760864, "logits/chosen": 0.8632172346115112, "logits/rejected": 0.7904217839241028, "logps/chosen": -1.7707077264785767, "logps/rejected": -3.110175132751465, "loss": 0.6249, "nll_loss": 0.5959275960922241, "rewards/accuracies": 1.0, "rewards/chosen": -0.17707078158855438, "rewards/margins": 0.1339467167854309, "rewards/rejected": -0.3110175132751465, "step": 5592 }, { "epoch": 15.312799452429843, "grad_norm": 6.742702007293701, "learning_rate": 2.3424657534246576e-07, "log_odds_chosen": 1.8440251350402832, "log_odds_ratio": -0.31373950839042664, "logits/chosen": 0.6791127324104309, "logits/rejected": 0.7188317179679871, "logps/chosen": -2.5546371936798096, "logps/rejected": -4.252812385559082, "loss": 0.6589, "nll_loss": 0.6275376081466675, "rewards/accuracies": 0.875, "rewards/chosen": -0.25546371936798096, "rewards/margins": 0.16981755197048187, "rewards/rejected": -0.425281286239624, "step": 5593 }, { "epoch": 15.315537303216974, "grad_norm": 4.956798076629639, "learning_rate": 2.3410958904109588e-07, "log_odds_chosen": 3.8894288539886475, "log_odds_ratio": -0.12939344346523285, "logits/chosen": 0.9310449361801147, "logits/rejected": 0.9642887115478516, "logps/chosen": -2.220072031021118, "logps/rejected": -5.985658645629883, "loss": 0.6456, "nll_loss": 0.6326640844345093, "rewards/accuracies": 1.0, "rewards/chosen": -0.22200720012187958, "rewards/margins": 0.3765586316585541, "rewards/rejected": -0.5985658764839172, "step": 5594 }, { "epoch": 15.318275154004107, "grad_norm": 4.697774410247803, "learning_rate": 2.3397260273972603e-07, "log_odds_chosen": 1.9651532173156738, "log_odds_ratio": -0.3792852759361267, "logits/chosen": 0.6149764060974121, "logits/rejected": 0.6646881103515625, "logps/chosen": -2.308687925338745, "logps/rejected": -4.176088809967041, "loss": 0.7148, "nll_loss": 0.6768690347671509, "rewards/accuracies": 0.875, "rewards/chosen": -0.23086878657341003, "rewards/margins": 0.18674010038375854, "rewards/rejected": -0.41760891675949097, "step": 5595 }, { "epoch": 15.321013004791238, "grad_norm": 5.40238094329834, "learning_rate": 2.3383561643835613e-07, "log_odds_chosen": 2.331083297729492, "log_odds_ratio": -0.2539415657520294, "logits/chosen": 1.0284966230392456, "logits/rejected": 1.0242669582366943, "logps/chosen": -2.346079111099243, "logps/rejected": -4.57488489151001, "loss": 0.6913, "nll_loss": 0.6658979654312134, "rewards/accuracies": 0.875, "rewards/chosen": -0.23460790514945984, "rewards/margins": 0.22288060188293457, "rewards/rejected": -0.4574885070323944, "step": 5596 }, { "epoch": 15.323750855578371, "grad_norm": 5.591176986694336, "learning_rate": 2.3369863013698629e-07, "log_odds_chosen": 1.4631738662719727, "log_odds_ratio": -0.2974252700805664, "logits/chosen": 0.8165267705917358, "logits/rejected": 0.847369909286499, "logps/chosen": -1.3353222608566284, "logps/rejected": -2.6138176918029785, "loss": 0.4861, "nll_loss": 0.4563230276107788, "rewards/accuracies": 1.0, "rewards/chosen": -0.13353222608566284, "rewards/margins": 0.1278495490550995, "rewards/rejected": -0.26138177514076233, "step": 5597 }, { "epoch": 15.326488706365502, "grad_norm": 7.396956443786621, "learning_rate": 2.3356164383561644e-07, "log_odds_chosen": 2.2546305656433105, "log_odds_ratio": -0.31490617990493774, "logits/chosen": 0.715928316116333, "logits/rejected": 0.6372316479682922, "logps/chosen": -2.18391752243042, "logps/rejected": -4.307514190673828, "loss": 0.6518, "nll_loss": 0.6202729940414429, "rewards/accuracies": 1.0, "rewards/chosen": -0.2183917760848999, "rewards/margins": 0.21235968172550201, "rewards/rejected": -0.4307514429092407, "step": 5598 }, { "epoch": 15.329226557152635, "grad_norm": 5.118327617645264, "learning_rate": 2.3342465753424656e-07, "log_odds_chosen": 1.8296836614608765, "log_odds_ratio": -0.23090723156929016, "logits/chosen": 0.9458156228065491, "logits/rejected": 1.0046221017837524, "logps/chosen": -1.7144544124603271, "logps/rejected": -3.378232479095459, "loss": 0.6355, "nll_loss": 0.6124014854431152, "rewards/accuracies": 1.0, "rewards/chosen": -0.17144544422626495, "rewards/margins": 0.16637779772281647, "rewards/rejected": -0.3378232419490814, "step": 5599 }, { "epoch": 15.331964407939767, "grad_norm": 5.173280239105225, "learning_rate": 2.3328767123287672e-07, "log_odds_chosen": 3.6535980701446533, "log_odds_ratio": -0.10257873684167862, "logits/chosen": 0.812726616859436, "logits/rejected": 0.925013542175293, "logps/chosen": -2.2349324226379395, "logps/rejected": -5.760430335998535, "loss": 0.7243, "nll_loss": 0.714066743850708, "rewards/accuracies": 1.0, "rewards/chosen": -0.22349324822425842, "rewards/margins": 0.35254982113838196, "rewards/rejected": -0.5760430693626404, "step": 5600 }, { "epoch": 15.3347022587269, "grad_norm": 4.821091175079346, "learning_rate": 2.3315068493150684e-07, "log_odds_chosen": 2.381509780883789, "log_odds_ratio": -0.1466692090034485, "logits/chosen": 1.2016892433166504, "logits/rejected": 1.1805477142333984, "logps/chosen": -2.1141624450683594, "logps/rejected": -4.339546203613281, "loss": 0.566, "nll_loss": 0.5513514280319214, "rewards/accuracies": 1.0, "rewards/chosen": -0.21141624450683594, "rewards/margins": 0.22253838181495667, "rewards/rejected": -0.433954656124115, "step": 5601 }, { "epoch": 15.33744010951403, "grad_norm": 5.0762224197387695, "learning_rate": 2.3301369863013697e-07, "log_odds_chosen": 3.7078990936279297, "log_odds_ratio": -0.1008620634675026, "logits/chosen": 0.6566393971443176, "logits/rejected": 0.6915132999420166, "logps/chosen": -2.115577220916748, "logps/rejected": -5.661806106567383, "loss": 0.7648, "nll_loss": 0.7547508478164673, "rewards/accuracies": 1.0, "rewards/chosen": -0.21155770123004913, "rewards/margins": 0.3546229302883148, "rewards/rejected": -0.5661805868148804, "step": 5602 }, { "epoch": 15.340177960301164, "grad_norm": 7.918284893035889, "learning_rate": 2.328767123287671e-07, "log_odds_chosen": 2.94499135017395, "log_odds_ratio": -0.4032759368419647, "logits/chosen": 1.0263477563858032, "logits/rejected": 1.0215556621551514, "logps/chosen": -2.7639355659484863, "logps/rejected": -5.644282341003418, "loss": 0.7476, "nll_loss": 0.7072645425796509, "rewards/accuracies": 0.875, "rewards/chosen": -0.2763935625553131, "rewards/margins": 0.28803467750549316, "rewards/rejected": -0.5644282102584839, "step": 5603 }, { "epoch": 15.342915811088295, "grad_norm": 6.88768196105957, "learning_rate": 2.3273972602739724e-07, "log_odds_chosen": 1.9255542755126953, "log_odds_ratio": -0.3525753319263458, "logits/chosen": 0.748346209526062, "logits/rejected": 0.6694556474685669, "logps/chosen": -2.550659656524658, "logps/rejected": -4.38798189163208, "loss": 0.7981, "nll_loss": 0.7627924084663391, "rewards/accuracies": 0.875, "rewards/chosen": -0.2550659477710724, "rewards/margins": 0.18373222649097443, "rewards/rejected": -0.438798189163208, "step": 5604 }, { "epoch": 15.345653661875428, "grad_norm": 6.005445957183838, "learning_rate": 2.326027397260274e-07, "log_odds_chosen": 2.2923622131347656, "log_odds_ratio": -0.22299280762672424, "logits/chosen": 0.7584824562072754, "logits/rejected": 0.7787918448448181, "logps/chosen": -1.916275978088379, "logps/rejected": -4.057326316833496, "loss": 0.5975, "nll_loss": 0.5752374529838562, "rewards/accuracies": 1.0, "rewards/chosen": -0.1916275918483734, "rewards/margins": 0.2141050398349762, "rewards/rejected": -0.4057326316833496, "step": 5605 }, { "epoch": 15.34839151266256, "grad_norm": 6.810873031616211, "learning_rate": 2.3246575342465752e-07, "log_odds_chosen": 1.6992658376693726, "log_odds_ratio": -0.2391025722026825, "logits/chosen": 0.8729108572006226, "logits/rejected": 0.8031914234161377, "logps/chosen": -2.8405332565307617, "logps/rejected": -4.46327018737793, "loss": 0.7459, "nll_loss": 0.7219797968864441, "rewards/accuracies": 1.0, "rewards/chosen": -0.28405332565307617, "rewards/margins": 0.16227370500564575, "rewards/rejected": -0.4463270306587219, "step": 5606 }, { "epoch": 15.351129363449692, "grad_norm": 5.796442985534668, "learning_rate": 2.3232876712328767e-07, "log_odds_chosen": 2.6381404399871826, "log_odds_ratio": -0.30106833577156067, "logits/chosen": 0.7775086760520935, "logits/rejected": 0.8328789472579956, "logps/chosen": -2.019667625427246, "logps/rejected": -4.476049423217773, "loss": 0.5786, "nll_loss": 0.548526406288147, "rewards/accuracies": 0.875, "rewards/chosen": -0.201966792345047, "rewards/margins": 0.2456381916999817, "rewards/rejected": -0.4476049542427063, "step": 5607 }, { "epoch": 15.353867214236825, "grad_norm": 5.454138278961182, "learning_rate": 2.321917808219178e-07, "log_odds_chosen": 3.6346468925476074, "log_odds_ratio": -0.20701685547828674, "logits/chosen": 0.942136824131012, "logits/rejected": 0.9802477359771729, "logps/chosen": -2.7076830863952637, "logps/rejected": -6.267017364501953, "loss": 0.6947, "nll_loss": 0.6739922761917114, "rewards/accuracies": 0.875, "rewards/chosen": -0.27076831459999084, "rewards/margins": 0.3559334874153137, "rewards/rejected": -0.6267017722129822, "step": 5608 }, { "epoch": 15.356605065023956, "grad_norm": 5.532583236694336, "learning_rate": 2.3205479452054793e-07, "log_odds_chosen": 1.5642844438552856, "log_odds_ratio": -0.3496723473072052, "logits/chosen": 0.8693619966506958, "logits/rejected": 0.9526000022888184, "logps/chosen": -2.619621753692627, "logps/rejected": -4.111528396606445, "loss": 0.6982, "nll_loss": 0.6632346510887146, "rewards/accuracies": 0.75, "rewards/chosen": -0.2619621753692627, "rewards/margins": 0.14919066429138184, "rewards/rejected": -0.41115283966064453, "step": 5609 }, { "epoch": 15.359342915811089, "grad_norm": 5.061581611633301, "learning_rate": 2.3191780821917808e-07, "log_odds_chosen": 0.8115906119346619, "log_odds_ratio": -0.42144376039505005, "logits/chosen": 0.7374014854431152, "logits/rejected": 0.7659443616867065, "logps/chosen": -2.12886905670166, "logps/rejected": -2.813159465789795, "loss": 0.6241, "nll_loss": 0.5819779634475708, "rewards/accuracies": 0.75, "rewards/chosen": -0.21288689970970154, "rewards/margins": 0.06842903792858124, "rewards/rejected": -0.28131595253944397, "step": 5610 }, { "epoch": 15.36208076659822, "grad_norm": 6.259095191955566, "learning_rate": 2.317808219178082e-07, "log_odds_chosen": 1.8803585767745972, "log_odds_ratio": -0.3132410943508148, "logits/chosen": 0.6096653938293457, "logits/rejected": 0.6360311508178711, "logps/chosen": -2.555893898010254, "logps/rejected": -4.341139793395996, "loss": 0.5779, "nll_loss": 0.5465294122695923, "rewards/accuracies": 1.0, "rewards/chosen": -0.25558939576148987, "rewards/margins": 0.17852458357810974, "rewards/rejected": -0.4341139793395996, "step": 5611 }, { "epoch": 15.364818617385353, "grad_norm": 4.590893268585205, "learning_rate": 2.3164383561643836e-07, "log_odds_chosen": 3.0442123413085938, "log_odds_ratio": -0.07897113263607025, "logits/chosen": 0.890113115310669, "logits/rejected": 0.8612518310546875, "logps/chosen": -1.7783900499343872, "logps/rejected": -4.597346305847168, "loss": 0.5077, "nll_loss": 0.49975451827049255, "rewards/accuracies": 1.0, "rewards/chosen": -0.1778390258550644, "rewards/margins": 0.28189560770988464, "rewards/rejected": -0.45973461866378784, "step": 5612 }, { "epoch": 15.367556468172484, "grad_norm": 8.130634307861328, "learning_rate": 2.3150684931506848e-07, "log_odds_chosen": 2.028895854949951, "log_odds_ratio": -0.4785727858543396, "logits/chosen": 0.7985711693763733, "logits/rejected": 0.7525734901428223, "logps/chosen": -2.2842493057250977, "logps/rejected": -4.2484283447265625, "loss": 0.6414, "nll_loss": 0.5935547351837158, "rewards/accuracies": 0.75, "rewards/chosen": -0.22842496633529663, "rewards/margins": 0.1964179426431656, "rewards/rejected": -0.424842894077301, "step": 5613 }, { "epoch": 15.370294318959617, "grad_norm": 5.328863143920898, "learning_rate": 2.3136986301369863e-07, "log_odds_chosen": 2.3057663440704346, "log_odds_ratio": -0.30737361311912537, "logits/chosen": 0.93086838722229, "logits/rejected": 0.9762232899665833, "logps/chosen": -2.3677544593811035, "logps/rejected": -4.593417167663574, "loss": 0.6662, "nll_loss": 0.6354831457138062, "rewards/accuracies": 0.75, "rewards/chosen": -0.23677542805671692, "rewards/margins": 0.22256624698638916, "rewards/rejected": -0.45934170484542847, "step": 5614 }, { "epoch": 15.373032169746748, "grad_norm": 5.235040664672852, "learning_rate": 2.3123287671232873e-07, "log_odds_chosen": 2.222299575805664, "log_odds_ratio": -0.37165313959121704, "logits/chosen": 0.9416744709014893, "logits/rejected": 0.967488169670105, "logps/chosen": -2.3495702743530273, "logps/rejected": -4.473221778869629, "loss": 0.7506, "nll_loss": 0.7134346961975098, "rewards/accuracies": 0.75, "rewards/chosen": -0.2349570244550705, "rewards/margins": 0.21236518025398254, "rewards/rejected": -0.44732218980789185, "step": 5615 }, { "epoch": 15.375770020533881, "grad_norm": 5.524904251098633, "learning_rate": 2.3109589041095888e-07, "log_odds_chosen": 1.487388253211975, "log_odds_ratio": -0.3451026380062103, "logits/chosen": 1.1042101383209229, "logits/rejected": 1.1580936908721924, "logps/chosen": -1.6572315692901611, "logps/rejected": -2.996346950531006, "loss": 0.5859, "nll_loss": 0.5513784289360046, "rewards/accuracies": 0.875, "rewards/chosen": -0.16572314500808716, "rewards/margins": 0.13391155004501343, "rewards/rejected": -0.2996346950531006, "step": 5616 }, { "epoch": 15.378507871321013, "grad_norm": 4.669279098510742, "learning_rate": 2.3095890410958904e-07, "log_odds_chosen": 2.632359504699707, "log_odds_ratio": -0.11734693497419357, "logits/chosen": 0.9268075227737427, "logits/rejected": 1.0097757577896118, "logps/chosen": -1.9200069904327393, "logps/rejected": -4.299665451049805, "loss": 0.6186, "nll_loss": 0.6068516969680786, "rewards/accuracies": 1.0, "rewards/chosen": -0.19200068712234497, "rewards/margins": 0.23796585202217102, "rewards/rejected": -0.4299665689468384, "step": 5617 }, { "epoch": 15.381245722108146, "grad_norm": 4.926095485687256, "learning_rate": 2.3082191780821916e-07, "log_odds_chosen": 2.024888515472412, "log_odds_ratio": -0.19687366485595703, "logits/chosen": 0.7865962982177734, "logits/rejected": 0.7439467310905457, "logps/chosen": -1.5125828981399536, "logps/rejected": -3.3362157344818115, "loss": 0.5398, "nll_loss": 0.5201037526130676, "rewards/accuracies": 1.0, "rewards/chosen": -0.15125827491283417, "rewards/margins": 0.18236330151557922, "rewards/rejected": -0.3336215615272522, "step": 5618 }, { "epoch": 15.383983572895277, "grad_norm": 5.907826900482178, "learning_rate": 2.3068493150684931e-07, "log_odds_chosen": 2.4097585678100586, "log_odds_ratio": -0.25368303060531616, "logits/chosen": 0.7529094219207764, "logits/rejected": 0.8492011427879333, "logps/chosen": -2.3729233741760254, "logps/rejected": -4.69489049911499, "loss": 0.7241, "nll_loss": 0.698752760887146, "rewards/accuracies": 0.875, "rewards/chosen": -0.2372923493385315, "rewards/margins": 0.23219667375087738, "rewards/rejected": -0.46948903799057007, "step": 5619 }, { "epoch": 15.38672142368241, "grad_norm": 5.38571310043335, "learning_rate": 2.3054794520547944e-07, "log_odds_chosen": 2.622450351715088, "log_odds_ratio": -0.16620133817195892, "logits/chosen": 1.0898027420043945, "logits/rejected": 1.1203968524932861, "logps/chosen": -2.005587100982666, "logps/rejected": -4.485044479370117, "loss": 0.6465, "nll_loss": 0.6298520565032959, "rewards/accuracies": 1.0, "rewards/chosen": -0.20055870711803436, "rewards/margins": 0.24794581532478333, "rewards/rejected": -0.4485045075416565, "step": 5620 }, { "epoch": 15.38945927446954, "grad_norm": 5.293938636779785, "learning_rate": 2.304109589041096e-07, "log_odds_chosen": 0.9924569725990295, "log_odds_ratio": -0.5712844133377075, "logits/chosen": 0.7793163061141968, "logits/rejected": 0.6899499893188477, "logps/chosen": -1.9671955108642578, "logps/rejected": -2.797485113143921, "loss": 0.7069, "nll_loss": 0.6497706174850464, "rewards/accuracies": 0.75, "rewards/chosen": -0.19671954214572906, "rewards/margins": 0.08302897214889526, "rewards/rejected": -0.2797485291957855, "step": 5621 }, { "epoch": 15.392197125256674, "grad_norm": 4.965274333953857, "learning_rate": 2.302739726027397e-07, "log_odds_chosen": 2.440591812133789, "log_odds_ratio": -0.14266802370548248, "logits/chosen": 0.7928839921951294, "logits/rejected": 0.8232701420783997, "logps/chosen": -2.7202134132385254, "logps/rejected": -5.053521156311035, "loss": 0.7472, "nll_loss": 0.7329033613204956, "rewards/accuracies": 1.0, "rewards/chosen": -0.2720213532447815, "rewards/margins": 0.23333075642585754, "rewards/rejected": -0.5053521394729614, "step": 5622 }, { "epoch": 15.394934976043805, "grad_norm": 4.815074920654297, "learning_rate": 2.3013698630136984e-07, "log_odds_chosen": 3.3308916091918945, "log_odds_ratio": -0.21761031448841095, "logits/chosen": 0.9533918499946594, "logits/rejected": 1.0050169229507446, "logps/chosen": -2.318972110748291, "logps/rejected": -5.535121917724609, "loss": 0.6159, "nll_loss": 0.5941851139068604, "rewards/accuracies": 1.0, "rewards/chosen": -0.23189722001552582, "rewards/margins": 0.32161498069763184, "rewards/rejected": -0.5535122156143188, "step": 5623 }, { "epoch": 15.397672826830938, "grad_norm": 5.520585536956787, "learning_rate": 2.3e-07, "log_odds_chosen": 2.8964121341705322, "log_odds_ratio": -0.16965314745903015, "logits/chosen": 0.8888572454452515, "logits/rejected": 0.8939963579177856, "logps/chosen": -2.01794171333313, "logps/rejected": -4.772884368896484, "loss": 0.8493, "nll_loss": 0.8323013782501221, "rewards/accuracies": 1.0, "rewards/chosen": -0.20179416239261627, "rewards/margins": 0.2754943072795868, "rewards/rejected": -0.47728845477104187, "step": 5624 }, { "epoch": 15.40041067761807, "grad_norm": 5.627275466918945, "learning_rate": 2.2986301369863012e-07, "log_odds_chosen": 2.179274797439575, "log_odds_ratio": -0.23115096986293793, "logits/chosen": 0.8896437883377075, "logits/rejected": 0.9702720642089844, "logps/chosen": -1.969116449356079, "logps/rejected": -3.987240791320801, "loss": 0.5467, "nll_loss": 0.5235450267791748, "rewards/accuracies": 1.0, "rewards/chosen": -0.19691163301467896, "rewards/margins": 0.20181246101856232, "rewards/rejected": -0.3987240791320801, "step": 5625 }, { "epoch": 15.403148528405202, "grad_norm": 5.042508125305176, "learning_rate": 2.2972602739726027e-07, "log_odds_chosen": 2.4204888343811035, "log_odds_ratio": -0.20401975512504578, "logits/chosen": 0.7932605743408203, "logits/rejected": 0.8435845375061035, "logps/chosen": -1.9724082946777344, "logps/rejected": -4.287655830383301, "loss": 0.6113, "nll_loss": 0.5908790230751038, "rewards/accuracies": 1.0, "rewards/chosen": -0.19724082946777344, "rewards/margins": 0.23152479529380798, "rewards/rejected": -0.4287656545639038, "step": 5626 }, { "epoch": 15.405886379192333, "grad_norm": 4.765353202819824, "learning_rate": 2.295890410958904e-07, "log_odds_chosen": 4.5065155029296875, "log_odds_ratio": -0.11747988313436508, "logits/chosen": 0.9796388149261475, "logits/rejected": 0.9523657560348511, "logps/chosen": -1.8446100950241089, "logps/rejected": -6.193517684936523, "loss": 0.6524, "nll_loss": 0.640606164932251, "rewards/accuracies": 1.0, "rewards/chosen": -0.18446101248264313, "rewards/margins": 0.4348907768726349, "rewards/rejected": -0.6193518042564392, "step": 5627 }, { "epoch": 15.408624229979466, "grad_norm": 5.666335105895996, "learning_rate": 2.2945205479452055e-07, "log_odds_chosen": 1.8222670555114746, "log_odds_ratio": -0.18173208832740784, "logits/chosen": 0.7176018953323364, "logits/rejected": 0.7249807119369507, "logps/chosen": -2.0245511531829834, "logps/rejected": -3.7105283737182617, "loss": 0.5921, "nll_loss": 0.5738795399665833, "rewards/accuracies": 1.0, "rewards/chosen": -0.20245511829853058, "rewards/margins": 0.1685977280139923, "rewards/rejected": -0.3710528612136841, "step": 5628 }, { "epoch": 15.411362080766597, "grad_norm": 6.280655860900879, "learning_rate": 2.2931506849315068e-07, "log_odds_chosen": 2.114347219467163, "log_odds_ratio": -0.31089192628860474, "logits/chosen": 0.9359334707260132, "logits/rejected": 0.9899265766143799, "logps/chosen": -2.601318836212158, "logps/rejected": -4.63607120513916, "loss": 0.6639, "nll_loss": 0.6327666640281677, "rewards/accuracies": 0.75, "rewards/chosen": -0.2601318955421448, "rewards/margins": 0.2034751921892166, "rewards/rejected": -0.46360713243484497, "step": 5629 }, { "epoch": 15.41409993155373, "grad_norm": 5.238865375518799, "learning_rate": 2.291780821917808e-07, "log_odds_chosen": 2.851315498352051, "log_odds_ratio": -0.10849855840206146, "logits/chosen": 0.9123631715774536, "logits/rejected": 0.9749510288238525, "logps/chosen": -2.0320894718170166, "logps/rejected": -4.679573059082031, "loss": 0.7228, "nll_loss": 0.7119670510292053, "rewards/accuracies": 1.0, "rewards/chosen": -0.20320895314216614, "rewards/margins": 0.26474836468696594, "rewards/rejected": -0.4679573178291321, "step": 5630 }, { "epoch": 15.416837782340863, "grad_norm": 6.350758075714111, "learning_rate": 2.2904109589041095e-07, "log_odds_chosen": 1.808019995689392, "log_odds_ratio": -0.27373743057250977, "logits/chosen": 0.7632558345794678, "logits/rejected": 0.8171972036361694, "logps/chosen": -1.529596209526062, "logps/rejected": -3.1560587882995605, "loss": 0.5353, "nll_loss": 0.5078909397125244, "rewards/accuracies": 1.0, "rewards/chosen": -0.15295962989330292, "rewards/margins": 0.16264626383781433, "rewards/rejected": -0.31560587882995605, "step": 5631 }, { "epoch": 15.419575633127995, "grad_norm": 8.465012550354004, "learning_rate": 2.2890410958904108e-07, "log_odds_chosen": 2.8016104698181152, "log_odds_ratio": -0.33244284987449646, "logits/chosen": 0.88645339012146, "logits/rejected": 0.8746265172958374, "logps/chosen": -2.393803358078003, "logps/rejected": -5.058403968811035, "loss": 0.6804, "nll_loss": 0.6471951603889465, "rewards/accuracies": 0.875, "rewards/chosen": -0.23938032984733582, "rewards/margins": 0.2664600908756256, "rewards/rejected": -0.5058404803276062, "step": 5632 }, { "epoch": 15.422313483915127, "grad_norm": 6.709170818328857, "learning_rate": 2.2876712328767123e-07, "log_odds_chosen": 1.4699543714523315, "log_odds_ratio": -0.32611027359962463, "logits/chosen": 1.0551072359085083, "logits/rejected": 0.9884710907936096, "logps/chosen": -1.6625657081604004, "logps/rejected": -2.9742202758789062, "loss": 0.6315, "nll_loss": 0.5989120602607727, "rewards/accuracies": 0.875, "rewards/chosen": -0.16625657677650452, "rewards/margins": 0.13116545975208282, "rewards/rejected": -0.29742202162742615, "step": 5633 }, { "epoch": 15.425051334702259, "grad_norm": 5.5466461181640625, "learning_rate": 2.2863013698630136e-07, "log_odds_chosen": 1.9556715488433838, "log_odds_ratio": -0.21604789793491364, "logits/chosen": 0.8572040796279907, "logits/rejected": 0.7980536222457886, "logps/chosen": -1.8969841003417969, "logps/rejected": -3.6917569637298584, "loss": 0.6051, "nll_loss": 0.5834774374961853, "rewards/accuracies": 1.0, "rewards/chosen": -0.18969841301441193, "rewards/margins": 0.1794772893190384, "rewards/rejected": -0.3691757023334503, "step": 5634 }, { "epoch": 15.427789185489392, "grad_norm": 5.95003604888916, "learning_rate": 2.284931506849315e-07, "log_odds_chosen": 3.1265478134155273, "log_odds_ratio": -0.17882557213306427, "logits/chosen": 0.9044685363769531, "logits/rejected": 0.9150220155715942, "logps/chosen": -1.993118405342102, "logps/rejected": -4.964442253112793, "loss": 0.7245, "nll_loss": 0.7066011428833008, "rewards/accuracies": 1.0, "rewards/chosen": -0.19931182265281677, "rewards/margins": 0.29713237285614014, "rewards/rejected": -0.4964441955089569, "step": 5635 }, { "epoch": 15.430527036276523, "grad_norm": 6.322601795196533, "learning_rate": 2.2835616438356163e-07, "log_odds_chosen": 3.9815402030944824, "log_odds_ratio": -0.14236566424369812, "logits/chosen": 1.1506693363189697, "logits/rejected": 1.1868613958358765, "logps/chosen": -2.4709038734436035, "logps/rejected": -6.361908435821533, "loss": 0.6473, "nll_loss": 0.6330984830856323, "rewards/accuracies": 0.875, "rewards/chosen": -0.2470904141664505, "rewards/margins": 0.38910046219825745, "rewards/rejected": -0.6361908316612244, "step": 5636 }, { "epoch": 15.433264887063656, "grad_norm": 6.028160095214844, "learning_rate": 2.2821917808219176e-07, "log_odds_chosen": 1.4913913011550903, "log_odds_ratio": -0.2708284556865692, "logits/chosen": 0.8480175733566284, "logits/rejected": 0.8226311206817627, "logps/chosen": -2.1413586139678955, "logps/rejected": -3.473597526550293, "loss": 0.655, "nll_loss": 0.6279159784317017, "rewards/accuracies": 0.875, "rewards/chosen": -0.21413588523864746, "rewards/margins": 0.13322389125823975, "rewards/rejected": -0.3473597466945648, "step": 5637 }, { "epoch": 15.436002737850787, "grad_norm": 6.660788059234619, "learning_rate": 2.280821917808219e-07, "log_odds_chosen": 1.5236003398895264, "log_odds_ratio": -0.3004208207130432, "logits/chosen": 0.888320803642273, "logits/rejected": 0.9054652452468872, "logps/chosen": -2.50917911529541, "logps/rejected": -3.9487905502319336, "loss": 0.69, "nll_loss": 0.6599794030189514, "rewards/accuracies": 1.0, "rewards/chosen": -0.250917911529541, "rewards/margins": 0.1439611315727234, "rewards/rejected": -0.3948790431022644, "step": 5638 }, { "epoch": 15.43874058863792, "grad_norm": 6.367304801940918, "learning_rate": 2.2794520547945204e-07, "log_odds_chosen": 1.7114049196243286, "log_odds_ratio": -0.2493860125541687, "logits/chosen": 0.804504930973053, "logits/rejected": 0.7766623497009277, "logps/chosen": -2.002664804458618, "logps/rejected": -3.57061767578125, "loss": 0.6857, "nll_loss": 0.6607689261436462, "rewards/accuracies": 1.0, "rewards/chosen": -0.20026648044586182, "rewards/margins": 0.15679526329040527, "rewards/rejected": -0.3570617437362671, "step": 5639 }, { "epoch": 15.441478439425051, "grad_norm": 6.502397060394287, "learning_rate": 2.278082191780822e-07, "log_odds_chosen": 1.8277275562286377, "log_odds_ratio": -0.5771179795265198, "logits/chosen": 0.8913142681121826, "logits/rejected": 0.8888224363327026, "logps/chosen": -2.566364288330078, "logps/rejected": -4.3553242683410645, "loss": 0.7904, "nll_loss": 0.7327322959899902, "rewards/accuracies": 0.625, "rewards/chosen": -0.25663644075393677, "rewards/margins": 0.1788959950208664, "rewards/rejected": -0.43553242087364197, "step": 5640 }, { "epoch": 15.444216290212184, "grad_norm": 6.2960004806518555, "learning_rate": 2.2767123287671232e-07, "log_odds_chosen": 1.274559497833252, "log_odds_ratio": -0.3138582408428192, "logits/chosen": 0.6188603639602661, "logits/rejected": 0.625968873500824, "logps/chosen": -2.258193016052246, "logps/rejected": -3.389601230621338, "loss": 0.6626, "nll_loss": 0.6312631368637085, "rewards/accuracies": 0.875, "rewards/chosen": -0.22581930458545685, "rewards/margins": 0.11314082145690918, "rewards/rejected": -0.3389601409435272, "step": 5641 }, { "epoch": 15.446954140999315, "grad_norm": 4.970925331115723, "learning_rate": 2.2753424657534244e-07, "log_odds_chosen": 2.5595312118530273, "log_odds_ratio": -0.22134006023406982, "logits/chosen": 0.9135124683380127, "logits/rejected": 0.9169918894767761, "logps/chosen": -2.162745237350464, "logps/rejected": -4.62385368347168, "loss": 0.6727, "nll_loss": 0.6505396962165833, "rewards/accuracies": 1.0, "rewards/chosen": -0.21627452969551086, "rewards/margins": 0.24611081182956696, "rewards/rejected": -0.462385356426239, "step": 5642 }, { "epoch": 15.449691991786448, "grad_norm": 6.435241222381592, "learning_rate": 2.273972602739726e-07, "log_odds_chosen": 2.558523178100586, "log_odds_ratio": -0.30806493759155273, "logits/chosen": 1.0230436325073242, "logits/rejected": 1.0790584087371826, "logps/chosen": -2.0326976776123047, "logps/rejected": -4.434892177581787, "loss": 0.5817, "nll_loss": 0.5508928298950195, "rewards/accuracies": 0.875, "rewards/chosen": -0.20326977968215942, "rewards/margins": 0.24021942913532257, "rewards/rejected": -0.4434892237186432, "step": 5643 }, { "epoch": 15.45242984257358, "grad_norm": 7.471103668212891, "learning_rate": 2.2726027397260272e-07, "log_odds_chosen": 0.5162146687507629, "log_odds_ratio": -0.9439551830291748, "logits/chosen": 0.7413781881332397, "logits/rejected": 0.748664915561676, "logps/chosen": -3.0195815563201904, "logps/rejected": -3.482760429382324, "loss": 0.8438, "nll_loss": 0.7493679523468018, "rewards/accuracies": 0.625, "rewards/chosen": -0.3019581735134125, "rewards/margins": 0.046317871659994125, "rewards/rejected": -0.3482760488986969, "step": 5644 }, { "epoch": 15.455167693360712, "grad_norm": 6.878046989440918, "learning_rate": 2.2712328767123287e-07, "log_odds_chosen": 2.9076952934265137, "log_odds_ratio": -0.3287953734397888, "logits/chosen": 0.918291449546814, "logits/rejected": 0.8514980673789978, "logps/chosen": -2.3157243728637695, "logps/rejected": -5.091397762298584, "loss": 0.7196, "nll_loss": 0.6866819858551025, "rewards/accuracies": 0.75, "rewards/chosen": -0.23157243430614471, "rewards/margins": 0.27756738662719727, "rewards/rejected": -0.5091398358345032, "step": 5645 }, { "epoch": 15.457905544147843, "grad_norm": 5.322679042816162, "learning_rate": 2.26986301369863e-07, "log_odds_chosen": 2.5445046424865723, "log_odds_ratio": -0.23662282526493073, "logits/chosen": 0.8149261474609375, "logits/rejected": 0.7837194204330444, "logps/chosen": -1.6825934648513794, "logps/rejected": -4.0827813148498535, "loss": 0.5883, "nll_loss": 0.5645973682403564, "rewards/accuracies": 1.0, "rewards/chosen": -0.16825935244560242, "rewards/margins": 0.24001877009868622, "rewards/rejected": -0.40827810764312744, "step": 5646 }, { "epoch": 15.460643394934976, "grad_norm": 4.780040740966797, "learning_rate": 2.2684931506849315e-07, "log_odds_chosen": 3.2622923851013184, "log_odds_ratio": -0.3342697024345398, "logits/chosen": 0.8037146925926208, "logits/rejected": 0.9436497688293457, "logps/chosen": -2.268435478210449, "logps/rejected": -5.3786821365356445, "loss": 0.7097, "nll_loss": 0.676240861415863, "rewards/accuracies": 0.875, "rewards/chosen": -0.22684355080127716, "rewards/margins": 0.31102466583251953, "rewards/rejected": -0.5378682017326355, "step": 5647 }, { "epoch": 15.463381245722108, "grad_norm": 6.403187274932861, "learning_rate": 2.267123287671233e-07, "log_odds_chosen": 1.6343297958374023, "log_odds_ratio": -0.2864387035369873, "logits/chosen": 0.9834546446800232, "logits/rejected": 1.0012377500534058, "logps/chosen": -2.4199631214141846, "logps/rejected": -3.953401565551758, "loss": 0.5871, "nll_loss": 0.5584415793418884, "rewards/accuracies": 0.875, "rewards/chosen": -0.24199630320072174, "rewards/margins": 0.15334387123584747, "rewards/rejected": -0.3953402042388916, "step": 5648 }, { "epoch": 15.46611909650924, "grad_norm": 5.146874904632568, "learning_rate": 2.265753424657534e-07, "log_odds_chosen": 2.115016222000122, "log_odds_ratio": -0.21915684640407562, "logits/chosen": 0.6254091262817383, "logits/rejected": 0.6245934963226318, "logps/chosen": -2.1330907344818115, "logps/rejected": -4.121310710906982, "loss": 0.5991, "nll_loss": 0.5772082209587097, "rewards/accuracies": 0.875, "rewards/chosen": -0.21330907940864563, "rewards/margins": 0.1988220065832138, "rewards/rejected": -0.41213107109069824, "step": 5649 }, { "epoch": 15.468856947296372, "grad_norm": 5.724137783050537, "learning_rate": 2.2643835616438355e-07, "log_odds_chosen": 1.4033373594284058, "log_odds_ratio": -0.3322153389453888, "logits/chosen": 0.9065366983413696, "logits/rejected": 0.9208614826202393, "logps/chosen": -2.217941999435425, "logps/rejected": -3.4968295097351074, "loss": 0.6354, "nll_loss": 0.6021428108215332, "rewards/accuracies": 0.875, "rewards/chosen": -0.2217942178249359, "rewards/margins": 0.1278887540102005, "rewards/rejected": -0.3496829569339752, "step": 5650 }, { "epoch": 15.471594798083505, "grad_norm": 4.795169353485107, "learning_rate": 2.2630136986301368e-07, "log_odds_chosen": 2.655360221862793, "log_odds_ratio": -0.2017114758491516, "logits/chosen": 0.7973049283027649, "logits/rejected": 0.8732786178588867, "logps/chosen": -2.020266056060791, "logps/rejected": -4.559774398803711, "loss": 0.5559, "nll_loss": 0.5357322692871094, "rewards/accuracies": 1.0, "rewards/chosen": -0.2020266205072403, "rewards/margins": 0.253950834274292, "rewards/rejected": -0.4559774398803711, "step": 5651 }, { "epoch": 15.474332648870636, "grad_norm": 6.251889228820801, "learning_rate": 2.2616438356164383e-07, "log_odds_chosen": 1.7760010957717896, "log_odds_ratio": -0.2654567360877991, "logits/chosen": 0.5451316833496094, "logits/rejected": 0.613404393196106, "logps/chosen": -2.179992198944092, "logps/rejected": -3.7923433780670166, "loss": 0.5832, "nll_loss": 0.5567000508308411, "rewards/accuracies": 1.0, "rewards/chosen": -0.21799921989440918, "rewards/margins": 0.16123513877391815, "rewards/rejected": -0.3792343735694885, "step": 5652 }, { "epoch": 15.477070499657769, "grad_norm": 5.566537380218506, "learning_rate": 2.2602739726027396e-07, "log_odds_chosen": 1.860379934310913, "log_odds_ratio": -0.2899993658065796, "logits/chosen": 0.8590390682220459, "logits/rejected": 0.9174415469169617, "logps/chosen": -2.4169178009033203, "logps/rejected": -4.199192523956299, "loss": 0.7263, "nll_loss": 0.697300910949707, "rewards/accuracies": 1.0, "rewards/chosen": -0.24169179797172546, "rewards/margins": 0.1782274693250656, "rewards/rejected": -0.4199192523956299, "step": 5653 }, { "epoch": 15.4798083504449, "grad_norm": 5.0330891609191895, "learning_rate": 2.258904109589041e-07, "log_odds_chosen": 2.670403480529785, "log_odds_ratio": -0.18499335646629333, "logits/chosen": 0.8958144187927246, "logits/rejected": 0.9401034712791443, "logps/chosen": -2.015031337738037, "logps/rejected": -4.530137062072754, "loss": 0.6185, "nll_loss": 0.5999575257301331, "rewards/accuracies": 1.0, "rewards/chosen": -0.20150314271450043, "rewards/margins": 0.2515105903148651, "rewards/rejected": -0.45301371812820435, "step": 5654 }, { "epoch": 15.482546201232033, "grad_norm": 6.29852819442749, "learning_rate": 2.2575342465753426e-07, "log_odds_chosen": 2.497769594192505, "log_odds_ratio": -0.3027138411998749, "logits/chosen": 0.6745586395263672, "logits/rejected": 0.6396529078483582, "logps/chosen": -2.3639259338378906, "logps/rejected": -4.699748992919922, "loss": 0.744, "nll_loss": 0.7137545943260193, "rewards/accuracies": 0.875, "rewards/chosen": -0.23639260232448578, "rewards/margins": 0.23358231782913208, "rewards/rejected": -0.46997490525245667, "step": 5655 }, { "epoch": 15.485284052019164, "grad_norm": 5.135828971862793, "learning_rate": 2.2561643835616436e-07, "log_odds_chosen": 2.392613410949707, "log_odds_ratio": -0.161138653755188, "logits/chosen": 0.856195330619812, "logits/rejected": 0.8740684986114502, "logps/chosen": -1.949951171875, "logps/rejected": -4.160277843475342, "loss": 0.5764, "nll_loss": 0.5603284239768982, "rewards/accuracies": 1.0, "rewards/chosen": -0.19499510526657104, "rewards/margins": 0.22103269398212433, "rewards/rejected": -0.4160277843475342, "step": 5656 }, { "epoch": 15.488021902806297, "grad_norm": 6.117331504821777, "learning_rate": 2.254794520547945e-07, "log_odds_chosen": 1.6932244300842285, "log_odds_ratio": -0.32021358609199524, "logits/chosen": 1.0692654848098755, "logits/rejected": 1.046592116355896, "logps/chosen": -2.0176124572753906, "logps/rejected": -3.5680131912231445, "loss": 0.6807, "nll_loss": 0.6486670970916748, "rewards/accuracies": 0.875, "rewards/chosen": -0.20176124572753906, "rewards/margins": 0.15504007041454315, "rewards/rejected": -0.3568013310432434, "step": 5657 }, { "epoch": 15.49075975359343, "grad_norm": 5.780356407165527, "learning_rate": 2.2534246575342464e-07, "log_odds_chosen": 2.779362201690674, "log_odds_ratio": -0.11862010508775711, "logits/chosen": 0.8596140146255493, "logits/rejected": 0.9156094193458557, "logps/chosen": -2.4097161293029785, "logps/rejected": -5.059961795806885, "loss": 0.7304, "nll_loss": 0.7185493111610413, "rewards/accuracies": 1.0, "rewards/chosen": -0.2409716099500656, "rewards/margins": 0.2650245726108551, "rewards/rejected": -0.5059962272644043, "step": 5658 }, { "epoch": 15.493497604380561, "grad_norm": 5.880341529846191, "learning_rate": 2.252054794520548e-07, "log_odds_chosen": 1.6566556692123413, "log_odds_ratio": -0.2666472792625427, "logits/chosen": 1.0013926029205322, "logits/rejected": 1.0055139064788818, "logps/chosen": -1.7409050464630127, "logps/rejected": -3.2108407020568848, "loss": 0.5551, "nll_loss": 0.5284311771392822, "rewards/accuracies": 0.875, "rewards/chosen": -0.17409051954746246, "rewards/margins": 0.14699357748031616, "rewards/rejected": -0.32108408212661743, "step": 5659 }, { "epoch": 15.496235455167694, "grad_norm": 5.306108474731445, "learning_rate": 2.2506849315068494e-07, "log_odds_chosen": 1.6002490520477295, "log_odds_ratio": -0.2690134346485138, "logits/chosen": 0.8114609122276306, "logits/rejected": 0.8466774225234985, "logps/chosen": -1.9693076610565186, "logps/rejected": -3.434414863586426, "loss": 0.5363, "nll_loss": 0.509402871131897, "rewards/accuracies": 0.875, "rewards/chosen": -0.19693076610565186, "rewards/margins": 0.14651072025299072, "rewards/rejected": -0.3434414863586426, "step": 5660 }, { "epoch": 15.498973305954825, "grad_norm": 7.256101608276367, "learning_rate": 2.2493150684931507e-07, "log_odds_chosen": 2.425839900970459, "log_odds_ratio": -0.3003584146499634, "logits/chosen": 1.1501047611236572, "logits/rejected": 1.1448659896850586, "logps/chosen": -2.80172061920166, "logps/rejected": -5.1645894050598145, "loss": 0.7587, "nll_loss": 0.7286163568496704, "rewards/accuracies": 0.875, "rewards/chosen": -0.28017207980155945, "rewards/margins": 0.23628687858581543, "rewards/rejected": -0.5164589881896973, "step": 5661 }, { "epoch": 15.501711156741958, "grad_norm": 8.792252540588379, "learning_rate": 2.247945205479452e-07, "log_odds_chosen": 2.346433639526367, "log_odds_ratio": -0.14514240622520447, "logits/chosen": 0.9358623027801514, "logits/rejected": 0.8840481042861938, "logps/chosen": -1.910019040107727, "logps/rejected": -4.022673606872559, "loss": 0.6086, "nll_loss": 0.5940837264060974, "rewards/accuracies": 1.0, "rewards/chosen": -0.19100192189216614, "rewards/margins": 0.2112654447555542, "rewards/rejected": -0.40226736664772034, "step": 5662 }, { "epoch": 15.50444900752909, "grad_norm": 7.418568134307861, "learning_rate": 2.2465753424657532e-07, "log_odds_chosen": 1.9928910732269287, "log_odds_ratio": -0.48685571551322937, "logits/chosen": 0.8076181411743164, "logits/rejected": 0.8423618674278259, "logps/chosen": -2.7815845012664795, "logps/rejected": -4.686279296875, "loss": 0.7327, "nll_loss": 0.6839888095855713, "rewards/accuracies": 0.75, "rewards/chosen": -0.2781584858894348, "rewards/margins": 0.19046948850154877, "rewards/rejected": -0.4686279296875, "step": 5663 }, { "epoch": 15.507186858316222, "grad_norm": 6.084557056427002, "learning_rate": 2.2452054794520547e-07, "log_odds_chosen": 1.2823235988616943, "log_odds_ratio": -0.2991984486579895, "logits/chosen": 1.0533678531646729, "logits/rejected": 1.067252278327942, "logps/chosen": -2.009408473968506, "logps/rejected": -3.1654067039489746, "loss": 0.5423, "nll_loss": 0.5124233961105347, "rewards/accuracies": 1.0, "rewards/chosen": -0.20094084739685059, "rewards/margins": 0.11559983342885971, "rewards/rejected": -0.3165406584739685, "step": 5664 }, { "epoch": 15.509924709103354, "grad_norm": 5.630929946899414, "learning_rate": 2.243835616438356e-07, "log_odds_chosen": 1.906449317932129, "log_odds_ratio": -0.360381543636322, "logits/chosen": 0.6367703080177307, "logits/rejected": 0.6923413872718811, "logps/chosen": -2.7808518409729004, "logps/rejected": -4.5767083168029785, "loss": 0.7501, "nll_loss": 0.7140742540359497, "rewards/accuracies": 0.875, "rewards/chosen": -0.27808520197868347, "rewards/margins": 0.17958563566207886, "rewards/rejected": -0.45767083764076233, "step": 5665 }, { "epoch": 15.512662559890487, "grad_norm": 4.896236896514893, "learning_rate": 2.2424657534246575e-07, "log_odds_chosen": 2.3967432975769043, "log_odds_ratio": -0.2608972489833832, "logits/chosen": 0.9525800943374634, "logits/rejected": 0.9774345755577087, "logps/chosen": -2.5077335834503174, "logps/rejected": -4.804125785827637, "loss": 0.7533, "nll_loss": 0.7272316217422485, "rewards/accuracies": 1.0, "rewards/chosen": -0.2507733404636383, "rewards/margins": 0.2296392321586609, "rewards/rejected": -0.4804125726222992, "step": 5666 }, { "epoch": 15.515400410677618, "grad_norm": 6.505123615264893, "learning_rate": 2.241095890410959e-07, "log_odds_chosen": 2.8846113681793213, "log_odds_ratio": -0.17127038538455963, "logits/chosen": 0.8504724502563477, "logits/rejected": 0.8604956269264221, "logps/chosen": -2.639202833175659, "logps/rejected": -5.398682594299316, "loss": 0.7437, "nll_loss": 0.7265633344650269, "rewards/accuracies": 1.0, "rewards/chosen": -0.26392030715942383, "rewards/margins": 0.2759479880332947, "rewards/rejected": -0.5398682355880737, "step": 5667 }, { "epoch": 15.51813826146475, "grad_norm": 7.73102331161499, "learning_rate": 2.2397260273972602e-07, "log_odds_chosen": 3.4701552391052246, "log_odds_ratio": -0.10058349370956421, "logits/chosen": 0.9422906041145325, "logits/rejected": 0.9492677450180054, "logps/chosen": -2.433783531188965, "logps/rejected": -5.796655178070068, "loss": 0.6764, "nll_loss": 0.6663842797279358, "rewards/accuracies": 1.0, "rewards/chosen": -0.24337834119796753, "rewards/margins": 0.3362872004508972, "rewards/rejected": -0.5796655416488647, "step": 5668 }, { "epoch": 15.520876112251882, "grad_norm": 5.4044671058654785, "learning_rate": 2.2383561643835615e-07, "log_odds_chosen": 1.3138670921325684, "log_odds_ratio": -0.35140296816825867, "logits/chosen": 0.6294881701469421, "logits/rejected": 0.6178699731826782, "logps/chosen": -1.9799983501434326, "logps/rejected": -3.191732168197632, "loss": 0.5839, "nll_loss": 0.5487884879112244, "rewards/accuracies": 0.875, "rewards/chosen": -0.19799983501434326, "rewards/margins": 0.12117341160774231, "rewards/rejected": -0.31917324662208557, "step": 5669 }, { "epoch": 15.523613963039015, "grad_norm": 6.264688014984131, "learning_rate": 2.2369863013698628e-07, "log_odds_chosen": 3.5950002670288086, "log_odds_ratio": -0.2351868450641632, "logits/chosen": 1.0374149084091187, "logits/rejected": 1.092812180519104, "logps/chosen": -2.521864891052246, "logps/rejected": -6.007847785949707, "loss": 0.7911, "nll_loss": 0.7675976753234863, "rewards/accuracies": 1.0, "rewards/chosen": -0.25218650698661804, "rewards/margins": 0.3485982418060303, "rewards/rejected": -0.6007847785949707, "step": 5670 }, { "epoch": 15.526351813826146, "grad_norm": 6.696859836578369, "learning_rate": 2.2356164383561643e-07, "log_odds_chosen": 2.4702582359313965, "log_odds_ratio": -0.2914660573005676, "logits/chosen": 0.8728746175765991, "logits/rejected": 0.9541543126106262, "logps/chosen": -2.5875144004821777, "logps/rejected": -4.94773530960083, "loss": 0.7675, "nll_loss": 0.7383050918579102, "rewards/accuracies": 0.75, "rewards/chosen": -0.25875142216682434, "rewards/margins": 0.23602211475372314, "rewards/rejected": -0.4947735369205475, "step": 5671 }, { "epoch": 15.529089664613279, "grad_norm": 5.687101364135742, "learning_rate": 2.2342465753424655e-07, "log_odds_chosen": 2.792874813079834, "log_odds_ratio": -0.1603720635175705, "logits/chosen": 1.0906920433044434, "logits/rejected": 1.0036734342575073, "logps/chosen": -2.4404428005218506, "logps/rejected": -5.095508575439453, "loss": 0.6774, "nll_loss": 0.6613514423370361, "rewards/accuracies": 1.0, "rewards/chosen": -0.24404430389404297, "rewards/margins": 0.2655065655708313, "rewards/rejected": -0.5095508694648743, "step": 5672 }, { "epoch": 15.53182751540041, "grad_norm": 6.127904891967773, "learning_rate": 2.232876712328767e-07, "log_odds_chosen": 2.0615415573120117, "log_odds_ratio": -0.26420342922210693, "logits/chosen": 0.8433162569999695, "logits/rejected": 0.8612450957298279, "logps/chosen": -2.4451615810394287, "logps/rejected": -4.416384220123291, "loss": 0.7397, "nll_loss": 0.7132872343063354, "rewards/accuracies": 0.875, "rewards/chosen": -0.24451616406440735, "rewards/margins": 0.19712227582931519, "rewards/rejected": -0.44163843989372253, "step": 5673 }, { "epoch": 15.534565366187543, "grad_norm": 5.381173610687256, "learning_rate": 2.2315068493150686e-07, "log_odds_chosen": 2.1672048568725586, "log_odds_ratio": -0.2594209909439087, "logits/chosen": 0.946049690246582, "logits/rejected": 0.9708355665206909, "logps/chosen": -2.3260464668273926, "logps/rejected": -4.425800323486328, "loss": 0.6925, "nll_loss": 0.6665999889373779, "rewards/accuracies": 0.875, "rewards/chosen": -0.23260465264320374, "rewards/margins": 0.20997539162635803, "rewards/rejected": -0.44258004426956177, "step": 5674 }, { "epoch": 15.537303216974674, "grad_norm": 5.818109512329102, "learning_rate": 2.2301369863013698e-07, "log_odds_chosen": 1.2506253719329834, "log_odds_ratio": -0.3454620838165283, "logits/chosen": 0.6004027724266052, "logits/rejected": 0.6157301068305969, "logps/chosen": -1.9845943450927734, "logps/rejected": -3.1198017597198486, "loss": 0.7366, "nll_loss": 0.7020502090454102, "rewards/accuracies": 1.0, "rewards/chosen": -0.1984594166278839, "rewards/margins": 0.11352075636386871, "rewards/rejected": -0.3119801878929138, "step": 5675 }, { "epoch": 15.540041067761807, "grad_norm": 5.767563819885254, "learning_rate": 2.228767123287671e-07, "log_odds_chosen": 2.995649814605713, "log_odds_ratio": -0.15309025347232819, "logits/chosen": 0.8491904735565186, "logits/rejected": 0.8659195899963379, "logps/chosen": -2.2278451919555664, "logps/rejected": -5.122977256774902, "loss": 0.6887, "nll_loss": 0.6733630299568176, "rewards/accuracies": 1.0, "rewards/chosen": -0.22278454899787903, "rewards/margins": 0.28951317071914673, "rewards/rejected": -0.5122977495193481, "step": 5676 }, { "epoch": 15.542778918548938, "grad_norm": 5.387051582336426, "learning_rate": 2.2273972602739723e-07, "log_odds_chosen": 3.328279733657837, "log_odds_ratio": -0.1531006395816803, "logits/chosen": 1.0073670148849487, "logits/rejected": 1.06334388256073, "logps/chosen": -2.448716163635254, "logps/rejected": -5.666012763977051, "loss": 0.5896, "nll_loss": 0.5742403268814087, "rewards/accuracies": 1.0, "rewards/chosen": -0.2448716163635254, "rewards/margins": 0.3217296600341797, "rewards/rejected": -0.5666012763977051, "step": 5677 }, { "epoch": 15.545516769336071, "grad_norm": 6.069304943084717, "learning_rate": 2.2260273972602739e-07, "log_odds_chosen": 1.1466352939605713, "log_odds_ratio": -0.41167619824409485, "logits/chosen": 0.7902013063430786, "logits/rejected": 0.8522877097129822, "logps/chosen": -2.65712833404541, "logps/rejected": -3.7260048389434814, "loss": 0.6133, "nll_loss": 0.5721002817153931, "rewards/accuracies": 0.75, "rewards/chosen": -0.2657128572463989, "rewards/margins": 0.10688763856887817, "rewards/rejected": -0.3726004958152771, "step": 5678 }, { "epoch": 15.548254620123203, "grad_norm": 7.196857929229736, "learning_rate": 2.2246575342465754e-07, "log_odds_chosen": 1.824328899383545, "log_odds_ratio": -0.2478858232498169, "logits/chosen": 0.8815189599990845, "logits/rejected": 0.7933096885681152, "logps/chosen": -1.8853809833526611, "logps/rejected": -3.5568971633911133, "loss": 0.6314, "nll_loss": 0.6066547632217407, "rewards/accuracies": 1.0, "rewards/chosen": -0.1885381042957306, "rewards/margins": 0.16715161502361298, "rewards/rejected": -0.35568973422050476, "step": 5679 }, { "epoch": 15.550992470910336, "grad_norm": 8.163311004638672, "learning_rate": 2.2232876712328766e-07, "log_odds_chosen": 2.086374282836914, "log_odds_ratio": -0.33569398522377014, "logits/chosen": 1.0273058414459229, "logits/rejected": 1.0370979309082031, "logps/chosen": -2.2983968257904053, "logps/rejected": -4.265905857086182, "loss": 0.6972, "nll_loss": 0.663590669631958, "rewards/accuracies": 0.875, "rewards/chosen": -0.22983968257904053, "rewards/margins": 0.19675090909004211, "rewards/rejected": -0.42659059166908264, "step": 5680 }, { "epoch": 15.553730321697467, "grad_norm": 4.999593257904053, "learning_rate": 2.2219178082191782e-07, "log_odds_chosen": 2.120302677154541, "log_odds_ratio": -0.16076381504535675, "logits/chosen": 0.7640349864959717, "logits/rejected": 0.7051960229873657, "logps/chosen": -1.6632345914840698, "logps/rejected": -3.5621211528778076, "loss": 0.6748, "nll_loss": 0.6587027311325073, "rewards/accuracies": 1.0, "rewards/chosen": -0.1663234680891037, "rewards/margins": 0.18988865613937378, "rewards/rejected": -0.35621213912963867, "step": 5681 }, { "epoch": 15.5564681724846, "grad_norm": 5.497368812561035, "learning_rate": 2.2205479452054792e-07, "log_odds_chosen": 1.3979237079620361, "log_odds_ratio": -0.3368909955024719, "logits/chosen": 1.1704283952713013, "logits/rejected": 1.1867327690124512, "logps/chosen": -1.8057515621185303, "logps/rejected": -3.0616402626037598, "loss": 0.6029, "nll_loss": 0.5691670775413513, "rewards/accuracies": 0.875, "rewards/chosen": -0.1805751621723175, "rewards/margins": 0.12558884918689728, "rewards/rejected": -0.306164026260376, "step": 5682 }, { "epoch": 15.55920602327173, "grad_norm": 5.493536472320557, "learning_rate": 2.2191780821917807e-07, "log_odds_chosen": 1.0731841325759888, "log_odds_ratio": -0.3825932741165161, "logits/chosen": 0.7426021099090576, "logits/rejected": 0.7380815744400024, "logps/chosen": -2.121486186981201, "logps/rejected": -3.1053755283355713, "loss": 0.5747, "nll_loss": 0.5364447236061096, "rewards/accuracies": 0.875, "rewards/chosen": -0.21214863657951355, "rewards/margins": 0.0983889251947403, "rewards/rejected": -0.31053754687309265, "step": 5683 }, { "epoch": 15.561943874058864, "grad_norm": 5.2594828605651855, "learning_rate": 2.217808219178082e-07, "log_odds_chosen": 2.0749287605285645, "log_odds_ratio": -0.32072484493255615, "logits/chosen": 0.856802225112915, "logits/rejected": 0.8071075081825256, "logps/chosen": -2.0946543216705322, "logps/rejected": -4.0854291915893555, "loss": 0.6993, "nll_loss": 0.6672705411911011, "rewards/accuracies": 0.875, "rewards/chosen": -0.20946544408798218, "rewards/margins": 0.19907748699188232, "rewards/rejected": -0.4085429012775421, "step": 5684 }, { "epoch": 15.564681724845997, "grad_norm": 4.821351051330566, "learning_rate": 2.2164383561643835e-07, "log_odds_chosen": 2.3683855533599854, "log_odds_ratio": -0.1635543555021286, "logits/chosen": 0.6722776889801025, "logits/rejected": 0.7667232155799866, "logps/chosen": -1.939256191253662, "logps/rejected": -4.159778594970703, "loss": 0.5982, "nll_loss": 0.5818560123443604, "rewards/accuracies": 1.0, "rewards/chosen": -0.1939256340265274, "rewards/margins": 0.2220522165298462, "rewards/rejected": -0.4159778356552124, "step": 5685 }, { "epoch": 15.567419575633128, "grad_norm": 8.679510116577148, "learning_rate": 2.215068493150685e-07, "log_odds_chosen": 0.6143689155578613, "log_odds_ratio": -0.7482635378837585, "logits/chosen": 0.9407408237457275, "logits/rejected": 1.0406628847122192, "logps/chosen": -3.0197157859802246, "logps/rejected": -3.5668604373931885, "loss": 0.7619, "nll_loss": 0.6871093511581421, "rewards/accuracies": 0.875, "rewards/chosen": -0.30197155475616455, "rewards/margins": 0.05471447855234146, "rewards/rejected": -0.3566860258579254, "step": 5686 }, { "epoch": 15.570157426420261, "grad_norm": 5.956575870513916, "learning_rate": 2.2136986301369862e-07, "log_odds_chosen": 3.476306915283203, "log_odds_ratio": -0.2390451729297638, "logits/chosen": 1.0793670415878296, "logits/rejected": 1.1689397096633911, "logps/chosen": -2.157341718673706, "logps/rejected": -5.470468997955322, "loss": 0.8134, "nll_loss": 0.7895272970199585, "rewards/accuracies": 0.875, "rewards/chosen": -0.21573418378829956, "rewards/margins": 0.33131271600723267, "rewards/rejected": -0.5470468997955322, "step": 5687 }, { "epoch": 15.572895277207392, "grad_norm": 6.5989298820495605, "learning_rate": 2.2123287671232877e-07, "log_odds_chosen": 0.7732992172241211, "log_odds_ratio": -0.657089114189148, "logits/chosen": 0.8672982454299927, "logits/rejected": 0.9004546403884888, "logps/chosen": -2.9602105617523193, "logps/rejected": -3.6993067264556885, "loss": 0.7017, "nll_loss": 0.636013925075531, "rewards/accuracies": 0.625, "rewards/chosen": -0.29602107405662537, "rewards/margins": 0.07390959560871124, "rewards/rejected": -0.369930624961853, "step": 5688 }, { "epoch": 15.575633127994525, "grad_norm": 5.7440690994262695, "learning_rate": 2.2109589041095887e-07, "log_odds_chosen": 1.8335999250411987, "log_odds_ratio": -0.34883108735084534, "logits/chosen": 0.8870457410812378, "logits/rejected": 0.8892900943756104, "logps/chosen": -2.148128032684326, "logps/rejected": -3.9033474922180176, "loss": 0.6893, "nll_loss": 0.6544556021690369, "rewards/accuracies": 0.875, "rewards/chosen": -0.21481281518936157, "rewards/margins": 0.17552195489406586, "rewards/rejected": -0.3903347849845886, "step": 5689 }, { "epoch": 15.578370978781656, "grad_norm": 5.244324684143066, "learning_rate": 2.2095890410958903e-07, "log_odds_chosen": 2.428283929824829, "log_odds_ratio": -0.25594833493232727, "logits/chosen": 0.8428466320037842, "logits/rejected": 0.9698941707611084, "logps/chosen": -3.1126933097839355, "logps/rejected": -5.496283531188965, "loss": 0.7575, "nll_loss": 0.731953501701355, "rewards/accuracies": 0.875, "rewards/chosen": -0.3112693428993225, "rewards/margins": 0.2383589893579483, "rewards/rejected": -0.5496283173561096, "step": 5690 }, { "epoch": 15.58110882956879, "grad_norm": 5.883164405822754, "learning_rate": 2.2082191780821915e-07, "log_odds_chosen": 1.3694031238555908, "log_odds_ratio": -0.2957763075828552, "logits/chosen": 0.5632078647613525, "logits/rejected": 0.47880077362060547, "logps/chosen": -2.161410331726074, "logps/rejected": -3.391256809234619, "loss": 0.5984, "nll_loss": 0.5687861442565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.21614103019237518, "rewards/margins": 0.12298467755317688, "rewards/rejected": -0.33912569284439087, "step": 5691 }, { "epoch": 15.58384668035592, "grad_norm": 5.3064727783203125, "learning_rate": 2.206849315068493e-07, "log_odds_chosen": 2.757502555847168, "log_odds_ratio": -0.14630721509456635, "logits/chosen": 1.0604604482650757, "logits/rejected": 1.0883424282073975, "logps/chosen": -2.4265971183776855, "logps/rejected": -5.09537410736084, "loss": 0.6427, "nll_loss": 0.6280421614646912, "rewards/accuracies": 1.0, "rewards/chosen": -0.24265970289707184, "rewards/margins": 0.2668777108192444, "rewards/rejected": -0.509537398815155, "step": 5692 }, { "epoch": 15.586584531143053, "grad_norm": 7.117559909820557, "learning_rate": 2.2054794520547946e-07, "log_odds_chosen": 5.073576927185059, "log_odds_ratio": -0.19816015660762787, "logits/chosen": 0.8669776916503906, "logits/rejected": 0.9442261457443237, "logps/chosen": -3.163983106613159, "logps/rejected": -8.146101951599121, "loss": 0.8071, "nll_loss": 0.7872658967971802, "rewards/accuracies": 0.875, "rewards/chosen": -0.3163982927799225, "rewards/margins": 0.49821189045906067, "rewards/rejected": -0.8146102428436279, "step": 5693 }, { "epoch": 15.589322381930184, "grad_norm": 5.600249767303467, "learning_rate": 2.2041095890410958e-07, "log_odds_chosen": 1.880519986152649, "log_odds_ratio": -0.30685633420944214, "logits/chosen": 0.7900227308273315, "logits/rejected": 0.898747444152832, "logps/chosen": -2.826779842376709, "logps/rejected": -4.658063888549805, "loss": 0.8716, "nll_loss": 0.8409332633018494, "rewards/accuracies": 0.75, "rewards/chosen": -0.2826780080795288, "rewards/margins": 0.18312841653823853, "rewards/rejected": -0.46580642461776733, "step": 5694 }, { "epoch": 15.592060232717317, "grad_norm": 5.408854007720947, "learning_rate": 2.2027397260273973e-07, "log_odds_chosen": 1.2516629695892334, "log_odds_ratio": -0.3457517921924591, "logits/chosen": 0.764000415802002, "logits/rejected": 0.7818912267684937, "logps/chosen": -2.002328634262085, "logps/rejected": -3.156695604324341, "loss": 0.5713, "nll_loss": 0.5367035865783691, "rewards/accuracies": 0.875, "rewards/chosen": -0.2002328634262085, "rewards/margins": 0.11543669551610947, "rewards/rejected": -0.31566956639289856, "step": 5695 }, { "epoch": 15.594798083504449, "grad_norm": 5.913284778594971, "learning_rate": 2.2013698630136983e-07, "log_odds_chosen": 1.3354213237762451, "log_odds_ratio": -0.35667115449905396, "logits/chosen": 0.936684250831604, "logits/rejected": 0.9443495273590088, "logps/chosen": -2.5770297050476074, "logps/rejected": -3.821382522583008, "loss": 0.8001, "nll_loss": 0.7644606828689575, "rewards/accuracies": 0.875, "rewards/chosen": -0.2577029764652252, "rewards/margins": 0.12443527579307556, "rewards/rejected": -0.3821382522583008, "step": 5696 }, { "epoch": 15.597535934291582, "grad_norm": 5.268280982971191, "learning_rate": 2.1999999999999998e-07, "log_odds_chosen": 1.400511384010315, "log_odds_ratio": -0.3374011516571045, "logits/chosen": 1.1483209133148193, "logits/rejected": 1.1454565525054932, "logps/chosen": -2.1742658615112305, "logps/rejected": -3.4797611236572266, "loss": 0.6604, "nll_loss": 0.6266552209854126, "rewards/accuracies": 0.875, "rewards/chosen": -0.217426598072052, "rewards/margins": 0.13054953515529633, "rewards/rejected": -0.3479761481285095, "step": 5697 }, { "epoch": 15.600273785078713, "grad_norm": 5.19870138168335, "learning_rate": 2.1986301369863014e-07, "log_odds_chosen": 2.5072402954101562, "log_odds_ratio": -0.27913302183151245, "logits/chosen": 0.8828016519546509, "logits/rejected": 0.8908340930938721, "logps/chosen": -2.0332937240600586, "logps/rejected": -4.432750225067139, "loss": 0.6351, "nll_loss": 0.6071776747703552, "rewards/accuracies": 0.875, "rewards/chosen": -0.20332935452461243, "rewards/margins": 0.2399456650018692, "rewards/rejected": -0.4432750344276428, "step": 5698 }, { "epoch": 15.603011635865846, "grad_norm": 5.032533645629883, "learning_rate": 2.1972602739726026e-07, "log_odds_chosen": 2.935302734375, "log_odds_ratio": -0.2411075085401535, "logits/chosen": 0.9516665935516357, "logits/rejected": 0.9663706421852112, "logps/chosen": -1.8450464010238647, "logps/rejected": -4.628697395324707, "loss": 0.6889, "nll_loss": 0.6648364067077637, "rewards/accuracies": 0.875, "rewards/chosen": -0.18450462818145752, "rewards/margins": 0.2783651351928711, "rewards/rejected": -0.4628697633743286, "step": 5699 }, { "epoch": 15.605749486652977, "grad_norm": 6.090025424957275, "learning_rate": 2.1958904109589041e-07, "log_odds_chosen": 3.202678918838501, "log_odds_ratio": -0.2554317116737366, "logits/chosen": 0.9385422468185425, "logits/rejected": 1.0492417812347412, "logps/chosen": -3.0278806686401367, "logps/rejected": -6.1543192863464355, "loss": 0.7002, "nll_loss": 0.674644947052002, "rewards/accuracies": 0.875, "rewards/chosen": -0.30278804898262024, "rewards/margins": 0.3126438856124878, "rewards/rejected": -0.6154319047927856, "step": 5700 }, { "epoch": 15.60848733744011, "grad_norm": 6.893675327301025, "learning_rate": 2.1945205479452054e-07, "log_odds_chosen": 0.31497085094451904, "log_odds_ratio": -0.7876178622245789, "logits/chosen": 0.646554708480835, "logits/rejected": 0.7014268636703491, "logps/chosen": -2.326763153076172, "logps/rejected": -2.534571647644043, "loss": 0.6813, "nll_loss": 0.6025199890136719, "rewards/accuracies": 0.75, "rewards/chosen": -0.23267629742622375, "rewards/margins": 0.020780859515070915, "rewards/rejected": -0.2534571588039398, "step": 5701 }, { "epoch": 15.611225188227241, "grad_norm": 4.7573981285095215, "learning_rate": 2.1931506849315067e-07, "log_odds_chosen": 3.285980463027954, "log_odds_ratio": -0.11159180104732513, "logits/chosen": 0.7137141227722168, "logits/rejected": 0.7390464544296265, "logps/chosen": -1.699815273284912, "logps/rejected": -4.771304130554199, "loss": 0.5999, "nll_loss": 0.5887181758880615, "rewards/accuracies": 1.0, "rewards/chosen": -0.16998153924942017, "rewards/margins": 0.30714887380599976, "rewards/rejected": -0.4771304130554199, "step": 5702 }, { "epoch": 15.613963039014374, "grad_norm": 7.133199214935303, "learning_rate": 2.191780821917808e-07, "log_odds_chosen": 1.0037914514541626, "log_odds_ratio": -0.40413880348205566, "logits/chosen": 1.0938395261764526, "logits/rejected": 0.9668177366256714, "logps/chosen": -2.244206190109253, "logps/rejected": -3.174931049346924, "loss": 0.7227, "nll_loss": 0.6823057532310486, "rewards/accuracies": 0.75, "rewards/chosen": -0.22442063689231873, "rewards/margins": 0.09307248145341873, "rewards/rejected": -0.31749311089515686, "step": 5703 }, { "epoch": 15.616700889801505, "grad_norm": 8.296149253845215, "learning_rate": 2.1904109589041094e-07, "log_odds_chosen": 0.8153929114341736, "log_odds_ratio": -0.4538286030292511, "logits/chosen": 0.8487238883972168, "logits/rejected": 0.9418970346450806, "logps/chosen": -3.5949907302856445, "logps/rejected": -4.3318986892700195, "loss": 0.7304, "nll_loss": 0.6849958300590515, "rewards/accuracies": 0.75, "rewards/chosen": -0.35949909687042236, "rewards/margins": 0.07369078695774078, "rewards/rejected": -0.43318986892700195, "step": 5704 }, { "epoch": 15.619438740588638, "grad_norm": 5.695920467376709, "learning_rate": 2.189041095890411e-07, "log_odds_chosen": 2.0515031814575195, "log_odds_ratio": -0.3212754726409912, "logits/chosen": 0.8240451216697693, "logits/rejected": 0.8644770979881287, "logps/chosen": -2.3482398986816406, "logps/rejected": -4.216421604156494, "loss": 0.5702, "nll_loss": 0.538031816482544, "rewards/accuracies": 0.75, "rewards/chosen": -0.23482400178909302, "rewards/margins": 0.18681812286376953, "rewards/rejected": -0.42164212465286255, "step": 5705 }, { "epoch": 15.62217659137577, "grad_norm": 5.93669319152832, "learning_rate": 2.1876712328767122e-07, "log_odds_chosen": 2.9216272830963135, "log_odds_ratio": -0.27226293087005615, "logits/chosen": 0.9983142614364624, "logits/rejected": 1.0042920112609863, "logps/chosen": -2.2275121212005615, "logps/rejected": -5.034919738769531, "loss": 0.6311, "nll_loss": 0.6038426160812378, "rewards/accuracies": 1.0, "rewards/chosen": -0.2227512151002884, "rewards/margins": 0.28074079751968384, "rewards/rejected": -0.503491997718811, "step": 5706 }, { "epoch": 15.624914442162902, "grad_norm": 5.250954627990723, "learning_rate": 2.1863013698630137e-07, "log_odds_chosen": 2.4641401767730713, "log_odds_ratio": -0.15220460295677185, "logits/chosen": 0.766411304473877, "logits/rejected": 0.7495735883712769, "logps/chosen": -1.7457255125045776, "logps/rejected": -4.025755882263184, "loss": 0.594, "nll_loss": 0.5787442922592163, "rewards/accuracies": 1.0, "rewards/chosen": -0.17457255721092224, "rewards/margins": 0.22800301015377045, "rewards/rejected": -0.4025755524635315, "step": 5707 }, { "epoch": 15.627652292950033, "grad_norm": 6.15615177154541, "learning_rate": 2.184931506849315e-07, "log_odds_chosen": 3.812056064605713, "log_odds_ratio": -0.06557223945856094, "logits/chosen": 0.8533968329429626, "logits/rejected": 0.880160927772522, "logps/chosen": -1.8355891704559326, "logps/rejected": -5.432742118835449, "loss": 0.5531, "nll_loss": 0.5465790033340454, "rewards/accuracies": 1.0, "rewards/chosen": -0.18355891108512878, "rewards/margins": 0.3597153127193451, "rewards/rejected": -0.5432742834091187, "step": 5708 }, { "epoch": 15.630390143737166, "grad_norm": 4.876659870147705, "learning_rate": 2.1835616438356162e-07, "log_odds_chosen": 1.4743623733520508, "log_odds_ratio": -0.40944328904151917, "logits/chosen": 1.0047770738601685, "logits/rejected": 0.995915412902832, "logps/chosen": -2.3567421436309814, "logps/rejected": -3.7526769638061523, "loss": 0.706, "nll_loss": 0.6650370955467224, "rewards/accuracies": 0.875, "rewards/chosen": -0.23567423224449158, "rewards/margins": 0.1395934820175171, "rewards/rejected": -0.37526771426200867, "step": 5709 }, { "epoch": 15.633127994524298, "grad_norm": 5.540953159332275, "learning_rate": 2.1821917808219175e-07, "log_odds_chosen": 2.3591136932373047, "log_odds_ratio": -0.22400324046611786, "logits/chosen": 0.7793428897857666, "logits/rejected": 0.7295650243759155, "logps/chosen": -1.8620012998580933, "logps/rejected": -4.0756144523620605, "loss": 0.6451, "nll_loss": 0.6226803064346313, "rewards/accuracies": 1.0, "rewards/chosen": -0.18620014190673828, "rewards/margins": 0.22136130928993225, "rewards/rejected": -0.40756142139434814, "step": 5710 }, { "epoch": 15.63586584531143, "grad_norm": 4.696473121643066, "learning_rate": 2.180821917808219e-07, "log_odds_chosen": 2.2059459686279297, "log_odds_ratio": -0.20508912205696106, "logits/chosen": 0.8355556726455688, "logits/rejected": 0.848136305809021, "logps/chosen": -2.066593885421753, "logps/rejected": -4.158222198486328, "loss": 0.6096, "nll_loss": 0.5890739560127258, "rewards/accuracies": 1.0, "rewards/chosen": -0.20665937662124634, "rewards/margins": 0.2091628611087799, "rewards/rejected": -0.41582226753234863, "step": 5711 }, { "epoch": 15.638603696098563, "grad_norm": 5.557722091674805, "learning_rate": 2.1794520547945205e-07, "log_odds_chosen": 3.6438708305358887, "log_odds_ratio": -0.1364617794752121, "logits/chosen": 0.6771543025970459, "logits/rejected": 0.714740514755249, "logps/chosen": -2.3710927963256836, "logps/rejected": -5.8225884437561035, "loss": 0.6965, "nll_loss": 0.682835042476654, "rewards/accuracies": 1.0, "rewards/chosen": -0.23710931837558746, "rewards/margins": 0.34514957666397095, "rewards/rejected": -0.5822588801383972, "step": 5712 }, { "epoch": 15.641341546885695, "grad_norm": 4.9433441162109375, "learning_rate": 2.1780821917808218e-07, "log_odds_chosen": 3.217714786529541, "log_odds_ratio": -0.14323914051055908, "logits/chosen": 0.9651620388031006, "logits/rejected": 1.006568193435669, "logps/chosen": -1.9932529926300049, "logps/rejected": -5.0519232749938965, "loss": 0.6167, "nll_loss": 0.6023261547088623, "rewards/accuracies": 1.0, "rewards/chosen": -0.1993253082036972, "rewards/margins": 0.3058670461177826, "rewards/rejected": -0.5051923394203186, "step": 5713 }, { "epoch": 15.644079397672828, "grad_norm": 6.6415300369262695, "learning_rate": 2.1767123287671233e-07, "log_odds_chosen": 2.2701659202575684, "log_odds_ratio": -0.4722408652305603, "logits/chosen": 0.924868106842041, "logits/rejected": 0.9226257801055908, "logps/chosen": -2.221836566925049, "logps/rejected": -4.403264999389648, "loss": 0.58, "nll_loss": 0.5328214764595032, "rewards/accuracies": 0.875, "rewards/chosen": -0.22218364477157593, "rewards/margins": 0.21814283728599548, "rewards/rejected": -0.4403265118598938, "step": 5714 }, { "epoch": 15.646817248459959, "grad_norm": 5.410141468048096, "learning_rate": 2.1753424657534246e-07, "log_odds_chosen": 1.6676011085510254, "log_odds_ratio": -0.2177761197090149, "logits/chosen": 0.8399114608764648, "logits/rejected": 0.901823878288269, "logps/chosen": -2.3293700218200684, "logps/rejected": -3.8843767642974854, "loss": 0.63, "nll_loss": 0.6082192659378052, "rewards/accuracies": 1.0, "rewards/chosen": -0.2329370081424713, "rewards/margins": 0.1555006504058838, "rewards/rejected": -0.3884376287460327, "step": 5715 }, { "epoch": 15.649555099247092, "grad_norm": 6.635052680969238, "learning_rate": 2.1739726027397258e-07, "log_odds_chosen": 3.1187891960144043, "log_odds_ratio": -0.2051897495985031, "logits/chosen": 1.050408124923706, "logits/rejected": 1.0672812461853027, "logps/chosen": -2.4497039318084717, "logps/rejected": -5.396018981933594, "loss": 0.6502, "nll_loss": 0.629691481590271, "rewards/accuracies": 0.875, "rewards/chosen": -0.2449704110622406, "rewards/margins": 0.2946315407752991, "rewards/rejected": -0.5396019220352173, "step": 5716 }, { "epoch": 15.652292950034223, "grad_norm": 5.631957530975342, "learning_rate": 2.1726027397260274e-07, "log_odds_chosen": 1.4938544034957886, "log_odds_ratio": -0.3531930446624756, "logits/chosen": 0.6679238677024841, "logits/rejected": 0.7174232006072998, "logps/chosen": -2.2812697887420654, "logps/rejected": -3.6362032890319824, "loss": 0.7518, "nll_loss": 0.7164402604103088, "rewards/accuracies": 0.875, "rewards/chosen": -0.22812698781490326, "rewards/margins": 0.13549335300922394, "rewards/rejected": -0.3636203110218048, "step": 5717 }, { "epoch": 15.655030800821356, "grad_norm": 6.386329650878906, "learning_rate": 2.1712328767123286e-07, "log_odds_chosen": 2.619235038757324, "log_odds_ratio": -0.25983762741088867, "logits/chosen": 0.7735118865966797, "logits/rejected": 0.7930208444595337, "logps/chosen": -2.14005708694458, "logps/rejected": -4.626455307006836, "loss": 0.7165, "nll_loss": 0.6905205249786377, "rewards/accuracies": 0.875, "rewards/chosen": -0.214005708694458, "rewards/margins": 0.24863985180854797, "rewards/rejected": -0.462645560503006, "step": 5718 }, { "epoch": 15.657768651608487, "grad_norm": 5.771114349365234, "learning_rate": 2.16986301369863e-07, "log_odds_chosen": 2.5661959648132324, "log_odds_ratio": -0.21998462080955505, "logits/chosen": 0.7494626045227051, "logits/rejected": 0.7240331172943115, "logps/chosen": -2.2130866050720215, "logps/rejected": -4.6680474281311035, "loss": 0.7553, "nll_loss": 0.733292281627655, "rewards/accuracies": 1.0, "rewards/chosen": -0.22130867838859558, "rewards/margins": 0.24549603462219238, "rewards/rejected": -0.46680474281311035, "step": 5719 }, { "epoch": 15.66050650239562, "grad_norm": 6.0878376960754395, "learning_rate": 2.1684931506849314e-07, "log_odds_chosen": 1.877188801765442, "log_odds_ratio": -0.3004060387611389, "logits/chosen": 1.0715309381484985, "logits/rejected": 1.0702643394470215, "logps/chosen": -2.1491124629974365, "logps/rejected": -3.939664125442505, "loss": 0.6687, "nll_loss": 0.6386525630950928, "rewards/accuracies": 0.875, "rewards/chosen": -0.21491125226020813, "rewards/margins": 0.17905516922473907, "rewards/rejected": -0.3939664363861084, "step": 5720 }, { "epoch": 15.663244353182751, "grad_norm": 5.025296688079834, "learning_rate": 2.167123287671233e-07, "log_odds_chosen": 1.7563825845718384, "log_odds_ratio": -0.3458419442176819, "logits/chosen": 0.7152626514434814, "logits/rejected": 0.7651776075363159, "logps/chosen": -1.6284059286117554, "logps/rejected": -3.261019468307495, "loss": 0.6173, "nll_loss": 0.582747757434845, "rewards/accuracies": 0.875, "rewards/chosen": -0.1628406047821045, "rewards/margins": 0.16326135396957397, "rewards/rejected": -0.32610195875167847, "step": 5721 }, { "epoch": 15.665982203969884, "grad_norm": 6.0602641105651855, "learning_rate": 2.165753424657534e-07, "log_odds_chosen": 2.432823657989502, "log_odds_ratio": -0.15914040803909302, "logits/chosen": 0.8209110498428345, "logits/rejected": 0.8032225966453552, "logps/chosen": -2.0569257736206055, "logps/rejected": -4.335975646972656, "loss": 0.6399, "nll_loss": 0.6240347623825073, "rewards/accuracies": 1.0, "rewards/chosen": -0.2056925892829895, "rewards/margins": 0.22790497541427612, "rewards/rejected": -0.4335975646972656, "step": 5722 }, { "epoch": 15.668720054757015, "grad_norm": 5.597553253173828, "learning_rate": 2.1643835616438354e-07, "log_odds_chosen": 1.6383949518203735, "log_odds_ratio": -0.4225689172744751, "logits/chosen": 0.5616352558135986, "logits/rejected": 0.5788422226905823, "logps/chosen": -2.6470751762390137, "logps/rejected": -4.101561546325684, "loss": 0.6361, "nll_loss": 0.593874990940094, "rewards/accuracies": 0.875, "rewards/chosen": -0.2647075057029724, "rewards/margins": 0.14544865489006042, "rewards/rejected": -0.41015616059303284, "step": 5723 }, { "epoch": 15.671457905544148, "grad_norm": 5.678074836730957, "learning_rate": 2.163013698630137e-07, "log_odds_chosen": 0.6181414127349854, "log_odds_ratio": -0.47761040925979614, "logits/chosen": 0.7815698981285095, "logits/rejected": 0.7810306549072266, "logps/chosen": -2.0913162231445312, "logps/rejected": -2.612947940826416, "loss": 0.6981, "nll_loss": 0.6503368020057678, "rewards/accuracies": 0.75, "rewards/chosen": -0.2091316282749176, "rewards/margins": 0.05216318368911743, "rewards/rejected": -0.26129481196403503, "step": 5724 }, { "epoch": 15.67419575633128, "grad_norm": 6.143176555633545, "learning_rate": 2.1616438356164382e-07, "log_odds_chosen": 1.1776071786880493, "log_odds_ratio": -0.3411925435066223, "logits/chosen": 0.6976706385612488, "logits/rejected": 0.7018070816993713, "logps/chosen": -2.30849552154541, "logps/rejected": -3.372434139251709, "loss": 0.6032, "nll_loss": 0.5690321326255798, "rewards/accuracies": 0.875, "rewards/chosen": -0.23084954917430878, "rewards/margins": 0.10639387369155884, "rewards/rejected": -0.3372434377670288, "step": 5725 }, { "epoch": 15.676933607118412, "grad_norm": 5.295289993286133, "learning_rate": 2.1602739726027397e-07, "log_odds_chosen": 1.9445428848266602, "log_odds_ratio": -0.2338753640651703, "logits/chosen": 0.6893527507781982, "logits/rejected": 0.8050976991653442, "logps/chosen": -2.2704029083251953, "logps/rejected": -4.129250526428223, "loss": 0.633, "nll_loss": 0.6095933318138123, "rewards/accuracies": 1.0, "rewards/chosen": -0.22704029083251953, "rewards/margins": 0.18588480353355408, "rewards/rejected": -0.4129250943660736, "step": 5726 }, { "epoch": 15.679671457905544, "grad_norm": 5.708569049835205, "learning_rate": 2.158904109589041e-07, "log_odds_chosen": 2.581348180770874, "log_odds_ratio": -0.2250634729862213, "logits/chosen": 0.8963159918785095, "logits/rejected": 0.8383350372314453, "logps/chosen": -1.5097771883010864, "logps/rejected": -3.8695828914642334, "loss": 0.5425, "nll_loss": 0.51996910572052, "rewards/accuracies": 1.0, "rewards/chosen": -0.1509777158498764, "rewards/margins": 0.2359805703163147, "rewards/rejected": -0.3869583010673523, "step": 5727 }, { "epoch": 15.682409308692677, "grad_norm": 5.743592262268066, "learning_rate": 2.1575342465753425e-07, "log_odds_chosen": 2.756472587585449, "log_odds_ratio": -0.19321966171264648, "logits/chosen": 0.8397358655929565, "logits/rejected": 0.6838431358337402, "logps/chosen": -2.5696730613708496, "logps/rejected": -5.19829797744751, "loss": 0.7335, "nll_loss": 0.714173436164856, "rewards/accuracies": 0.875, "rewards/chosen": -0.25696730613708496, "rewards/margins": 0.2628624737262726, "rewards/rejected": -0.5198297500610352, "step": 5728 }, { "epoch": 15.685147159479808, "grad_norm": 7.320113182067871, "learning_rate": 2.1561643835616437e-07, "log_odds_chosen": 1.4075956344604492, "log_odds_ratio": -0.7521902322769165, "logits/chosen": 0.9486022591590881, "logits/rejected": 1.0310475826263428, "logps/chosen": -3.495737314224243, "logps/rejected": -4.8689799308776855, "loss": 0.7499, "nll_loss": 0.6747267246246338, "rewards/accuracies": 0.75, "rewards/chosen": -0.3495737314224243, "rewards/margins": 0.1373242884874344, "rewards/rejected": -0.4868980348110199, "step": 5729 }, { "epoch": 15.68788501026694, "grad_norm": 5.787841320037842, "learning_rate": 2.154794520547945e-07, "log_odds_chosen": 2.63224458694458, "log_odds_ratio": -0.21773140132427216, "logits/chosen": 0.702421247959137, "logits/rejected": 0.6704319715499878, "logps/chosen": -1.6955475807189941, "logps/rejected": -4.146238327026367, "loss": 0.5544, "nll_loss": 0.5326208472251892, "rewards/accuracies": 1.0, "rewards/chosen": -0.16955474019050598, "rewards/margins": 0.24506908655166626, "rewards/rejected": -0.41462382674217224, "step": 5730 }, { "epoch": 15.690622861054072, "grad_norm": 7.836207866668701, "learning_rate": 2.1534246575342465e-07, "log_odds_chosen": 1.5272414684295654, "log_odds_ratio": -0.5148204565048218, "logits/chosen": 0.8481714129447937, "logits/rejected": 0.8131389617919922, "logps/chosen": -2.6439623832702637, "logps/rejected": -4.036759376525879, "loss": 0.8159, "nll_loss": 0.764400839805603, "rewards/accuracies": 0.75, "rewards/chosen": -0.2643962502479553, "rewards/margins": 0.13927972316741943, "rewards/rejected": -0.40367597341537476, "step": 5731 }, { "epoch": 15.693360711841205, "grad_norm": 9.004222869873047, "learning_rate": 2.1520547945205478e-07, "log_odds_chosen": 0.45581984519958496, "log_odds_ratio": -0.8329696655273438, "logits/chosen": 0.7003489136695862, "logits/rejected": 0.7045795917510986, "logps/chosen": -3.437527656555176, "logps/rejected": -3.8080923557281494, "loss": 0.6423, "nll_loss": 0.5590070486068726, "rewards/accuracies": 0.75, "rewards/chosen": -0.34375277161598206, "rewards/margins": 0.03705647587776184, "rewards/rejected": -0.3808092474937439, "step": 5732 }, { "epoch": 15.696098562628336, "grad_norm": 5.469500541687012, "learning_rate": 2.1506849315068493e-07, "log_odds_chosen": 1.7412030696868896, "log_odds_ratio": -0.2853342890739441, "logits/chosen": 0.9632233381271362, "logits/rejected": 1.002490520477295, "logps/chosen": -2.492978096008301, "logps/rejected": -4.177355766296387, "loss": 0.7632, "nll_loss": 0.7346502542495728, "rewards/accuracies": 0.875, "rewards/chosen": -0.2492978274822235, "rewards/margins": 0.16843776404857635, "rewards/rejected": -0.41773560643196106, "step": 5733 }, { "epoch": 15.698836413415469, "grad_norm": 6.943078517913818, "learning_rate": 2.1493150684931506e-07, "log_odds_chosen": 1.7256004810333252, "log_odds_ratio": -0.43950557708740234, "logits/chosen": 0.9073371887207031, "logits/rejected": 0.8989012241363525, "logps/chosen": -3.1369566917419434, "logps/rejected": -4.814159393310547, "loss": 0.7867, "nll_loss": 0.7427758574485779, "rewards/accuracies": 0.75, "rewards/chosen": -0.31369566917419434, "rewards/margins": 0.16772028803825378, "rewards/rejected": -0.4814159572124481, "step": 5734 }, { "epoch": 15.7015742642026, "grad_norm": 5.044093132019043, "learning_rate": 2.147945205479452e-07, "log_odds_chosen": 1.8457353115081787, "log_odds_ratio": -0.2775454819202423, "logits/chosen": 0.7113385200500488, "logits/rejected": 0.6812442541122437, "logps/chosen": -1.8084149360656738, "logps/rejected": -3.527121067047119, "loss": 0.6438, "nll_loss": 0.6160708665847778, "rewards/accuracies": 0.875, "rewards/chosen": -0.18084149062633514, "rewards/margins": 0.17187060415744781, "rewards/rejected": -0.35271212458610535, "step": 5735 }, { "epoch": 15.704312114989733, "grad_norm": 5.430287837982178, "learning_rate": 2.1465753424657533e-07, "log_odds_chosen": 2.9766111373901367, "log_odds_ratio": -0.2546224594116211, "logits/chosen": 0.5709682703018188, "logits/rejected": 0.5579622983932495, "logps/chosen": -2.2047505378723145, "logps/rejected": -5.073184967041016, "loss": 0.6398, "nll_loss": 0.6143186688423157, "rewards/accuracies": 0.875, "rewards/chosen": -0.22047504782676697, "rewards/margins": 0.2868434488773346, "rewards/rejected": -0.5073184967041016, "step": 5736 }, { "epoch": 15.707049965776864, "grad_norm": 6.468559265136719, "learning_rate": 2.1452054794520546e-07, "log_odds_chosen": 1.780127763748169, "log_odds_ratio": -0.5647101998329163, "logits/chosen": 0.9516189098358154, "logits/rejected": 1.018817663192749, "logps/chosen": -2.7589833736419678, "logps/rejected": -4.476030349731445, "loss": 0.8162, "nll_loss": 0.7597236633300781, "rewards/accuracies": 0.75, "rewards/chosen": -0.2758983373641968, "rewards/margins": 0.17170467972755432, "rewards/rejected": -0.4476030468940735, "step": 5737 }, { "epoch": 15.709787816563997, "grad_norm": 5.513749599456787, "learning_rate": 2.143835616438356e-07, "log_odds_chosen": 2.2634079456329346, "log_odds_ratio": -0.22744524478912354, "logits/chosen": 0.912321925163269, "logits/rejected": 0.8914610743522644, "logps/chosen": -1.970640778541565, "logps/rejected": -4.100157260894775, "loss": 0.6574, "nll_loss": 0.6346682906150818, "rewards/accuracies": 1.0, "rewards/chosen": -0.19706407189369202, "rewards/margins": 0.2129516750574112, "rewards/rejected": -0.410015732049942, "step": 5738 }, { "epoch": 15.71252566735113, "grad_norm": 5.559698104858398, "learning_rate": 2.1424657534246574e-07, "log_odds_chosen": 1.8411355018615723, "log_odds_ratio": -0.2660713493824005, "logits/chosen": 0.9320201873779297, "logits/rejected": 0.9170457124710083, "logps/chosen": -2.354694128036499, "logps/rejected": -4.123072624206543, "loss": 0.6911, "nll_loss": 0.6644821763038635, "rewards/accuracies": 0.875, "rewards/chosen": -0.23546940088272095, "rewards/margins": 0.17683786153793335, "rewards/rejected": -0.4123072624206543, "step": 5739 }, { "epoch": 15.715263518138261, "grad_norm": 6.967108249664307, "learning_rate": 2.141095890410959e-07, "log_odds_chosen": 2.3119609355926514, "log_odds_ratio": -0.25346076488494873, "logits/chosen": 1.131018877029419, "logits/rejected": 1.146517276763916, "logps/chosen": -2.4280288219451904, "logps/rejected": -4.640707015991211, "loss": 0.6524, "nll_loss": 0.6270608901977539, "rewards/accuracies": 1.0, "rewards/chosen": -0.24280288815498352, "rewards/margins": 0.22126781940460205, "rewards/rejected": -0.46407070755958557, "step": 5740 }, { "epoch": 15.718001368925394, "grad_norm": 6.007473945617676, "learning_rate": 2.1397260273972601e-07, "log_odds_chosen": 1.4896615743637085, "log_odds_ratio": -0.42884010076522827, "logits/chosen": 0.7768492698669434, "logits/rejected": 0.8101860284805298, "logps/chosen": -2.2639260292053223, "logps/rejected": -3.699911594390869, "loss": 0.6914, "nll_loss": 0.6484768390655518, "rewards/accuracies": 0.875, "rewards/chosen": -0.22639259696006775, "rewards/margins": 0.14359857141971588, "rewards/rejected": -0.36999115347862244, "step": 5741 }, { "epoch": 15.720739219712526, "grad_norm": 4.713082790374756, "learning_rate": 2.1383561643835617e-07, "log_odds_chosen": 2.335251808166504, "log_odds_ratio": -0.17287541925907135, "logits/chosen": 0.9717325568199158, "logits/rejected": 0.9646157622337341, "logps/chosen": -2.194885492324829, "logps/rejected": -4.425375461578369, "loss": 0.6592, "nll_loss": 0.6419489979743958, "rewards/accuracies": 1.0, "rewards/chosen": -0.21948856115341187, "rewards/margins": 0.22304901480674744, "rewards/rejected": -0.4425376057624817, "step": 5742 }, { "epoch": 15.723477070499658, "grad_norm": 5.396705627441406, "learning_rate": 2.136986301369863e-07, "log_odds_chosen": 2.1439766883850098, "log_odds_ratio": -0.22995369136333466, "logits/chosen": 0.78563392162323, "logits/rejected": 0.8020480871200562, "logps/chosen": -1.851434588432312, "logps/rejected": -3.822847843170166, "loss": 0.6771, "nll_loss": 0.6540858745574951, "rewards/accuracies": 1.0, "rewards/chosen": -0.18514345586299896, "rewards/margins": 0.19714131951332092, "rewards/rejected": -0.3822847902774811, "step": 5743 }, { "epoch": 15.72621492128679, "grad_norm": 6.174875259399414, "learning_rate": 2.1356164383561642e-07, "log_odds_chosen": 2.1994810104370117, "log_odds_ratio": -0.19691210985183716, "logits/chosen": 0.6090414524078369, "logits/rejected": 0.6489869356155396, "logps/chosen": -2.4333252906799316, "logps/rejected": -4.494783878326416, "loss": 0.8452, "nll_loss": 0.8254709243774414, "rewards/accuracies": 1.0, "rewards/chosen": -0.24333254992961884, "rewards/margins": 0.20614582300186157, "rewards/rejected": -0.4494783878326416, "step": 5744 }, { "epoch": 15.728952772073923, "grad_norm": 6.300144672393799, "learning_rate": 2.1342465753424657e-07, "log_odds_chosen": 0.7189366817474365, "log_odds_ratio": -0.42415720224380493, "logits/chosen": 0.9470253586769104, "logits/rejected": 0.8712265491485596, "logps/chosen": -2.095515251159668, "logps/rejected": -2.721092700958252, "loss": 0.6768, "nll_loss": 0.63435959815979, "rewards/accuracies": 0.875, "rewards/chosen": -0.20955152809619904, "rewards/margins": 0.06255774199962616, "rewards/rejected": -0.2721092700958252, "step": 5745 }, { "epoch": 15.731690622861054, "grad_norm": 4.970813751220703, "learning_rate": 2.132876712328767e-07, "log_odds_chosen": 1.8169000148773193, "log_odds_ratio": -0.2530381977558136, "logits/chosen": 0.836833119392395, "logits/rejected": 0.8620615005493164, "logps/chosen": -2.373863697052002, "logps/rejected": -4.125671863555908, "loss": 0.5793, "nll_loss": 0.5539582371711731, "rewards/accuracies": 0.875, "rewards/chosen": -0.23738637566566467, "rewards/margins": 0.1751808226108551, "rewards/rejected": -0.4125671982765198, "step": 5746 }, { "epoch": 15.734428473648187, "grad_norm": 7.168429374694824, "learning_rate": 2.1315068493150685e-07, "log_odds_chosen": 2.265846014022827, "log_odds_ratio": -0.30679386854171753, "logits/chosen": 0.7480709552764893, "logits/rejected": 0.8056155443191528, "logps/chosen": -2.6413822174072266, "logps/rejected": -4.785887718200684, "loss": 0.8271, "nll_loss": 0.7964614629745483, "rewards/accuracies": 0.75, "rewards/chosen": -0.26413822174072266, "rewards/margins": 0.21445058286190033, "rewards/rejected": -0.4785887598991394, "step": 5747 }, { "epoch": 15.737166324435318, "grad_norm": 6.77075719833374, "learning_rate": 2.13013698630137e-07, "log_odds_chosen": 1.4760223627090454, "log_odds_ratio": -0.4317690432071686, "logits/chosen": 0.9523197412490845, "logits/rejected": 1.0312604904174805, "logps/chosen": -2.570466995239258, "logps/rejected": -3.9142236709594727, "loss": 0.7367, "nll_loss": 0.6935648918151855, "rewards/accuracies": 0.875, "rewards/chosen": -0.2570466995239258, "rewards/margins": 0.1343757063150406, "rewards/rejected": -0.3914223909378052, "step": 5748 }, { "epoch": 15.739904175222451, "grad_norm": 8.260564804077148, "learning_rate": 2.128767123287671e-07, "log_odds_chosen": 1.5093498229980469, "log_odds_ratio": -0.31084325909614563, "logits/chosen": 0.5662079453468323, "logits/rejected": 0.4897944927215576, "logps/chosen": -2.3975324630737305, "logps/rejected": -3.80344295501709, "loss": 0.7353, "nll_loss": 0.7041928172111511, "rewards/accuracies": 0.875, "rewards/chosen": -0.23975326120853424, "rewards/margins": 0.14059102535247803, "rewards/rejected": -0.38034430146217346, "step": 5749 }, { "epoch": 15.742642026009582, "grad_norm": 4.9673261642456055, "learning_rate": 2.1273972602739725e-07, "log_odds_chosen": 1.8500683307647705, "log_odds_ratio": -0.20367348194122314, "logits/chosen": 0.49378594756126404, "logits/rejected": 0.5454432368278503, "logps/chosen": -1.8629566431045532, "logps/rejected": -3.4814369678497314, "loss": 0.5505, "nll_loss": 0.5300934314727783, "rewards/accuracies": 1.0, "rewards/chosen": -0.18629565834999084, "rewards/margins": 0.1618480533361435, "rewards/rejected": -0.34814372658729553, "step": 5750 }, { "epoch": 15.745379876796715, "grad_norm": 6.777287483215332, "learning_rate": 2.1260273972602738e-07, "log_odds_chosen": 2.633650541305542, "log_odds_ratio": -0.17791444063186646, "logits/chosen": 1.0408090353012085, "logits/rejected": 1.1395196914672852, "logps/chosen": -2.3686699867248535, "logps/rejected": -4.8261823654174805, "loss": 0.6002, "nll_loss": 0.5823857188224792, "rewards/accuracies": 1.0, "rewards/chosen": -0.2368669956922531, "rewards/margins": 0.2457512617111206, "rewards/rejected": -0.4826182425022125, "step": 5751 }, { "epoch": 15.748117727583846, "grad_norm": 5.255167007446289, "learning_rate": 2.1246575342465753e-07, "log_odds_chosen": 1.3363398313522339, "log_odds_ratio": -0.28080427646636963, "logits/chosen": 0.8182981610298157, "logits/rejected": 0.7903336882591248, "logps/chosen": -1.9843171834945679, "logps/rejected": -3.1937246322631836, "loss": 0.6153, "nll_loss": 0.5872495174407959, "rewards/accuracies": 1.0, "rewards/chosen": -0.19843173027038574, "rewards/margins": 0.12094074487686157, "rewards/rejected": -0.3193724751472473, "step": 5752 }, { "epoch": 15.75085557837098, "grad_norm": 6.092057704925537, "learning_rate": 2.1232876712328765e-07, "log_odds_chosen": 1.2030892372131348, "log_odds_ratio": -0.4082338213920593, "logits/chosen": 0.6417204141616821, "logits/rejected": 0.6372890472412109, "logps/chosen": -2.135749578475952, "logps/rejected": -3.243648052215576, "loss": 0.55, "nll_loss": 0.5091625452041626, "rewards/accuracies": 0.75, "rewards/chosen": -0.21357494592666626, "rewards/margins": 0.11078983545303345, "rewards/rejected": -0.3243647813796997, "step": 5753 }, { "epoch": 15.75359342915811, "grad_norm": 8.694825172424316, "learning_rate": 2.121917808219178e-07, "log_odds_chosen": 2.3476333618164062, "log_odds_ratio": -0.5482423305511475, "logits/chosen": 0.9518588781356812, "logits/rejected": 0.9039737582206726, "logps/chosen": -2.3338379859924316, "logps/rejected": -4.543381214141846, "loss": 0.5849, "nll_loss": 0.5301066637039185, "rewards/accuracies": 0.875, "rewards/chosen": -0.23338380455970764, "rewards/margins": 0.22095434367656708, "rewards/rejected": -0.4543381333351135, "step": 5754 }, { "epoch": 15.756331279945243, "grad_norm": 5.9500908851623535, "learning_rate": 2.1205479452054796e-07, "log_odds_chosen": 1.5055882930755615, "log_odds_ratio": -0.2613043785095215, "logits/chosen": 0.656722903251648, "logits/rejected": 0.6416763067245483, "logps/chosen": -2.2574832439422607, "logps/rejected": -3.6333792209625244, "loss": 0.6964, "nll_loss": 0.6702932715415955, "rewards/accuracies": 1.0, "rewards/chosen": -0.22574833035469055, "rewards/margins": 0.13758960366249084, "rewards/rejected": -0.3633379340171814, "step": 5755 }, { "epoch": 15.759069130732374, "grad_norm": 5.271310329437256, "learning_rate": 2.1191780821917806e-07, "log_odds_chosen": 1.6140923500061035, "log_odds_ratio": -0.2679431438446045, "logits/chosen": 1.0916972160339355, "logits/rejected": 1.1437238454818726, "logps/chosen": -2.3827853202819824, "logps/rejected": -3.9118170738220215, "loss": 0.6198, "nll_loss": 0.5930101871490479, "rewards/accuracies": 1.0, "rewards/chosen": -0.23827852308750153, "rewards/margins": 0.15290319919586182, "rewards/rejected": -0.39118170738220215, "step": 5756 }, { "epoch": 15.761806981519507, "grad_norm": 5.2748494148254395, "learning_rate": 2.117808219178082e-07, "log_odds_chosen": 3.2740678787231445, "log_odds_ratio": -0.08686353266239166, "logits/chosen": 1.0182468891143799, "logits/rejected": 1.0867254734039307, "logps/chosen": -2.776240348815918, "logps/rejected": -5.974388122558594, "loss": 0.6906, "nll_loss": 0.6819556355476379, "rewards/accuracies": 1.0, "rewards/chosen": -0.2776240110397339, "rewards/margins": 0.3198148012161255, "rewards/rejected": -0.5974388122558594, "step": 5757 }, { "epoch": 15.764544832306639, "grad_norm": 5.195403099060059, "learning_rate": 2.1164383561643833e-07, "log_odds_chosen": 1.987657904624939, "log_odds_ratio": -0.3349834084510803, "logits/chosen": 0.7653693556785583, "logits/rejected": 0.8142666220664978, "logps/chosen": -2.425361394882202, "logps/rejected": -4.310204982757568, "loss": 0.7538, "nll_loss": 0.7203388810157776, "rewards/accuracies": 0.875, "rewards/chosen": -0.24253614246845245, "rewards/margins": 0.18848437070846558, "rewards/rejected": -0.43102049827575684, "step": 5758 }, { "epoch": 15.767282683093772, "grad_norm": 5.928603172302246, "learning_rate": 2.115068493150685e-07, "log_odds_chosen": 1.7284389734268188, "log_odds_ratio": -0.4214441478252411, "logits/chosen": 0.8874469995498657, "logits/rejected": 0.9572434425354004, "logps/chosen": -2.4152908325195312, "logps/rejected": -3.9708378314971924, "loss": 0.7048, "nll_loss": 0.6626356244087219, "rewards/accuracies": 0.875, "rewards/chosen": -0.24152907729148865, "rewards/margins": 0.1555546671152115, "rewards/rejected": -0.3970837891101837, "step": 5759 }, { "epoch": 15.770020533880903, "grad_norm": 5.346317291259766, "learning_rate": 2.113698630136986e-07, "log_odds_chosen": 2.2281408309936523, "log_odds_ratio": -0.19892117381095886, "logits/chosen": 1.0948951244354248, "logits/rejected": 1.0779188871383667, "logps/chosen": -1.7022309303283691, "logps/rejected": -3.7536959648132324, "loss": 0.6002, "nll_loss": 0.5802688002586365, "rewards/accuracies": 1.0, "rewards/chosen": -0.17022308707237244, "rewards/margins": 0.20514650642871857, "rewards/rejected": -0.3753696084022522, "step": 5760 }, { "epoch": 15.772758384668036, "grad_norm": 7.083469390869141, "learning_rate": 2.1123287671232876e-07, "log_odds_chosen": 1.1577427387237549, "log_odds_ratio": -0.32510632276535034, "logits/chosen": 1.170403242111206, "logits/rejected": 1.1990286111831665, "logps/chosen": -3.1276204586029053, "logps/rejected": -4.209742546081543, "loss": 0.6941, "nll_loss": 0.6615868806838989, "rewards/accuracies": 0.875, "rewards/chosen": -0.312762051820755, "rewards/margins": 0.10821221023797989, "rewards/rejected": -0.4209742248058319, "step": 5761 }, { "epoch": 15.775496235455167, "grad_norm": 5.5763750076293945, "learning_rate": 2.1109589041095892e-07, "log_odds_chosen": 4.12853479385376, "log_odds_ratio": -0.12067925930023193, "logits/chosen": 0.9708248376846313, "logits/rejected": 1.0110961198806763, "logps/chosen": -2.3259692192077637, "logps/rejected": -6.346440315246582, "loss": 0.7741, "nll_loss": 0.7620378732681274, "rewards/accuracies": 1.0, "rewards/chosen": -0.23259693384170532, "rewards/margins": 0.4020470976829529, "rewards/rejected": -0.6346440315246582, "step": 5762 }, { "epoch": 15.7782340862423, "grad_norm": 5.51207160949707, "learning_rate": 2.1095890410958902e-07, "log_odds_chosen": 2.5741164684295654, "log_odds_ratio": -0.2102130651473999, "logits/chosen": 0.9986443519592285, "logits/rejected": 0.9928375482559204, "logps/chosen": -2.2155649662017822, "logps/rejected": -4.656784534454346, "loss": 0.6572, "nll_loss": 0.6361861228942871, "rewards/accuracies": 1.0, "rewards/chosen": -0.22155649960041046, "rewards/margins": 0.24412193894386292, "rewards/rejected": -0.46567845344543457, "step": 5763 }, { "epoch": 15.780971937029431, "grad_norm": 4.8385748863220215, "learning_rate": 2.1082191780821917e-07, "log_odds_chosen": 1.7053183317184448, "log_odds_ratio": -0.22144928574562073, "logits/chosen": 0.7814875841140747, "logits/rejected": 0.8076788187026978, "logps/chosen": -2.4700326919555664, "logps/rejected": -4.050559997558594, "loss": 0.6955, "nll_loss": 0.6733279228210449, "rewards/accuracies": 1.0, "rewards/chosen": -0.24700327217578888, "rewards/margins": 0.1580527424812317, "rewards/rejected": -0.4050559997558594, "step": 5764 }, { "epoch": 15.783709787816564, "grad_norm": 6.714444160461426, "learning_rate": 2.106849315068493e-07, "log_odds_chosen": 3.174835681915283, "log_odds_ratio": -0.18332907557487488, "logits/chosen": 0.7696329355239868, "logits/rejected": 0.8299977779388428, "logps/chosen": -2.041560173034668, "logps/rejected": -4.998476505279541, "loss": 0.6116, "nll_loss": 0.5932246446609497, "rewards/accuracies": 1.0, "rewards/chosen": -0.20415601134300232, "rewards/margins": 0.29569166898727417, "rewards/rejected": -0.4998476505279541, "step": 5765 }, { "epoch": 15.786447638603697, "grad_norm": 5.31488561630249, "learning_rate": 2.1054794520547945e-07, "log_odds_chosen": 2.3169660568237305, "log_odds_ratio": -0.19620420038700104, "logits/chosen": 0.7314189672470093, "logits/rejected": 0.7535341382026672, "logps/chosen": -1.3295128345489502, "logps/rejected": -3.361952543258667, "loss": 0.6182, "nll_loss": 0.5986115336418152, "rewards/accuracies": 1.0, "rewards/chosen": -0.1329512745141983, "rewards/margins": 0.20324398577213287, "rewards/rejected": -0.33619529008865356, "step": 5766 }, { "epoch": 15.789185489390828, "grad_norm": 6.169214248657227, "learning_rate": 2.104109589041096e-07, "log_odds_chosen": 0.9552801251411438, "log_odds_ratio": -0.3920021653175354, "logits/chosen": 0.9044072031974792, "logits/rejected": 0.9436178207397461, "logps/chosen": -1.8975591659545898, "logps/rejected": -2.7305185794830322, "loss": 0.5764, "nll_loss": 0.5371850728988647, "rewards/accuracies": 0.875, "rewards/chosen": -0.18975591659545898, "rewards/margins": 0.08329595625400543, "rewards/rejected": -0.2730518877506256, "step": 5767 }, { "epoch": 15.791923340177961, "grad_norm": 7.471776008605957, "learning_rate": 2.1027397260273972e-07, "log_odds_chosen": 2.537891387939453, "log_odds_ratio": -0.20648185908794403, "logits/chosen": 0.8341136574745178, "logits/rejected": 0.8261903524398804, "logps/chosen": -2.561314821243286, "logps/rejected": -5.007510185241699, "loss": 0.7181, "nll_loss": 0.6974841356277466, "rewards/accuracies": 1.0, "rewards/chosen": -0.25613147020339966, "rewards/margins": 0.24461951851844788, "rewards/rejected": -0.5007510185241699, "step": 5768 }, { "epoch": 15.794661190965092, "grad_norm": 5.23164701461792, "learning_rate": 2.1013698630136985e-07, "log_odds_chosen": 2.4312753677368164, "log_odds_ratio": -0.2144453376531601, "logits/chosen": 0.873870849609375, "logits/rejected": 0.9530127048492432, "logps/chosen": -1.8033353090286255, "logps/rejected": -4.085473537445068, "loss": 0.5759, "nll_loss": 0.5544868111610413, "rewards/accuracies": 1.0, "rewards/chosen": -0.18033352494239807, "rewards/margins": 0.2282138168811798, "rewards/rejected": -0.4085473418235779, "step": 5769 }, { "epoch": 15.797399041752225, "grad_norm": 4.881483554840088, "learning_rate": 2.0999999999999997e-07, "log_odds_chosen": 2.7600767612457275, "log_odds_ratio": -0.1793009638786316, "logits/chosen": 0.8545964956283569, "logits/rejected": 0.8599690198898315, "logps/chosen": -2.457434892654419, "logps/rejected": -5.105517387390137, "loss": 0.6806, "nll_loss": 0.6626909971237183, "rewards/accuracies": 1.0, "rewards/chosen": -0.24574346840381622, "rewards/margins": 0.2648082971572876, "rewards/rejected": -0.5105517506599426, "step": 5770 }, { "epoch": 15.800136892539356, "grad_norm": 5.386788368225098, "learning_rate": 2.0986301369863013e-07, "log_odds_chosen": 1.9374547004699707, "log_odds_ratio": -0.3222941756248474, "logits/chosen": 0.7586157917976379, "logits/rejected": 0.826337456703186, "logps/chosen": -1.9637951850891113, "logps/rejected": -3.7657370567321777, "loss": 0.6253, "nll_loss": 0.5930971503257751, "rewards/accuracies": 1.0, "rewards/chosen": -0.19637951254844666, "rewards/margins": 0.1801941841840744, "rewards/rejected": -0.37657368183135986, "step": 5771 }, { "epoch": 15.80287474332649, "grad_norm": 14.422279357910156, "learning_rate": 2.0972602739726025e-07, "log_odds_chosen": -0.1776278018951416, "log_odds_ratio": -1.1179136037826538, "logits/chosen": 0.4996356964111328, "logits/rejected": 0.4731200337409973, "logps/chosen": -3.20617938041687, "logps/rejected": -3.010262966156006, "loss": 0.9693, "nll_loss": 0.8575180172920227, "rewards/accuracies": 0.625, "rewards/chosen": -0.3206179141998291, "rewards/margins": -0.019591612741351128, "rewards/rejected": -0.30102628469467163, "step": 5772 }, { "epoch": 15.80561259411362, "grad_norm": 5.948195934295654, "learning_rate": 2.095890410958904e-07, "log_odds_chosen": 1.776728868484497, "log_odds_ratio": -0.31327080726623535, "logits/chosen": 0.8302391767501831, "logits/rejected": 0.7512364387512207, "logps/chosen": -1.4151442050933838, "logps/rejected": -2.999418258666992, "loss": 0.5986, "nll_loss": 0.5672239065170288, "rewards/accuracies": 1.0, "rewards/chosen": -0.14151440560817719, "rewards/margins": 0.1584274023771286, "rewards/rejected": -0.2999418079853058, "step": 5773 }, { "epoch": 15.808350444900753, "grad_norm": 5.862972259521484, "learning_rate": 2.0945205479452056e-07, "log_odds_chosen": 1.568713903427124, "log_odds_ratio": -0.3691111207008362, "logits/chosen": 1.0056300163269043, "logits/rejected": 0.995327889919281, "logps/chosen": -2.4657680988311768, "logps/rejected": -3.9397737979888916, "loss": 0.6212, "nll_loss": 0.5843231678009033, "rewards/accuracies": 0.75, "rewards/chosen": -0.24657681584358215, "rewards/margins": 0.14740058779716492, "rewards/rejected": -0.39397740364074707, "step": 5774 }, { "epoch": 15.811088295687885, "grad_norm": 5.3976263999938965, "learning_rate": 2.0931506849315068e-07, "log_odds_chosen": 2.062211275100708, "log_odds_ratio": -0.26866233348846436, "logits/chosen": 0.9409729838371277, "logits/rejected": 0.9245367050170898, "logps/chosen": -1.783292293548584, "logps/rejected": -3.6457345485687256, "loss": 0.604, "nll_loss": 0.577121913433075, "rewards/accuracies": 1.0, "rewards/chosen": -0.1783292293548584, "rewards/margins": 0.18624421954154968, "rewards/rejected": -0.3645734488964081, "step": 5775 }, { "epoch": 15.813826146475018, "grad_norm": 5.876923084259033, "learning_rate": 2.091780821917808e-07, "log_odds_chosen": 2.21663761138916, "log_odds_ratio": -0.2773294448852539, "logits/chosen": 0.6876732110977173, "logits/rejected": 0.6676701903343201, "logps/chosen": -1.7888176441192627, "logps/rejected": -3.758699417114258, "loss": 0.6248, "nll_loss": 0.597041130065918, "rewards/accuracies": 0.875, "rewards/chosen": -0.17888176441192627, "rewards/margins": 0.19698818027973175, "rewards/rejected": -0.3758699893951416, "step": 5776 }, { "epoch": 15.816563997262149, "grad_norm": 5.740224361419678, "learning_rate": 2.0904109589041093e-07, "log_odds_chosen": 1.3568419218063354, "log_odds_ratio": -0.39557355642318726, "logits/chosen": 0.861310601234436, "logits/rejected": 0.8632645606994629, "logps/chosen": -1.675549030303955, "logps/rejected": -2.912696123123169, "loss": 0.608, "nll_loss": 0.5684185028076172, "rewards/accuracies": 0.875, "rewards/chosen": -0.16755490005016327, "rewards/margins": 0.12371471524238586, "rewards/rejected": -0.29126960039138794, "step": 5777 }, { "epoch": 15.819301848049282, "grad_norm": 5.596091270446777, "learning_rate": 2.0890410958904109e-07, "log_odds_chosen": 1.7320973873138428, "log_odds_ratio": -0.26395198702812195, "logits/chosen": 1.0400235652923584, "logits/rejected": 1.0879319906234741, "logps/chosen": -2.1106338500976562, "logps/rejected": -3.711336135864258, "loss": 0.6025, "nll_loss": 0.5760717391967773, "rewards/accuracies": 1.0, "rewards/chosen": -0.21106338500976562, "rewards/margins": 0.1600702404975891, "rewards/rejected": -0.3711336553096771, "step": 5778 }, { "epoch": 15.822039698836413, "grad_norm": 4.707484245300293, "learning_rate": 2.087671232876712e-07, "log_odds_chosen": 3.0298523902893066, "log_odds_ratio": -0.20796459913253784, "logits/chosen": 0.6822742223739624, "logits/rejected": 0.7273255586624146, "logps/chosen": -2.8094189167022705, "logps/rejected": -5.754749774932861, "loss": 0.748, "nll_loss": 0.7272204160690308, "rewards/accuracies": 1.0, "rewards/chosen": -0.2809418737888336, "rewards/margins": 0.29453304409980774, "rewards/rejected": -0.5754749774932861, "step": 5779 }, { "epoch": 15.824777549623546, "grad_norm": 6.945603370666504, "learning_rate": 2.0863013698630136e-07, "log_odds_chosen": 0.8159651756286621, "log_odds_ratio": -0.44664716720581055, "logits/chosen": 0.8822716474533081, "logits/rejected": 0.8668937683105469, "logps/chosen": -1.4992640018463135, "logps/rejected": -2.161621570587158, "loss": 0.5116, "nll_loss": 0.46696826815605164, "rewards/accuracies": 0.875, "rewards/chosen": -0.14992640912532806, "rewards/margins": 0.06623575836420059, "rewards/rejected": -0.21616216003894806, "step": 5780 }, { "epoch": 15.827515400410677, "grad_norm": 4.56696891784668, "learning_rate": 2.0849315068493152e-07, "log_odds_chosen": 2.7705068588256836, "log_odds_ratio": -0.1368149220943451, "logits/chosen": 0.9204776287078857, "logits/rejected": 0.917980968952179, "logps/chosen": -2.10937237739563, "logps/rejected": -4.732734203338623, "loss": 0.6911, "nll_loss": 0.6773902177810669, "rewards/accuracies": 1.0, "rewards/chosen": -0.2109372615814209, "rewards/margins": 0.2623361647129059, "rewards/rejected": -0.47327345609664917, "step": 5781 }, { "epoch": 15.83025325119781, "grad_norm": 5.1191864013671875, "learning_rate": 2.0835616438356164e-07, "log_odds_chosen": 2.550252914428711, "log_odds_ratio": -0.17339763045310974, "logits/chosen": 0.7670078873634338, "logits/rejected": 0.8281188607215881, "logps/chosen": -2.017665386199951, "logps/rejected": -4.424567699432373, "loss": 0.6691, "nll_loss": 0.6517184972763062, "rewards/accuracies": 1.0, "rewards/chosen": -0.20176655054092407, "rewards/margins": 0.24069024622440338, "rewards/rejected": -0.44245681166648865, "step": 5782 }, { "epoch": 15.832991101984941, "grad_norm": 5.38053560256958, "learning_rate": 2.0821917808219177e-07, "log_odds_chosen": 1.9121899604797363, "log_odds_ratio": -0.31777891516685486, "logits/chosen": 0.6753172278404236, "logits/rejected": 0.6501660346984863, "logps/chosen": -1.852808952331543, "logps/rejected": -3.6380183696746826, "loss": 0.57, "nll_loss": 0.5382131338119507, "rewards/accuracies": 0.875, "rewards/chosen": -0.1852809190750122, "rewards/margins": 0.17852091789245605, "rewards/rejected": -0.36380183696746826, "step": 5783 }, { "epoch": 15.835728952772074, "grad_norm": 5.749859809875488, "learning_rate": 2.080821917808219e-07, "log_odds_chosen": 2.748732089996338, "log_odds_ratio": -0.22066186368465424, "logits/chosen": 0.7924861907958984, "logits/rejected": 0.8884860277175903, "logps/chosen": -2.1838138103485107, "logps/rejected": -4.834452152252197, "loss": 0.7871, "nll_loss": 0.7650034427642822, "rewards/accuracies": 1.0, "rewards/chosen": -0.2183813601732254, "rewards/margins": 0.2650638818740845, "rewards/rejected": -0.4834451973438263, "step": 5784 }, { "epoch": 15.838466803559205, "grad_norm": 6.513910293579102, "learning_rate": 2.0794520547945204e-07, "log_odds_chosen": 1.7922331094741821, "log_odds_ratio": -0.47353610396385193, "logits/chosen": 1.083390235900879, "logits/rejected": 1.0649614334106445, "logps/chosen": -2.6017894744873047, "logps/rejected": -4.277932167053223, "loss": 0.6716, "nll_loss": 0.624210000038147, "rewards/accuracies": 0.875, "rewards/chosen": -0.26017895340919495, "rewards/margins": 0.16761426627635956, "rewards/rejected": -0.4277932345867157, "step": 5785 }, { "epoch": 15.841204654346338, "grad_norm": 5.994978904724121, "learning_rate": 2.078082191780822e-07, "log_odds_chosen": 2.445572853088379, "log_odds_ratio": -0.17121505737304688, "logits/chosen": 1.0511505603790283, "logits/rejected": 1.0106830596923828, "logps/chosen": -2.1438822746276855, "logps/rejected": -4.458124160766602, "loss": 0.6622, "nll_loss": 0.645053505897522, "rewards/accuracies": 1.0, "rewards/chosen": -0.21438822150230408, "rewards/margins": 0.23142419755458832, "rewards/rejected": -0.4458124041557312, "step": 5786 }, { "epoch": 15.84394250513347, "grad_norm": 4.964682579040527, "learning_rate": 2.0767123287671232e-07, "log_odds_chosen": 2.4645378589630127, "log_odds_ratio": -0.23706603050231934, "logits/chosen": 0.842497706413269, "logits/rejected": 0.8474143147468567, "logps/chosen": -2.176499843597412, "logps/rejected": -4.4961676597595215, "loss": 0.7283, "nll_loss": 0.7046204805374146, "rewards/accuracies": 1.0, "rewards/chosen": -0.21764998137950897, "rewards/margins": 0.2319667637348175, "rewards/rejected": -0.44961676001548767, "step": 5787 }, { "epoch": 15.846680355920602, "grad_norm": 5.586306095123291, "learning_rate": 2.0753424657534247e-07, "log_odds_chosen": 1.9904751777648926, "log_odds_ratio": -0.2819695770740509, "logits/chosen": 0.9128700494766235, "logits/rejected": 0.9720478653907776, "logps/chosen": -2.550520896911621, "logps/rejected": -4.452983856201172, "loss": 0.6406, "nll_loss": 0.6123965978622437, "rewards/accuracies": 1.0, "rewards/chosen": -0.2550520896911621, "rewards/margins": 0.19024629890918732, "rewards/rejected": -0.445298433303833, "step": 5788 }, { "epoch": 15.849418206707734, "grad_norm": 5.740349769592285, "learning_rate": 2.0739726027397257e-07, "log_odds_chosen": 1.5866458415985107, "log_odds_ratio": -0.34429359436035156, "logits/chosen": 0.7645256519317627, "logits/rejected": 0.791912317276001, "logps/chosen": -1.9735426902770996, "logps/rejected": -3.469886541366577, "loss": 0.6287, "nll_loss": 0.5943084955215454, "rewards/accuracies": 0.875, "rewards/chosen": -0.1973542720079422, "rewards/margins": 0.14963437616825104, "rewards/rejected": -0.34698864817619324, "step": 5789 }, { "epoch": 15.852156057494867, "grad_norm": 14.137113571166992, "learning_rate": 2.0726027397260272e-07, "log_odds_chosen": 2.43359637260437, "log_odds_ratio": -0.6775196194648743, "logits/chosen": 0.8185812830924988, "logits/rejected": 0.7552672028541565, "logps/chosen": -3.322866201400757, "logps/rejected": -5.572518348693848, "loss": 0.7811, "nll_loss": 0.7133824229240417, "rewards/accuracies": 0.75, "rewards/chosen": -0.33228665590286255, "rewards/margins": 0.2249651849269867, "rewards/rejected": -0.5572518110275269, "step": 5790 }, { "epoch": 15.854893908281998, "grad_norm": 5.6238789558410645, "learning_rate": 2.0712328767123285e-07, "log_odds_chosen": 3.294912338256836, "log_odds_ratio": -0.12073884159326553, "logits/chosen": 0.9822414517402649, "logits/rejected": 1.0525579452514648, "logps/chosen": -3.233458995819092, "logps/rejected": -6.302975177764893, "loss": 0.851, "nll_loss": 0.838902473449707, "rewards/accuracies": 0.875, "rewards/chosen": -0.3233458995819092, "rewards/margins": 0.306951642036438, "rewards/rejected": -0.6302975416183472, "step": 5791 }, { "epoch": 15.85763175906913, "grad_norm": 5.9554972648620605, "learning_rate": 2.06986301369863e-07, "log_odds_chosen": 1.7690558433532715, "log_odds_ratio": -0.2731529474258423, "logits/chosen": 0.964445948600769, "logits/rejected": 0.9435585737228394, "logps/chosen": -1.7458668947219849, "logps/rejected": -3.3680357933044434, "loss": 0.5914, "nll_loss": 0.5640620589256287, "rewards/accuracies": 0.875, "rewards/chosen": -0.174586683511734, "rewards/margins": 0.1622168868780136, "rewards/rejected": -0.3368035852909088, "step": 5792 }, { "epoch": 15.860369609856264, "grad_norm": 6.168697834014893, "learning_rate": 2.0684931506849315e-07, "log_odds_chosen": 4.077489852905273, "log_odds_ratio": -0.1502329558134079, "logits/chosen": 0.9769868850708008, "logits/rejected": 0.9303576946258545, "logps/chosen": -2.556464910507202, "logps/rejected": -6.517930030822754, "loss": 0.763, "nll_loss": 0.7479776740074158, "rewards/accuracies": 0.875, "rewards/chosen": -0.2556464672088623, "rewards/margins": 0.3961465656757355, "rewards/rejected": -0.6517930626869202, "step": 5793 }, { "epoch": 15.863107460643395, "grad_norm": 5.0328826904296875, "learning_rate": 2.0671232876712328e-07, "log_odds_chosen": 3.247561454772949, "log_odds_ratio": -0.09679993242025375, "logits/chosen": 0.9496370553970337, "logits/rejected": 1.0124059915542603, "logps/chosen": -1.8358299732208252, "logps/rejected": -4.873969078063965, "loss": 0.529, "nll_loss": 0.519364595413208, "rewards/accuracies": 1.0, "rewards/chosen": -0.18358299136161804, "rewards/margins": 0.3038139045238495, "rewards/rejected": -0.48739689588546753, "step": 5794 }, { "epoch": 15.865845311430528, "grad_norm": 5.571426868438721, "learning_rate": 2.0657534246575343e-07, "log_odds_chosen": 2.064556121826172, "log_odds_ratio": -0.1909540593624115, "logits/chosen": 0.924359917640686, "logits/rejected": 0.9767158031463623, "logps/chosen": -2.2382168769836426, "logps/rejected": -4.182413578033447, "loss": 0.7827, "nll_loss": 0.7635828256607056, "rewards/accuracies": 1.0, "rewards/chosen": -0.2238216996192932, "rewards/margins": 0.19441963732242584, "rewards/rejected": -0.41824132204055786, "step": 5795 }, { "epoch": 15.868583162217659, "grad_norm": 5.017242908477783, "learning_rate": 2.0643835616438353e-07, "log_odds_chosen": 1.4136584997177124, "log_odds_ratio": -0.25072959065437317, "logits/chosen": 1.1159613132476807, "logits/rejected": 1.1585001945495605, "logps/chosen": -2.174257516860962, "logps/rejected": -3.442065954208374, "loss": 0.5766, "nll_loss": 0.5514779686927795, "rewards/accuracies": 1.0, "rewards/chosen": -0.21742576360702515, "rewards/margins": 0.12678085267543793, "rewards/rejected": -0.3442066013813019, "step": 5796 }, { "epoch": 15.871321013004792, "grad_norm": 4.72866678237915, "learning_rate": 2.0630136986301368e-07, "log_odds_chosen": 2.685159683227539, "log_odds_ratio": -0.15920646488666534, "logits/chosen": 0.953223705291748, "logits/rejected": 0.9917561411857605, "logps/chosen": -1.8165744543075562, "logps/rejected": -4.303006172180176, "loss": 0.627, "nll_loss": 0.6110709309577942, "rewards/accuracies": 1.0, "rewards/chosen": -0.18165743350982666, "rewards/margins": 0.248643159866333, "rewards/rejected": -0.43030059337615967, "step": 5797 }, { "epoch": 15.874058863791923, "grad_norm": 5.280025005340576, "learning_rate": 2.0616438356164384e-07, "log_odds_chosen": 3.915177822113037, "log_odds_ratio": -0.097442626953125, "logits/chosen": 0.8724256753921509, "logits/rejected": 0.9418669939041138, "logps/chosen": -2.6856746673583984, "logps/rejected": -6.522477149963379, "loss": 0.766, "nll_loss": 0.7562657594680786, "rewards/accuracies": 1.0, "rewards/chosen": -0.2685675024986267, "rewards/margins": 0.3836802542209625, "rewards/rejected": -0.6522477865219116, "step": 5798 }, { "epoch": 15.876796714579056, "grad_norm": 6.884300231933594, "learning_rate": 2.0602739726027396e-07, "log_odds_chosen": 0.37865233421325684, "log_odds_ratio": -0.6866037845611572, "logits/chosen": 0.9180684089660645, "logits/rejected": 0.9521913528442383, "logps/chosen": -2.811038017272949, "logps/rejected": -3.134227752685547, "loss": 0.7704, "nll_loss": 0.7017622590065002, "rewards/accuracies": 0.625, "rewards/chosen": -0.28110378980636597, "rewards/margins": 0.03231894224882126, "rewards/rejected": -0.3134227693080902, "step": 5799 }, { "epoch": 15.879534565366187, "grad_norm": 4.534475803375244, "learning_rate": 2.0589041095890411e-07, "log_odds_chosen": 3.244340419769287, "log_odds_ratio": -0.14776794612407684, "logits/chosen": 0.5955547094345093, "logits/rejected": 0.5935066938400269, "logps/chosen": -1.2821844816207886, "logps/rejected": -4.219214916229248, "loss": 0.6972, "nll_loss": 0.6824384927749634, "rewards/accuracies": 1.0, "rewards/chosen": -0.12821844220161438, "rewards/margins": 0.2937030792236328, "rewards/rejected": -0.4219215214252472, "step": 5800 }, { "epoch": 15.88227241615332, "grad_norm": 7.177615642547607, "learning_rate": 2.0575342465753424e-07, "log_odds_chosen": 0.9521018266677856, "log_odds_ratio": -0.5527454614639282, "logits/chosen": 0.9578613042831421, "logits/rejected": 0.9994174838066101, "logps/chosen": -2.757108688354492, "logps/rejected": -3.665764808654785, "loss": 0.7154, "nll_loss": 0.6601181030273438, "rewards/accuracies": 0.875, "rewards/chosen": -0.2757108807563782, "rewards/margins": 0.0908656194806099, "rewards/rejected": -0.36657649278640747, "step": 5801 }, { "epoch": 15.885010266940451, "grad_norm": 5.7359938621521, "learning_rate": 2.056164383561644e-07, "log_odds_chosen": 1.1226204633712769, "log_odds_ratio": -0.3218880295753479, "logits/chosen": 0.8489539623260498, "logits/rejected": 0.9099381566047668, "logps/chosen": -2.094747304916382, "logps/rejected": -3.099761486053467, "loss": 0.6062, "nll_loss": 0.5739628672599792, "rewards/accuracies": 0.875, "rewards/chosen": -0.20947474241256714, "rewards/margins": 0.10050144046545029, "rewards/rejected": -0.30997616052627563, "step": 5802 }, { "epoch": 15.887748117727584, "grad_norm": 5.287118911743164, "learning_rate": 2.054794520547945e-07, "log_odds_chosen": 2.8103559017181396, "log_odds_ratio": -0.19173726439476013, "logits/chosen": 0.7450976371765137, "logits/rejected": 0.7654377818107605, "logps/chosen": -2.1532249450683594, "logps/rejected": -4.847228050231934, "loss": 0.7363, "nll_loss": 0.7171497344970703, "rewards/accuracies": 0.875, "rewards/chosen": -0.21532249450683594, "rewards/margins": 0.26940029859542847, "rewards/rejected": -0.4847227931022644, "step": 5803 }, { "epoch": 15.890485968514716, "grad_norm": 4.863055229187012, "learning_rate": 2.0534246575342464e-07, "log_odds_chosen": 2.4235854148864746, "log_odds_ratio": -0.20790180563926697, "logits/chosen": 0.8212399482727051, "logits/rejected": 0.8743950128555298, "logps/chosen": -2.699479579925537, "logps/rejected": -4.86229133605957, "loss": 0.7062, "nll_loss": 0.6853947639465332, "rewards/accuracies": 0.875, "rewards/chosen": -0.26994794607162476, "rewards/margins": 0.2162812054157257, "rewards/rejected": -0.4862291216850281, "step": 5804 }, { "epoch": 15.893223819301848, "grad_norm": 6.536972999572754, "learning_rate": 2.052054794520548e-07, "log_odds_chosen": 2.446340322494507, "log_odds_ratio": -0.34130245447158813, "logits/chosen": 0.8549583554267883, "logits/rejected": 0.8204061985015869, "logps/chosen": -2.844529628753662, "logps/rejected": -5.21259880065918, "loss": 0.7536, "nll_loss": 0.7194638252258301, "rewards/accuracies": 0.875, "rewards/chosen": -0.28445297479629517, "rewards/margins": 0.2368069291114807, "rewards/rejected": -0.5212599039077759, "step": 5805 }, { "epoch": 15.89596167008898, "grad_norm": 5.899433612823486, "learning_rate": 2.0506849315068492e-07, "log_odds_chosen": 0.9382295608520508, "log_odds_ratio": -0.5548433661460876, "logits/chosen": 0.6711859703063965, "logits/rejected": 0.8038773536682129, "logps/chosen": -1.7746297121047974, "logps/rejected": -2.655961036682129, "loss": 0.6455, "nll_loss": 0.5899797081947327, "rewards/accuracies": 0.75, "rewards/chosen": -0.17746298015117645, "rewards/margins": 0.08813314884901047, "rewards/rejected": -0.2655961215496063, "step": 5806 }, { "epoch": 15.898699520876113, "grad_norm": 5.857838153839111, "learning_rate": 2.0493150684931507e-07, "log_odds_chosen": 1.702444314956665, "log_odds_ratio": -0.2891123592853546, "logits/chosen": 0.720409095287323, "logits/rejected": 0.6918209791183472, "logps/chosen": -2.0910463333129883, "logps/rejected": -3.693722724914551, "loss": 0.6388, "nll_loss": 0.6099090576171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.20910465717315674, "rewards/margins": 0.160267636179924, "rewards/rejected": -0.36937227845191956, "step": 5807 }, { "epoch": 15.901437371663244, "grad_norm": 4.8259663581848145, "learning_rate": 2.047945205479452e-07, "log_odds_chosen": 2.4678752422332764, "log_odds_ratio": -0.18553254008293152, "logits/chosen": 0.6710126399993896, "logits/rejected": 0.7586048245429993, "logps/chosen": -2.0565688610076904, "logps/rejected": -4.411238193511963, "loss": 0.6734, "nll_loss": 0.6548921465873718, "rewards/accuracies": 0.875, "rewards/chosen": -0.20565690100193024, "rewards/margins": 0.23546692728996277, "rewards/rejected": -0.4411238133907318, "step": 5808 }, { "epoch": 15.904175222450377, "grad_norm": 8.35754108428955, "learning_rate": 2.0465753424657532e-07, "log_odds_chosen": 1.0594521760940552, "log_odds_ratio": -0.45275014638900757, "logits/chosen": 0.9023723006248474, "logits/rejected": 0.9173338413238525, "logps/chosen": -2.9050631523132324, "logps/rejected": -3.8763132095336914, "loss": 0.7674, "nll_loss": 0.7221260070800781, "rewards/accuracies": 0.75, "rewards/chosen": -0.2905063033103943, "rewards/margins": 0.09712503850460052, "rewards/rejected": -0.387631356716156, "step": 5809 }, { "epoch": 15.906913073237508, "grad_norm": 5.449529647827148, "learning_rate": 2.0452054794520545e-07, "log_odds_chosen": 2.320462226867676, "log_odds_ratio": -0.30709585547447205, "logits/chosen": 0.709375262260437, "logits/rejected": 0.721179187297821, "logps/chosen": -1.7870638370513916, "logps/rejected": -3.8476481437683105, "loss": 0.5581, "nll_loss": 0.527397871017456, "rewards/accuracies": 0.875, "rewards/chosen": -0.17870637774467468, "rewards/margins": 0.20605842769145966, "rewards/rejected": -0.38476479053497314, "step": 5810 }, { "epoch": 15.90965092402464, "grad_norm": 4.865056037902832, "learning_rate": 2.043835616438356e-07, "log_odds_chosen": 4.532971382141113, "log_odds_ratio": -0.032840095460414886, "logits/chosen": 1.132968783378601, "logits/rejected": 1.1972105503082275, "logps/chosen": -2.231990098953247, "logps/rejected": -6.468992233276367, "loss": 0.5268, "nll_loss": 0.5234741568565369, "rewards/accuracies": 1.0, "rewards/chosen": -0.2231990247964859, "rewards/margins": 0.423700213432312, "rewards/rejected": -0.6468992233276367, "step": 5811 }, { "epoch": 15.912388774811772, "grad_norm": 7.969579696655273, "learning_rate": 2.0424657534246575e-07, "log_odds_chosen": 1.3035521507263184, "log_odds_ratio": -0.37995707988739014, "logits/chosen": 0.8885752558708191, "logits/rejected": 0.8871644139289856, "logps/chosen": -2.2160120010375977, "logps/rejected": -3.4373698234558105, "loss": 0.654, "nll_loss": 0.6159778833389282, "rewards/accuracies": 0.875, "rewards/chosen": -0.2216012179851532, "rewards/margins": 0.12213577330112457, "rewards/rejected": -0.3437369763851166, "step": 5812 }, { "epoch": 15.915126625598905, "grad_norm": 5.9824652671813965, "learning_rate": 2.0410958904109588e-07, "log_odds_chosen": 2.0114593505859375, "log_odds_ratio": -0.6689141988754272, "logits/chosen": 0.948448121547699, "logits/rejected": 0.9853662252426147, "logps/chosen": -2.946319103240967, "logps/rejected": -4.861879825592041, "loss": 0.7343, "nll_loss": 0.6674485206604004, "rewards/accuracies": 0.75, "rewards/chosen": -0.2946319282054901, "rewards/margins": 0.19155606627464294, "rewards/rejected": -0.48618799448013306, "step": 5813 }, { "epoch": 15.917864476386036, "grad_norm": 6.021421432495117, "learning_rate": 2.0397260273972603e-07, "log_odds_chosen": 2.8800384998321533, "log_odds_ratio": -0.1232219859957695, "logits/chosen": 0.587749183177948, "logits/rejected": 0.5751928687095642, "logps/chosen": -1.4950859546661377, "logps/rejected": -4.138768672943115, "loss": 0.5554, "nll_loss": 0.5430701375007629, "rewards/accuracies": 1.0, "rewards/chosen": -0.14950859546661377, "rewards/margins": 0.2643682658672333, "rewards/rejected": -0.41387686133384705, "step": 5814 }, { "epoch": 15.92060232717317, "grad_norm": 6.248990535736084, "learning_rate": 2.0383561643835616e-07, "log_odds_chosen": 1.453844666481018, "log_odds_ratio": -0.2798745632171631, "logits/chosen": 0.9463683366775513, "logits/rejected": 0.8788385987281799, "logps/chosen": -1.844956874847412, "logps/rejected": -3.1158761978149414, "loss": 0.5301, "nll_loss": 0.5020986795425415, "rewards/accuracies": 1.0, "rewards/chosen": -0.1844956874847412, "rewards/margins": 0.1270918995141983, "rewards/rejected": -0.3115875720977783, "step": 5815 }, { "epoch": 15.923340177960302, "grad_norm": 5.683578968048096, "learning_rate": 2.0369863013698628e-07, "log_odds_chosen": 1.5736052989959717, "log_odds_ratio": -0.3433944284915924, "logits/chosen": 0.657783567905426, "logits/rejected": 0.8068676590919495, "logps/chosen": -2.507225751876831, "logps/rejected": -3.976067304611206, "loss": 0.6715, "nll_loss": 0.6371368765830994, "rewards/accuracies": 0.875, "rewards/chosen": -0.25072258710861206, "rewards/margins": 0.14688414335250854, "rewards/rejected": -0.3976067304611206, "step": 5816 }, { "epoch": 15.926078028747433, "grad_norm": 5.257632255554199, "learning_rate": 2.0356164383561643e-07, "log_odds_chosen": 2.912228584289551, "log_odds_ratio": -0.1370847523212433, "logits/chosen": 0.7233722805976868, "logits/rejected": 0.7274062633514404, "logps/chosen": -2.137279510498047, "logps/rejected": -4.906209945678711, "loss": 0.6334, "nll_loss": 0.6197388768196106, "rewards/accuracies": 1.0, "rewards/chosen": -0.2137279361486435, "rewards/margins": 0.27689307928085327, "rewards/rejected": -0.49062103033065796, "step": 5817 }, { "epoch": 15.928815879534564, "grad_norm": 5.51864767074585, "learning_rate": 2.0342465753424656e-07, "log_odds_chosen": 2.215925693511963, "log_odds_ratio": -0.21023698151111603, "logits/chosen": 0.8022893071174622, "logits/rejected": 0.8817539215087891, "logps/chosen": -2.506340742111206, "logps/rejected": -4.619531631469727, "loss": 0.6183, "nll_loss": 0.5972961187362671, "rewards/accuracies": 1.0, "rewards/chosen": -0.2506340742111206, "rewards/margins": 0.21131910383701324, "rewards/rejected": -0.46195316314697266, "step": 5818 }, { "epoch": 15.931553730321697, "grad_norm": 4.821835041046143, "learning_rate": 2.032876712328767e-07, "log_odds_chosen": 3.07181978225708, "log_odds_ratio": -0.15332213044166565, "logits/chosen": 1.1879465579986572, "logits/rejected": 1.2668756246566772, "logps/chosen": -2.0199766159057617, "logps/rejected": -4.961089134216309, "loss": 0.6363, "nll_loss": 0.6209664344787598, "rewards/accuracies": 1.0, "rewards/chosen": -0.20199765264987946, "rewards/margins": 0.2941112816333771, "rewards/rejected": -0.49610888957977295, "step": 5819 }, { "epoch": 15.93429158110883, "grad_norm": 5.84066104888916, "learning_rate": 2.0315068493150684e-07, "log_odds_chosen": 2.263615846633911, "log_odds_ratio": -0.3158344030380249, "logits/chosen": 1.0755963325500488, "logits/rejected": 1.0907458066940308, "logps/chosen": -2.2347819805145264, "logps/rejected": -4.380728721618652, "loss": 0.6513, "nll_loss": 0.619681715965271, "rewards/accuracies": 0.875, "rewards/chosen": -0.22347822785377502, "rewards/margins": 0.21459466218948364, "rewards/rejected": -0.4380728602409363, "step": 5820 }, { "epoch": 15.937029431895962, "grad_norm": 5.446117877960205, "learning_rate": 2.03013698630137e-07, "log_odds_chosen": 4.280516624450684, "log_odds_ratio": -0.14978966116905212, "logits/chosen": 0.8074792623519897, "logits/rejected": 0.8415610790252686, "logps/chosen": -1.976610779762268, "logps/rejected": -6.082196235656738, "loss": 0.749, "nll_loss": 0.734043538570404, "rewards/accuracies": 0.875, "rewards/chosen": -0.19766108691692352, "rewards/margins": 0.4105585217475891, "rewards/rejected": -0.6082196235656738, "step": 5821 }, { "epoch": 15.939767282683095, "grad_norm": 4.735710620880127, "learning_rate": 2.0287671232876711e-07, "log_odds_chosen": 2.5755081176757812, "log_odds_ratio": -0.4080069363117218, "logits/chosen": 0.9421848058700562, "logits/rejected": 0.9644961357116699, "logps/chosen": -2.31321382522583, "logps/rejected": -4.8565263748168945, "loss": 0.7243, "nll_loss": 0.6834735870361328, "rewards/accuracies": 0.625, "rewards/chosen": -0.23132139444351196, "rewards/margins": 0.25433123111724854, "rewards/rejected": -0.4856526553630829, "step": 5822 }, { "epoch": 15.942505133470226, "grad_norm": 4.88513708114624, "learning_rate": 2.0273972602739724e-07, "log_odds_chosen": 2.8682591915130615, "log_odds_ratio": -0.18588797748088837, "logits/chosen": 0.7461318373680115, "logits/rejected": 0.7572989463806152, "logps/chosen": -2.2161173820495605, "logps/rejected": -4.9636688232421875, "loss": 0.6798, "nll_loss": 0.6611748933792114, "rewards/accuracies": 1.0, "rewards/chosen": -0.22161172330379486, "rewards/margins": 0.2747551202774048, "rewards/rejected": -0.49636685848236084, "step": 5823 }, { "epoch": 15.945242984257359, "grad_norm": 5.037313461303711, "learning_rate": 2.026027397260274e-07, "log_odds_chosen": 2.080622911453247, "log_odds_ratio": -0.20157195627689362, "logits/chosen": 0.6771830320358276, "logits/rejected": 0.7667012810707092, "logps/chosen": -2.0167958736419678, "logps/rejected": -3.960167169570923, "loss": 0.5848, "nll_loss": 0.5646352767944336, "rewards/accuracies": 1.0, "rewards/chosen": -0.20167958736419678, "rewards/margins": 0.1943371295928955, "rewards/rejected": -0.3960167169570923, "step": 5824 }, { "epoch": 15.94798083504449, "grad_norm": 5.575008869171143, "learning_rate": 2.0246575342465752e-07, "log_odds_chosen": 3.5995869636535645, "log_odds_ratio": -0.20563670992851257, "logits/chosen": 0.6899875998497009, "logits/rejected": 0.7191787958145142, "logps/chosen": -2.3271946907043457, "logps/rejected": -5.7720561027526855, "loss": 0.5941, "nll_loss": 0.5735395550727844, "rewards/accuracies": 0.875, "rewards/chosen": -0.23271948099136353, "rewards/margins": 0.34448614716529846, "rewards/rejected": -0.5772056579589844, "step": 5825 }, { "epoch": 15.950718685831623, "grad_norm": 7.386927127838135, "learning_rate": 2.0232876712328767e-07, "log_odds_chosen": 1.45564866065979, "log_odds_ratio": -0.4947843551635742, "logits/chosen": 0.7272486686706543, "logits/rejected": 0.7668636441230774, "logps/chosen": -2.4460859298706055, "logps/rejected": -3.8192105293273926, "loss": 0.8037, "nll_loss": 0.7542027831077576, "rewards/accuracies": 0.75, "rewards/chosen": -0.2446085810661316, "rewards/margins": 0.13731247186660767, "rewards/rejected": -0.38192105293273926, "step": 5826 }, { "epoch": 15.953456536618754, "grad_norm": 5.198037147521973, "learning_rate": 2.021917808219178e-07, "log_odds_chosen": 2.6458117961883545, "log_odds_ratio": -0.10354625433683395, "logits/chosen": 0.9061620235443115, "logits/rejected": 0.9705885648727417, "logps/chosen": -2.1429991722106934, "logps/rejected": -4.643784523010254, "loss": 0.6882, "nll_loss": 0.6778188943862915, "rewards/accuracies": 1.0, "rewards/chosen": -0.21429993212223053, "rewards/margins": 0.25007855892181396, "rewards/rejected": -0.4643784761428833, "step": 5827 }, { "epoch": 15.956194387405887, "grad_norm": 5.556384086608887, "learning_rate": 2.0205479452054795e-07, "log_odds_chosen": 2.8693318367004395, "log_odds_ratio": -0.25675058364868164, "logits/chosen": 0.8825356364250183, "logits/rejected": 0.8883627653121948, "logps/chosen": -2.1933541297912598, "logps/rejected": -4.94557523727417, "loss": 0.6183, "nll_loss": 0.5926116704940796, "rewards/accuracies": 1.0, "rewards/chosen": -0.2193354219198227, "rewards/margins": 0.27522212266921997, "rewards/rejected": -0.49455755949020386, "step": 5828 }, { "epoch": 15.958932238193018, "grad_norm": 5.844046115875244, "learning_rate": 2.0191780821917805e-07, "log_odds_chosen": 1.6337716579437256, "log_odds_ratio": -0.33838027715682983, "logits/chosen": 0.6864993572235107, "logits/rejected": 0.6875564455986023, "logps/chosen": -2.2579071521759033, "logps/rejected": -3.8346853256225586, "loss": 0.6224, "nll_loss": 0.5885668992996216, "rewards/accuracies": 0.875, "rewards/chosen": -0.22579072415828705, "rewards/margins": 0.15767782926559448, "rewards/rejected": -0.3834685683250427, "step": 5829 }, { "epoch": 15.961670088980151, "grad_norm": 6.397800922393799, "learning_rate": 2.017808219178082e-07, "log_odds_chosen": 1.7771047353744507, "log_odds_ratio": -0.31885862350463867, "logits/chosen": 0.7295917272567749, "logits/rejected": 0.5571112036705017, "logps/chosen": -2.010244607925415, "logps/rejected": -3.6600327491760254, "loss": 0.7094, "nll_loss": 0.6775467395782471, "rewards/accuracies": 1.0, "rewards/chosen": -0.20102447271347046, "rewards/margins": 0.16497880220413208, "rewards/rejected": -0.36600327491760254, "step": 5830 }, { "epoch": 15.964407939767282, "grad_norm": 5.616750717163086, "learning_rate": 2.0164383561643835e-07, "log_odds_chosen": 1.6570794582366943, "log_odds_ratio": -0.3391822874546051, "logits/chosen": 0.8000066876411438, "logits/rejected": 0.8334593772888184, "logps/chosen": -2.1659371852874756, "logps/rejected": -3.690943956375122, "loss": 0.6358, "nll_loss": 0.6018690466880798, "rewards/accuracies": 0.75, "rewards/chosen": -0.21659371256828308, "rewards/margins": 0.1525006890296936, "rewards/rejected": -0.3690944314002991, "step": 5831 }, { "epoch": 15.967145790554415, "grad_norm": 5.277344226837158, "learning_rate": 2.0150684931506848e-07, "log_odds_chosen": 1.9381988048553467, "log_odds_ratio": -0.30536088347435, "logits/chosen": 0.909538984298706, "logits/rejected": 1.0114713907241821, "logps/chosen": -2.8974738121032715, "logps/rejected": -4.780906677246094, "loss": 0.7527, "nll_loss": 0.7221808433532715, "rewards/accuracies": 0.875, "rewards/chosen": -0.2897473871707916, "rewards/margins": 0.1883433312177658, "rewards/rejected": -0.47809073328971863, "step": 5832 }, { "epoch": 15.969883641341546, "grad_norm": 6.712449550628662, "learning_rate": 2.0136986301369863e-07, "log_odds_chosen": 2.2261905670166016, "log_odds_ratio": -0.2750400900840759, "logits/chosen": 0.6621576547622681, "logits/rejected": 0.6598613858222961, "logps/chosen": -2.4679882526397705, "logps/rejected": -4.552257537841797, "loss": 0.7144, "nll_loss": 0.6868571043014526, "rewards/accuracies": 0.75, "rewards/chosen": -0.2467988282442093, "rewards/margins": 0.20842689275741577, "rewards/rejected": -0.45522573590278625, "step": 5833 }, { "epoch": 15.97262149212868, "grad_norm": 5.185853958129883, "learning_rate": 2.0123287671232875e-07, "log_odds_chosen": 1.2843824625015259, "log_odds_ratio": -0.34327954053878784, "logits/chosen": 0.6291122436523438, "logits/rejected": 0.6606664061546326, "logps/chosen": -2.231060266494751, "logps/rejected": -3.4526052474975586, "loss": 0.6329, "nll_loss": 0.5986104011535645, "rewards/accuracies": 1.0, "rewards/chosen": -0.2231060266494751, "rewards/margins": 0.12215446680784225, "rewards/rejected": -0.34526050090789795, "step": 5834 }, { "epoch": 15.97535934291581, "grad_norm": 5.335807800292969, "learning_rate": 2.010958904109589e-07, "log_odds_chosen": 2.077021360397339, "log_odds_ratio": -0.3834231197834015, "logits/chosen": 0.9184906482696533, "logits/rejected": 0.9276627898216248, "logps/chosen": -1.6095101833343506, "logps/rejected": -3.492720603942871, "loss": 0.6616, "nll_loss": 0.6232962012290955, "rewards/accuracies": 0.875, "rewards/chosen": -0.16095101833343506, "rewards/margins": 0.188321053981781, "rewards/rejected": -0.34927207231521606, "step": 5835 }, { "epoch": 15.978097193702943, "grad_norm": 5.622039794921875, "learning_rate": 2.0095890410958903e-07, "log_odds_chosen": 1.8035942316055298, "log_odds_ratio": -0.4408363997936249, "logits/chosen": 0.7237414717674255, "logits/rejected": 0.8246539235115051, "logps/chosen": -2.571190357208252, "logps/rejected": -4.285154342651367, "loss": 0.7153, "nll_loss": 0.6712406873703003, "rewards/accuracies": 0.625, "rewards/chosen": -0.25711899995803833, "rewards/margins": 0.1713964343070984, "rewards/rejected": -0.4285154938697815, "step": 5836 }, { "epoch": 15.980835044490075, "grad_norm": 7.621979236602783, "learning_rate": 2.0082191780821916e-07, "log_odds_chosen": 1.3441872596740723, "log_odds_ratio": -0.29824304580688477, "logits/chosen": 0.813433825969696, "logits/rejected": 0.8265138268470764, "logps/chosen": -2.195770502090454, "logps/rejected": -3.4190211296081543, "loss": 0.6189, "nll_loss": 0.589066743850708, "rewards/accuracies": 0.875, "rewards/chosen": -0.21957704424858093, "rewards/margins": 0.12232507020235062, "rewards/rejected": -0.34190213680267334, "step": 5837 }, { "epoch": 15.983572895277208, "grad_norm": 6.183297634124756, "learning_rate": 2.006849315068493e-07, "log_odds_chosen": 1.6513856649398804, "log_odds_ratio": -0.26378482580184937, "logits/chosen": 0.6619211435317993, "logits/rejected": 0.7848926782608032, "logps/chosen": -2.554316759109497, "logps/rejected": -4.131656646728516, "loss": 0.6777, "nll_loss": 0.6513682007789612, "rewards/accuracies": 1.0, "rewards/chosen": -0.2554316520690918, "rewards/margins": 0.1577340066432953, "rewards/rejected": -0.4131656885147095, "step": 5838 }, { "epoch": 15.986310746064339, "grad_norm": 5.408022403717041, "learning_rate": 2.0054794520547944e-07, "log_odds_chosen": 1.7762483358383179, "log_odds_ratio": -0.2317160665988922, "logits/chosen": 0.5542640686035156, "logits/rejected": 0.5793777704238892, "logps/chosen": -2.3141424655914307, "logps/rejected": -3.976547956466675, "loss": 0.5666, "nll_loss": 0.5434484481811523, "rewards/accuracies": 1.0, "rewards/chosen": -0.23141425848007202, "rewards/margins": 0.16624055802822113, "rewards/rejected": -0.39765480160713196, "step": 5839 }, { "epoch": 15.989048596851472, "grad_norm": 5.5430169105529785, "learning_rate": 2.004109589041096e-07, "log_odds_chosen": 1.4613730907440186, "log_odds_ratio": -0.2873477339744568, "logits/chosen": 0.808349609375, "logits/rejected": 0.8263509273529053, "logps/chosen": -2.2169694900512695, "logps/rejected": -3.534540891647339, "loss": 0.635, "nll_loss": 0.6062576770782471, "rewards/accuracies": 1.0, "rewards/chosen": -0.22169694304466248, "rewards/margins": 0.13175715506076813, "rewards/rejected": -0.3534541130065918, "step": 5840 }, { "epoch": 15.991786447638603, "grad_norm": 5.642735481262207, "learning_rate": 2.0027397260273971e-07, "log_odds_chosen": 1.6127997636795044, "log_odds_ratio": -0.30903637409210205, "logits/chosen": 0.7495059967041016, "logits/rejected": 0.8240464925765991, "logps/chosen": -2.474611282348633, "logps/rejected": -4.003005027770996, "loss": 0.6897, "nll_loss": 0.6588424444198608, "rewards/accuracies": 0.875, "rewards/chosen": -0.24746114015579224, "rewards/margins": 0.15283939242362976, "rewards/rejected": -0.400300532579422, "step": 5841 }, { "epoch": 15.994524298425736, "grad_norm": 5.3503193855285645, "learning_rate": 2.0013698630136987e-07, "log_odds_chosen": 2.40635347366333, "log_odds_ratio": -0.15190210938453674, "logits/chosen": 0.9376397132873535, "logits/rejected": 1.0117746591567993, "logps/chosen": -2.1238951683044434, "logps/rejected": -4.413694381713867, "loss": 0.7563, "nll_loss": 0.7411414384841919, "rewards/accuracies": 1.0, "rewards/chosen": -0.2123895287513733, "rewards/margins": 0.2289799004793167, "rewards/rejected": -0.4413694143295288, "step": 5842 }, { "epoch": 15.997262149212869, "grad_norm": 6.680150032043457, "learning_rate": 2e-07, "log_odds_chosen": 1.7332051992416382, "log_odds_ratio": -0.5040969848632812, "logits/chosen": 0.7659101486206055, "logits/rejected": 0.7829575538635254, "logps/chosen": -2.5348422527313232, "logps/rejected": -4.196953296661377, "loss": 0.7259, "nll_loss": 0.6755192279815674, "rewards/accuracies": 0.625, "rewards/chosen": -0.25348421931266785, "rewards/margins": 0.1662110984325409, "rewards/rejected": -0.41969531774520874, "step": 5843 }, { "epoch": 16.0, "grad_norm": 5.808909893035889, "learning_rate": 1.9986301369863012e-07, "log_odds_chosen": 1.9929144382476807, "log_odds_ratio": -0.3924480974674225, "logits/chosen": 0.6053739786148071, "logits/rejected": 0.6590114831924438, "logps/chosen": -2.1747236251831055, "logps/rejected": -4.0549726486206055, "loss": 0.6576, "nll_loss": 0.6183465719223022, "rewards/accuracies": 0.625, "rewards/chosen": -0.2174723744392395, "rewards/margins": 0.18802489340305328, "rewards/rejected": -0.4054972529411316, "step": 5844 }, { "epoch": 16.00273785078713, "grad_norm": 5.880213260650635, "learning_rate": 1.9972602739726027e-07, "log_odds_chosen": 1.6757290363311768, "log_odds_ratio": -0.34318068623542786, "logits/chosen": 0.9476854801177979, "logits/rejected": 0.8840962648391724, "logps/chosen": -1.8749364614486694, "logps/rejected": -3.4673686027526855, "loss": 0.5499, "nll_loss": 0.515555739402771, "rewards/accuracies": 0.75, "rewards/chosen": -0.18749365210533142, "rewards/margins": 0.15924321115016937, "rewards/rejected": -0.346736878156662, "step": 5845 }, { "epoch": 16.005475701574266, "grad_norm": 7.403766632080078, "learning_rate": 1.995890410958904e-07, "log_odds_chosen": 2.5148768424987793, "log_odds_ratio": -0.3106679916381836, "logits/chosen": 1.189700722694397, "logits/rejected": 1.179184913635254, "logps/chosen": -2.2739691734313965, "logps/rejected": -4.646382808685303, "loss": 0.5844, "nll_loss": 0.5533447265625, "rewards/accuracies": 0.875, "rewards/chosen": -0.22739695012569427, "rewards/margins": 0.23724137246608734, "rewards/rejected": -0.46463829278945923, "step": 5846 }, { "epoch": 16.008213552361397, "grad_norm": 7.587852954864502, "learning_rate": 1.9945205479452055e-07, "log_odds_chosen": 0.9796303510665894, "log_odds_ratio": -0.6073024868965149, "logits/chosen": 0.6721227765083313, "logits/rejected": 0.6333733797073364, "logps/chosen": -2.2725932598114014, "logps/rejected": -3.0652565956115723, "loss": 0.5972, "nll_loss": 0.5364258885383606, "rewards/accuracies": 0.75, "rewards/chosen": -0.2272593230009079, "rewards/margins": 0.07926633208990097, "rewards/rejected": -0.30652564764022827, "step": 5847 }, { "epoch": 16.01095140314853, "grad_norm": 4.728231430053711, "learning_rate": 1.9931506849315067e-07, "log_odds_chosen": 1.7322570085525513, "log_odds_ratio": -0.20439346134662628, "logits/chosen": 0.7971044182777405, "logits/rejected": 0.8084522485733032, "logps/chosen": -1.6052885055541992, "logps/rejected": -3.138719081878662, "loss": 0.6648, "nll_loss": 0.6443993449211121, "rewards/accuracies": 1.0, "rewards/chosen": -0.16052886843681335, "rewards/margins": 0.1533430516719818, "rewards/rejected": -0.3138718903064728, "step": 5848 }, { "epoch": 16.01368925393566, "grad_norm": 4.3193135261535645, "learning_rate": 1.991780821917808e-07, "log_odds_chosen": 2.6955392360687256, "log_odds_ratio": -0.2167292684316635, "logits/chosen": 0.9570060968399048, "logits/rejected": 0.9697956442832947, "logps/chosen": -2.1690471172332764, "logps/rejected": -4.768278121948242, "loss": 0.6385, "nll_loss": 0.6168329119682312, "rewards/accuracies": 0.875, "rewards/chosen": -0.21690472960472107, "rewards/margins": 0.2599230706691742, "rewards/rejected": -0.47682780027389526, "step": 5849 }, { "epoch": 16.016427104722794, "grad_norm": 4.98428201675415, "learning_rate": 1.9904109589041095e-07, "log_odds_chosen": 2.2300515174865723, "log_odds_ratio": -0.2903870940208435, "logits/chosen": 0.899513304233551, "logits/rejected": 0.9575043320655823, "logps/chosen": -1.794377326965332, "logps/rejected": -3.7950353622436523, "loss": 0.553, "nll_loss": 0.523995041847229, "rewards/accuracies": 0.875, "rewards/chosen": -0.17943772673606873, "rewards/margins": 0.20006583631038666, "rewards/rejected": -0.3795035481452942, "step": 5850 }, { "epoch": 16.019164955509925, "grad_norm": 9.06445598602295, "learning_rate": 1.9890410958904108e-07, "log_odds_chosen": 2.202859878540039, "log_odds_ratio": -0.2630058228969574, "logits/chosen": 0.927925705909729, "logits/rejected": 1.011303186416626, "logps/chosen": -2.9080872535705566, "logps/rejected": -5.043357849121094, "loss": 0.752, "nll_loss": 0.725716233253479, "rewards/accuracies": 0.875, "rewards/chosen": -0.2908087372779846, "rewards/margins": 0.21352703869342804, "rewards/rejected": -0.5043357610702515, "step": 5851 }, { "epoch": 16.021902806297057, "grad_norm": 5.455178737640381, "learning_rate": 1.9876712328767123e-07, "log_odds_chosen": 1.267012119293213, "log_odds_ratio": -0.36966314911842346, "logits/chosen": 0.9287868738174438, "logits/rejected": 0.943842887878418, "logps/chosen": -2.256190776824951, "logps/rejected": -3.453589916229248, "loss": 0.613, "nll_loss": 0.5760583877563477, "rewards/accuracies": 0.875, "rewards/chosen": -0.22561906278133392, "rewards/margins": 0.11973992735147476, "rewards/rejected": -0.3453589975833893, "step": 5852 }, { "epoch": 16.024640657084188, "grad_norm": 5.700216770172119, "learning_rate": 1.9863013698630135e-07, "log_odds_chosen": 2.0668253898620605, "log_odds_ratio": -0.1748111993074417, "logits/chosen": 0.7670760154724121, "logits/rejected": 0.8059651851654053, "logps/chosen": -1.7168574333190918, "logps/rejected": -3.608874797821045, "loss": 0.5517, "nll_loss": 0.5341958999633789, "rewards/accuracies": 1.0, "rewards/chosen": -0.17168575525283813, "rewards/margins": 0.1892017424106598, "rewards/rejected": -0.36088746786117554, "step": 5853 }, { "epoch": 16.027378507871322, "grad_norm": 5.472954273223877, "learning_rate": 1.984931506849315e-07, "log_odds_chosen": 3.3988404273986816, "log_odds_ratio": -0.11924897134304047, "logits/chosen": 0.7086866497993469, "logits/rejected": 0.6755855083465576, "logps/chosen": -1.5764544010162354, "logps/rejected": -4.758153438568115, "loss": 0.5799, "nll_loss": 0.5679454207420349, "rewards/accuracies": 1.0, "rewards/chosen": -0.15764543414115906, "rewards/margins": 0.31816989183425903, "rewards/rejected": -0.4758153557777405, "step": 5854 }, { "epoch": 16.030116358658454, "grad_norm": 6.850875377655029, "learning_rate": 1.9835616438356166e-07, "log_odds_chosen": 1.6279144287109375, "log_odds_ratio": -0.4779306948184967, "logits/chosen": 0.6444201469421387, "logits/rejected": 0.7298930883407593, "logps/chosen": -2.464590549468994, "logps/rejected": -3.9794821739196777, "loss": 0.7245, "nll_loss": 0.6767122149467468, "rewards/accuracies": 0.875, "rewards/chosen": -0.24645906686782837, "rewards/margins": 0.15148915350437164, "rewards/rejected": -0.3979482352733612, "step": 5855 }, { "epoch": 16.032854209445585, "grad_norm": 5.168578624725342, "learning_rate": 1.9821917808219176e-07, "log_odds_chosen": 1.72361421585083, "log_odds_ratio": -0.4606797993183136, "logits/chosen": 0.6678470373153687, "logits/rejected": 0.7357126474380493, "logps/chosen": -1.800119400024414, "logps/rejected": -3.3687660694122314, "loss": 0.6471, "nll_loss": 0.600990891456604, "rewards/accuracies": 0.75, "rewards/chosen": -0.18001195788383484, "rewards/margins": 0.15686467289924622, "rewards/rejected": -0.33687663078308105, "step": 5856 }, { "epoch": 16.035592060232716, "grad_norm": 7.172696113586426, "learning_rate": 1.980821917808219e-07, "log_odds_chosen": 2.3928093910217285, "log_odds_ratio": -0.2543495297431946, "logits/chosen": 0.9757278561592102, "logits/rejected": 1.0095361471176147, "logps/chosen": -3.097720146179199, "logps/rejected": -5.4150214195251465, "loss": 0.6756, "nll_loss": 0.6501765847206116, "rewards/accuracies": 0.875, "rewards/chosen": -0.3097720146179199, "rewards/margins": 0.2317301332950592, "rewards/rejected": -0.5415021777153015, "step": 5857 }, { "epoch": 16.03832991101985, "grad_norm": 6.171201705932617, "learning_rate": 1.9794520547945203e-07, "log_odds_chosen": 2.6521146297454834, "log_odds_ratio": -0.17432087659835815, "logits/chosen": 0.9318585395812988, "logits/rejected": 1.0169668197631836, "logps/chosen": -2.725243330001831, "logps/rejected": -5.298737525939941, "loss": 0.7602, "nll_loss": 0.7427207231521606, "rewards/accuracies": 1.0, "rewards/chosen": -0.272524356842041, "rewards/margins": 0.25734943151474, "rewards/rejected": -0.529873788356781, "step": 5858 }, { "epoch": 16.041067761806982, "grad_norm": 6.881107807159424, "learning_rate": 1.9780821917808219e-07, "log_odds_chosen": 3.819746255874634, "log_odds_ratio": -0.15056125819683075, "logits/chosen": 1.0529999732971191, "logits/rejected": 0.9870838522911072, "logps/chosen": -2.2293474674224854, "logps/rejected": -5.8613786697387695, "loss": 0.7245, "nll_loss": 0.7094857692718506, "rewards/accuracies": 0.875, "rewards/chosen": -0.222934752702713, "rewards/margins": 0.3632030785083771, "rewards/rejected": -0.5861378908157349, "step": 5859 }, { "epoch": 16.043805612594113, "grad_norm": 7.066321849822998, "learning_rate": 1.976712328767123e-07, "log_odds_chosen": 2.4939446449279785, "log_odds_ratio": -0.19477590918540955, "logits/chosen": 0.8159267902374268, "logits/rejected": 0.8481810092926025, "logps/chosen": -2.594998836517334, "logps/rejected": -5.01336145401001, "loss": 0.7596, "nll_loss": 0.7401188015937805, "rewards/accuracies": 1.0, "rewards/chosen": -0.2594999074935913, "rewards/margins": 0.2418362945318222, "rewards/rejected": -0.5013362169265747, "step": 5860 }, { "epoch": 16.046543463381244, "grad_norm": 5.609142303466797, "learning_rate": 1.9753424657534246e-07, "log_odds_chosen": 1.64878511428833, "log_odds_ratio": -0.3373130261898041, "logits/chosen": 0.7013217806816101, "logits/rejected": 0.7665281295776367, "logps/chosen": -2.26753830909729, "logps/rejected": -3.852905035018921, "loss": 0.7265, "nll_loss": 0.6927453279495239, "rewards/accuracies": 0.875, "rewards/chosen": -0.226753830909729, "rewards/margins": 0.15853668749332428, "rewards/rejected": -0.3852905035018921, "step": 5861 }, { "epoch": 16.04928131416838, "grad_norm": 5.306994915008545, "learning_rate": 1.9739726027397262e-07, "log_odds_chosen": 2.3061704635620117, "log_odds_ratio": -0.282865434885025, "logits/chosen": 0.7609583139419556, "logits/rejected": 0.7507808208465576, "logps/chosen": -1.8013560771942139, "logps/rejected": -3.895773410797119, "loss": 0.6556, "nll_loss": 0.6273596286773682, "rewards/accuracies": 1.0, "rewards/chosen": -0.1801356077194214, "rewards/margins": 0.20944175124168396, "rewards/rejected": -0.38957738876342773, "step": 5862 }, { "epoch": 16.05201916495551, "grad_norm": 6.349751949310303, "learning_rate": 1.9726027397260271e-07, "log_odds_chosen": 1.5106227397918701, "log_odds_ratio": -0.3874027132987976, "logits/chosen": 0.8729052543640137, "logits/rejected": 0.8511733412742615, "logps/chosen": -1.785862684249878, "logps/rejected": -3.125798225402832, "loss": 0.7085, "nll_loss": 0.6697783470153809, "rewards/accuracies": 0.75, "rewards/chosen": -0.17858624458312988, "rewards/margins": 0.13399355113506317, "rewards/rejected": -0.31257981061935425, "step": 5863 }, { "epoch": 16.05475701574264, "grad_norm": 7.299656867980957, "learning_rate": 1.9712328767123287e-07, "log_odds_chosen": 3.330418109893799, "log_odds_ratio": -0.30822721123695374, "logits/chosen": 0.9686703085899353, "logits/rejected": 0.9426679611206055, "logps/chosen": -2.699540615081787, "logps/rejected": -5.92544412612915, "loss": 0.8119, "nll_loss": 0.7810755372047424, "rewards/accuracies": 0.875, "rewards/chosen": -0.26995402574539185, "rewards/margins": 0.32259035110473633, "rewards/rejected": -0.5925443768501282, "step": 5864 }, { "epoch": 16.057494866529773, "grad_norm": 6.207226753234863, "learning_rate": 1.96986301369863e-07, "log_odds_chosen": 3.2008330821990967, "log_odds_ratio": -0.1931384801864624, "logits/chosen": 0.8239971399307251, "logits/rejected": 0.7955543994903564, "logps/chosen": -2.1035661697387695, "logps/rejected": -5.163880825042725, "loss": 0.7658, "nll_loss": 0.7465308904647827, "rewards/accuracies": 1.0, "rewards/chosen": -0.21035659313201904, "rewards/margins": 0.3060314655303955, "rewards/rejected": -0.5163880586624146, "step": 5865 }, { "epoch": 16.060232717316907, "grad_norm": 4.91996431350708, "learning_rate": 1.9684931506849314e-07, "log_odds_chosen": 1.2320184707641602, "log_odds_ratio": -0.4991997182369232, "logits/chosen": 0.7299140691757202, "logits/rejected": 0.7375460267066956, "logps/chosen": -2.4049057960510254, "logps/rejected": -3.502877950668335, "loss": 0.6852, "nll_loss": 0.635298490524292, "rewards/accuracies": 0.75, "rewards/chosen": -0.24049058556556702, "rewards/margins": 0.10979720950126648, "rewards/rejected": -0.3502877950668335, "step": 5866 }, { "epoch": 16.06297056810404, "grad_norm": 5.257029056549072, "learning_rate": 1.967123287671233e-07, "log_odds_chosen": 1.744372844696045, "log_odds_ratio": -0.2430250197649002, "logits/chosen": 0.9620130062103271, "logits/rejected": 0.9979966878890991, "logps/chosen": -2.3252944946289062, "logps/rejected": -3.960181713104248, "loss": 0.645, "nll_loss": 0.6206860542297363, "rewards/accuracies": 1.0, "rewards/chosen": -0.23252946138381958, "rewards/margins": 0.1634887307882309, "rewards/rejected": -0.39601820707321167, "step": 5867 }, { "epoch": 16.06570841889117, "grad_norm": 5.416234016418457, "learning_rate": 1.9657534246575342e-07, "log_odds_chosen": 1.963350534439087, "log_odds_ratio": -0.460593581199646, "logits/chosen": 0.9023003578186035, "logits/rejected": 0.9285491704940796, "logps/chosen": -2.2810287475585938, "logps/rejected": -4.087009429931641, "loss": 0.6876, "nll_loss": 0.6415838599205017, "rewards/accuracies": 0.875, "rewards/chosen": -0.2281028926372528, "rewards/margins": 0.18059806525707245, "rewards/rejected": -0.40870094299316406, "step": 5868 }, { "epoch": 16.0684462696783, "grad_norm": 5.47911262512207, "learning_rate": 1.9643835616438357e-07, "log_odds_chosen": 1.5050628185272217, "log_odds_ratio": -0.40606439113616943, "logits/chosen": 0.6112762093544006, "logits/rejected": 0.6444988250732422, "logps/chosen": -2.2165699005126953, "logps/rejected": -3.670620918273926, "loss": 0.7207, "nll_loss": 0.6800733804702759, "rewards/accuracies": 0.75, "rewards/chosen": -0.22165697813034058, "rewards/margins": 0.1454050987958908, "rewards/rejected": -0.3670620918273926, "step": 5869 }, { "epoch": 16.071184120465436, "grad_norm": 7.268156051635742, "learning_rate": 1.9630136986301367e-07, "log_odds_chosen": 3.2447328567504883, "log_odds_ratio": -0.2953837513923645, "logits/chosen": 0.7779722213745117, "logits/rejected": 0.7492712736129761, "logps/chosen": -2.2939743995666504, "logps/rejected": -5.390508651733398, "loss": 0.7176, "nll_loss": 0.6880747675895691, "rewards/accuracies": 0.75, "rewards/chosen": -0.22939743101596832, "rewards/margins": 0.30965346097946167, "rewards/rejected": -0.5390508770942688, "step": 5870 }, { "epoch": 16.073921971252567, "grad_norm": 5.68443489074707, "learning_rate": 1.9616438356164383e-07, "log_odds_chosen": 1.8795506954193115, "log_odds_ratio": -0.3295590281486511, "logits/chosen": 0.9084309339523315, "logits/rejected": 0.9656997919082642, "logps/chosen": -1.9025641679763794, "logps/rejected": -3.635760545730591, "loss": 0.7593, "nll_loss": 0.7263566255569458, "rewards/accuracies": 0.75, "rewards/chosen": -0.19025641679763794, "rewards/margins": 0.17331966757774353, "rewards/rejected": -0.36357608437538147, "step": 5871 }, { "epoch": 16.076659822039698, "grad_norm": 6.223429203033447, "learning_rate": 1.9602739726027395e-07, "log_odds_chosen": 1.4581098556518555, "log_odds_ratio": -0.4496040642261505, "logits/chosen": 0.8546624183654785, "logits/rejected": 0.8087082505226135, "logps/chosen": -1.9428529739379883, "logps/rejected": -3.3050012588500977, "loss": 0.5731, "nll_loss": 0.5281068086624146, "rewards/accuracies": 0.75, "rewards/chosen": -0.1942853182554245, "rewards/margins": 0.13621482253074646, "rewards/rejected": -0.33050012588500977, "step": 5872 }, { "epoch": 16.079397672826833, "grad_norm": 5.310755729675293, "learning_rate": 1.958904109589041e-07, "log_odds_chosen": 3.5042896270751953, "log_odds_ratio": -0.1524384766817093, "logits/chosen": 0.8741472363471985, "logits/rejected": 0.865204930305481, "logps/chosen": -2.169320583343506, "logps/rejected": -5.565426349639893, "loss": 0.6238, "nll_loss": 0.6085444092750549, "rewards/accuracies": 1.0, "rewards/chosen": -0.21693207323551178, "rewards/margins": 0.33961057662963867, "rewards/rejected": -0.5565426349639893, "step": 5873 }, { "epoch": 16.082135523613964, "grad_norm": 5.31308650970459, "learning_rate": 1.9575342465753426e-07, "log_odds_chosen": 3.0666263103485107, "log_odds_ratio": -0.24676594138145447, "logits/chosen": 0.7100463509559631, "logits/rejected": 0.7896207571029663, "logps/chosen": -2.020841121673584, "logps/rejected": -5.003032207489014, "loss": 0.5819, "nll_loss": 0.5571753978729248, "rewards/accuracies": 0.75, "rewards/chosen": -0.20208412408828735, "rewards/margins": 0.29821905493736267, "rewards/rejected": -0.5003032088279724, "step": 5874 }, { "epoch": 16.084873374401095, "grad_norm": 6.41722297668457, "learning_rate": 1.9561643835616438e-07, "log_odds_chosen": 1.424889326095581, "log_odds_ratio": -0.34685027599334717, "logits/chosen": 0.8243582248687744, "logits/rejected": 0.8429746031761169, "logps/chosen": -2.970019817352295, "logps/rejected": -4.311954498291016, "loss": 0.7768, "nll_loss": 0.7421095967292786, "rewards/accuracies": 0.875, "rewards/chosen": -0.2970019578933716, "rewards/margins": 0.13419348001480103, "rewards/rejected": -0.4311954379081726, "step": 5875 }, { "epoch": 16.087611225188226, "grad_norm": 5.214881420135498, "learning_rate": 1.954794520547945e-07, "log_odds_chosen": 2.1694066524505615, "log_odds_ratio": -0.21741658449172974, "logits/chosen": 0.8731133341789246, "logits/rejected": 0.8849258422851562, "logps/chosen": -1.6413874626159668, "logps/rejected": -3.648308753967285, "loss": 0.6386, "nll_loss": 0.6168387532234192, "rewards/accuracies": 1.0, "rewards/chosen": -0.1641387641429901, "rewards/margins": 0.20069211721420288, "rewards/rejected": -0.3648308515548706, "step": 5876 }, { "epoch": 16.09034907597536, "grad_norm": 6.988272666931152, "learning_rate": 1.9534246575342463e-07, "log_odds_chosen": 1.589572548866272, "log_odds_ratio": -0.4905601441860199, "logits/chosen": 1.0232089757919312, "logits/rejected": 1.0124237537384033, "logps/chosen": -3.2052347660064697, "logps/rejected": -4.742245674133301, "loss": 0.7022, "nll_loss": 0.6531753540039062, "rewards/accuracies": 0.625, "rewards/chosen": -0.3205235004425049, "rewards/margins": 0.153701052069664, "rewards/rejected": -0.4742245376110077, "step": 5877 }, { "epoch": 16.093086926762492, "grad_norm": 5.836400985717773, "learning_rate": 1.9520547945205478e-07, "log_odds_chosen": 1.1045095920562744, "log_odds_ratio": -0.3763957619667053, "logits/chosen": 0.9604073166847229, "logits/rejected": 0.9703240394592285, "logps/chosen": -2.747209072113037, "logps/rejected": -3.7713236808776855, "loss": 0.7117, "nll_loss": 0.6740873456001282, "rewards/accuracies": 0.875, "rewards/chosen": -0.2747209370136261, "rewards/margins": 0.10241145640611649, "rewards/rejected": -0.3771323561668396, "step": 5878 }, { "epoch": 16.095824777549623, "grad_norm": 6.907416820526123, "learning_rate": 1.950684931506849e-07, "log_odds_chosen": 2.1793711185455322, "log_odds_ratio": -0.14514601230621338, "logits/chosen": 1.0515990257263184, "logits/rejected": 1.1342179775238037, "logps/chosen": -2.4792675971984863, "logps/rejected": -4.571681022644043, "loss": 0.6743, "nll_loss": 0.6598351001739502, "rewards/accuracies": 1.0, "rewards/chosen": -0.2479267567396164, "rewards/margins": 0.2092413604259491, "rewards/rejected": -0.4571680724620819, "step": 5879 }, { "epoch": 16.098562628336754, "grad_norm": 5.6132097244262695, "learning_rate": 1.9493150684931506e-07, "log_odds_chosen": 2.178516387939453, "log_odds_ratio": -0.2568795382976532, "logits/chosen": 0.8165416717529297, "logits/rejected": 0.8385938405990601, "logps/chosen": -1.8879839181900024, "logps/rejected": -3.8769712448120117, "loss": 0.5177, "nll_loss": 0.4920533001422882, "rewards/accuracies": 0.875, "rewards/chosen": -0.18879838287830353, "rewards/margins": 0.19889873266220093, "rewards/rejected": -0.38769713044166565, "step": 5880 }, { "epoch": 16.10130047912389, "grad_norm": 4.651319980621338, "learning_rate": 1.9479452054794521e-07, "log_odds_chosen": 1.8097476959228516, "log_odds_ratio": -0.2507878839969635, "logits/chosen": 0.9274042844772339, "logits/rejected": 0.9636399745941162, "logps/chosen": -2.3709218502044678, "logps/rejected": -4.0764288902282715, "loss": 0.6359, "nll_loss": 0.6108291745185852, "rewards/accuracies": 0.875, "rewards/chosen": -0.23709219694137573, "rewards/margins": 0.17055071890354156, "rewards/rejected": -0.4076429009437561, "step": 5881 }, { "epoch": 16.10403832991102, "grad_norm": 5.763381004333496, "learning_rate": 1.9465753424657534e-07, "log_odds_chosen": 2.3645262718200684, "log_odds_ratio": -0.20188897848129272, "logits/chosen": 0.8735484480857849, "logits/rejected": 0.7838869690895081, "logps/chosen": -1.4458444118499756, "logps/rejected": -3.6073198318481445, "loss": 0.554, "nll_loss": 0.5338068008422852, "rewards/accuracies": 1.0, "rewards/chosen": -0.14458446204662323, "rewards/margins": 0.2161475419998169, "rewards/rejected": -0.36073198914527893, "step": 5882 }, { "epoch": 16.10677618069815, "grad_norm": 4.689014434814453, "learning_rate": 1.9452054794520547e-07, "log_odds_chosen": 3.053645133972168, "log_odds_ratio": -0.15070746839046478, "logits/chosen": 0.932909369468689, "logits/rejected": 0.9440063834190369, "logps/chosen": -2.3889732360839844, "logps/rejected": -5.3352179527282715, "loss": 0.5939, "nll_loss": 0.5787925720214844, "rewards/accuracies": 1.0, "rewards/chosen": -0.23889732360839844, "rewards/margins": 0.2946244478225708, "rewards/rejected": -0.5335217714309692, "step": 5883 }, { "epoch": 16.109514031485283, "grad_norm": 5.187576770782471, "learning_rate": 1.943835616438356e-07, "log_odds_chosen": 2.6548712253570557, "log_odds_ratio": -0.23657375574111938, "logits/chosen": 0.8235278129577637, "logits/rejected": 0.8487140536308289, "logps/chosen": -2.2669670581817627, "logps/rejected": -4.798120975494385, "loss": 0.614, "nll_loss": 0.5903574824333191, "rewards/accuracies": 0.875, "rewards/chosen": -0.22669672966003418, "rewards/margins": 0.2531154155731201, "rewards/rejected": -0.4798121154308319, "step": 5884 }, { "epoch": 16.112251882272417, "grad_norm": 5.347537517547607, "learning_rate": 1.9424657534246574e-07, "log_odds_chosen": 1.8699662685394287, "log_odds_ratio": -0.24574260413646698, "logits/chosen": 0.9258242249488831, "logits/rejected": 0.9807696342468262, "logps/chosen": -2.2342567443847656, "logps/rejected": -3.9933600425720215, "loss": 0.687, "nll_loss": 0.6623899340629578, "rewards/accuracies": 1.0, "rewards/chosen": -0.22342568635940552, "rewards/margins": 0.1759103238582611, "rewards/rejected": -0.3993360102176666, "step": 5885 }, { "epoch": 16.11498973305955, "grad_norm": 7.251189231872559, "learning_rate": 1.941095890410959e-07, "log_odds_chosen": 2.390810012817383, "log_odds_ratio": -0.22055041790008545, "logits/chosen": 0.9238662123680115, "logits/rejected": 0.9767910838127136, "logps/chosen": -2.235368490219116, "logps/rejected": -4.444533348083496, "loss": 0.6976, "nll_loss": 0.6755287647247314, "rewards/accuracies": 0.875, "rewards/chosen": -0.22353684902191162, "rewards/margins": 0.2209164798259735, "rewards/rejected": -0.44445329904556274, "step": 5886 }, { "epoch": 16.11772758384668, "grad_norm": 7.575551986694336, "learning_rate": 1.9397260273972602e-07, "log_odds_chosen": 2.7332327365875244, "log_odds_ratio": -0.17213067412376404, "logits/chosen": 0.7282684445381165, "logits/rejected": 0.6089907884597778, "logps/chosen": -1.9759776592254639, "logps/rejected": -4.456625461578369, "loss": 0.7702, "nll_loss": 0.7530051469802856, "rewards/accuracies": 0.875, "rewards/chosen": -0.19759777188301086, "rewards/margins": 0.2480648010969162, "rewards/rejected": -0.44566258788108826, "step": 5887 }, { "epoch": 16.12046543463381, "grad_norm": 6.645524024963379, "learning_rate": 1.9383561643835617e-07, "log_odds_chosen": 1.3669869899749756, "log_odds_ratio": -0.384512722492218, "logits/chosen": 0.9198448657989502, "logits/rejected": 0.8749738931655884, "logps/chosen": -1.838921308517456, "logps/rejected": -3.057319164276123, "loss": 0.5287, "nll_loss": 0.4902799725532532, "rewards/accuracies": 0.875, "rewards/chosen": -0.1838921308517456, "rewards/margins": 0.12183979153633118, "rewards/rejected": -0.3057318925857544, "step": 5888 }, { "epoch": 16.123203285420946, "grad_norm": 7.666328430175781, "learning_rate": 1.936986301369863e-07, "log_odds_chosen": 1.8629775047302246, "log_odds_ratio": -0.400991290807724, "logits/chosen": 0.8253704309463501, "logits/rejected": 0.8613898754119873, "logps/chosen": -3.0655341148376465, "logps/rejected": -4.86654806137085, "loss": 0.7802, "nll_loss": 0.7401275634765625, "rewards/accuracies": 0.75, "rewards/chosen": -0.3065534234046936, "rewards/margins": 0.1801014244556427, "rewards/rejected": -0.4866548180580139, "step": 5889 }, { "epoch": 16.125941136208077, "grad_norm": 4.3009538650512695, "learning_rate": 1.9356164383561642e-07, "log_odds_chosen": 1.9946911334991455, "log_odds_ratio": -0.19670343399047852, "logits/chosen": 0.8995292782783508, "logits/rejected": 0.9443333148956299, "logps/chosen": -1.9099528789520264, "logps/rejected": -3.758855104446411, "loss": 0.612, "nll_loss": 0.5923328399658203, "rewards/accuracies": 1.0, "rewards/chosen": -0.19099530577659607, "rewards/margins": 0.18489021062850952, "rewards/rejected": -0.3758855164051056, "step": 5890 }, { "epoch": 16.128678986995208, "grad_norm": 5.349530220031738, "learning_rate": 1.9342465753424655e-07, "log_odds_chosen": 2.538422107696533, "log_odds_ratio": -0.13968366384506226, "logits/chosen": 1.010003924369812, "logits/rejected": 1.1015428304672241, "logps/chosen": -2.2157821655273438, "logps/rejected": -4.556021690368652, "loss": 0.7189, "nll_loss": 0.7049232125282288, "rewards/accuracies": 1.0, "rewards/chosen": -0.2215782105922699, "rewards/margins": 0.23402394354343414, "rewards/rejected": -0.45560216903686523, "step": 5891 }, { "epoch": 16.13141683778234, "grad_norm": 4.854220867156982, "learning_rate": 1.932876712328767e-07, "log_odds_chosen": 2.983736276626587, "log_odds_ratio": -0.14271271228790283, "logits/chosen": 0.8805946111679077, "logits/rejected": 0.9109436273574829, "logps/chosen": -2.3540892601013184, "logps/rejected": -5.202165603637695, "loss": 0.6194, "nll_loss": 0.6051110029220581, "rewards/accuracies": 1.0, "rewards/chosen": -0.2354089319705963, "rewards/margins": 0.28480765223503113, "rewards/rejected": -0.5202165842056274, "step": 5892 }, { "epoch": 16.134154688569474, "grad_norm": 4.770015239715576, "learning_rate": 1.9315068493150685e-07, "log_odds_chosen": 4.239562034606934, "log_odds_ratio": -0.07086558640003204, "logits/chosen": 0.8932744860649109, "logits/rejected": 0.9666619300842285, "logps/chosen": -2.0103511810302734, "logps/rejected": -6.036612510681152, "loss": 0.6068, "nll_loss": 0.5996698141098022, "rewards/accuracies": 1.0, "rewards/chosen": -0.20103512704372406, "rewards/margins": 0.402626097202301, "rewards/rejected": -0.603661298751831, "step": 5893 }, { "epoch": 16.136892539356605, "grad_norm": 5.521723747253418, "learning_rate": 1.9301369863013698e-07, "log_odds_chosen": 1.4242149591445923, "log_odds_ratio": -0.27433377504348755, "logits/chosen": 0.960415244102478, "logits/rejected": 0.9795025587081909, "logps/chosen": -2.412458896636963, "logps/rejected": -3.745217800140381, "loss": 0.6967, "nll_loss": 0.6692229509353638, "rewards/accuracies": 1.0, "rewards/chosen": -0.24124592542648315, "rewards/margins": 0.13327588140964508, "rewards/rejected": -0.37452179193496704, "step": 5894 }, { "epoch": 16.139630390143736, "grad_norm": 6.397805213928223, "learning_rate": 1.9287671232876713e-07, "log_odds_chosen": 2.203066110610962, "log_odds_ratio": -0.16627033054828644, "logits/chosen": 0.7279306650161743, "logits/rejected": 0.7859729528427124, "logps/chosen": -2.3139846324920654, "logps/rejected": -4.377344131469727, "loss": 0.8462, "nll_loss": 0.829556941986084, "rewards/accuracies": 1.0, "rewards/chosen": -0.23139846324920654, "rewards/margins": 0.20633597671985626, "rewards/rejected": -0.4377344250679016, "step": 5895 }, { "epoch": 16.142368240930868, "grad_norm": 5.313666343688965, "learning_rate": 1.9273972602739723e-07, "log_odds_chosen": 2.7590935230255127, "log_odds_ratio": -0.18436479568481445, "logits/chosen": 0.9732276797294617, "logits/rejected": 1.086121678352356, "logps/chosen": -2.0004706382751465, "logps/rejected": -4.597110748291016, "loss": 0.5807, "nll_loss": 0.5622601509094238, "rewards/accuracies": 0.875, "rewards/chosen": -0.2000470757484436, "rewards/margins": 0.25966402888298035, "rewards/rejected": -0.45971107482910156, "step": 5896 }, { "epoch": 16.145106091718002, "grad_norm": 4.965407371520996, "learning_rate": 1.9260273972602738e-07, "log_odds_chosen": 3.4833340644836426, "log_odds_ratio": -0.14822526276111603, "logits/chosen": 0.6281517148017883, "logits/rejected": 0.6022838950157166, "logps/chosen": -1.4447208642959595, "logps/rejected": -4.705828666687012, "loss": 0.6301, "nll_loss": 0.6152859926223755, "rewards/accuracies": 1.0, "rewards/chosen": -0.14447207748889923, "rewards/margins": 0.3261107802391052, "rewards/rejected": -0.47058287262916565, "step": 5897 }, { "epoch": 16.147843942505133, "grad_norm": 6.683852672576904, "learning_rate": 1.924657534246575e-07, "log_odds_chosen": 1.1674598455429077, "log_odds_ratio": -0.5113058090209961, "logits/chosen": 0.6897897720336914, "logits/rejected": 0.6842927932739258, "logps/chosen": -2.9154019355773926, "logps/rejected": -4.056663513183594, "loss": 0.7029, "nll_loss": 0.6518181562423706, "rewards/accuracies": 0.75, "rewards/chosen": -0.2915401756763458, "rewards/margins": 0.11412620544433594, "rewards/rejected": -0.40566638112068176, "step": 5898 }, { "epoch": 16.150581793292265, "grad_norm": 7.248232364654541, "learning_rate": 1.9232876712328766e-07, "log_odds_chosen": 3.1623620986938477, "log_odds_ratio": -0.3699701726436615, "logits/chosen": 0.7697489857673645, "logits/rejected": 0.843295693397522, "logps/chosen": -2.5526208877563477, "logps/rejected": -5.645798206329346, "loss": 0.8381, "nll_loss": 0.8010822534561157, "rewards/accuracies": 0.875, "rewards/chosen": -0.2552620768547058, "rewards/margins": 0.30931776762008667, "rewards/rejected": -0.5645798444747925, "step": 5899 }, { "epoch": 16.1533196440794, "grad_norm": 5.526072978973389, "learning_rate": 1.921917808219178e-07, "log_odds_chosen": 2.9955384731292725, "log_odds_ratio": -0.21589279174804688, "logits/chosen": 0.6608254313468933, "logits/rejected": 0.6587474346160889, "logps/chosen": -2.6462759971618652, "logps/rejected": -5.520404815673828, "loss": 0.7138, "nll_loss": 0.6922441124916077, "rewards/accuracies": 0.875, "rewards/chosen": -0.264627605676651, "rewards/margins": 0.2874128818511963, "rewards/rejected": -0.5520404577255249, "step": 5900 }, { "epoch": 16.15605749486653, "grad_norm": 5.79508638381958, "learning_rate": 1.9205479452054794e-07, "log_odds_chosen": 4.849636554718018, "log_odds_ratio": -0.27711760997772217, "logits/chosen": 1.0411268472671509, "logits/rejected": 1.111255168914795, "logps/chosen": -2.5798988342285156, "logps/rejected": -7.363888263702393, "loss": 0.6642, "nll_loss": 0.636498212814331, "rewards/accuracies": 0.75, "rewards/chosen": -0.25798988342285156, "rewards/margins": 0.47839897871017456, "rewards/rejected": -0.7363888621330261, "step": 5901 }, { "epoch": 16.15879534565366, "grad_norm": 5.699057102203369, "learning_rate": 1.919178082191781e-07, "log_odds_chosen": 1.7462794780731201, "log_odds_ratio": -0.2838156819343567, "logits/chosen": 0.7165305018424988, "logits/rejected": 0.7500247955322266, "logps/chosen": -2.1672964096069336, "logps/rejected": -3.747638702392578, "loss": 0.5951, "nll_loss": 0.566745400428772, "rewards/accuracies": 1.0, "rewards/chosen": -0.21672964096069336, "rewards/margins": 0.15803423523902893, "rewards/rejected": -0.3747639060020447, "step": 5902 }, { "epoch": 16.161533196440793, "grad_norm": 7.976846694946289, "learning_rate": 1.917808219178082e-07, "log_odds_chosen": 1.7121702432632446, "log_odds_ratio": -0.5241336822509766, "logits/chosen": 1.0248624086380005, "logits/rejected": 1.0708603858947754, "logps/chosen": -2.4738988876342773, "logps/rejected": -4.036169052124023, "loss": 0.641, "nll_loss": 0.5886105298995972, "rewards/accuracies": 0.875, "rewards/chosen": -0.24738986790180206, "rewards/margins": 0.1562270224094391, "rewards/rejected": -0.40361690521240234, "step": 5903 }, { "epoch": 16.164271047227928, "grad_norm": 4.522360324859619, "learning_rate": 1.9164383561643834e-07, "log_odds_chosen": 2.1021344661712646, "log_odds_ratio": -0.3353208601474762, "logits/chosen": 0.910042405128479, "logits/rejected": 0.8846069574356079, "logps/chosen": -2.1129748821258545, "logps/rejected": -4.157397270202637, "loss": 0.6504, "nll_loss": 0.6168814301490784, "rewards/accuracies": 0.875, "rewards/chosen": -0.21129749715328217, "rewards/margins": 0.20444224774837494, "rewards/rejected": -0.4157397449016571, "step": 5904 }, { "epoch": 16.16700889801506, "grad_norm": 8.566904067993164, "learning_rate": 1.915068493150685e-07, "log_odds_chosen": 1.5252599716186523, "log_odds_ratio": -0.34028977155685425, "logits/chosen": 1.096720576286316, "logits/rejected": 1.082567811012268, "logps/chosen": -2.1162519454956055, "logps/rejected": -3.5079665184020996, "loss": 0.5677, "nll_loss": 0.5336740612983704, "rewards/accuracies": 0.875, "rewards/chosen": -0.21162518858909607, "rewards/margins": 0.13917145133018494, "rewards/rejected": -0.350796639919281, "step": 5905 }, { "epoch": 16.16974674880219, "grad_norm": 6.172893047332764, "learning_rate": 1.9136986301369862e-07, "log_odds_chosen": 3.0749902725219727, "log_odds_ratio": -0.12003684788942337, "logits/chosen": 0.933596134185791, "logits/rejected": 1.004684329032898, "logps/chosen": -2.51637864112854, "logps/rejected": -5.472308158874512, "loss": 0.6844, "nll_loss": 0.6724308729171753, "rewards/accuracies": 1.0, "rewards/chosen": -0.25163784623146057, "rewards/margins": 0.2955929934978485, "rewards/rejected": -0.5472308397293091, "step": 5906 }, { "epoch": 16.17248459958932, "grad_norm": 7.342236518859863, "learning_rate": 1.9123287671232877e-07, "log_odds_chosen": 1.9958118200302124, "log_odds_ratio": -0.49674543738365173, "logits/chosen": 0.8164710998535156, "logits/rejected": 0.8034189343452454, "logps/chosen": -2.9412343502044678, "logps/rejected": -4.851619243621826, "loss": 0.7457, "nll_loss": 0.6960383653640747, "rewards/accuracies": 0.875, "rewards/chosen": -0.29412341117858887, "rewards/margins": 0.19103853404521942, "rewards/rejected": -0.4851619303226471, "step": 5907 }, { "epoch": 16.175222450376456, "grad_norm": 5.102811813354492, "learning_rate": 1.910958904109589e-07, "log_odds_chosen": 3.0467398166656494, "log_odds_ratio": -0.2236279398202896, "logits/chosen": 0.947136402130127, "logits/rejected": 0.9624632596969604, "logps/chosen": -2.7524254322052, "logps/rejected": -5.711672782897949, "loss": 0.6962, "nll_loss": 0.6738336086273193, "rewards/accuracies": 1.0, "rewards/chosen": -0.27524253726005554, "rewards/margins": 0.2959247827529907, "rewards/rejected": -0.5711672902107239, "step": 5908 }, { "epoch": 16.177960301163587, "grad_norm": 7.032931327819824, "learning_rate": 1.9095890410958905e-07, "log_odds_chosen": 1.9497679471969604, "log_odds_ratio": -0.2671228051185608, "logits/chosen": 1.0753982067108154, "logits/rejected": 1.0586739778518677, "logps/chosen": -2.1812732219696045, "logps/rejected": -4.0153961181640625, "loss": 0.569, "nll_loss": 0.5423309803009033, "rewards/accuracies": 1.0, "rewards/chosen": -0.2181273251771927, "rewards/margins": 0.1834123134613037, "rewards/rejected": -0.4015396535396576, "step": 5909 }, { "epoch": 16.18069815195072, "grad_norm": 5.194143772125244, "learning_rate": 1.9082191780821915e-07, "log_odds_chosen": 2.2143449783325195, "log_odds_ratio": -0.5093944668769836, "logits/chosen": 0.8294777274131775, "logits/rejected": 0.887177050113678, "logps/chosen": -2.423640251159668, "logps/rejected": -4.503483772277832, "loss": 0.7838, "nll_loss": 0.732905387878418, "rewards/accuracies": 0.75, "rewards/chosen": -0.24236401915550232, "rewards/margins": 0.20798435807228088, "rewards/rejected": -0.4503483772277832, "step": 5910 }, { "epoch": 16.18343600273785, "grad_norm": 5.58125114440918, "learning_rate": 1.906849315068493e-07, "log_odds_chosen": 1.3051406145095825, "log_odds_ratio": -0.42691242694854736, "logits/chosen": 0.7484117746353149, "logits/rejected": 0.7028200626373291, "logps/chosen": -2.014457941055298, "logps/rejected": -3.2447457313537598, "loss": 0.6254, "nll_loss": 0.5827347040176392, "rewards/accuracies": 0.875, "rewards/chosen": -0.2014457881450653, "rewards/margins": 0.12302879989147186, "rewards/rejected": -0.32447460293769836, "step": 5911 }, { "epoch": 16.186173853524984, "grad_norm": 4.266139507293701, "learning_rate": 1.9054794520547945e-07, "log_odds_chosen": 2.865125894546509, "log_odds_ratio": -0.20527397096157074, "logits/chosen": 0.843260645866394, "logits/rejected": 0.7885853052139282, "logps/chosen": -1.6019630432128906, "logps/rejected": -4.291633605957031, "loss": 0.5939, "nll_loss": 0.5733908414840698, "rewards/accuracies": 1.0, "rewards/chosen": -0.16019630432128906, "rewards/margins": 0.26896703243255615, "rewards/rejected": -0.4291633665561676, "step": 5912 }, { "epoch": 16.188911704312115, "grad_norm": 7.5722503662109375, "learning_rate": 1.9041095890410958e-07, "log_odds_chosen": 3.030028820037842, "log_odds_ratio": -0.1532016098499298, "logits/chosen": 0.9267570376396179, "logits/rejected": 0.9407850503921509, "logps/chosen": -1.5984227657318115, "logps/rejected": -4.4230852127075195, "loss": 0.6947, "nll_loss": 0.6793549656867981, "rewards/accuracies": 1.0, "rewards/chosen": -0.15984228253364563, "rewards/margins": 0.2824662923812866, "rewards/rejected": -0.44230854511260986, "step": 5913 }, { "epoch": 16.191649555099247, "grad_norm": 4.310144424438477, "learning_rate": 1.9027397260273973e-07, "log_odds_chosen": 2.5521953105926514, "log_odds_ratio": -0.18481391668319702, "logits/chosen": 0.7790318131446838, "logits/rejected": 0.8229494094848633, "logps/chosen": -2.1136553287506104, "logps/rejected": -4.448342323303223, "loss": 0.771, "nll_loss": 0.7525277137756348, "rewards/accuracies": 1.0, "rewards/chosen": -0.21136555075645447, "rewards/margins": 0.2334686815738678, "rewards/rejected": -0.44483423233032227, "step": 5914 }, { "epoch": 16.194387405886378, "grad_norm": 5.379693031311035, "learning_rate": 1.9013698630136986e-07, "log_odds_chosen": 2.39044451713562, "log_odds_ratio": -0.25864624977111816, "logits/chosen": 0.8748313188552856, "logits/rejected": 0.9515944719314575, "logps/chosen": -2.294921875, "logps/rejected": -4.581876277923584, "loss": 0.8002, "nll_loss": 0.7743051052093506, "rewards/accuracies": 0.875, "rewards/chosen": -0.2294921875, "rewards/margins": 0.22869546711444855, "rewards/rejected": -0.45818763971328735, "step": 5915 }, { "epoch": 16.197125256673512, "grad_norm": 5.635643482208252, "learning_rate": 1.8999999999999998e-07, "log_odds_chosen": 2.2409114837646484, "log_odds_ratio": -0.14773337543010712, "logits/chosen": 0.8102002739906311, "logits/rejected": 0.7773935794830322, "logps/chosen": -1.9247299432754517, "logps/rejected": -4.017594337463379, "loss": 0.5521, "nll_loss": 0.5373183488845825, "rewards/accuracies": 1.0, "rewards/chosen": -0.19247299432754517, "rewards/margins": 0.20928643643856049, "rewards/rejected": -0.40175941586494446, "step": 5916 }, { "epoch": 16.199863107460644, "grad_norm": 6.579338550567627, "learning_rate": 1.898630136986301e-07, "log_odds_chosen": 2.545988082885742, "log_odds_ratio": -0.35197752714157104, "logits/chosen": 0.9560522437095642, "logits/rejected": 1.0491653680801392, "logps/chosen": -2.800184726715088, "logps/rejected": -5.263713836669922, "loss": 0.7343, "nll_loss": 0.6991158127784729, "rewards/accuracies": 0.875, "rewards/chosen": -0.28001847863197327, "rewards/margins": 0.2463528960943222, "rewards/rejected": -0.5263713598251343, "step": 5917 }, { "epoch": 16.202600958247775, "grad_norm": 5.8115234375, "learning_rate": 1.8972602739726026e-07, "log_odds_chosen": 2.9070687294006348, "log_odds_ratio": -0.12263940274715424, "logits/chosen": 0.9325830936431885, "logits/rejected": 1.0137428045272827, "logps/chosen": -2.8242712020874023, "logps/rejected": -5.66560697555542, "loss": 0.7035, "nll_loss": 0.691239595413208, "rewards/accuracies": 1.0, "rewards/chosen": -0.2824271321296692, "rewards/margins": 0.28413355350494385, "rewards/rejected": -0.5665607452392578, "step": 5918 }, { "epoch": 16.205338809034906, "grad_norm": 6.076197147369385, "learning_rate": 1.895890410958904e-07, "log_odds_chosen": 2.3038368225097656, "log_odds_ratio": -0.5259263515472412, "logits/chosen": 1.030322551727295, "logits/rejected": 1.0558862686157227, "logps/chosen": -2.6982083320617676, "logps/rejected": -4.928462505340576, "loss": 0.7579, "nll_loss": 0.7052944302558899, "rewards/accuracies": 0.875, "rewards/chosen": -0.26982080936431885, "rewards/margins": 0.22302542626857758, "rewards/rejected": -0.4928462505340576, "step": 5919 }, { "epoch": 16.20807665982204, "grad_norm": 5.357016563415527, "learning_rate": 1.8945205479452054e-07, "log_odds_chosen": 2.0499472618103027, "log_odds_ratio": -0.23419705033302307, "logits/chosen": 0.7660014033317566, "logits/rejected": 0.7538848519325256, "logps/chosen": -1.6611692905426025, "logps/rejected": -3.562462568283081, "loss": 0.6457, "nll_loss": 0.6223166584968567, "rewards/accuracies": 1.0, "rewards/chosen": -0.16611692309379578, "rewards/margins": 0.19012930989265442, "rewards/rejected": -0.3562462627887726, "step": 5920 }, { "epoch": 16.210814510609172, "grad_norm": 4.83119010925293, "learning_rate": 1.893150684931507e-07, "log_odds_chosen": 2.8813040256500244, "log_odds_ratio": -0.17925798892974854, "logits/chosen": 0.8415682315826416, "logits/rejected": 0.8402495980262756, "logps/chosen": -2.1089439392089844, "logps/rejected": -4.874882698059082, "loss": 0.6123, "nll_loss": 0.5943595170974731, "rewards/accuracies": 1.0, "rewards/chosen": -0.2108944058418274, "rewards/margins": 0.2765938639640808, "rewards/rejected": -0.4874882698059082, "step": 5921 }, { "epoch": 16.213552361396303, "grad_norm": 6.827181339263916, "learning_rate": 1.8917808219178081e-07, "log_odds_chosen": 2.428135633468628, "log_odds_ratio": -0.24059997498989105, "logits/chosen": 0.8754572868347168, "logits/rejected": 0.937724232673645, "logps/chosen": -2.5142993927001953, "logps/rejected": -4.812352180480957, "loss": 0.8381, "nll_loss": 0.8139926195144653, "rewards/accuracies": 0.875, "rewards/chosen": -0.251429945230484, "rewards/margins": 0.22980529069900513, "rewards/rejected": -0.48123520612716675, "step": 5922 }, { "epoch": 16.216290212183434, "grad_norm": 5.2743821144104, "learning_rate": 1.8904109589041094e-07, "log_odds_chosen": 1.6605830192565918, "log_odds_ratio": -0.4293057918548584, "logits/chosen": 0.6337794661521912, "logits/rejected": 0.6473053693771362, "logps/chosen": -1.8194698095321655, "logps/rejected": -3.4021482467651367, "loss": 0.626, "nll_loss": 0.583053708076477, "rewards/accuracies": 0.875, "rewards/chosen": -0.18194696307182312, "rewards/margins": 0.1582678258419037, "rewards/rejected": -0.3402147889137268, "step": 5923 }, { "epoch": 16.21902806297057, "grad_norm": 6.37488317489624, "learning_rate": 1.889041095890411e-07, "log_odds_chosen": 1.9271924495697021, "log_odds_ratio": -0.2639116048812866, "logits/chosen": 0.7886781692504883, "logits/rejected": 0.8057955503463745, "logps/chosen": -2.3000645637512207, "logps/rejected": -4.092245101928711, "loss": 0.5731, "nll_loss": 0.5467451810836792, "rewards/accuracies": 1.0, "rewards/chosen": -0.23000645637512207, "rewards/margins": 0.17921805381774902, "rewards/rejected": -0.4092244803905487, "step": 5924 }, { "epoch": 16.2217659137577, "grad_norm": 5.869921684265137, "learning_rate": 1.8876712328767122e-07, "log_odds_chosen": 1.5998203754425049, "log_odds_ratio": -0.32490938901901245, "logits/chosen": 0.825044572353363, "logits/rejected": 0.8271197080612183, "logps/chosen": -1.966374158859253, "logps/rejected": -3.445829391479492, "loss": 0.6004, "nll_loss": 0.567948579788208, "rewards/accuracies": 0.875, "rewards/chosen": -0.19663742184638977, "rewards/margins": 0.14794550836086273, "rewards/rejected": -0.3445829153060913, "step": 5925 }, { "epoch": 16.22450376454483, "grad_norm": 5.354283332824707, "learning_rate": 1.8863013698630137e-07, "log_odds_chosen": 2.32037615776062, "log_odds_ratio": -0.20746715366840363, "logits/chosen": 0.8056290149688721, "logits/rejected": 0.8633745312690735, "logps/chosen": -2.121121406555176, "logps/rejected": -4.323856353759766, "loss": 0.6576, "nll_loss": 0.6368515491485596, "rewards/accuracies": 1.0, "rewards/chosen": -0.212112158536911, "rewards/margins": 0.22027349472045898, "rewards/rejected": -0.4323856830596924, "step": 5926 }, { "epoch": 16.227241615331966, "grad_norm": 4.501871585845947, "learning_rate": 1.884931506849315e-07, "log_odds_chosen": 3.14666748046875, "log_odds_ratio": -0.08778826892375946, "logits/chosen": 0.8169161677360535, "logits/rejected": 0.8640983700752258, "logps/chosen": -2.116499423980713, "logps/rejected": -5.080769062042236, "loss": 0.6623, "nll_loss": 0.6535547971725464, "rewards/accuracies": 1.0, "rewards/chosen": -0.21164995431900024, "rewards/margins": 0.2964269518852234, "rewards/rejected": -0.5080769062042236, "step": 5927 }, { "epoch": 16.229979466119097, "grad_norm": 5.877374649047852, "learning_rate": 1.8835616438356165e-07, "log_odds_chosen": 1.9643070697784424, "log_odds_ratio": -0.27405932545661926, "logits/chosen": 0.49931907653808594, "logits/rejected": 0.5260636210441589, "logps/chosen": -2.8254308700561523, "logps/rejected": -4.673756122589111, "loss": 0.6585, "nll_loss": 0.6311272382736206, "rewards/accuracies": 1.0, "rewards/chosen": -0.2825430929660797, "rewards/margins": 0.18483254313468933, "rewards/rejected": -0.46737563610076904, "step": 5928 }, { "epoch": 16.23271731690623, "grad_norm": 12.02418327331543, "learning_rate": 1.8821917808219177e-07, "log_odds_chosen": 0.25441205501556396, "log_odds_ratio": -0.9626606702804565, "logits/chosen": 1.0140488147735596, "logits/rejected": 1.0380032062530518, "logps/chosen": -3.6326324939727783, "logps/rejected": -3.811415672302246, "loss": 0.8841, "nll_loss": 0.7877959609031677, "rewards/accuracies": 0.75, "rewards/chosen": -0.36326324939727783, "rewards/margins": 0.017878323793411255, "rewards/rejected": -0.3811415433883667, "step": 5929 }, { "epoch": 16.23545516769336, "grad_norm": 5.318253040313721, "learning_rate": 1.880821917808219e-07, "log_odds_chosen": 3.2755208015441895, "log_odds_ratio": -0.17739304900169373, "logits/chosen": 0.8352693319320679, "logits/rejected": 0.875269889831543, "logps/chosen": -2.073176383972168, "logps/rejected": -5.153719902038574, "loss": 0.6102, "nll_loss": 0.5924341082572937, "rewards/accuracies": 1.0, "rewards/chosen": -0.20731765031814575, "rewards/margins": 0.3080543875694275, "rewards/rejected": -0.5153720378875732, "step": 5930 }, { "epoch": 16.238193018480494, "grad_norm": 5.449516296386719, "learning_rate": 1.8794520547945205e-07, "log_odds_chosen": 2.118764877319336, "log_odds_ratio": -0.3949001431465149, "logits/chosen": 0.8158031702041626, "logits/rejected": 0.8346270322799683, "logps/chosen": -3.2158799171447754, "logps/rejected": -5.299923419952393, "loss": 0.866, "nll_loss": 0.8264657855033875, "rewards/accuracies": 0.875, "rewards/chosen": -0.32158800959587097, "rewards/margins": 0.2084043174982071, "rewards/rejected": -0.5299923419952393, "step": 5931 }, { "epoch": 16.240930869267626, "grad_norm": 4.886868476867676, "learning_rate": 1.8780821917808218e-07, "log_odds_chosen": 3.52535343170166, "log_odds_ratio": -0.12630993127822876, "logits/chosen": 0.713544487953186, "logits/rejected": 0.7064975500106812, "logps/chosen": -2.444512367248535, "logps/rejected": -5.844257354736328, "loss": 0.684, "nll_loss": 0.671329915523529, "rewards/accuracies": 1.0, "rewards/chosen": -0.24445123970508575, "rewards/margins": 0.3399744927883148, "rewards/rejected": -0.5844257473945618, "step": 5932 }, { "epoch": 16.243668720054757, "grad_norm": 5.80643367767334, "learning_rate": 1.8767123287671233e-07, "log_odds_chosen": 2.5755105018615723, "log_odds_ratio": -0.2387373447418213, "logits/chosen": 0.7124156951904297, "logits/rejected": 0.80814129114151, "logps/chosen": -2.238839864730835, "logps/rejected": -4.6981377601623535, "loss": 0.5936, "nll_loss": 0.5697010159492493, "rewards/accuracies": 0.875, "rewards/chosen": -0.2238839864730835, "rewards/margins": 0.2459297776222229, "rewards/rejected": -0.4698137938976288, "step": 5933 }, { "epoch": 16.246406570841888, "grad_norm": 5.2847089767456055, "learning_rate": 1.8753424657534245e-07, "log_odds_chosen": 2.6713802814483643, "log_odds_ratio": -0.28265994787216187, "logits/chosen": 0.6471157073974609, "logits/rejected": 0.6865609884262085, "logps/chosen": -2.0571465492248535, "logps/rejected": -4.610167503356934, "loss": 0.7044, "nll_loss": 0.6761748194694519, "rewards/accuracies": 1.0, "rewards/chosen": -0.20571467280387878, "rewards/margins": 0.2553020715713501, "rewards/rejected": -0.4610167443752289, "step": 5934 }, { "epoch": 16.249144421629023, "grad_norm": 5.205422878265381, "learning_rate": 1.873972602739726e-07, "log_odds_chosen": 2.264005184173584, "log_odds_ratio": -0.30263030529022217, "logits/chosen": 0.8394656181335449, "logits/rejected": 0.9034028053283691, "logps/chosen": -2.6560144424438477, "logps/rejected": -4.857503414154053, "loss": 0.7076, "nll_loss": 0.6773820519447327, "rewards/accuracies": 0.875, "rewards/chosen": -0.2656014561653137, "rewards/margins": 0.22014889121055603, "rewards/rejected": -0.48575031757354736, "step": 5935 }, { "epoch": 16.251882272416154, "grad_norm": 6.879476070404053, "learning_rate": 1.8726027397260276e-07, "log_odds_chosen": 1.7667648792266846, "log_odds_ratio": -0.4116593301296234, "logits/chosen": 0.9727579355239868, "logits/rejected": 0.911906898021698, "logps/chosen": -2.0576748847961426, "logps/rejected": -3.665405750274658, "loss": 0.6393, "nll_loss": 0.5981080532073975, "rewards/accuracies": 0.75, "rewards/chosen": -0.20576751232147217, "rewards/margins": 0.16077309846878052, "rewards/rejected": -0.3665406107902527, "step": 5936 }, { "epoch": 16.254620123203285, "grad_norm": 5.76701545715332, "learning_rate": 1.8712328767123286e-07, "log_odds_chosen": 1.4716752767562866, "log_odds_ratio": -0.30462414026260376, "logits/chosen": 0.8960301876068115, "logits/rejected": 0.825803816318512, "logps/chosen": -1.9849010705947876, "logps/rejected": -3.354951858520508, "loss": 0.5638, "nll_loss": 0.5333178043365479, "rewards/accuracies": 0.875, "rewards/chosen": -0.19849011301994324, "rewards/margins": 0.13700509071350098, "rewards/rejected": -0.3354951739311218, "step": 5937 }, { "epoch": 16.257357973990416, "grad_norm": 5.379568099975586, "learning_rate": 1.86986301369863e-07, "log_odds_chosen": 2.57261323928833, "log_odds_ratio": -0.1679391860961914, "logits/chosen": 0.8679989576339722, "logits/rejected": 0.9719524383544922, "logps/chosen": -2.4996962547302246, "logps/rejected": -4.957563400268555, "loss": 0.7131, "nll_loss": 0.6963162422180176, "rewards/accuracies": 1.0, "rewards/chosen": -0.24996964633464813, "rewards/margins": 0.24578672647476196, "rewards/rejected": -0.4957563579082489, "step": 5938 }, { "epoch": 16.26009582477755, "grad_norm": 5.246201515197754, "learning_rate": 1.8684931506849313e-07, "log_odds_chosen": 1.6701476573944092, "log_odds_ratio": -0.4161229431629181, "logits/chosen": 0.740902304649353, "logits/rejected": 0.8392742276191711, "logps/chosen": -2.4948372840881348, "logps/rejected": -4.09172248840332, "loss": 0.6215, "nll_loss": 0.5799030065536499, "rewards/accuracies": 0.75, "rewards/chosen": -0.24948373436927795, "rewards/margins": 0.15968851745128632, "rewards/rejected": -0.4091722369194031, "step": 5939 }, { "epoch": 16.262833675564682, "grad_norm": 4.964813232421875, "learning_rate": 1.8671232876712329e-07, "log_odds_chosen": 3.352186918258667, "log_odds_ratio": -0.18957576155662537, "logits/chosen": 0.638012707233429, "logits/rejected": 0.677473247051239, "logps/chosen": -2.1821367740631104, "logps/rejected": -5.37491512298584, "loss": 0.7585, "nll_loss": 0.7395693063735962, "rewards/accuracies": 1.0, "rewards/chosen": -0.21821367740631104, "rewards/margins": 0.31927788257598877, "rewards/rejected": -0.5374915599822998, "step": 5940 }, { "epoch": 16.265571526351813, "grad_norm": 5.85504674911499, "learning_rate": 1.865753424657534e-07, "log_odds_chosen": 2.384732484817505, "log_odds_ratio": -0.311782568693161, "logits/chosen": 0.7221860289573669, "logits/rejected": 0.7960703372955322, "logps/chosen": -2.5055627822875977, "logps/rejected": -4.716368675231934, "loss": 0.6694, "nll_loss": 0.6382622718811035, "rewards/accuracies": 0.75, "rewards/chosen": -0.2505562901496887, "rewards/margins": 0.22108061611652374, "rewards/rejected": -0.47163689136505127, "step": 5941 }, { "epoch": 16.268309377138944, "grad_norm": 6.3290019035339355, "learning_rate": 1.8643835616438356e-07, "log_odds_chosen": 1.8665879964828491, "log_odds_ratio": -0.28658583760261536, "logits/chosen": 0.6100134253501892, "logits/rejected": 0.6528604030609131, "logps/chosen": -2.2199206352233887, "logps/rejected": -3.9437379837036133, "loss": 0.6928, "nll_loss": 0.6641560792922974, "rewards/accuracies": 0.875, "rewards/chosen": -0.22199207544326782, "rewards/margins": 0.1723816990852356, "rewards/rejected": -0.3943737745285034, "step": 5942 }, { "epoch": 16.27104722792608, "grad_norm": 4.909041404724121, "learning_rate": 1.863013698630137e-07, "log_odds_chosen": 1.185404658317566, "log_odds_ratio": -0.3782380521297455, "logits/chosen": 0.8891597986221313, "logits/rejected": 0.9500014781951904, "logps/chosen": -2.4357733726501465, "logps/rejected": -3.5214405059814453, "loss": 0.6794, "nll_loss": 0.6415829658508301, "rewards/accuracies": 0.875, "rewards/chosen": -0.24357736110687256, "rewards/margins": 0.10856670141220093, "rewards/rejected": -0.3521440625190735, "step": 5943 }, { "epoch": 16.27378507871321, "grad_norm": 7.869137287139893, "learning_rate": 1.8616438356164382e-07, "log_odds_chosen": 0.9936065077781677, "log_odds_ratio": -0.4176984131336212, "logits/chosen": 0.8962819576263428, "logits/rejected": 0.7945391535758972, "logps/chosen": -2.279360294342041, "logps/rejected": -3.1866650581359863, "loss": 0.7216, "nll_loss": 0.6798107624053955, "rewards/accuracies": 0.875, "rewards/chosen": -0.2279360592365265, "rewards/margins": 0.09073047339916229, "rewards/rejected": -0.3186665177345276, "step": 5944 }, { "epoch": 16.27652292950034, "grad_norm": 4.775667190551758, "learning_rate": 1.8602739726027397e-07, "log_odds_chosen": 2.823197841644287, "log_odds_ratio": -0.16258038580417633, "logits/chosen": 0.9118121862411499, "logits/rejected": 0.9084794521331787, "logps/chosen": -1.8588885068893433, "logps/rejected": -4.517263412475586, "loss": 0.5464, "nll_loss": 0.5301419496536255, "rewards/accuracies": 1.0, "rewards/chosen": -0.1858888566493988, "rewards/margins": 0.26583755016326904, "rewards/rejected": -0.45172637701034546, "step": 5945 }, { "epoch": 16.279260780287473, "grad_norm": 5.597485542297363, "learning_rate": 1.858904109589041e-07, "log_odds_chosen": 1.219942569732666, "log_odds_ratio": -0.4028733968734741, "logits/chosen": 0.8863639235496521, "logits/rejected": 0.9500390291213989, "logps/chosen": -2.49784517288208, "logps/rejected": -3.6824705600738525, "loss": 0.6123, "nll_loss": 0.5720313787460327, "rewards/accuracies": 0.75, "rewards/chosen": -0.24978449940681458, "rewards/margins": 0.11846254765987396, "rewards/rejected": -0.36824706196784973, "step": 5946 }, { "epoch": 16.281998631074607, "grad_norm": 5.264597415924072, "learning_rate": 1.8575342465753425e-07, "log_odds_chosen": 3.0707621574401855, "log_odds_ratio": -0.1582152247428894, "logits/chosen": 0.9135141372680664, "logits/rejected": 0.953424870967865, "logps/chosen": -2.00730037689209, "logps/rejected": -4.909583568572998, "loss": 0.6612, "nll_loss": 0.6453832983970642, "rewards/accuracies": 1.0, "rewards/chosen": -0.20073002576828003, "rewards/margins": 0.29022836685180664, "rewards/rejected": -0.49095839262008667, "step": 5947 }, { "epoch": 16.28473648186174, "grad_norm": 5.141600131988525, "learning_rate": 1.8561643835616437e-07, "log_odds_chosen": 2.405045747756958, "log_odds_ratio": -0.1183440089225769, "logits/chosen": 0.9385744333267212, "logits/rejected": 0.9453688263893127, "logps/chosen": -2.1242775917053223, "logps/rejected": -4.3820271492004395, "loss": 0.5761, "nll_loss": 0.5642958283424377, "rewards/accuracies": 1.0, "rewards/chosen": -0.2124277651309967, "rewards/margins": 0.22577497363090515, "rewards/rejected": -0.43820273876190186, "step": 5948 }, { "epoch": 16.28747433264887, "grad_norm": 5.624568462371826, "learning_rate": 1.8547945205479452e-07, "log_odds_chosen": 2.9355664253234863, "log_odds_ratio": -0.21849846839904785, "logits/chosen": 0.9535330533981323, "logits/rejected": 0.9862348437309265, "logps/chosen": -2.0153653621673584, "logps/rejected": -4.8431925773620605, "loss": 0.5494, "nll_loss": 0.5275712013244629, "rewards/accuracies": 1.0, "rewards/chosen": -0.20153653621673584, "rewards/margins": 0.2827827036380768, "rewards/rejected": -0.484319269657135, "step": 5949 }, { "epoch": 16.290212183436005, "grad_norm": 5.696214199066162, "learning_rate": 1.8534246575342465e-07, "log_odds_chosen": 1.6264809370040894, "log_odds_ratio": -0.3339788615703583, "logits/chosen": 0.7933878898620605, "logits/rejected": 0.9006423354148865, "logps/chosen": -2.3082549571990967, "logps/rejected": -3.862248182296753, "loss": 0.7516, "nll_loss": 0.7181792855262756, "rewards/accuracies": 1.0, "rewards/chosen": -0.2308255136013031, "rewards/margins": 0.15539930760860443, "rewards/rejected": -0.38622480630874634, "step": 5950 }, { "epoch": 16.292950034223136, "grad_norm": 4.915828227996826, "learning_rate": 1.8520547945205477e-07, "log_odds_chosen": 3.1108970642089844, "log_odds_ratio": -0.21842972934246063, "logits/chosen": 0.8419867157936096, "logits/rejected": 0.8625842928886414, "logps/chosen": -2.8391218185424805, "logps/rejected": -5.898061275482178, "loss": 0.6057, "nll_loss": 0.5838436484336853, "rewards/accuracies": 0.875, "rewards/chosen": -0.28391218185424805, "rewards/margins": 0.3058939576148987, "rewards/rejected": -0.5898061394691467, "step": 5951 }, { "epoch": 16.295687885010267, "grad_norm": 4.8893818855285645, "learning_rate": 1.8506849315068493e-07, "log_odds_chosen": 2.6200175285339355, "log_odds_ratio": -0.1853030025959015, "logits/chosen": 0.7685340046882629, "logits/rejected": 0.7901996970176697, "logps/chosen": -2.0049476623535156, "logps/rejected": -4.447131633758545, "loss": 0.7214, "nll_loss": 0.7028774619102478, "rewards/accuracies": 1.0, "rewards/chosen": -0.20049476623535156, "rewards/margins": 0.2442183792591095, "rewards/rejected": -0.44471314549446106, "step": 5952 }, { "epoch": 16.298425735797398, "grad_norm": 5.4325714111328125, "learning_rate": 1.8493150684931505e-07, "log_odds_chosen": 2.7179131507873535, "log_odds_ratio": -0.16417652368545532, "logits/chosen": 0.993873119354248, "logits/rejected": 0.9938753843307495, "logps/chosen": -1.8567794561386108, "logps/rejected": -4.349107265472412, "loss": 0.7563, "nll_loss": 0.7399267554283142, "rewards/accuracies": 1.0, "rewards/chosen": -0.18567794561386108, "rewards/margins": 0.24923276901245117, "rewards/rejected": -0.43491074442863464, "step": 5953 }, { "epoch": 16.301163586584533, "grad_norm": 5.651763916015625, "learning_rate": 1.847945205479452e-07, "log_odds_chosen": 1.697688102722168, "log_odds_ratio": -0.22329925000667572, "logits/chosen": 0.7144356369972229, "logits/rejected": 0.7363188862800598, "logps/chosen": -2.6889901161193848, "logps/rejected": -4.287739276885986, "loss": 0.6389, "nll_loss": 0.6165493130683899, "rewards/accuracies": 1.0, "rewards/chosen": -0.26889899373054504, "rewards/margins": 0.15987494587898254, "rewards/rejected": -0.42877396941185, "step": 5954 }, { "epoch": 16.303901437371664, "grad_norm": 5.225244998931885, "learning_rate": 1.8465753424657536e-07, "log_odds_chosen": 2.6279499530792236, "log_odds_ratio": -0.19982045888900757, "logits/chosen": 1.0504529476165771, "logits/rejected": 1.0166707038879395, "logps/chosen": -2.0709025859832764, "logps/rejected": -4.5434184074401855, "loss": 0.5704, "nll_loss": 0.5503798723220825, "rewards/accuracies": 1.0, "rewards/chosen": -0.20709025859832764, "rewards/margins": 0.24725160002708435, "rewards/rejected": -0.4543418884277344, "step": 5955 }, { "epoch": 16.306639288158795, "grad_norm": 7.483120441436768, "learning_rate": 1.8452054794520546e-07, "log_odds_chosen": 2.2627103328704834, "log_odds_ratio": -0.3792993724346161, "logits/chosen": 0.6460717916488647, "logits/rejected": 0.6272541284561157, "logps/chosen": -2.1075098514556885, "logps/rejected": -4.190141677856445, "loss": 0.6122, "nll_loss": 0.5743197798728943, "rewards/accuracies": 0.875, "rewards/chosen": -0.2107509821653366, "rewards/margins": 0.20826315879821777, "rewards/rejected": -0.41901418566703796, "step": 5956 }, { "epoch": 16.309377138945926, "grad_norm": 6.765692234039307, "learning_rate": 1.843835616438356e-07, "log_odds_chosen": 1.2853976488113403, "log_odds_ratio": -0.36303281784057617, "logits/chosen": 0.5932273864746094, "logits/rejected": 0.5830051898956299, "logps/chosen": -2.247310161590576, "logps/rejected": -3.448833465576172, "loss": 0.6707, "nll_loss": 0.6343865394592285, "rewards/accuracies": 1.0, "rewards/chosen": -0.22473101317882538, "rewards/margins": 0.12015235424041748, "rewards/rejected": -0.34488338232040405, "step": 5957 }, { "epoch": 16.31211498973306, "grad_norm": 5.261674880981445, "learning_rate": 1.8424657534246573e-07, "log_odds_chosen": 3.5140812397003174, "log_odds_ratio": -0.0697648823261261, "logits/chosen": 0.9931638240814209, "logits/rejected": 1.0667028427124023, "logps/chosen": -2.164372444152832, "logps/rejected": -5.541152000427246, "loss": 0.6425, "nll_loss": 0.6355534791946411, "rewards/accuracies": 1.0, "rewards/chosen": -0.21643725037574768, "rewards/margins": 0.3376779556274414, "rewards/rejected": -0.5541152358055115, "step": 5958 }, { "epoch": 16.314852840520192, "grad_norm": 8.041875839233398, "learning_rate": 1.8410958904109588e-07, "log_odds_chosen": 2.0239040851593018, "log_odds_ratio": -0.4406985640525818, "logits/chosen": 0.9520217180252075, "logits/rejected": 1.0360581874847412, "logps/chosen": -2.589796304702759, "logps/rejected": -4.493045330047607, "loss": 0.7109, "nll_loss": 0.6668555736541748, "rewards/accuracies": 0.75, "rewards/chosen": -0.2589796483516693, "rewards/margins": 0.19032488763332367, "rewards/rejected": -0.4493045210838318, "step": 5959 }, { "epoch": 16.317590691307323, "grad_norm": 6.4917192459106445, "learning_rate": 1.83972602739726e-07, "log_odds_chosen": 2.3884117603302, "log_odds_ratio": -0.19368070363998413, "logits/chosen": 0.8998980522155762, "logits/rejected": 0.9403956532478333, "logps/chosen": -2.196777105331421, "logps/rejected": -4.443499565124512, "loss": 0.6207, "nll_loss": 0.6013527512550354, "rewards/accuracies": 1.0, "rewards/chosen": -0.21967771649360657, "rewards/margins": 0.22467225790023804, "rewards/rejected": -0.4443499445915222, "step": 5960 }, { "epoch": 16.320328542094455, "grad_norm": 5.026060104370117, "learning_rate": 1.8383561643835616e-07, "log_odds_chosen": 1.961796522140503, "log_odds_ratio": -0.2038661539554596, "logits/chosen": 1.0299787521362305, "logits/rejected": 0.9965351819992065, "logps/chosen": -1.740164041519165, "logps/rejected": -3.510241985321045, "loss": 0.5671, "nll_loss": 0.546707034111023, "rewards/accuracies": 1.0, "rewards/chosen": -0.17401641607284546, "rewards/margins": 0.17700782418251038, "rewards/rejected": -0.35102421045303345, "step": 5961 }, { "epoch": 16.32306639288159, "grad_norm": 5.791778564453125, "learning_rate": 1.8369863013698631e-07, "log_odds_chosen": 3.4267077445983887, "log_odds_ratio": -0.40192270278930664, "logits/chosen": 0.7614402770996094, "logits/rejected": 0.7357454299926758, "logps/chosen": -2.744536876678467, "logps/rejected": -6.0904645919799805, "loss": 0.7504, "nll_loss": 0.7102387547492981, "rewards/accuracies": 0.75, "rewards/chosen": -0.27445369958877563, "rewards/margins": 0.3345927298069, "rewards/rejected": -0.609046459197998, "step": 5962 }, { "epoch": 16.32580424366872, "grad_norm": 8.2645902633667, "learning_rate": 1.8356164383561641e-07, "log_odds_chosen": 2.284827709197998, "log_odds_ratio": -0.6573237776756287, "logits/chosen": 0.6543145179748535, "logits/rejected": 0.6699246168136597, "logps/chosen": -2.9181623458862305, "logps/rejected": -5.138049125671387, "loss": 0.7574, "nll_loss": 0.6916790008544922, "rewards/accuracies": 0.75, "rewards/chosen": -0.29181623458862305, "rewards/margins": 0.22198867797851562, "rewards/rejected": -0.5138049125671387, "step": 5963 }, { "epoch": 16.32854209445585, "grad_norm": 6.421146392822266, "learning_rate": 1.8342465753424657e-07, "log_odds_chosen": 4.8176398277282715, "log_odds_ratio": -0.02270979806780815, "logits/chosen": 0.9710477590560913, "logits/rejected": 1.0383328199386597, "logps/chosen": -2.2478063106536865, "logps/rejected": -6.868957996368408, "loss": 0.7965, "nll_loss": 0.7942544221878052, "rewards/accuracies": 1.0, "rewards/chosen": -0.2247806340456009, "rewards/margins": 0.46211516857147217, "rewards/rejected": -0.6868958473205566, "step": 5964 }, { "epoch": 16.331279945242983, "grad_norm": 9.660493850708008, "learning_rate": 1.832876712328767e-07, "log_odds_chosen": 2.3885462284088135, "log_odds_ratio": -0.6888600587844849, "logits/chosen": 0.9380362629890442, "logits/rejected": 0.9440248012542725, "logps/chosen": -2.9631922245025635, "logps/rejected": -5.271933078765869, "loss": 0.8581, "nll_loss": 0.7892528176307678, "rewards/accuracies": 0.625, "rewards/chosen": -0.29631921648979187, "rewards/margins": 0.23087406158447266, "rewards/rejected": -0.5271933078765869, "step": 5965 }, { "epoch": 16.334017796030118, "grad_norm": 5.477539539337158, "learning_rate": 1.8315068493150684e-07, "log_odds_chosen": 1.8218728303909302, "log_odds_ratio": -0.3126375079154968, "logits/chosen": 0.8708006739616394, "logits/rejected": 0.8338702917098999, "logps/chosen": -1.4562432765960693, "logps/rejected": -3.089810371398926, "loss": 0.5223, "nll_loss": 0.4909934997558594, "rewards/accuracies": 0.875, "rewards/chosen": -0.1456243246793747, "rewards/margins": 0.1633567214012146, "rewards/rejected": -0.3089810609817505, "step": 5966 }, { "epoch": 16.33675564681725, "grad_norm": 6.13831090927124, "learning_rate": 1.8301369863013697e-07, "log_odds_chosen": 2.4909133911132812, "log_odds_ratio": -0.19171816110610962, "logits/chosen": 0.7742590308189392, "logits/rejected": 0.7438808679580688, "logps/chosen": -2.255669593811035, "logps/rejected": -4.6168413162231445, "loss": 0.6495, "nll_loss": 0.6302887797355652, "rewards/accuracies": 1.0, "rewards/chosen": -0.22556695342063904, "rewards/margins": 0.2361171692609787, "rewards/rejected": -0.46168413758277893, "step": 5967 }, { "epoch": 16.33949349760438, "grad_norm": 9.186101913452148, "learning_rate": 1.8287671232876712e-07, "log_odds_chosen": 0.8932406902313232, "log_odds_ratio": -0.4920250475406647, "logits/chosen": 0.8735671043395996, "logits/rejected": 0.7316325902938843, "logps/chosen": -2.708059310913086, "logps/rejected": -3.5811994075775146, "loss": 0.8471, "nll_loss": 0.7978813052177429, "rewards/accuracies": 0.75, "rewards/chosen": -0.2708059251308441, "rewards/margins": 0.08731400221586227, "rewards/rejected": -0.358119934797287, "step": 5968 }, { "epoch": 16.34223134839151, "grad_norm": 7.308210849761963, "learning_rate": 1.8273972602739727e-07, "log_odds_chosen": 2.5425915718078613, "log_odds_ratio": -0.21182209253311157, "logits/chosen": 0.7603594064712524, "logits/rejected": 0.759713888168335, "logps/chosen": -2.025597333908081, "logps/rejected": -4.361456871032715, "loss": 0.6586, "nll_loss": 0.6374566555023193, "rewards/accuracies": 0.875, "rewards/chosen": -0.20255975425243378, "rewards/margins": 0.23358593881130219, "rewards/rejected": -0.4361456632614136, "step": 5969 }, { "epoch": 16.344969199178646, "grad_norm": 6.096683025360107, "learning_rate": 1.8260273972602737e-07, "log_odds_chosen": 1.4677577018737793, "log_odds_ratio": -0.4705091416835785, "logits/chosen": 0.8701028823852539, "logits/rejected": 0.8642314076423645, "logps/chosen": -2.2776055335998535, "logps/rejected": -3.6658480167388916, "loss": 0.6963, "nll_loss": 0.6492111086845398, "rewards/accuracies": 0.75, "rewards/chosen": -0.22776058316230774, "rewards/margins": 0.1388242095708847, "rewards/rejected": -0.36658480763435364, "step": 5970 }, { "epoch": 16.347707049965777, "grad_norm": 4.499016284942627, "learning_rate": 1.8246575342465752e-07, "log_odds_chosen": 2.689685106277466, "log_odds_ratio": -0.15078336000442505, "logits/chosen": 0.799752414226532, "logits/rejected": 0.842525839805603, "logps/chosen": -1.9638862609863281, "logps/rejected": -4.455484867095947, "loss": 0.5918, "nll_loss": 0.576701283454895, "rewards/accuracies": 1.0, "rewards/chosen": -0.1963886320590973, "rewards/margins": 0.24915990233421326, "rewards/rejected": -0.44554853439331055, "step": 5971 }, { "epoch": 16.35044490075291, "grad_norm": 5.439084053039551, "learning_rate": 1.8232876712328765e-07, "log_odds_chosen": 2.4083023071289062, "log_odds_ratio": -0.20516139268875122, "logits/chosen": 0.6389771699905396, "logits/rejected": 0.7245373725891113, "logps/chosen": -2.2595415115356445, "logps/rejected": -4.5298991203308105, "loss": 0.7649, "nll_loss": 0.7444018721580505, "rewards/accuracies": 1.0, "rewards/chosen": -0.22595417499542236, "rewards/margins": 0.22703571617603302, "rewards/rejected": -0.4529898762702942, "step": 5972 }, { "epoch": 16.35318275154004, "grad_norm": 6.034219264984131, "learning_rate": 1.821917808219178e-07, "log_odds_chosen": 2.012298345565796, "log_odds_ratio": -0.4087981581687927, "logits/chosen": 0.5668968558311462, "logits/rejected": 0.6002944707870483, "logps/chosen": -1.8213319778442383, "logps/rejected": -3.743901491165161, "loss": 0.728, "nll_loss": 0.6870785355567932, "rewards/accuracies": 0.75, "rewards/chosen": -0.18213319778442383, "rewards/margins": 0.19225698709487915, "rewards/rejected": -0.3743901550769806, "step": 5973 }, { "epoch": 16.355920602327174, "grad_norm": 5.265753746032715, "learning_rate": 1.8205479452054795e-07, "log_odds_chosen": 2.290365695953369, "log_odds_ratio": -0.22368183732032776, "logits/chosen": 0.9507120847702026, "logits/rejected": 1.0250396728515625, "logps/chosen": -2.1973180770874023, "logps/rejected": -4.3051252365112305, "loss": 0.6192, "nll_loss": 0.5968237519264221, "rewards/accuracies": 0.875, "rewards/chosen": -0.21973182260990143, "rewards/margins": 0.21078069508075714, "rewards/rejected": -0.43051254749298096, "step": 5974 }, { "epoch": 16.358658453114305, "grad_norm": 5.923810958862305, "learning_rate": 1.8191780821917808e-07, "log_odds_chosen": 4.51132345199585, "log_odds_ratio": -0.07139165699481964, "logits/chosen": 1.0439773797988892, "logits/rejected": 1.1109309196472168, "logps/chosen": -2.228982448577881, "logps/rejected": -6.606721878051758, "loss": 0.5734, "nll_loss": 0.5662521123886108, "rewards/accuracies": 1.0, "rewards/chosen": -0.22289825975894928, "rewards/margins": 0.4377739429473877, "rewards/rejected": -0.6606721878051758, "step": 5975 }, { "epoch": 16.361396303901437, "grad_norm": 7.210816383361816, "learning_rate": 1.8178082191780823e-07, "log_odds_chosen": 1.3066223859786987, "log_odds_ratio": -0.44337496161460876, "logits/chosen": 0.8451822400093079, "logits/rejected": 0.8239955902099609, "logps/chosen": -1.6246286630630493, "logps/rejected": -2.7855942249298096, "loss": 0.5996, "nll_loss": 0.5553120374679565, "rewards/accuracies": 0.875, "rewards/chosen": -0.16246287524700165, "rewards/margins": 0.11609655618667603, "rewards/rejected": -0.27855944633483887, "step": 5976 }, { "epoch": 16.36413415468857, "grad_norm": 6.3845415115356445, "learning_rate": 1.8164383561643833e-07, "log_odds_chosen": 2.477703094482422, "log_odds_ratio": -0.23335030674934387, "logits/chosen": 0.8881077766418457, "logits/rejected": 0.8777583241462708, "logps/chosen": -2.081874370574951, "logps/rejected": -4.450480937957764, "loss": 0.7172, "nll_loss": 0.6939119696617126, "rewards/accuracies": 1.0, "rewards/chosen": -0.20818743109703064, "rewards/margins": 0.23686063289642334, "rewards/rejected": -0.44504809379577637, "step": 5977 }, { "epoch": 16.366872005475702, "grad_norm": 4.974800109863281, "learning_rate": 1.8150684931506848e-07, "log_odds_chosen": 1.4997549057006836, "log_odds_ratio": -0.3581460416316986, "logits/chosen": 0.6539337635040283, "logits/rejected": 0.7569233179092407, "logps/chosen": -1.8988986015319824, "logps/rejected": -3.238250970840454, "loss": 0.6958, "nll_loss": 0.6599893569946289, "rewards/accuracies": 0.75, "rewards/chosen": -0.18988987803459167, "rewards/margins": 0.13393521308898926, "rewards/rejected": -0.3238251209259033, "step": 5978 }, { "epoch": 16.369609856262834, "grad_norm": 5.799515247344971, "learning_rate": 1.813698630136986e-07, "log_odds_chosen": 2.0583314895629883, "log_odds_ratio": -0.19152384996414185, "logits/chosen": 0.8510252237319946, "logits/rejected": 0.8358167409896851, "logps/chosen": -1.8226914405822754, "logps/rejected": -3.7113494873046875, "loss": 0.5877, "nll_loss": 0.5685719847679138, "rewards/accuracies": 1.0, "rewards/chosen": -0.1822691559791565, "rewards/margins": 0.1888657808303833, "rewards/rejected": -0.3711349368095398, "step": 5979 }, { "epoch": 16.372347707049965, "grad_norm": 5.592091083526611, "learning_rate": 1.8123287671232876e-07, "log_odds_chosen": 1.6770901679992676, "log_odds_ratio": -0.2667961120605469, "logits/chosen": 0.611771821975708, "logits/rejected": 0.6180036067962646, "logps/chosen": -1.5031654834747314, "logps/rejected": -3.0040292739868164, "loss": 0.519, "nll_loss": 0.49230796098709106, "rewards/accuracies": 1.0, "rewards/chosen": -0.1503165364265442, "rewards/margins": 0.1500863879919052, "rewards/rejected": -0.3004029393196106, "step": 5980 }, { "epoch": 16.3750855578371, "grad_norm": 5.631370544433594, "learning_rate": 1.810958904109589e-07, "log_odds_chosen": 2.2276644706726074, "log_odds_ratio": -0.1569589376449585, "logits/chosen": 0.6383025646209717, "logits/rejected": 0.6092049479484558, "logps/chosen": -2.2650294303894043, "logps/rejected": -4.310466766357422, "loss": 0.7235, "nll_loss": 0.7078099250793457, "rewards/accuracies": 1.0, "rewards/chosen": -0.22650295495986938, "rewards/margins": 0.20454373955726624, "rewards/rejected": -0.4310466945171356, "step": 5981 }, { "epoch": 16.37782340862423, "grad_norm": 5.715804100036621, "learning_rate": 1.8095890410958904e-07, "log_odds_chosen": 2.1159253120422363, "log_odds_ratio": -0.29900282621383667, "logits/chosen": 1.0374873876571655, "logits/rejected": 1.0412838459014893, "logps/chosen": -2.3687872886657715, "logps/rejected": -4.330810546875, "loss": 0.6668, "nll_loss": 0.6369394659996033, "rewards/accuracies": 0.875, "rewards/chosen": -0.23687872290611267, "rewards/margins": 0.1962023377418518, "rewards/rejected": -0.43308109045028687, "step": 5982 }, { "epoch": 16.380561259411362, "grad_norm": 4.777434825897217, "learning_rate": 1.8082191780821916e-07, "log_odds_chosen": 2.304727077484131, "log_odds_ratio": -0.24766719341278076, "logits/chosen": 0.719102144241333, "logits/rejected": 0.7272663712501526, "logps/chosen": -2.283538818359375, "logps/rejected": -4.44185733795166, "loss": 0.6483, "nll_loss": 0.6235090494155884, "rewards/accuracies": 0.875, "rewards/chosen": -0.22835388779640198, "rewards/margins": 0.2158319056034088, "rewards/rejected": -0.4441857933998108, "step": 5983 }, { "epoch": 16.383299110198493, "grad_norm": 6.671356201171875, "learning_rate": 1.806849315068493e-07, "log_odds_chosen": 1.327106237411499, "log_odds_ratio": -0.3691117465496063, "logits/chosen": 0.937828779220581, "logits/rejected": 0.886506199836731, "logps/chosen": -2.266972780227661, "logps/rejected": -3.501091957092285, "loss": 0.7193, "nll_loss": 0.682391881942749, "rewards/accuracies": 0.875, "rewards/chosen": -0.22669729590415955, "rewards/margins": 0.12341190874576569, "rewards/rejected": -0.3501092195510864, "step": 5984 }, { "epoch": 16.386036960985628, "grad_norm": 4.878699779510498, "learning_rate": 1.8054794520547944e-07, "log_odds_chosen": 2.1636006832122803, "log_odds_ratio": -0.19870659708976746, "logits/chosen": 0.6508434414863586, "logits/rejected": 0.6391705274581909, "logps/chosen": -1.8749228715896606, "logps/rejected": -3.8639321327209473, "loss": 0.6972, "nll_loss": 0.6773627400398254, "rewards/accuracies": 0.875, "rewards/chosen": -0.1874922811985016, "rewards/margins": 0.19890093803405762, "rewards/rejected": -0.3863931894302368, "step": 5985 }, { "epoch": 16.38877481177276, "grad_norm": 5.578246593475342, "learning_rate": 1.804109589041096e-07, "log_odds_chosen": 1.9263337850570679, "log_odds_ratio": -0.250876784324646, "logits/chosen": 0.9334924221038818, "logits/rejected": 0.965648353099823, "logps/chosen": -2.3676564693450928, "logps/rejected": -4.141049385070801, "loss": 0.5578, "nll_loss": 0.5327059626579285, "rewards/accuracies": 1.0, "rewards/chosen": -0.23676565289497375, "rewards/margins": 0.17733927071094513, "rewards/rejected": -0.4141049385070801, "step": 5986 }, { "epoch": 16.39151266255989, "grad_norm": 5.070324420928955, "learning_rate": 1.8027397260273972e-07, "log_odds_chosen": 2.3974320888519287, "log_odds_ratio": -0.20130395889282227, "logits/chosen": 0.9494929909706116, "logits/rejected": 1.020946979522705, "logps/chosen": -2.4959511756896973, "logps/rejected": -4.783305644989014, "loss": 0.6332, "nll_loss": 0.6130630970001221, "rewards/accuracies": 0.875, "rewards/chosen": -0.24959510564804077, "rewards/margins": 0.22873547673225403, "rewards/rejected": -0.4783305525779724, "step": 5987 }, { "epoch": 16.39425051334702, "grad_norm": 5.27368688583374, "learning_rate": 1.8013698630136987e-07, "log_odds_chosen": 1.7196669578552246, "log_odds_ratio": -0.2553294897079468, "logits/chosen": 0.7940736413002014, "logits/rejected": 0.7456016540527344, "logps/chosen": -2.4830737113952637, "logps/rejected": -4.134964466094971, "loss": 0.6788, "nll_loss": 0.6532365083694458, "rewards/accuracies": 1.0, "rewards/chosen": -0.24830736219882965, "rewards/margins": 0.16518908739089966, "rewards/rejected": -0.4134964644908905, "step": 5988 }, { "epoch": 16.396988364134156, "grad_norm": 6.010602951049805, "learning_rate": 1.8e-07, "log_odds_chosen": 0.8778798580169678, "log_odds_ratio": -0.4566715359687805, "logits/chosen": 0.9087936878204346, "logits/rejected": 0.8877733945846558, "logps/chosen": -1.8259401321411133, "logps/rejected": -2.6101200580596924, "loss": 0.6037, "nll_loss": 0.5580805540084839, "rewards/accuracies": 0.75, "rewards/chosen": -0.18259400129318237, "rewards/margins": 0.07841800898313522, "rewards/rejected": -0.2610120177268982, "step": 5989 }, { "epoch": 16.399726214921287, "grad_norm": 7.610523700714111, "learning_rate": 1.7986301369863012e-07, "log_odds_chosen": 1.4015793800354004, "log_odds_ratio": -0.33100199699401855, "logits/chosen": 0.6563513875007629, "logits/rejected": 0.5587854981422424, "logps/chosen": -2.9179201126098633, "logps/rejected": -4.207431793212891, "loss": 0.7693, "nll_loss": 0.7362144589424133, "rewards/accuracies": 0.875, "rewards/chosen": -0.29179203510284424, "rewards/margins": 0.12895117700099945, "rewards/rejected": -0.4207432270050049, "step": 5990 }, { "epoch": 16.40246406570842, "grad_norm": 5.762943267822266, "learning_rate": 1.7972602739726025e-07, "log_odds_chosen": 1.5964484214782715, "log_odds_ratio": -0.30068260431289673, "logits/chosen": 0.6340746283531189, "logits/rejected": 0.6866046786308289, "logps/chosen": -2.0575852394104004, "logps/rejected": -3.5435495376586914, "loss": 0.6595, "nll_loss": 0.6293966174125671, "rewards/accuracies": 0.875, "rewards/chosen": -0.20575852692127228, "rewards/margins": 0.14859643578529358, "rewards/rejected": -0.35435497760772705, "step": 5991 }, { "epoch": 16.40520191649555, "grad_norm": 5.505599498748779, "learning_rate": 1.795890410958904e-07, "log_odds_chosen": 2.1432158946990967, "log_odds_ratio": -0.3160431981086731, "logits/chosen": 0.9595429301261902, "logits/rejected": 0.8849931955337524, "logps/chosen": -1.467139482498169, "logps/rejected": -3.4295990467071533, "loss": 0.5817, "nll_loss": 0.5501276850700378, "rewards/accuracies": 0.875, "rewards/chosen": -0.1467139571905136, "rewards/margins": 0.196245938539505, "rewards/rejected": -0.3429599106311798, "step": 5992 }, { "epoch": 16.407939767282684, "grad_norm": 6.038954257965088, "learning_rate": 1.7945205479452055e-07, "log_odds_chosen": 0.8658422827720642, "log_odds_ratio": -0.40394729375839233, "logits/chosen": 0.920849621295929, "logits/rejected": 0.9585127830505371, "logps/chosen": -2.3615520000457764, "logps/rejected": -3.082601547241211, "loss": 0.5928, "nll_loss": 0.5523557066917419, "rewards/accuracies": 0.875, "rewards/chosen": -0.2361551970243454, "rewards/margins": 0.07210495322942734, "rewards/rejected": -0.30826014280319214, "step": 5993 }, { "epoch": 16.410677618069816, "grad_norm": 6.267599105834961, "learning_rate": 1.7931506849315068e-07, "log_odds_chosen": 0.8145966529846191, "log_odds_ratio": -0.46573856472969055, "logits/chosen": 0.537154495716095, "logits/rejected": 0.48210471868515015, "logps/chosen": -2.3374381065368652, "logps/rejected": -3.084042549133301, "loss": 0.701, "nll_loss": 0.6544262170791626, "rewards/accuracies": 0.875, "rewards/chosen": -0.2337438464164734, "rewards/margins": 0.07466043531894684, "rewards/rejected": -0.30840426683425903, "step": 5994 }, { "epoch": 16.413415468856947, "grad_norm": 6.414323806762695, "learning_rate": 1.7917808219178083e-07, "log_odds_chosen": 2.547095775604248, "log_odds_ratio": -0.17677468061447144, "logits/chosen": 0.8415391445159912, "logits/rejected": 0.8669761419296265, "logps/chosen": -2.657198190689087, "logps/rejected": -5.114686965942383, "loss": 0.7593, "nll_loss": 0.7416342496871948, "rewards/accuracies": 1.0, "rewards/chosen": -0.26571983098983765, "rewards/margins": 0.24574890732765198, "rewards/rejected": -0.5114687085151672, "step": 5995 }, { "epoch": 16.416153319644078, "grad_norm": 5.429285049438477, "learning_rate": 1.7904109589041093e-07, "log_odds_chosen": 1.7655130624771118, "log_odds_ratio": -0.29254329204559326, "logits/chosen": 0.8031406402587891, "logits/rejected": 0.8474608063697815, "logps/chosen": -2.4718503952026367, "logps/rejected": -4.1100687980651855, "loss": 0.6713, "nll_loss": 0.6420391201972961, "rewards/accuracies": 1.0, "rewards/chosen": -0.24718505144119263, "rewards/margins": 0.16382186114788055, "rewards/rejected": -0.411006897687912, "step": 5996 }, { "epoch": 16.418891170431213, "grad_norm": 5.703385353088379, "learning_rate": 1.7890410958904108e-07, "log_odds_chosen": 2.1095592975616455, "log_odds_ratio": -0.28555041551589966, "logits/chosen": 0.678607165813446, "logits/rejected": 0.714303731918335, "logps/chosen": -2.0893187522888184, "logps/rejected": -4.100997447967529, "loss": 0.6681, "nll_loss": 0.6395800709724426, "rewards/accuracies": 0.875, "rewards/chosen": -0.20893187820911407, "rewards/margins": 0.20116788148880005, "rewards/rejected": -0.41009974479675293, "step": 5997 }, { "epoch": 16.421629021218344, "grad_norm": 5.0200114250183105, "learning_rate": 1.787671232876712e-07, "log_odds_chosen": 2.0837020874023438, "log_odds_ratio": -0.26592203974723816, "logits/chosen": 1.1383345127105713, "logits/rejected": 1.1056865453720093, "logps/chosen": -1.9800066947937012, "logps/rejected": -3.97137451171875, "loss": 0.701, "nll_loss": 0.674369215965271, "rewards/accuracies": 0.875, "rewards/chosen": -0.19800066947937012, "rewards/margins": 0.19913679361343384, "rewards/rejected": -0.39713746309280396, "step": 5998 }, { "epoch": 16.424366872005475, "grad_norm": 9.800386428833008, "learning_rate": 1.7863013698630136e-07, "log_odds_chosen": 1.7056505680084229, "log_odds_ratio": -0.4876507818698883, "logits/chosen": 1.0560379028320312, "logits/rejected": 1.0998187065124512, "logps/chosen": -2.9936037063598633, "logps/rejected": -4.557016372680664, "loss": 0.674, "nll_loss": 0.6251919269561768, "rewards/accuracies": 0.875, "rewards/chosen": -0.29936033487319946, "rewards/margins": 0.1563413143157959, "rewards/rejected": -0.45570167899131775, "step": 5999 }, { "epoch": 16.427104722792606, "grad_norm": 6.514529705047607, "learning_rate": 1.784931506849315e-07, "log_odds_chosen": 1.8903237581253052, "log_odds_ratio": -0.5005537867546082, "logits/chosen": 0.9520095586776733, "logits/rejected": 1.1336017847061157, "logps/chosen": -2.4896459579467773, "logps/rejected": -4.290443420410156, "loss": 0.6519, "nll_loss": 0.6018917560577393, "rewards/accuracies": 0.625, "rewards/chosen": -0.2489646077156067, "rewards/margins": 0.18007975816726685, "rewards/rejected": -0.42904436588287354, "step": 6000 }, { "epoch": 16.42984257357974, "grad_norm": 5.029346942901611, "learning_rate": 1.7835616438356164e-07, "log_odds_chosen": 1.8529462814331055, "log_odds_ratio": -0.22357003390789032, "logits/chosen": 0.8790393471717834, "logits/rejected": 0.9232879281044006, "logps/chosen": -2.305860757827759, "logps/rejected": -4.02009391784668, "loss": 0.6629, "nll_loss": 0.6405004858970642, "rewards/accuracies": 1.0, "rewards/chosen": -0.23058606684207916, "rewards/margins": 0.1714233160018921, "rewards/rejected": -0.40200936794281006, "step": 6001 }, { "epoch": 16.432580424366872, "grad_norm": 6.233063220977783, "learning_rate": 1.782191780821918e-07, "log_odds_chosen": 4.154053688049316, "log_odds_ratio": -0.11688488721847534, "logits/chosen": 0.7594523429870605, "logits/rejected": 0.7600733637809753, "logps/chosen": -2.4884064197540283, "logps/rejected": -6.4283857345581055, "loss": 0.77, "nll_loss": 0.7583152651786804, "rewards/accuracies": 1.0, "rewards/chosen": -0.24884064495563507, "rewards/margins": 0.3939979374408722, "rewards/rejected": -0.6428385376930237, "step": 6002 }, { "epoch": 16.435318275154003, "grad_norm": 5.269156455993652, "learning_rate": 1.780821917808219e-07, "log_odds_chosen": 2.980034589767456, "log_odds_ratio": -0.3073442280292511, "logits/chosen": 0.7726146578788757, "logits/rejected": 0.7579237818717957, "logps/chosen": -2.66033935546875, "logps/rejected": -5.559424877166748, "loss": 0.7345, "nll_loss": 0.7037883400917053, "rewards/accuracies": 0.875, "rewards/chosen": -0.26603391766548157, "rewards/margins": 0.28990861773490906, "rewards/rejected": -0.5559425354003906, "step": 6003 }, { "epoch": 16.438056125941138, "grad_norm": 5.829180717468262, "learning_rate": 1.7794520547945204e-07, "log_odds_chosen": 1.8732974529266357, "log_odds_ratio": -0.36470866203308105, "logits/chosen": 0.9022883772850037, "logits/rejected": 0.8504023551940918, "logps/chosen": -1.6074069738388062, "logps/rejected": -3.2605786323547363, "loss": 0.5996, "nll_loss": 0.5631153583526611, "rewards/accuracies": 0.875, "rewards/chosen": -0.1607406884431839, "rewards/margins": 0.16531717777252197, "rewards/rejected": -0.3260578513145447, "step": 6004 }, { "epoch": 16.44079397672827, "grad_norm": 5.885805606842041, "learning_rate": 1.778082191780822e-07, "log_odds_chosen": 0.5546338558197021, "log_odds_ratio": -0.6119077801704407, "logits/chosen": 0.8348432183265686, "logits/rejected": 0.8721863031387329, "logps/chosen": -3.0242791175842285, "logps/rejected": -3.5132627487182617, "loss": 0.8184, "nll_loss": 0.7572312355041504, "rewards/accuracies": 0.5, "rewards/chosen": -0.30242791771888733, "rewards/margins": 0.048898376524448395, "rewards/rejected": -0.3513262867927551, "step": 6005 }, { "epoch": 16.4435318275154, "grad_norm": 6.0185546875, "learning_rate": 1.7767123287671232e-07, "log_odds_chosen": 2.0323009490966797, "log_odds_ratio": -0.2531556189060211, "logits/chosen": 0.519408106803894, "logits/rejected": 0.566180944442749, "logps/chosen": -1.7733447551727295, "logps/rejected": -3.6014719009399414, "loss": 0.5946, "nll_loss": 0.5693252682685852, "rewards/accuracies": 1.0, "rewards/chosen": -0.1773344725370407, "rewards/margins": 0.18281269073486328, "rewards/rejected": -0.3601471781730652, "step": 6006 }, { "epoch": 16.44626967830253, "grad_norm": 4.5656256675720215, "learning_rate": 1.7753424657534247e-07, "log_odds_chosen": 2.125251293182373, "log_odds_ratio": -0.1700226217508316, "logits/chosen": 1.105900526046753, "logits/rejected": 1.1707078218460083, "logps/chosen": -2.384000778198242, "logps/rejected": -4.408065319061279, "loss": 0.635, "nll_loss": 0.6179959177970886, "rewards/accuracies": 1.0, "rewards/chosen": -0.23840007185935974, "rewards/margins": 0.20240646600723267, "rewards/rejected": -0.4408065676689148, "step": 6007 }, { "epoch": 16.449007529089666, "grad_norm": 4.979948043823242, "learning_rate": 1.773972602739726e-07, "log_odds_chosen": 2.023949384689331, "log_odds_ratio": -0.2174028754234314, "logits/chosen": 1.013488531112671, "logits/rejected": 1.0376911163330078, "logps/chosen": -2.5282933712005615, "logps/rejected": -4.488320350646973, "loss": 0.6452, "nll_loss": 0.6234960556030273, "rewards/accuracies": 1.0, "rewards/chosen": -0.25282934308052063, "rewards/margins": 0.19600266218185425, "rewards/rejected": -0.44883203506469727, "step": 6008 }, { "epoch": 16.451745379876797, "grad_norm": 6.954064846038818, "learning_rate": 1.7726027397260275e-07, "log_odds_chosen": 2.808716297149658, "log_odds_ratio": -0.14054366946220398, "logits/chosen": 0.837043285369873, "logits/rejected": 0.910000205039978, "logps/chosen": -2.462096691131592, "logps/rejected": -5.09331750869751, "loss": 0.7542, "nll_loss": 0.7401016354560852, "rewards/accuracies": 1.0, "rewards/chosen": -0.24620966613292694, "rewards/margins": 0.2631220817565918, "rewards/rejected": -0.5093317627906799, "step": 6009 }, { "epoch": 16.45448323066393, "grad_norm": 4.970816135406494, "learning_rate": 1.7712328767123285e-07, "log_odds_chosen": 1.4610364437103271, "log_odds_ratio": -0.26474010944366455, "logits/chosen": 0.7597065567970276, "logits/rejected": 0.81967693567276, "logps/chosen": -2.1385092735290527, "logps/rejected": -3.509641647338867, "loss": 0.7021, "nll_loss": 0.6756139993667603, "rewards/accuracies": 1.0, "rewards/chosen": -0.21385091543197632, "rewards/margins": 0.13711324334144592, "rewards/rejected": -0.35096418857574463, "step": 6010 }, { "epoch": 16.45722108145106, "grad_norm": 6.07567834854126, "learning_rate": 1.76986301369863e-07, "log_odds_chosen": 1.9428980350494385, "log_odds_ratio": -0.26987943053245544, "logits/chosen": 0.9062320590019226, "logits/rejected": 0.9556045532226562, "logps/chosen": -2.1589527130126953, "logps/rejected": -4.011781215667725, "loss": 0.6534, "nll_loss": 0.6263880729675293, "rewards/accuracies": 0.875, "rewards/chosen": -0.21589529514312744, "rewards/margins": 0.1852828562259674, "rewards/rejected": -0.40117812156677246, "step": 6011 }, { "epoch": 16.459958932238195, "grad_norm": 6.995300769805908, "learning_rate": 1.7684931506849315e-07, "log_odds_chosen": 2.306894302368164, "log_odds_ratio": -0.2813571095466614, "logits/chosen": 0.9007408618927002, "logits/rejected": 0.9397577047348022, "logps/chosen": -2.1412320137023926, "logps/rejected": -4.326700687408447, "loss": 0.6274, "nll_loss": 0.5992985367774963, "rewards/accuracies": 1.0, "rewards/chosen": -0.2141231894493103, "rewards/margins": 0.21854686737060547, "rewards/rejected": -0.43267008662223816, "step": 6012 }, { "epoch": 16.462696783025326, "grad_norm": 5.849673748016357, "learning_rate": 1.7671232876712328e-07, "log_odds_chosen": 2.298051118850708, "log_odds_ratio": -0.2441912740468979, "logits/chosen": 0.9176408052444458, "logits/rejected": 0.924082338809967, "logps/chosen": -2.508070707321167, "logps/rejected": -4.693802833557129, "loss": 0.6873, "nll_loss": 0.6629183292388916, "rewards/accuracies": 0.875, "rewards/chosen": -0.2508070766925812, "rewards/margins": 0.21857324242591858, "rewards/rejected": -0.46938031911849976, "step": 6013 }, { "epoch": 16.465434633812457, "grad_norm": 6.361352920532227, "learning_rate": 1.7657534246575343e-07, "log_odds_chosen": 4.62247371673584, "log_odds_ratio": -0.13227464258670807, "logits/chosen": 0.8873850703239441, "logits/rejected": 0.9401885867118835, "logps/chosen": -1.9155316352844238, "logps/rejected": -6.356163024902344, "loss": 0.7131, "nll_loss": 0.6999069452285767, "rewards/accuracies": 1.0, "rewards/chosen": -0.19155317544937134, "rewards/margins": 0.4440631866455078, "rewards/rejected": -0.6356163024902344, "step": 6014 }, { "epoch": 16.468172484599588, "grad_norm": 5.6374993324279785, "learning_rate": 1.7643835616438355e-07, "log_odds_chosen": 2.951319694519043, "log_odds_ratio": -0.20946753025054932, "logits/chosen": 0.8099030256271362, "logits/rejected": 0.7789625525474548, "logps/chosen": -1.8417624235153198, "logps/rejected": -4.609637260437012, "loss": 0.6659, "nll_loss": 0.6449874639511108, "rewards/accuracies": 1.0, "rewards/chosen": -0.1841762214899063, "rewards/margins": 0.2767874598503113, "rewards/rejected": -0.46096372604370117, "step": 6015 }, { "epoch": 16.470910335386723, "grad_norm": 5.212337017059326, "learning_rate": 1.763013698630137e-07, "log_odds_chosen": 2.6491081714630127, "log_odds_ratio": -0.13026711344718933, "logits/chosen": 0.9960904121398926, "logits/rejected": 1.0066698789596558, "logps/chosen": -1.6784729957580566, "logps/rejected": -4.076228141784668, "loss": 0.609, "nll_loss": 0.5959553718566895, "rewards/accuracies": 1.0, "rewards/chosen": -0.16784729063510895, "rewards/margins": 0.23977553844451904, "rewards/rejected": -0.4076228439807892, "step": 6016 }, { "epoch": 16.473648186173854, "grad_norm": 5.384225845336914, "learning_rate": 1.761643835616438e-07, "log_odds_chosen": 1.8483701944351196, "log_odds_ratio": -0.2683447599411011, "logits/chosen": 0.8847166299819946, "logits/rejected": 0.9248278141021729, "logps/chosen": -2.031651020050049, "logps/rejected": -3.773144245147705, "loss": 0.5235, "nll_loss": 0.496624231338501, "rewards/accuracies": 1.0, "rewards/chosen": -0.20316511392593384, "rewards/margins": 0.17414936423301697, "rewards/rejected": -0.3773144483566284, "step": 6017 }, { "epoch": 16.476386036960985, "grad_norm": 6.125276565551758, "learning_rate": 1.7602739726027396e-07, "log_odds_chosen": 2.637873888015747, "log_odds_ratio": -0.281335711479187, "logits/chosen": 0.9412389993667603, "logits/rejected": 0.9563875198364258, "logps/chosen": -2.66200852394104, "logps/rejected": -5.210143089294434, "loss": 0.6039, "nll_loss": 0.5757999420166016, "rewards/accuracies": 1.0, "rewards/chosen": -0.26620087027549744, "rewards/margins": 0.25481340289115906, "rewards/rejected": -0.5210142731666565, "step": 6018 }, { "epoch": 16.479123887748116, "grad_norm": 5.1799187660217285, "learning_rate": 1.758904109589041e-07, "log_odds_chosen": 2.8181838989257812, "log_odds_ratio": -0.22206705808639526, "logits/chosen": 0.6485090255737305, "logits/rejected": 0.6185520887374878, "logps/chosen": -1.4156438112258911, "logps/rejected": -4.0103864669799805, "loss": 0.6033, "nll_loss": 0.5811161994934082, "rewards/accuracies": 1.0, "rewards/chosen": -0.14156438410282135, "rewards/margins": 0.2594742774963379, "rewards/rejected": -0.40103867650032043, "step": 6019 }, { "epoch": 16.48186173853525, "grad_norm": 5.323207378387451, "learning_rate": 1.7575342465753424e-07, "log_odds_chosen": 1.4046106338500977, "log_odds_ratio": -0.310302197933197, "logits/chosen": 0.8700982332229614, "logits/rejected": 0.9080028533935547, "logps/chosen": -1.9711759090423584, "logps/rejected": -3.2770285606384277, "loss": 0.6827, "nll_loss": 0.6516544818878174, "rewards/accuracies": 0.875, "rewards/chosen": -0.19711758196353912, "rewards/margins": 0.13058531284332275, "rewards/rejected": -0.3277028799057007, "step": 6020 }, { "epoch": 16.484599589322382, "grad_norm": 5.344717502593994, "learning_rate": 1.756164383561644e-07, "log_odds_chosen": 2.110915422439575, "log_odds_ratio": -0.2995448112487793, "logits/chosen": 0.8651137351989746, "logits/rejected": 0.9258750677108765, "logps/chosen": -2.102506399154663, "logps/rejected": -4.12271785736084, "loss": 0.6431, "nll_loss": 0.6131349802017212, "rewards/accuracies": 0.875, "rewards/chosen": -0.2102506458759308, "rewards/margins": 0.20202115178108215, "rewards/rejected": -0.4122718274593353, "step": 6021 }, { "epoch": 16.487337440109513, "grad_norm": 5.420061111450195, "learning_rate": 1.754794520547945e-07, "log_odds_chosen": 2.3522865772247314, "log_odds_ratio": -0.20551009476184845, "logits/chosen": 0.7529867887496948, "logits/rejected": 0.8006433844566345, "logps/chosen": -2.480266809463501, "logps/rejected": -4.750172138214111, "loss": 0.7142, "nll_loss": 0.6936907172203064, "rewards/accuracies": 1.0, "rewards/chosen": -0.24802666902542114, "rewards/margins": 0.22699052095413208, "rewards/rejected": -0.4750172197818756, "step": 6022 }, { "epoch": 16.490075290896645, "grad_norm": 5.993345260620117, "learning_rate": 1.7534246575342464e-07, "log_odds_chosen": 1.670885443687439, "log_odds_ratio": -0.3212975263595581, "logits/chosen": 0.8262941837310791, "logits/rejected": 0.8331171274185181, "logps/chosen": -2.057957172393799, "logps/rejected": -3.584484577178955, "loss": 0.6811, "nll_loss": 0.6490007638931274, "rewards/accuracies": 0.875, "rewards/chosen": -0.20579572021961212, "rewards/margins": 0.15265274047851562, "rewards/rejected": -0.35844847559928894, "step": 6023 }, { "epoch": 16.49281314168378, "grad_norm": 7.515902042388916, "learning_rate": 1.752054794520548e-07, "log_odds_chosen": 0.8138805627822876, "log_odds_ratio": -0.49079251289367676, "logits/chosen": 0.8479428291320801, "logits/rejected": 0.8085298538208008, "logps/chosen": -2.719186782836914, "logps/rejected": -3.4793972969055176, "loss": 0.8304, "nll_loss": 0.7813348174095154, "rewards/accuracies": 0.625, "rewards/chosen": -0.2719186842441559, "rewards/margins": 0.07602104544639587, "rewards/rejected": -0.34793972969055176, "step": 6024 }, { "epoch": 16.49555099247091, "grad_norm": 13.094797134399414, "learning_rate": 1.7506849315068492e-07, "log_odds_chosen": 0.9056047201156616, "log_odds_ratio": -0.6428908109664917, "logits/chosen": 1.050442099571228, "logits/rejected": 1.000835657119751, "logps/chosen": -3.1445913314819336, "logps/rejected": -4.011979103088379, "loss": 0.7678, "nll_loss": 0.7034934759140015, "rewards/accuracies": 0.75, "rewards/chosen": -0.3144591152667999, "rewards/margins": 0.08673880249261856, "rewards/rejected": -0.4011979401111603, "step": 6025 }, { "epoch": 16.49828884325804, "grad_norm": 6.450070381164551, "learning_rate": 1.7493150684931507e-07, "log_odds_chosen": 4.011574745178223, "log_odds_ratio": -0.16929028928279877, "logits/chosen": 0.8113020658493042, "logits/rejected": 0.9105361700057983, "logps/chosen": -2.643014430999756, "logps/rejected": -6.584640026092529, "loss": 0.8223, "nll_loss": 0.8053296804428101, "rewards/accuracies": 0.875, "rewards/chosen": -0.26430144906044006, "rewards/margins": 0.3941625952720642, "rewards/rejected": -0.6584640145301819, "step": 6026 }, { "epoch": 16.501026694045173, "grad_norm": 5.797945022583008, "learning_rate": 1.747945205479452e-07, "log_odds_chosen": 2.569976806640625, "log_odds_ratio": -0.20937460660934448, "logits/chosen": 0.6174182295799255, "logits/rejected": 0.6162149906158447, "logps/chosen": -1.7743498086929321, "logps/rejected": -4.202912330627441, "loss": 0.7034, "nll_loss": 0.6824368238449097, "rewards/accuracies": 1.0, "rewards/chosen": -0.1774349808692932, "rewards/margins": 0.24285629391670227, "rewards/rejected": -0.4202912449836731, "step": 6027 }, { "epoch": 16.503764544832308, "grad_norm": 5.897658348083496, "learning_rate": 1.7465753424657535e-07, "log_odds_chosen": 3.482192039489746, "log_odds_ratio": -0.1720300316810608, "logits/chosen": 0.8550552725791931, "logits/rejected": 0.9503000974655151, "logps/chosen": -2.5270800590515137, "logps/rejected": -5.90533971786499, "loss": 0.6576, "nll_loss": 0.6403831839561462, "rewards/accuracies": 1.0, "rewards/chosen": -0.2527080178260803, "rewards/margins": 0.3378260135650635, "rewards/rejected": -0.5905340313911438, "step": 6028 }, { "epoch": 16.50650239561944, "grad_norm": 4.92555570602417, "learning_rate": 1.7452054794520547e-07, "log_odds_chosen": 1.773129940032959, "log_odds_ratio": -0.308169424533844, "logits/chosen": 0.7874923348426819, "logits/rejected": 0.7879329919815063, "logps/chosen": -1.9330108165740967, "logps/rejected": -3.5982863903045654, "loss": 0.6139, "nll_loss": 0.5830937623977661, "rewards/accuracies": 0.875, "rewards/chosen": -0.19330109655857086, "rewards/margins": 0.16652755439281464, "rewards/rejected": -0.3598286509513855, "step": 6029 }, { "epoch": 16.50924024640657, "grad_norm": 5.298651218414307, "learning_rate": 1.743835616438356e-07, "log_odds_chosen": 2.4345390796661377, "log_odds_ratio": -0.16142448782920837, "logits/chosen": 0.9380882978439331, "logits/rejected": 0.9965295195579529, "logps/chosen": -1.4464876651763916, "logps/rejected": -3.6445772647857666, "loss": 0.5334, "nll_loss": 0.5172624588012695, "rewards/accuracies": 1.0, "rewards/chosen": -0.14464876055717468, "rewards/margins": 0.21980898082256317, "rewards/rejected": -0.36445772647857666, "step": 6030 }, { "epoch": 16.511978097193705, "grad_norm": 5.982094764709473, "learning_rate": 1.7424657534246575e-07, "log_odds_chosen": 2.278012990951538, "log_odds_ratio": -0.25733256340026855, "logits/chosen": 0.8615303635597229, "logits/rejected": 0.8994498252868652, "logps/chosen": -2.275571584701538, "logps/rejected": -4.412581920623779, "loss": 0.5881, "nll_loss": 0.5623905658721924, "rewards/accuracies": 0.875, "rewards/chosen": -0.22755716741085052, "rewards/margins": 0.2137010246515274, "rewards/rejected": -0.44125819206237793, "step": 6031 }, { "epoch": 16.514715947980836, "grad_norm": 5.61169958114624, "learning_rate": 1.7410958904109587e-07, "log_odds_chosen": 1.9276165962219238, "log_odds_ratio": -0.3338050842285156, "logits/chosen": 0.8998949527740479, "logits/rejected": 0.9484613537788391, "logps/chosen": -1.8158683776855469, "logps/rejected": -3.547466516494751, "loss": 0.5678, "nll_loss": 0.534459114074707, "rewards/accuracies": 0.75, "rewards/chosen": -0.1815868318080902, "rewards/margins": 0.17315980792045593, "rewards/rejected": -0.35474663972854614, "step": 6032 }, { "epoch": 16.517453798767967, "grad_norm": 5.223745346069336, "learning_rate": 1.7397260273972603e-07, "log_odds_chosen": 3.4763174057006836, "log_odds_ratio": -0.07975839078426361, "logits/chosen": 0.8792169094085693, "logits/rejected": 0.9554020166397095, "logps/chosen": -2.7804312705993652, "logps/rejected": -6.1504316329956055, "loss": 0.6452, "nll_loss": 0.6372109055519104, "rewards/accuracies": 1.0, "rewards/chosen": -0.27804315090179443, "rewards/margins": 0.3370000720024109, "rewards/rejected": -0.6150431632995605, "step": 6033 }, { "epoch": 16.520191649555098, "grad_norm": 11.79967975616455, "learning_rate": 1.7383561643835615e-07, "log_odds_chosen": 2.439311981201172, "log_odds_ratio": -0.5199713706970215, "logits/chosen": 0.9254519939422607, "logits/rejected": 0.8794143199920654, "logps/chosen": -2.6440114974975586, "logps/rejected": -4.930239677429199, "loss": 0.7608, "nll_loss": 0.70882648229599, "rewards/accuracies": 0.75, "rewards/chosen": -0.2644011378288269, "rewards/margins": 0.22862283885478973, "rewards/rejected": -0.49302396178245544, "step": 6034 }, { "epoch": 16.522929500342233, "grad_norm": 5.010838508605957, "learning_rate": 1.736986301369863e-07, "log_odds_chosen": 1.4315539598464966, "log_odds_ratio": -0.2724788784980774, "logits/chosen": 0.7408289909362793, "logits/rejected": 0.8803989887237549, "logps/chosen": -2.3405914306640625, "logps/rejected": -3.6663529872894287, "loss": 0.6308, "nll_loss": 0.6035633087158203, "rewards/accuracies": 1.0, "rewards/chosen": -0.2340591549873352, "rewards/margins": 0.13257616758346558, "rewards/rejected": -0.3666353225708008, "step": 6035 }, { "epoch": 16.525667351129364, "grad_norm": 5.5329718589782715, "learning_rate": 1.7356164383561643e-07, "log_odds_chosen": 1.7071452140808105, "log_odds_ratio": -0.24281643331050873, "logits/chosen": 0.7800602912902832, "logits/rejected": 0.7891771793365479, "logps/chosen": -1.9314730167388916, "logps/rejected": -3.4904837608337402, "loss": 0.6357, "nll_loss": 0.6114335060119629, "rewards/accuracies": 1.0, "rewards/chosen": -0.19314731657505035, "rewards/margins": 0.15590104460716248, "rewards/rejected": -0.34904834628105164, "step": 6036 }, { "epoch": 16.528405201916495, "grad_norm": 5.591225624084473, "learning_rate": 1.7342465753424656e-07, "log_odds_chosen": 2.0915136337280273, "log_odds_ratio": -0.25738325715065, "logits/chosen": 1.0702683925628662, "logits/rejected": 1.1352864503860474, "logps/chosen": -2.612544536590576, "logps/rejected": -4.6214494705200195, "loss": 0.6414, "nll_loss": 0.6156793236732483, "rewards/accuracies": 0.75, "rewards/chosen": -0.2612544596195221, "rewards/margins": 0.20089051127433777, "rewards/rejected": -0.46214500069618225, "step": 6037 }, { "epoch": 16.531143052703626, "grad_norm": 6.655810832977295, "learning_rate": 1.732876712328767e-07, "log_odds_chosen": 2.0144433975219727, "log_odds_ratio": -0.21513603627681732, "logits/chosen": 0.8559651374816895, "logits/rejected": 0.781305193901062, "logps/chosen": -2.8032844066619873, "logps/rejected": -4.727874755859375, "loss": 0.8346, "nll_loss": 0.8131095170974731, "rewards/accuracies": 1.0, "rewards/chosen": -0.2803284525871277, "rewards/margins": 0.19245903193950653, "rewards/rejected": -0.472787469625473, "step": 6038 }, { "epoch": 16.53388090349076, "grad_norm": 6.897248268127441, "learning_rate": 1.7315068493150683e-07, "log_odds_chosen": 2.506887435913086, "log_odds_ratio": -0.19909709692001343, "logits/chosen": 0.9492385387420654, "logits/rejected": 0.9752955436706543, "logps/chosen": -1.8171944618225098, "logps/rejected": -4.074284553527832, "loss": 0.6457, "nll_loss": 0.6257833242416382, "rewards/accuracies": 0.875, "rewards/chosen": -0.18171945214271545, "rewards/margins": 0.2257089763879776, "rewards/rejected": -0.40742844343185425, "step": 6039 }, { "epoch": 16.536618754277892, "grad_norm": 5.9419097900390625, "learning_rate": 1.7301369863013699e-07, "log_odds_chosen": 1.9715614318847656, "log_odds_ratio": -0.34982776641845703, "logits/chosen": 0.9824173450469971, "logits/rejected": 0.9828958511352539, "logps/chosen": -2.8875672817230225, "logps/rejected": -4.781719207763672, "loss": 0.6997, "nll_loss": 0.6647544503211975, "rewards/accuracies": 0.875, "rewards/chosen": -0.288756787776947, "rewards/margins": 0.18941518664360046, "rewards/rejected": -0.4781719744205475, "step": 6040 }, { "epoch": 16.539356605065024, "grad_norm": 6.038936138153076, "learning_rate": 1.728767123287671e-07, "log_odds_chosen": 3.25335431098938, "log_odds_ratio": -0.16809064149856567, "logits/chosen": 0.9323869347572327, "logits/rejected": 0.8711985945701599, "logps/chosen": -1.8754210472106934, "logps/rejected": -4.936798572540283, "loss": 0.627, "nll_loss": 0.6101537346839905, "rewards/accuracies": 1.0, "rewards/chosen": -0.18754209578037262, "rewards/margins": 0.30613774061203003, "rewards/rejected": -0.49367982149124146, "step": 6041 }, { "epoch": 16.542094455852155, "grad_norm": 11.336430549621582, "learning_rate": 1.7273972602739726e-07, "log_odds_chosen": 1.3959975242614746, "log_odds_ratio": -0.4650565981864929, "logits/chosen": 0.9199511408805847, "logits/rejected": 0.9499661326408386, "logps/chosen": -2.646420478820801, "logps/rejected": -3.9574689865112305, "loss": 0.7035, "nll_loss": 0.6570003032684326, "rewards/accuracies": 0.75, "rewards/chosen": -0.26464205980300903, "rewards/margins": 0.13110484182834625, "rewards/rejected": -0.3957468867301941, "step": 6042 }, { "epoch": 16.54483230663929, "grad_norm": 5.746023178100586, "learning_rate": 1.7260273972602742e-07, "log_odds_chosen": 2.0229997634887695, "log_odds_ratio": -0.19904646277427673, "logits/chosen": 0.76755690574646, "logits/rejected": 0.7251873016357422, "logps/chosen": -1.5974044799804688, "logps/rejected": -3.37965989112854, "loss": 0.6633, "nll_loss": 0.643355667591095, "rewards/accuracies": 1.0, "rewards/chosen": -0.15974044799804688, "rewards/margins": 0.1782255321741104, "rewards/rejected": -0.3379659950733185, "step": 6043 }, { "epoch": 16.54757015742642, "grad_norm": 5.1458048820495605, "learning_rate": 1.7246575342465751e-07, "log_odds_chosen": 1.5462088584899902, "log_odds_ratio": -0.21515293419361115, "logits/chosen": 0.6339758634567261, "logits/rejected": 0.634209156036377, "logps/chosen": -1.7825344800949097, "logps/rejected": -3.1615169048309326, "loss": 0.6907, "nll_loss": 0.6692054271697998, "rewards/accuracies": 1.0, "rewards/chosen": -0.17825345695018768, "rewards/margins": 0.137898251414299, "rewards/rejected": -0.3161516785621643, "step": 6044 }, { "epoch": 16.550308008213552, "grad_norm": 10.870138168334961, "learning_rate": 1.7232876712328767e-07, "log_odds_chosen": 1.1522421836853027, "log_odds_ratio": -0.5094829201698303, "logits/chosen": 1.045377492904663, "logits/rejected": 1.0339083671569824, "logps/chosen": -2.406808376312256, "logps/rejected": -3.489696979522705, "loss": 0.7578, "nll_loss": 0.7068725824356079, "rewards/accuracies": 0.75, "rewards/chosen": -0.24068081378936768, "rewards/margins": 0.10828886926174164, "rewards/rejected": -0.3489696979522705, "step": 6045 }, { "epoch": 16.553045859000683, "grad_norm": 13.243195533752441, "learning_rate": 1.721917808219178e-07, "log_odds_chosen": 2.4449338912963867, "log_odds_ratio": -0.17931929230690002, "logits/chosen": 0.8715123534202576, "logits/rejected": 0.8786001801490784, "logps/chosen": -2.7594525814056396, "logps/rejected": -5.116100311279297, "loss": 0.6259, "nll_loss": 0.6079404354095459, "rewards/accuracies": 1.0, "rewards/chosen": -0.275945246219635, "rewards/margins": 0.2356647551059723, "rewards/rejected": -0.5116100311279297, "step": 6046 }, { "epoch": 16.555783709787818, "grad_norm": 5.350107192993164, "learning_rate": 1.7205479452054794e-07, "log_odds_chosen": 2.3678972721099854, "log_odds_ratio": -0.1342422366142273, "logits/chosen": 0.9080727100372314, "logits/rejected": 0.9264050722122192, "logps/chosen": -1.7512776851654053, "logps/rejected": -3.902338981628418, "loss": 0.6442, "nll_loss": 0.630753219127655, "rewards/accuracies": 1.0, "rewards/chosen": -0.1751277595758438, "rewards/margins": 0.21510617434978485, "rewards/rejected": -0.39023396372795105, "step": 6047 }, { "epoch": 16.55852156057495, "grad_norm": 6.385751247406006, "learning_rate": 1.7191780821917807e-07, "log_odds_chosen": 1.2901785373687744, "log_odds_ratio": -0.3552817404270172, "logits/chosen": 1.0680489540100098, "logits/rejected": 1.105733036994934, "logps/chosen": -3.0481948852539062, "logps/rejected": -4.271971702575684, "loss": 0.7731, "nll_loss": 0.7376002073287964, "rewards/accuracies": 0.875, "rewards/chosen": -0.3048195242881775, "rewards/margins": 0.1223776638507843, "rewards/rejected": -0.4271971583366394, "step": 6048 }, { "epoch": 16.56125941136208, "grad_norm": 5.720571994781494, "learning_rate": 1.7178082191780822e-07, "log_odds_chosen": 2.351208209991455, "log_odds_ratio": -0.1980576068162918, "logits/chosen": 0.798805832862854, "logits/rejected": 0.8851503133773804, "logps/chosen": -2.3346757888793945, "logps/rejected": -4.577557563781738, "loss": 0.7087, "nll_loss": 0.6888560652732849, "rewards/accuracies": 1.0, "rewards/chosen": -0.23346757888793945, "rewards/margins": 0.22428825497627258, "rewards/rejected": -0.45775583386421204, "step": 6049 }, { "epoch": 16.56399726214921, "grad_norm": 6.873622894287109, "learning_rate": 1.7164383561643835e-07, "log_odds_chosen": 1.7395620346069336, "log_odds_ratio": -0.3046148717403412, "logits/chosen": 0.9299835562705994, "logits/rejected": 0.9921303987503052, "logps/chosen": -3.024049758911133, "logps/rejected": -4.687532424926758, "loss": 0.6206, "nll_loss": 0.5901697874069214, "rewards/accuracies": 0.875, "rewards/chosen": -0.3024049699306488, "rewards/margins": 0.16634830832481384, "rewards/rejected": -0.46875327825546265, "step": 6050 }, { "epoch": 16.566735112936346, "grad_norm": 5.182485103607178, "learning_rate": 1.7150684931506847e-07, "log_odds_chosen": 3.402070999145508, "log_odds_ratio": -0.19450359046459198, "logits/chosen": 0.8487983345985413, "logits/rejected": 0.8896668553352356, "logps/chosen": -2.136204957962036, "logps/rejected": -5.3441009521484375, "loss": 0.6348, "nll_loss": 0.6153032183647156, "rewards/accuracies": 1.0, "rewards/chosen": -0.21362049877643585, "rewards/margins": 0.3207896053791046, "rewards/rejected": -0.5344101190567017, "step": 6051 }, { "epoch": 16.569472963723477, "grad_norm": 5.747364521026611, "learning_rate": 1.7136986301369863e-07, "log_odds_chosen": 1.6941379308700562, "log_odds_ratio": -0.3673563301563263, "logits/chosen": 0.7021728754043579, "logits/rejected": 0.7274467945098877, "logps/chosen": -2.6783933639526367, "logps/rejected": -4.241905689239502, "loss": 0.7653, "nll_loss": 0.7286124229431152, "rewards/accuracies": 0.875, "rewards/chosen": -0.26783937215805054, "rewards/margins": 0.156351238489151, "rewards/rejected": -0.42419061064720154, "step": 6052 }, { "epoch": 16.57221081451061, "grad_norm": 7.821060657501221, "learning_rate": 1.7123287671232875e-07, "log_odds_chosen": 1.224708914756775, "log_odds_ratio": -0.4072844088077545, "logits/chosen": 0.8824666142463684, "logits/rejected": 0.8958032727241516, "logps/chosen": -2.5467848777770996, "logps/rejected": -3.709514617919922, "loss": 0.6755, "nll_loss": 0.6347900629043579, "rewards/accuracies": 0.75, "rewards/chosen": -0.25467851758003235, "rewards/margins": 0.11627297103404999, "rewards/rejected": -0.37095147371292114, "step": 6053 }, { "epoch": 16.57494866529774, "grad_norm": 5.112720489501953, "learning_rate": 1.710958904109589e-07, "log_odds_chosen": 2.884984254837036, "log_odds_ratio": -0.1269330382347107, "logits/chosen": 1.0259472131729126, "logits/rejected": 1.0267341136932373, "logps/chosen": -2.13999605178833, "logps/rejected": -4.846061706542969, "loss": 0.5633, "nll_loss": 0.5506051778793335, "rewards/accuracies": 1.0, "rewards/chosen": -0.21399959921836853, "rewards/margins": 0.2706066071987152, "rewards/rejected": -0.48460620641708374, "step": 6054 }, { "epoch": 16.577686516084874, "grad_norm": 5.026331424713135, "learning_rate": 1.7095890410958905e-07, "log_odds_chosen": 1.8141694068908691, "log_odds_ratio": -0.19579459726810455, "logits/chosen": 0.7009103894233704, "logits/rejected": 0.7432188987731934, "logps/chosen": -2.603339195251465, "logps/rejected": -4.312327861785889, "loss": 0.5833, "nll_loss": 0.5637681484222412, "rewards/accuracies": 1.0, "rewards/chosen": -0.26033392548561096, "rewards/margins": 0.17089886963367462, "rewards/rejected": -0.4312328100204468, "step": 6055 }, { "epoch": 16.580424366872005, "grad_norm": 6.214585781097412, "learning_rate": 1.7082191780821918e-07, "log_odds_chosen": 1.5998046398162842, "log_odds_ratio": -0.28198811411857605, "logits/chosen": 0.9313935041427612, "logits/rejected": 0.9314495325088501, "logps/chosen": -1.8255189657211304, "logps/rejected": -3.276064872741699, "loss": 0.6233, "nll_loss": 0.5951175689697266, "rewards/accuracies": 1.0, "rewards/chosen": -0.18255189061164856, "rewards/margins": 0.14505457878112793, "rewards/rejected": -0.3276064395904541, "step": 6056 }, { "epoch": 16.583162217659137, "grad_norm": 6.926342964172363, "learning_rate": 1.706849315068493e-07, "log_odds_chosen": 1.1588771343231201, "log_odds_ratio": -0.4765051305294037, "logits/chosen": 1.0848500728607178, "logits/rejected": 1.02977454662323, "logps/chosen": -2.350182294845581, "logps/rejected": -3.4111905097961426, "loss": 0.6702, "nll_loss": 0.6225192546844482, "rewards/accuracies": 0.625, "rewards/chosen": -0.23501822352409363, "rewards/margins": 0.10610084980726242, "rewards/rejected": -0.34111911058425903, "step": 6057 }, { "epoch": 16.58590006844627, "grad_norm": 5.034332275390625, "learning_rate": 1.7054794520547943e-07, "log_odds_chosen": 0.940437912940979, "log_odds_ratio": -0.3885380029678345, "logits/chosen": 0.5759336352348328, "logits/rejected": 0.5073661208152771, "logps/chosen": -1.5689518451690674, "logps/rejected": -2.3648898601531982, "loss": 0.6895, "nll_loss": 0.6506603360176086, "rewards/accuracies": 0.875, "rewards/chosen": -0.15689519047737122, "rewards/margins": 0.07959379255771637, "rewards/rejected": -0.23648898303508759, "step": 6058 }, { "epoch": 16.588637919233403, "grad_norm": 6.39376974105835, "learning_rate": 1.7041095890410958e-07, "log_odds_chosen": 2.4760560989379883, "log_odds_ratio": -0.23806561529636383, "logits/chosen": 0.812018871307373, "logits/rejected": 0.8571534156799316, "logps/chosen": -2.3985183238983154, "logps/rejected": -4.74833345413208, "loss": 0.624, "nll_loss": 0.6001756191253662, "rewards/accuracies": 1.0, "rewards/chosen": -0.23985183238983154, "rewards/margins": 0.2349814921617508, "rewards/rejected": -0.4748333692550659, "step": 6059 }, { "epoch": 16.591375770020534, "grad_norm": 5.15045690536499, "learning_rate": 1.702739726027397e-07, "log_odds_chosen": 2.135678291320801, "log_odds_ratio": -0.295604407787323, "logits/chosen": 0.5889530777931213, "logits/rejected": 0.6338523626327515, "logps/chosen": -1.9661972522735596, "logps/rejected": -3.961857318878174, "loss": 0.637, "nll_loss": 0.6074369549751282, "rewards/accuracies": 0.875, "rewards/chosen": -0.19661971926689148, "rewards/margins": 0.19956602156162262, "rewards/rejected": -0.3961857557296753, "step": 6060 }, { "epoch": 16.594113620807665, "grad_norm": 5.247171878814697, "learning_rate": 1.7013698630136986e-07, "log_odds_chosen": 1.1303553581237793, "log_odds_ratio": -0.30633246898651123, "logits/chosen": 0.9234850406646729, "logits/rejected": 0.9352693557739258, "logps/chosen": -2.0752720832824707, "logps/rejected": -3.085620403289795, "loss": 0.6422, "nll_loss": 0.6115559339523315, "rewards/accuracies": 1.0, "rewards/chosen": -0.20752720534801483, "rewards/margins": 0.10103483498096466, "rewards/rejected": -0.3085620403289795, "step": 6061 }, { "epoch": 16.5968514715948, "grad_norm": 5.2035908699035645, "learning_rate": 1.7000000000000001e-07, "log_odds_chosen": 1.694701075553894, "log_odds_ratio": -0.2509680688381195, "logits/chosen": 0.8227680325508118, "logits/rejected": 0.8905365467071533, "logps/chosen": -1.762342095375061, "logps/rejected": -3.3115665912628174, "loss": 0.571, "nll_loss": 0.5458959937095642, "rewards/accuracies": 0.875, "rewards/chosen": -0.17623421549797058, "rewards/margins": 0.1549224555492401, "rewards/rejected": -0.3311566710472107, "step": 6062 }, { "epoch": 16.59958932238193, "grad_norm": 5.815262317657471, "learning_rate": 1.698630136986301e-07, "log_odds_chosen": 1.979050874710083, "log_odds_ratio": -0.201846644282341, "logits/chosen": 1.0763626098632812, "logits/rejected": 1.1669222116470337, "logps/chosen": -2.1206867694854736, "logps/rejected": -3.9863107204437256, "loss": 0.5843, "nll_loss": 0.5640743970870972, "rewards/accuracies": 1.0, "rewards/chosen": -0.21206869184970856, "rewards/margins": 0.18656237423419952, "rewards/rejected": -0.3986310362815857, "step": 6063 }, { "epoch": 16.602327173169062, "grad_norm": 5.821183204650879, "learning_rate": 1.6972602739726026e-07, "log_odds_chosen": 2.9386496543884277, "log_odds_ratio": -0.26388460397720337, "logits/chosen": 0.9601845145225525, "logits/rejected": 0.9577137231826782, "logps/chosen": -2.262960433959961, "logps/rejected": -5.136250019073486, "loss": 0.7111, "nll_loss": 0.6846685409545898, "rewards/accuracies": 1.0, "rewards/chosen": -0.226296067237854, "rewards/margins": 0.28732895851135254, "rewards/rejected": -0.5136250257492065, "step": 6064 }, { "epoch": 16.605065023956193, "grad_norm": 9.54289722442627, "learning_rate": 1.695890410958904e-07, "log_odds_chosen": 1.058544635772705, "log_odds_ratio": -0.7104862332344055, "logits/chosen": 0.9753057956695557, "logits/rejected": 1.1536192893981934, "logps/chosen": -3.3695850372314453, "logps/rejected": -4.37789249420166, "loss": 0.6728, "nll_loss": 0.6017389297485352, "rewards/accuracies": 0.875, "rewards/chosen": -0.33695849776268005, "rewards/margins": 0.10083073377609253, "rewards/rejected": -0.43778926134109497, "step": 6065 }, { "epoch": 16.607802874743328, "grad_norm": 5.294715881347656, "learning_rate": 1.6945205479452054e-07, "log_odds_chosen": 1.6254127025604248, "log_odds_ratio": -0.27217990159988403, "logits/chosen": 0.9091463088989258, "logits/rejected": 0.9558411836624146, "logps/chosen": -2.2534306049346924, "logps/rejected": -3.7826499938964844, "loss": 0.5701, "nll_loss": 0.5428807139396667, "rewards/accuracies": 1.0, "rewards/chosen": -0.22534307837486267, "rewards/margins": 0.15292195975780487, "rewards/rejected": -0.37826502323150635, "step": 6066 }, { "epoch": 16.61054072553046, "grad_norm": 6.467851638793945, "learning_rate": 1.6931506849315067e-07, "log_odds_chosen": 2.8217926025390625, "log_odds_ratio": -0.27832600474357605, "logits/chosen": 0.9994101524353027, "logits/rejected": 0.9954721331596375, "logps/chosen": -2.1328542232513428, "logps/rejected": -4.830392360687256, "loss": 0.6636, "nll_loss": 0.6357580423355103, "rewards/accuracies": 0.75, "rewards/chosen": -0.2132854461669922, "rewards/margins": 0.26975375413894653, "rewards/rejected": -0.4830392003059387, "step": 6067 }, { "epoch": 16.61327857631759, "grad_norm": 5.2540764808654785, "learning_rate": 1.6917808219178082e-07, "log_odds_chosen": 0.9887236952781677, "log_odds_ratio": -0.3624635636806488, "logits/chosen": 0.8454385995864868, "logits/rejected": 0.9024110436439514, "logps/chosen": -1.9124126434326172, "logps/rejected": -2.7982375621795654, "loss": 0.5846, "nll_loss": 0.5483931303024292, "rewards/accuracies": 0.875, "rewards/chosen": -0.19124126434326172, "rewards/margins": 0.08858250826597214, "rewards/rejected": -0.27982378005981445, "step": 6068 }, { "epoch": 16.61601642710472, "grad_norm": 5.17363977432251, "learning_rate": 1.6904109589041097e-07, "log_odds_chosen": 2.974074363708496, "log_odds_ratio": -0.15302182734012604, "logits/chosen": 0.6606854796409607, "logits/rejected": 0.626720666885376, "logps/chosen": -1.6908938884735107, "logps/rejected": -4.407460689544678, "loss": 0.6661, "nll_loss": 0.6507503390312195, "rewards/accuracies": 1.0, "rewards/chosen": -0.1690893918275833, "rewards/margins": 0.27165669202804565, "rewards/rejected": -0.44074609875679016, "step": 6069 }, { "epoch": 16.618754277891856, "grad_norm": 5.276653289794922, "learning_rate": 1.6890410958904107e-07, "log_odds_chosen": 2.5545642375946045, "log_odds_ratio": -0.22700802981853485, "logits/chosen": 0.8426294922828674, "logits/rejected": 0.9242480397224426, "logps/chosen": -2.2936418056488037, "logps/rejected": -4.71971321105957, "loss": 0.7521, "nll_loss": 0.7294224500656128, "rewards/accuracies": 0.875, "rewards/chosen": -0.22936420142650604, "rewards/margins": 0.24260714650154114, "rewards/rejected": -0.471971333026886, "step": 6070 }, { "epoch": 16.621492128678987, "grad_norm": 5.211371421813965, "learning_rate": 1.6876712328767122e-07, "log_odds_chosen": 3.3062844276428223, "log_odds_ratio": -0.14194782078266144, "logits/chosen": 0.9703022837638855, "logits/rejected": 1.009859323501587, "logps/chosen": -2.412583827972412, "logps/rejected": -5.595005512237549, "loss": 0.6251, "nll_loss": 0.6109225153923035, "rewards/accuracies": 1.0, "rewards/chosen": -0.2412583827972412, "rewards/margins": 0.3182421326637268, "rewards/rejected": -0.5595005750656128, "step": 6071 }, { "epoch": 16.62422997946612, "grad_norm": 6.160383224487305, "learning_rate": 1.6863013698630135e-07, "log_odds_chosen": 2.8466248512268066, "log_odds_ratio": -0.29212087392807007, "logits/chosen": 0.896214485168457, "logits/rejected": 1.0227924585342407, "logps/chosen": -2.6817572116851807, "logps/rejected": -5.461573600769043, "loss": 0.7311, "nll_loss": 0.7019319534301758, "rewards/accuracies": 0.875, "rewards/chosen": -0.26817572116851807, "rewards/margins": 0.27798163890838623, "rewards/rejected": -0.5461573600769043, "step": 6072 }, { "epoch": 16.62696783025325, "grad_norm": 6.144087791442871, "learning_rate": 1.684931506849315e-07, "log_odds_chosen": 1.9093315601348877, "log_odds_ratio": -0.18719953298568726, "logits/chosen": 0.7599009871482849, "logits/rejected": 0.8208625316619873, "logps/chosen": -2.316715955734253, "logps/rejected": -4.066954135894775, "loss": 0.5611, "nll_loss": 0.5423757433891296, "rewards/accuracies": 1.0, "rewards/chosen": -0.23167160153388977, "rewards/margins": 0.17502379417419434, "rewards/rejected": -0.4066953957080841, "step": 6073 }, { "epoch": 16.629705681040384, "grad_norm": 4.8990068435668945, "learning_rate": 1.6835616438356165e-07, "log_odds_chosen": 2.543320655822754, "log_odds_ratio": -0.19772878289222717, "logits/chosen": 0.8641835451126099, "logits/rejected": 0.9259535074234009, "logps/chosen": -1.6872479915618896, "logps/rejected": -4.05204439163208, "loss": 0.6081, "nll_loss": 0.5883418321609497, "rewards/accuracies": 1.0, "rewards/chosen": -0.16872479021549225, "rewards/margins": 0.23647965490818024, "rewards/rejected": -0.4052044153213501, "step": 6074 }, { "epoch": 16.632443531827516, "grad_norm": 8.176973342895508, "learning_rate": 1.6821917808219178e-07, "log_odds_chosen": 1.471441626548767, "log_odds_ratio": -0.523795485496521, "logits/chosen": 0.7713772058486938, "logits/rejected": 0.7362858653068542, "logps/chosen": -2.356440544128418, "logps/rejected": -3.711097240447998, "loss": 0.7732, "nll_loss": 0.7208451628684998, "rewards/accuracies": 0.875, "rewards/chosen": -0.23564404249191284, "rewards/margins": 0.13546571135520935, "rewards/rejected": -0.3711097240447998, "step": 6075 }, { "epoch": 16.635181382614647, "grad_norm": 4.791026592254639, "learning_rate": 1.6808219178082193e-07, "log_odds_chosen": 3.2966668605804443, "log_odds_ratio": -0.12485833466053009, "logits/chosen": 0.9674805402755737, "logits/rejected": 1.0312108993530273, "logps/chosen": -2.088280200958252, "logps/rejected": -5.242218971252441, "loss": 0.65, "nll_loss": 0.6375178694725037, "rewards/accuracies": 1.0, "rewards/chosen": -0.20882800221443176, "rewards/margins": 0.3153938949108124, "rewards/rejected": -0.5242218971252441, "step": 6076 }, { "epoch": 16.637919233401778, "grad_norm": 6.370687007904053, "learning_rate": 1.6794520547945203e-07, "log_odds_chosen": 2.883742332458496, "log_odds_ratio": -0.32358434796333313, "logits/chosen": 0.6290240287780762, "logits/rejected": 0.7263976335525513, "logps/chosen": -1.7673466205596924, "logps/rejected": -4.470180511474609, "loss": 0.6088, "nll_loss": 0.57640540599823, "rewards/accuracies": 0.875, "rewards/chosen": -0.17673464119434357, "rewards/margins": 0.27028340101242065, "rewards/rejected": -0.4470180571079254, "step": 6077 }, { "epoch": 16.640657084188913, "grad_norm": 5.056363582611084, "learning_rate": 1.6780821917808218e-07, "log_odds_chosen": 1.843839406967163, "log_odds_ratio": -0.22485996782779694, "logits/chosen": 0.8410528898239136, "logits/rejected": 0.879401445388794, "logps/chosen": -2.0817179679870605, "logps/rejected": -3.8041086196899414, "loss": 0.6682, "nll_loss": 0.6457010507583618, "rewards/accuracies": 1.0, "rewards/chosen": -0.2081717848777771, "rewards/margins": 0.17223909497261047, "rewards/rejected": -0.3804108500480652, "step": 6078 }, { "epoch": 16.643394934976044, "grad_norm": 7.497332572937012, "learning_rate": 1.676712328767123e-07, "log_odds_chosen": 1.2290970087051392, "log_odds_ratio": -1.0533881187438965, "logits/chosen": 0.7497706413269043, "logits/rejected": 0.9077169895172119, "logps/chosen": -3.16337251663208, "logps/rejected": -4.354118824005127, "loss": 0.7704, "nll_loss": 0.6650900840759277, "rewards/accuracies": 0.625, "rewards/chosen": -0.3163372874259949, "rewards/margins": 0.11907462030649185, "rewards/rejected": -0.43541187047958374, "step": 6079 }, { "epoch": 16.646132785763175, "grad_norm": 5.233701229095459, "learning_rate": 1.6753424657534246e-07, "log_odds_chosen": 2.234107732772827, "log_odds_ratio": -0.1981154978275299, "logits/chosen": 1.0337492227554321, "logits/rejected": 1.0088081359863281, "logps/chosen": -1.9646599292755127, "logps/rejected": -4.061028957366943, "loss": 0.5946, "nll_loss": 0.5747523307800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.19646596908569336, "rewards/margins": 0.20963691174983978, "rewards/rejected": -0.4061029255390167, "step": 6080 }, { "epoch": 16.648870636550306, "grad_norm": 4.54211950302124, "learning_rate": 1.673972602739726e-07, "log_odds_chosen": 2.951495885848999, "log_odds_ratio": -0.16726741194725037, "logits/chosen": 0.8752793073654175, "logits/rejected": 0.9401112794876099, "logps/chosen": -2.470651388168335, "logps/rejected": -5.31807804107666, "loss": 0.6847, "nll_loss": 0.6679662466049194, "rewards/accuracies": 1.0, "rewards/chosen": -0.24706515669822693, "rewards/margins": 0.2847425937652588, "rewards/rejected": -0.5318077802658081, "step": 6081 }, { "epoch": 16.65160848733744, "grad_norm": 4.820968151092529, "learning_rate": 1.6726027397260274e-07, "log_odds_chosen": 1.55598783493042, "log_odds_ratio": -0.32264482975006104, "logits/chosen": 0.763322114944458, "logits/rejected": 0.7870405316352844, "logps/chosen": -2.0797805786132812, "logps/rejected": -3.534818172454834, "loss": 0.7191, "nll_loss": 0.6868525743484497, "rewards/accuracies": 0.875, "rewards/chosen": -0.20797806978225708, "rewards/margins": 0.14550372958183289, "rewards/rejected": -0.35348182916641235, "step": 6082 }, { "epoch": 16.654346338124572, "grad_norm": 4.845243453979492, "learning_rate": 1.671232876712329e-07, "log_odds_chosen": 3.7064530849456787, "log_odds_ratio": -0.09792238473892212, "logits/chosen": 0.8565762639045715, "logits/rejected": 0.8021761775016785, "logps/chosen": -2.041410446166992, "logps/rejected": -5.441403865814209, "loss": 0.6591, "nll_loss": 0.6493003368377686, "rewards/accuracies": 1.0, "rewards/chosen": -0.2041410505771637, "rewards/margins": 0.33999937772750854, "rewards/rejected": -0.5441403985023499, "step": 6083 }, { "epoch": 16.657084188911703, "grad_norm": 6.1975836753845215, "learning_rate": 1.66986301369863e-07, "log_odds_chosen": 1.6586670875549316, "log_odds_ratio": -0.4909876585006714, "logits/chosen": 1.0654382705688477, "logits/rejected": 1.0885717868804932, "logps/chosen": -2.3046958446502686, "logps/rejected": -3.852604389190674, "loss": 0.72, "nll_loss": 0.6709253191947937, "rewards/accuracies": 0.75, "rewards/chosen": -0.23046958446502686, "rewards/margins": 0.15479087829589844, "rewards/rejected": -0.3852604627609253, "step": 6084 }, { "epoch": 16.659822039698838, "grad_norm": 5.708704948425293, "learning_rate": 1.6684931506849314e-07, "log_odds_chosen": 1.789442777633667, "log_odds_ratio": -0.37305623292922974, "logits/chosen": 0.9413963556289673, "logits/rejected": 0.9399003982543945, "logps/chosen": -2.267434597015381, "logps/rejected": -4.015049457550049, "loss": 0.5788, "nll_loss": 0.5414835214614868, "rewards/accuracies": 0.75, "rewards/chosen": -0.22674345970153809, "rewards/margins": 0.17476147413253784, "rewards/rejected": -0.4015049338340759, "step": 6085 }, { "epoch": 16.66255989048597, "grad_norm": 4.380791664123535, "learning_rate": 1.6671232876712327e-07, "log_odds_chosen": 3.9751296043395996, "log_odds_ratio": -0.13257873058319092, "logits/chosen": 0.9223326444625854, "logits/rejected": 0.9774612188339233, "logps/chosen": -2.0167529582977295, "logps/rejected": -5.83147668838501, "loss": 0.5857, "nll_loss": 0.5724337100982666, "rewards/accuracies": 1.0, "rewards/chosen": -0.20167529582977295, "rewards/margins": 0.3814723491668701, "rewards/rejected": -0.5831476449966431, "step": 6086 }, { "epoch": 16.6652977412731, "grad_norm": 5.421610355377197, "learning_rate": 1.6657534246575342e-07, "log_odds_chosen": 2.5939528942108154, "log_odds_ratio": -0.23541167378425598, "logits/chosen": 0.9029126167297363, "logits/rejected": 0.8615431785583496, "logps/chosen": -1.8767350912094116, "logps/rejected": -4.302009582519531, "loss": 0.5371, "nll_loss": 0.5135351419448853, "rewards/accuracies": 1.0, "rewards/chosen": -0.18767350912094116, "rewards/margins": 0.24252748489379883, "rewards/rejected": -0.43020099401474, "step": 6087 }, { "epoch": 16.66803559206023, "grad_norm": 6.873752593994141, "learning_rate": 1.6643835616438357e-07, "log_odds_chosen": 1.4974117279052734, "log_odds_ratio": -0.4578142762184143, "logits/chosen": 0.8529622554779053, "logits/rejected": 0.9045098423957825, "logps/chosen": -1.9600245952606201, "logps/rejected": -3.288396120071411, "loss": 0.6747, "nll_loss": 0.6288992762565613, "rewards/accuracies": 0.875, "rewards/chosen": -0.19600245356559753, "rewards/margins": 0.132837176322937, "rewards/rejected": -0.32883962988853455, "step": 6088 }, { "epoch": 16.670773442847366, "grad_norm": 5.338632106781006, "learning_rate": 1.663013698630137e-07, "log_odds_chosen": 2.3823177814483643, "log_odds_ratio": -0.1634046584367752, "logits/chosen": 0.9740374088287354, "logits/rejected": 0.9939879179000854, "logps/chosen": -2.120090961456299, "logps/rejected": -4.3583083152771, "loss": 0.6144, "nll_loss": 0.5980166792869568, "rewards/accuracies": 1.0, "rewards/chosen": -0.21200910210609436, "rewards/margins": 0.2238217294216156, "rewards/rejected": -0.43583083152770996, "step": 6089 }, { "epoch": 16.673511293634498, "grad_norm": 5.400759696960449, "learning_rate": 1.6616438356164382e-07, "log_odds_chosen": 2.4116387367248535, "log_odds_ratio": -0.25162094831466675, "logits/chosen": 0.5694437026977539, "logits/rejected": 0.5757840871810913, "logps/chosen": -2.4059343338012695, "logps/rejected": -4.7012248039245605, "loss": 0.6251, "nll_loss": 0.5999625325202942, "rewards/accuracies": 0.875, "rewards/chosen": -0.24059343338012695, "rewards/margins": 0.22952905297279358, "rewards/rejected": -0.47012248635292053, "step": 6090 }, { "epoch": 16.67624914442163, "grad_norm": 5.499825954437256, "learning_rate": 1.6602739726027395e-07, "log_odds_chosen": 1.5183387994766235, "log_odds_ratio": -0.30283528566360474, "logits/chosen": 0.8699575066566467, "logits/rejected": 0.8200516700744629, "logps/chosen": -2.044940948486328, "logps/rejected": -3.4550909996032715, "loss": 0.647, "nll_loss": 0.6166857481002808, "rewards/accuracies": 0.875, "rewards/chosen": -0.20449408888816833, "rewards/margins": 0.14101502299308777, "rewards/rejected": -0.3455091118812561, "step": 6091 }, { "epoch": 16.67898699520876, "grad_norm": 5.126166343688965, "learning_rate": 1.658904109589041e-07, "log_odds_chosen": 1.212001085281372, "log_odds_ratio": -0.30400213599205017, "logits/chosen": 0.7467960119247437, "logits/rejected": 0.7691146731376648, "logps/chosen": -2.0225167274475098, "logps/rejected": -3.1100597381591797, "loss": 0.6128, "nll_loss": 0.582414448261261, "rewards/accuracies": 1.0, "rewards/chosen": -0.2022516429424286, "rewards/margins": 0.10875430703163147, "rewards/rejected": -0.31100597977638245, "step": 6092 }, { "epoch": 16.681724845995895, "grad_norm": 5.8883490562438965, "learning_rate": 1.6575342465753425e-07, "log_odds_chosen": 2.3631820678710938, "log_odds_ratio": -0.4790385961532593, "logits/chosen": 1.011804223060608, "logits/rejected": 1.0862027406692505, "logps/chosen": -2.676443576812744, "logps/rejected": -4.973385334014893, "loss": 0.7133, "nll_loss": 0.665389895439148, "rewards/accuracies": 0.75, "rewards/chosen": -0.26764440536499023, "rewards/margins": 0.2296941876411438, "rewards/rejected": -0.49733853340148926, "step": 6093 }, { "epoch": 16.684462696783026, "grad_norm": 6.057224750518799, "learning_rate": 1.6561643835616438e-07, "log_odds_chosen": 1.6305286884307861, "log_odds_ratio": -0.28989720344543457, "logits/chosen": 0.6370418667793274, "logits/rejected": 0.6883885264396667, "logps/chosen": -1.67656672000885, "logps/rejected": -3.078047275543213, "loss": 0.5349, "nll_loss": 0.5059009790420532, "rewards/accuracies": 1.0, "rewards/chosen": -0.16765667498111725, "rewards/margins": 0.1401480734348297, "rewards/rejected": -0.30780473351478577, "step": 6094 }, { "epoch": 16.687200547570157, "grad_norm": 5.06760835647583, "learning_rate": 1.6547945205479453e-07, "log_odds_chosen": 1.7205084562301636, "log_odds_ratio": -0.35906726121902466, "logits/chosen": 0.8979628086090088, "logits/rejected": 0.938724160194397, "logps/chosen": -2.184736728668213, "logps/rejected": -3.71270751953125, "loss": 0.6193, "nll_loss": 0.5834152102470398, "rewards/accuracies": 0.75, "rewards/chosen": -0.2184736728668213, "rewards/margins": 0.15279708802700043, "rewards/rejected": -0.37127071619033813, "step": 6095 }, { "epoch": 16.689938398357288, "grad_norm": 6.971152305603027, "learning_rate": 1.6534246575342465e-07, "log_odds_chosen": 1.5433348417282104, "log_odds_ratio": -0.33312419056892395, "logits/chosen": 0.8119060397148132, "logits/rejected": 0.6884291768074036, "logps/chosen": -2.459533214569092, "logps/rejected": -3.875229597091675, "loss": 0.6891, "nll_loss": 0.6557961702346802, "rewards/accuracies": 0.75, "rewards/chosen": -0.24595332145690918, "rewards/margins": 0.14156962931156158, "rewards/rejected": -0.38752293586730957, "step": 6096 }, { "epoch": 16.692676249144423, "grad_norm": 5.841012477874756, "learning_rate": 1.6520547945205478e-07, "log_odds_chosen": 1.7564619779586792, "log_odds_ratio": -0.2534462511539459, "logits/chosen": 0.6711060404777527, "logits/rejected": 0.6678016185760498, "logps/chosen": -1.808719515800476, "logps/rejected": -3.401165008544922, "loss": 0.6534, "nll_loss": 0.6280823945999146, "rewards/accuracies": 1.0, "rewards/chosen": -0.18087196350097656, "rewards/margins": 0.15924453735351562, "rewards/rejected": -0.3401165008544922, "step": 6097 }, { "epoch": 16.695414099931554, "grad_norm": 5.373730182647705, "learning_rate": 1.650684931506849e-07, "log_odds_chosen": 2.7402637004852295, "log_odds_ratio": -0.2294405996799469, "logits/chosen": 0.6976008415222168, "logits/rejected": 0.7650001049041748, "logps/chosen": -1.9904484748840332, "logps/rejected": -4.613064765930176, "loss": 0.599, "nll_loss": 0.5760161876678467, "rewards/accuracies": 1.0, "rewards/chosen": -0.1990448534488678, "rewards/margins": 0.26226165890693665, "rewards/rejected": -0.46130651235580444, "step": 6098 }, { "epoch": 16.698151950718685, "grad_norm": 5.624537944793701, "learning_rate": 1.6493150684931506e-07, "log_odds_chosen": 1.6604551076889038, "log_odds_ratio": -0.3449373245239258, "logits/chosen": 0.6196957230567932, "logits/rejected": 0.6703013181686401, "logps/chosen": -2.0565364360809326, "logps/rejected": -3.6107583045959473, "loss": 0.6106, "nll_loss": 0.5761557817459106, "rewards/accuracies": 0.75, "rewards/chosen": -0.20565366744995117, "rewards/margins": 0.155422180891037, "rewards/rejected": -0.36107581853866577, "step": 6099 }, { "epoch": 16.700889801505816, "grad_norm": 6.978909015655518, "learning_rate": 1.647945205479452e-07, "log_odds_chosen": 2.3308701515197754, "log_odds_ratio": -0.35557645559310913, "logits/chosen": 0.7049633860588074, "logits/rejected": 0.6679893136024475, "logps/chosen": -2.1577389240264893, "logps/rejected": -4.3665361404418945, "loss": 0.6648, "nll_loss": 0.6292116641998291, "rewards/accuracies": 0.875, "rewards/chosen": -0.21577388048171997, "rewards/margins": 0.2208797186613083, "rewards/rejected": -0.43665361404418945, "step": 6100 }, { "epoch": 16.70362765229295, "grad_norm": 6.713372707366943, "learning_rate": 1.6465753424657534e-07, "log_odds_chosen": 0.9539422988891602, "log_odds_ratio": -0.3869069218635559, "logits/chosen": 0.8099416494369507, "logits/rejected": 0.760922908782959, "logps/chosen": -2.1334147453308105, "logps/rejected": -2.976256847381592, "loss": 0.6006, "nll_loss": 0.5618670582771301, "rewards/accuracies": 0.875, "rewards/chosen": -0.21334147453308105, "rewards/margins": 0.0842842385172844, "rewards/rejected": -0.29762569069862366, "step": 6101 }, { "epoch": 16.706365503080082, "grad_norm": 5.835838317871094, "learning_rate": 1.645205479452055e-07, "log_odds_chosen": 1.9128549098968506, "log_odds_ratio": -0.21063077449798584, "logits/chosen": 0.9919344782829285, "logits/rejected": 1.0309102535247803, "logps/chosen": -2.3565711975097656, "logps/rejected": -4.127169609069824, "loss": 0.6115, "nll_loss": 0.5904611945152283, "rewards/accuracies": 1.0, "rewards/chosen": -0.23565712571144104, "rewards/margins": 0.1770598292350769, "rewards/rejected": -0.41271698474884033, "step": 6102 }, { "epoch": 16.709103353867214, "grad_norm": 6.829492568969727, "learning_rate": 1.643835616438356e-07, "log_odds_chosen": 3.0909359455108643, "log_odds_ratio": -0.2940663993358612, "logits/chosen": 0.9618681073188782, "logits/rejected": 1.043290376663208, "logps/chosen": -3.3581745624542236, "logps/rejected": -6.396605014801025, "loss": 0.7443, "nll_loss": 0.7149090766906738, "rewards/accuracies": 0.75, "rewards/chosen": -0.33581745624542236, "rewards/margins": 0.30384308099746704, "rewards/rejected": -0.6396605372428894, "step": 6103 }, { "epoch": 16.711841204654345, "grad_norm": 5.168021202087402, "learning_rate": 1.6424657534246574e-07, "log_odds_chosen": 3.5384390354156494, "log_odds_ratio": -0.13338540494441986, "logits/chosen": 0.8227071762084961, "logits/rejected": 0.8588879108428955, "logps/chosen": -2.498429536819458, "logps/rejected": -5.933933258056641, "loss": 0.6509, "nll_loss": 0.6375494003295898, "rewards/accuracies": 1.0, "rewards/chosen": -0.24984297156333923, "rewards/margins": 0.34355035424232483, "rewards/rejected": -0.5933933258056641, "step": 6104 }, { "epoch": 16.71457905544148, "grad_norm": 5.852433204650879, "learning_rate": 1.6410958904109586e-07, "log_odds_chosen": 3.1082630157470703, "log_odds_ratio": -0.29365959763526917, "logits/chosen": 0.9466777443885803, "logits/rejected": 0.9732933044433594, "logps/chosen": -2.1742920875549316, "logps/rejected": -5.156240940093994, "loss": 0.676, "nll_loss": 0.64661705493927, "rewards/accuracies": 0.875, "rewards/chosen": -0.21742920577526093, "rewards/margins": 0.29819491505622864, "rewards/rejected": -0.5156241059303284, "step": 6105 }, { "epoch": 16.71731690622861, "grad_norm": 5.766983509063721, "learning_rate": 1.6397260273972602e-07, "log_odds_chosen": 3.9388885498046875, "log_odds_ratio": -0.2676449418067932, "logits/chosen": 1.0339230298995972, "logits/rejected": 1.0471892356872559, "logps/chosen": -1.6365740299224854, "logps/rejected": -5.350532054901123, "loss": 0.6138, "nll_loss": 0.5870617628097534, "rewards/accuracies": 0.875, "rewards/chosen": -0.16365741193294525, "rewards/margins": 0.3713958263397217, "rewards/rejected": -0.5350532531738281, "step": 6106 }, { "epoch": 16.720054757015742, "grad_norm": 8.137494087219238, "learning_rate": 1.6383561643835617e-07, "log_odds_chosen": 2.40091609954834, "log_odds_ratio": -0.22936944663524628, "logits/chosen": 0.7418375015258789, "logits/rejected": 0.6622846722602844, "logps/chosen": -2.283186197280884, "logps/rejected": -4.589161396026611, "loss": 0.6801, "nll_loss": 0.6572027206420898, "rewards/accuracies": 1.0, "rewards/chosen": -0.22831860184669495, "rewards/margins": 0.23059752583503723, "rewards/rejected": -0.4589161276817322, "step": 6107 }, { "epoch": 16.722792607802873, "grad_norm": 5.518791198730469, "learning_rate": 1.636986301369863e-07, "log_odds_chosen": 2.0805556774139404, "log_odds_ratio": -0.21620479226112366, "logits/chosen": 0.8313942551612854, "logits/rejected": 0.8586440086364746, "logps/chosen": -2.146599054336548, "logps/rejected": -4.092043876647949, "loss": 0.707, "nll_loss": 0.6853877902030945, "rewards/accuracies": 1.0, "rewards/chosen": -0.2146598994731903, "rewards/margins": 0.1945444792509079, "rewards/rejected": -0.409204363822937, "step": 6108 }, { "epoch": 16.725530458590008, "grad_norm": 5.0503034591674805, "learning_rate": 1.6356164383561645e-07, "log_odds_chosen": 2.5452826023101807, "log_odds_ratio": -0.26874399185180664, "logits/chosen": 0.8444271683692932, "logits/rejected": 0.8472388982772827, "logps/chosen": -2.010744571685791, "logps/rejected": -4.449652671813965, "loss": 0.5893, "nll_loss": 0.5624455809593201, "rewards/accuracies": 0.875, "rewards/chosen": -0.201074481010437, "rewards/margins": 0.24389079213142395, "rewards/rejected": -0.4449652433395386, "step": 6109 }, { "epoch": 16.72826830937714, "grad_norm": 5.811104774475098, "learning_rate": 1.6342465753424655e-07, "log_odds_chosen": 2.097449541091919, "log_odds_ratio": -0.2788037657737732, "logits/chosen": 0.9806126952171326, "logits/rejected": 0.976235032081604, "logps/chosen": -2.052605390548706, "logps/rejected": -4.040955066680908, "loss": 0.5921, "nll_loss": 0.5642422437667847, "rewards/accuracies": 0.875, "rewards/chosen": -0.20526055991649628, "rewards/margins": 0.19883498549461365, "rewards/rejected": -0.40409553050994873, "step": 6110 }, { "epoch": 16.73100616016427, "grad_norm": 6.484041213989258, "learning_rate": 1.632876712328767e-07, "log_odds_chosen": 2.799190044403076, "log_odds_ratio": -0.20507609844207764, "logits/chosen": 0.9527204036712646, "logits/rejected": 1.0644607543945312, "logps/chosen": -2.4618096351623535, "logps/rejected": -5.175175666809082, "loss": 0.6611, "nll_loss": 0.6406073570251465, "rewards/accuracies": 0.875, "rewards/chosen": -0.2461809664964676, "rewards/margins": 0.2713366150856018, "rewards/rejected": -0.5175175666809082, "step": 6111 }, { "epoch": 16.733744010951405, "grad_norm": 6.1311750411987305, "learning_rate": 1.6315068493150685e-07, "log_odds_chosen": 2.4408769607543945, "log_odds_ratio": -0.20986051857471466, "logits/chosen": 0.7627747654914856, "logits/rejected": 0.7954980134963989, "logps/chosen": -2.2658472061157227, "logps/rejected": -4.512911319732666, "loss": 0.5949, "nll_loss": 0.5739595293998718, "rewards/accuracies": 1.0, "rewards/chosen": -0.22658473253250122, "rewards/margins": 0.22470644116401672, "rewards/rejected": -0.45129117369651794, "step": 6112 }, { "epoch": 16.736481861738536, "grad_norm": 8.019668579101562, "learning_rate": 1.6301369863013698e-07, "log_odds_chosen": 1.949156641960144, "log_odds_ratio": -0.4536779522895813, "logits/chosen": 0.9543881416320801, "logits/rejected": 0.9788297414779663, "logps/chosen": -3.4245498180389404, "logps/rejected": -5.332066059112549, "loss": 0.7531, "nll_loss": 0.7077304124832153, "rewards/accuracies": 0.75, "rewards/chosen": -0.3424549996852875, "rewards/margins": 0.19075161218643188, "rewards/rejected": -0.533206582069397, "step": 6113 }, { "epoch": 16.739219712525667, "grad_norm": 6.556910991668701, "learning_rate": 1.6287671232876713e-07, "log_odds_chosen": 2.91970157623291, "log_odds_ratio": -0.09692087024450302, "logits/chosen": 1.0670262575149536, "logits/rejected": 1.128074288368225, "logps/chosen": -2.5629372596740723, "logps/rejected": -5.387332916259766, "loss": 0.7126, "nll_loss": 0.7029076814651489, "rewards/accuracies": 1.0, "rewards/chosen": -0.25629371404647827, "rewards/margins": 0.28243958950042725, "rewards/rejected": -0.5387333631515503, "step": 6114 }, { "epoch": 16.7419575633128, "grad_norm": 4.973335266113281, "learning_rate": 1.6273972602739725e-07, "log_odds_chosen": 2.040335178375244, "log_odds_ratio": -0.37380164861679077, "logits/chosen": 0.9061084389686584, "logits/rejected": 0.9703475832939148, "logps/chosen": -1.959791660308838, "logps/rejected": -3.926456928253174, "loss": 0.5709, "nll_loss": 0.5335308909416199, "rewards/accuracies": 0.75, "rewards/chosen": -0.19597916305065155, "rewards/margins": 0.19666653871536255, "rewards/rejected": -0.3926456868648529, "step": 6115 }, { "epoch": 16.744695414099933, "grad_norm": 5.20462703704834, "learning_rate": 1.626027397260274e-07, "log_odds_chosen": 2.024505853652954, "log_odds_ratio": -0.2581612467765808, "logits/chosen": 0.8822290897369385, "logits/rejected": 0.9186819791793823, "logps/chosen": -2.2709848880767822, "logps/rejected": -4.189560890197754, "loss": 0.6405, "nll_loss": 0.6147236227989197, "rewards/accuracies": 1.0, "rewards/chosen": -0.2270985096693039, "rewards/margins": 0.19185759127140045, "rewards/rejected": -0.41895610094070435, "step": 6116 }, { "epoch": 16.747433264887064, "grad_norm": 5.681443691253662, "learning_rate": 1.624657534246575e-07, "log_odds_chosen": 2.14078426361084, "log_odds_ratio": -0.1707763373851776, "logits/chosen": 0.8612991571426392, "logits/rejected": 0.945347011089325, "logps/chosen": -2.567720890045166, "logps/rejected": -4.6259918212890625, "loss": 0.6329, "nll_loss": 0.6158245801925659, "rewards/accuracies": 1.0, "rewards/chosen": -0.25677210092544556, "rewards/margins": 0.20582714676856995, "rewards/rejected": -0.4625992178916931, "step": 6117 }, { "epoch": 16.750171115674195, "grad_norm": 4.747851848602295, "learning_rate": 1.6232876712328766e-07, "log_odds_chosen": 3.408994197845459, "log_odds_ratio": -0.15186087787151337, "logits/chosen": 0.9996382594108582, "logits/rejected": 0.9986417889595032, "logps/chosen": -1.9081709384918213, "logps/rejected": -5.1448974609375, "loss": 0.6367, "nll_loss": 0.621557354927063, "rewards/accuracies": 1.0, "rewards/chosen": -0.19081707298755646, "rewards/margins": 0.3236726224422455, "rewards/rejected": -0.5144897699356079, "step": 6118 }, { "epoch": 16.752908966461327, "grad_norm": 5.989979267120361, "learning_rate": 1.621917808219178e-07, "log_odds_chosen": 2.5815281867980957, "log_odds_ratio": -0.17721478641033173, "logits/chosen": 1.0282106399536133, "logits/rejected": 1.1102931499481201, "logps/chosen": -2.4696731567382812, "logps/rejected": -4.952517509460449, "loss": 0.6502, "nll_loss": 0.6325080990791321, "rewards/accuracies": 1.0, "rewards/chosen": -0.24696731567382812, "rewards/margins": 0.24828442931175232, "rewards/rejected": -0.49525171518325806, "step": 6119 }, { "epoch": 16.75564681724846, "grad_norm": 5.434125900268555, "learning_rate": 1.6205479452054793e-07, "log_odds_chosen": 2.285231828689575, "log_odds_ratio": -0.24193508923053741, "logits/chosen": 0.6675052642822266, "logits/rejected": 0.7229821681976318, "logps/chosen": -1.7685227394104004, "logps/rejected": -3.8862106800079346, "loss": 0.625, "nll_loss": 0.6008034348487854, "rewards/accuracies": 0.875, "rewards/chosen": -0.1768522709608078, "rewards/margins": 0.21176877617835999, "rewards/rejected": -0.388621062040329, "step": 6120 }, { "epoch": 16.758384668035593, "grad_norm": 5.351651668548584, "learning_rate": 1.6191780821917809e-07, "log_odds_chosen": 2.7986764907836914, "log_odds_ratio": -0.17848587036132812, "logits/chosen": 0.8648721575737, "logits/rejected": 0.877548336982727, "logps/chosen": -2.037745475769043, "logps/rejected": -4.67180061340332, "loss": 0.6669, "nll_loss": 0.6490517854690552, "rewards/accuracies": 0.875, "rewards/chosen": -0.203774556517601, "rewards/margins": 0.26340556144714355, "rewards/rejected": -0.46718013286590576, "step": 6121 }, { "epoch": 16.761122518822724, "grad_norm": 5.7327046394348145, "learning_rate": 1.617808219178082e-07, "log_odds_chosen": 1.3215643167495728, "log_odds_ratio": -0.2689886689186096, "logits/chosen": 1.032764196395874, "logits/rejected": 0.9606202244758606, "logps/chosen": -2.125852346420288, "logps/rejected": -3.32967472076416, "loss": 0.5778, "nll_loss": 0.5509060621261597, "rewards/accuracies": 1.0, "rewards/chosen": -0.21258525550365448, "rewards/margins": 0.12038224190473557, "rewards/rejected": -0.33296748995780945, "step": 6122 }, { "epoch": 16.763860369609855, "grad_norm": 5.607973575592041, "learning_rate": 1.6164383561643836e-07, "log_odds_chosen": 1.7792820930480957, "log_odds_ratio": -0.21660490334033966, "logits/chosen": 0.6272069215774536, "logits/rejected": 0.6529845595359802, "logps/chosen": -2.060492753982544, "logps/rejected": -3.720917224884033, "loss": 0.7165, "nll_loss": 0.6947935819625854, "rewards/accuracies": 1.0, "rewards/chosen": -0.20604927837848663, "rewards/margins": 0.1660424768924713, "rewards/rejected": -0.37209174036979675, "step": 6123 }, { "epoch": 16.76659822039699, "grad_norm": 5.700299263000488, "learning_rate": 1.615068493150685e-07, "log_odds_chosen": 1.2957308292388916, "log_odds_ratio": -0.45699965953826904, "logits/chosen": 0.5995631814002991, "logits/rejected": 0.6047924757003784, "logps/chosen": -1.7430009841918945, "logps/rejected": -2.984088659286499, "loss": 0.6811, "nll_loss": 0.635434627532959, "rewards/accuracies": 0.625, "rewards/chosen": -0.17430010437965393, "rewards/margins": 0.12410877645015717, "rewards/rejected": -0.2984088659286499, "step": 6124 }, { "epoch": 16.76933607118412, "grad_norm": 5.837543487548828, "learning_rate": 1.6136986301369861e-07, "log_odds_chosen": 1.8519960641860962, "log_odds_ratio": -0.27005982398986816, "logits/chosen": 0.8726361989974976, "logits/rejected": 0.8809608221054077, "logps/chosen": -2.0950610637664795, "logps/rejected": -3.725464344024658, "loss": 0.6299, "nll_loss": 0.6029404401779175, "rewards/accuracies": 1.0, "rewards/chosen": -0.209506094455719, "rewards/margins": 0.16304032504558563, "rewards/rejected": -0.3725464344024658, "step": 6125 }, { "epoch": 16.772073921971252, "grad_norm": 5.108825206756592, "learning_rate": 1.6123287671232877e-07, "log_odds_chosen": 3.1359481811523438, "log_odds_ratio": -0.12033955752849579, "logits/chosen": 0.9561752080917358, "logits/rejected": 1.0406639575958252, "logps/chosen": -1.7028789520263672, "logps/rejected": -4.649782657623291, "loss": 0.5635, "nll_loss": 0.551476240158081, "rewards/accuracies": 1.0, "rewards/chosen": -0.17028790712356567, "rewards/margins": 0.2946903705596924, "rewards/rejected": -0.46497827768325806, "step": 6126 }, { "epoch": 16.774811772758383, "grad_norm": 5.326782703399658, "learning_rate": 1.610958904109589e-07, "log_odds_chosen": 1.6703786849975586, "log_odds_ratio": -0.2526516020298004, "logits/chosen": 0.902347981929779, "logits/rejected": 0.958556056022644, "logps/chosen": -2.206150770187378, "logps/rejected": -3.729997158050537, "loss": 0.7154, "nll_loss": 0.6901437640190125, "rewards/accuracies": 1.0, "rewards/chosen": -0.22061508893966675, "rewards/margins": 0.1523846536874771, "rewards/rejected": -0.37299972772598267, "step": 6127 }, { "epoch": 16.777549623545518, "grad_norm": 5.614612579345703, "learning_rate": 1.6095890410958904e-07, "log_odds_chosen": 1.7404426336288452, "log_odds_ratio": -0.3786301016807556, "logits/chosen": 0.7390912771224976, "logits/rejected": 0.7328404784202576, "logps/chosen": -2.59118914604187, "logps/rejected": -4.2486724853515625, "loss": 0.6901, "nll_loss": 0.6522418260574341, "rewards/accuracies": 0.875, "rewards/chosen": -0.259118914604187, "rewards/margins": 0.16574832797050476, "rewards/rejected": -0.4248672425746918, "step": 6128 }, { "epoch": 16.78028747433265, "grad_norm": 12.082069396972656, "learning_rate": 1.6082191780821917e-07, "log_odds_chosen": 1.2717390060424805, "log_odds_ratio": -0.4090206027030945, "logits/chosen": 0.7020159959793091, "logits/rejected": 0.6397753357887268, "logps/chosen": -2.7536849975585938, "logps/rejected": -3.940772294998169, "loss": 0.6791, "nll_loss": 0.6382279992103577, "rewards/accuracies": 0.875, "rewards/chosen": -0.27536851167678833, "rewards/margins": 0.11870869994163513, "rewards/rejected": -0.3940771818161011, "step": 6129 }, { "epoch": 16.78302532511978, "grad_norm": 6.463953495025635, "learning_rate": 1.606849315068493e-07, "log_odds_chosen": 2.5195934772491455, "log_odds_ratio": -0.4865877628326416, "logits/chosen": 0.7443552017211914, "logits/rejected": 0.7936102151870728, "logps/chosen": -2.260254383087158, "logps/rejected": -4.658144950866699, "loss": 0.6329, "nll_loss": 0.5842198133468628, "rewards/accuracies": 0.875, "rewards/chosen": -0.22602543234825134, "rewards/margins": 0.23978909850120544, "rewards/rejected": -0.4658145010471344, "step": 6130 }, { "epoch": 16.78576317590691, "grad_norm": 5.420032501220703, "learning_rate": 1.6054794520547945e-07, "log_odds_chosen": 2.733004570007324, "log_odds_ratio": -0.3538844883441925, "logits/chosen": 0.8065899610519409, "logits/rejected": 0.8857266902923584, "logps/chosen": -2.4079689979553223, "logps/rejected": -5.054605484008789, "loss": 0.6164, "nll_loss": 0.5810372233390808, "rewards/accuracies": 0.875, "rewards/chosen": -0.24079689383506775, "rewards/margins": 0.2646636366844177, "rewards/rejected": -0.5054605603218079, "step": 6131 }, { "epoch": 16.788501026694046, "grad_norm": 5.532109260559082, "learning_rate": 1.6041095890410957e-07, "log_odds_chosen": 2.9917852878570557, "log_odds_ratio": -0.16902491450309753, "logits/chosen": 0.9269763827323914, "logits/rejected": 0.9789586663246155, "logps/chosen": -2.3891963958740234, "logps/rejected": -5.27323055267334, "loss": 0.6833, "nll_loss": 0.6663952469825745, "rewards/accuracies": 1.0, "rewards/chosen": -0.23891966044902802, "rewards/margins": 0.28840339183807373, "rewards/rejected": -0.5273230671882629, "step": 6132 }, { "epoch": 16.791238877481177, "grad_norm": 5.439316272735596, "learning_rate": 1.6027397260273973e-07, "log_odds_chosen": 1.8732346296310425, "log_odds_ratio": -0.25887495279312134, "logits/chosen": 1.0213069915771484, "logits/rejected": 1.0168505907058716, "logps/chosen": -1.7919905185699463, "logps/rejected": -3.5108468532562256, "loss": 0.5637, "nll_loss": 0.5378556251525879, "rewards/accuracies": 1.0, "rewards/chosen": -0.17919905483722687, "rewards/margins": 0.1718856394290924, "rewards/rejected": -0.35108470916748047, "step": 6133 }, { "epoch": 16.79397672826831, "grad_norm": 5.840855121612549, "learning_rate": 1.6013698630136985e-07, "log_odds_chosen": 2.2221882343292236, "log_odds_ratio": -0.3859967589378357, "logits/chosen": 1.0103555917739868, "logits/rejected": 1.0498440265655518, "logps/chosen": -2.0984039306640625, "logps/rejected": -4.257784366607666, "loss": 0.6975, "nll_loss": 0.6588783860206604, "rewards/accuracies": 0.75, "rewards/chosen": -0.20984038710594177, "rewards/margins": 0.2159380465745926, "rewards/rejected": -0.42577844858169556, "step": 6134 }, { "epoch": 16.79671457905544, "grad_norm": 6.246403217315674, "learning_rate": 1.6e-07, "log_odds_chosen": 1.726492166519165, "log_odds_ratio": -0.40211814641952515, "logits/chosen": 0.7711188793182373, "logits/rejected": 0.8371446132659912, "logps/chosen": -2.5874266624450684, "logps/rejected": -4.23405647277832, "loss": 0.6761, "nll_loss": 0.6358724236488342, "rewards/accuracies": 0.75, "rewards/chosen": -0.25874266028404236, "rewards/margins": 0.16466298699378967, "rewards/rejected": -0.4234056770801544, "step": 6135 }, { "epoch": 16.799452429842574, "grad_norm": 6.351737022399902, "learning_rate": 1.5986301369863013e-07, "log_odds_chosen": 1.4896767139434814, "log_odds_ratio": -0.37516188621520996, "logits/chosen": 0.666998028755188, "logits/rejected": 0.6602523326873779, "logps/chosen": -2.57706356048584, "logps/rejected": -3.955815315246582, "loss": 0.7519, "nll_loss": 0.7143915295600891, "rewards/accuracies": 0.75, "rewards/chosen": -0.25770634412765503, "rewards/margins": 0.13787518441677094, "rewards/rejected": -0.39558154344558716, "step": 6136 }, { "epoch": 16.802190280629706, "grad_norm": 5.6625075340271, "learning_rate": 1.5972602739726025e-07, "log_odds_chosen": 3.529644250869751, "log_odds_ratio": -0.26368147134780884, "logits/chosen": 0.9789574146270752, "logits/rejected": 1.0548592805862427, "logps/chosen": -2.759779214859009, "logps/rejected": -6.246081352233887, "loss": 0.6757, "nll_loss": 0.6492996215820312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2759779095649719, "rewards/margins": 0.3486301898956299, "rewards/rejected": -0.624608039855957, "step": 6137 }, { "epoch": 16.804928131416837, "grad_norm": 5.619444370269775, "learning_rate": 1.595890410958904e-07, "log_odds_chosen": 3.7499794960021973, "log_odds_ratio": -0.08341216295957565, "logits/chosen": 0.9743043184280396, "logits/rejected": 1.0211939811706543, "logps/chosen": -1.910846471786499, "logps/rejected": -5.501407146453857, "loss": 0.5822, "nll_loss": 0.5738757848739624, "rewards/accuracies": 1.0, "rewards/chosen": -0.19108466804027557, "rewards/margins": 0.3590560853481293, "rewards/rejected": -0.5501407384872437, "step": 6138 }, { "epoch": 16.80766598220397, "grad_norm": 5.849287986755371, "learning_rate": 1.5945205479452053e-07, "log_odds_chosen": 1.6504013538360596, "log_odds_ratio": -0.36514559388160706, "logits/chosen": 0.6929293870925903, "logits/rejected": 0.7714307308197021, "logps/chosen": -2.1157631874084473, "logps/rejected": -3.701441526412964, "loss": 0.583, "nll_loss": 0.546492338180542, "rewards/accuracies": 1.0, "rewards/chosen": -0.21157631278038025, "rewards/margins": 0.15856784582138062, "rewards/rejected": -0.37014415860176086, "step": 6139 }, { "epoch": 16.810403832991103, "grad_norm": 5.374260902404785, "learning_rate": 1.5931506849315068e-07, "log_odds_chosen": 2.370943784713745, "log_odds_ratio": -0.24942632019519806, "logits/chosen": 0.7852674722671509, "logits/rejected": 0.7927035689353943, "logps/chosen": -1.758474349975586, "logps/rejected": -3.967641592025757, "loss": 0.5798, "nll_loss": 0.554903507232666, "rewards/accuracies": 0.875, "rewards/chosen": -0.17584741115570068, "rewards/margins": 0.2209167182445526, "rewards/rejected": -0.3967641294002533, "step": 6140 }, { "epoch": 16.813141683778234, "grad_norm": 7.230597972869873, "learning_rate": 1.591780821917808e-07, "log_odds_chosen": 3.2313156127929688, "log_odds_ratio": -0.18112431466579437, "logits/chosen": 1.0778477191925049, "logits/rejected": 1.110133171081543, "logps/chosen": -2.579134941101074, "logps/rejected": -5.730976104736328, "loss": 0.6808, "nll_loss": 0.6627193093299866, "rewards/accuracies": 1.0, "rewards/chosen": -0.2579135000705719, "rewards/margins": 0.3151841163635254, "rewards/rejected": -0.5730975866317749, "step": 6141 }, { "epoch": 16.815879534565365, "grad_norm": 6.028326988220215, "learning_rate": 1.5904109589041096e-07, "log_odds_chosen": 1.99380624294281, "log_odds_ratio": -0.22683164477348328, "logits/chosen": 0.77064049243927, "logits/rejected": 0.753234326839447, "logps/chosen": -2.7272465229034424, "logps/rejected": -4.647282600402832, "loss": 0.6371, "nll_loss": 0.6143922805786133, "rewards/accuracies": 1.0, "rewards/chosen": -0.2727246880531311, "rewards/margins": 0.19200359284877777, "rewards/rejected": -0.4647282361984253, "step": 6142 }, { "epoch": 16.8186173853525, "grad_norm": 7.805129528045654, "learning_rate": 1.5890410958904111e-07, "log_odds_chosen": 1.952067494392395, "log_odds_ratio": -0.4632294476032257, "logits/chosen": 0.7780568599700928, "logits/rejected": 0.8305001854896545, "logps/chosen": -2.7957539558410645, "logps/rejected": -4.670335292816162, "loss": 0.671, "nll_loss": 0.6246718764305115, "rewards/accuracies": 0.75, "rewards/chosen": -0.2795754075050354, "rewards/margins": 0.1874581277370453, "rewards/rejected": -0.4670335352420807, "step": 6143 }, { "epoch": 16.82135523613963, "grad_norm": 5.241960048675537, "learning_rate": 1.587671232876712e-07, "log_odds_chosen": 1.9999468326568604, "log_odds_ratio": -0.46710115671157837, "logits/chosen": 0.8299064636230469, "logits/rejected": 0.8834526538848877, "logps/chosen": -2.1193766593933105, "logps/rejected": -3.96147084236145, "loss": 0.5884, "nll_loss": 0.5416626930236816, "rewards/accuracies": 0.75, "rewards/chosen": -0.21193768084049225, "rewards/margins": 0.18420939147472382, "rewards/rejected": -0.39614707231521606, "step": 6144 }, { "epoch": 16.824093086926762, "grad_norm": 6.145054340362549, "learning_rate": 1.5863013698630137e-07, "log_odds_chosen": 1.2040886878967285, "log_odds_ratio": -0.39143383502960205, "logits/chosen": 0.8593368530273438, "logits/rejected": 0.8166847229003906, "logps/chosen": -2.2520995140075684, "logps/rejected": -3.3390932083129883, "loss": 0.6243, "nll_loss": 0.5851181745529175, "rewards/accuracies": 0.875, "rewards/chosen": -0.22520995140075684, "rewards/margins": 0.10869939625263214, "rewards/rejected": -0.33390936255455017, "step": 6145 }, { "epoch": 16.826830937713893, "grad_norm": 6.4481587409973145, "learning_rate": 1.584931506849315e-07, "log_odds_chosen": 1.7457234859466553, "log_odds_ratio": -0.40135458111763, "logits/chosen": 1.0997254848480225, "logits/rejected": 1.076309323310852, "logps/chosen": -2.570694923400879, "logps/rejected": -4.249608039855957, "loss": 0.7027, "nll_loss": 0.662604808807373, "rewards/accuracies": 0.75, "rewards/chosen": -0.25706949830055237, "rewards/margins": 0.167891263961792, "rewards/rejected": -0.42496076226234436, "step": 6146 }, { "epoch": 16.829568788501028, "grad_norm": 6.0987019538879395, "learning_rate": 1.5835616438356164e-07, "log_odds_chosen": 2.0674307346343994, "log_odds_ratio": -0.21199269592761993, "logits/chosen": 0.6520969867706299, "logits/rejected": 0.5774471759796143, "logps/chosen": -2.6394436359405518, "logps/rejected": -4.60017204284668, "loss": 0.7354, "nll_loss": 0.7141556739807129, "rewards/accuracies": 1.0, "rewards/chosen": -0.2639443874359131, "rewards/margins": 0.19607284665107727, "rewards/rejected": -0.46001720428466797, "step": 6147 }, { "epoch": 16.83230663928816, "grad_norm": 8.922447204589844, "learning_rate": 1.5821917808219177e-07, "log_odds_chosen": 1.7910479307174683, "log_odds_ratio": -0.6078476309776306, "logits/chosen": 0.862868070602417, "logits/rejected": 0.8388674259185791, "logps/chosen": -2.2329540252685547, "logps/rejected": -3.9195878505706787, "loss": 0.6889, "nll_loss": 0.6281429529190063, "rewards/accuracies": 0.875, "rewards/chosen": -0.2232954353094101, "rewards/margins": 0.1686633676290512, "rewards/rejected": -0.3919587731361389, "step": 6148 }, { "epoch": 16.83504449007529, "grad_norm": 8.667202949523926, "learning_rate": 1.5808219178082192e-07, "log_odds_chosen": 1.790969967842102, "log_odds_ratio": -0.4131637513637543, "logits/chosen": 0.9254156351089478, "logits/rejected": 0.9136302471160889, "logps/chosen": -3.1573984622955322, "logps/rejected": -4.881396293640137, "loss": 0.7898, "nll_loss": 0.7485287189483643, "rewards/accuracies": 0.75, "rewards/chosen": -0.31573987007141113, "rewards/margins": 0.17239978909492493, "rewards/rejected": -0.48813965916633606, "step": 6149 }, { "epoch": 16.83778234086242, "grad_norm": 7.646531105041504, "learning_rate": 1.5794520547945205e-07, "log_odds_chosen": 1.9018867015838623, "log_odds_ratio": -0.510805606842041, "logits/chosen": 0.919033944606781, "logits/rejected": 0.8955636620521545, "logps/chosen": -2.699974298477173, "logps/rejected": -4.525829315185547, "loss": 0.6883, "nll_loss": 0.6372300386428833, "rewards/accuracies": 0.75, "rewards/chosen": -0.2699974477291107, "rewards/margins": 0.18258550763130188, "rewards/rejected": -0.4525829553604126, "step": 6150 }, { "epoch": 16.840520191649556, "grad_norm": 5.205478668212891, "learning_rate": 1.5780821917808217e-07, "log_odds_chosen": 2.0031354427337646, "log_odds_ratio": -0.21702054142951965, "logits/chosen": 1.0805065631866455, "logits/rejected": 1.1397520303726196, "logps/chosen": -2.3066580295562744, "logps/rejected": -4.210651397705078, "loss": 0.5654, "nll_loss": 0.5437458157539368, "rewards/accuracies": 1.0, "rewards/chosen": -0.23066581785678864, "rewards/margins": 0.19039931893348694, "rewards/rejected": -0.42106515169143677, "step": 6151 }, { "epoch": 16.843258042436688, "grad_norm": 5.099272727966309, "learning_rate": 1.5767123287671232e-07, "log_odds_chosen": 2.424105167388916, "log_odds_ratio": -0.2737952470779419, "logits/chosen": 0.995111346244812, "logits/rejected": 0.9056845307350159, "logps/chosen": -1.5762979984283447, "logps/rejected": -3.8453140258789062, "loss": 0.6511, "nll_loss": 0.6236864924430847, "rewards/accuracies": 1.0, "rewards/chosen": -0.1576298177242279, "rewards/margins": 0.2269015908241272, "rewards/rejected": -0.3845314085483551, "step": 6152 }, { "epoch": 16.84599589322382, "grad_norm": 6.025333881378174, "learning_rate": 1.5753424657534245e-07, "log_odds_chosen": 1.0306830406188965, "log_odds_ratio": -0.3977137804031372, "logits/chosen": 0.7350526452064514, "logits/rejected": 0.6997389793395996, "logps/chosen": -1.8097655773162842, "logps/rejected": -2.7094359397888184, "loss": 0.5484, "nll_loss": 0.5086734294891357, "rewards/accuracies": 0.875, "rewards/chosen": -0.18097653985023499, "rewards/margins": 0.0899670422077179, "rewards/rejected": -0.2709435820579529, "step": 6153 }, { "epoch": 16.84873374401095, "grad_norm": 5.437617301940918, "learning_rate": 1.573972602739726e-07, "log_odds_chosen": 1.119649887084961, "log_odds_ratio": -0.3651783764362335, "logits/chosen": 0.7093102335929871, "logits/rejected": 0.7042129635810852, "logps/chosen": -2.184359073638916, "logps/rejected": -3.1882424354553223, "loss": 0.5833, "nll_loss": 0.5468133687973022, "rewards/accuracies": 0.875, "rewards/chosen": -0.21843591332435608, "rewards/margins": 0.10038833320140839, "rewards/rejected": -0.31882423162460327, "step": 6154 }, { "epoch": 16.851471594798085, "grad_norm": 11.361161231994629, "learning_rate": 1.5726027397260273e-07, "log_odds_chosen": 2.8625102043151855, "log_odds_ratio": -0.4356781244277954, "logits/chosen": 0.9570282101631165, "logits/rejected": 1.0063424110412598, "logps/chosen": -3.1237356662750244, "logps/rejected": -5.907063961029053, "loss": 0.6929, "nll_loss": 0.649336576461792, "rewards/accuracies": 0.625, "rewards/chosen": -0.312373548746109, "rewards/margins": 0.27833282947540283, "rewards/rejected": -0.5907064080238342, "step": 6155 }, { "epoch": 16.854209445585216, "grad_norm": 6.40562629699707, "learning_rate": 1.5712328767123288e-07, "log_odds_chosen": 3.8087406158447266, "log_odds_ratio": -0.2619470953941345, "logits/chosen": 0.716008186340332, "logits/rejected": 0.8115740418434143, "logps/chosen": -2.2395501136779785, "logps/rejected": -5.943418502807617, "loss": 0.7255, "nll_loss": 0.699306845664978, "rewards/accuracies": 0.875, "rewards/chosen": -0.22395500540733337, "rewards/margins": 0.37038683891296387, "rewards/rejected": -0.5943418741226196, "step": 6156 }, { "epoch": 16.856947296372347, "grad_norm": 6.995619297027588, "learning_rate": 1.56986301369863e-07, "log_odds_chosen": 3.5235557556152344, "log_odds_ratio": -0.09811946749687195, "logits/chosen": 1.048880934715271, "logits/rejected": 1.1743481159210205, "logps/chosen": -2.2077009677886963, "logps/rejected": -5.5853095054626465, "loss": 0.6919, "nll_loss": 0.6820793747901917, "rewards/accuracies": 1.0, "rewards/chosen": -0.22077010571956635, "rewards/margins": 0.3377608358860016, "rewards/rejected": -0.5585309267044067, "step": 6157 }, { "epoch": 16.859685147159478, "grad_norm": 4.9494547843933105, "learning_rate": 1.5684931506849313e-07, "log_odds_chosen": 2.2784171104431152, "log_odds_ratio": -0.2219662368297577, "logits/chosen": 0.6689677834510803, "logits/rejected": 0.7526413202285767, "logps/chosen": -2.1632652282714844, "logps/rejected": -4.303415775299072, "loss": 0.615, "nll_loss": 0.5928452610969543, "rewards/accuracies": 1.0, "rewards/chosen": -0.2163265198469162, "rewards/margins": 0.21401506662368774, "rewards/rejected": -0.43034160137176514, "step": 6158 }, { "epoch": 16.862422997946613, "grad_norm": 5.383095741271973, "learning_rate": 1.5671232876712328e-07, "log_odds_chosen": 1.8743996620178223, "log_odds_ratio": -0.24399152398109436, "logits/chosen": 0.768670916557312, "logits/rejected": 0.7421939373016357, "logps/chosen": -1.8169374465942383, "logps/rejected": -3.479896306991577, "loss": 0.5611, "nll_loss": 0.536723256111145, "rewards/accuracies": 0.875, "rewards/chosen": -0.18169376254081726, "rewards/margins": 0.1662958562374115, "rewards/rejected": -0.34798961877822876, "step": 6159 }, { "epoch": 16.865160848733744, "grad_norm": 4.967133045196533, "learning_rate": 1.565753424657534e-07, "log_odds_chosen": 2.805243492126465, "log_odds_ratio": -0.2221483439207077, "logits/chosen": 0.9897420406341553, "logits/rejected": 0.9623370170593262, "logps/chosen": -1.7751364707946777, "logps/rejected": -4.444306373596191, "loss": 0.5876, "nll_loss": 0.5654281377792358, "rewards/accuracies": 1.0, "rewards/chosen": -0.17751365900039673, "rewards/margins": 0.266916960477829, "rewards/rejected": -0.4444306194782257, "step": 6160 }, { "epoch": 16.867898699520875, "grad_norm": 10.171964645385742, "learning_rate": 1.5643835616438356e-07, "log_odds_chosen": 0.8748639225959778, "log_odds_ratio": -0.7398644089698792, "logits/chosen": 0.8124990463256836, "logits/rejected": 0.8544479608535767, "logps/chosen": -3.2322099208831787, "logps/rejected": -4.052532196044922, "loss": 0.7963, "nll_loss": 0.722275972366333, "rewards/accuracies": 0.75, "rewards/chosen": -0.32322099804878235, "rewards/margins": 0.0820322334766388, "rewards/rejected": -0.40525323152542114, "step": 6161 }, { "epoch": 16.870636550308006, "grad_norm": 5.510783672332764, "learning_rate": 1.563013698630137e-07, "log_odds_chosen": 1.822898268699646, "log_odds_ratio": -0.22110822796821594, "logits/chosen": 0.6945703029632568, "logits/rejected": 0.6422476172447205, "logps/chosen": -1.9180338382720947, "logps/rejected": -3.6115851402282715, "loss": 0.6738, "nll_loss": 0.6516648530960083, "rewards/accuracies": 1.0, "rewards/chosen": -0.19180338084697723, "rewards/margins": 0.1693551242351532, "rewards/rejected": -0.3611585199832916, "step": 6162 }, { "epoch": 16.87337440109514, "grad_norm": 5.467930316925049, "learning_rate": 1.5616438356164384e-07, "log_odds_chosen": 1.510774850845337, "log_odds_ratio": -0.3586563467979431, "logits/chosen": 0.9491585493087769, "logits/rejected": 0.8824468851089478, "logps/chosen": -1.995536208152771, "logps/rejected": -3.403597116470337, "loss": 0.7099, "nll_loss": 0.6740751266479492, "rewards/accuracies": 0.875, "rewards/chosen": -0.19955362379550934, "rewards/margins": 0.14080609381198883, "rewards/rejected": -0.34035971760749817, "step": 6163 }, { "epoch": 16.876112251882272, "grad_norm": 4.563131332397461, "learning_rate": 1.5602739726027396e-07, "log_odds_chosen": 2.9458627700805664, "log_odds_ratio": -0.19417104125022888, "logits/chosen": 0.8245091438293457, "logits/rejected": 0.8617928624153137, "logps/chosen": -2.044820785522461, "logps/rejected": -4.880651950836182, "loss": 0.5629, "nll_loss": 0.543476402759552, "rewards/accuracies": 1.0, "rewards/chosen": -0.2044820487499237, "rewards/margins": 0.2835831344127655, "rewards/rejected": -0.4880651831626892, "step": 6164 }, { "epoch": 16.878850102669404, "grad_norm": 8.80644416809082, "learning_rate": 1.558904109589041e-07, "log_odds_chosen": 3.835697650909424, "log_odds_ratio": -0.22631123661994934, "logits/chosen": 0.9262949228286743, "logits/rejected": 0.9405468702316284, "logps/chosen": -2.5540521144866943, "logps/rejected": -6.185086727142334, "loss": 0.6974, "nll_loss": 0.6747287511825562, "rewards/accuracies": 0.875, "rewards/chosen": -0.2554052174091339, "rewards/margins": 0.363103449344635, "rewards/rejected": -0.6185086965560913, "step": 6165 }, { "epoch": 16.88158795345654, "grad_norm": 6.37990140914917, "learning_rate": 1.5575342465753424e-07, "log_odds_chosen": 2.4990267753601074, "log_odds_ratio": -0.2425871044397354, "logits/chosen": 0.8496410846710205, "logits/rejected": 0.9314212203025818, "logps/chosen": -2.650329828262329, "logps/rejected": -4.991107940673828, "loss": 0.7007, "nll_loss": 0.6764557361602783, "rewards/accuracies": 0.875, "rewards/chosen": -0.26503297686576843, "rewards/margins": 0.2340778410434723, "rewards/rejected": -0.4991108179092407, "step": 6166 }, { "epoch": 16.88432580424367, "grad_norm": 4.751116752624512, "learning_rate": 1.5561643835616437e-07, "log_odds_chosen": 3.119154930114746, "log_odds_ratio": -0.13697917759418488, "logits/chosen": 0.7282635569572449, "logits/rejected": 0.7480969429016113, "logps/chosen": -2.4726076126098633, "logps/rejected": -5.487752914428711, "loss": 0.5992, "nll_loss": 0.5854867100715637, "rewards/accuracies": 1.0, "rewards/chosen": -0.24726074934005737, "rewards/margins": 0.30151453614234924, "rewards/rejected": -0.548775315284729, "step": 6167 }, { "epoch": 16.8870636550308, "grad_norm": 4.27913761138916, "learning_rate": 1.5547945205479452e-07, "log_odds_chosen": 2.400477409362793, "log_odds_ratio": -0.17677909135818481, "logits/chosen": 1.0931938886642456, "logits/rejected": 1.1305667161941528, "logps/chosen": -2.1968255043029785, "logps/rejected": -4.4709858894348145, "loss": 0.5713, "nll_loss": 0.553607702255249, "rewards/accuracies": 1.0, "rewards/chosen": -0.21968254446983337, "rewards/margins": 0.2274160385131836, "rewards/rejected": -0.4470985531806946, "step": 6168 }, { "epoch": 16.889801505817932, "grad_norm": 9.101463317871094, "learning_rate": 1.5534246575342467e-07, "log_odds_chosen": 2.0789618492126465, "log_odds_ratio": -0.6776818633079529, "logits/chosen": 1.0059494972229004, "logits/rejected": 0.9597320556640625, "logps/chosen": -3.0326879024505615, "logps/rejected": -4.9934163093566895, "loss": 0.7062, "nll_loss": 0.6384279727935791, "rewards/accuracies": 0.875, "rewards/chosen": -0.30326879024505615, "rewards/margins": 0.19607284665107727, "rewards/rejected": -0.49934160709381104, "step": 6169 }, { "epoch": 16.892539356605067, "grad_norm": 5.389812469482422, "learning_rate": 1.5520547945205477e-07, "log_odds_chosen": 2.6385364532470703, "log_odds_ratio": -0.20083282887935638, "logits/chosen": 0.9627090692520142, "logits/rejected": 0.9931467175483704, "logps/chosen": -1.8433635234832764, "logps/rejected": -4.318789005279541, "loss": 0.5987, "nll_loss": 0.5786388516426086, "rewards/accuracies": 1.0, "rewards/chosen": -0.1843363642692566, "rewards/margins": 0.24754256010055542, "rewards/rejected": -0.431878924369812, "step": 6170 }, { "epoch": 16.895277207392198, "grad_norm": 5.541387557983398, "learning_rate": 1.5506849315068492e-07, "log_odds_chosen": 2.082628011703491, "log_odds_ratio": -0.2429976761341095, "logits/chosen": 0.8786885142326355, "logits/rejected": 0.9121692776679993, "logps/chosen": -1.9318699836730957, "logps/rejected": -3.877774477005005, "loss": 0.5687, "nll_loss": 0.5443976521492004, "rewards/accuracies": 1.0, "rewards/chosen": -0.19318699836730957, "rewards/margins": 0.19459044933319092, "rewards/rejected": -0.3877774477005005, "step": 6171 }, { "epoch": 16.89801505817933, "grad_norm": 5.62970495223999, "learning_rate": 1.5493150684931505e-07, "log_odds_chosen": 1.603448510169983, "log_odds_ratio": -0.2737364172935486, "logits/chosen": 0.8119217157363892, "logits/rejected": 0.7470380067825317, "logps/chosen": -2.340019941329956, "logps/rejected": -3.83450984954834, "loss": 0.6322, "nll_loss": 0.6048021912574768, "rewards/accuracies": 1.0, "rewards/chosen": -0.2340020090341568, "rewards/margins": 0.14944899082183838, "rewards/rejected": -0.3834509551525116, "step": 6172 }, { "epoch": 16.90075290896646, "grad_norm": 4.288751125335693, "learning_rate": 1.547945205479452e-07, "log_odds_chosen": 2.769165515899658, "log_odds_ratio": -0.16281560063362122, "logits/chosen": 0.8272103667259216, "logits/rejected": 0.8861464262008667, "logps/chosen": -2.2112178802490234, "logps/rejected": -4.8152289390563965, "loss": 0.6484, "nll_loss": 0.632106363773346, "rewards/accuracies": 1.0, "rewards/chosen": -0.22112181782722473, "rewards/margins": 0.2604010999202728, "rewards/rejected": -0.48152291774749756, "step": 6173 }, { "epoch": 16.903490759753595, "grad_norm": 7.757606506347656, "learning_rate": 1.5465753424657533e-07, "log_odds_chosen": 1.4679064750671387, "log_odds_ratio": -0.38955825567245483, "logits/chosen": 0.8221287727355957, "logits/rejected": 0.8653761148452759, "logps/chosen": -2.973104476928711, "logps/rejected": -4.370921611785889, "loss": 0.8866, "nll_loss": 0.847614049911499, "rewards/accuracies": 0.875, "rewards/chosen": -0.297310471534729, "rewards/margins": 0.13978168368339539, "rewards/rejected": -0.437092125415802, "step": 6174 }, { "epoch": 16.906228610540726, "grad_norm": 5.041393280029297, "learning_rate": 1.5452054794520548e-07, "log_odds_chosen": 2.373786449432373, "log_odds_ratio": -0.3147566616535187, "logits/chosen": 0.8613837957382202, "logits/rejected": 0.8282673954963684, "logps/chosen": -1.6949406862258911, "logps/rejected": -3.930302381515503, "loss": 0.5377, "nll_loss": 0.5062062740325928, "rewards/accuracies": 0.875, "rewards/chosen": -0.16949406266212463, "rewards/margins": 0.2235361635684967, "rewards/rejected": -0.39303022623062134, "step": 6175 }, { "epoch": 16.908966461327857, "grad_norm": 5.653700351715088, "learning_rate": 1.5438356164383563e-07, "log_odds_chosen": 1.5703257322311401, "log_odds_ratio": -0.22808623313903809, "logits/chosen": 0.973351001739502, "logits/rejected": 0.9516106247901917, "logps/chosen": -2.0064096450805664, "logps/rejected": -3.4272396564483643, "loss": 0.5723, "nll_loss": 0.5494644045829773, "rewards/accuracies": 1.0, "rewards/chosen": -0.2006409764289856, "rewards/margins": 0.14208300411701202, "rewards/rejected": -0.3427239656448364, "step": 6176 }, { "epoch": 16.91170431211499, "grad_norm": 4.787747859954834, "learning_rate": 1.5424657534246573e-07, "log_odds_chosen": 2.4706854820251465, "log_odds_ratio": -0.19221311807632446, "logits/chosen": 0.7482741475105286, "logits/rejected": 0.8836188316345215, "logps/chosen": -2.324155807495117, "logps/rejected": -4.669939041137695, "loss": 0.7024, "nll_loss": 0.6832255125045776, "rewards/accuracies": 1.0, "rewards/chosen": -0.2324155569076538, "rewards/margins": 0.23457835614681244, "rewards/rejected": -0.46699389815330505, "step": 6177 }, { "epoch": 16.914442162902123, "grad_norm": 6.2120184898376465, "learning_rate": 1.5410958904109588e-07, "log_odds_chosen": 1.6495869159698486, "log_odds_ratio": -0.21564719080924988, "logits/chosen": 0.8302680253982544, "logits/rejected": 0.8686524629592896, "logps/chosen": -1.6407299041748047, "logps/rejected": -3.043416738510132, "loss": 0.5059, "nll_loss": 0.48429960012435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.16407299041748047, "rewards/margins": 0.14026865363121033, "rewards/rejected": -0.3043416738510132, "step": 6178 }, { "epoch": 16.917180013689254, "grad_norm": 4.963136672973633, "learning_rate": 1.53972602739726e-07, "log_odds_chosen": 2.3763914108276367, "log_odds_ratio": -0.13517074286937714, "logits/chosen": 0.8034083843231201, "logits/rejected": 0.8160853385925293, "logps/chosen": -2.544658660888672, "logps/rejected": -4.821163654327393, "loss": 0.6402, "nll_loss": 0.6266728639602661, "rewards/accuracies": 1.0, "rewards/chosen": -0.25446587800979614, "rewards/margins": 0.2276504933834076, "rewards/rejected": -0.48211634159088135, "step": 6179 }, { "epoch": 16.919917864476385, "grad_norm": 6.190413475036621, "learning_rate": 1.5383561643835616e-07, "log_odds_chosen": 1.6775920391082764, "log_odds_ratio": -0.3645803928375244, "logits/chosen": 0.7540193796157837, "logits/rejected": 0.7643595337867737, "logps/chosen": -1.8456525802612305, "logps/rejected": -3.3622500896453857, "loss": 0.5461, "nll_loss": 0.509645938873291, "rewards/accuracies": 0.75, "rewards/chosen": -0.18456526100635529, "rewards/margins": 0.1516597718000412, "rewards/rejected": -0.3362250328063965, "step": 6180 }, { "epoch": 16.922655715263517, "grad_norm": 6.928447723388672, "learning_rate": 1.536986301369863e-07, "log_odds_chosen": 1.4907313585281372, "log_odds_ratio": -0.43245965242385864, "logits/chosen": 0.979613184928894, "logits/rejected": 1.0229973793029785, "logps/chosen": -2.2601428031921387, "logps/rejected": -3.6441421508789062, "loss": 0.7081, "nll_loss": 0.6648294925689697, "rewards/accuracies": 0.875, "rewards/chosen": -0.22601425647735596, "rewards/margins": 0.13839995861053467, "rewards/rejected": -0.3644142150878906, "step": 6181 }, { "epoch": 16.92539356605065, "grad_norm": 5.130957126617432, "learning_rate": 1.5356164383561644e-07, "log_odds_chosen": 2.414818048477173, "log_odds_ratio": -0.25827595591545105, "logits/chosen": 0.7988036274909973, "logits/rejected": 0.8987108469009399, "logps/chosen": -2.0153515338897705, "logps/rejected": -4.250311851501465, "loss": 0.6097, "nll_loss": 0.5838455557823181, "rewards/accuracies": 0.875, "rewards/chosen": -0.2015351504087448, "rewards/margins": 0.22349601984024048, "rewards/rejected": -0.4250311851501465, "step": 6182 }, { "epoch": 16.928131416837783, "grad_norm": 6.189499855041504, "learning_rate": 1.534246575342466e-07, "log_odds_chosen": 1.700392723083496, "log_odds_ratio": -0.36580488085746765, "logits/chosen": 0.9237239956855774, "logits/rejected": 1.0673959255218506, "logps/chosen": -2.7957231998443604, "logps/rejected": -4.453093528747559, "loss": 0.6929, "nll_loss": 0.6563618183135986, "rewards/accuracies": 0.875, "rewards/chosen": -0.27957233786582947, "rewards/margins": 0.16573700308799744, "rewards/rejected": -0.4453093409538269, "step": 6183 }, { "epoch": 16.930869267624914, "grad_norm": 5.622567653656006, "learning_rate": 1.532876712328767e-07, "log_odds_chosen": 1.8883228302001953, "log_odds_ratio": -0.1904778778553009, "logits/chosen": 0.8901107311248779, "logits/rejected": 0.8371530771255493, "logps/chosen": -1.5088224411010742, "logps/rejected": -3.1847405433654785, "loss": 0.5229, "nll_loss": 0.5038682818412781, "rewards/accuracies": 1.0, "rewards/chosen": -0.15088224411010742, "rewards/margins": 0.16759181022644043, "rewards/rejected": -0.31847408413887024, "step": 6184 }, { "epoch": 16.933607118412045, "grad_norm": 7.6413893699646, "learning_rate": 1.5315068493150684e-07, "log_odds_chosen": 1.245253086090088, "log_odds_ratio": -0.6086498498916626, "logits/chosen": 0.6900300979614258, "logits/rejected": 0.6201685667037964, "logps/chosen": -2.347522735595703, "logps/rejected": -3.4577980041503906, "loss": 0.8116, "nll_loss": 0.7506945729255676, "rewards/accuracies": 0.75, "rewards/chosen": -0.23475226759910583, "rewards/margins": 0.1110275536775589, "rewards/rejected": -0.3457798361778259, "step": 6185 }, { "epoch": 16.93634496919918, "grad_norm": 6.272854804992676, "learning_rate": 1.5301369863013697e-07, "log_odds_chosen": 0.9288007020950317, "log_odds_ratio": -0.650745689868927, "logits/chosen": 0.7966502904891968, "logits/rejected": 0.8807671070098877, "logps/chosen": -2.4833173751831055, "logps/rejected": -3.3026821613311768, "loss": 0.6973, "nll_loss": 0.6322340965270996, "rewards/accuracies": 0.625, "rewards/chosen": -0.2483317255973816, "rewards/margins": 0.08193649351596832, "rewards/rejected": -0.3302682340145111, "step": 6186 }, { "epoch": 16.93908281998631, "grad_norm": 4.258063316345215, "learning_rate": 1.5287671232876712e-07, "log_odds_chosen": 3.431943416595459, "log_odds_ratio": -0.07367367297410965, "logits/chosen": 0.8195356130599976, "logits/rejected": 0.8564957976341248, "logps/chosen": -1.9714219570159912, "logps/rejected": -5.235827445983887, "loss": 0.6327, "nll_loss": 0.6253017783164978, "rewards/accuracies": 1.0, "rewards/chosen": -0.19714219868183136, "rewards/margins": 0.3264405131340027, "rewards/rejected": -0.5235826969146729, "step": 6187 }, { "epoch": 16.941820670773442, "grad_norm": 5.626601219177246, "learning_rate": 1.5273972602739727e-07, "log_odds_chosen": 1.8870491981506348, "log_odds_ratio": -0.2047472894191742, "logits/chosen": 0.6242591142654419, "logits/rejected": 0.6141334772109985, "logps/chosen": -2.274747371673584, "logps/rejected": -4.032066822052002, "loss": 0.615, "nll_loss": 0.5945723056793213, "rewards/accuracies": 1.0, "rewards/chosen": -0.22747474908828735, "rewards/margins": 0.17573192715644836, "rewards/rejected": -0.4032066762447357, "step": 6188 }, { "epoch": 16.944558521560573, "grad_norm": 4.579176425933838, "learning_rate": 1.526027397260274e-07, "log_odds_chosen": 3.48980975151062, "log_odds_ratio": -0.17810049653053284, "logits/chosen": 0.6773588061332703, "logits/rejected": 0.7229125499725342, "logps/chosen": -1.8550338745117188, "logps/rejected": -5.189312934875488, "loss": 0.6384, "nll_loss": 0.6206200122833252, "rewards/accuracies": 0.875, "rewards/chosen": -0.18550339341163635, "rewards/margins": 0.33342787623405457, "rewards/rejected": -0.5189312696456909, "step": 6189 }, { "epoch": 16.947296372347708, "grad_norm": 6.59024715423584, "learning_rate": 1.5246575342465755e-07, "log_odds_chosen": 2.4715819358825684, "log_odds_ratio": -0.233542799949646, "logits/chosen": 0.5373479127883911, "logits/rejected": 0.5698909163475037, "logps/chosen": -1.7264151573181152, "logps/rejected": -4.062770843505859, "loss": 0.6051, "nll_loss": 0.5817134380340576, "rewards/accuracies": 1.0, "rewards/chosen": -0.17264151573181152, "rewards/margins": 0.2336355447769165, "rewards/rejected": -0.406277060508728, "step": 6190 }, { "epoch": 16.95003422313484, "grad_norm": 5.968214511871338, "learning_rate": 1.5232876712328765e-07, "log_odds_chosen": 2.1832921504974365, "log_odds_ratio": -0.19097301363945007, "logits/chosen": 0.9290487766265869, "logits/rejected": 0.9671932458877563, "logps/chosen": -2.4001500606536865, "logps/rejected": -4.444217681884766, "loss": 0.6672, "nll_loss": 0.6481168866157532, "rewards/accuracies": 1.0, "rewards/chosen": -0.24001500010490417, "rewards/margins": 0.2044067531824112, "rewards/rejected": -0.44442176818847656, "step": 6191 }, { "epoch": 16.95277207392197, "grad_norm": 5.112955570220947, "learning_rate": 1.521917808219178e-07, "log_odds_chosen": 2.9426960945129395, "log_odds_ratio": -0.15049433708190918, "logits/chosen": 0.8651043176651001, "logits/rejected": 0.9151223301887512, "logps/chosen": -1.9336457252502441, "logps/rejected": -4.669173717498779, "loss": 0.5764, "nll_loss": 0.5613439083099365, "rewards/accuracies": 1.0, "rewards/chosen": -0.19336457550525665, "rewards/margins": 0.2735527753829956, "rewards/rejected": -0.46691739559173584, "step": 6192 }, { "epoch": 16.955509924709105, "grad_norm": 4.9635725021362305, "learning_rate": 1.5205479452054795e-07, "log_odds_chosen": 2.7291769981384277, "log_odds_ratio": -0.1961802989244461, "logits/chosen": 0.9510229825973511, "logits/rejected": 0.9205286502838135, "logps/chosen": -2.1802213191986084, "logps/rejected": -4.8013458251953125, "loss": 0.716, "nll_loss": 0.6964136362075806, "rewards/accuracies": 1.0, "rewards/chosen": -0.21802213788032532, "rewards/margins": 0.26211243867874146, "rewards/rejected": -0.48013460636138916, "step": 6193 }, { "epoch": 16.958247775496236, "grad_norm": 6.238871097564697, "learning_rate": 1.5191780821917808e-07, "log_odds_chosen": 1.9677854776382446, "log_odds_ratio": -0.2248358130455017, "logits/chosen": 0.981224536895752, "logits/rejected": 0.9094326496124268, "logps/chosen": -2.330223798751831, "logps/rejected": -4.22514533996582, "loss": 0.7084, "nll_loss": 0.6859266757965088, "rewards/accuracies": 1.0, "rewards/chosen": -0.23302239179611206, "rewards/margins": 0.1894921362400055, "rewards/rejected": -0.42251449823379517, "step": 6194 }, { "epoch": 16.960985626283367, "grad_norm": 5.342016696929932, "learning_rate": 1.5178082191780823e-07, "log_odds_chosen": 2.7538411617279053, "log_odds_ratio": -0.2246173918247223, "logits/chosen": 0.7622133493423462, "logits/rejected": 0.7259584665298462, "logps/chosen": -2.4181809425354004, "logps/rejected": -5.05802583694458, "loss": 0.6582, "nll_loss": 0.6357390284538269, "rewards/accuracies": 0.875, "rewards/chosen": -0.24181810021400452, "rewards/margins": 0.2639845013618469, "rewards/rejected": -0.5058026313781738, "step": 6195 }, { "epoch": 16.9637234770705, "grad_norm": 5.032089710235596, "learning_rate": 1.5164383561643835e-07, "log_odds_chosen": 2.843973159790039, "log_odds_ratio": -0.14220818877220154, "logits/chosen": 1.051663875579834, "logits/rejected": 1.0939702987670898, "logps/chosen": -1.7764887809753418, "logps/rejected": -4.392569541931152, "loss": 0.5851, "nll_loss": 0.5709273219108582, "rewards/accuracies": 1.0, "rewards/chosen": -0.1776488721370697, "rewards/margins": 0.2616080939769745, "rewards/rejected": -0.4392569661140442, "step": 6196 }, { "epoch": 16.966461327857633, "grad_norm": 5.379685401916504, "learning_rate": 1.5150684931506848e-07, "log_odds_chosen": 2.81925368309021, "log_odds_ratio": -0.30356353521347046, "logits/chosen": 0.8543606996536255, "logits/rejected": 0.7778089046478271, "logps/chosen": -2.6417043209075928, "logps/rejected": -5.4133734703063965, "loss": 0.7607, "nll_loss": 0.730379045009613, "rewards/accuracies": 0.875, "rewards/chosen": -0.26417043805122375, "rewards/margins": 0.2771669328212738, "rewards/rejected": -0.5413373112678528, "step": 6197 }, { "epoch": 16.969199178644764, "grad_norm": 5.992464065551758, "learning_rate": 1.513698630136986e-07, "log_odds_chosen": 2.7169125080108643, "log_odds_ratio": -0.2512926459312439, "logits/chosen": 0.7975227236747742, "logits/rejected": 0.7544729113578796, "logps/chosen": -2.041998863220215, "logps/rejected": -4.5513434410095215, "loss": 0.5843, "nll_loss": 0.5591672658920288, "rewards/accuracies": 0.875, "rewards/chosen": -0.2041998654603958, "rewards/margins": 0.2509344816207886, "rewards/rejected": -0.4551343321800232, "step": 6198 }, { "epoch": 16.971937029431896, "grad_norm": 4.71646785736084, "learning_rate": 1.5123287671232876e-07, "log_odds_chosen": 2.460451126098633, "log_odds_ratio": -0.17398019134998322, "logits/chosen": 0.9456262588500977, "logits/rejected": 0.9802974462509155, "logps/chosen": -1.673839807510376, "logps/rejected": -3.926856517791748, "loss": 0.5639, "nll_loss": 0.5465517044067383, "rewards/accuracies": 1.0, "rewards/chosen": -0.16738399863243103, "rewards/margins": 0.22530169785022736, "rewards/rejected": -0.3926857113838196, "step": 6199 }, { "epoch": 16.974674880219027, "grad_norm": 4.829199314117432, "learning_rate": 1.510958904109589e-07, "log_odds_chosen": 3.356023073196411, "log_odds_ratio": -0.17739351093769073, "logits/chosen": 0.840923011302948, "logits/rejected": 0.8812237977981567, "logps/chosen": -2.3138725757598877, "logps/rejected": -5.555758953094482, "loss": 0.623, "nll_loss": 0.6053058505058289, "rewards/accuracies": 1.0, "rewards/chosen": -0.23138725757598877, "rewards/margins": 0.3241886496543884, "rewards/rejected": -0.5555759072303772, "step": 6200 }, { "epoch": 16.97741273100616, "grad_norm": 5.489652633666992, "learning_rate": 1.5095890410958903e-07, "log_odds_chosen": 1.1183221340179443, "log_odds_ratio": -0.3708101511001587, "logits/chosen": 1.0136655569076538, "logits/rejected": 0.9741786122322083, "logps/chosen": -1.6302307844161987, "logps/rejected": -2.5661473274230957, "loss": 0.6317, "nll_loss": 0.5946110486984253, "rewards/accuracies": 1.0, "rewards/chosen": -0.16302308440208435, "rewards/margins": 0.09359164535999298, "rewards/rejected": -0.2566147446632385, "step": 6201 }, { "epoch": 16.980150581793293, "grad_norm": 5.31561279296875, "learning_rate": 1.5082191780821919e-07, "log_odds_chosen": 1.8465044498443604, "log_odds_ratio": -0.22470831871032715, "logits/chosen": 0.9342474937438965, "logits/rejected": 0.9724895358085632, "logps/chosen": -1.9652960300445557, "logps/rejected": -3.6790599822998047, "loss": 0.5607, "nll_loss": 0.5382543206214905, "rewards/accuracies": 0.875, "rewards/chosen": -0.19652961194515228, "rewards/margins": 0.17137636244297028, "rewards/rejected": -0.36790597438812256, "step": 6202 }, { "epoch": 16.982888432580424, "grad_norm": 8.25646686553955, "learning_rate": 1.506849315068493e-07, "log_odds_chosen": 2.406179189682007, "log_odds_ratio": -0.41644585132598877, "logits/chosen": 0.8744791746139526, "logits/rejected": 0.7733190655708313, "logps/chosen": -2.0404348373413086, "logps/rejected": -4.2386627197265625, "loss": 0.7102, "nll_loss": 0.668587327003479, "rewards/accuracies": 0.75, "rewards/chosen": -0.20404349267482758, "rewards/margins": 0.21982279419898987, "rewards/rejected": -0.42386627197265625, "step": 6203 }, { "epoch": 16.985626283367555, "grad_norm": 6.257929801940918, "learning_rate": 1.5054794520547944e-07, "log_odds_chosen": 1.9710557460784912, "log_odds_ratio": -0.25359082221984863, "logits/chosen": 0.893663227558136, "logits/rejected": 0.9264283180236816, "logps/chosen": -2.9339537620544434, "logps/rejected": -4.825589179992676, "loss": 0.6868, "nll_loss": 0.6614421606063843, "rewards/accuracies": 1.0, "rewards/chosen": -0.29339537024497986, "rewards/margins": 0.18916352093219757, "rewards/rejected": -0.4825589060783386, "step": 6204 }, { "epoch": 16.98836413415469, "grad_norm": 6.028731346130371, "learning_rate": 1.5041095890410956e-07, "log_odds_chosen": 1.6431994438171387, "log_odds_ratio": -0.20418065786361694, "logits/chosen": 0.5886322855949402, "logits/rejected": 0.5822945833206177, "logps/chosen": -1.8591705560684204, "logps/rejected": -3.3215668201446533, "loss": 0.6032, "nll_loss": 0.5827965140342712, "rewards/accuracies": 1.0, "rewards/chosen": -0.18591706454753876, "rewards/margins": 0.14623962342739105, "rewards/rejected": -0.3321566879749298, "step": 6205 }, { "epoch": 16.99110198494182, "grad_norm": 6.117004871368408, "learning_rate": 1.5027397260273972e-07, "log_odds_chosen": 1.6202046871185303, "log_odds_ratio": -0.29261961579322815, "logits/chosen": 0.8062171339988708, "logits/rejected": 0.7663028240203857, "logps/chosen": -1.5210297107696533, "logps/rejected": -2.9792592525482178, "loss": 0.5622, "nll_loss": 0.5329155921936035, "rewards/accuracies": 1.0, "rewards/chosen": -0.1521029770374298, "rewards/margins": 0.14582297205924988, "rewards/rejected": -0.2979259490966797, "step": 6206 }, { "epoch": 16.993839835728952, "grad_norm": 5.448362827301025, "learning_rate": 1.5013698630136987e-07, "log_odds_chosen": 1.5956971645355225, "log_odds_ratio": -0.25921911001205444, "logits/chosen": 0.9536161422729492, "logits/rejected": 0.9724329710006714, "logps/chosen": -2.076385259628296, "logps/rejected": -3.5820531845092773, "loss": 0.6562, "nll_loss": 0.6302589178085327, "rewards/accuracies": 1.0, "rewards/chosen": -0.20763853192329407, "rewards/margins": 0.15056678652763367, "rewards/rejected": -0.35820531845092773, "step": 6207 }, { "epoch": 16.996577686516083, "grad_norm": 8.650308609008789, "learning_rate": 1.5e-07, "log_odds_chosen": 1.319139838218689, "log_odds_ratio": -0.6091238260269165, "logits/chosen": 0.7735550403594971, "logits/rejected": 0.8504358530044556, "logps/chosen": -3.0100975036621094, "logps/rejected": -4.272180080413818, "loss": 0.8504, "nll_loss": 0.7894571423530579, "rewards/accuracies": 0.75, "rewards/chosen": -0.30100974440574646, "rewards/margins": 0.12620824575424194, "rewards/rejected": -0.4272180199623108, "step": 6208 }, { "epoch": 16.999315537303218, "grad_norm": 6.549643039703369, "learning_rate": 1.4986301369863015e-07, "log_odds_chosen": 1.0675337314605713, "log_odds_ratio": -0.3778907060623169, "logits/chosen": 0.8869907259941101, "logits/rejected": 0.8740028142929077, "logps/chosen": -2.3363852500915527, "logps/rejected": -3.3420498371124268, "loss": 0.6525, "nll_loss": 0.6146881580352783, "rewards/accuracies": 0.875, "rewards/chosen": -0.23363852500915527, "rewards/margins": 0.10056646913290024, "rewards/rejected": -0.3342049717903137, "step": 6209 }, { "epoch": 17.00205338809035, "grad_norm": 5.355690956115723, "learning_rate": 1.4972602739726024e-07, "log_odds_chosen": 2.165193557739258, "log_odds_ratio": -0.3054722547531128, "logits/chosen": 0.9493628740310669, "logits/rejected": 1.0147091150283813, "logps/chosen": -2.478086471557617, "logps/rejected": -4.591655254364014, "loss": 0.7348, "nll_loss": 0.7043005228042603, "rewards/accuracies": 0.75, "rewards/chosen": -0.24780866503715515, "rewards/margins": 0.21135689318180084, "rewards/rejected": -0.4591655135154724, "step": 6210 }, { "epoch": 17.00479123887748, "grad_norm": 4.803868770599365, "learning_rate": 1.495890410958904e-07, "log_odds_chosen": 2.025099992752075, "log_odds_ratio": -0.2340235561132431, "logits/chosen": 0.9600841999053955, "logits/rejected": 1.0697698593139648, "logps/chosen": -1.9418902397155762, "logps/rejected": -3.8319883346557617, "loss": 0.5658, "nll_loss": 0.5423955917358398, "rewards/accuracies": 1.0, "rewards/chosen": -0.19418902695178986, "rewards/margins": 0.18900981545448303, "rewards/rejected": -0.3831988573074341, "step": 6211 }, { "epoch": 17.00752908966461, "grad_norm": 6.191370010375977, "learning_rate": 1.4945205479452055e-07, "log_odds_chosen": 2.8904097080230713, "log_odds_ratio": -0.5896797776222229, "logits/chosen": 0.8712625503540039, "logits/rejected": 1.0013604164123535, "logps/chosen": -2.6491174697875977, "logps/rejected": -5.479757308959961, "loss": 0.7818, "nll_loss": 0.7227832078933716, "rewards/accuracies": 0.75, "rewards/chosen": -0.2649117410182953, "rewards/margins": 0.28306400775909424, "rewards/rejected": -0.5479757189750671, "step": 6212 }, { "epoch": 17.010266940451746, "grad_norm": 5.321317672729492, "learning_rate": 1.4931506849315067e-07, "log_odds_chosen": 2.745245933532715, "log_odds_ratio": -0.14421749114990234, "logits/chosen": 0.8111991882324219, "logits/rejected": 0.84171462059021, "logps/chosen": -2.949881076812744, "logps/rejected": -5.6228718757629395, "loss": 0.8912, "nll_loss": 0.8768041133880615, "rewards/accuracies": 1.0, "rewards/chosen": -0.29498809576034546, "rewards/margins": 0.267299085855484, "rewards/rejected": -0.5622872114181519, "step": 6213 }, { "epoch": 17.013004791238878, "grad_norm": 5.72966194152832, "learning_rate": 1.4917808219178083e-07, "log_odds_chosen": 2.6959056854248047, "log_odds_ratio": -0.25453099608421326, "logits/chosen": 0.6948914527893066, "logits/rejected": 0.6796616315841675, "logps/chosen": -1.600163221359253, "logps/rejected": -4.071896076202393, "loss": 0.5518, "nll_loss": 0.5263688564300537, "rewards/accuracies": 0.875, "rewards/chosen": -0.16001631319522858, "rewards/margins": 0.24717330932617188, "rewards/rejected": -0.40718960762023926, "step": 6214 }, { "epoch": 17.01574264202601, "grad_norm": 5.427211761474609, "learning_rate": 1.4904109589041095e-07, "log_odds_chosen": 1.5538911819458008, "log_odds_ratio": -0.2782241702079773, "logits/chosen": 0.8194196224212646, "logits/rejected": 0.8197026252746582, "logps/chosen": -1.8452028036117554, "logps/rejected": -3.227221965789795, "loss": 0.547, "nll_loss": 0.519212543964386, "rewards/accuracies": 1.0, "rewards/chosen": -0.18452030420303345, "rewards/margins": 0.13820190727710724, "rewards/rejected": -0.3227221965789795, "step": 6215 }, { "epoch": 17.018480492813143, "grad_norm": 6.67185115814209, "learning_rate": 1.489041095890411e-07, "log_odds_chosen": 1.460127353668213, "log_odds_ratio": -0.479505717754364, "logits/chosen": 1.1464987993240356, "logits/rejected": 1.1389961242675781, "logps/chosen": -2.3246169090270996, "logps/rejected": -3.6834564208984375, "loss": 0.6316, "nll_loss": 0.5836489200592041, "rewards/accuracies": 0.75, "rewards/chosen": -0.23246169090270996, "rewards/margins": 0.13588394224643707, "rewards/rejected": -0.36834561824798584, "step": 6216 }, { "epoch": 17.021218343600275, "grad_norm": 5.848849773406982, "learning_rate": 1.487671232876712e-07, "log_odds_chosen": 3.025259017944336, "log_odds_ratio": -0.1083962470293045, "logits/chosen": 1.1325623989105225, "logits/rejected": 1.1721718311309814, "logps/chosen": -2.2211718559265137, "logps/rejected": -5.060766696929932, "loss": 0.5996, "nll_loss": 0.5887848734855652, "rewards/accuracies": 1.0, "rewards/chosen": -0.22211720049381256, "rewards/margins": 0.2839595079421997, "rewards/rejected": -0.5060766935348511, "step": 6217 }, { "epoch": 17.023956194387406, "grad_norm": 10.133679389953613, "learning_rate": 1.4863013698630136e-07, "log_odds_chosen": 3.155134439468384, "log_odds_ratio": -0.20234157145023346, "logits/chosen": 1.156144380569458, "logits/rejected": 1.1252365112304688, "logps/chosen": -2.207362651824951, "logps/rejected": -5.23269510269165, "loss": 0.7165, "nll_loss": 0.6962639093399048, "rewards/accuracies": 0.875, "rewards/chosen": -0.22073625028133392, "rewards/margins": 0.30253320932388306, "rewards/rejected": -0.523269534111023, "step": 6218 }, { "epoch": 17.026694045174537, "grad_norm": 5.5130615234375, "learning_rate": 1.484931506849315e-07, "log_odds_chosen": 1.298363208770752, "log_odds_ratio": -0.2788147032260895, "logits/chosen": 1.0709776878356934, "logits/rejected": 0.9778209924697876, "logps/chosen": -1.6104211807250977, "logps/rejected": -2.7504281997680664, "loss": 0.5848, "nll_loss": 0.5569096207618713, "rewards/accuracies": 1.0, "rewards/chosen": -0.16104212403297424, "rewards/margins": 0.11400073766708374, "rewards/rejected": -0.2750428318977356, "step": 6219 }, { "epoch": 17.02943189596167, "grad_norm": 6.4916181564331055, "learning_rate": 1.4835616438356163e-07, "log_odds_chosen": 2.4130303859710693, "log_odds_ratio": -0.18363648653030396, "logits/chosen": 0.6328775882720947, "logits/rejected": 0.6571786403656006, "logps/chosen": -2.0339608192443848, "logps/rejected": -4.2458415031433105, "loss": 0.5374, "nll_loss": 0.5190631747245789, "rewards/accuracies": 1.0, "rewards/chosen": -0.20339608192443848, "rewards/margins": 0.22118808329105377, "rewards/rejected": -0.42458415031433105, "step": 6220 }, { "epoch": 17.032169746748803, "grad_norm": 5.677326679229736, "learning_rate": 1.4821917808219178e-07, "log_odds_chosen": 2.635976791381836, "log_odds_ratio": -0.18851275742053986, "logits/chosen": 0.7598192691802979, "logits/rejected": 0.6917985677719116, "logps/chosen": -2.2029266357421875, "logps/rejected": -4.679661273956299, "loss": 0.5677, "nll_loss": 0.5488957166671753, "rewards/accuracies": 1.0, "rewards/chosen": -0.22029265761375427, "rewards/margins": 0.24767345190048218, "rewards/rejected": -0.46796610951423645, "step": 6221 }, { "epoch": 17.034907597535934, "grad_norm": 6.017711162567139, "learning_rate": 1.480821917808219e-07, "log_odds_chosen": 2.1594882011413574, "log_odds_ratio": -0.21981209516525269, "logits/chosen": 0.6839300990104675, "logits/rejected": 0.7115508317947388, "logps/chosen": -1.6548938751220703, "logps/rejected": -3.575526714324951, "loss": 0.5806, "nll_loss": 0.5585722923278809, "rewards/accuracies": 1.0, "rewards/chosen": -0.16548937559127808, "rewards/margins": 0.19206330180168152, "rewards/rejected": -0.357552707195282, "step": 6222 }, { "epoch": 17.037645448323065, "grad_norm": 6.418084621429443, "learning_rate": 1.4794520547945206e-07, "log_odds_chosen": 2.1741995811462402, "log_odds_ratio": -0.23052915930747986, "logits/chosen": 1.024207592010498, "logits/rejected": 1.0538105964660645, "logps/chosen": -2.105614423751831, "logps/rejected": -4.176357269287109, "loss": 0.6081, "nll_loss": 0.5850962996482849, "rewards/accuracies": 1.0, "rewards/chosen": -0.21056145429611206, "rewards/margins": 0.20707428455352783, "rewards/rejected": -0.4176357388496399, "step": 6223 }, { "epoch": 17.0403832991102, "grad_norm": 5.459595203399658, "learning_rate": 1.4780821917808216e-07, "log_odds_chosen": 3.770095109939575, "log_odds_ratio": -0.16972224414348602, "logits/chosen": 0.8712713122367859, "logits/rejected": 0.8334184885025024, "logps/chosen": -2.5378715991973877, "logps/rejected": -6.188936233520508, "loss": 0.7668, "nll_loss": 0.7498277425765991, "rewards/accuracies": 1.0, "rewards/chosen": -0.25378718972206116, "rewards/margins": 0.3651064336299896, "rewards/rejected": -0.6188936233520508, "step": 6224 }, { "epoch": 17.04312114989733, "grad_norm": 5.127931594848633, "learning_rate": 1.4767123287671231e-07, "log_odds_chosen": 2.371797561645508, "log_odds_ratio": -0.29680728912353516, "logits/chosen": 0.7559707760810852, "logits/rejected": 0.8135417103767395, "logps/chosen": -1.8893578052520752, "logps/rejected": -4.141293525695801, "loss": 0.6527, "nll_loss": 0.622984766960144, "rewards/accuracies": 0.875, "rewards/chosen": -0.1889358013868332, "rewards/margins": 0.225193589925766, "rewards/rejected": -0.414129376411438, "step": 6225 }, { "epoch": 17.045859000684462, "grad_norm": 8.419585227966309, "learning_rate": 1.4753424657534247e-07, "log_odds_chosen": 2.1082088947296143, "log_odds_ratio": -0.6370548009872437, "logits/chosen": 0.9964869022369385, "logits/rejected": 1.0346678495407104, "logps/chosen": -2.5456743240356445, "logps/rejected": -4.573880195617676, "loss": 0.6965, "nll_loss": 0.6328100562095642, "rewards/accuracies": 0.625, "rewards/chosen": -0.254567414522171, "rewards/margins": 0.2028205692768097, "rewards/rejected": -0.4573879837989807, "step": 6226 }, { "epoch": 17.048596851471594, "grad_norm": 6.281980514526367, "learning_rate": 1.473972602739726e-07, "log_odds_chosen": 1.4638347625732422, "log_odds_ratio": -0.4509272575378418, "logits/chosen": 0.8859171867370605, "logits/rejected": 0.93995600938797, "logps/chosen": -2.86067795753479, "logps/rejected": -4.260067939758301, "loss": 0.7697, "nll_loss": 0.724564790725708, "rewards/accuracies": 0.75, "rewards/chosen": -0.28606778383255005, "rewards/margins": 0.13993903994560242, "rewards/rejected": -0.42600682377815247, "step": 6227 }, { "epoch": 17.05133470225873, "grad_norm": 7.678644180297852, "learning_rate": 1.4726027397260274e-07, "log_odds_chosen": 1.1310524940490723, "log_odds_ratio": -0.3926091492176056, "logits/chosen": 0.7131086587905884, "logits/rejected": 0.7122160792350769, "logps/chosen": -2.3721518516540527, "logps/rejected": -3.4199600219726562, "loss": 0.6944, "nll_loss": 0.6550920605659485, "rewards/accuracies": 0.875, "rewards/chosen": -0.23721519112586975, "rewards/margins": 0.10478079319000244, "rewards/rejected": -0.3419960141181946, "step": 6228 }, { "epoch": 17.05407255304586, "grad_norm": 5.338846683502197, "learning_rate": 1.4712328767123287e-07, "log_odds_chosen": 2.110630512237549, "log_odds_ratio": -0.20686858892440796, "logits/chosen": 0.931221604347229, "logits/rejected": 0.9002652168273926, "logps/chosen": -2.1774017810821533, "logps/rejected": -4.165081977844238, "loss": 0.6917, "nll_loss": 0.6710487008094788, "rewards/accuracies": 1.0, "rewards/chosen": -0.21774017810821533, "rewards/margins": 0.19876804947853088, "rewards/rejected": -0.41650819778442383, "step": 6229 }, { "epoch": 17.05681040383299, "grad_norm": 5.491844177246094, "learning_rate": 1.4698630136986302e-07, "log_odds_chosen": 0.7304439544677734, "log_odds_ratio": -0.42991411685943604, "logits/chosen": 0.8930686116218567, "logits/rejected": 0.9127206802368164, "logps/chosen": -2.0566935539245605, "logps/rejected": -2.6655969619750977, "loss": 0.5251, "nll_loss": 0.4820895493030548, "rewards/accuracies": 0.875, "rewards/chosen": -0.2056693732738495, "rewards/margins": 0.060890305787324905, "rewards/rejected": -0.2665596604347229, "step": 6230 }, { "epoch": 17.059548254620122, "grad_norm": 5.191885948181152, "learning_rate": 1.4684931506849315e-07, "log_odds_chosen": 3.529975175857544, "log_odds_ratio": -0.17916829884052277, "logits/chosen": 0.7979072332382202, "logits/rejected": 0.8222481608390808, "logps/chosen": -2.3344614505767822, "logps/rejected": -5.758946895599365, "loss": 0.6474, "nll_loss": 0.6294398903846741, "rewards/accuracies": 1.0, "rewards/chosen": -0.2334461510181427, "rewards/margins": 0.34244856238365173, "rewards/rejected": -0.5758947134017944, "step": 6231 }, { "epoch": 17.062286105407257, "grad_norm": 6.1286234855651855, "learning_rate": 1.4671232876712327e-07, "log_odds_chosen": 1.5110877752304077, "log_odds_ratio": -0.26504337787628174, "logits/chosen": 1.0544248819351196, "logits/rejected": 1.0648733377456665, "logps/chosen": -2.0231070518493652, "logps/rejected": -3.421480655670166, "loss": 0.584, "nll_loss": 0.5574913024902344, "rewards/accuracies": 1.0, "rewards/chosen": -0.202310711145401, "rewards/margins": 0.1398373395204544, "rewards/rejected": -0.3421480655670166, "step": 6232 }, { "epoch": 17.065023956194388, "grad_norm": 6.584780216217041, "learning_rate": 1.4657534246575342e-07, "log_odds_chosen": 1.1975330114364624, "log_odds_ratio": -0.3098101019859314, "logits/chosen": 0.7910143733024597, "logits/rejected": 0.7497680187225342, "logps/chosen": -1.7289639711380005, "logps/rejected": -2.799027681350708, "loss": 0.566, "nll_loss": 0.5349814891815186, "rewards/accuracies": 1.0, "rewards/chosen": -0.1728964000940323, "rewards/margins": 0.10700637102127075, "rewards/rejected": -0.27990278601646423, "step": 6233 }, { "epoch": 17.06776180698152, "grad_norm": 6.208100318908691, "learning_rate": 1.4643835616438355e-07, "log_odds_chosen": 0.2257312387228012, "log_odds_ratio": -0.6853787302970886, "logits/chosen": 0.8489290475845337, "logits/rejected": 0.9147084355354309, "logps/chosen": -2.5380678176879883, "logps/rejected": -2.716951370239258, "loss": 0.7155, "nll_loss": 0.6469822525978088, "rewards/accuracies": 0.625, "rewards/chosen": -0.2538067698478699, "rewards/margins": 0.01788835972547531, "rewards/rejected": -0.2716951370239258, "step": 6234 }, { "epoch": 17.07049965776865, "grad_norm": 6.2947163581848145, "learning_rate": 1.463013698630137e-07, "log_odds_chosen": 2.295616865158081, "log_odds_ratio": -0.2178555727005005, "logits/chosen": 0.973353385925293, "logits/rejected": 1.039503574371338, "logps/chosen": -2.2690348625183105, "logps/rejected": -4.456324577331543, "loss": 0.7179, "nll_loss": 0.6961370706558228, "rewards/accuracies": 1.0, "rewards/chosen": -0.22690346837043762, "rewards/margins": 0.21872898936271667, "rewards/rejected": -0.4456324577331543, "step": 6235 }, { "epoch": 17.073237508555785, "grad_norm": 5.263803958892822, "learning_rate": 1.4616438356164383e-07, "log_odds_chosen": 2.8927736282348633, "log_odds_ratio": -0.13038666546344757, "logits/chosen": 0.7994148135185242, "logits/rejected": 0.8343757390975952, "logps/chosen": -1.4458386898040771, "logps/rejected": -4.055418014526367, "loss": 0.5256, "nll_loss": 0.5125820636749268, "rewards/accuracies": 1.0, "rewards/chosen": -0.14458386600017548, "rewards/margins": 0.26095789670944214, "rewards/rejected": -0.4055417776107788, "step": 6236 }, { "epoch": 17.075975359342916, "grad_norm": 5.823789119720459, "learning_rate": 1.4602739726027395e-07, "log_odds_chosen": 2.0898802280426025, "log_odds_ratio": -0.3074841797351837, "logits/chosen": 1.0139564275741577, "logits/rejected": 1.0965566635131836, "logps/chosen": -2.5064620971679688, "logps/rejected": -4.526336193084717, "loss": 0.7028, "nll_loss": 0.6720564961433411, "rewards/accuracies": 0.875, "rewards/chosen": -0.2506462037563324, "rewards/margins": 0.2019874006509781, "rewards/rejected": -0.4526336193084717, "step": 6237 }, { "epoch": 17.078713210130047, "grad_norm": 5.354281902313232, "learning_rate": 1.458904109589041e-07, "log_odds_chosen": 1.6278741359710693, "log_odds_ratio": -0.3424648940563202, "logits/chosen": 0.6547519564628601, "logits/rejected": 0.637779951095581, "logps/chosen": -2.0744309425354004, "logps/rejected": -3.6225240230560303, "loss": 0.5946, "nll_loss": 0.5603963136672974, "rewards/accuracies": 0.875, "rewards/chosen": -0.20744308829307556, "rewards/margins": 0.15480931103229523, "rewards/rejected": -0.362252414226532, "step": 6238 }, { "epoch": 17.08145106091718, "grad_norm": 5.5517730712890625, "learning_rate": 1.4575342465753423e-07, "log_odds_chosen": 1.9213777780532837, "log_odds_ratio": -0.215824156999588, "logits/chosen": 0.7380825281143188, "logits/rejected": 0.7399407625198364, "logps/chosen": -2.415607213973999, "logps/rejected": -4.2281599044799805, "loss": 0.5901, "nll_loss": 0.5685570240020752, "rewards/accuracies": 1.0, "rewards/chosen": -0.24156072735786438, "rewards/margins": 0.1812552809715271, "rewards/rejected": -0.4228160083293915, "step": 6239 }, { "epoch": 17.084188911704313, "grad_norm": 5.45958137512207, "learning_rate": 1.4561643835616438e-07, "log_odds_chosen": 1.7960922718048096, "log_odds_ratio": -0.29677635431289673, "logits/chosen": 0.8176549077033997, "logits/rejected": 0.8524960875511169, "logps/chosen": -1.9856157302856445, "logps/rejected": -3.690450668334961, "loss": 0.6864, "nll_loss": 0.6566771864891052, "rewards/accuracies": 0.875, "rewards/chosen": -0.19856157898902893, "rewards/margins": 0.1704835146665573, "rewards/rejected": -0.36904507875442505, "step": 6240 }, { "epoch": 17.086926762491444, "grad_norm": 9.527375221252441, "learning_rate": 1.454794520547945e-07, "log_odds_chosen": 3.0380892753601074, "log_odds_ratio": -0.20237618684768677, "logits/chosen": 1.0805248022079468, "logits/rejected": 1.0744271278381348, "logps/chosen": -2.575901508331299, "logps/rejected": -5.4473772048950195, "loss": 0.8264, "nll_loss": 0.8061262369155884, "rewards/accuracies": 0.875, "rewards/chosen": -0.2575901746749878, "rewards/margins": 0.28714755177497864, "rewards/rejected": -0.544737696647644, "step": 6241 }, { "epoch": 17.089664613278575, "grad_norm": 8.858735084533691, "learning_rate": 1.4534246575342466e-07, "log_odds_chosen": 1.9443511962890625, "log_odds_ratio": -0.5884752869606018, "logits/chosen": 0.9056214690208435, "logits/rejected": 0.8686726093292236, "logps/chosen": -2.621117353439331, "logps/rejected": -4.329117298126221, "loss": 0.5786, "nll_loss": 0.5197960138320923, "rewards/accuracies": 0.875, "rewards/chosen": -0.26211172342300415, "rewards/margins": 0.17080000042915344, "rewards/rejected": -0.43291175365448, "step": 6242 }, { "epoch": 17.09240246406571, "grad_norm": 6.294735908508301, "learning_rate": 1.4520547945205479e-07, "log_odds_chosen": 1.1079081296920776, "log_odds_ratio": -0.3572727143764496, "logits/chosen": 0.8197802305221558, "logits/rejected": 0.8526631593704224, "logps/chosen": -2.2376925945281982, "logps/rejected": -3.2683191299438477, "loss": 0.6656, "nll_loss": 0.629917562007904, "rewards/accuracies": 1.0, "rewards/chosen": -0.22376926243305206, "rewards/margins": 0.10306264460086823, "rewards/rejected": -0.3268319368362427, "step": 6243 }, { "epoch": 17.09514031485284, "grad_norm": 5.103950023651123, "learning_rate": 1.450684931506849e-07, "log_odds_chosen": 2.45227313041687, "log_odds_ratio": -0.2709248960018158, "logits/chosen": 0.826981782913208, "logits/rejected": 0.9013896584510803, "logps/chosen": -1.829394817352295, "logps/rejected": -4.111469268798828, "loss": 0.6763, "nll_loss": 0.6491835117340088, "rewards/accuracies": 1.0, "rewards/chosen": -0.18293948471546173, "rewards/margins": 0.22820740938186646, "rewards/rejected": -0.4111469089984894, "step": 6244 }, { "epoch": 17.097878165639973, "grad_norm": 6.430441856384277, "learning_rate": 1.4493150684931506e-07, "log_odds_chosen": 1.856229305267334, "log_odds_ratio": -0.3443724513053894, "logits/chosen": 0.9432470798492432, "logits/rejected": 0.855498194694519, "logps/chosen": -1.5709609985351562, "logps/rejected": -3.277266502380371, "loss": 0.5348, "nll_loss": 0.5003851652145386, "rewards/accuracies": 0.875, "rewards/chosen": -0.15709610283374786, "rewards/margins": 0.1706305742263794, "rewards/rejected": -0.32772666215896606, "step": 6245 }, { "epoch": 17.100616016427104, "grad_norm": 5.57996129989624, "learning_rate": 1.447945205479452e-07, "log_odds_chosen": 2.7830047607421875, "log_odds_ratio": -0.27290692925453186, "logits/chosen": 0.8042859435081482, "logits/rejected": 0.8513104915618896, "logps/chosen": -2.41511869430542, "logps/rejected": -5.115255355834961, "loss": 0.7304, "nll_loss": 0.7031378149986267, "rewards/accuracies": 0.875, "rewards/chosen": -0.24151185154914856, "rewards/margins": 0.270013689994812, "rewards/rejected": -0.511525571346283, "step": 6246 }, { "epoch": 17.10335386721424, "grad_norm": 4.315341472625732, "learning_rate": 1.4465753424657534e-07, "log_odds_chosen": 2.2137019634246826, "log_odds_ratio": -0.2741427421569824, "logits/chosen": 0.939198911190033, "logits/rejected": 0.9820809960365295, "logps/chosen": -1.747869610786438, "logps/rejected": -3.71809983253479, "loss": 0.5238, "nll_loss": 0.4964084327220917, "rewards/accuracies": 0.875, "rewards/chosen": -0.17478695511817932, "rewards/margins": 0.19702301919460297, "rewards/rejected": -0.3718099594116211, "step": 6247 }, { "epoch": 17.10609171800137, "grad_norm": 5.00148868560791, "learning_rate": 1.4452054794520547e-07, "log_odds_chosen": 1.935950756072998, "log_odds_ratio": -0.2370748668909073, "logits/chosen": 0.630677342414856, "logits/rejected": 0.6656589508056641, "logps/chosen": -2.243464231491089, "logps/rejected": -4.080758571624756, "loss": 0.6164, "nll_loss": 0.5927364826202393, "rewards/accuracies": 1.0, "rewards/chosen": -0.22434642910957336, "rewards/margins": 0.18372943997383118, "rewards/rejected": -0.40807586908340454, "step": 6248 }, { "epoch": 17.1088295687885, "grad_norm": 5.356464862823486, "learning_rate": 1.4438356164383562e-07, "log_odds_chosen": 2.024810314178467, "log_odds_ratio": -0.31605878472328186, "logits/chosen": 0.827018678188324, "logits/rejected": 0.8530247807502747, "logps/chosen": -2.182325839996338, "logps/rejected": -4.151750087738037, "loss": 0.6841, "nll_loss": 0.6525084972381592, "rewards/accuracies": 0.75, "rewards/chosen": -0.21823258697986603, "rewards/margins": 0.19694241881370544, "rewards/rejected": -0.4151749908924103, "step": 6249 }, { "epoch": 17.111567419575632, "grad_norm": 5.801141738891602, "learning_rate": 1.4424657534246577e-07, "log_odds_chosen": 3.08042311668396, "log_odds_ratio": -0.2104441225528717, "logits/chosen": 1.0677828788757324, "logits/rejected": 1.0391830205917358, "logps/chosen": -1.6216485500335693, "logps/rejected": -4.540877342224121, "loss": 0.5971, "nll_loss": 0.5760833024978638, "rewards/accuracies": 1.0, "rewards/chosen": -0.1621648669242859, "rewards/margins": 0.2919228971004486, "rewards/rejected": -0.4540877938270569, "step": 6250 }, { "epoch": 17.114305270362767, "grad_norm": 5.679391860961914, "learning_rate": 1.4410958904109587e-07, "log_odds_chosen": 1.585166573524475, "log_odds_ratio": -0.25168153643608093, "logits/chosen": 0.6214109063148499, "logits/rejected": 0.6670486927032471, "logps/chosen": -1.9265532493591309, "logps/rejected": -3.368396282196045, "loss": 0.5743, "nll_loss": 0.5491576790809631, "rewards/accuracies": 1.0, "rewards/chosen": -0.19265533983707428, "rewards/margins": 0.14418429136276245, "rewards/rejected": -0.3368396461009979, "step": 6251 }, { "epoch": 17.117043121149898, "grad_norm": 5.6392130851745605, "learning_rate": 1.4397260273972602e-07, "log_odds_chosen": 2.1457881927490234, "log_odds_ratio": -0.200660839676857, "logits/chosen": 0.812698483467102, "logits/rejected": 0.8575196266174316, "logps/chosen": -2.4284610748291016, "logps/rejected": -4.425513744354248, "loss": 0.6069, "nll_loss": 0.5868335962295532, "rewards/accuracies": 1.0, "rewards/chosen": -0.24284614622592926, "rewards/margins": 0.19970521330833435, "rewards/rejected": -0.4425513446331024, "step": 6252 }, { "epoch": 17.11978097193703, "grad_norm": 13.658904075622559, "learning_rate": 1.4383561643835615e-07, "log_odds_chosen": 0.9903743863105774, "log_odds_ratio": -0.750058650970459, "logits/chosen": 1.040167212486267, "logits/rejected": 1.0251365900039673, "logps/chosen": -2.871891498565674, "logps/rejected": -3.7234320640563965, "loss": 0.6699, "nll_loss": 0.5949326753616333, "rewards/accuracies": 0.75, "rewards/chosen": -0.2871891260147095, "rewards/margins": 0.08515407145023346, "rewards/rejected": -0.37234318256378174, "step": 6253 }, { "epoch": 17.12251882272416, "grad_norm": 4.8518571853637695, "learning_rate": 1.436986301369863e-07, "log_odds_chosen": 1.6215208768844604, "log_odds_ratio": -0.3355465531349182, "logits/chosen": 0.8791787624359131, "logits/rejected": 0.9040283560752869, "logps/chosen": -2.183154821395874, "logps/rejected": -3.716737747192383, "loss": 0.5981, "nll_loss": 0.5645153522491455, "rewards/accuracies": 0.875, "rewards/chosen": -0.2183154821395874, "rewards/margins": 0.1533583104610443, "rewards/rejected": -0.3716738224029541, "step": 6254 }, { "epoch": 17.125256673511295, "grad_norm": 5.311974048614502, "learning_rate": 1.4356164383561643e-07, "log_odds_chosen": 1.6529090404510498, "log_odds_ratio": -0.2717628479003906, "logits/chosen": 0.7111376523971558, "logits/rejected": 0.7270377278327942, "logps/chosen": -1.8963382244110107, "logps/rejected": -3.4259724617004395, "loss": 0.6315, "nll_loss": 0.6043700575828552, "rewards/accuracies": 0.875, "rewards/chosen": -0.1896338313817978, "rewards/margins": 0.15296345949172974, "rewards/rejected": -0.34259724617004395, "step": 6255 }, { "epoch": 17.127994524298426, "grad_norm": 4.967390537261963, "learning_rate": 1.4342465753424658e-07, "log_odds_chosen": 2.7146799564361572, "log_odds_ratio": -0.21121551096439362, "logits/chosen": 0.580311119556427, "logits/rejected": 0.5682950019836426, "logps/chosen": -1.7360811233520508, "logps/rejected": -4.293991565704346, "loss": 0.6674, "nll_loss": 0.6462323665618896, "rewards/accuracies": 1.0, "rewards/chosen": -0.17360810935497284, "rewards/margins": 0.2557910680770874, "rewards/rejected": -0.42939919233322144, "step": 6256 }, { "epoch": 17.130732375085557, "grad_norm": 6.321154594421387, "learning_rate": 1.432876712328767e-07, "log_odds_chosen": 1.4424372911453247, "log_odds_ratio": -0.30364152789115906, "logits/chosen": 0.8741443157196045, "logits/rejected": 0.8609762191772461, "logps/chosen": -1.6457933187484741, "logps/rejected": -2.925009250640869, "loss": 0.5594, "nll_loss": 0.5290387868881226, "rewards/accuracies": 1.0, "rewards/chosen": -0.1645793467760086, "rewards/margins": 0.12792159616947174, "rewards/rejected": -0.29250094294548035, "step": 6257 }, { "epoch": 17.13347022587269, "grad_norm": 5.899984359741211, "learning_rate": 1.4315068493150683e-07, "log_odds_chosen": 2.1300082206726074, "log_odds_ratio": -0.23813879489898682, "logits/chosen": 0.8629196882247925, "logits/rejected": 0.9753680229187012, "logps/chosen": -2.8093814849853516, "logps/rejected": -4.817089080810547, "loss": 0.6733, "nll_loss": 0.6494603753089905, "rewards/accuracies": 0.875, "rewards/chosen": -0.28093814849853516, "rewards/margins": 0.200770765542984, "rewards/rejected": -0.48170891404151917, "step": 6258 }, { "epoch": 17.136208076659823, "grad_norm": 5.3675217628479, "learning_rate": 1.4301369863013698e-07, "log_odds_chosen": 3.128321647644043, "log_odds_ratio": -0.22678419947624207, "logits/chosen": 0.863744854927063, "logits/rejected": 0.8949021100997925, "logps/chosen": -2.0022737979888916, "logps/rejected": -5.03516960144043, "loss": 0.6024, "nll_loss": 0.5797502398490906, "rewards/accuracies": 1.0, "rewards/chosen": -0.20022737979888916, "rewards/margins": 0.30328959226608276, "rewards/rejected": -0.5035169720649719, "step": 6259 }, { "epoch": 17.138945927446954, "grad_norm": 6.219532489776611, "learning_rate": 1.428767123287671e-07, "log_odds_chosen": 1.5552808046340942, "log_odds_ratio": -0.2726022005081177, "logits/chosen": 0.8778881430625916, "logits/rejected": 0.7762379050254822, "logps/chosen": -2.0439627170562744, "logps/rejected": -3.4684009552001953, "loss": 0.7288, "nll_loss": 0.7015443444252014, "rewards/accuracies": 0.875, "rewards/chosen": -0.20439627766609192, "rewards/margins": 0.14244383573532104, "rewards/rejected": -0.3468400835990906, "step": 6260 }, { "epoch": 17.141683778234086, "grad_norm": 5.0576395988464355, "learning_rate": 1.4273972602739726e-07, "log_odds_chosen": 1.8452900648117065, "log_odds_ratio": -0.31035298109054565, "logits/chosen": 0.6806180477142334, "logits/rejected": 0.7961417436599731, "logps/chosen": -2.112781286239624, "logps/rejected": -3.879511833190918, "loss": 0.5979, "nll_loss": 0.5669032335281372, "rewards/accuracies": 0.875, "rewards/chosen": -0.21127812564373016, "rewards/margins": 0.1766730397939682, "rewards/rejected": -0.38795119524002075, "step": 6261 }, { "epoch": 17.144421629021217, "grad_norm": 5.930049419403076, "learning_rate": 1.426027397260274e-07, "log_odds_chosen": 2.847303867340088, "log_odds_ratio": -0.15793979167938232, "logits/chosen": 0.8955056667327881, "logits/rejected": 1.0067951679229736, "logps/chosen": -2.855766534805298, "logps/rejected": -5.637606143951416, "loss": 0.7759, "nll_loss": 0.7601282596588135, "rewards/accuracies": 1.0, "rewards/chosen": -0.2855766713619232, "rewards/margins": 0.2781839370727539, "rewards/rejected": -0.5637606382369995, "step": 6262 }, { "epoch": 17.14715947980835, "grad_norm": 5.809171199798584, "learning_rate": 1.4246575342465754e-07, "log_odds_chosen": 1.4981725215911865, "log_odds_ratio": -0.31149154901504517, "logits/chosen": 0.9582529664039612, "logits/rejected": 0.955674946308136, "logps/chosen": -3.004642963409424, "logps/rejected": -4.434514045715332, "loss": 0.675, "nll_loss": 0.6438312530517578, "rewards/accuracies": 0.875, "rewards/chosen": -0.30046430230140686, "rewards/margins": 0.14298710227012634, "rewards/rejected": -0.4434514045715332, "step": 6263 }, { "epoch": 17.149897330595483, "grad_norm": 6.61684513092041, "learning_rate": 1.4232876712328766e-07, "log_odds_chosen": 3.181434154510498, "log_odds_ratio": -0.14439372718334198, "logits/chosen": 0.8994030952453613, "logits/rejected": 0.9966883063316345, "logps/chosen": -2.6500327587127686, "logps/rejected": -5.709009170532227, "loss": 0.8422, "nll_loss": 0.8277417421340942, "rewards/accuracies": 0.875, "rewards/chosen": -0.2650032639503479, "rewards/margins": 0.30589771270751953, "rewards/rejected": -0.5709009170532227, "step": 6264 }, { "epoch": 17.152635181382614, "grad_norm": 4.990877151489258, "learning_rate": 1.421917808219178e-07, "log_odds_chosen": 1.4573557376861572, "log_odds_ratio": -0.29951414465904236, "logits/chosen": 0.9782570004463196, "logits/rejected": 1.0114467144012451, "logps/chosen": -2.314439535140991, "logps/rejected": -3.6461875438690186, "loss": 0.5822, "nll_loss": 0.5522410869598389, "rewards/accuracies": 1.0, "rewards/chosen": -0.23144394159317017, "rewards/margins": 0.1331748068332672, "rewards/rejected": -0.36461877822875977, "step": 6265 }, { "epoch": 17.155373032169745, "grad_norm": 5.891591548919678, "learning_rate": 1.4205479452054794e-07, "log_odds_chosen": 3.5129528045654297, "log_odds_ratio": -0.14166077971458435, "logits/chosen": 0.7693616151809692, "logits/rejected": 0.7706714868545532, "logps/chosen": -2.485938549041748, "logps/rejected": -5.8606109619140625, "loss": 0.8295, "nll_loss": 0.8153029680252075, "rewards/accuracies": 1.0, "rewards/chosen": -0.24859385192394257, "rewards/margins": 0.3374672532081604, "rewards/rejected": -0.5860611200332642, "step": 6266 }, { "epoch": 17.15811088295688, "grad_norm": 7.371481895446777, "learning_rate": 1.4191780821917807e-07, "log_odds_chosen": 0.7032297253608704, "log_odds_ratio": -0.717694103717804, "logits/chosen": 1.0507431030273438, "logits/rejected": 1.0909873247146606, "logps/chosen": -2.887599468231201, "logps/rejected": -3.545032024383545, "loss": 0.8142, "nll_loss": 0.7424346208572388, "rewards/accuracies": 0.75, "rewards/chosen": -0.2887599468231201, "rewards/margins": 0.06574323028326035, "rewards/rejected": -0.35450318455696106, "step": 6267 }, { "epoch": 17.16084873374401, "grad_norm": 5.43611478805542, "learning_rate": 1.4178082191780822e-07, "log_odds_chosen": 1.9299614429473877, "log_odds_ratio": -0.25497007369995117, "logits/chosen": 0.8557173609733582, "logits/rejected": 0.8883047699928284, "logps/chosen": -3.1026039123535156, "logps/rejected": -4.931181907653809, "loss": 0.7664, "nll_loss": 0.740858256816864, "rewards/accuracies": 0.875, "rewards/chosen": -0.3102604150772095, "rewards/margins": 0.18285784125328064, "rewards/rejected": -0.4931182265281677, "step": 6268 }, { "epoch": 17.163586584531142, "grad_norm": 7.503617286682129, "learning_rate": 1.4164383561643837e-07, "log_odds_chosen": 2.8767194747924805, "log_odds_ratio": -0.14477261900901794, "logits/chosen": 1.0427532196044922, "logits/rejected": 1.1275593042373657, "logps/chosen": -3.3175456523895264, "logps/rejected": -6.036138534545898, "loss": 0.821, "nll_loss": 0.8065488338470459, "rewards/accuracies": 1.0, "rewards/chosen": -0.33175456523895264, "rewards/margins": 0.271859347820282, "rewards/rejected": -0.6036138534545898, "step": 6269 }, { "epoch": 17.166324435318277, "grad_norm": 8.448521614074707, "learning_rate": 1.415068493150685e-07, "log_odds_chosen": 2.6708407402038574, "log_odds_ratio": -0.47779610753059387, "logits/chosen": 0.9451318383216858, "logits/rejected": 0.9581725597381592, "logps/chosen": -2.7946712970733643, "logps/rejected": -5.405062675476074, "loss": 0.6739, "nll_loss": 0.6261686682701111, "rewards/accuracies": 0.75, "rewards/chosen": -0.2794671356678009, "rewards/margins": 0.261039137840271, "rewards/rejected": -0.5405063033103943, "step": 6270 }, { "epoch": 17.169062286105408, "grad_norm": 6.245478630065918, "learning_rate": 1.4136986301369862e-07, "log_odds_chosen": 3.663355588912964, "log_odds_ratio": -0.0884062796831131, "logits/chosen": 1.1739860773086548, "logits/rejected": 1.2153385877609253, "logps/chosen": -1.972259283065796, "logps/rejected": -5.470076560974121, "loss": 0.5588, "nll_loss": 0.5499935150146484, "rewards/accuracies": 1.0, "rewards/chosen": -0.1972259134054184, "rewards/margins": 0.34978169202804565, "rewards/rejected": -0.54700767993927, "step": 6271 }, { "epoch": 17.17180013689254, "grad_norm": 5.913839340209961, "learning_rate": 1.4123287671232875e-07, "log_odds_chosen": 3.947394847869873, "log_odds_ratio": -0.0872119590640068, "logits/chosen": 0.9330909252166748, "logits/rejected": 0.8824615478515625, "logps/chosen": -2.1929123401641846, "logps/rejected": -5.975270748138428, "loss": 0.7669, "nll_loss": 0.7581854462623596, "rewards/accuracies": 1.0, "rewards/chosen": -0.21929122507572174, "rewards/margins": 0.3782358169555664, "rewards/rejected": -0.5975270867347717, "step": 6272 }, { "epoch": 17.17453798767967, "grad_norm": 5.330695629119873, "learning_rate": 1.410958904109589e-07, "log_odds_chosen": 2.165815830230713, "log_odds_ratio": -0.20759807527065277, "logits/chosen": 0.7041258811950684, "logits/rejected": 0.6725115776062012, "logps/chosen": -2.132503032684326, "logps/rejected": -4.177432537078857, "loss": 0.6073, "nll_loss": 0.5864953994750977, "rewards/accuracies": 1.0, "rewards/chosen": -0.2132502943277359, "rewards/margins": 0.2044929563999176, "rewards/rejected": -0.4177432656288147, "step": 6273 }, { "epoch": 17.177275838466805, "grad_norm": 6.112122535705566, "learning_rate": 1.4095890410958902e-07, "log_odds_chosen": 2.1604366302490234, "log_odds_ratio": -0.3171928822994232, "logits/chosen": 0.8281210064888, "logits/rejected": 0.7745442986488342, "logps/chosen": -1.8452680110931396, "logps/rejected": -3.7920143604278564, "loss": 0.677, "nll_loss": 0.6452873945236206, "rewards/accuracies": 0.875, "rewards/chosen": -0.18452680110931396, "rewards/margins": 0.19467464089393616, "rewards/rejected": -0.3792014718055725, "step": 6274 }, { "epoch": 17.180013689253936, "grad_norm": 4.904154300689697, "learning_rate": 1.4082191780821918e-07, "log_odds_chosen": 3.1334049701690674, "log_odds_ratio": -0.1684686243534088, "logits/chosen": 1.1911685466766357, "logits/rejected": 1.1745762825012207, "logps/chosen": -2.081295967102051, "logps/rejected": -5.06356143951416, "loss": 0.5888, "nll_loss": 0.5719650387763977, "rewards/accuracies": 1.0, "rewards/chosen": -0.20812958478927612, "rewards/margins": 0.2982265055179596, "rewards/rejected": -0.5063561201095581, "step": 6275 }, { "epoch": 17.182751540041068, "grad_norm": 4.521269798278809, "learning_rate": 1.4068493150684933e-07, "log_odds_chosen": 2.195873737335205, "log_odds_ratio": -0.2086617350578308, "logits/chosen": 0.7869952321052551, "logits/rejected": 0.7983076572418213, "logps/chosen": -1.9253902435302734, "logps/rejected": -3.983628749847412, "loss": 0.6241, "nll_loss": 0.603236198425293, "rewards/accuracies": 1.0, "rewards/chosen": -0.1925390362739563, "rewards/margins": 0.2058238387107849, "rewards/rejected": -0.3983628749847412, "step": 6276 }, { "epoch": 17.1854893908282, "grad_norm": 5.7977705001831055, "learning_rate": 1.4054794520547943e-07, "log_odds_chosen": 1.799093246459961, "log_odds_ratio": -0.47345906496047974, "logits/chosen": 0.6311997175216675, "logits/rejected": 0.7346829771995544, "logps/chosen": -2.659193754196167, "logps/rejected": -4.3651123046875, "loss": 0.7226, "nll_loss": 0.6752166748046875, "rewards/accuracies": 0.75, "rewards/chosen": -0.26591938734054565, "rewards/margins": 0.1705918312072754, "rewards/rejected": -0.43651121854782104, "step": 6277 }, { "epoch": 17.188227241615333, "grad_norm": 6.575801849365234, "learning_rate": 1.4041095890410958e-07, "log_odds_chosen": 2.699573040008545, "log_odds_ratio": -0.1884007453918457, "logits/chosen": 0.7359316349029541, "logits/rejected": 0.7660037279129028, "logps/chosen": -3.0523312091827393, "logps/rejected": -5.679346561431885, "loss": 0.6913, "nll_loss": 0.6724989414215088, "rewards/accuracies": 0.875, "rewards/chosen": -0.3052331209182739, "rewards/margins": 0.2627015709877014, "rewards/rejected": -0.5679346919059753, "step": 6278 }, { "epoch": 17.190965092402465, "grad_norm": 5.771233081817627, "learning_rate": 1.402739726027397e-07, "log_odds_chosen": 2.1148509979248047, "log_odds_ratio": -0.24857626855373383, "logits/chosen": 0.8271808624267578, "logits/rejected": 0.7515170574188232, "logps/chosen": -2.635507106781006, "logps/rejected": -4.615962505340576, "loss": 0.7212, "nll_loss": 0.6963108777999878, "rewards/accuracies": 1.0, "rewards/chosen": -0.263550728559494, "rewards/margins": 0.198045551776886, "rewards/rejected": -0.4615962505340576, "step": 6279 }, { "epoch": 17.193702943189596, "grad_norm": 6.549853324890137, "learning_rate": 1.4013698630136986e-07, "log_odds_chosen": 3.0850613117218018, "log_odds_ratio": -0.19927917420864105, "logits/chosen": 0.8069536685943604, "logits/rejected": 0.8721569180488586, "logps/chosen": -2.37910795211792, "logps/rejected": -5.342793941497803, "loss": 0.7699, "nll_loss": 0.7499691247940063, "rewards/accuracies": 0.875, "rewards/chosen": -0.23791080713272095, "rewards/margins": 0.2963685989379883, "rewards/rejected": -0.534279465675354, "step": 6280 }, { "epoch": 17.196440793976727, "grad_norm": 5.709771156311035, "learning_rate": 1.4e-07, "log_odds_chosen": 3.3947181701660156, "log_odds_ratio": -0.2818240821361542, "logits/chosen": 1.0129674673080444, "logits/rejected": 1.0603578090667725, "logps/chosen": -2.162062644958496, "logps/rejected": -5.425894737243652, "loss": 0.6376, "nll_loss": 0.6093934178352356, "rewards/accuracies": 0.875, "rewards/chosen": -0.21620626747608185, "rewards/margins": 0.32638317346572876, "rewards/rejected": -0.5425894856452942, "step": 6281 }, { "epoch": 17.19917864476386, "grad_norm": 5.977923393249512, "learning_rate": 1.3986301369863014e-07, "log_odds_chosen": 2.2741498947143555, "log_odds_ratio": -0.31935232877731323, "logits/chosen": 0.9242011904716492, "logits/rejected": 0.9792031645774841, "logps/chosen": -2.464938163757324, "logps/rejected": -4.616391658782959, "loss": 0.7679, "nll_loss": 0.7359822988510132, "rewards/accuracies": 0.875, "rewards/chosen": -0.24649381637573242, "rewards/margins": 0.21514534950256348, "rewards/rejected": -0.4616391658782959, "step": 6282 }, { "epoch": 17.201916495550993, "grad_norm": 6.283970355987549, "learning_rate": 1.397260273972603e-07, "log_odds_chosen": 2.31451153755188, "log_odds_ratio": -0.318962037563324, "logits/chosen": 1.0140080451965332, "logits/rejected": 1.101500153541565, "logps/chosen": -2.607847213745117, "logps/rejected": -4.848267555236816, "loss": 0.656, "nll_loss": 0.6241331100463867, "rewards/accuracies": 0.875, "rewards/chosen": -0.26078474521636963, "rewards/margins": 0.22404205799102783, "rewards/rejected": -0.4848267734050751, "step": 6283 }, { "epoch": 17.204654346338124, "grad_norm": 5.051912784576416, "learning_rate": 1.3958904109589039e-07, "log_odds_chosen": 3.9669344425201416, "log_odds_ratio": -0.10634857416152954, "logits/chosen": 0.8304684162139893, "logits/rejected": 0.7531482577323914, "logps/chosen": -1.9591950178146362, "logps/rejected": -5.740234851837158, "loss": 0.6962, "nll_loss": 0.6855190396308899, "rewards/accuracies": 1.0, "rewards/chosen": -0.19591951370239258, "rewards/margins": 0.37810397148132324, "rewards/rejected": -0.5740234851837158, "step": 6284 }, { "epoch": 17.207392197125255, "grad_norm": 4.851277828216553, "learning_rate": 1.3945205479452054e-07, "log_odds_chosen": 2.9624338150024414, "log_odds_ratio": -0.19923341274261475, "logits/chosen": 0.703090250492096, "logits/rejected": 0.7546969652175903, "logps/chosen": -2.029247283935547, "logps/rejected": -4.867571830749512, "loss": 0.6442, "nll_loss": 0.6242931485176086, "rewards/accuracies": 1.0, "rewards/chosen": -0.2029247283935547, "rewards/margins": 0.2838324308395386, "rewards/rejected": -0.48675721883773804, "step": 6285 }, { "epoch": 17.21013004791239, "grad_norm": 5.365570545196533, "learning_rate": 1.3931506849315066e-07, "log_odds_chosen": 3.2894344329833984, "log_odds_ratio": -0.2753032147884369, "logits/chosen": 0.8889126777648926, "logits/rejected": 0.8424216508865356, "logps/chosen": -1.3660061359405518, "logps/rejected": -4.404914855957031, "loss": 0.5864, "nll_loss": 0.5588991641998291, "rewards/accuracies": 0.875, "rewards/chosen": -0.13660061359405518, "rewards/margins": 0.3038908541202545, "rewards/rejected": -0.4404914677143097, "step": 6286 }, { "epoch": 17.21286789869952, "grad_norm": 6.266149044036865, "learning_rate": 1.3917808219178082e-07, "log_odds_chosen": 3.0688822269439697, "log_odds_ratio": -0.20114848017692566, "logits/chosen": 1.1581259965896606, "logits/rejected": 1.2276341915130615, "logps/chosen": -2.4725425243377686, "logps/rejected": -5.389184951782227, "loss": 0.6595, "nll_loss": 0.6393453478813171, "rewards/accuracies": 1.0, "rewards/chosen": -0.24725423753261566, "rewards/margins": 0.2916642725467682, "rewards/rejected": -0.5389184951782227, "step": 6287 }, { "epoch": 17.215605749486652, "grad_norm": 5.8801116943359375, "learning_rate": 1.3904109589041097e-07, "log_odds_chosen": 2.0371387004852295, "log_odds_ratio": -0.3387903869152069, "logits/chosen": 0.8682731986045837, "logits/rejected": 0.9094003438949585, "logps/chosen": -1.8170008659362793, "logps/rejected": -3.7429304122924805, "loss": 0.6837, "nll_loss": 0.6497775316238403, "rewards/accuracies": 0.75, "rewards/chosen": -0.18170008063316345, "rewards/margins": 0.19259297847747803, "rewards/rejected": -0.37429308891296387, "step": 6288 }, { "epoch": 17.218343600273784, "grad_norm": 6.01616096496582, "learning_rate": 1.389041095890411e-07, "log_odds_chosen": 1.6409646272659302, "log_odds_ratio": -0.352034330368042, "logits/chosen": 0.7036384344100952, "logits/rejected": 0.7399329543113708, "logps/chosen": -2.396172046661377, "logps/rejected": -3.8894975185394287, "loss": 0.5875, "nll_loss": 0.5522671937942505, "rewards/accuracies": 0.875, "rewards/chosen": -0.23961719870567322, "rewards/margins": 0.14933255314826965, "rewards/rejected": -0.38894975185394287, "step": 6289 }, { "epoch": 17.22108145106092, "grad_norm": 5.4076409339904785, "learning_rate": 1.3876712328767125e-07, "log_odds_chosen": 2.4247000217437744, "log_odds_ratio": -0.220786914229393, "logits/chosen": 0.9109771847724915, "logits/rejected": 0.9512603878974915, "logps/chosen": -2.6171493530273438, "logps/rejected": -4.961662292480469, "loss": 0.6514, "nll_loss": 0.6293413639068604, "rewards/accuracies": 0.875, "rewards/chosen": -0.26171496510505676, "rewards/margins": 0.2344512790441513, "rewards/rejected": -0.4961662292480469, "step": 6290 }, { "epoch": 17.22381930184805, "grad_norm": 8.178009986877441, "learning_rate": 1.3863013698630135e-07, "log_odds_chosen": 1.1471127271652222, "log_odds_ratio": -0.48600077629089355, "logits/chosen": 0.8758881092071533, "logits/rejected": 0.8953516483306885, "logps/chosen": -3.2908685207366943, "logps/rejected": -4.370550155639648, "loss": 0.684, "nll_loss": 0.6354339718818665, "rewards/accuracies": 0.75, "rewards/chosen": -0.3290868401527405, "rewards/margins": 0.10796815156936646, "rewards/rejected": -0.43705499172210693, "step": 6291 }, { "epoch": 17.22655715263518, "grad_norm": 5.5229878425598145, "learning_rate": 1.384931506849315e-07, "log_odds_chosen": 2.4009156227111816, "log_odds_ratio": -0.2966669797897339, "logits/chosen": 1.029550313949585, "logits/rejected": 1.0613348484039307, "logps/chosen": -2.195648431777954, "logps/rejected": -4.5266432762146, "loss": 0.6484, "nll_loss": 0.6187647581100464, "rewards/accuracies": 0.875, "rewards/chosen": -0.21956484019756317, "rewards/margins": 0.23309950530529022, "rewards/rejected": -0.452664315700531, "step": 6292 }, { "epoch": 17.229295003422312, "grad_norm": 6.1021809577941895, "learning_rate": 1.3835616438356162e-07, "log_odds_chosen": 2.648784637451172, "log_odds_ratio": -0.1499490737915039, "logits/chosen": 0.9249733090400696, "logits/rejected": 0.8860102891921997, "logps/chosen": -2.201439380645752, "logps/rejected": -4.708155155181885, "loss": 0.6127, "nll_loss": 0.5977364778518677, "rewards/accuracies": 1.0, "rewards/chosen": -0.22014394402503967, "rewards/margins": 0.2506715655326843, "rewards/rejected": -0.470815509557724, "step": 6293 }, { "epoch": 17.232032854209447, "grad_norm": 6.316434860229492, "learning_rate": 1.3821917808219177e-07, "log_odds_chosen": 2.426220655441284, "log_odds_ratio": -0.20854389667510986, "logits/chosen": 0.8082295656204224, "logits/rejected": 0.8598637580871582, "logps/chosen": -1.556762456893921, "logps/rejected": -3.725724697113037, "loss": 0.4658, "nll_loss": 0.444914311170578, "rewards/accuracies": 1.0, "rewards/chosen": -0.1556762456893921, "rewards/margins": 0.21689625084400177, "rewards/rejected": -0.37257251143455505, "step": 6294 }, { "epoch": 17.234770704996578, "grad_norm": 5.959265232086182, "learning_rate": 1.3808219178082193e-07, "log_odds_chosen": 1.2324508428573608, "log_odds_ratio": -0.38042038679122925, "logits/chosen": 0.7964389324188232, "logits/rejected": 0.8905525803565979, "logps/chosen": -2.2202036380767822, "logps/rejected": -3.367112398147583, "loss": 0.6666, "nll_loss": 0.6285556554794312, "rewards/accuracies": 0.875, "rewards/chosen": -0.22202037274837494, "rewards/margins": 0.114690862596035, "rewards/rejected": -0.33671122789382935, "step": 6295 }, { "epoch": 17.23750855578371, "grad_norm": 5.235355854034424, "learning_rate": 1.3794520547945205e-07, "log_odds_chosen": 2.4015684127807617, "log_odds_ratio": -0.16989734768867493, "logits/chosen": 0.789217472076416, "logits/rejected": 0.7685989141464233, "logps/chosen": -1.9633300304412842, "logps/rejected": -4.215850830078125, "loss": 0.8193, "nll_loss": 0.8023335933685303, "rewards/accuracies": 1.0, "rewards/chosen": -0.19633300602436066, "rewards/margins": 0.22525206208229065, "rewards/rejected": -0.4215850830078125, "step": 6296 }, { "epoch": 17.240246406570844, "grad_norm": 4.948207378387451, "learning_rate": 1.3780821917808218e-07, "log_odds_chosen": 1.845703363418579, "log_odds_ratio": -0.31637054681777954, "logits/chosen": 0.9510917067527771, "logits/rejected": 1.0208854675292969, "logps/chosen": -2.4389123916625977, "logps/rejected": -4.185090065002441, "loss": 0.7636, "nll_loss": 0.7320082187652588, "rewards/accuracies": 0.875, "rewards/chosen": -0.24389123916625977, "rewards/margins": 0.17461778223514557, "rewards/rejected": -0.41850900650024414, "step": 6297 }, { "epoch": 17.242984257357975, "grad_norm": 5.831191539764404, "learning_rate": 1.376712328767123e-07, "log_odds_chosen": 3.762755870819092, "log_odds_ratio": -0.22906340658664703, "logits/chosen": 0.7242254614830017, "logits/rejected": 0.7016246318817139, "logps/chosen": -2.0338692665100098, "logps/rejected": -5.6526103019714355, "loss": 0.6551, "nll_loss": 0.6321782469749451, "rewards/accuracies": 0.875, "rewards/chosen": -0.20338694751262665, "rewards/margins": 0.3618741035461426, "rewards/rejected": -0.5652610063552856, "step": 6298 }, { "epoch": 17.245722108145106, "grad_norm": 5.4936299324035645, "learning_rate": 1.3753424657534246e-07, "log_odds_chosen": 1.6143813133239746, "log_odds_ratio": -0.3926107585430145, "logits/chosen": 1.1431410312652588, "logits/rejected": 1.1173436641693115, "logps/chosen": -1.9978574514389038, "logps/rejected": -3.5064380168914795, "loss": 0.6347, "nll_loss": 0.5954056978225708, "rewards/accuracies": 0.875, "rewards/chosen": -0.1997857391834259, "rewards/margins": 0.1508580446243286, "rewards/rejected": -0.3506438136100769, "step": 6299 }, { "epoch": 17.248459958932237, "grad_norm": 5.978806972503662, "learning_rate": 1.373972602739726e-07, "log_odds_chosen": 1.1763685941696167, "log_odds_ratio": -0.3373546600341797, "logits/chosen": 0.7913365364074707, "logits/rejected": 0.7053296566009521, "logps/chosen": -1.5899436473846436, "logps/rejected": -2.627793788909912, "loss": 0.6001, "nll_loss": 0.5663290023803711, "rewards/accuracies": 0.875, "rewards/chosen": -0.1589943766593933, "rewards/margins": 0.10378500819206238, "rewards/rejected": -0.2627794146537781, "step": 6300 }, { "epoch": 17.251197809719372, "grad_norm": 5.059654712677002, "learning_rate": 1.3726027397260273e-07, "log_odds_chosen": 2.2396225929260254, "log_odds_ratio": -0.30004894733428955, "logits/chosen": 0.7104648947715759, "logits/rejected": 0.7667161226272583, "logps/chosen": -2.0520410537719727, "logps/rejected": -4.16879415512085, "loss": 0.6332, "nll_loss": 0.6032058000564575, "rewards/accuracies": 0.875, "rewards/chosen": -0.2052040994167328, "rewards/margins": 0.21167531609535217, "rewards/rejected": -0.41687941551208496, "step": 6301 }, { "epoch": 17.253935660506503, "grad_norm": 4.766302585601807, "learning_rate": 1.3712328767123289e-07, "log_odds_chosen": 2.7448153495788574, "log_odds_ratio": -0.16809368133544922, "logits/chosen": 0.9728797078132629, "logits/rejected": 0.9591959118843079, "logps/chosen": -1.788553237915039, "logps/rejected": -4.369819164276123, "loss": 0.5704, "nll_loss": 0.5535891056060791, "rewards/accuracies": 1.0, "rewards/chosen": -0.17885532975196838, "rewards/margins": 0.2581266164779663, "rewards/rejected": -0.4369819164276123, "step": 6302 }, { "epoch": 17.256673511293634, "grad_norm": 9.08163833618164, "learning_rate": 1.36986301369863e-07, "log_odds_chosen": 3.9619269371032715, "log_odds_ratio": -0.2533988356590271, "logits/chosen": 1.1170454025268555, "logits/rejected": 1.058736801147461, "logps/chosen": -2.8749520778656006, "logps/rejected": -6.736202239990234, "loss": 0.7897, "nll_loss": 0.7643254995346069, "rewards/accuracies": 0.875, "rewards/chosen": -0.2874951958656311, "rewards/margins": 0.38612496852874756, "rewards/rejected": -0.6736202239990234, "step": 6303 }, { "epoch": 17.259411362080765, "grad_norm": 6.0930681228637695, "learning_rate": 1.3684931506849314e-07, "log_odds_chosen": 1.4207723140716553, "log_odds_ratio": -0.3623107671737671, "logits/chosen": 0.894321620464325, "logits/rejected": 0.8590619564056396, "logps/chosen": -1.7020916938781738, "logps/rejected": -2.9526290893554688, "loss": 0.519, "nll_loss": 0.48280084133148193, "rewards/accuracies": 0.875, "rewards/chosen": -0.17020916938781738, "rewards/margins": 0.1250537633895874, "rewards/rejected": -0.2952629327774048, "step": 6304 }, { "epoch": 17.2621492128679, "grad_norm": 7.8239240646362305, "learning_rate": 1.3671232876712326e-07, "log_odds_chosen": 1.5832242965698242, "log_odds_ratio": -0.45501309633255005, "logits/chosen": 0.9463133215904236, "logits/rejected": 0.8809393644332886, "logps/chosen": -3.050079107284546, "logps/rejected": -4.552342891693115, "loss": 0.8177, "nll_loss": 0.7722423076629639, "rewards/accuracies": 0.75, "rewards/chosen": -0.3050079345703125, "rewards/margins": 0.15022636950016022, "rewards/rejected": -0.4552342891693115, "step": 6305 }, { "epoch": 17.26488706365503, "grad_norm": 5.760147571563721, "learning_rate": 1.3657534246575341e-07, "log_odds_chosen": 2.5791168212890625, "log_odds_ratio": -0.24881523847579956, "logits/chosen": 1.02566659450531, "logits/rejected": 1.0725185871124268, "logps/chosen": -2.6733016967773438, "logps/rejected": -5.17563533782959, "loss": 0.7215, "nll_loss": 0.6966429948806763, "rewards/accuracies": 0.875, "rewards/chosen": -0.2673301696777344, "rewards/margins": 0.25023335218429565, "rewards/rejected": -0.5175635814666748, "step": 6306 }, { "epoch": 17.267624914442163, "grad_norm": 6.194395065307617, "learning_rate": 1.3643835616438357e-07, "log_odds_chosen": 2.6368823051452637, "log_odds_ratio": -0.3430585265159607, "logits/chosen": 0.8791208267211914, "logits/rejected": 1.0187095403671265, "logps/chosen": -2.2908854484558105, "logps/rejected": -4.8178181648254395, "loss": 0.6707, "nll_loss": 0.6363580226898193, "rewards/accuracies": 0.875, "rewards/chosen": -0.22908854484558105, "rewards/margins": 0.2526932954788208, "rewards/rejected": -0.48178184032440186, "step": 6307 }, { "epoch": 17.270362765229294, "grad_norm": 5.338157653808594, "learning_rate": 1.363013698630137e-07, "log_odds_chosen": 1.9074711799621582, "log_odds_ratio": -0.27023351192474365, "logits/chosen": 0.5875080823898315, "logits/rejected": 0.5836569666862488, "logps/chosen": -1.6968238353729248, "logps/rejected": -3.4699671268463135, "loss": 0.6279, "nll_loss": 0.6009249687194824, "rewards/accuracies": 1.0, "rewards/chosen": -0.16968238353729248, "rewards/margins": 0.17731432616710663, "rewards/rejected": -0.3469967246055603, "step": 6308 }, { "epoch": 17.27310061601643, "grad_norm": 5.84068489074707, "learning_rate": 1.3616438356164384e-07, "log_odds_chosen": 2.2263565063476562, "log_odds_ratio": -0.1938171684741974, "logits/chosen": 0.8514524698257446, "logits/rejected": 0.9948991537094116, "logps/chosen": -2.2887678146362305, "logps/rejected": -4.414779186248779, "loss": 0.6386, "nll_loss": 0.619200587272644, "rewards/accuracies": 1.0, "rewards/chosen": -0.2288767695426941, "rewards/margins": 0.21260115504264832, "rewards/rejected": -0.44147789478302, "step": 6309 }, { "epoch": 17.27583846680356, "grad_norm": 6.15905237197876, "learning_rate": 1.3602739726027397e-07, "log_odds_chosen": 2.811513662338257, "log_odds_ratio": -0.17828209698200226, "logits/chosen": 0.726703941822052, "logits/rejected": 0.7278667688369751, "logps/chosen": -1.9322794675827026, "logps/rejected": -4.551231861114502, "loss": 0.5487, "nll_loss": 0.5308704376220703, "rewards/accuracies": 0.875, "rewards/chosen": -0.19322794675827026, "rewards/margins": 0.26189523935317993, "rewards/rejected": -0.4551231861114502, "step": 6310 }, { "epoch": 17.27857631759069, "grad_norm": 5.595644950866699, "learning_rate": 1.358904109589041e-07, "log_odds_chosen": 1.2692526578903198, "log_odds_ratio": -0.3567734658718109, "logits/chosen": 0.6509456634521484, "logits/rejected": 0.6715925931930542, "logps/chosen": -2.065077781677246, "logps/rejected": -3.2400407791137695, "loss": 0.6138, "nll_loss": 0.5781280994415283, "rewards/accuracies": 0.875, "rewards/chosen": -0.20650777220726013, "rewards/margins": 0.1174963042140007, "rewards/rejected": -0.3240041136741638, "step": 6311 }, { "epoch": 17.281314168377822, "grad_norm": 6.8930583000183105, "learning_rate": 1.3575342465753422e-07, "log_odds_chosen": 1.2294611930847168, "log_odds_ratio": -0.31033748388290405, "logits/chosen": 0.7519176006317139, "logits/rejected": 0.7301549315452576, "logps/chosen": -1.8665947914123535, "logps/rejected": -2.954263687133789, "loss": 0.616, "nll_loss": 0.584922730922699, "rewards/accuracies": 0.875, "rewards/chosen": -0.18665948510169983, "rewards/margins": 0.10876689851284027, "rewards/rejected": -0.2954263687133789, "step": 6312 }, { "epoch": 17.284052019164957, "grad_norm": 8.11606216430664, "learning_rate": 1.3561643835616437e-07, "log_odds_chosen": 1.9680969715118408, "log_odds_ratio": -0.3826351463794708, "logits/chosen": 0.8315361738204956, "logits/rejected": 0.8068832159042358, "logps/chosen": -2.6856918334960938, "logps/rejected": -4.539166450500488, "loss": 0.679, "nll_loss": 0.6407451033592224, "rewards/accuracies": 0.875, "rewards/chosen": -0.2685691714286804, "rewards/margins": 0.18534749746322632, "rewards/rejected": -0.45391666889190674, "step": 6313 }, { "epoch": 17.286789869952088, "grad_norm": 5.327160835266113, "learning_rate": 1.3547945205479453e-07, "log_odds_chosen": 1.5643136501312256, "log_odds_ratio": -0.2704225182533264, "logits/chosen": 0.7468855381011963, "logits/rejected": 0.7725393772125244, "logps/chosen": -1.804917335510254, "logps/rejected": -3.2407772541046143, "loss": 0.5182, "nll_loss": 0.4911957383155823, "rewards/accuracies": 1.0, "rewards/chosen": -0.18049173057079315, "rewards/margins": 0.14358599483966827, "rewards/rejected": -0.3240777254104614, "step": 6314 }, { "epoch": 17.28952772073922, "grad_norm": 4.894852161407471, "learning_rate": 1.3534246575342465e-07, "log_odds_chosen": 1.477905511856079, "log_odds_ratio": -0.2587794363498688, "logits/chosen": 0.7192856073379517, "logits/rejected": 0.7416658997535706, "logps/chosen": -2.356001377105713, "logps/rejected": -3.7169923782348633, "loss": 0.6799, "nll_loss": 0.6539809703826904, "rewards/accuracies": 1.0, "rewards/chosen": -0.23560012876987457, "rewards/margins": 0.13609911501407623, "rewards/rejected": -0.3716992437839508, "step": 6315 }, { "epoch": 17.29226557152635, "grad_norm": 7.921233654022217, "learning_rate": 1.352054794520548e-07, "log_odds_chosen": 1.1433913707733154, "log_odds_ratio": -0.4662246108055115, "logits/chosen": 0.8110239505767822, "logits/rejected": 0.8811295032501221, "logps/chosen": -2.409968852996826, "logps/rejected": -3.5094783306121826, "loss": 0.648, "nll_loss": 0.601404070854187, "rewards/accuracies": 0.625, "rewards/chosen": -0.24099688231945038, "rewards/margins": 0.1099509447813034, "rewards/rejected": -0.3509478271007538, "step": 6316 }, { "epoch": 17.295003422313485, "grad_norm": 5.115984916687012, "learning_rate": 1.350684931506849e-07, "log_odds_chosen": 2.9282476902008057, "log_odds_ratio": -0.20890775322914124, "logits/chosen": 0.9275403022766113, "logits/rejected": 1.0415074825286865, "logps/chosen": -2.245248556137085, "logps/rejected": -5.061427116394043, "loss": 0.6594, "nll_loss": 0.6384833455085754, "rewards/accuracies": 1.0, "rewards/chosen": -0.2245248407125473, "rewards/margins": 0.2816178798675537, "rewards/rejected": -0.5061427354812622, "step": 6317 }, { "epoch": 17.297741273100616, "grad_norm": 7.389626979827881, "learning_rate": 1.3493150684931505e-07, "log_odds_chosen": 0.805232584476471, "log_odds_ratio": -0.49345237016677856, "logits/chosen": 0.6586788892745972, "logits/rejected": 0.8243572115898132, "logps/chosen": -3.8036205768585205, "logps/rejected": -4.579791069030762, "loss": 0.7454, "nll_loss": 0.6960436105728149, "rewards/accuracies": 0.75, "rewards/chosen": -0.3803620934486389, "rewards/margins": 0.07761707156896591, "rewards/rejected": -0.45797914266586304, "step": 6318 }, { "epoch": 17.300479123887747, "grad_norm": 6.613946437835693, "learning_rate": 1.347945205479452e-07, "log_odds_chosen": 3.8171870708465576, "log_odds_ratio": -0.20017176866531372, "logits/chosen": 0.7468253374099731, "logits/rejected": 0.7673539519309998, "logps/chosen": -2.901334285736084, "logps/rejected": -6.600292205810547, "loss": 0.6229, "nll_loss": 0.6028847694396973, "rewards/accuracies": 1.0, "rewards/chosen": -0.29013341665267944, "rewards/margins": 0.3698958456516266, "rewards/rejected": -0.6600292921066284, "step": 6319 }, { "epoch": 17.30321697467488, "grad_norm": 6.216060161590576, "learning_rate": 1.3465753424657533e-07, "log_odds_chosen": 2.153884172439575, "log_odds_ratio": -0.2870010733604431, "logits/chosen": 0.8358737826347351, "logits/rejected": 0.7313164472579956, "logps/chosen": -1.8190988302230835, "logps/rejected": -3.864058256149292, "loss": 0.6333, "nll_loss": 0.6045501232147217, "rewards/accuracies": 1.0, "rewards/chosen": -0.18190987408161163, "rewards/margins": 0.20449596643447876, "rewards/rejected": -0.3864058554172516, "step": 6320 }, { "epoch": 17.305954825462013, "grad_norm": 6.364597797393799, "learning_rate": 1.3452054794520548e-07, "log_odds_chosen": 1.8871546983718872, "log_odds_ratio": -0.27202847599983215, "logits/chosen": 0.8434196710586548, "logits/rejected": 0.9163371324539185, "logps/chosen": -2.5559167861938477, "logps/rejected": -4.359118461608887, "loss": 0.6524, "nll_loss": 0.6252202987670898, "rewards/accuracies": 1.0, "rewards/chosen": -0.2555916905403137, "rewards/margins": 0.180320143699646, "rewards/rejected": -0.4359118342399597, "step": 6321 }, { "epoch": 17.308692676249144, "grad_norm": 6.563838005065918, "learning_rate": 1.343835616438356e-07, "log_odds_chosen": 1.3482123613357544, "log_odds_ratio": -0.559881865978241, "logits/chosen": 1.148305892944336, "logits/rejected": 1.1825217008590698, "logps/chosen": -2.8748488426208496, "logps/rejected": -4.129899024963379, "loss": 0.6342, "nll_loss": 0.5782508850097656, "rewards/accuracies": 0.625, "rewards/chosen": -0.28748488426208496, "rewards/margins": 0.1255050003528595, "rewards/rejected": -0.41298985481262207, "step": 6322 }, { "epoch": 17.311430527036276, "grad_norm": 10.08560848236084, "learning_rate": 1.3424657534246576e-07, "log_odds_chosen": 0.6099437475204468, "log_odds_ratio": -0.6762472987174988, "logits/chosen": 0.7373871207237244, "logits/rejected": 0.8172122836112976, "logps/chosen": -2.764347791671753, "logps/rejected": -3.2635443210601807, "loss": 0.7574, "nll_loss": 0.6897985339164734, "rewards/accuracies": 0.75, "rewards/chosen": -0.2764347791671753, "rewards/margins": 0.049919649958610535, "rewards/rejected": -0.326354444026947, "step": 6323 }, { "epoch": 17.31416837782341, "grad_norm": 5.714565277099609, "learning_rate": 1.3410958904109586e-07, "log_odds_chosen": 2.498307943344116, "log_odds_ratio": -0.17230752110481262, "logits/chosen": 0.77882981300354, "logits/rejected": 0.8134269714355469, "logps/chosen": -2.1426198482513428, "logps/rejected": -4.458519458770752, "loss": 0.5394, "nll_loss": 0.5221244096755981, "rewards/accuracies": 1.0, "rewards/chosen": -0.214261993765831, "rewards/margins": 0.23158995807170868, "rewards/rejected": -0.44585198163986206, "step": 6324 }, { "epoch": 17.31690622861054, "grad_norm": 6.991768836975098, "learning_rate": 1.33972602739726e-07, "log_odds_chosen": 4.919806003570557, "log_odds_ratio": -0.026741977781057358, "logits/chosen": 1.1090123653411865, "logits/rejected": 1.1903842687606812, "logps/chosen": -2.115342855453491, "logps/rejected": -6.851196765899658, "loss": 0.735, "nll_loss": 0.7323606014251709, "rewards/accuracies": 1.0, "rewards/chosen": -0.2115342915058136, "rewards/margins": 0.4735853374004364, "rewards/rejected": -0.68511962890625, "step": 6325 }, { "epoch": 17.319644079397673, "grad_norm": 6.080654621124268, "learning_rate": 1.3383561643835616e-07, "log_odds_chosen": 2.3628101348876953, "log_odds_ratio": -0.17961236834526062, "logits/chosen": 0.9365823268890381, "logits/rejected": 0.9388378262519836, "logps/chosen": -2.0928711891174316, "logps/rejected": -4.316929817199707, "loss": 0.6725, "nll_loss": 0.6545044183731079, "rewards/accuracies": 1.0, "rewards/chosen": -0.2092871069908142, "rewards/margins": 0.2224058210849762, "rewards/rejected": -0.4316929578781128, "step": 6326 }, { "epoch": 17.322381930184804, "grad_norm": 6.249042510986328, "learning_rate": 1.336986301369863e-07, "log_odds_chosen": 2.5468945503234863, "log_odds_ratio": -0.2676234543323517, "logits/chosen": 0.9508222341537476, "logits/rejected": 0.926753044128418, "logps/chosen": -2.427440881729126, "logps/rejected": -4.872873306274414, "loss": 0.8309, "nll_loss": 0.8041695952415466, "rewards/accuracies": 0.875, "rewards/chosen": -0.2427440732717514, "rewards/margins": 0.24454325437545776, "rewards/rejected": -0.48728734254837036, "step": 6327 }, { "epoch": 17.32511978097194, "grad_norm": 6.197129726409912, "learning_rate": 1.3356164383561644e-07, "log_odds_chosen": 3.1737663745880127, "log_odds_ratio": -0.22980251908302307, "logits/chosen": 0.8487284183502197, "logits/rejected": 0.7987317442893982, "logps/chosen": -2.079477548599243, "logps/rejected": -5.135185241699219, "loss": 0.5517, "nll_loss": 0.5287085771560669, "rewards/accuracies": 1.0, "rewards/chosen": -0.2079477608203888, "rewards/margins": 0.3055708408355713, "rewards/rejected": -0.5135185718536377, "step": 6328 }, { "epoch": 17.32785763175907, "grad_norm": 5.0954203605651855, "learning_rate": 1.3342465753424657e-07, "log_odds_chosen": 2.256582021713257, "log_odds_ratio": -0.18481355905532837, "logits/chosen": 0.6752633452415466, "logits/rejected": 0.6653269529342651, "logps/chosen": -2.228180408477783, "logps/rejected": -4.340168476104736, "loss": 0.6964, "nll_loss": 0.6778908967971802, "rewards/accuracies": 1.0, "rewards/chosen": -0.2228180468082428, "rewards/margins": 0.21119877696037292, "rewards/rejected": -0.4340168237686157, "step": 6329 }, { "epoch": 17.3305954825462, "grad_norm": 5.6090006828308105, "learning_rate": 1.3328767123287672e-07, "log_odds_chosen": 1.9106780290603638, "log_odds_ratio": -0.33442041277885437, "logits/chosen": 0.778048574924469, "logits/rejected": 0.7821535468101501, "logps/chosen": -1.6637444496154785, "logps/rejected": -3.407480239868164, "loss": 0.6227, "nll_loss": 0.5892747044563293, "rewards/accuracies": 0.75, "rewards/chosen": -0.16637445986270905, "rewards/margins": 0.174373596906662, "rewards/rejected": -0.3407480716705322, "step": 6330 }, { "epoch": 17.333333333333332, "grad_norm": 6.785089492797852, "learning_rate": 1.3315068493150685e-07, "log_odds_chosen": 2.007098436355591, "log_odds_ratio": -0.37731775641441345, "logits/chosen": 0.778240442276001, "logits/rejected": 0.7656826972961426, "logps/chosen": -2.743607521057129, "logps/rejected": -4.702050685882568, "loss": 0.7982, "nll_loss": 0.7605023980140686, "rewards/accuracies": 0.875, "rewards/chosen": -0.2743607759475708, "rewards/margins": 0.19584426283836365, "rewards/rejected": -0.47020500898361206, "step": 6331 }, { "epoch": 17.336071184120467, "grad_norm": 4.826193332672119, "learning_rate": 1.3301369863013697e-07, "log_odds_chosen": 2.248227596282959, "log_odds_ratio": -0.25175297260284424, "logits/chosen": 1.0196421146392822, "logits/rejected": 1.0525659322738647, "logps/chosen": -2.165645122528076, "logps/rejected": -4.285672664642334, "loss": 0.6591, "nll_loss": 0.633888840675354, "rewards/accuracies": 0.875, "rewards/chosen": -0.21656452119350433, "rewards/margins": 0.2120027244091034, "rewards/rejected": -0.4285672605037689, "step": 6332 }, { "epoch": 17.338809034907598, "grad_norm": 5.483830451965332, "learning_rate": 1.3287671232876712e-07, "log_odds_chosen": 2.785367250442505, "log_odds_ratio": -0.13828277587890625, "logits/chosen": 0.9548430442810059, "logits/rejected": 1.0384842157363892, "logps/chosen": -1.9823412895202637, "logps/rejected": -4.578849792480469, "loss": 0.6072, "nll_loss": 0.5933295488357544, "rewards/accuracies": 1.0, "rewards/chosen": -0.19823414087295532, "rewards/margins": 0.259650856256485, "rewards/rejected": -0.4578849673271179, "step": 6333 }, { "epoch": 17.34154688569473, "grad_norm": 5.122596263885498, "learning_rate": 1.3273972602739725e-07, "log_odds_chosen": 1.7382510900497437, "log_odds_ratio": -0.20359990000724792, "logits/chosen": 0.7744624018669128, "logits/rejected": 0.804568886756897, "logps/chosen": -2.0085906982421875, "logps/rejected": -3.594902753829956, "loss": 0.6062, "nll_loss": 0.5858864784240723, "rewards/accuracies": 1.0, "rewards/chosen": -0.20085906982421875, "rewards/margins": 0.15863120555877686, "rewards/rejected": -0.3594902753829956, "step": 6334 }, { "epoch": 17.34428473648186, "grad_norm": 6.446891784667969, "learning_rate": 1.326027397260274e-07, "log_odds_chosen": 3.2629377841949463, "log_odds_ratio": -0.22117064893245697, "logits/chosen": 0.9971753358840942, "logits/rejected": 0.9974574446678162, "logps/chosen": -2.6371045112609863, "logps/rejected": -5.802362442016602, "loss": 0.7849, "nll_loss": 0.7627345323562622, "rewards/accuracies": 1.0, "rewards/chosen": -0.2637104392051697, "rewards/margins": 0.31652581691741943, "rewards/rejected": -0.5802363157272339, "step": 6335 }, { "epoch": 17.347022587268995, "grad_norm": 5.209534168243408, "learning_rate": 1.3246575342465753e-07, "log_odds_chosen": 2.777522563934326, "log_odds_ratio": -0.1752321422100067, "logits/chosen": 0.5663412809371948, "logits/rejected": 0.5481749773025513, "logps/chosen": -1.7852632999420166, "logps/rejected": -4.380069255828857, "loss": 0.5855, "nll_loss": 0.5679462552070618, "rewards/accuracies": 1.0, "rewards/chosen": -0.17852632701396942, "rewards/margins": 0.2594805955886841, "rewards/rejected": -0.4380069375038147, "step": 6336 }, { "epoch": 17.349760438056126, "grad_norm": 4.807313442230225, "learning_rate": 1.3232876712328768e-07, "log_odds_chosen": 3.776655435562134, "log_odds_ratio": -0.17933696508407593, "logits/chosen": 0.8044042587280273, "logits/rejected": 0.8107767105102539, "logps/chosen": -1.5253782272338867, "logps/rejected": -5.010898590087891, "loss": 0.6598, "nll_loss": 0.6418931484222412, "rewards/accuracies": 1.0, "rewards/chosen": -0.15253782272338867, "rewards/margins": 0.34855204820632935, "rewards/rejected": -0.501089870929718, "step": 6337 }, { "epoch": 17.352498288843258, "grad_norm": 6.552398681640625, "learning_rate": 1.321917808219178e-07, "log_odds_chosen": 2.0469465255737305, "log_odds_ratio": -0.25860145688056946, "logits/chosen": 0.9674042463302612, "logits/rejected": 0.9376291036605835, "logps/chosen": -2.2689597606658936, "logps/rejected": -4.167494297027588, "loss": 0.5359, "nll_loss": 0.51006680727005, "rewards/accuracies": 0.875, "rewards/chosen": -0.2268959879875183, "rewards/margins": 0.1898534893989563, "rewards/rejected": -0.4167494773864746, "step": 6338 }, { "epoch": 17.35523613963039, "grad_norm": 5.465068817138672, "learning_rate": 1.3205479452054793e-07, "log_odds_chosen": 1.087457537651062, "log_odds_ratio": -0.5050359964370728, "logits/chosen": 0.9204605221748352, "logits/rejected": 0.9617469310760498, "logps/chosen": -2.652273654937744, "logps/rejected": -3.6843881607055664, "loss": 0.6646, "nll_loss": 0.6141446828842163, "rewards/accuracies": 0.75, "rewards/chosen": -0.26522737741470337, "rewards/margins": 0.10321144759654999, "rewards/rejected": -0.36843881011009216, "step": 6339 }, { "epoch": 17.357973990417523, "grad_norm": 6.532071590423584, "learning_rate": 1.3191780821917808e-07, "log_odds_chosen": 1.7794750928878784, "log_odds_ratio": -0.24269896745681763, "logits/chosen": 0.6650571823120117, "logits/rejected": 0.6412942409515381, "logps/chosen": -1.948758840560913, "logps/rejected": -3.57173490524292, "loss": 0.5717, "nll_loss": 0.5474519729614258, "rewards/accuracies": 1.0, "rewards/chosen": -0.19487589597702026, "rewards/margins": 0.1622975766658783, "rewards/rejected": -0.35717344284057617, "step": 6340 }, { "epoch": 17.360711841204655, "grad_norm": 5.932488441467285, "learning_rate": 1.317808219178082e-07, "log_odds_chosen": 2.2557194232940674, "log_odds_ratio": -0.3117920756340027, "logits/chosen": 0.7563701868057251, "logits/rejected": 0.8619792461395264, "logps/chosen": -2.6594104766845703, "logps/rejected": -4.831323623657227, "loss": 0.7424, "nll_loss": 0.7111815214157104, "rewards/accuracies": 0.875, "rewards/chosen": -0.2659410536289215, "rewards/margins": 0.21719130873680115, "rewards/rejected": -0.48313236236572266, "step": 6341 }, { "epoch": 17.363449691991786, "grad_norm": 4.303171634674072, "learning_rate": 1.3164383561643836e-07, "log_odds_chosen": 3.160637617111206, "log_odds_ratio": -0.1320403665304184, "logits/chosen": 0.931311845779419, "logits/rejected": 0.9511188268661499, "logps/chosen": -1.8147614002227783, "logps/rejected": -4.771914482116699, "loss": 0.5554, "nll_loss": 0.5422138571739197, "rewards/accuracies": 1.0, "rewards/chosen": -0.1814761459827423, "rewards/margins": 0.29571533203125, "rewards/rejected": -0.4771914780139923, "step": 6342 }, { "epoch": 17.366187542778917, "grad_norm": 5.527085304260254, "learning_rate": 1.3150684931506849e-07, "log_odds_chosen": 1.931018590927124, "log_odds_ratio": -0.1892470419406891, "logits/chosen": 0.7689824104309082, "logits/rejected": 0.7747370004653931, "logps/chosen": -2.9371163845062256, "logps/rejected": -4.7722649574279785, "loss": 0.8165, "nll_loss": 0.7975746393203735, "rewards/accuracies": 1.0, "rewards/chosen": -0.2937116324901581, "rewards/margins": 0.18351484835147858, "rewards/rejected": -0.47722649574279785, "step": 6343 }, { "epoch": 17.36892539356605, "grad_norm": 6.38883113861084, "learning_rate": 1.313698630136986e-07, "log_odds_chosen": 1.7925704717636108, "log_odds_ratio": -0.30590227246284485, "logits/chosen": 0.845166802406311, "logits/rejected": 0.9003716111183167, "logps/chosen": -2.070871591567993, "logps/rejected": -3.6280322074890137, "loss": 0.6623, "nll_loss": 0.6317418813705444, "rewards/accuracies": 0.75, "rewards/chosen": -0.20708715915679932, "rewards/margins": 0.15571606159210205, "rewards/rejected": -0.36280322074890137, "step": 6344 }, { "epoch": 17.371663244353183, "grad_norm": 7.592992782592773, "learning_rate": 1.3123287671232876e-07, "log_odds_chosen": 0.648897647857666, "log_odds_ratio": -0.6050294637680054, "logits/chosen": 0.728969395160675, "logits/rejected": 0.763299822807312, "logps/chosen": -2.4311749935150146, "logps/rejected": -3.0630481243133545, "loss": 0.7379, "nll_loss": 0.6773841381072998, "rewards/accuracies": 0.75, "rewards/chosen": -0.2431175410747528, "rewards/margins": 0.06318729370832443, "rewards/rejected": -0.30630481243133545, "step": 6345 }, { "epoch": 17.374401095140314, "grad_norm": 5.320627212524414, "learning_rate": 1.310958904109589e-07, "log_odds_chosen": 3.186408281326294, "log_odds_ratio": -0.12601861357688904, "logits/chosen": 0.7409818172454834, "logits/rejected": 0.7777403593063354, "logps/chosen": -2.038003921508789, "logps/rejected": -4.992623329162598, "loss": 0.6571, "nll_loss": 0.6445145606994629, "rewards/accuracies": 1.0, "rewards/chosen": -0.20380038022994995, "rewards/margins": 0.2954619228839874, "rewards/rejected": -0.49926233291625977, "step": 6346 }, { "epoch": 17.377138945927445, "grad_norm": 5.599518299102783, "learning_rate": 1.3095890410958904e-07, "log_odds_chosen": 2.5448474884033203, "log_odds_ratio": -0.22102244198322296, "logits/chosen": 0.7050408720970154, "logits/rejected": 0.7075567245483398, "logps/chosen": -1.886655330657959, "logps/rejected": -4.299123764038086, "loss": 0.6366, "nll_loss": 0.6144803762435913, "rewards/accuracies": 0.875, "rewards/chosen": -0.18866553902626038, "rewards/margins": 0.24124687910079956, "rewards/rejected": -0.42991238832473755, "step": 6347 }, { "epoch": 17.37987679671458, "grad_norm": 6.217626094818115, "learning_rate": 1.3082191780821917e-07, "log_odds_chosen": 1.9339981079101562, "log_odds_ratio": -0.2573361396789551, "logits/chosen": 0.7073829174041748, "logits/rejected": 0.7190682291984558, "logps/chosen": -2.406682014465332, "logps/rejected": -4.247169494628906, "loss": 0.6812, "nll_loss": 0.6554966568946838, "rewards/accuracies": 1.0, "rewards/chosen": -0.24066820740699768, "rewards/margins": 0.18404875695705414, "rewards/rejected": -0.4247169494628906, "step": 6348 }, { "epoch": 17.38261464750171, "grad_norm": 8.26856803894043, "learning_rate": 1.3068493150684932e-07, "log_odds_chosen": 1.7611539363861084, "log_odds_ratio": -0.38698530197143555, "logits/chosen": 1.1171255111694336, "logits/rejected": 1.1263877153396606, "logps/chosen": -2.295320749282837, "logps/rejected": -3.8968653678894043, "loss": 0.6448, "nll_loss": 0.6061043739318848, "rewards/accuracies": 0.75, "rewards/chosen": -0.22953210771083832, "rewards/margins": 0.16015446186065674, "rewards/rejected": -0.38968658447265625, "step": 6349 }, { "epoch": 17.385352498288842, "grad_norm": 5.498734474182129, "learning_rate": 1.3054794520547947e-07, "log_odds_chosen": 1.7222620248794556, "log_odds_ratio": -0.4081304669380188, "logits/chosen": 0.8234528303146362, "logits/rejected": 0.852938175201416, "logps/chosen": -2.364790439605713, "logps/rejected": -3.9780220985412598, "loss": 0.6509, "nll_loss": 0.6100762486457825, "rewards/accuracies": 0.75, "rewards/chosen": -0.2364790439605713, "rewards/margins": 0.1613231599330902, "rewards/rejected": -0.3978022336959839, "step": 6350 }, { "epoch": 17.388090349075977, "grad_norm": 5.750826358795166, "learning_rate": 1.3041095890410957e-07, "log_odds_chosen": 2.902553081512451, "log_odds_ratio": -0.23600398004055023, "logits/chosen": 0.7678754329681396, "logits/rejected": 0.7102050185203552, "logps/chosen": -1.7881009578704834, "logps/rejected": -4.389339447021484, "loss": 0.5539, "nll_loss": 0.5302892923355103, "rewards/accuracies": 1.0, "rewards/chosen": -0.17881008982658386, "rewards/margins": 0.2601238489151001, "rewards/rejected": -0.43893393874168396, "step": 6351 }, { "epoch": 17.39082819986311, "grad_norm": 7.208788871765137, "learning_rate": 1.3027397260273972e-07, "log_odds_chosen": 1.7088830471038818, "log_odds_ratio": -0.416871041059494, "logits/chosen": 1.0223891735076904, "logits/rejected": 1.007014274597168, "logps/chosen": -2.3628907203674316, "logps/rejected": -3.9464821815490723, "loss": 0.7425, "nll_loss": 0.7007997035980225, "rewards/accuracies": 0.875, "rewards/chosen": -0.23628908395767212, "rewards/margins": 0.15835914015769958, "rewards/rejected": -0.3946482241153717, "step": 6352 }, { "epoch": 17.39356605065024, "grad_norm": 5.494036674499512, "learning_rate": 1.3013698630136985e-07, "log_odds_chosen": 2.2045888900756836, "log_odds_ratio": -0.19394370913505554, "logits/chosen": 0.8176323771476746, "logits/rejected": 0.732297420501709, "logps/chosen": -1.5001646280288696, "logps/rejected": -3.4838714599609375, "loss": 0.6503, "nll_loss": 0.630931556224823, "rewards/accuracies": 1.0, "rewards/chosen": -0.15001647174358368, "rewards/margins": 0.19837068021297455, "rewards/rejected": -0.3483871817588806, "step": 6353 }, { "epoch": 17.39630390143737, "grad_norm": 5.653316020965576, "learning_rate": 1.3e-07, "log_odds_chosen": 1.6218998432159424, "log_odds_ratio": -0.2894287705421448, "logits/chosen": 0.8695032596588135, "logits/rejected": 0.8943372368812561, "logps/chosen": -1.9313656091690063, "logps/rejected": -3.4611949920654297, "loss": 0.524, "nll_loss": 0.4950902462005615, "rewards/accuracies": 0.875, "rewards/chosen": -0.1931365728378296, "rewards/margins": 0.1529829353094101, "rewards/rejected": -0.3461195230484009, "step": 6354 }, { "epoch": 17.399041752224505, "grad_norm": 4.894699573516846, "learning_rate": 1.2986301369863013e-07, "log_odds_chosen": 2.2954344749450684, "log_odds_ratio": -0.1819058358669281, "logits/chosen": 0.7883283495903015, "logits/rejected": 0.7780527472496033, "logps/chosen": -2.3032987117767334, "logps/rejected": -4.473386287689209, "loss": 0.6685, "nll_loss": 0.6502598524093628, "rewards/accuracies": 1.0, "rewards/chosen": -0.23032987117767334, "rewards/margins": 0.21700873970985413, "rewards/rejected": -0.44733861088752747, "step": 6355 }, { "epoch": 17.401779603011637, "grad_norm": 4.9007487297058105, "learning_rate": 1.2972602739726028e-07, "log_odds_chosen": 2.836548328399658, "log_odds_ratio": -0.15072214603424072, "logits/chosen": 0.9532647132873535, "logits/rejected": 0.9265796542167664, "logps/chosen": -2.3425540924072266, "logps/rejected": -5.07723331451416, "loss": 0.6602, "nll_loss": 0.6451001763343811, "rewards/accuracies": 1.0, "rewards/chosen": -0.23425543308258057, "rewards/margins": 0.27346792817115784, "rewards/rejected": -0.507723331451416, "step": 6356 }, { "epoch": 17.404517453798768, "grad_norm": 5.252750396728516, "learning_rate": 1.2958904109589043e-07, "log_odds_chosen": 3.4112586975097656, "log_odds_ratio": -0.1436835527420044, "logits/chosen": 1.1042543649673462, "logits/rejected": 1.1467664241790771, "logps/chosen": -2.087405204772949, "logps/rejected": -5.3142828941345215, "loss": 0.6589, "nll_loss": 0.6445106863975525, "rewards/accuracies": 1.0, "rewards/chosen": -0.20874053239822388, "rewards/margins": 0.3226877450942993, "rewards/rejected": -0.531428337097168, "step": 6357 }, { "epoch": 17.4072553045859, "grad_norm": 5.491335391998291, "learning_rate": 1.2945205479452053e-07, "log_odds_chosen": 3.1576333045959473, "log_odds_ratio": -0.17133723199367523, "logits/chosen": 0.8061087131500244, "logits/rejected": 0.817237138748169, "logps/chosen": -2.1418378353118896, "logps/rejected": -5.170519828796387, "loss": 0.8253, "nll_loss": 0.8081949949264526, "rewards/accuracies": 0.875, "rewards/chosen": -0.2141837775707245, "rewards/margins": 0.3028682470321655, "rewards/rejected": -0.5170519948005676, "step": 6358 }, { "epoch": 17.409993155373034, "grad_norm": 7.947627067565918, "learning_rate": 1.2931506849315068e-07, "log_odds_chosen": 2.135427474975586, "log_odds_ratio": -0.28127437829971313, "logits/chosen": 1.013301134109497, "logits/rejected": 0.966909646987915, "logps/chosen": -1.7507108449935913, "logps/rejected": -3.758998394012451, "loss": 0.6241, "nll_loss": 0.5959368944168091, "rewards/accuracies": 1.0, "rewards/chosen": -0.1750710904598236, "rewards/margins": 0.20082877576351166, "rewards/rejected": -0.3758998513221741, "step": 6359 }, { "epoch": 17.412731006160165, "grad_norm": 5.249411582946777, "learning_rate": 1.291780821917808e-07, "log_odds_chosen": 2.2522435188293457, "log_odds_ratio": -0.30318447947502136, "logits/chosen": 0.6484360694885254, "logits/rejected": 0.7033021450042725, "logps/chosen": -1.9489316940307617, "logps/rejected": -4.1059699058532715, "loss": 0.5856, "nll_loss": 0.5552693605422974, "rewards/accuracies": 0.875, "rewards/chosen": -0.19489318132400513, "rewards/margins": 0.2157038301229477, "rewards/rejected": -0.410597026348114, "step": 6360 }, { "epoch": 17.415468856947296, "grad_norm": 5.990229606628418, "learning_rate": 1.2904109589041096e-07, "log_odds_chosen": 2.3539185523986816, "log_odds_ratio": -0.3454062342643738, "logits/chosen": 1.1147247552871704, "logits/rejected": 1.1707451343536377, "logps/chosen": -2.8792102336883545, "logps/rejected": -5.182558536529541, "loss": 0.702, "nll_loss": 0.6674195528030396, "rewards/accuracies": 0.75, "rewards/chosen": -0.2879210114479065, "rewards/margins": 0.2303348332643509, "rewards/rejected": -0.5182558298110962, "step": 6361 }, { "epoch": 17.418206707734427, "grad_norm": 6.98271369934082, "learning_rate": 1.2890410958904108e-07, "log_odds_chosen": 2.2613778114318848, "log_odds_ratio": -0.3512169420719147, "logits/chosen": 0.646039605140686, "logits/rejected": 0.6386361122131348, "logps/chosen": -2.391538619995117, "logps/rejected": -4.549946308135986, "loss": 0.6823, "nll_loss": 0.6472119688987732, "rewards/accuracies": 0.75, "rewards/chosen": -0.23915386199951172, "rewards/margins": 0.21584075689315796, "rewards/rejected": -0.4549946188926697, "step": 6362 }, { "epoch": 17.420944558521562, "grad_norm": 11.800250053405762, "learning_rate": 1.2876712328767124e-07, "log_odds_chosen": 2.8709540367126465, "log_odds_ratio": -0.1543852984905243, "logits/chosen": 0.9417200088500977, "logits/rejected": 0.9774397611618042, "logps/chosen": -2.5638444423675537, "logps/rejected": -5.302070140838623, "loss": 0.7305, "nll_loss": 0.7150481343269348, "rewards/accuracies": 1.0, "rewards/chosen": -0.2563844323158264, "rewards/margins": 0.2738226056098938, "rewards/rejected": -0.5302070379257202, "step": 6363 }, { "epoch": 17.423682409308693, "grad_norm": 7.087695121765137, "learning_rate": 1.2863013698630136e-07, "log_odds_chosen": 2.053490161895752, "log_odds_ratio": -0.5058876276016235, "logits/chosen": 0.6516547203063965, "logits/rejected": 0.6127217411994934, "logps/chosen": -3.1339714527130127, "logps/rejected": -5.052217960357666, "loss": 0.6997, "nll_loss": 0.6491503119468689, "rewards/accuracies": 0.75, "rewards/chosen": -0.3133971691131592, "rewards/margins": 0.19182464480400085, "rewards/rejected": -0.5052218437194824, "step": 6364 }, { "epoch": 17.426420260095824, "grad_norm": 4.758518695831299, "learning_rate": 1.284931506849315e-07, "log_odds_chosen": 2.3052992820739746, "log_odds_ratio": -0.23467382788658142, "logits/chosen": 0.8736050724983215, "logits/rejected": 0.9198390245437622, "logps/chosen": -2.3380589485168457, "logps/rejected": -4.560713768005371, "loss": 0.6534, "nll_loss": 0.6299161911010742, "rewards/accuracies": 1.0, "rewards/chosen": -0.23380590975284576, "rewards/margins": 0.22226548194885254, "rewards/rejected": -0.4560713768005371, "step": 6365 }, { "epoch": 17.429158110882955, "grad_norm": 5.178077697753906, "learning_rate": 1.2835616438356164e-07, "log_odds_chosen": 1.7235654592514038, "log_odds_ratio": -0.2717190682888031, "logits/chosen": 0.805138885974884, "logits/rejected": 0.7849664688110352, "logps/chosen": -2.0873169898986816, "logps/rejected": -3.681892156600952, "loss": 0.5723, "nll_loss": 0.5451414585113525, "rewards/accuracies": 0.875, "rewards/chosen": -0.20873169600963593, "rewards/margins": 0.15945754945278168, "rewards/rejected": -0.3681892454624176, "step": 6366 }, { "epoch": 17.43189596167009, "grad_norm": 5.5858330726623535, "learning_rate": 1.2821917808219176e-07, "log_odds_chosen": 2.543147087097168, "log_odds_ratio": -0.19212937355041504, "logits/chosen": 0.9578035473823547, "logits/rejected": 0.9986187219619751, "logps/chosen": -1.9198436737060547, "logps/rejected": -4.32320499420166, "loss": 0.7748, "nll_loss": 0.755617618560791, "rewards/accuracies": 1.0, "rewards/chosen": -0.1919843852519989, "rewards/margins": 0.2403361201286316, "rewards/rejected": -0.4323204755783081, "step": 6367 }, { "epoch": 17.43463381245722, "grad_norm": 6.5537848472595215, "learning_rate": 1.2808219178082192e-07, "log_odds_chosen": 0.7424490451812744, "log_odds_ratio": -0.6151899099349976, "logits/chosen": 0.6675496101379395, "logits/rejected": 0.7624166011810303, "logps/chosen": -2.7027289867401123, "logps/rejected": -3.396294593811035, "loss": 0.6819, "nll_loss": 0.6203970313072205, "rewards/accuracies": 0.75, "rewards/chosen": -0.2702729105949402, "rewards/margins": 0.06935658305883408, "rewards/rejected": -0.33962947130203247, "step": 6368 }, { "epoch": 17.437371663244353, "grad_norm": 6.05948543548584, "learning_rate": 1.2794520547945207e-07, "log_odds_chosen": 2.1872823238372803, "log_odds_ratio": -0.26281464099884033, "logits/chosen": 0.7328200936317444, "logits/rejected": 0.6898232102394104, "logps/chosen": -2.004852056503296, "logps/rejected": -4.044926166534424, "loss": 0.6112, "nll_loss": 0.5849286317825317, "rewards/accuracies": 0.875, "rewards/chosen": -0.2004851996898651, "rewards/margins": 0.20400738716125488, "rewards/rejected": -0.4044926166534424, "step": 6369 }, { "epoch": 17.440109514031484, "grad_norm": 6.548836708068848, "learning_rate": 1.278082191780822e-07, "log_odds_chosen": 1.350541114807129, "log_odds_ratio": -0.5346077680587769, "logits/chosen": 0.7952830791473389, "logits/rejected": 0.7976895570755005, "logps/chosen": -2.81862473487854, "logps/rejected": -4.136861324310303, "loss": 0.6521, "nll_loss": 0.598670482635498, "rewards/accuracies": 0.75, "rewards/chosen": -0.2818624973297119, "rewards/margins": 0.13182362914085388, "rewards/rejected": -0.4136861264705658, "step": 6370 }, { "epoch": 17.44284736481862, "grad_norm": 5.384687900543213, "learning_rate": 1.2767123287671232e-07, "log_odds_chosen": 4.317075729370117, "log_odds_ratio": -0.08014161139726639, "logits/chosen": 1.045236349105835, "logits/rejected": 1.112247347831726, "logps/chosen": -2.3569302558898926, "logps/rejected": -6.488214492797852, "loss": 0.6125, "nll_loss": 0.6044939756393433, "rewards/accuracies": 1.0, "rewards/chosen": -0.23569300770759583, "rewards/margins": 0.41312846541404724, "rewards/rejected": -0.6488214731216431, "step": 6371 }, { "epoch": 17.44558521560575, "grad_norm": 5.997472763061523, "learning_rate": 1.2753424657534245e-07, "log_odds_chosen": 3.0756680965423584, "log_odds_ratio": -0.22526219487190247, "logits/chosen": 0.8527526259422302, "logits/rejected": 0.9342146515846252, "logps/chosen": -2.019927501678467, "logps/rejected": -4.951882839202881, "loss": 0.6193, "nll_loss": 0.5967715382575989, "rewards/accuracies": 1.0, "rewards/chosen": -0.20199275016784668, "rewards/margins": 0.29319554567337036, "rewards/rejected": -0.49518829584121704, "step": 6372 }, { "epoch": 17.44832306639288, "grad_norm": 5.572810649871826, "learning_rate": 1.273972602739726e-07, "log_odds_chosen": 2.1972789764404297, "log_odds_ratio": -0.24416899681091309, "logits/chosen": 0.9763004779815674, "logits/rejected": 1.026721477508545, "logps/chosen": -1.5958236455917358, "logps/rejected": -3.614935874938965, "loss": 0.4721, "nll_loss": 0.44772642850875854, "rewards/accuracies": 1.0, "rewards/chosen": -0.15958237648010254, "rewards/margins": 0.20191124081611633, "rewards/rejected": -0.3614935874938965, "step": 6373 }, { "epoch": 17.451060917180012, "grad_norm": 5.062425136566162, "learning_rate": 1.2726027397260272e-07, "log_odds_chosen": 2.4672908782958984, "log_odds_ratio": -0.3230438828468323, "logits/chosen": 0.8764788508415222, "logits/rejected": 0.9287086129188538, "logps/chosen": -2.3376073837280273, "logps/rejected": -4.736293315887451, "loss": 0.7182, "nll_loss": 0.6858543753623962, "rewards/accuracies": 0.75, "rewards/chosen": -0.23376072943210602, "rewards/margins": 0.2398686408996582, "rewards/rejected": -0.4736293852329254, "step": 6374 }, { "epoch": 17.453798767967147, "grad_norm": 5.49802827835083, "learning_rate": 1.2712328767123288e-07, "log_odds_chosen": 1.7910873889923096, "log_odds_ratio": -0.22786742448806763, "logits/chosen": 0.8316518664360046, "logits/rejected": 0.8235406279563904, "logps/chosen": -1.9193274974822998, "logps/rejected": -3.5579068660736084, "loss": 0.5588, "nll_loss": 0.5360579490661621, "rewards/accuracies": 0.875, "rewards/chosen": -0.1919327676296234, "rewards/margins": 0.16385793685913086, "rewards/rejected": -0.3557907044887543, "step": 6375 }, { "epoch": 17.456536618754278, "grad_norm": 6.327114105224609, "learning_rate": 1.2698630136986303e-07, "log_odds_chosen": 1.253851294517517, "log_odds_ratio": -0.41538795828819275, "logits/chosen": 0.8753120303153992, "logits/rejected": 0.9130789041519165, "logps/chosen": -2.144387722015381, "logps/rejected": -3.3649988174438477, "loss": 0.5631, "nll_loss": 0.5215739011764526, "rewards/accuracies": 0.75, "rewards/chosen": -0.2144387662410736, "rewards/margins": 0.1220611184835434, "rewards/rejected": -0.3364998996257782, "step": 6376 }, { "epoch": 17.45927446954141, "grad_norm": 5.948266983032227, "learning_rate": 1.2684931506849315e-07, "log_odds_chosen": 2.330535411834717, "log_odds_ratio": -0.2428348958492279, "logits/chosen": 0.6150343418121338, "logits/rejected": 0.5593434572219849, "logps/chosen": -2.6704254150390625, "logps/rejected": -4.874258995056152, "loss": 0.61, "nll_loss": 0.5856685638427734, "rewards/accuracies": 1.0, "rewards/chosen": -0.26704251766204834, "rewards/margins": 0.2203833907842636, "rewards/rejected": -0.48742595314979553, "step": 6377 }, { "epoch": 17.462012320328544, "grad_norm": 4.698742866516113, "learning_rate": 1.2671232876712328e-07, "log_odds_chosen": 2.7434043884277344, "log_odds_ratio": -0.19620876014232635, "logits/chosen": 0.6330293416976929, "logits/rejected": 0.6393818855285645, "logps/chosen": -2.50228214263916, "logps/rejected": -5.1008620262146, "loss": 0.6779, "nll_loss": 0.6583148241043091, "rewards/accuracies": 1.0, "rewards/chosen": -0.25022822618484497, "rewards/margins": 0.25985798239707947, "rewards/rejected": -0.510086178779602, "step": 6378 }, { "epoch": 17.464750171115675, "grad_norm": 4.9769134521484375, "learning_rate": 1.265753424657534e-07, "log_odds_chosen": 3.2446165084838867, "log_odds_ratio": -0.13361290097236633, "logits/chosen": 1.074798345565796, "logits/rejected": 1.0843558311462402, "logps/chosen": -1.6545987129211426, "logps/rejected": -4.711783409118652, "loss": 0.5599, "nll_loss": 0.5465561151504517, "rewards/accuracies": 1.0, "rewards/chosen": -0.16545987129211426, "rewards/margins": 0.30571845173835754, "rewards/rejected": -0.4711783230304718, "step": 6379 }, { "epoch": 17.467488021902806, "grad_norm": 6.405020236968994, "learning_rate": 1.2643835616438356e-07, "log_odds_chosen": 1.160722017288208, "log_odds_ratio": -0.5461132526397705, "logits/chosen": 0.986149787902832, "logits/rejected": 1.0057218074798584, "logps/chosen": -2.6496036052703857, "logps/rejected": -3.741414785385132, "loss": 0.6821, "nll_loss": 0.6274746060371399, "rewards/accuracies": 0.75, "rewards/chosen": -0.264960378408432, "rewards/margins": 0.10918110609054565, "rewards/rejected": -0.37414148449897766, "step": 6380 }, { "epoch": 17.470225872689937, "grad_norm": 5.7330756187438965, "learning_rate": 1.2630136986301368e-07, "log_odds_chosen": 2.242905616760254, "log_odds_ratio": -0.21644805371761322, "logits/chosen": 0.8014421463012695, "logits/rejected": 0.8575846552848816, "logps/chosen": -2.0438125133514404, "logps/rejected": -4.18243932723999, "loss": 0.5676, "nll_loss": 0.5459390878677368, "rewards/accuracies": 1.0, "rewards/chosen": -0.20438125729560852, "rewards/margins": 0.21386267244815826, "rewards/rejected": -0.418243944644928, "step": 6381 }, { "epoch": 17.472963723477072, "grad_norm": 6.340271472930908, "learning_rate": 1.2616438356164383e-07, "log_odds_chosen": 1.3621551990509033, "log_odds_ratio": -0.33440712094306946, "logits/chosen": 0.7624372839927673, "logits/rejected": 0.7940577268600464, "logps/chosen": -2.399981737136841, "logps/rejected": -3.601665735244751, "loss": 0.625, "nll_loss": 0.5915462970733643, "rewards/accuracies": 1.0, "rewards/chosen": -0.23999817669391632, "rewards/margins": 0.12016841769218445, "rewards/rejected": -0.3601665794849396, "step": 6382 }, { "epoch": 17.475701574264203, "grad_norm": 5.963746070861816, "learning_rate": 1.2602739726027399e-07, "log_odds_chosen": 2.004254102706909, "log_odds_ratio": -0.19171267747879028, "logits/chosen": 0.8262139558792114, "logits/rejected": 0.8322105407714844, "logps/chosen": -1.9959697723388672, "logps/rejected": -3.843660354614258, "loss": 0.5432, "nll_loss": 0.5240041017532349, "rewards/accuracies": 1.0, "rewards/chosen": -0.19959700107574463, "rewards/margins": 0.18476901948451996, "rewards/rejected": -0.3843660354614258, "step": 6383 }, { "epoch": 17.478439425051334, "grad_norm": 4.772543430328369, "learning_rate": 1.2589041095890409e-07, "log_odds_chosen": 3.0831661224365234, "log_odds_ratio": -0.1791444569826126, "logits/chosen": 0.9569947719573975, "logits/rejected": 0.9796054363250732, "logps/chosen": -1.9435266256332397, "logps/rejected": -4.879607200622559, "loss": 0.6076, "nll_loss": 0.5896666646003723, "rewards/accuracies": 1.0, "rewards/chosen": -0.19435268640518188, "rewards/margins": 0.29360806941986084, "rewards/rejected": -0.4879607558250427, "step": 6384 }, { "epoch": 17.481177275838466, "grad_norm": 5.778163909912109, "learning_rate": 1.2575342465753424e-07, "log_odds_chosen": 2.6298298835754395, "log_odds_ratio": -0.21815519034862518, "logits/chosen": 1.0992635488510132, "logits/rejected": 1.100803017616272, "logps/chosen": -1.5193796157836914, "logps/rejected": -3.948961019515991, "loss": 0.4947, "nll_loss": 0.47293388843536377, "rewards/accuracies": 0.875, "rewards/chosen": -0.15193796157836914, "rewards/margins": 0.24295812845230103, "rewards/rejected": -0.39489609003067017, "step": 6385 }, { "epoch": 17.4839151266256, "grad_norm": 5.844307899475098, "learning_rate": 1.2561643835616436e-07, "log_odds_chosen": 1.0574361085891724, "log_odds_ratio": -0.4283989667892456, "logits/chosen": 1.1058626174926758, "logits/rejected": 1.0324801206588745, "logps/chosen": -1.8720588684082031, "logps/rejected": -2.8250651359558105, "loss": 0.5943, "nll_loss": 0.5515062808990479, "rewards/accuracies": 0.75, "rewards/chosen": -0.18720589578151703, "rewards/margins": 0.09530064463615417, "rewards/rejected": -0.2825065553188324, "step": 6386 }, { "epoch": 17.48665297741273, "grad_norm": 5.99423885345459, "learning_rate": 1.2547945205479452e-07, "log_odds_chosen": 2.4854273796081543, "log_odds_ratio": -0.3002573549747467, "logits/chosen": 0.7846125960350037, "logits/rejected": 0.7997812628746033, "logps/chosen": -2.131695508956909, "logps/rejected": -4.520491600036621, "loss": 0.7429, "nll_loss": 0.712903618812561, "rewards/accuracies": 0.875, "rewards/chosen": -0.21316954493522644, "rewards/margins": 0.23887962102890015, "rewards/rejected": -0.452049195766449, "step": 6387 }, { "epoch": 17.489390828199863, "grad_norm": 8.692058563232422, "learning_rate": 1.2534246575342467e-07, "log_odds_chosen": 2.470742702484131, "log_odds_ratio": -0.2043745070695877, "logits/chosen": 0.9773117303848267, "logits/rejected": 1.0041894912719727, "logps/chosen": -2.7290236949920654, "logps/rejected": -5.126734256744385, "loss": 0.7334, "nll_loss": 0.7129854559898376, "rewards/accuracies": 1.0, "rewards/chosen": -0.27290236949920654, "rewards/margins": 0.23977109789848328, "rewards/rejected": -0.5126734972000122, "step": 6388 }, { "epoch": 17.492128678986994, "grad_norm": 5.108413219451904, "learning_rate": 1.252054794520548e-07, "log_odds_chosen": 2.3444418907165527, "log_odds_ratio": -0.19081716239452362, "logits/chosen": 0.9850261807441711, "logits/rejected": 1.0322303771972656, "logps/chosen": -2.382577419281006, "logps/rejected": -4.632070541381836, "loss": 0.6529, "nll_loss": 0.6338487267494202, "rewards/accuracies": 1.0, "rewards/chosen": -0.2382577657699585, "rewards/margins": 0.22494937479496002, "rewards/rejected": -0.4632071256637573, "step": 6389 }, { "epoch": 17.49486652977413, "grad_norm": 7.381885528564453, "learning_rate": 1.2506849315068494e-07, "log_odds_chosen": 2.149988889694214, "log_odds_ratio": -0.17332518100738525, "logits/chosen": 0.8461079597473145, "logits/rejected": 0.7805151343345642, "logps/chosen": -2.6911251544952393, "logps/rejected": -4.752910614013672, "loss": 0.8178, "nll_loss": 0.8004395961761475, "rewards/accuracies": 1.0, "rewards/chosen": -0.2691125273704529, "rewards/margins": 0.20617856085300446, "rewards/rejected": -0.47529110312461853, "step": 6390 }, { "epoch": 17.49760438056126, "grad_norm": 6.181737899780273, "learning_rate": 1.2493150684931507e-07, "log_odds_chosen": 2.742237091064453, "log_odds_ratio": -0.23104143142700195, "logits/chosen": 0.8527319431304932, "logits/rejected": 0.8683645129203796, "logps/chosen": -2.120884895324707, "logps/rejected": -4.7355170249938965, "loss": 0.6147, "nll_loss": 0.5915656089782715, "rewards/accuracies": 0.875, "rewards/chosen": -0.2120884656906128, "rewards/margins": 0.2614632248878479, "rewards/rejected": -0.4735517203807831, "step": 6391 }, { "epoch": 17.50034223134839, "grad_norm": 5.229490756988525, "learning_rate": 1.247945205479452e-07, "log_odds_chosen": 1.227660894393921, "log_odds_ratio": -0.32910022139549255, "logits/chosen": 0.8821324110031128, "logits/rejected": 0.9026627540588379, "logps/chosen": -1.9301975965499878, "logps/rejected": -3.0492148399353027, "loss": 0.4848, "nll_loss": 0.4518841505050659, "rewards/accuracies": 0.875, "rewards/chosen": -0.1930197775363922, "rewards/margins": 0.11190171539783478, "rewards/rejected": -0.3049214780330658, "step": 6392 }, { "epoch": 17.503080082135522, "grad_norm": 6.12589693069458, "learning_rate": 1.2465753424657535e-07, "log_odds_chosen": 1.9720017910003662, "log_odds_ratio": -0.28192275762557983, "logits/chosen": 0.9607502222061157, "logits/rejected": 1.0078564882278442, "logps/chosen": -2.7195324897766113, "logps/rejected": -4.612273216247559, "loss": 0.6121, "nll_loss": 0.5838704109191895, "rewards/accuracies": 1.0, "rewards/chosen": -0.271953284740448, "rewards/margins": 0.18927404284477234, "rewards/rejected": -0.46122732758522034, "step": 6393 }, { "epoch": 17.505817932922657, "grad_norm": 5.443465232849121, "learning_rate": 1.2452054794520547e-07, "log_odds_chosen": 2.8171536922454834, "log_odds_ratio": -0.15949484705924988, "logits/chosen": 0.8322919011116028, "logits/rejected": 0.8571828603744507, "logps/chosen": -2.023226499557495, "logps/rejected": -4.686040878295898, "loss": 0.622, "nll_loss": 0.606029748916626, "rewards/accuracies": 1.0, "rewards/chosen": -0.20232266187667847, "rewards/margins": 0.2662814259529114, "rewards/rejected": -0.46860408782958984, "step": 6394 }, { "epoch": 17.508555783709788, "grad_norm": 6.458099842071533, "learning_rate": 1.243835616438356e-07, "log_odds_chosen": 1.6270413398742676, "log_odds_ratio": -0.25142139196395874, "logits/chosen": 0.9162544012069702, "logits/rejected": 0.915455162525177, "logps/chosen": -1.566877841949463, "logps/rejected": -2.9637250900268555, "loss": 0.4826, "nll_loss": 0.4574551582336426, "rewards/accuracies": 0.875, "rewards/chosen": -0.15668779611587524, "rewards/margins": 0.1396847367286682, "rewards/rejected": -0.29637253284454346, "step": 6395 }, { "epoch": 17.51129363449692, "grad_norm": 5.522887229919434, "learning_rate": 1.2424657534246575e-07, "log_odds_chosen": 3.3352160453796387, "log_odds_ratio": -0.1704178750514984, "logits/chosen": 0.5785351395606995, "logits/rejected": 0.6890784502029419, "logps/chosen": -2.4256668090820312, "logps/rejected": -5.6619415283203125, "loss": 0.877, "nll_loss": 0.8599894046783447, "rewards/accuracies": 0.875, "rewards/chosen": -0.24256667494773865, "rewards/margins": 0.32362741231918335, "rewards/rejected": -0.5661941170692444, "step": 6396 }, { "epoch": 17.51403148528405, "grad_norm": 6.59925651550293, "learning_rate": 1.241095890410959e-07, "log_odds_chosen": 3.5415477752685547, "log_odds_ratio": -0.12051844596862793, "logits/chosen": 0.9502301216125488, "logits/rejected": 0.8812242746353149, "logps/chosen": -2.219313859939575, "logps/rejected": -5.6084442138671875, "loss": 0.7932, "nll_loss": 0.7811641693115234, "rewards/accuracies": 1.0, "rewards/chosen": -0.22193138301372528, "rewards/margins": 0.33891305327415466, "rewards/rejected": -0.5608444213867188, "step": 6397 }, { "epoch": 17.516769336071185, "grad_norm": 5.972667217254639, "learning_rate": 1.2397260273972603e-07, "log_odds_chosen": 1.8010094165802002, "log_odds_ratio": -0.2533397376537323, "logits/chosen": 0.7897137403488159, "logits/rejected": 0.8188514709472656, "logps/chosen": -2.188042163848877, "logps/rejected": -3.797149658203125, "loss": 0.5859, "nll_loss": 0.5605533719062805, "rewards/accuracies": 1.0, "rewards/chosen": -0.21880421042442322, "rewards/margins": 0.1609107404947281, "rewards/rejected": -0.3797149360179901, "step": 6398 }, { "epoch": 17.519507186858316, "grad_norm": 5.227163791656494, "learning_rate": 1.2383561643835615e-07, "log_odds_chosen": 2.2630863189697266, "log_odds_ratio": -0.22530904412269592, "logits/chosen": 0.9081128239631653, "logits/rejected": 0.9681171774864197, "logps/chosen": -1.8554465770721436, "logps/rejected": -3.9826555252075195, "loss": 0.5675, "nll_loss": 0.544949471950531, "rewards/accuracies": 1.0, "rewards/chosen": -0.18554465472698212, "rewards/margins": 0.2127208709716797, "rewards/rejected": -0.398265540599823, "step": 6399 }, { "epoch": 17.522245037645447, "grad_norm": 7.237181663513184, "learning_rate": 1.236986301369863e-07, "log_odds_chosen": 1.1816171407699585, "log_odds_ratio": -0.3941373825073242, "logits/chosen": 0.7131344079971313, "logits/rejected": 0.7154066562652588, "logps/chosen": -2.1366193294525146, "logps/rejected": -3.2072672843933105, "loss": 0.6326, "nll_loss": 0.5931445360183716, "rewards/accuracies": 0.875, "rewards/chosen": -0.21366193890571594, "rewards/margins": 0.10706479102373123, "rewards/rejected": -0.3207267224788666, "step": 6400 }, { "epoch": 17.524982888432582, "grad_norm": 5.780943870544434, "learning_rate": 1.2356164383561643e-07, "log_odds_chosen": 2.80501389503479, "log_odds_ratio": -0.1708507090806961, "logits/chosen": 1.1135841608047485, "logits/rejected": 1.1644850969314575, "logps/chosen": -2.602221727371216, "logps/rejected": -5.290879726409912, "loss": 0.7556, "nll_loss": 0.7384746074676514, "rewards/accuracies": 1.0, "rewards/chosen": -0.2602221965789795, "rewards/margins": 0.26886579394340515, "rewards/rejected": -0.529088020324707, "step": 6401 }, { "epoch": 17.527720739219713, "grad_norm": 5.586180210113525, "learning_rate": 1.2342465753424656e-07, "log_odds_chosen": 2.945979595184326, "log_odds_ratio": -0.3716321587562561, "logits/chosen": 0.835883378982544, "logits/rejected": 0.8646929264068604, "logps/chosen": -2.701598644256592, "logps/rejected": -5.5170745849609375, "loss": 0.7566, "nll_loss": 0.7194729447364807, "rewards/accuracies": 0.875, "rewards/chosen": -0.27015987038612366, "rewards/margins": 0.2815476059913635, "rewards/rejected": -0.5517074465751648, "step": 6402 }, { "epoch": 17.530458590006845, "grad_norm": 6.7960896492004395, "learning_rate": 1.232876712328767e-07, "log_odds_chosen": 2.201904296875, "log_odds_ratio": -0.4950971305370331, "logits/chosen": 0.7271379828453064, "logits/rejected": 0.7816176414489746, "logps/chosen": -2.36802077293396, "logps/rejected": -4.449828624725342, "loss": 0.8368, "nll_loss": 0.7872867584228516, "rewards/accuracies": 0.75, "rewards/chosen": -0.2368020862340927, "rewards/margins": 0.20818080008029938, "rewards/rejected": -0.4449828863143921, "step": 6403 }, { "epoch": 17.533196440793976, "grad_norm": 4.884251117706299, "learning_rate": 1.2315068493150684e-07, "log_odds_chosen": 2.329963445663452, "log_odds_ratio": -0.1325095146894455, "logits/chosen": 0.7699531316757202, "logits/rejected": 0.832714855670929, "logps/chosen": -1.7450697422027588, "logps/rejected": -3.809329032897949, "loss": 0.5707, "nll_loss": 0.5574168562889099, "rewards/accuracies": 1.0, "rewards/chosen": -0.1745069921016693, "rewards/margins": 0.20642593502998352, "rewards/rejected": -0.38093289732933044, "step": 6404 }, { "epoch": 17.53593429158111, "grad_norm": 5.649943828582764, "learning_rate": 1.23013698630137e-07, "log_odds_chosen": 2.1852800846099854, "log_odds_ratio": -0.2416759729385376, "logits/chosen": 0.6572864651679993, "logits/rejected": 0.6999524831771851, "logps/chosen": -2.2094926834106445, "logps/rejected": -4.273894309997559, "loss": 0.6647, "nll_loss": 0.6405470967292786, "rewards/accuracies": 1.0, "rewards/chosen": -0.22094929218292236, "rewards/margins": 0.20644012093544006, "rewards/rejected": -0.4273894429206848, "step": 6405 }, { "epoch": 17.53867214236824, "grad_norm": 8.91427230834961, "learning_rate": 1.2287671232876711e-07, "log_odds_chosen": 2.230778694152832, "log_odds_ratio": -0.24740159511566162, "logits/chosen": 1.038708209991455, "logits/rejected": 1.0701075792312622, "logps/chosen": -3.029029130935669, "logps/rejected": -5.199680805206299, "loss": 0.7083, "nll_loss": 0.683526873588562, "rewards/accuracies": 0.875, "rewards/chosen": -0.3029029071331024, "rewards/margins": 0.21706515550613403, "rewards/rejected": -0.5199680924415588, "step": 6406 }, { "epoch": 17.541409993155373, "grad_norm": 5.436488151550293, "learning_rate": 1.2273972602739727e-07, "log_odds_chosen": 1.612396478652954, "log_odds_ratio": -0.26660776138305664, "logits/chosen": 0.7222930192947388, "logits/rejected": 0.749010443687439, "logps/chosen": -2.2643425464630127, "logps/rejected": -3.742213726043701, "loss": 0.7162, "nll_loss": 0.6895707249641418, "rewards/accuracies": 0.875, "rewards/chosen": -0.22643426060676575, "rewards/margins": 0.14778712391853333, "rewards/rejected": -0.3742213845252991, "step": 6407 }, { "epoch": 17.544147843942504, "grad_norm": 10.029426574707031, "learning_rate": 1.226027397260274e-07, "log_odds_chosen": 1.0179572105407715, "log_odds_ratio": -1.0351452827453613, "logits/chosen": 0.6735870838165283, "logits/rejected": 0.6817120909690857, "logps/chosen": -3.552013397216797, "logps/rejected": -4.524129867553711, "loss": 0.7625, "nll_loss": 0.6589909195899963, "rewards/accuracies": 0.5, "rewards/chosen": -0.3552013635635376, "rewards/margins": 0.09721161425113678, "rewards/rejected": -0.4524129331111908, "step": 6408 }, { "epoch": 17.54688569472964, "grad_norm": 5.272763252258301, "learning_rate": 1.2246575342465752e-07, "log_odds_chosen": 2.3721654415130615, "log_odds_ratio": -0.3553352653980255, "logits/chosen": 0.9543100595474243, "logits/rejected": 0.9756004214286804, "logps/chosen": -1.9344689846038818, "logps/rejected": -4.222991466522217, "loss": 0.6176, "nll_loss": 0.582037627696991, "rewards/accuracies": 0.875, "rewards/chosen": -0.19344690442085266, "rewards/margins": 0.22885224223136902, "rewards/rejected": -0.4222991466522217, "step": 6409 }, { "epoch": 17.54962354551677, "grad_norm": 8.326446533203125, "learning_rate": 1.2232876712328767e-07, "log_odds_chosen": 1.765784502029419, "log_odds_ratio": -0.4287900924682617, "logits/chosen": 1.0199863910675049, "logits/rejected": 0.980313777923584, "logps/chosen": -2.4960098266601562, "logps/rejected": -4.159181118011475, "loss": 0.6918, "nll_loss": 0.6489306688308716, "rewards/accuracies": 0.875, "rewards/chosen": -0.24960099160671234, "rewards/margins": 0.16631710529327393, "rewards/rejected": -0.41591811180114746, "step": 6410 }, { "epoch": 17.5523613963039, "grad_norm": 6.44672966003418, "learning_rate": 1.221917808219178e-07, "log_odds_chosen": 2.256211519241333, "log_odds_ratio": -0.24599596858024597, "logits/chosen": 1.1116268634796143, "logits/rejected": 1.1091665029525757, "logps/chosen": -2.2782058715820312, "logps/rejected": -4.395539283752441, "loss": 0.609, "nll_loss": 0.5843796730041504, "rewards/accuracies": 0.875, "rewards/chosen": -0.22782057523727417, "rewards/margins": 0.2117333710193634, "rewards/rejected": -0.43955397605895996, "step": 6411 }, { "epoch": 17.555099247091032, "grad_norm": 6.550657272338867, "learning_rate": 1.2205479452054795e-07, "log_odds_chosen": 2.226926565170288, "log_odds_ratio": -0.1731054186820984, "logits/chosen": 0.9857954978942871, "logits/rejected": 1.0052744150161743, "logps/chosen": -2.2605624198913574, "logps/rejected": -4.324182033538818, "loss": 0.61, "nll_loss": 0.5926476716995239, "rewards/accuracies": 1.0, "rewards/chosen": -0.22605624794960022, "rewards/margins": 0.20636196434497833, "rewards/rejected": -0.43241822719573975, "step": 6412 }, { "epoch": 17.557837097878167, "grad_norm": 7.820296764373779, "learning_rate": 1.2191780821917807e-07, "log_odds_chosen": 1.450226068496704, "log_odds_ratio": -0.36772140860557556, "logits/chosen": 0.9676134586334229, "logits/rejected": 0.9703601002693176, "logps/chosen": -2.324951410293579, "logps/rejected": -3.6572413444519043, "loss": 0.6674, "nll_loss": 0.6306502819061279, "rewards/accuracies": 0.75, "rewards/chosen": -0.23249514400959015, "rewards/margins": 0.13322898745536804, "rewards/rejected": -0.3657241463661194, "step": 6413 }, { "epoch": 17.560574948665298, "grad_norm": 5.333392143249512, "learning_rate": 1.217808219178082e-07, "log_odds_chosen": 3.2699830532073975, "log_odds_ratio": -0.21092838048934937, "logits/chosen": 0.7766196131706238, "logits/rejected": 0.845221996307373, "logps/chosen": -2.4078307151794434, "logps/rejected": -5.532792568206787, "loss": 0.7214, "nll_loss": 0.7003548741340637, "rewards/accuracies": 0.875, "rewards/chosen": -0.24078308045864105, "rewards/margins": 0.3124961853027344, "rewards/rejected": -0.5532792806625366, "step": 6414 }, { "epoch": 17.56331279945243, "grad_norm": 5.590096950531006, "learning_rate": 1.2164383561643835e-07, "log_odds_chosen": 2.0435516834259033, "log_odds_ratio": -0.21867774426937103, "logits/chosen": 0.7897824645042419, "logits/rejected": 0.8936070799827576, "logps/chosen": -2.6543526649475098, "logps/rejected": -4.613426208496094, "loss": 0.6848, "nll_loss": 0.6629072427749634, "rewards/accuracies": 1.0, "rewards/chosen": -0.26543527841567993, "rewards/margins": 0.1959073841571808, "rewards/rejected": -0.4613426625728607, "step": 6415 }, { "epoch": 17.56605065023956, "grad_norm": 5.662508010864258, "learning_rate": 1.215068493150685e-07, "log_odds_chosen": 1.3195366859436035, "log_odds_ratio": -0.47293299436569214, "logits/chosen": 0.7614758610725403, "logits/rejected": 0.6856114268302917, "logps/chosen": -2.480293035507202, "logps/rejected": -3.7261106967926025, "loss": 0.7002, "nll_loss": 0.6529126763343811, "rewards/accuracies": 0.875, "rewards/chosen": -0.24802932143211365, "rewards/margins": 0.12458177655935287, "rewards/rejected": -0.37261107563972473, "step": 6416 }, { "epoch": 17.568788501026695, "grad_norm": 5.653572082519531, "learning_rate": 1.2136986301369863e-07, "log_odds_chosen": 2.227231025695801, "log_odds_ratio": -0.21490825712680817, "logits/chosen": 0.9742996096611023, "logits/rejected": 1.0237606763839722, "logps/chosen": -1.9210691452026367, "logps/rejected": -3.97603178024292, "loss": 0.5974, "nll_loss": 0.5759445428848267, "rewards/accuracies": 1.0, "rewards/chosen": -0.1921069175004959, "rewards/margins": 0.20549629628658295, "rewards/rejected": -0.39760324358940125, "step": 6417 }, { "epoch": 17.571526351813826, "grad_norm": 4.628233909606934, "learning_rate": 1.2123287671232875e-07, "log_odds_chosen": 1.6378334760665894, "log_odds_ratio": -0.24621513485908508, "logits/chosen": 0.6700360774993896, "logits/rejected": 0.6765749454498291, "logps/chosen": -1.7802594900131226, "logps/rejected": -3.2832159996032715, "loss": 0.6137, "nll_loss": 0.5890422463417053, "rewards/accuracies": 1.0, "rewards/chosen": -0.17802593111991882, "rewards/margins": 0.1502956748008728, "rewards/rejected": -0.3283216059207916, "step": 6418 }, { "epoch": 17.574264202600958, "grad_norm": 4.9145731925964355, "learning_rate": 1.210958904109589e-07, "log_odds_chosen": 2.0029218196868896, "log_odds_ratio": -0.2525787949562073, "logits/chosen": 0.7888600826263428, "logits/rejected": 0.8188806176185608, "logps/chosen": -2.124725818634033, "logps/rejected": -4.044301986694336, "loss": 0.5784, "nll_loss": 0.5531775951385498, "rewards/accuracies": 0.75, "rewards/chosen": -0.2124725878238678, "rewards/margins": 0.19195763766765594, "rewards/rejected": -0.40443024039268494, "step": 6419 }, { "epoch": 17.57700205338809, "grad_norm": 5.869918346405029, "learning_rate": 1.2095890410958903e-07, "log_odds_chosen": 2.614196538925171, "log_odds_ratio": -0.3082558214664459, "logits/chosen": 0.6352999806404114, "logits/rejected": 0.7324055433273315, "logps/chosen": -1.9227714538574219, "logps/rejected": -4.402709007263184, "loss": 0.5855, "nll_loss": 0.5547009110450745, "rewards/accuracies": 0.875, "rewards/chosen": -0.19227714836597443, "rewards/margins": 0.24799378216266632, "rewards/rejected": -0.44027090072631836, "step": 6420 }, { "epoch": 17.579739904175224, "grad_norm": 4.057687282562256, "learning_rate": 1.2082191780821916e-07, "log_odds_chosen": 2.544369697570801, "log_odds_ratio": -0.19634364545345306, "logits/chosen": 1.011275291442871, "logits/rejected": 1.033337116241455, "logps/chosen": -1.6255226135253906, "logps/rejected": -3.91684889793396, "loss": 0.5614, "nll_loss": 0.5418070554733276, "rewards/accuracies": 0.875, "rewards/chosen": -0.16255225241184235, "rewards/margins": 0.22913262248039246, "rewards/rejected": -0.391684889793396, "step": 6421 }, { "epoch": 17.582477754962355, "grad_norm": 5.706302642822266, "learning_rate": 1.206849315068493e-07, "log_odds_chosen": 1.7098253965377808, "log_odds_ratio": -0.2793293297290802, "logits/chosen": 0.9319519400596619, "logits/rejected": 0.9090636968612671, "logps/chosen": -1.8613731861114502, "logps/rejected": -3.4349241256713867, "loss": 0.7207, "nll_loss": 0.6927496194839478, "rewards/accuracies": 1.0, "rewards/chosen": -0.18613731861114502, "rewards/margins": 0.15735509991645813, "rewards/rejected": -0.34349241852760315, "step": 6422 }, { "epoch": 17.585215605749486, "grad_norm": 5.79187536239624, "learning_rate": 1.2054794520547946e-07, "log_odds_chosen": 1.6124210357666016, "log_odds_ratio": -0.4076349139213562, "logits/chosen": 0.6471216678619385, "logits/rejected": 0.6241541504859924, "logps/chosen": -2.034644365310669, "logps/rejected": -3.5923473834991455, "loss": 0.6297, "nll_loss": 0.5889468193054199, "rewards/accuracies": 0.875, "rewards/chosen": -0.20346444845199585, "rewards/margins": 0.15577030181884766, "rewards/rejected": -0.3592347502708435, "step": 6423 }, { "epoch": 17.587953456536617, "grad_norm": 5.875173568725586, "learning_rate": 1.2041095890410959e-07, "log_odds_chosen": 1.6648955345153809, "log_odds_ratio": -0.3074833154678345, "logits/chosen": 0.8408281803131104, "logits/rejected": 0.8669244050979614, "logps/chosen": -2.340613603591919, "logps/rejected": -3.9395880699157715, "loss": 0.5788, "nll_loss": 0.548046350479126, "rewards/accuracies": 1.0, "rewards/chosen": -0.2340613752603531, "rewards/margins": 0.15989747643470764, "rewards/rejected": -0.3939588665962219, "step": 6424 }, { "epoch": 17.590691307323752, "grad_norm": 4.941417217254639, "learning_rate": 1.202739726027397e-07, "log_odds_chosen": 2.456305503845215, "log_odds_ratio": -0.2173374891281128, "logits/chosen": 0.9150668382644653, "logits/rejected": 0.9747965931892395, "logps/chosen": -2.2067923545837402, "logps/rejected": -4.498266696929932, "loss": 0.6099, "nll_loss": 0.588198184967041, "rewards/accuracies": 1.0, "rewards/chosen": -0.22067925333976746, "rewards/margins": 0.22914743423461914, "rewards/rejected": -0.4498266875743866, "step": 6425 }, { "epoch": 17.593429158110883, "grad_norm": 5.796962261199951, "learning_rate": 1.2013698630136986e-07, "log_odds_chosen": 0.9986310005187988, "log_odds_ratio": -0.4338264465332031, "logits/chosen": 0.7585604786872864, "logits/rejected": 0.768700122833252, "logps/chosen": -1.8436321020126343, "logps/rejected": -2.7007198333740234, "loss": 0.5713, "nll_loss": 0.5279009342193604, "rewards/accuracies": 0.75, "rewards/chosen": -0.1843632310628891, "rewards/margins": 0.08570878952741623, "rewards/rejected": -0.27007198333740234, "step": 6426 }, { "epoch": 17.596167008898014, "grad_norm": 7.417569160461426, "learning_rate": 1.2e-07, "log_odds_chosen": 1.7336914539337158, "log_odds_ratio": -0.5339999794960022, "logits/chosen": 0.8997542858123779, "logits/rejected": 0.9163734316825867, "logps/chosen": -2.20914888381958, "logps/rejected": -3.806128740310669, "loss": 0.8246, "nll_loss": 0.7712351083755493, "rewards/accuracies": 0.75, "rewards/chosen": -0.22091487050056458, "rewards/margins": 0.1596980094909668, "rewards/rejected": -0.38061290979385376, "step": 6427 }, { "epoch": 17.59890485968515, "grad_norm": 6.058193683624268, "learning_rate": 1.1986301369863011e-07, "log_odds_chosen": 3.9299423694610596, "log_odds_ratio": -0.10305416584014893, "logits/chosen": 1.0131946802139282, "logits/rejected": 0.9495137929916382, "logps/chosen": -2.216019630432129, "logps/rejected": -6.003959655761719, "loss": 0.7596, "nll_loss": 0.7493278384208679, "rewards/accuracies": 1.0, "rewards/chosen": -0.22160199284553528, "rewards/margins": 0.37879395484924316, "rewards/rejected": -0.600395917892456, "step": 6428 }, { "epoch": 17.60164271047228, "grad_norm": 5.665599346160889, "learning_rate": 1.1972602739726027e-07, "log_odds_chosen": 3.341522693634033, "log_odds_ratio": -0.18238374590873718, "logits/chosen": 1.0603256225585938, "logits/rejected": 1.0688483715057373, "logps/chosen": -2.155240058898926, "logps/rejected": -5.379177093505859, "loss": 0.6012, "nll_loss": 0.5829817652702332, "rewards/accuracies": 0.875, "rewards/chosen": -0.21552401781082153, "rewards/margins": 0.32239365577697754, "rewards/rejected": -0.5379177331924438, "step": 6429 }, { "epoch": 17.60438056125941, "grad_norm": 5.389317035675049, "learning_rate": 1.1958904109589042e-07, "log_odds_chosen": 1.5990597009658813, "log_odds_ratio": -0.2355288416147232, "logits/chosen": 0.8564925193786621, "logits/rejected": 0.8448445796966553, "logps/chosen": -1.8016809225082397, "logps/rejected": -3.2008705139160156, "loss": 0.5717, "nll_loss": 0.5481491088867188, "rewards/accuracies": 1.0, "rewards/chosen": -0.18016810715198517, "rewards/margins": 0.1399189531803131, "rewards/rejected": -0.3200870156288147, "step": 6430 }, { "epoch": 17.607118412046542, "grad_norm": 5.344040393829346, "learning_rate": 1.1945205479452054e-07, "log_odds_chosen": 1.797069787979126, "log_odds_ratio": -0.337472140789032, "logits/chosen": 0.8669840693473816, "logits/rejected": 0.8455711007118225, "logps/chosen": -1.946342945098877, "logps/rejected": -3.6382339000701904, "loss": 0.6568, "nll_loss": 0.6230820417404175, "rewards/accuracies": 0.875, "rewards/chosen": -0.1946343183517456, "rewards/margins": 0.16918909549713135, "rewards/rejected": -0.36382341384887695, "step": 6431 }, { "epoch": 17.609856262833677, "grad_norm": 5.616662502288818, "learning_rate": 1.1931506849315067e-07, "log_odds_chosen": 3.1199545860290527, "log_odds_ratio": -0.17175370454788208, "logits/chosen": 0.9349672794342041, "logits/rejected": 0.9959750175476074, "logps/chosen": -2.188020944595337, "logps/rejected": -5.156487464904785, "loss": 0.578, "nll_loss": 0.5608327984809875, "rewards/accuracies": 1.0, "rewards/chosen": -0.2188020944595337, "rewards/margins": 0.2968466281890869, "rewards/rejected": -0.5156487226486206, "step": 6432 }, { "epoch": 17.61259411362081, "grad_norm": 5.604093551635742, "learning_rate": 1.1917808219178081e-07, "log_odds_chosen": 2.6292409896850586, "log_odds_ratio": -0.22157061100006104, "logits/chosen": 0.8640599846839905, "logits/rejected": 0.8931636810302734, "logps/chosen": -2.5532732009887695, "logps/rejected": -5.133013725280762, "loss": 0.6685, "nll_loss": 0.6463160514831543, "rewards/accuracies": 0.875, "rewards/chosen": -0.2553273141384125, "rewards/margins": 0.2579740285873413, "rewards/rejected": -0.5133013129234314, "step": 6433 }, { "epoch": 17.61533196440794, "grad_norm": 5.543288230895996, "learning_rate": 1.1904109589041095e-07, "log_odds_chosen": 1.8312480449676514, "log_odds_ratio": -0.23552946746349335, "logits/chosen": 0.8151968717575073, "logits/rejected": 0.8260593414306641, "logps/chosen": -1.7350330352783203, "logps/rejected": -3.3681228160858154, "loss": 0.6059, "nll_loss": 0.5823932886123657, "rewards/accuracies": 1.0, "rewards/chosen": -0.1735033094882965, "rewards/margins": 0.1633089780807495, "rewards/rejected": -0.336812287569046, "step": 6434 }, { "epoch": 17.61806981519507, "grad_norm": 6.423748016357422, "learning_rate": 1.189041095890411e-07, "log_odds_chosen": 2.678083896636963, "log_odds_ratio": -0.1815394014120102, "logits/chosen": 1.0159870386123657, "logits/rejected": 1.0855531692504883, "logps/chosen": -2.332047939300537, "logps/rejected": -4.891615867614746, "loss": 0.8194, "nll_loss": 0.8011963367462158, "rewards/accuracies": 1.0, "rewards/chosen": -0.23320479691028595, "rewards/margins": 0.255956768989563, "rewards/rejected": -0.4891616106033325, "step": 6435 }, { "epoch": 17.620807665982205, "grad_norm": 5.905627250671387, "learning_rate": 1.1876712328767123e-07, "log_odds_chosen": 1.8823437690734863, "log_odds_ratio": -0.3188876211643219, "logits/chosen": 0.8050897121429443, "logits/rejected": 0.7999956607818604, "logps/chosen": -1.8377060890197754, "logps/rejected": -3.589590072631836, "loss": 0.6083, "nll_loss": 0.5764344930648804, "rewards/accuracies": 0.875, "rewards/chosen": -0.18377062678337097, "rewards/margins": 0.17518837749958038, "rewards/rejected": -0.35895898938179016, "step": 6436 }, { "epoch": 17.623545516769337, "grad_norm": 5.450281620025635, "learning_rate": 1.1863013698630136e-07, "log_odds_chosen": 1.9402720928192139, "log_odds_ratio": -0.25944826006889343, "logits/chosen": 0.7991194128990173, "logits/rejected": 0.8162626624107361, "logps/chosen": -2.4789302349090576, "logps/rejected": -4.286145210266113, "loss": 0.7233, "nll_loss": 0.6973836421966553, "rewards/accuracies": 1.0, "rewards/chosen": -0.24789303541183472, "rewards/margins": 0.1807214915752411, "rewards/rejected": -0.4286145269870758, "step": 6437 }, { "epoch": 17.626283367556468, "grad_norm": 5.228636741638184, "learning_rate": 1.184931506849315e-07, "log_odds_chosen": 1.7711517810821533, "log_odds_ratio": -0.2559649646282196, "logits/chosen": 0.9353335499763489, "logits/rejected": 0.9913405179977417, "logps/chosen": -1.8115417957305908, "logps/rejected": -3.419055938720703, "loss": 0.5999, "nll_loss": 0.5743207931518555, "rewards/accuracies": 1.0, "rewards/chosen": -0.18115419149398804, "rewards/margins": 0.16075140237808228, "rewards/rejected": -0.3419055938720703, "step": 6438 }, { "epoch": 17.6290212183436, "grad_norm": 11.143916130065918, "learning_rate": 1.1835616438356163e-07, "log_odds_chosen": 1.450482964515686, "log_odds_ratio": -0.796216607093811, "logits/chosen": 0.7953630685806274, "logits/rejected": 0.9396811127662659, "logps/chosen": -3.440295457839966, "logps/rejected": -4.824041366577148, "loss": 0.7564, "nll_loss": 0.6768229603767395, "rewards/accuracies": 0.75, "rewards/chosen": -0.3440295457839966, "rewards/margins": 0.13837453722953796, "rewards/rejected": -0.48240411281585693, "step": 6439 }, { "epoch": 17.631759069130734, "grad_norm": 5.012040615081787, "learning_rate": 1.1821917808219177e-07, "log_odds_chosen": 1.997769832611084, "log_odds_ratio": -0.2594030201435089, "logits/chosen": 0.6165721416473389, "logits/rejected": 0.6380079388618469, "logps/chosen": -2.9198288917541504, "logps/rejected": -4.803384780883789, "loss": 0.6635, "nll_loss": 0.6375797986984253, "rewards/accuracies": 0.875, "rewards/chosen": -0.29198288917541504, "rewards/margins": 0.18835562467575073, "rewards/rejected": -0.48033851385116577, "step": 6440 }, { "epoch": 17.634496919917865, "grad_norm": 7.184423923492432, "learning_rate": 1.1808219178082192e-07, "log_odds_chosen": 2.1661791801452637, "log_odds_ratio": -0.23783232271671295, "logits/chosen": 0.5942014455795288, "logits/rejected": 0.6183215379714966, "logps/chosen": -2.372037649154663, "logps/rejected": -4.437623500823975, "loss": 0.6308, "nll_loss": 0.6070079803466797, "rewards/accuracies": 1.0, "rewards/chosen": -0.23720376193523407, "rewards/margins": 0.20655861496925354, "rewards/rejected": -0.4437623620033264, "step": 6441 }, { "epoch": 17.637234770704996, "grad_norm": 5.537617206573486, "learning_rate": 1.1794520547945206e-07, "log_odds_chosen": 1.8863221406936646, "log_odds_ratio": -0.3219750225543976, "logits/chosen": 0.8411808013916016, "logits/rejected": 0.9652082920074463, "logps/chosen": -2.226109504699707, "logps/rejected": -3.9737439155578613, "loss": 0.6845, "nll_loss": 0.652263879776001, "rewards/accuracies": 0.875, "rewards/chosen": -0.2226109504699707, "rewards/margins": 0.174763485789299, "rewards/rejected": -0.3973744213581085, "step": 6442 }, { "epoch": 17.639972621492127, "grad_norm": 5.530963897705078, "learning_rate": 1.1780821917808218e-07, "log_odds_chosen": 1.9410130977630615, "log_odds_ratio": -0.35574495792388916, "logits/chosen": 0.8035416603088379, "logits/rejected": 0.9183660745620728, "logps/chosen": -2.2572431564331055, "logps/rejected": -4.062943935394287, "loss": 0.6429, "nll_loss": 0.6072961091995239, "rewards/accuracies": 0.75, "rewards/chosen": -0.22572430968284607, "rewards/margins": 0.1805700659751892, "rewards/rejected": -0.40629440546035767, "step": 6443 }, { "epoch": 17.642710472279262, "grad_norm": 5.850437641143799, "learning_rate": 1.1767123287671232e-07, "log_odds_chosen": 2.277472496032715, "log_odds_ratio": -0.29431992769241333, "logits/chosen": 0.5474441051483154, "logits/rejected": 0.5632083415985107, "logps/chosen": -2.0704689025878906, "logps/rejected": -4.207172393798828, "loss": 0.5965, "nll_loss": 0.5670881867408752, "rewards/accuracies": 0.875, "rewards/chosen": -0.20704692602157593, "rewards/margins": 0.21367032825946808, "rewards/rejected": -0.4207172393798828, "step": 6444 }, { "epoch": 17.645448323066393, "grad_norm": 5.415623664855957, "learning_rate": 1.1753424657534246e-07, "log_odds_chosen": 2.2226619720458984, "log_odds_ratio": -0.20978191494941711, "logits/chosen": 0.7151030898094177, "logits/rejected": 0.7644739151000977, "logps/chosen": -2.2213404178619385, "logps/rejected": -4.274773597717285, "loss": 0.6749, "nll_loss": 0.6539403200149536, "rewards/accuracies": 1.0, "rewards/chosen": -0.22213402390480042, "rewards/margins": 0.2053433358669281, "rewards/rejected": -0.4274773895740509, "step": 6445 }, { "epoch": 17.648186173853524, "grad_norm": 10.59304428100586, "learning_rate": 1.1739726027397259e-07, "log_odds_chosen": 1.348618507385254, "log_odds_ratio": -0.7396205067634583, "logits/chosen": 0.8202304840087891, "logits/rejected": 0.8606806993484497, "logps/chosen": -2.784698009490967, "logps/rejected": -4.032729148864746, "loss": 0.6934, "nll_loss": 0.6194655895233154, "rewards/accuracies": 0.625, "rewards/chosen": -0.2784698009490967, "rewards/margins": 0.1248031035065651, "rewards/rejected": -0.40327292680740356, "step": 6446 }, { "epoch": 17.650924024640656, "grad_norm": 5.312087059020996, "learning_rate": 1.1726027397260274e-07, "log_odds_chosen": 2.881659984588623, "log_odds_ratio": -0.18923376500606537, "logits/chosen": 0.5692263841629028, "logits/rejected": 0.6231731176376343, "logps/chosen": -2.361159324645996, "logps/rejected": -5.142310619354248, "loss": 0.6811, "nll_loss": 0.6621490716934204, "rewards/accuracies": 1.0, "rewards/chosen": -0.236115962266922, "rewards/margins": 0.27811509370803833, "rewards/rejected": -0.5142310261726379, "step": 6447 }, { "epoch": 17.65366187542779, "grad_norm": 4.723308086395264, "learning_rate": 1.1712328767123288e-07, "log_odds_chosen": 2.2186241149902344, "log_odds_ratio": -0.1706039011478424, "logits/chosen": 0.7203304767608643, "logits/rejected": 0.7555379867553711, "logps/chosen": -1.9990878105163574, "logps/rejected": -4.063846111297607, "loss": 0.5731, "nll_loss": 0.5560800433158875, "rewards/accuracies": 1.0, "rewards/chosen": -0.1999087929725647, "rewards/margins": 0.20647583901882172, "rewards/rejected": -0.4063846170902252, "step": 6448 }, { "epoch": 17.65639972621492, "grad_norm": 10.081280708312988, "learning_rate": 1.1698630136986302e-07, "log_odds_chosen": 0.9219831228256226, "log_odds_ratio": -0.5150426030158997, "logits/chosen": 0.7756011486053467, "logits/rejected": 0.7441078424453735, "logps/chosen": -2.50485897064209, "logps/rejected": -3.3686025142669678, "loss": 0.5869, "nll_loss": 0.5354043245315552, "rewards/accuracies": 0.75, "rewards/chosen": -0.250485897064209, "rewards/margins": 0.08637437969446182, "rewards/rejected": -0.3368602693080902, "step": 6449 }, { "epoch": 17.659137577002053, "grad_norm": 4.646742820739746, "learning_rate": 1.1684931506849314e-07, "log_odds_chosen": 2.5173213481903076, "log_odds_ratio": -0.1943158507347107, "logits/chosen": 1.0918986797332764, "logits/rejected": 1.139182686805725, "logps/chosen": -2.1518969535827637, "logps/rejected": -4.551799774169922, "loss": 0.6456, "nll_loss": 0.6261958479881287, "rewards/accuracies": 1.0, "rewards/chosen": -0.21518968045711517, "rewards/margins": 0.23999029397964478, "rewards/rejected": -0.45517998933792114, "step": 6450 }, { "epoch": 17.661875427789184, "grad_norm": 5.4004974365234375, "learning_rate": 1.1671232876712328e-07, "log_odds_chosen": 1.8627474308013916, "log_odds_ratio": -0.26371607184410095, "logits/chosen": 0.7220043540000916, "logits/rejected": 0.8217619061470032, "logps/chosen": -2.5519843101501465, "logps/rejected": -4.3399481773376465, "loss": 0.6864, "nll_loss": 0.6600691080093384, "rewards/accuracies": 0.875, "rewards/chosen": -0.2551984488964081, "rewards/margins": 0.1787964105606079, "rewards/rejected": -0.4339948296546936, "step": 6451 }, { "epoch": 17.66461327857632, "grad_norm": 11.99346923828125, "learning_rate": 1.1657534246575342e-07, "log_odds_chosen": 2.972365617752075, "log_odds_ratio": -0.3937678337097168, "logits/chosen": 0.9977836012840271, "logits/rejected": 1.029475212097168, "logps/chosen": -2.771116256713867, "logps/rejected": -5.6398162841796875, "loss": 0.832, "nll_loss": 0.7925742864608765, "rewards/accuracies": 0.875, "rewards/chosen": -0.27711161971092224, "rewards/margins": 0.2868700623512268, "rewards/rejected": -0.5639816522598267, "step": 6452 }, { "epoch": 17.66735112936345, "grad_norm": 7.851259231567383, "learning_rate": 1.1643835616438355e-07, "log_odds_chosen": 1.1527135372161865, "log_odds_ratio": -0.5036760568618774, "logits/chosen": 0.8294076919555664, "logits/rejected": 0.8347682952880859, "logps/chosen": -2.6378750801086426, "logps/rejected": -3.678360939025879, "loss": 0.623, "nll_loss": 0.5726315379142761, "rewards/accuracies": 0.75, "rewards/chosen": -0.26378753781318665, "rewards/margins": 0.10404854267835617, "rewards/rejected": -0.3678360879421234, "step": 6453 }, { "epoch": 17.67008898015058, "grad_norm": 5.262814044952393, "learning_rate": 1.163013698630137e-07, "log_odds_chosen": 3.9669551849365234, "log_odds_ratio": -0.11199302226305008, "logits/chosen": 0.8564538955688477, "logits/rejected": 0.8992936015129089, "logps/chosen": -2.1507339477539062, "logps/rejected": -5.951708793640137, "loss": 0.6869, "nll_loss": 0.6757189035415649, "rewards/accuracies": 1.0, "rewards/chosen": -0.2150733768939972, "rewards/margins": 0.38009750843048096, "rewards/rejected": -0.5951708555221558, "step": 6454 }, { "epoch": 17.672826830937716, "grad_norm": 5.723058700561523, "learning_rate": 1.1616438356164384e-07, "log_odds_chosen": 2.8989744186401367, "log_odds_ratio": -0.22250421345233917, "logits/chosen": 1.0997138023376465, "logits/rejected": 1.115769863128662, "logps/chosen": -2.4021406173706055, "logps/rejected": -5.208434104919434, "loss": 0.687, "nll_loss": 0.6647046208381653, "rewards/accuracies": 1.0, "rewards/chosen": -0.24021407961845398, "rewards/margins": 0.28062933683395386, "rewards/rejected": -0.5208434462547302, "step": 6455 }, { "epoch": 17.675564681724847, "grad_norm": 6.198809623718262, "learning_rate": 1.1602739726027396e-07, "log_odds_chosen": 3.152404308319092, "log_odds_ratio": -0.170972540974617, "logits/chosen": 0.7348554134368896, "logits/rejected": 0.8023245334625244, "logps/chosen": -2.388624668121338, "logps/rejected": -5.447100639343262, "loss": 0.7492, "nll_loss": 0.7321465611457825, "rewards/accuracies": 0.875, "rewards/chosen": -0.23886248469352722, "rewards/margins": 0.30584755539894104, "rewards/rejected": -0.5447100400924683, "step": 6456 }, { "epoch": 17.678302532511978, "grad_norm": 4.948643684387207, "learning_rate": 1.158904109589041e-07, "log_odds_chosen": 2.7242531776428223, "log_odds_ratio": -0.2563875615596771, "logits/chosen": 0.8447847962379456, "logits/rejected": 0.8984413146972656, "logps/chosen": -2.0411267280578613, "logps/rejected": -4.625344753265381, "loss": 0.6329, "nll_loss": 0.6072136163711548, "rewards/accuracies": 0.875, "rewards/chosen": -0.2041126787662506, "rewards/margins": 0.25842180848121643, "rewards/rejected": -0.46253445744514465, "step": 6457 }, { "epoch": 17.68104038329911, "grad_norm": 5.12600564956665, "learning_rate": 1.1575342465753424e-07, "log_odds_chosen": 2.38527250289917, "log_odds_ratio": -0.18518643081188202, "logits/chosen": 0.9714791774749756, "logits/rejected": 0.9679068922996521, "logps/chosen": -2.157356023788452, "logps/rejected": -4.38994026184082, "loss": 0.6117, "nll_loss": 0.59321129322052, "rewards/accuracies": 1.0, "rewards/chosen": -0.21573561429977417, "rewards/margins": 0.22325843572616577, "rewards/rejected": -0.43899402022361755, "step": 6458 }, { "epoch": 17.683778234086244, "grad_norm": 9.476563453674316, "learning_rate": 1.1561643835616437e-07, "log_odds_chosen": 2.8936736583709717, "log_odds_ratio": -0.25500956177711487, "logits/chosen": 0.8557067513465881, "logits/rejected": 0.8003941774368286, "logps/chosen": -2.8961026668548584, "logps/rejected": -5.722506046295166, "loss": 0.7719, "nll_loss": 0.746353268623352, "rewards/accuracies": 0.75, "rewards/chosen": -0.28961026668548584, "rewards/margins": 0.28264033794403076, "rewards/rejected": -0.5722506046295166, "step": 6459 }, { "epoch": 17.686516084873375, "grad_norm": 6.549876689910889, "learning_rate": 1.1547945205479452e-07, "log_odds_chosen": 1.0598716735839844, "log_odds_ratio": -0.4835430383682251, "logits/chosen": 0.9503260850906372, "logits/rejected": 0.8880845904350281, "logps/chosen": -2.516289234161377, "logps/rejected": -3.520510196685791, "loss": 0.7451, "nll_loss": 0.6967864632606506, "rewards/accuracies": 0.625, "rewards/chosen": -0.25162890553474426, "rewards/margins": 0.10042211413383484, "rewards/rejected": -0.3520510494709015, "step": 6460 }, { "epoch": 17.689253935660506, "grad_norm": 7.837014675140381, "learning_rate": 1.1534246575342466e-07, "log_odds_chosen": 2.379633665084839, "log_odds_ratio": -0.2142430543899536, "logits/chosen": 1.0100191831588745, "logits/rejected": 1.0340486764907837, "logps/chosen": -2.552401304244995, "logps/rejected": -4.807158470153809, "loss": 0.7088, "nll_loss": 0.6873897314071655, "rewards/accuracies": 1.0, "rewards/chosen": -0.25524014234542847, "rewards/margins": 0.2254757285118103, "rewards/rejected": -0.4807158410549164, "step": 6461 }, { "epoch": 17.691991786447637, "grad_norm": 5.182708740234375, "learning_rate": 1.152054794520548e-07, "log_odds_chosen": 2.087484359741211, "log_odds_ratio": -0.2151705026626587, "logits/chosen": 0.9489362239837646, "logits/rejected": 0.9821054935455322, "logps/chosen": -2.294053077697754, "logps/rejected": -4.255675315856934, "loss": 0.5686, "nll_loss": 0.5471125841140747, "rewards/accuracies": 0.875, "rewards/chosen": -0.22940528392791748, "rewards/margins": 0.19616226851940155, "rewards/rejected": -0.4255675673484802, "step": 6462 }, { "epoch": 17.694729637234772, "grad_norm": 5.798494338989258, "learning_rate": 1.1506849315068492e-07, "log_odds_chosen": 3.2185535430908203, "log_odds_ratio": -0.21114422380924225, "logits/chosen": 0.919853925704956, "logits/rejected": 0.9345352649688721, "logps/chosen": -2.2664666175842285, "logps/rejected": -5.389782905578613, "loss": 0.5822, "nll_loss": 0.5610647201538086, "rewards/accuracies": 1.0, "rewards/chosen": -0.22664669156074524, "rewards/margins": 0.3123316764831543, "rewards/rejected": -0.5389783382415771, "step": 6463 }, { "epoch": 17.697467488021903, "grad_norm": 5.306678771972656, "learning_rate": 1.1493150684931506e-07, "log_odds_chosen": 2.6823644638061523, "log_odds_ratio": -0.38047781586647034, "logits/chosen": 0.7211070656776428, "logits/rejected": 0.7958767414093018, "logps/chosen": -2.288726329803467, "logps/rejected": -4.858072757720947, "loss": 0.8097, "nll_loss": 0.7716379761695862, "rewards/accuracies": 0.75, "rewards/chosen": -0.2288726270198822, "rewards/margins": 0.25693467259407043, "rewards/rejected": -0.48580729961395264, "step": 6464 }, { "epoch": 17.700205338809035, "grad_norm": 5.469733238220215, "learning_rate": 1.147945205479452e-07, "log_odds_chosen": 1.6162493228912354, "log_odds_ratio": -0.29016080498695374, "logits/chosen": 0.8931583762168884, "logits/rejected": 0.8433302640914917, "logps/chosen": -1.9596436023712158, "logps/rejected": -3.4517674446105957, "loss": 0.7154, "nll_loss": 0.6863651275634766, "rewards/accuracies": 0.875, "rewards/chosen": -0.19596436619758606, "rewards/margins": 0.14921239018440247, "rewards/rejected": -0.3451767563819885, "step": 6465 }, { "epoch": 17.702943189596166, "grad_norm": 6.788610935211182, "learning_rate": 1.1465753424657534e-07, "log_odds_chosen": 1.522766351699829, "log_odds_ratio": -0.27295780181884766, "logits/chosen": 0.9342499375343323, "logits/rejected": 0.9593860507011414, "logps/chosen": -2.861661911010742, "logps/rejected": -4.319180488586426, "loss": 0.6657, "nll_loss": 0.6384042501449585, "rewards/accuracies": 1.0, "rewards/chosen": -0.2861661911010742, "rewards/margins": 0.14575180411338806, "rewards/rejected": -0.43191802501678467, "step": 6466 }, { "epoch": 17.7056810403833, "grad_norm": 5.401443004608154, "learning_rate": 1.1452054794520548e-07, "log_odds_chosen": 3.187936782836914, "log_odds_ratio": -0.20043295621871948, "logits/chosen": 0.9103026986122131, "logits/rejected": 0.8824788928031921, "logps/chosen": -2.312018871307373, "logps/rejected": -5.386981010437012, "loss": 0.6098, "nll_loss": 0.5897819399833679, "rewards/accuracies": 1.0, "rewards/chosen": -0.2312018871307373, "rewards/margins": 0.30749621987342834, "rewards/rejected": -0.538698136806488, "step": 6467 }, { "epoch": 17.70841889117043, "grad_norm": 6.146879196166992, "learning_rate": 1.1438356164383562e-07, "log_odds_chosen": 2.431199073791504, "log_odds_ratio": -0.19568271934986115, "logits/chosen": 0.7290239334106445, "logits/rejected": 0.7365337610244751, "logps/chosen": -1.9023826122283936, "logps/rejected": -4.161807537078857, "loss": 0.5957, "nll_loss": 0.5761716961860657, "rewards/accuracies": 1.0, "rewards/chosen": -0.19023825228214264, "rewards/margins": 0.22594250738620758, "rewards/rejected": -0.4161807596683502, "step": 6468 }, { "epoch": 17.711156741957563, "grad_norm": 5.036697864532471, "learning_rate": 1.1424657534246575e-07, "log_odds_chosen": 2.0853819847106934, "log_odds_ratio": -0.18424645066261292, "logits/chosen": 0.6711446046829224, "logits/rejected": 0.6851654052734375, "logps/chosen": -1.6849493980407715, "logps/rejected": -3.571686029434204, "loss": 0.7419, "nll_loss": 0.7234477996826172, "rewards/accuracies": 1.0, "rewards/chosen": -0.16849493980407715, "rewards/margins": 0.18867364525794983, "rewards/rejected": -0.357168585062027, "step": 6469 }, { "epoch": 17.713894592744694, "grad_norm": 8.374388694763184, "learning_rate": 1.1410958904109588e-07, "log_odds_chosen": 0.17919230461120605, "log_odds_ratio": -0.7745640277862549, "logits/chosen": 0.7650572657585144, "logits/rejected": 0.7846888899803162, "logps/chosen": -3.5587735176086426, "logps/rejected": -3.6872949600219727, "loss": 0.7126, "nll_loss": 0.6351616382598877, "rewards/accuracies": 0.625, "rewards/chosen": -0.3558773696422577, "rewards/margins": 0.012852156534790993, "rewards/rejected": -0.36872950196266174, "step": 6470 }, { "epoch": 17.71663244353183, "grad_norm": 5.314056396484375, "learning_rate": 1.1397260273972602e-07, "log_odds_chosen": 2.70035457611084, "log_odds_ratio": -0.16411054134368896, "logits/chosen": 0.8951354622840881, "logits/rejected": 0.9126706123352051, "logps/chosen": -1.7086186408996582, "logps/rejected": -4.219803810119629, "loss": 0.5638, "nll_loss": 0.5473422408103943, "rewards/accuracies": 1.0, "rewards/chosen": -0.1708618700504303, "rewards/margins": 0.2511184811592102, "rewards/rejected": -0.4219803512096405, "step": 6471 }, { "epoch": 17.71937029431896, "grad_norm": 6.432556629180908, "learning_rate": 1.1383561643835616e-07, "log_odds_chosen": 1.6252825260162354, "log_odds_ratio": -0.5625541806221008, "logits/chosen": 0.6830563545227051, "logits/rejected": 0.6411756873130798, "logps/chosen": -2.452713966369629, "logps/rejected": -3.9830002784729004, "loss": 0.7854, "nll_loss": 0.7291014194488525, "rewards/accuracies": 0.875, "rewards/chosen": -0.24527138471603394, "rewards/margins": 0.15302863717079163, "rewards/rejected": -0.39830002188682556, "step": 6472 }, { "epoch": 17.72210814510609, "grad_norm": 5.147488594055176, "learning_rate": 1.136986301369863e-07, "log_odds_chosen": 2.144578456878662, "log_odds_ratio": -0.17923349142074585, "logits/chosen": 0.9235429167747498, "logits/rejected": 0.9693189859390259, "logps/chosen": -2.2982373237609863, "logps/rejected": -4.313213348388672, "loss": 0.5766, "nll_loss": 0.5587109327316284, "rewards/accuracies": 1.0, "rewards/chosen": -0.2298237383365631, "rewards/margins": 0.20149759948253632, "rewards/rejected": -0.4313213527202606, "step": 6473 }, { "epoch": 17.724845995893222, "grad_norm": 6.383586883544922, "learning_rate": 1.1356164383561644e-07, "log_odds_chosen": 1.8043570518493652, "log_odds_ratio": -0.22716331481933594, "logits/chosen": 0.8375122547149658, "logits/rejected": 0.9578863978385925, "logps/chosen": -2.921708106994629, "logps/rejected": -4.663366317749023, "loss": 0.7274, "nll_loss": 0.7046984434127808, "rewards/accuracies": 0.875, "rewards/chosen": -0.29217082262039185, "rewards/margins": 0.17416583001613617, "rewards/rejected": -0.4663366675376892, "step": 6474 }, { "epoch": 17.727583846680357, "grad_norm": 5.660516262054443, "learning_rate": 1.1342465753424657e-07, "log_odds_chosen": 2.4615907669067383, "log_odds_ratio": -0.3087577223777771, "logits/chosen": 0.7748819589614868, "logits/rejected": 0.6696852445602417, "logps/chosen": -1.3361375331878662, "logps/rejected": -3.58035945892334, "loss": 0.6493, "nll_loss": 0.6183973550796509, "rewards/accuracies": 0.75, "rewards/chosen": -0.13361375033855438, "rewards/margins": 0.22442221641540527, "rewards/rejected": -0.35803595185279846, "step": 6475 }, { "epoch": 17.730321697467488, "grad_norm": 8.6417875289917, "learning_rate": 1.132876712328767e-07, "log_odds_chosen": 1.1626801490783691, "log_odds_ratio": -0.49977976083755493, "logits/chosen": 0.731560468673706, "logits/rejected": 0.6849045157432556, "logps/chosen": -2.1746511459350586, "logps/rejected": -3.24343204498291, "loss": 0.5869, "nll_loss": 0.5369099974632263, "rewards/accuracies": 0.75, "rewards/chosen": -0.2174651175737381, "rewards/margins": 0.10687807202339172, "rewards/rejected": -0.324343204498291, "step": 6476 }, { "epoch": 17.73305954825462, "grad_norm": 4.716396808624268, "learning_rate": 1.1315068493150684e-07, "log_odds_chosen": 2.817953586578369, "log_odds_ratio": -0.3244936764240265, "logits/chosen": 0.7889382839202881, "logits/rejected": 0.8178831934928894, "logps/chosen": -2.282557964324951, "logps/rejected": -4.927386283874512, "loss": 0.6784, "nll_loss": 0.6459641456604004, "rewards/accuracies": 0.875, "rewards/chosen": -0.22825577855110168, "rewards/margins": 0.2644828259944916, "rewards/rejected": -0.49273860454559326, "step": 6477 }, { "epoch": 17.73579739904175, "grad_norm": 4.838833808898926, "learning_rate": 1.1301369863013698e-07, "log_odds_chosen": 1.7524337768554688, "log_odds_ratio": -0.22318804264068604, "logits/chosen": 0.9952907562255859, "logits/rejected": 1.083016037940979, "logps/chosen": -2.5472140312194824, "logps/rejected": -4.230779647827148, "loss": 0.6061, "nll_loss": 0.5837928056716919, "rewards/accuracies": 1.0, "rewards/chosen": -0.25472140312194824, "rewards/margins": 0.16835659742355347, "rewards/rejected": -0.4230780303478241, "step": 6478 }, { "epoch": 17.738535249828885, "grad_norm": 4.913318157196045, "learning_rate": 1.1287671232876713e-07, "log_odds_chosen": 2.9562551975250244, "log_odds_ratio": -0.18338313698768616, "logits/chosen": 0.8700662851333618, "logits/rejected": 0.9360740780830383, "logps/chosen": -2.367924213409424, "logps/rejected": -5.24305534362793, "loss": 0.6536, "nll_loss": 0.6352297067642212, "rewards/accuracies": 0.875, "rewards/chosen": -0.2367924451828003, "rewards/margins": 0.2875130772590637, "rewards/rejected": -0.524305522441864, "step": 6479 }, { "epoch": 17.741273100616016, "grad_norm": 6.2991414070129395, "learning_rate": 1.1273972602739726e-07, "log_odds_chosen": 1.3122810125350952, "log_odds_ratio": -0.343620240688324, "logits/chosen": 0.6645180583000183, "logits/rejected": 0.7657800912857056, "logps/chosen": -1.877907633781433, "logps/rejected": -3.0440549850463867, "loss": 0.6405, "nll_loss": 0.6061686873435974, "rewards/accuracies": 0.875, "rewards/chosen": -0.18779076635837555, "rewards/margins": 0.11661472916603088, "rewards/rejected": -0.3044055104255676, "step": 6480 }, { "epoch": 17.744010951403148, "grad_norm": 6.271764278411865, "learning_rate": 1.126027397260274e-07, "log_odds_chosen": 2.9274978637695312, "log_odds_ratio": -0.27028563618659973, "logits/chosen": 0.8623942136764526, "logits/rejected": 0.8481444716453552, "logps/chosen": -1.7330597639083862, "logps/rejected": -4.506997108459473, "loss": 0.6058, "nll_loss": 0.578751266002655, "rewards/accuracies": 1.0, "rewards/chosen": -0.1733059585094452, "rewards/margins": 0.27739378809928894, "rewards/rejected": -0.45069974660873413, "step": 6481 }, { "epoch": 17.746748802190282, "grad_norm": 5.165040969848633, "learning_rate": 1.1246575342465753e-07, "log_odds_chosen": 2.1493382453918457, "log_odds_ratio": -0.1625487506389618, "logits/chosen": 0.8058134317398071, "logits/rejected": 0.7464593648910522, "logps/chosen": -1.5637396574020386, "logps/rejected": -3.4973812103271484, "loss": 0.6434, "nll_loss": 0.627118706703186, "rewards/accuracies": 1.0, "rewards/chosen": -0.15637394785881042, "rewards/margins": 0.19336417317390442, "rewards/rejected": -0.34973812103271484, "step": 6482 }, { "epoch": 17.749486652977414, "grad_norm": 5.098220348358154, "learning_rate": 1.1232876712328766e-07, "log_odds_chosen": 1.6138042211532593, "log_odds_ratio": -0.2980238199234009, "logits/chosen": 0.8581538796424866, "logits/rejected": 0.856106162071228, "logps/chosen": -1.6470242738723755, "logps/rejected": -3.094994068145752, "loss": 0.5757, "nll_loss": 0.5458532571792603, "rewards/accuracies": 0.875, "rewards/chosen": -0.1647024303674698, "rewards/margins": 0.14479699730873108, "rewards/rejected": -0.30949944257736206, "step": 6483 }, { "epoch": 17.752224503764545, "grad_norm": 5.930700302124023, "learning_rate": 1.121917808219178e-07, "log_odds_chosen": 1.2886791229248047, "log_odds_ratio": -0.3029290735721588, "logits/chosen": 0.8906160593032837, "logits/rejected": 0.8508466482162476, "logps/chosen": -1.5271563529968262, "logps/rejected": -2.6104040145874023, "loss": 0.5152, "nll_loss": 0.4848705530166626, "rewards/accuracies": 1.0, "rewards/chosen": -0.15271562337875366, "rewards/margins": 0.10832476615905762, "rewards/rejected": -0.2610403895378113, "step": 6484 }, { "epoch": 17.754962354551676, "grad_norm": 6.096160411834717, "learning_rate": 1.1205479452054795e-07, "log_odds_chosen": 2.866647720336914, "log_odds_ratio": -0.24915915727615356, "logits/chosen": 0.7312111258506775, "logits/rejected": 0.7453734874725342, "logps/chosen": -1.9919352531433105, "logps/rejected": -4.689043045043945, "loss": 0.5725, "nll_loss": 0.5476059317588806, "rewards/accuracies": 0.875, "rewards/chosen": -0.19919352233409882, "rewards/margins": 0.2697107493877411, "rewards/rejected": -0.4689042568206787, "step": 6485 }, { "epoch": 17.75770020533881, "grad_norm": 4.77444314956665, "learning_rate": 1.1191780821917808e-07, "log_odds_chosen": 2.8980467319488525, "log_odds_ratio": -0.19092601537704468, "logits/chosen": 0.8744702339172363, "logits/rejected": 0.909369707107544, "logps/chosen": -2.2014474868774414, "logps/rejected": -4.972312927246094, "loss": 0.5437, "nll_loss": 0.5246450304985046, "rewards/accuracies": 1.0, "rewards/chosen": -0.22014474868774414, "rewards/margins": 0.2770865559577942, "rewards/rejected": -0.49723127484321594, "step": 6486 }, { "epoch": 17.760438056125942, "grad_norm": 6.1402788162231445, "learning_rate": 1.1178082191780821e-07, "log_odds_chosen": 0.6645276546478271, "log_odds_ratio": -0.5083870887756348, "logits/chosen": 0.7873939275741577, "logits/rejected": 0.7844911813735962, "logps/chosen": -2.445216178894043, "logps/rejected": -3.045358657836914, "loss": 0.6734, "nll_loss": 0.622591495513916, "rewards/accuracies": 0.75, "rewards/chosen": -0.2445215880870819, "rewards/margins": 0.0600142665207386, "rewards/rejected": -0.3045358657836914, "step": 6487 }, { "epoch": 17.763175906913073, "grad_norm": 6.253368854522705, "learning_rate": 1.1164383561643835e-07, "log_odds_chosen": 1.7361819744110107, "log_odds_ratio": -0.3861135244369507, "logits/chosen": 0.7754939794540405, "logits/rejected": 0.8854730129241943, "logps/chosen": -1.8665251731872559, "logps/rejected": -3.425055503845215, "loss": 0.5912, "nll_loss": 0.5525469779968262, "rewards/accuracies": 0.875, "rewards/chosen": -0.1866525113582611, "rewards/margins": 0.15585307776927948, "rewards/rejected": -0.3425055742263794, "step": 6488 }, { "epoch": 17.765913757700204, "grad_norm": 5.6006035804748535, "learning_rate": 1.1150684931506849e-07, "log_odds_chosen": 2.4994311332702637, "log_odds_ratio": -0.1508062183856964, "logits/chosen": 0.7789362668991089, "logits/rejected": 0.8368249535560608, "logps/chosen": -1.4185601472854614, "logps/rejected": -3.6717569828033447, "loss": 0.6516, "nll_loss": 0.6365445256233215, "rewards/accuracies": 1.0, "rewards/chosen": -0.14185601472854614, "rewards/margins": 0.22531966865062714, "rewards/rejected": -0.3671756982803345, "step": 6489 }, { "epoch": 17.76865160848734, "grad_norm": 5.077508449554443, "learning_rate": 1.1136986301369862e-07, "log_odds_chosen": 3.165128231048584, "log_odds_ratio": -0.18270237743854523, "logits/chosen": 0.8862916231155396, "logits/rejected": 0.8783047199249268, "logps/chosen": -1.797316551208496, "logps/rejected": -4.774730205535889, "loss": 0.5844, "nll_loss": 0.566135823726654, "rewards/accuracies": 1.0, "rewards/chosen": -0.17973165214061737, "rewards/margins": 0.2977413535118103, "rewards/rejected": -0.47747302055358887, "step": 6490 }, { "epoch": 17.77138945927447, "grad_norm": 5.272944450378418, "learning_rate": 1.1123287671232877e-07, "log_odds_chosen": 2.522444486618042, "log_odds_ratio": -0.23293009400367737, "logits/chosen": 0.783861517906189, "logits/rejected": 0.8537624478340149, "logps/chosen": -2.838193893432617, "logps/rejected": -5.2527079582214355, "loss": 0.8082, "nll_loss": 0.7849324345588684, "rewards/accuracies": 1.0, "rewards/chosen": -0.28381937742233276, "rewards/margins": 0.2414514273405075, "rewards/rejected": -0.5252708196640015, "step": 6491 }, { "epoch": 17.7741273100616, "grad_norm": 5.258334159851074, "learning_rate": 1.1109589041095891e-07, "log_odds_chosen": 3.4582982063293457, "log_odds_ratio": -0.13314618170261383, "logits/chosen": 0.8602133989334106, "logits/rejected": 0.8335391283035278, "logps/chosen": -1.9195940494537354, "logps/rejected": -5.200130939483643, "loss": 0.6416, "nll_loss": 0.62831050157547, "rewards/accuracies": 1.0, "rewards/chosen": -0.19195939600467682, "rewards/margins": 0.32805371284484863, "rewards/rejected": -0.520013153553009, "step": 6492 }, { "epoch": 17.776865160848732, "grad_norm": 6.349511623382568, "learning_rate": 1.1095890410958903e-07, "log_odds_chosen": 3.329615592956543, "log_odds_ratio": -0.28608354926109314, "logits/chosen": 0.9221541881561279, "logits/rejected": 0.9100903868675232, "logps/chosen": -2.486912250518799, "logps/rejected": -5.723458766937256, "loss": 0.6641, "nll_loss": 0.6355074644088745, "rewards/accuracies": 0.875, "rewards/chosen": -0.24869123101234436, "rewards/margins": 0.3236546218395233, "rewards/rejected": -0.5723458528518677, "step": 6493 }, { "epoch": 17.779603011635867, "grad_norm": 6.325620174407959, "learning_rate": 1.1082191780821917e-07, "log_odds_chosen": 1.7132433652877808, "log_odds_ratio": -0.24594342708587646, "logits/chosen": 0.7582356929779053, "logits/rejected": 0.8083407878875732, "logps/chosen": -2.1441733837127686, "logps/rejected": -3.7624893188476562, "loss": 0.7269, "nll_loss": 0.7022767066955566, "rewards/accuracies": 1.0, "rewards/chosen": -0.21441733837127686, "rewards/margins": 0.16183160245418549, "rewards/rejected": -0.37624892592430115, "step": 6494 }, { "epoch": 17.782340862423, "grad_norm": 6.999423027038574, "learning_rate": 1.1068493150684931e-07, "log_odds_chosen": 1.4493768215179443, "log_odds_ratio": -0.33920836448669434, "logits/chosen": 0.6390520930290222, "logits/rejected": 0.606224775314331, "logps/chosen": -2.2413899898529053, "logps/rejected": -3.5851926803588867, "loss": 0.6136, "nll_loss": 0.5797203183174133, "rewards/accuracies": 1.0, "rewards/chosen": -0.2241390198469162, "rewards/margins": 0.1343802660703659, "rewards/rejected": -0.3585192859172821, "step": 6495 }, { "epoch": 17.78507871321013, "grad_norm": 6.987430572509766, "learning_rate": 1.1054794520547944e-07, "log_odds_chosen": 0.7447077035903931, "log_odds_ratio": -0.4679532051086426, "logits/chosen": 0.6757943630218506, "logits/rejected": 0.7983900308609009, "logps/chosen": -2.4989559650421143, "logps/rejected": -3.2152726650238037, "loss": 0.7733, "nll_loss": 0.7265416383743286, "rewards/accuracies": 0.625, "rewards/chosen": -0.2498956024646759, "rewards/margins": 0.07163168489933014, "rewards/rejected": -0.32152727246284485, "step": 6496 }, { "epoch": 17.78781656399726, "grad_norm": 5.440252304077148, "learning_rate": 1.1041095890410958e-07, "log_odds_chosen": 1.496950626373291, "log_odds_ratio": -0.3087655007839203, "logits/chosen": 0.5766186118125916, "logits/rejected": 0.5933493971824646, "logps/chosen": -2.6484508514404297, "logps/rejected": -4.013501167297363, "loss": 0.6215, "nll_loss": 0.5906509757041931, "rewards/accuracies": 0.875, "rewards/chosen": -0.264845073223114, "rewards/margins": 0.13650503754615784, "rewards/rejected": -0.40135014057159424, "step": 6497 }, { "epoch": 17.790554414784395, "grad_norm": 6.134122848510742, "learning_rate": 1.1027397260273973e-07, "log_odds_chosen": 1.6257762908935547, "log_odds_ratio": -0.2881819009780884, "logits/chosen": 0.8224859237670898, "logits/rejected": 0.8205861449241638, "logps/chosen": -2.600691795349121, "logps/rejected": -4.112661838531494, "loss": 0.6692, "nll_loss": 0.6403830051422119, "rewards/accuracies": 0.875, "rewards/chosen": -0.2600691616535187, "rewards/margins": 0.15119703114032745, "rewards/rejected": -0.41126617789268494, "step": 6498 }, { "epoch": 17.793292265571527, "grad_norm": 5.307075500488281, "learning_rate": 1.1013698630136987e-07, "log_odds_chosen": 2.486647844314575, "log_odds_ratio": -0.15664224326610565, "logits/chosen": 0.9491497278213501, "logits/rejected": 0.968341588973999, "logps/chosen": -2.3766427040100098, "logps/rejected": -4.742246627807617, "loss": 0.7087, "nll_loss": 0.6930411458015442, "rewards/accuracies": 1.0, "rewards/chosen": -0.23766423761844635, "rewards/margins": 0.2365604043006897, "rewards/rejected": -0.47422465682029724, "step": 6499 }, { "epoch": 17.796030116358658, "grad_norm": 7.44140100479126, "learning_rate": 1.0999999999999999e-07, "log_odds_chosen": 3.534778118133545, "log_odds_ratio": -0.2005542814731598, "logits/chosen": 0.7143857479095459, "logits/rejected": 0.6675910353660583, "logps/chosen": -1.7366230487823486, "logps/rejected": -5.043543815612793, "loss": 0.6958, "nll_loss": 0.6757107377052307, "rewards/accuracies": 1.0, "rewards/chosen": -0.17366230487823486, "rewards/margins": 0.3306921124458313, "rewards/rejected": -0.5043544173240662, "step": 6500 }, { "epoch": 17.79876796714579, "grad_norm": 5.410493850708008, "learning_rate": 1.0986301369863013e-07, "log_odds_chosen": 2.416761636734009, "log_odds_ratio": -0.30046147108078003, "logits/chosen": 0.8941925764083862, "logits/rejected": 0.919408917427063, "logps/chosen": -1.658403754234314, "logps/rejected": -3.9716362953186035, "loss": 0.5218, "nll_loss": 0.4917069971561432, "rewards/accuracies": 0.875, "rewards/chosen": -0.16584037244319916, "rewards/margins": 0.2313232421875, "rewards/rejected": -0.39716362953186035, "step": 6501 }, { "epoch": 17.801505817932924, "grad_norm": 6.274441719055176, "learning_rate": 1.0972602739726027e-07, "log_odds_chosen": 2.312098503112793, "log_odds_ratio": -0.21417009830474854, "logits/chosen": 0.9450798034667969, "logits/rejected": 1.0456799268722534, "logps/chosen": -2.3404717445373535, "logps/rejected": -4.545269966125488, "loss": 0.6439, "nll_loss": 0.6225248575210571, "rewards/accuracies": 1.0, "rewards/chosen": -0.23404718935489655, "rewards/margins": 0.220479816198349, "rewards/rejected": -0.45452702045440674, "step": 6502 }, { "epoch": 17.804243668720055, "grad_norm": 5.79857063293457, "learning_rate": 1.095890410958904e-07, "log_odds_chosen": 1.701839566230774, "log_odds_ratio": -0.22344109416007996, "logits/chosen": 0.973878800868988, "logits/rejected": 0.9543752670288086, "logps/chosen": -2.051287889480591, "logps/rejected": -3.5473499298095703, "loss": 0.5577, "nll_loss": 0.5353552103042603, "rewards/accuracies": 1.0, "rewards/chosen": -0.20512878894805908, "rewards/margins": 0.14960619807243347, "rewards/rejected": -0.35473501682281494, "step": 6503 }, { "epoch": 17.806981519507186, "grad_norm": 5.495675086975098, "learning_rate": 1.0945205479452055e-07, "log_odds_chosen": 3.582956075668335, "log_odds_ratio": -0.15719451010227203, "logits/chosen": 0.7469692230224609, "logits/rejected": 0.7789646983146667, "logps/chosen": -1.9475172758102417, "logps/rejected": -5.388509273529053, "loss": 0.6523, "nll_loss": 0.6366142630577087, "rewards/accuracies": 1.0, "rewards/chosen": -0.19475170969963074, "rewards/margins": 0.344099223613739, "rewards/rejected": -0.5388509631156921, "step": 6504 }, { "epoch": 17.809719370294317, "grad_norm": 9.072766304016113, "learning_rate": 1.0931506849315069e-07, "log_odds_chosen": 2.532960891723633, "log_odds_ratio": -0.4855113625526428, "logits/chosen": 0.77525794506073, "logits/rejected": 0.7987385988235474, "logps/chosen": -2.896251678466797, "logps/rejected": -5.344431400299072, "loss": 0.853, "nll_loss": 0.8044040203094482, "rewards/accuracies": 0.875, "rewards/chosen": -0.2896251678466797, "rewards/margins": 0.24481795728206635, "rewards/rejected": -0.5344430804252625, "step": 6505 }, { "epoch": 17.812457221081452, "grad_norm": 6.751113414764404, "learning_rate": 1.0917808219178081e-07, "log_odds_chosen": 2.277604818344116, "log_odds_ratio": -0.22390776872634888, "logits/chosen": 0.8433045744895935, "logits/rejected": 0.7624729871749878, "logps/chosen": -2.092223882675171, "logps/rejected": -4.218078136444092, "loss": 0.6528, "nll_loss": 0.6304175853729248, "rewards/accuracies": 0.875, "rewards/chosen": -0.20922239124774933, "rewards/margins": 0.2125854343175888, "rewards/rejected": -0.42180782556533813, "step": 6506 }, { "epoch": 17.815195071868583, "grad_norm": 6.584065914154053, "learning_rate": 1.0904109589041095e-07, "log_odds_chosen": 2.6486589908599854, "log_odds_ratio": -0.23145148158073425, "logits/chosen": 0.7280457019805908, "logits/rejected": 0.7125147581100464, "logps/chosen": -3.3387978076934814, "logps/rejected": -5.959203243255615, "loss": 0.7949, "nll_loss": 0.7717077136039734, "rewards/accuracies": 0.875, "rewards/chosen": -0.3338797986507416, "rewards/margins": 0.26204055547714233, "rewards/rejected": -0.5959203243255615, "step": 6507 }, { "epoch": 17.817932922655714, "grad_norm": 6.212558746337891, "learning_rate": 1.0890410958904109e-07, "log_odds_chosen": 2.198543071746826, "log_odds_ratio": -0.17925195395946503, "logits/chosen": 1.0294077396392822, "logits/rejected": 1.1191246509552002, "logps/chosen": -2.5743913650512695, "logps/rejected": -4.673728942871094, "loss": 0.6612, "nll_loss": 0.6432293653488159, "rewards/accuracies": 1.0, "rewards/chosen": -0.25743916630744934, "rewards/margins": 0.20993375778198242, "rewards/rejected": -0.4673728942871094, "step": 6508 }, { "epoch": 17.82067077344285, "grad_norm": 4.935568332672119, "learning_rate": 1.0876712328767123e-07, "log_odds_chosen": 3.745245933532715, "log_odds_ratio": -0.12966659665107727, "logits/chosen": 1.036865234375, "logits/rejected": 1.0614429712295532, "logps/chosen": -2.3196351528167725, "logps/rejected": -5.9682817459106445, "loss": 0.6982, "nll_loss": 0.6851906180381775, "rewards/accuracies": 1.0, "rewards/chosen": -0.23196350038051605, "rewards/margins": 0.36486464738845825, "rewards/rejected": -0.5968281030654907, "step": 6509 }, { "epoch": 17.82340862422998, "grad_norm": 6.443896770477295, "learning_rate": 1.0863013698630137e-07, "log_odds_chosen": 1.6126830577850342, "log_odds_ratio": -0.22476428747177124, "logits/chosen": 0.8031750917434692, "logits/rejected": 0.8326506614685059, "logps/chosen": -1.9236936569213867, "logps/rejected": -3.3769471645355225, "loss": 0.5749, "nll_loss": 0.5524260401725769, "rewards/accuracies": 1.0, "rewards/chosen": -0.19236937165260315, "rewards/margins": 0.14532533288002014, "rewards/rejected": -0.3376947045326233, "step": 6510 }, { "epoch": 17.82614647501711, "grad_norm": 4.945916652679443, "learning_rate": 1.084931506849315e-07, "log_odds_chosen": 2.571892261505127, "log_odds_ratio": -0.228012815117836, "logits/chosen": 0.7557119727134705, "logits/rejected": 0.8120860457420349, "logps/chosen": -1.60926353931427, "logps/rejected": -3.989577293395996, "loss": 0.6049, "nll_loss": 0.5821089148521423, "rewards/accuracies": 1.0, "rewards/chosen": -0.16092634201049805, "rewards/margins": 0.23803138732910156, "rewards/rejected": -0.398957759141922, "step": 6511 }, { "epoch": 17.828884325804243, "grad_norm": 7.291045188903809, "learning_rate": 1.0835616438356165e-07, "log_odds_chosen": 0.8350740671157837, "log_odds_ratio": -0.5096457600593567, "logits/chosen": 0.9107030034065247, "logits/rejected": 0.8561411499977112, "logps/chosen": -2.3999533653259277, "logps/rejected": -3.1440844535827637, "loss": 0.7229, "nll_loss": 0.6719449758529663, "rewards/accuracies": 0.75, "rewards/chosen": -0.2399953156709671, "rewards/margins": 0.07441312819719315, "rewards/rejected": -0.31440845131874084, "step": 6512 }, { "epoch": 17.831622176591377, "grad_norm": 5.326809883117676, "learning_rate": 1.0821917808219177e-07, "log_odds_chosen": 3.5349655151367188, "log_odds_ratio": -0.0916392132639885, "logits/chosen": 0.7994222044944763, "logits/rejected": 0.8505160808563232, "logps/chosen": -2.6232688426971436, "logps/rejected": -6.022250175476074, "loss": 0.6759, "nll_loss": 0.6667143702507019, "rewards/accuracies": 1.0, "rewards/chosen": -0.2623268961906433, "rewards/margins": 0.33989813923835754, "rewards/rejected": -0.6022250056266785, "step": 6513 }, { "epoch": 17.83436002737851, "grad_norm": 5.158256530761719, "learning_rate": 1.0808219178082191e-07, "log_odds_chosen": 1.9523738622665405, "log_odds_ratio": -0.19150808453559875, "logits/chosen": 0.6735000610351562, "logits/rejected": 0.7264171838760376, "logps/chosen": -2.3240208625793457, "logps/rejected": -4.185295104980469, "loss": 0.566, "nll_loss": 0.5468709468841553, "rewards/accuracies": 1.0, "rewards/chosen": -0.23240210115909576, "rewards/margins": 0.1861274242401123, "rewards/rejected": -0.4185295104980469, "step": 6514 }, { "epoch": 17.83709787816564, "grad_norm": 6.356820106506348, "learning_rate": 1.0794520547945205e-07, "log_odds_chosen": 2.0340399742126465, "log_odds_ratio": -0.3282383680343628, "logits/chosen": 0.7331005930900574, "logits/rejected": 0.7239082455635071, "logps/chosen": -2.1879239082336426, "logps/rejected": -4.103994369506836, "loss": 0.713, "nll_loss": 0.6801506876945496, "rewards/accuracies": 0.875, "rewards/chosen": -0.2187924087047577, "rewards/margins": 0.1916070282459259, "rewards/rejected": -0.4103994369506836, "step": 6515 }, { "epoch": 17.83983572895277, "grad_norm": 5.38852071762085, "learning_rate": 1.0780821917808219e-07, "log_odds_chosen": 3.9482128620147705, "log_odds_ratio": -0.12235910445451736, "logits/chosen": 0.7045232057571411, "logits/rejected": 0.7600296139717102, "logps/chosen": -2.105475425720215, "logps/rejected": -5.863825798034668, "loss": 0.5854, "nll_loss": 0.5732122659683228, "rewards/accuracies": 1.0, "rewards/chosen": -0.210547536611557, "rewards/margins": 0.37583503127098083, "rewards/rejected": -0.5863825082778931, "step": 6516 }, { "epoch": 17.842573579739906, "grad_norm": 4.748124122619629, "learning_rate": 1.0767123287671233e-07, "log_odds_chosen": 1.6890487670898438, "log_odds_ratio": -0.2134222388267517, "logits/chosen": 0.9005281925201416, "logits/rejected": 0.924026608467102, "logps/chosen": -2.2366936206817627, "logps/rejected": -3.814572334289551, "loss": 0.5758, "nll_loss": 0.5544955730438232, "rewards/accuracies": 1.0, "rewards/chosen": -0.2236693650484085, "rewards/margins": 0.15778785943984985, "rewards/rejected": -0.38145720958709717, "step": 6517 }, { "epoch": 17.845311430527037, "grad_norm": 5.382129669189453, "learning_rate": 1.0753424657534247e-07, "log_odds_chosen": 1.485053539276123, "log_odds_ratio": -0.31216782331466675, "logits/chosen": 0.8957812190055847, "logits/rejected": 0.938594400882721, "logps/chosen": -2.0087788105010986, "logps/rejected": -3.3502161502838135, "loss": 0.5322, "nll_loss": 0.5009351372718811, "rewards/accuracies": 0.875, "rewards/chosen": -0.20087790489196777, "rewards/margins": 0.13414371013641357, "rewards/rejected": -0.33502161502838135, "step": 6518 }, { "epoch": 17.848049281314168, "grad_norm": 5.940335273742676, "learning_rate": 1.073972602739726e-07, "log_odds_chosen": 1.7441589832305908, "log_odds_ratio": -0.45054543018341064, "logits/chosen": 0.7183521389961243, "logits/rejected": 0.7166876196861267, "logps/chosen": -2.2121548652648926, "logps/rejected": -3.844508171081543, "loss": 0.5965, "nll_loss": 0.5514165163040161, "rewards/accuracies": 0.75, "rewards/chosen": -0.22121547162532806, "rewards/margins": 0.16323533654212952, "rewards/rejected": -0.3844508230686188, "step": 6519 }, { "epoch": 17.8507871321013, "grad_norm": 6.562138080596924, "learning_rate": 1.0726027397260273e-07, "log_odds_chosen": 2.2079646587371826, "log_odds_ratio": -0.2014831006526947, "logits/chosen": 0.9959789514541626, "logits/rejected": 0.9582768082618713, "logps/chosen": -2.1671652793884277, "logps/rejected": -4.256361961364746, "loss": 0.657, "nll_loss": 0.6368387937545776, "rewards/accuracies": 1.0, "rewards/chosen": -0.21671651303768158, "rewards/margins": 0.2089196741580963, "rewards/rejected": -0.4256362020969391, "step": 6520 }, { "epoch": 17.853524982888434, "grad_norm": 6.219756126403809, "learning_rate": 1.0712328767123287e-07, "log_odds_chosen": 2.25470232963562, "log_odds_ratio": -0.27248725295066833, "logits/chosen": 0.8328812122344971, "logits/rejected": 0.9462848901748657, "logps/chosen": -3.398653984069824, "logps/rejected": -5.535945415496826, "loss": 0.7484, "nll_loss": 0.7211475372314453, "rewards/accuracies": 0.875, "rewards/chosen": -0.33986544609069824, "rewards/margins": 0.2137291431427002, "rewards/rejected": -0.5535945892333984, "step": 6521 }, { "epoch": 17.856262833675565, "grad_norm": 8.095695495605469, "learning_rate": 1.0698630136986301e-07, "log_odds_chosen": 1.5840466022491455, "log_odds_ratio": -0.5976988673210144, "logits/chosen": 0.7371222376823425, "logits/rejected": 0.7044256329536438, "logps/chosen": -3.2003962993621826, "logps/rejected": -4.691162109375, "loss": 0.7425, "nll_loss": 0.6827734112739563, "rewards/accuracies": 0.75, "rewards/chosen": -0.32003962993621826, "rewards/margins": 0.14907656610012054, "rewards/rejected": -0.4691162109375, "step": 6522 }, { "epoch": 17.859000684462696, "grad_norm": 5.000687122344971, "learning_rate": 1.0684931506849315e-07, "log_odds_chosen": 1.7079439163208008, "log_odds_ratio": -0.24699720740318298, "logits/chosen": 0.9218088388442993, "logits/rejected": 0.9641373157501221, "logps/chosen": -2.0989983081817627, "logps/rejected": -3.6960830688476562, "loss": 0.5993, "nll_loss": 0.5745583772659302, "rewards/accuracies": 1.0, "rewards/chosen": -0.20989982783794403, "rewards/margins": 0.15970847010612488, "rewards/rejected": -0.3696083128452301, "step": 6523 }, { "epoch": 17.861738535249827, "grad_norm": 5.1376543045043945, "learning_rate": 1.0671232876712328e-07, "log_odds_chosen": 3.2683372497558594, "log_odds_ratio": -0.12121124565601349, "logits/chosen": 0.9006569981575012, "logits/rejected": 0.8281867504119873, "logps/chosen": -1.8705188035964966, "logps/rejected": -4.873538494110107, "loss": 0.6755, "nll_loss": 0.6634029150009155, "rewards/accuracies": 1.0, "rewards/chosen": -0.1870518922805786, "rewards/margins": 0.3003019392490387, "rewards/rejected": -0.4873538315296173, "step": 6524 }, { "epoch": 17.864476386036962, "grad_norm": 5.137116432189941, "learning_rate": 1.0657534246575342e-07, "log_odds_chosen": 1.8186851739883423, "log_odds_ratio": -0.27727198600769043, "logits/chosen": 0.7123690247535706, "logits/rejected": 0.6002224683761597, "logps/chosen": -1.9773032665252686, "logps/rejected": -3.6679911613464355, "loss": 0.5675, "nll_loss": 0.5397278070449829, "rewards/accuracies": 0.875, "rewards/chosen": -0.19773030281066895, "rewards/margins": 0.16906875371932983, "rewards/rejected": -0.36679908633232117, "step": 6525 }, { "epoch": 17.867214236824093, "grad_norm": 4.730287075042725, "learning_rate": 1.0643835616438355e-07, "log_odds_chosen": 2.572723627090454, "log_odds_ratio": -0.22093439102172852, "logits/chosen": 1.0488468408584595, "logits/rejected": 0.9947068095207214, "logps/chosen": -2.5254945755004883, "logps/rejected": -4.995079040527344, "loss": 0.7179, "nll_loss": 0.6957831978797913, "rewards/accuracies": 0.875, "rewards/chosen": -0.2525494396686554, "rewards/margins": 0.2469584047794342, "rewards/rejected": -0.499507874250412, "step": 6526 }, { "epoch": 17.869952087611225, "grad_norm": 5.79744815826416, "learning_rate": 1.0630136986301369e-07, "log_odds_chosen": 2.024224281311035, "log_odds_ratio": -0.2523391544818878, "logits/chosen": 0.6853249073028564, "logits/rejected": 0.6487439870834351, "logps/chosen": -1.486878514289856, "logps/rejected": -3.2807064056396484, "loss": 0.5477, "nll_loss": 0.5224653482437134, "rewards/accuracies": 1.0, "rewards/chosen": -0.14868785440921783, "rewards/margins": 0.179382786154747, "rewards/rejected": -0.32807064056396484, "step": 6527 }, { "epoch": 17.872689938398356, "grad_norm": 5.042579650878906, "learning_rate": 1.0616438356164383e-07, "log_odds_chosen": 3.489971876144409, "log_odds_ratio": -0.1362071931362152, "logits/chosen": 0.7436349391937256, "logits/rejected": 0.8044619560241699, "logps/chosen": -2.0495071411132812, "logps/rejected": -5.385176658630371, "loss": 0.6691, "nll_loss": 0.6555175185203552, "rewards/accuracies": 1.0, "rewards/chosen": -0.2049507051706314, "rewards/margins": 0.33356690406799316, "rewards/rejected": -0.5385175943374634, "step": 6528 }, { "epoch": 17.87542778918549, "grad_norm": 4.833226203918457, "learning_rate": 1.0602739726027398e-07, "log_odds_chosen": 2.376830816268921, "log_odds_ratio": -0.29888665676116943, "logits/chosen": 0.7201066613197327, "logits/rejected": 0.7509803175926208, "logps/chosen": -2.168929100036621, "logps/rejected": -4.462665557861328, "loss": 0.6842, "nll_loss": 0.6543428301811218, "rewards/accuracies": 0.875, "rewards/chosen": -0.21689292788505554, "rewards/margins": 0.22937363386154175, "rewards/rejected": -0.4462665617465973, "step": 6529 }, { "epoch": 17.87816563997262, "grad_norm": 5.072608947753906, "learning_rate": 1.058904109589041e-07, "log_odds_chosen": 2.2131476402282715, "log_odds_ratio": -0.2271377593278885, "logits/chosen": 0.7185677289962769, "logits/rejected": 0.6500540971755981, "logps/chosen": -1.6031595468521118, "logps/rejected": -3.6100993156433105, "loss": 0.6155, "nll_loss": 0.5927887558937073, "rewards/accuracies": 0.875, "rewards/chosen": -0.16031594574451447, "rewards/margins": 0.20069395005702972, "rewards/rejected": -0.3610098958015442, "step": 6530 }, { "epoch": 17.880903490759753, "grad_norm": 5.917791366577148, "learning_rate": 1.0575342465753424e-07, "log_odds_chosen": 2.7129645347595215, "log_odds_ratio": -0.16314776241779327, "logits/chosen": 0.8514102101325989, "logits/rejected": 0.9579343795776367, "logps/chosen": -2.2414145469665527, "logps/rejected": -4.773292064666748, "loss": 0.7698, "nll_loss": 0.7534711956977844, "rewards/accuracies": 1.0, "rewards/chosen": -0.224141463637352, "rewards/margins": 0.25318777561187744, "rewards/rejected": -0.47732922434806824, "step": 6531 }, { "epoch": 17.883641341546884, "grad_norm": 7.612727165222168, "learning_rate": 1.0561643835616438e-07, "log_odds_chosen": 2.8286073207855225, "log_odds_ratio": -0.10104412585496902, "logits/chosen": 1.047430396080017, "logits/rejected": 1.1633002758026123, "logps/chosen": -2.461780071258545, "logps/rejected": -5.181078910827637, "loss": 0.7014, "nll_loss": 0.6912493705749512, "rewards/accuracies": 1.0, "rewards/chosen": -0.24617800116539001, "rewards/margins": 0.27192991971969604, "rewards/rejected": -0.5181079506874084, "step": 6532 }, { "epoch": 17.88637919233402, "grad_norm": 6.19871711730957, "learning_rate": 1.0547945205479451e-07, "log_odds_chosen": 4.9290924072265625, "log_odds_ratio": -0.1295970231294632, "logits/chosen": 0.9138272404670715, "logits/rejected": 0.984452486038208, "logps/chosen": -1.5073084831237793, "logps/rejected": -6.210038185119629, "loss": 0.6596, "nll_loss": 0.6465928554534912, "rewards/accuracies": 0.875, "rewards/chosen": -0.15073084831237793, "rewards/margins": 0.470272958278656, "rewards/rejected": -0.6210038065910339, "step": 6533 }, { "epoch": 17.88911704312115, "grad_norm": 7.479520320892334, "learning_rate": 1.0534246575342465e-07, "log_odds_chosen": 1.673678994178772, "log_odds_ratio": -0.5046159625053406, "logits/chosen": 0.8212252259254456, "logits/rejected": 0.7980712652206421, "logps/chosen": -2.7685482501983643, "logps/rejected": -4.386417388916016, "loss": 0.6749, "nll_loss": 0.6244333982467651, "rewards/accuracies": 0.625, "rewards/chosen": -0.27685481309890747, "rewards/margins": 0.16178692877292633, "rewards/rejected": -0.4386417269706726, "step": 6534 }, { "epoch": 17.89185489390828, "grad_norm": 4.911714553833008, "learning_rate": 1.052054794520548e-07, "log_odds_chosen": 3.0136468410491943, "log_odds_ratio": -0.11272329092025757, "logits/chosen": 0.9021570682525635, "logits/rejected": 0.9293819069862366, "logps/chosen": -2.445547580718994, "logps/rejected": -5.303866863250732, "loss": 0.6238, "nll_loss": 0.6125579476356506, "rewards/accuracies": 1.0, "rewards/chosen": -0.24455475807189941, "rewards/margins": 0.2858319580554962, "rewards/rejected": -0.5303866863250732, "step": 6535 }, { "epoch": 17.894592744695416, "grad_norm": 5.8269877433776855, "learning_rate": 1.0506849315068492e-07, "log_odds_chosen": 1.276953935623169, "log_odds_ratio": -0.33737263083457947, "logits/chosen": 0.7927377223968506, "logits/rejected": 0.8450919389724731, "logps/chosen": -1.7425405979156494, "logps/rejected": -2.813338279724121, "loss": 0.5549, "nll_loss": 0.5211501717567444, "rewards/accuracies": 0.875, "rewards/chosen": -0.17425405979156494, "rewards/margins": 0.10707978904247284, "rewards/rejected": -0.281333863735199, "step": 6536 }, { "epoch": 17.897330595482547, "grad_norm": 6.774499893188477, "learning_rate": 1.0493150684931506e-07, "log_odds_chosen": 2.2174336910247803, "log_odds_ratio": -0.33198314905166626, "logits/chosen": 1.1442451477050781, "logits/rejected": 1.2442915439605713, "logps/chosen": -3.039590358734131, "logps/rejected": -5.149129867553711, "loss": 0.6558, "nll_loss": 0.6226317882537842, "rewards/accuracies": 0.875, "rewards/chosen": -0.3039590120315552, "rewards/margins": 0.2109539806842804, "rewards/rejected": -0.5149129629135132, "step": 6537 }, { "epoch": 17.900068446269678, "grad_norm": 5.081761837005615, "learning_rate": 1.047945205479452e-07, "log_odds_chosen": 3.978808879852295, "log_odds_ratio": -0.19193598628044128, "logits/chosen": 0.764653742313385, "logits/rejected": 0.8257752656936646, "logps/chosen": -2.046905517578125, "logps/rejected": -5.853078842163086, "loss": 0.6339, "nll_loss": 0.6146602630615234, "rewards/accuracies": 1.0, "rewards/chosen": -0.20469054579734802, "rewards/margins": 0.38061732053756714, "rewards/rejected": -0.5853078365325928, "step": 6538 }, { "epoch": 17.90280629705681, "grad_norm": 5.897617816925049, "learning_rate": 1.0465753424657534e-07, "log_odds_chosen": 1.1300681829452515, "log_odds_ratio": -0.4992811977863312, "logits/chosen": 0.8645415306091309, "logits/rejected": 0.832831859588623, "logps/chosen": -2.0318245887756348, "logps/rejected": -2.999394416809082, "loss": 0.6163, "nll_loss": 0.5664164423942566, "rewards/accuracies": 0.875, "rewards/chosen": -0.20318247377872467, "rewards/margins": 0.09675700962543488, "rewards/rejected": -0.29993945360183716, "step": 6539 }, { "epoch": 17.905544147843944, "grad_norm": 7.906957626342773, "learning_rate": 1.0452054794520547e-07, "log_odds_chosen": 2.5073516368865967, "log_odds_ratio": -0.20564541220664978, "logits/chosen": 1.0687997341156006, "logits/rejected": 1.0515097379684448, "logps/chosen": -2.7653913497924805, "logps/rejected": -5.174330711364746, "loss": 0.7514, "nll_loss": 0.7308107614517212, "rewards/accuracies": 0.875, "rewards/chosen": -0.276539146900177, "rewards/margins": 0.24089397490024567, "rewards/rejected": -0.5174331068992615, "step": 6540 }, { "epoch": 17.908281998631075, "grad_norm": 6.32479190826416, "learning_rate": 1.043835616438356e-07, "log_odds_chosen": 2.4698500633239746, "log_odds_ratio": -0.345227986574173, "logits/chosen": 0.9774968028068542, "logits/rejected": 0.9572854042053223, "logps/chosen": -1.9863471984863281, "logps/rejected": -4.326857089996338, "loss": 0.6582, "nll_loss": 0.6237013339996338, "rewards/accuracies": 0.875, "rewards/chosen": -0.19863471388816833, "rewards/margins": 0.23405098915100098, "rewards/rejected": -0.4326857030391693, "step": 6541 }, { "epoch": 17.911019849418206, "grad_norm": 5.880894184112549, "learning_rate": 1.0424657534246576e-07, "log_odds_chosen": 1.745751142501831, "log_odds_ratio": -0.44468116760253906, "logits/chosen": 0.7959364056587219, "logits/rejected": 0.7950716614723206, "logps/chosen": -2.300992965698242, "logps/rejected": -3.9762754440307617, "loss": 0.7518, "nll_loss": 0.7073464393615723, "rewards/accuracies": 0.75, "rewards/chosen": -0.23009929060935974, "rewards/margins": 0.16752825677394867, "rewards/rejected": -0.3976275622844696, "step": 6542 }, { "epoch": 17.913757700205338, "grad_norm": 6.833588123321533, "learning_rate": 1.0410958904109588e-07, "log_odds_chosen": 1.7120277881622314, "log_odds_ratio": -0.32652485370635986, "logits/chosen": 0.8441749811172485, "logits/rejected": 0.9038898348808289, "logps/chosen": -2.2107746601104736, "logps/rejected": -3.7785329818725586, "loss": 0.7609, "nll_loss": 0.7282058000564575, "rewards/accuracies": 0.75, "rewards/chosen": -0.22107748687267303, "rewards/margins": 0.1567758023738861, "rewards/rejected": -0.37785327434539795, "step": 6543 }, { "epoch": 17.916495550992472, "grad_norm": 6.755133628845215, "learning_rate": 1.0397260273972602e-07, "log_odds_chosen": 0.9274454712867737, "log_odds_ratio": -0.45096105337142944, "logits/chosen": 0.6813935041427612, "logits/rejected": 0.6419744491577148, "logps/chosen": -2.3826568126678467, "logps/rejected": -3.198509693145752, "loss": 0.7, "nll_loss": 0.6548731327056885, "rewards/accuracies": 0.75, "rewards/chosen": -0.23826569318771362, "rewards/margins": 0.08158528804779053, "rewards/rejected": -0.31985098123550415, "step": 6544 }, { "epoch": 17.919233401779604, "grad_norm": 4.788509845733643, "learning_rate": 1.0383561643835616e-07, "log_odds_chosen": 2.484714984893799, "log_odds_ratio": -0.14605680108070374, "logits/chosen": 0.9496336579322815, "logits/rejected": 0.9728312492370605, "logps/chosen": -2.6972951889038086, "logps/rejected": -5.099910259246826, "loss": 0.7256, "nll_loss": 0.710964560508728, "rewards/accuracies": 1.0, "rewards/chosen": -0.26972952485084534, "rewards/margins": 0.2402615249156952, "rewards/rejected": -0.5099910497665405, "step": 6545 }, { "epoch": 17.921971252566735, "grad_norm": 8.060687065124512, "learning_rate": 1.0369863013698629e-07, "log_odds_chosen": 1.557185411453247, "log_odds_ratio": -0.673754096031189, "logits/chosen": 0.7281994819641113, "logits/rejected": 0.8249317407608032, "logps/chosen": -2.451578140258789, "logps/rejected": -3.867905378341675, "loss": 0.6772, "nll_loss": 0.6098188161849976, "rewards/accuracies": 0.875, "rewards/chosen": -0.24515782296657562, "rewards/margins": 0.14163270592689514, "rewards/rejected": -0.38679054379463196, "step": 6546 }, { "epoch": 17.924709103353866, "grad_norm": 7.820476055145264, "learning_rate": 1.0356164383561643e-07, "log_odds_chosen": 1.5185452699661255, "log_odds_ratio": -0.6805934906005859, "logits/chosen": 0.8399477005004883, "logits/rejected": 0.8314675092697144, "logps/chosen": -2.559920310974121, "logps/rejected": -3.9872679710388184, "loss": 0.7145, "nll_loss": 0.6464141607284546, "rewards/accuracies": 0.75, "rewards/chosen": -0.25599205493927, "rewards/margins": 0.14273472130298615, "rewards/rejected": -0.39872679114341736, "step": 6547 }, { "epoch": 17.927446954141, "grad_norm": 4.522357940673828, "learning_rate": 1.0342465753424658e-07, "log_odds_chosen": 2.760223865509033, "log_odds_ratio": -0.23300020396709442, "logits/chosen": 0.8991246223449707, "logits/rejected": 0.8939633369445801, "logps/chosen": -1.7017391920089722, "logps/rejected": -4.272289752960205, "loss": 0.5973, "nll_loss": 0.5740446448326111, "rewards/accuracies": 0.875, "rewards/chosen": -0.17017392814159393, "rewards/margins": 0.25705504417419434, "rewards/rejected": -0.42722898721694946, "step": 6548 }, { "epoch": 17.930184804928132, "grad_norm": 6.799291133880615, "learning_rate": 1.0328767123287672e-07, "log_odds_chosen": 1.8047770261764526, "log_odds_ratio": -0.3499572277069092, "logits/chosen": 0.885697603225708, "logits/rejected": 0.9262430667877197, "logps/chosen": -2.534189224243164, "logps/rejected": -4.215073585510254, "loss": 0.7403, "nll_loss": 0.7053480744361877, "rewards/accuracies": 0.875, "rewards/chosen": -0.2534189224243164, "rewards/margins": 0.16808845102787018, "rewards/rejected": -0.4215073585510254, "step": 6549 }, { "epoch": 17.932922655715263, "grad_norm": 5.761795997619629, "learning_rate": 1.0315068493150684e-07, "log_odds_chosen": 1.0844902992248535, "log_odds_ratio": -0.3949485421180725, "logits/chosen": 0.8660902380943298, "logits/rejected": 0.8915550708770752, "logps/chosen": -1.8161146640777588, "logps/rejected": -2.7815346717834473, "loss": 0.6194, "nll_loss": 0.579873263835907, "rewards/accuracies": 0.875, "rewards/chosen": -0.18161147832870483, "rewards/margins": 0.09654199331998825, "rewards/rejected": -0.2781534790992737, "step": 6550 }, { "epoch": 17.935660506502394, "grad_norm": 5.76507043838501, "learning_rate": 1.0301369863013698e-07, "log_odds_chosen": 4.58646821975708, "log_odds_ratio": -0.030918315052986145, "logits/chosen": 1.103231430053711, "logits/rejected": 1.1479159593582153, "logps/chosen": -2.3333773612976074, "logps/rejected": -6.814461708068848, "loss": 0.6133, "nll_loss": 0.6102070212364197, "rewards/accuracies": 1.0, "rewards/chosen": -0.23333773016929626, "rewards/margins": 0.448108434677124, "rewards/rejected": -0.6814461350440979, "step": 6551 }, { "epoch": 17.93839835728953, "grad_norm": 5.401240825653076, "learning_rate": 1.0287671232876712e-07, "log_odds_chosen": 2.481205463409424, "log_odds_ratio": -0.28139132261276245, "logits/chosen": 0.9462822675704956, "logits/rejected": 0.9832500219345093, "logps/chosen": -2.5666744709014893, "logps/rejected": -4.954954624176025, "loss": 0.5654, "nll_loss": 0.5372918844223022, "rewards/accuracies": 0.875, "rewards/chosen": -0.25666743516921997, "rewards/margins": 0.23882801830768585, "rewards/rejected": -0.495495468378067, "step": 6552 }, { "epoch": 17.94113620807666, "grad_norm": 5.5914225578308105, "learning_rate": 1.0273972602739725e-07, "log_odds_chosen": 3.2301926612854004, "log_odds_ratio": -0.10678528249263763, "logits/chosen": 0.5798527598381042, "logits/rejected": 0.5344926714897156, "logps/chosen": -2.37394642829895, "logps/rejected": -5.480287551879883, "loss": 0.8294, "nll_loss": 0.818678081035614, "rewards/accuracies": 1.0, "rewards/chosen": -0.23739466071128845, "rewards/margins": 0.31063413619995117, "rewards/rejected": -0.548028826713562, "step": 6553 }, { "epoch": 17.94387405886379, "grad_norm": 4.789680004119873, "learning_rate": 1.026027397260274e-07, "log_odds_chosen": 2.590075731277466, "log_odds_ratio": -0.2029987871646881, "logits/chosen": 0.7257388830184937, "logits/rejected": 0.7259238958358765, "logps/chosen": -1.7240713834762573, "logps/rejected": -4.120407581329346, "loss": 0.5058, "nll_loss": 0.4854919910430908, "rewards/accuracies": 1.0, "rewards/chosen": -0.1724071502685547, "rewards/margins": 0.23963358998298645, "rewards/rejected": -0.4120407700538635, "step": 6554 }, { "epoch": 17.946611909650922, "grad_norm": 5.259463787078857, "learning_rate": 1.0246575342465754e-07, "log_odds_chosen": 1.7457016706466675, "log_odds_ratio": -0.29163017868995667, "logits/chosen": 0.7367517948150635, "logits/rejected": 0.7918524742126465, "logps/chosen": -1.6464312076568604, "logps/rejected": -3.260000228881836, "loss": 0.6271, "nll_loss": 0.5979622602462769, "rewards/accuracies": 1.0, "rewards/chosen": -0.16464310884475708, "rewards/margins": 0.16135692596435547, "rewards/rejected": -0.32600003480911255, "step": 6555 }, { "epoch": 17.949349760438057, "grad_norm": 5.129406929016113, "learning_rate": 1.0232876712328766e-07, "log_odds_chosen": 2.054551124572754, "log_odds_ratio": -0.1923137605190277, "logits/chosen": 0.759874165058136, "logits/rejected": 0.7904407978057861, "logps/chosen": -1.9269452095031738, "logps/rejected": -3.8285884857177734, "loss": 0.5499, "nll_loss": 0.5306878089904785, "rewards/accuracies": 1.0, "rewards/chosen": -0.1926945298910141, "rewards/margins": 0.19016434252262115, "rewards/rejected": -0.38285887241363525, "step": 6556 }, { "epoch": 17.95208761122519, "grad_norm": 5.699928283691406, "learning_rate": 1.021917808219178e-07, "log_odds_chosen": 2.3691418170928955, "log_odds_ratio": -0.23297134041786194, "logits/chosen": 0.6754378080368042, "logits/rejected": 0.6843593120574951, "logps/chosen": -1.7735681533813477, "logps/rejected": -3.930708885192871, "loss": 0.6833, "nll_loss": 0.6600497961044312, "rewards/accuracies": 1.0, "rewards/chosen": -0.1773568093776703, "rewards/margins": 0.21571409702301025, "rewards/rejected": -0.39307090640068054, "step": 6557 }, { "epoch": 17.95482546201232, "grad_norm": 6.275620460510254, "learning_rate": 1.0205479452054794e-07, "log_odds_chosen": 1.7318624258041382, "log_odds_ratio": -0.3392234444618225, "logits/chosen": 0.9805114269256592, "logits/rejected": 0.9679703712463379, "logps/chosen": -2.07143497467041, "logps/rejected": -3.6503050327301025, "loss": 0.6613, "nll_loss": 0.627345860004425, "rewards/accuracies": 0.75, "rewards/chosen": -0.20714351534843445, "rewards/margins": 0.15788699686527252, "rewards/rejected": -0.3650304973125458, "step": 6558 }, { "epoch": 17.95756331279945, "grad_norm": 5.646972179412842, "learning_rate": 1.0191780821917808e-07, "log_odds_chosen": 3.069420099258423, "log_odds_ratio": -0.1875782012939453, "logits/chosen": 0.8693562746047974, "logits/rejected": 0.9085086584091187, "logps/chosen": -2.2122080326080322, "logps/rejected": -5.138339042663574, "loss": 0.6209, "nll_loss": 0.6021425724029541, "rewards/accuracies": 0.875, "rewards/chosen": -0.22122082114219666, "rewards/margins": 0.29261308908462524, "rewards/rejected": -0.5138339400291443, "step": 6559 }, { "epoch": 17.960301163586585, "grad_norm": 6.361242771148682, "learning_rate": 1.0178082191780822e-07, "log_odds_chosen": 4.619348526000977, "log_odds_ratio": -0.0751546174287796, "logits/chosen": 1.0808905363082886, "logits/rejected": 1.1290682554244995, "logps/chosen": -2.500229835510254, "logps/rejected": -6.969235420227051, "loss": 0.7931, "nll_loss": 0.7855634093284607, "rewards/accuracies": 1.0, "rewards/chosen": -0.2500230073928833, "rewards/margins": 0.4469006061553955, "rewards/rejected": -0.696923553943634, "step": 6560 }, { "epoch": 17.963039014373717, "grad_norm": 5.4295196533203125, "learning_rate": 1.0164383561643836e-07, "log_odds_chosen": 2.015021800994873, "log_odds_ratio": -0.41513964533805847, "logits/chosen": 0.8522136807441711, "logits/rejected": 0.7885439395904541, "logps/chosen": -2.5331101417541504, "logps/rejected": -4.4900689125061035, "loss": 0.6382, "nll_loss": 0.5967118144035339, "rewards/accuracies": 0.625, "rewards/chosen": -0.25331100821495056, "rewards/margins": 0.1956958770751953, "rewards/rejected": -0.44900691509246826, "step": 6561 }, { "epoch": 17.965776865160848, "grad_norm": 5.61974573135376, "learning_rate": 1.015068493150685e-07, "log_odds_chosen": 1.4915828704833984, "log_odds_ratio": -0.36663439869880676, "logits/chosen": 0.3997436463832855, "logits/rejected": 0.5445408225059509, "logps/chosen": -1.9865831136703491, "logps/rejected": -3.4022603034973145, "loss": 0.7178, "nll_loss": 0.6811390519142151, "rewards/accuracies": 0.875, "rewards/chosen": -0.1986583173274994, "rewards/margins": 0.14156770706176758, "rewards/rejected": -0.34022602438926697, "step": 6562 }, { "epoch": 17.968514715947983, "grad_norm": 6.3872456550598145, "learning_rate": 1.0136986301369862e-07, "log_odds_chosen": 0.9654494524002075, "log_odds_ratio": -0.37620335817337036, "logits/chosen": 0.8333102464675903, "logits/rejected": 0.8711255788803101, "logps/chosen": -2.274636745452881, "logps/rejected": -3.126924753189087, "loss": 0.5221, "nll_loss": 0.48444199562072754, "rewards/accuracies": 0.75, "rewards/chosen": -0.22746369242668152, "rewards/margins": 0.08522877842187881, "rewards/rejected": -0.31269246339797974, "step": 6563 }, { "epoch": 17.971252566735114, "grad_norm": 5.185598850250244, "learning_rate": 1.0123287671232876e-07, "log_odds_chosen": 3.3703508377075195, "log_odds_ratio": -0.23245373368263245, "logits/chosen": 0.8024520874023438, "logits/rejected": 0.8561743497848511, "logps/chosen": -1.907049536705017, "logps/rejected": -5.021285057067871, "loss": 0.6863, "nll_loss": 0.6630244255065918, "rewards/accuracies": 0.875, "rewards/chosen": -0.19070495665073395, "rewards/margins": 0.31142354011535645, "rewards/rejected": -0.502128541469574, "step": 6564 }, { "epoch": 17.973990417522245, "grad_norm": 5.287975788116455, "learning_rate": 1.010958904109589e-07, "log_odds_chosen": 1.2904045581817627, "log_odds_ratio": -0.31784912943840027, "logits/chosen": 0.6816554069519043, "logits/rejected": 0.688351571559906, "logps/chosen": -1.944562315940857, "logps/rejected": -3.1219279766082764, "loss": 0.6041, "nll_loss": 0.5722680687904358, "rewards/accuracies": 1.0, "rewards/chosen": -0.19445621967315674, "rewards/margins": 0.1177365854382515, "rewards/rejected": -0.31219279766082764, "step": 6565 }, { "epoch": 17.976728268309376, "grad_norm": 5.270440101623535, "learning_rate": 1.0095890410958902e-07, "log_odds_chosen": 1.92338228225708, "log_odds_ratio": -0.19578410685062408, "logits/chosen": 0.8147523999214172, "logits/rejected": 0.8357040882110596, "logps/chosen": -1.6108644008636475, "logps/rejected": -3.3409619331359863, "loss": 0.5987, "nll_loss": 0.5790832042694092, "rewards/accuracies": 1.0, "rewards/chosen": -0.16108644008636475, "rewards/margins": 0.1730097383260727, "rewards/rejected": -0.33409619331359863, "step": 6566 }, { "epoch": 17.97946611909651, "grad_norm": 5.330347537994385, "learning_rate": 1.0082191780821918e-07, "log_odds_chosen": 2.2446539402008057, "log_odds_ratio": -0.2715326249599457, "logits/chosen": 1.1648393869400024, "logits/rejected": 1.2174482345581055, "logps/chosen": -2.3243141174316406, "logps/rejected": -4.36981725692749, "loss": 0.6525, "nll_loss": 0.6253517866134644, "rewards/accuracies": 0.875, "rewards/chosen": -0.23243139684200287, "rewards/margins": 0.2045503556728363, "rewards/rejected": -0.436981737613678, "step": 6567 }, { "epoch": 17.982203969883642, "grad_norm": 5.758352279663086, "learning_rate": 1.0068493150684931e-07, "log_odds_chosen": 1.036045789718628, "log_odds_ratio": -0.4014585018157959, "logits/chosen": 0.9101123809814453, "logits/rejected": 0.9436012506484985, "logps/chosen": -2.3018429279327393, "logps/rejected": -3.2446374893188477, "loss": 0.688, "nll_loss": 0.6478908061981201, "rewards/accuracies": 0.875, "rewards/chosen": -0.23018428683280945, "rewards/margins": 0.0942794680595398, "rewards/rejected": -0.32446375489234924, "step": 6568 }, { "epoch": 17.984941820670773, "grad_norm": 9.362654685974121, "learning_rate": 1.0054794520547945e-07, "log_odds_chosen": 1.2292674779891968, "log_odds_ratio": -0.5677950382232666, "logits/chosen": 0.9180585145950317, "logits/rejected": 0.894817054271698, "logps/chosen": -3.0226361751556396, "logps/rejected": -4.180285453796387, "loss": 0.8186, "nll_loss": 0.7618210315704346, "rewards/accuracies": 0.75, "rewards/chosen": -0.30226361751556396, "rewards/margins": 0.11576495319604874, "rewards/rejected": -0.4180285334587097, "step": 6569 }, { "epoch": 17.987679671457904, "grad_norm": 4.4378767013549805, "learning_rate": 1.0041095890410958e-07, "log_odds_chosen": 2.2765350341796875, "log_odds_ratio": -0.21882550418376923, "logits/chosen": 1.0094376802444458, "logits/rejected": 1.0140718221664429, "logps/chosen": -2.1411447525024414, "logps/rejected": -4.275498390197754, "loss": 0.6035, "nll_loss": 0.5815954208374023, "rewards/accuracies": 1.0, "rewards/chosen": -0.2141144573688507, "rewards/margins": 0.21343539655208588, "rewards/rejected": -0.4275498688220978, "step": 6570 }, { "epoch": 17.99041752224504, "grad_norm": 6.555207252502441, "learning_rate": 1.0027397260273972e-07, "log_odds_chosen": 1.678921103477478, "log_odds_ratio": -0.46119168400764465, "logits/chosen": 0.7955559492111206, "logits/rejected": 0.824325680732727, "logps/chosen": -2.9354166984558105, "logps/rejected": -4.534012794494629, "loss": 0.6917, "nll_loss": 0.645563006401062, "rewards/accuracies": 0.75, "rewards/chosen": -0.29354166984558105, "rewards/margins": 0.15985959768295288, "rewards/rejected": -0.45340126752853394, "step": 6571 }, { "epoch": 17.99315537303217, "grad_norm": 6.434324741363525, "learning_rate": 1.0013698630136986e-07, "log_odds_chosen": 1.0275323390960693, "log_odds_ratio": -0.4245482087135315, "logits/chosen": 0.8122272491455078, "logits/rejected": 0.9030048251152039, "logps/chosen": -2.8102169036865234, "logps/rejected": -3.758236885070801, "loss": 0.6814, "nll_loss": 0.6389678716659546, "rewards/accuracies": 0.75, "rewards/chosen": -0.28102168440818787, "rewards/margins": 0.09480200707912445, "rewards/rejected": -0.3758236765861511, "step": 6572 }, { "epoch": 17.9958932238193, "grad_norm": 5.499684810638428, "learning_rate": 1e-07, "log_odds_chosen": 1.875235676765442, "log_odds_ratio": -0.33259034156799316, "logits/chosen": 1.1542856693267822, "logits/rejected": 1.1249183416366577, "logps/chosen": -1.8122155666351318, "logps/rejected": -3.5849761962890625, "loss": 0.5629, "nll_loss": 0.5296409726142883, "rewards/accuracies": 1.0, "rewards/chosen": -0.18122157454490662, "rewards/margins": 0.17727605998516083, "rewards/rejected": -0.35849764943122864, "step": 6573 }, { "epoch": 17.998631074606433, "grad_norm": 5.119003772735596, "learning_rate": 9.986301369863013e-08, "log_odds_chosen": 2.554783582687378, "log_odds_ratio": -0.19636842608451843, "logits/chosen": 0.7842682003974915, "logits/rejected": 0.7844560146331787, "logps/chosen": -2.08066987991333, "logps/rejected": -4.523752212524414, "loss": 0.5982, "nll_loss": 0.5785951614379883, "rewards/accuracies": 1.0, "rewards/chosen": -0.20806699991226196, "rewards/margins": 0.244308203458786, "rewards/rejected": -0.45237523317337036, "step": 6574 }, { "epoch": 18.001368925393567, "grad_norm": 6.232219696044922, "learning_rate": 9.972602739726027e-08, "log_odds_chosen": 0.8605949878692627, "log_odds_ratio": -0.38422292470932007, "logits/chosen": 0.9564046263694763, "logits/rejected": 0.9324955940246582, "logps/chosen": -1.8288525342941284, "logps/rejected": -2.5580086708068848, "loss": 0.7779, "nll_loss": 0.7394288182258606, "rewards/accuracies": 1.0, "rewards/chosen": -0.18288525938987732, "rewards/margins": 0.07291561365127563, "rewards/rejected": -0.25580087304115295, "step": 6575 }, { "epoch": 18.0041067761807, "grad_norm": 6.68727445602417, "learning_rate": 9.95890410958904e-08, "log_odds_chosen": 1.1533936262130737, "log_odds_ratio": -0.4317530393600464, "logits/chosen": 0.6943520903587341, "logits/rejected": 0.704904317855835, "logps/chosen": -2.3792104721069336, "logps/rejected": -3.3804593086242676, "loss": 0.6424, "nll_loss": 0.5992351174354553, "rewards/accuracies": 0.875, "rewards/chosen": -0.23792105913162231, "rewards/margins": 0.10012490302324295, "rewards/rejected": -0.33804595470428467, "step": 6576 }, { "epoch": 18.00684462696783, "grad_norm": 6.221654415130615, "learning_rate": 9.945205479452054e-08, "log_odds_chosen": 1.1316157579421997, "log_odds_ratio": -0.35383912920951843, "logits/chosen": 0.7708480358123779, "logits/rejected": 0.7578879594802856, "logps/chosen": -2.526442766189575, "logps/rejected": -3.512017250061035, "loss": 0.6003, "nll_loss": 0.5648807287216187, "rewards/accuracies": 0.875, "rewards/chosen": -0.25264430046081543, "rewards/margins": 0.09855743497610092, "rewards/rejected": -0.35120171308517456, "step": 6577 }, { "epoch": 18.00958247775496, "grad_norm": 5.825276851654053, "learning_rate": 9.931506849315068e-08, "log_odds_chosen": 3.4320225715637207, "log_odds_ratio": -0.22555285692214966, "logits/chosen": 1.0302693843841553, "logits/rejected": 1.0636632442474365, "logps/chosen": -2.427354574203491, "logps/rejected": -5.793745517730713, "loss": 0.7102, "nll_loss": 0.6876786947250366, "rewards/accuracies": 0.875, "rewards/chosen": -0.24273546040058136, "rewards/margins": 0.33663907647132874, "rewards/rejected": -0.5793745517730713, "step": 6578 }, { "epoch": 18.012320328542096, "grad_norm": 5.245944499969482, "learning_rate": 9.917808219178083e-08, "log_odds_chosen": 2.3162431716918945, "log_odds_ratio": -0.24672849476337433, "logits/chosen": 0.834964394569397, "logits/rejected": 0.7933584451675415, "logps/chosen": -1.7582125663757324, "logps/rejected": -3.868867874145508, "loss": 0.5613, "nll_loss": 0.5366618633270264, "rewards/accuracies": 1.0, "rewards/chosen": -0.17582125961780548, "rewards/margins": 0.21106554567813873, "rewards/rejected": -0.3868868052959442, "step": 6579 }, { "epoch": 18.015058179329227, "grad_norm": 5.862938404083252, "learning_rate": 9.904109589041095e-08, "log_odds_chosen": 2.147700309753418, "log_odds_ratio": -0.31431490182876587, "logits/chosen": 0.6299666166305542, "logits/rejected": 0.615965723991394, "logps/chosen": -1.7746176719665527, "logps/rejected": -3.819345712661743, "loss": 0.6414, "nll_loss": 0.6099755167961121, "rewards/accuracies": 0.75, "rewards/chosen": -0.17746175825595856, "rewards/margins": 0.20447281002998352, "rewards/rejected": -0.38193458318710327, "step": 6580 }, { "epoch": 18.017796030116358, "grad_norm": 5.051829814910889, "learning_rate": 9.890410958904109e-08, "log_odds_chosen": 2.0875866413116455, "log_odds_ratio": -0.2374088168144226, "logits/chosen": 0.8836090564727783, "logits/rejected": 0.8855979442596436, "logps/chosen": -1.9370903968811035, "logps/rejected": -3.8681998252868652, "loss": 0.5441, "nll_loss": 0.5203613042831421, "rewards/accuracies": 0.875, "rewards/chosen": -0.19370904564857483, "rewards/margins": 0.19311092793941498, "rewards/rejected": -0.3868199586868286, "step": 6581 }, { "epoch": 18.02053388090349, "grad_norm": 4.61602783203125, "learning_rate": 9.876712328767123e-08, "log_odds_chosen": 2.9301047325134277, "log_odds_ratio": -0.17389614880084991, "logits/chosen": 0.9334429502487183, "logits/rejected": 1.0070546865463257, "logps/chosen": -2.3551931381225586, "logps/rejected": -5.18017578125, "loss": 0.6308, "nll_loss": 0.6133610606193542, "rewards/accuracies": 1.0, "rewards/chosen": -0.23551933467388153, "rewards/margins": 0.28249824047088623, "rewards/rejected": -0.5180175304412842, "step": 6582 }, { "epoch": 18.023271731690624, "grad_norm": 6.8403401374816895, "learning_rate": 9.863013698630136e-08, "log_odds_chosen": 1.7089588642120361, "log_odds_ratio": -0.3521845042705536, "logits/chosen": 0.9057621359825134, "logits/rejected": 0.9381083250045776, "logps/chosen": -2.283567428588867, "logps/rejected": -3.8621163368225098, "loss": 0.5748, "nll_loss": 0.5396003723144531, "rewards/accuracies": 0.75, "rewards/chosen": -0.2283567488193512, "rewards/margins": 0.15785491466522217, "rewards/rejected": -0.38621166348457336, "step": 6583 }, { "epoch": 18.026009582477755, "grad_norm": 9.834662437438965, "learning_rate": 9.84931506849315e-08, "log_odds_chosen": 0.512974739074707, "log_odds_ratio": -0.5657281875610352, "logits/chosen": 0.7657792568206787, "logits/rejected": 0.7138677835464478, "logps/chosen": -2.6485753059387207, "logps/rejected": -3.0971298217773438, "loss": 0.6466, "nll_loss": 0.5900481939315796, "rewards/accuracies": 0.75, "rewards/chosen": -0.26485753059387207, "rewards/margins": 0.04485547915101051, "rewards/rejected": -0.3097130060195923, "step": 6584 }, { "epoch": 18.028747433264886, "grad_norm": 6.509829044342041, "learning_rate": 9.835616438356165e-08, "log_odds_chosen": 2.016141653060913, "log_odds_ratio": -0.29797524213790894, "logits/chosen": 0.8283474445343018, "logits/rejected": 0.8718734383583069, "logps/chosen": -2.9639809131622314, "logps/rejected": -4.9276933670043945, "loss": 0.7005, "nll_loss": 0.6707225441932678, "rewards/accuracies": 1.0, "rewards/chosen": -0.2963981032371521, "rewards/margins": 0.19637122750282288, "rewards/rejected": -0.492769330739975, "step": 6585 }, { "epoch": 18.031485284052017, "grad_norm": 7.035714149475098, "learning_rate": 9.821917808219179e-08, "log_odds_chosen": 0.9404433965682983, "log_odds_ratio": -0.7234906554222107, "logits/chosen": 0.7268437147140503, "logits/rejected": 0.7468106746673584, "logps/chosen": -2.5096933841705322, "logps/rejected": -3.3723268508911133, "loss": 0.6877, "nll_loss": 0.6153855919837952, "rewards/accuracies": 0.75, "rewards/chosen": -0.2509693503379822, "rewards/margins": 0.08626335859298706, "rewards/rejected": -0.33723270893096924, "step": 6586 }, { "epoch": 18.034223134839152, "grad_norm": 6.476069450378418, "learning_rate": 9.808219178082191e-08, "log_odds_chosen": 2.157562255859375, "log_odds_ratio": -0.18555548787117004, "logits/chosen": 0.8463095426559448, "logits/rejected": 0.8906701803207397, "logps/chosen": -2.8906073570251465, "logps/rejected": -4.943537712097168, "loss": 0.6256, "nll_loss": 0.607078492641449, "rewards/accuracies": 1.0, "rewards/chosen": -0.2890607416629791, "rewards/margins": 0.20529305934906006, "rewards/rejected": -0.4943537712097168, "step": 6587 }, { "epoch": 18.036960985626283, "grad_norm": 8.788321495056152, "learning_rate": 9.794520547945205e-08, "log_odds_chosen": 3.683659315109253, "log_odds_ratio": -0.32561808824539185, "logits/chosen": 0.9626191854476929, "logits/rejected": 0.9726094007492065, "logps/chosen": -2.777773141860962, "logps/rejected": -6.333008766174316, "loss": 0.6533, "nll_loss": 0.6206904649734497, "rewards/accuracies": 0.875, "rewards/chosen": -0.2777773141860962, "rewards/margins": 0.3555235266685486, "rewards/rejected": -0.6333009004592896, "step": 6588 }, { "epoch": 18.039698836413415, "grad_norm": 4.34479284286499, "learning_rate": 9.780821917808219e-08, "log_odds_chosen": 2.556687831878662, "log_odds_ratio": -0.15239371359348297, "logits/chosen": 1.0317282676696777, "logits/rejected": 1.0029470920562744, "logps/chosen": -1.741133689880371, "logps/rejected": -4.09190559387207, "loss": 0.6187, "nll_loss": 0.6034342646598816, "rewards/accuracies": 1.0, "rewards/chosen": -0.17411336302757263, "rewards/margins": 0.2350771725177765, "rewards/rejected": -0.4091905355453491, "step": 6589 }, { "epoch": 18.04243668720055, "grad_norm": 4.722572326660156, "learning_rate": 9.767123287671232e-08, "log_odds_chosen": 3.284677028656006, "log_odds_ratio": -0.12837277352809906, "logits/chosen": 0.813240647315979, "logits/rejected": 0.8161231279373169, "logps/chosen": -1.4835124015808105, "logps/rejected": -4.517705917358398, "loss": 0.5047, "nll_loss": 0.4918329417705536, "rewards/accuracies": 1.0, "rewards/chosen": -0.14835123717784882, "rewards/margins": 0.3034193217754364, "rewards/rejected": -0.451770544052124, "step": 6590 }, { "epoch": 18.04517453798768, "grad_norm": 6.7491326332092285, "learning_rate": 9.753424657534245e-08, "log_odds_chosen": 1.5322930812835693, "log_odds_ratio": -0.45370054244995117, "logits/chosen": 0.9491972923278809, "logits/rejected": 1.0091016292572021, "logps/chosen": -2.4489617347717285, "logps/rejected": -3.8427681922912598, "loss": 0.6929, "nll_loss": 0.6475145816802979, "rewards/accuracies": 0.75, "rewards/chosen": -0.24489618837833405, "rewards/margins": 0.13938066363334656, "rewards/rejected": -0.3842768371105194, "step": 6591 }, { "epoch": 18.04791238877481, "grad_norm": 7.828908920288086, "learning_rate": 9.739726027397261e-08, "log_odds_chosen": 1.8978346586227417, "log_odds_ratio": -0.3309207558631897, "logits/chosen": 1.060227394104004, "logits/rejected": 1.0994449853897095, "logps/chosen": -2.9674952030181885, "logps/rejected": -4.801543712615967, "loss": 0.7226, "nll_loss": 0.6895139813423157, "rewards/accuracies": 0.75, "rewards/chosen": -0.2967495024204254, "rewards/margins": 0.18340489268302917, "rewards/rejected": -0.4801543951034546, "step": 6592 }, { "epoch": 18.050650239561943, "grad_norm": 6.842532634735107, "learning_rate": 9.726027397260273e-08, "log_odds_chosen": 3.393301010131836, "log_odds_ratio": -0.16563665866851807, "logits/chosen": 1.0449432134628296, "logits/rejected": 1.0758835077285767, "logps/chosen": -2.494539976119995, "logps/rejected": -5.747866630554199, "loss": 0.7427, "nll_loss": 0.7261261940002441, "rewards/accuracies": 1.0, "rewards/chosen": -0.24945402145385742, "rewards/margins": 0.3253326117992401, "rewards/rejected": -0.5747866630554199, "step": 6593 }, { "epoch": 18.053388090349078, "grad_norm": 5.160789966583252, "learning_rate": 9.712328767123287e-08, "log_odds_chosen": 2.9733150005340576, "log_odds_ratio": -0.19119413197040558, "logits/chosen": 0.7410935759544373, "logits/rejected": 0.7722420692443848, "logps/chosen": -1.9673300981521606, "logps/rejected": -4.779071807861328, "loss": 0.6219, "nll_loss": 0.6028043031692505, "rewards/accuracies": 1.0, "rewards/chosen": -0.1967330127954483, "rewards/margins": 0.2811741530895233, "rewards/rejected": -0.4779071807861328, "step": 6594 }, { "epoch": 18.05612594113621, "grad_norm": 6.290205955505371, "learning_rate": 9.698630136986301e-08, "log_odds_chosen": 3.1670944690704346, "log_odds_ratio": -0.22066956758499146, "logits/chosen": 0.7777963876724243, "logits/rejected": 0.7663440704345703, "logps/chosen": -1.825866460800171, "logps/rejected": -4.827906608581543, "loss": 0.727, "nll_loss": 0.7048864364624023, "rewards/accuracies": 0.875, "rewards/chosen": -0.1825866401195526, "rewards/margins": 0.3002040386199951, "rewards/rejected": -0.48279067873954773, "step": 6595 }, { "epoch": 18.05886379192334, "grad_norm": 6.618955612182617, "learning_rate": 9.684931506849315e-08, "log_odds_chosen": 1.6725456714630127, "log_odds_ratio": -0.2812137007713318, "logits/chosen": 0.8665903806686401, "logits/rejected": 0.9949965476989746, "logps/chosen": -2.9873719215393066, "logps/rejected": -4.596440315246582, "loss": 0.6951, "nll_loss": 0.6670045852661133, "rewards/accuracies": 0.875, "rewards/chosen": -0.29873722791671753, "rewards/margins": 0.1609068214893341, "rewards/rejected": -0.45964404940605164, "step": 6596 }, { "epoch": 18.06160164271047, "grad_norm": 5.168435096740723, "learning_rate": 9.671232876712327e-08, "log_odds_chosen": 1.9069408178329468, "log_odds_ratio": -0.3251287639141083, "logits/chosen": 0.770147442817688, "logits/rejected": 0.7563395500183105, "logps/chosen": -2.0931267738342285, "logps/rejected": -3.9240903854370117, "loss": 0.6258, "nll_loss": 0.5932990908622742, "rewards/accuracies": 0.875, "rewards/chosen": -0.20931267738342285, "rewards/margins": 0.18309637904167175, "rewards/rejected": -0.3924090564250946, "step": 6597 }, { "epoch": 18.064339493497606, "grad_norm": 6.040997505187988, "learning_rate": 9.657534246575343e-08, "log_odds_chosen": 3.362790107727051, "log_odds_ratio": -0.06977351009845734, "logits/chosen": 0.6968035697937012, "logits/rejected": 0.7099665999412537, "logps/chosen": -2.134667158126831, "logps/rejected": -5.35432767868042, "loss": 0.5942, "nll_loss": 0.5871815085411072, "rewards/accuracies": 1.0, "rewards/chosen": -0.21346673369407654, "rewards/margins": 0.3219660520553589, "rewards/rejected": -0.5354328155517578, "step": 6598 }, { "epoch": 18.067077344284737, "grad_norm": 6.413759708404541, "learning_rate": 9.643835616438357e-08, "log_odds_chosen": 1.5376577377319336, "log_odds_ratio": -0.49233195185661316, "logits/chosen": 1.1214210987091064, "logits/rejected": 1.0987184047698975, "logps/chosen": -2.7760701179504395, "logps/rejected": -4.221990585327148, "loss": 0.683, "nll_loss": 0.6337285041809082, "rewards/accuracies": 0.75, "rewards/chosen": -0.2776070237159729, "rewards/margins": 0.1445920616388321, "rewards/rejected": -0.4221990704536438, "step": 6599 }, { "epoch": 18.069815195071868, "grad_norm": 4.844972133636475, "learning_rate": 9.630136986301369e-08, "log_odds_chosen": 3.6470553874969482, "log_odds_ratio": -0.23465313017368317, "logits/chosen": 0.7980974912643433, "logits/rejected": 0.8410755395889282, "logps/chosen": -2.4719619750976562, "logps/rejected": -6.012674331665039, "loss": 0.5927, "nll_loss": 0.5692488551139832, "rewards/accuracies": 0.875, "rewards/chosen": -0.24719621241092682, "rewards/margins": 0.3540712594985962, "rewards/rejected": -0.6012674570083618, "step": 6600 }, { "epoch": 18.072553045859, "grad_norm": 5.071158409118652, "learning_rate": 9.616438356164383e-08, "log_odds_chosen": 1.8289391994476318, "log_odds_ratio": -0.2338440716266632, "logits/chosen": 0.7193225026130676, "logits/rejected": 0.7225149869918823, "logps/chosen": -1.7305166721343994, "logps/rejected": -3.395824670791626, "loss": 0.5419, "nll_loss": 0.5185015797615051, "rewards/accuracies": 1.0, "rewards/chosen": -0.17305168509483337, "rewards/margins": 0.1665307879447937, "rewards/rejected": -0.3395824730396271, "step": 6601 }, { "epoch": 18.075290896646134, "grad_norm": 5.653438568115234, "learning_rate": 9.602739726027397e-08, "log_odds_chosen": 1.0832130908966064, "log_odds_ratio": -0.3529645502567291, "logits/chosen": 0.9608824253082275, "logits/rejected": 0.9497785568237305, "logps/chosen": -2.253288745880127, "logps/rejected": -3.267906665802002, "loss": 0.7111, "nll_loss": 0.6758512258529663, "rewards/accuracies": 1.0, "rewards/chosen": -0.22532887756824493, "rewards/margins": 0.10146176815032959, "rewards/rejected": -0.3267906606197357, "step": 6602 }, { "epoch": 18.078028747433265, "grad_norm": 5.357902526855469, "learning_rate": 9.58904109589041e-08, "log_odds_chosen": 2.188039779663086, "log_odds_ratio": -0.19653275609016418, "logits/chosen": 0.76788729429245, "logits/rejected": 0.8104562759399414, "logps/chosen": -2.559885263442993, "logps/rejected": -4.637145042419434, "loss": 0.6804, "nll_loss": 0.6607153415679932, "rewards/accuracies": 1.0, "rewards/chosen": -0.25598853826522827, "rewards/margins": 0.20772594213485718, "rewards/rejected": -0.46371448040008545, "step": 6603 }, { "epoch": 18.080766598220396, "grad_norm": 5.18862771987915, "learning_rate": 9.575342465753425e-08, "log_odds_chosen": 3.2105863094329834, "log_odds_ratio": -0.15455998480319977, "logits/chosen": 0.9303237199783325, "logits/rejected": 0.928233802318573, "logps/chosen": -1.581418752670288, "logps/rejected": -4.576488494873047, "loss": 0.5718, "nll_loss": 0.5563176870346069, "rewards/accuracies": 1.0, "rewards/chosen": -0.1581418812274933, "rewards/margins": 0.2995069921016693, "rewards/rejected": -0.4576488733291626, "step": 6604 }, { "epoch": 18.083504449007528, "grad_norm": 4.814683437347412, "learning_rate": 9.561643835616439e-08, "log_odds_chosen": 1.7195994853973389, "log_odds_ratio": -0.2640197277069092, "logits/chosen": 0.8844695091247559, "logits/rejected": 0.9522796869277954, "logps/chosen": -2.3942840099334717, "logps/rejected": -3.9994449615478516, "loss": 0.5785, "nll_loss": 0.5521023869514465, "rewards/accuracies": 0.875, "rewards/chosen": -0.23942840099334717, "rewards/margins": 0.16051612794399261, "rewards/rejected": -0.3999445140361786, "step": 6605 }, { "epoch": 18.086242299794662, "grad_norm": 5.09944486618042, "learning_rate": 9.547945205479452e-08, "log_odds_chosen": 1.365644931793213, "log_odds_ratio": -0.3004094362258911, "logits/chosen": 0.6879509687423706, "logits/rejected": 0.7008925676345825, "logps/chosen": -2.1804308891296387, "logps/rejected": -3.442885637283325, "loss": 0.7114, "nll_loss": 0.681404709815979, "rewards/accuracies": 1.0, "rewards/chosen": -0.21804307401180267, "rewards/margins": 0.12624549865722656, "rewards/rejected": -0.34428858757019043, "step": 6606 }, { "epoch": 18.088980150581794, "grad_norm": 5.723577499389648, "learning_rate": 9.534246575342465e-08, "log_odds_chosen": 1.77013099193573, "log_odds_ratio": -0.3154398798942566, "logits/chosen": 0.6340748071670532, "logits/rejected": 0.6594293117523193, "logps/chosen": -2.612722396850586, "logps/rejected": -4.294699668884277, "loss": 0.6651, "nll_loss": 0.6335251331329346, "rewards/accuracies": 0.875, "rewards/chosen": -0.26127225160598755, "rewards/margins": 0.16819773614406586, "rewards/rejected": -0.4294700026512146, "step": 6607 }, { "epoch": 18.091718001368925, "grad_norm": 4.809773921966553, "learning_rate": 9.520547945205479e-08, "log_odds_chosen": 2.4415500164031982, "log_odds_ratio": -0.21685752272605896, "logits/chosen": 0.8448514342308044, "logits/rejected": 0.8625484704971313, "logps/chosen": -1.4477477073669434, "logps/rejected": -3.6563329696655273, "loss": 0.6338, "nll_loss": 0.6120940446853638, "rewards/accuracies": 1.0, "rewards/chosen": -0.14477477967739105, "rewards/margins": 0.22085851430892944, "rewards/rejected": -0.3656333088874817, "step": 6608 }, { "epoch": 18.094455852156056, "grad_norm": 7.314305305480957, "learning_rate": 9.506849315068493e-08, "log_odds_chosen": 0.9475589990615845, "log_odds_ratio": -0.5553016662597656, "logits/chosen": 0.8748993873596191, "logits/rejected": 0.8366743326187134, "logps/chosen": -2.5770232677459717, "logps/rejected": -3.4072046279907227, "loss": 0.6966, "nll_loss": 0.6410775184631348, "rewards/accuracies": 0.75, "rewards/chosen": -0.2577023208141327, "rewards/margins": 0.08301813900470734, "rewards/rejected": -0.3407204747200012, "step": 6609 }, { "epoch": 18.09719370294319, "grad_norm": 6.481764793395996, "learning_rate": 9.493150684931505e-08, "log_odds_chosen": 1.105941653251648, "log_odds_ratio": -0.36954137682914734, "logits/chosen": 0.8252308964729309, "logits/rejected": 0.8392447233200073, "logps/chosen": -2.0589144229888916, "logps/rejected": -3.070681571960449, "loss": 0.6309, "nll_loss": 0.5939745306968689, "rewards/accuracies": 0.875, "rewards/chosen": -0.2058914303779602, "rewards/margins": 0.10117671638727188, "rewards/rejected": -0.3070681691169739, "step": 6610 }, { "epoch": 18.099931553730322, "grad_norm": 7.784788608551025, "learning_rate": 9.47945205479452e-08, "log_odds_chosen": 4.714771747589111, "log_odds_ratio": -0.11891654133796692, "logits/chosen": 1.072271704673767, "logits/rejected": 1.1172261238098145, "logps/chosen": -2.7410316467285156, "logps/rejected": -7.356847286224365, "loss": 0.6437, "nll_loss": 0.6318418979644775, "rewards/accuracies": 1.0, "rewards/chosen": -0.27410316467285156, "rewards/margins": 0.46158158779144287, "rewards/rejected": -0.7356847524642944, "step": 6611 }, { "epoch": 18.102669404517453, "grad_norm": 6.567275047302246, "learning_rate": 9.465753424657534e-08, "log_odds_chosen": 2.245483636856079, "log_odds_ratio": -0.4357646107673645, "logits/chosen": 0.9083189964294434, "logits/rejected": 1.0823111534118652, "logps/chosen": -2.455296039581299, "logps/rejected": -4.6151838302612305, "loss": 0.6646, "nll_loss": 0.6210623979568481, "rewards/accuracies": 0.625, "rewards/chosen": -0.24552960693836212, "rewards/margins": 0.21598875522613525, "rewards/rejected": -0.46151837706565857, "step": 6612 }, { "epoch": 18.105407255304584, "grad_norm": 5.456538200378418, "learning_rate": 9.452054794520547e-08, "log_odds_chosen": 1.3707499504089355, "log_odds_ratio": -0.3400781750679016, "logits/chosen": 0.6636376976966858, "logits/rejected": 0.7422150373458862, "logps/chosen": -2.4360029697418213, "logps/rejected": -3.6922249794006348, "loss": 0.6376, "nll_loss": 0.6035990715026855, "rewards/accuracies": 1.0, "rewards/chosen": -0.24360030889511108, "rewards/margins": 0.1256221979856491, "rewards/rejected": -0.3692225217819214, "step": 6613 }, { "epoch": 18.10814510609172, "grad_norm": 5.592423439025879, "learning_rate": 9.438356164383561e-08, "log_odds_chosen": 1.7928216457366943, "log_odds_ratio": -0.22717145085334778, "logits/chosen": 0.8452252745628357, "logits/rejected": 0.8860036134719849, "logps/chosen": -1.967548131942749, "logps/rejected": -3.633570432662964, "loss": 0.5576, "nll_loss": 0.534928023815155, "rewards/accuracies": 1.0, "rewards/chosen": -0.1967548131942749, "rewards/margins": 0.166602224111557, "rewards/rejected": -0.3633570671081543, "step": 6614 }, { "epoch": 18.11088295687885, "grad_norm": 5.278764724731445, "learning_rate": 9.424657534246575e-08, "log_odds_chosen": 2.0905067920684814, "log_odds_ratio": -0.3950560688972473, "logits/chosen": 0.8699646592140198, "logits/rejected": 0.8748486042022705, "logps/chosen": -2.4601831436157227, "logps/rejected": -4.4866414070129395, "loss": 0.6873, "nll_loss": 0.6477711796760559, "rewards/accuracies": 0.875, "rewards/chosen": -0.24601832032203674, "rewards/margins": 0.20264580845832825, "rewards/rejected": -0.4486641585826874, "step": 6615 }, { "epoch": 18.11362080766598, "grad_norm": 6.4889349937438965, "learning_rate": 9.410958904109589e-08, "log_odds_chosen": 3.7102041244506836, "log_odds_ratio": -0.15812347829341888, "logits/chosen": 0.9482528567314148, "logits/rejected": 1.0026609897613525, "logps/chosen": -1.7320780754089355, "logps/rejected": -5.2337541580200195, "loss": 0.6624, "nll_loss": 0.646537721157074, "rewards/accuracies": 0.875, "rewards/chosen": -0.1732078194618225, "rewards/margins": 0.3501676321029663, "rewards/rejected": -0.5233754515647888, "step": 6616 }, { "epoch": 18.116358658453116, "grad_norm": 11.312527656555176, "learning_rate": 9.397260273972603e-08, "log_odds_chosen": 1.073878288269043, "log_odds_ratio": -0.6899343132972717, "logits/chosen": 0.7337518930435181, "logits/rejected": 0.7142298221588135, "logps/chosen": -3.3242552280426025, "logps/rejected": -4.30551290512085, "loss": 0.7208, "nll_loss": 0.6518081426620483, "rewards/accuracies": 0.75, "rewards/chosen": -0.3324255049228668, "rewards/margins": 0.09812579303979874, "rewards/rejected": -0.43055129051208496, "step": 6617 }, { "epoch": 18.119096509240247, "grad_norm": 5.052282333374023, "learning_rate": 9.383561643835616e-08, "log_odds_chosen": 2.6008405685424805, "log_odds_ratio": -0.19901761412620544, "logits/chosen": 1.0345991849899292, "logits/rejected": 1.0381619930267334, "logps/chosen": -1.9989712238311768, "logps/rejected": -4.463505268096924, "loss": 0.6547, "nll_loss": 0.6347948908805847, "rewards/accuracies": 1.0, "rewards/chosen": -0.19989711046218872, "rewards/margins": 0.2464534193277359, "rewards/rejected": -0.4463505148887634, "step": 6618 }, { "epoch": 18.12183436002738, "grad_norm": 6.012728691101074, "learning_rate": 9.36986301369863e-08, "log_odds_chosen": 2.124976634979248, "log_odds_ratio": -0.26851218938827515, "logits/chosen": 0.9333148002624512, "logits/rejected": 0.9170876145362854, "logps/chosen": -1.9401631355285645, "logps/rejected": -3.864470958709717, "loss": 0.6674, "nll_loss": 0.6405289173126221, "rewards/accuracies": 0.875, "rewards/chosen": -0.19401630759239197, "rewards/margins": 0.1924307942390442, "rewards/rejected": -0.38644713163375854, "step": 6619 }, { "epoch": 18.12457221081451, "grad_norm": 5.118819236755371, "learning_rate": 9.356164383561643e-08, "log_odds_chosen": 1.5584099292755127, "log_odds_ratio": -0.2663826048374176, "logits/chosen": 0.7796626687049866, "logits/rejected": 0.8774387836456299, "logps/chosen": -2.260744094848633, "logps/rejected": -3.714653491973877, "loss": 0.6116, "nll_loss": 0.5849635601043701, "rewards/accuracies": 0.875, "rewards/chosen": -0.22607441246509552, "rewards/margins": 0.14539095759391785, "rewards/rejected": -0.37146538496017456, "step": 6620 }, { "epoch": 18.127310061601644, "grad_norm": 6.1150803565979, "learning_rate": 9.342465753424657e-08, "log_odds_chosen": 1.705223798751831, "log_odds_ratio": -0.39248254895210266, "logits/chosen": 0.7818974852561951, "logits/rejected": 0.8480486869812012, "logps/chosen": -2.2265665531158447, "logps/rejected": -3.8446755409240723, "loss": 0.5959, "nll_loss": 0.5566655397415161, "rewards/accuracies": 0.875, "rewards/chosen": -0.22265666723251343, "rewards/margins": 0.16181090474128723, "rewards/rejected": -0.38446754217147827, "step": 6621 }, { "epoch": 18.130047912388775, "grad_norm": 4.800825119018555, "learning_rate": 9.32876712328767e-08, "log_odds_chosen": 2.536085367202759, "log_odds_ratio": -0.18863916397094727, "logits/chosen": 0.7850672602653503, "logits/rejected": 0.8155706524848938, "logps/chosen": -2.244241237640381, "logps/rejected": -4.643567085266113, "loss": 0.5762, "nll_loss": 0.557330846786499, "rewards/accuracies": 1.0, "rewards/chosen": -0.22442413866519928, "rewards/margins": 0.2399325668811798, "rewards/rejected": -0.4643567204475403, "step": 6622 }, { "epoch": 18.132785763175907, "grad_norm": 4.871430397033691, "learning_rate": 9.315068493150684e-08, "log_odds_chosen": 4.121359825134277, "log_odds_ratio": -0.09567655622959137, "logits/chosen": 0.9238436222076416, "logits/rejected": 0.9132462739944458, "logps/chosen": -2.3568978309631348, "logps/rejected": -6.337852478027344, "loss": 0.6109, "nll_loss": 0.6013443470001221, "rewards/accuracies": 1.0, "rewards/chosen": -0.23568978905677795, "rewards/margins": 0.3980954885482788, "rewards/rejected": -0.6337852478027344, "step": 6623 }, { "epoch": 18.135523613963038, "grad_norm": 6.174273490905762, "learning_rate": 9.301369863013698e-08, "log_odds_chosen": 1.9750374555587769, "log_odds_ratio": -0.3126523494720459, "logits/chosen": 0.8094037771224976, "logits/rejected": 0.8670626878738403, "logps/chosen": -2.1833887100219727, "logps/rejected": -4.032742023468018, "loss": 0.6833, "nll_loss": 0.6520456075668335, "rewards/accuracies": 0.875, "rewards/chosen": -0.21833887696266174, "rewards/margins": 0.18493534624576569, "rewards/rejected": -0.40327417850494385, "step": 6624 }, { "epoch": 18.138261464750173, "grad_norm": 5.352768421173096, "learning_rate": 9.287671232876712e-08, "log_odds_chosen": 3.6433863639831543, "log_odds_ratio": -0.25116002559661865, "logits/chosen": 0.9356555938720703, "logits/rejected": 0.9368833899497986, "logps/chosen": -2.0172228813171387, "logps/rejected": -5.525332450866699, "loss": 0.8631, "nll_loss": 0.8379784822463989, "rewards/accuracies": 0.875, "rewards/chosen": -0.20172226428985596, "rewards/margins": 0.3508110046386719, "rewards/rejected": -0.5525332689285278, "step": 6625 }, { "epoch": 18.140999315537304, "grad_norm": 5.428705215454102, "learning_rate": 9.273972602739726e-08, "log_odds_chosen": 3.469862222671509, "log_odds_ratio": -0.07031995058059692, "logits/chosen": 0.8568271398544312, "logits/rejected": 0.8818683624267578, "logps/chosen": -2.256476879119873, "logps/rejected": -5.5624260902404785, "loss": 0.5957, "nll_loss": 0.5887040495872498, "rewards/accuracies": 1.0, "rewards/chosen": -0.2256477028131485, "rewards/margins": 0.330594927072525, "rewards/rejected": -0.5562426447868347, "step": 6626 }, { "epoch": 18.143737166324435, "grad_norm": 7.420658111572266, "learning_rate": 9.260273972602739e-08, "log_odds_chosen": 2.7109460830688477, "log_odds_ratio": -0.3008553981781006, "logits/chosen": 1.0174659490585327, "logits/rejected": 1.0413674116134644, "logps/chosen": -2.2709527015686035, "logps/rejected": -4.889989376068115, "loss": 0.6733, "nll_loss": 0.6431822180747986, "rewards/accuracies": 0.75, "rewards/chosen": -0.22709526121616364, "rewards/margins": 0.26190370321273804, "rewards/rejected": -0.4889989495277405, "step": 6627 }, { "epoch": 18.146475017111566, "grad_norm": 6.593693733215332, "learning_rate": 9.246575342465753e-08, "log_odds_chosen": 2.1670448780059814, "log_odds_ratio": -0.4833972454071045, "logits/chosen": 0.8257576823234558, "logits/rejected": 0.8484724760055542, "logps/chosen": -2.393742799758911, "logps/rejected": -4.481901168823242, "loss": 0.6804, "nll_loss": 0.6320242881774902, "rewards/accuracies": 0.75, "rewards/chosen": -0.23937425017356873, "rewards/margins": 0.20881584286689758, "rewards/rejected": -0.4481900930404663, "step": 6628 }, { "epoch": 18.1492128678987, "grad_norm": 6.354905605316162, "learning_rate": 9.232876712328768e-08, "log_odds_chosen": 1.3162156343460083, "log_odds_ratio": -0.2741076946258545, "logits/chosen": 1.0076580047607422, "logits/rejected": 1.0026873350143433, "logps/chosen": -2.400463342666626, "logps/rejected": -3.6255006790161133, "loss": 0.6996, "nll_loss": 0.6722144484519958, "rewards/accuracies": 1.0, "rewards/chosen": -0.24004635214805603, "rewards/margins": 0.12250371277332306, "rewards/rejected": -0.3625500500202179, "step": 6629 }, { "epoch": 18.151950718685832, "grad_norm": 6.673601150512695, "learning_rate": 9.21917808219178e-08, "log_odds_chosen": 2.241316318511963, "log_odds_ratio": -0.31200388073921204, "logits/chosen": 1.064974069595337, "logits/rejected": 1.1291542053222656, "logps/chosen": -2.123400926589966, "logps/rejected": -4.254674911499023, "loss": 0.6048, "nll_loss": 0.5735576152801514, "rewards/accuracies": 0.875, "rewards/chosen": -0.2123400866985321, "rewards/margins": 0.21312737464904785, "rewards/rejected": -0.42546743154525757, "step": 6630 }, { "epoch": 18.154688569472963, "grad_norm": 5.771238327026367, "learning_rate": 9.205479452054794e-08, "log_odds_chosen": 1.4838993549346924, "log_odds_ratio": -0.4205932319164276, "logits/chosen": 0.8093898892402649, "logits/rejected": 0.772240161895752, "logps/chosen": -2.028064250946045, "logps/rejected": -3.3537209033966064, "loss": 0.803, "nll_loss": 0.7609091997146606, "rewards/accuracies": 0.75, "rewards/chosen": -0.20280641317367554, "rewards/margins": 0.1325656622648239, "rewards/rejected": -0.33537209033966064, "step": 6631 }, { "epoch": 18.157426420260094, "grad_norm": 7.668666362762451, "learning_rate": 9.191780821917808e-08, "log_odds_chosen": 1.386527180671692, "log_odds_ratio": -0.7363523244857788, "logits/chosen": 0.7367873191833496, "logits/rejected": 0.740943968296051, "logps/chosen": -2.722649574279785, "logps/rejected": -3.996577739715576, "loss": 0.6808, "nll_loss": 0.6071755290031433, "rewards/accuracies": 0.75, "rewards/chosen": -0.27226492762565613, "rewards/margins": 0.12739281356334686, "rewards/rejected": -0.3996577858924866, "step": 6632 }, { "epoch": 18.16016427104723, "grad_norm": 6.04561710357666, "learning_rate": 9.178082191780821e-08, "log_odds_chosen": 2.4580395221710205, "log_odds_ratio": -0.2717457115650177, "logits/chosen": 0.8297660946846008, "logits/rejected": 0.8664394617080688, "logps/chosen": -2.720653772354126, "logps/rejected": -5.107755184173584, "loss": 0.7379, "nll_loss": 0.7106922268867493, "rewards/accuracies": 1.0, "rewards/chosen": -0.2720653712749481, "rewards/margins": 0.2387101650238037, "rewards/rejected": -0.5107755064964294, "step": 6633 }, { "epoch": 18.16290212183436, "grad_norm": 6.219044208526611, "learning_rate": 9.164383561643835e-08, "log_odds_chosen": 0.8282184600830078, "log_odds_ratio": -0.42713868618011475, "logits/chosen": 1.0308781862258911, "logits/rejected": 0.989064633846283, "logps/chosen": -1.5666983127593994, "logps/rejected": -2.256845474243164, "loss": 0.5023, "nll_loss": 0.4596048593521118, "rewards/accuracies": 0.875, "rewards/chosen": -0.15666982531547546, "rewards/margins": 0.06901474297046661, "rewards/rejected": -0.22568458318710327, "step": 6634 }, { "epoch": 18.16563997262149, "grad_norm": 5.510679244995117, "learning_rate": 9.150684931506848e-08, "log_odds_chosen": 1.6010313034057617, "log_odds_ratio": -0.23802198469638824, "logits/chosen": 0.9221887588500977, "logits/rejected": 0.8123430013656616, "logps/chosen": -2.117058038711548, "logps/rejected": -3.5792646408081055, "loss": 0.6469, "nll_loss": 0.6230641007423401, "rewards/accuracies": 1.0, "rewards/chosen": -0.21170580387115479, "rewards/margins": 0.14622066915035248, "rewards/rejected": -0.35792648792266846, "step": 6635 }, { "epoch": 18.168377823408623, "grad_norm": 6.814474105834961, "learning_rate": 9.136986301369864e-08, "log_odds_chosen": 1.429081678390503, "log_odds_ratio": -0.4542233347892761, "logits/chosen": 0.6988440155982971, "logits/rejected": 0.6757768392562866, "logps/chosen": -1.7356404066085815, "logps/rejected": -2.974318504333496, "loss": 0.6929, "nll_loss": 0.6474761366844177, "rewards/accuracies": 0.75, "rewards/chosen": -0.17356404662132263, "rewards/margins": 0.12386782467365265, "rewards/rejected": -0.2974318563938141, "step": 6636 }, { "epoch": 18.171115674195757, "grad_norm": 6.157980442047119, "learning_rate": 9.123287671232876e-08, "log_odds_chosen": 2.3773388862609863, "log_odds_ratio": -0.23666396737098694, "logits/chosen": 1.1020317077636719, "logits/rejected": 1.1646980047225952, "logps/chosen": -2.329174518585205, "logps/rejected": -4.605829238891602, "loss": 0.6361, "nll_loss": 0.6124187111854553, "rewards/accuracies": 1.0, "rewards/chosen": -0.23291745781898499, "rewards/margins": 0.2276654690504074, "rewards/rejected": -0.4605829417705536, "step": 6637 }, { "epoch": 18.17385352498289, "grad_norm": 5.353005409240723, "learning_rate": 9.10958904109589e-08, "log_odds_chosen": 2.5183708667755127, "log_odds_ratio": -0.16675947606563568, "logits/chosen": 0.48959070444107056, "logits/rejected": 0.48618918657302856, "logps/chosen": -2.006291151046753, "logps/rejected": -4.3608856201171875, "loss": 0.597, "nll_loss": 0.580289363861084, "rewards/accuracies": 1.0, "rewards/chosen": -0.2006291151046753, "rewards/margins": 0.23545941710472107, "rewards/rejected": -0.43608853220939636, "step": 6638 }, { "epoch": 18.17659137577002, "grad_norm": 6.418349742889404, "learning_rate": 9.095890410958904e-08, "log_odds_chosen": 1.686537742614746, "log_odds_ratio": -0.3121548593044281, "logits/chosen": 0.9256234169006348, "logits/rejected": 0.9007212519645691, "logps/chosen": -2.2782633304595947, "logps/rejected": -3.8409695625305176, "loss": 0.6601, "nll_loss": 0.6288706064224243, "rewards/accuracies": 1.0, "rewards/chosen": -0.2278263419866562, "rewards/margins": 0.15627062320709229, "rewards/rejected": -0.38409698009490967, "step": 6639 }, { "epoch": 18.17932922655715, "grad_norm": 4.5395379066467285, "learning_rate": 9.082191780821917e-08, "log_odds_chosen": 2.4108221530914307, "log_odds_ratio": -0.19987612962722778, "logits/chosen": 0.9323037266731262, "logits/rejected": 0.9652416110038757, "logps/chosen": -1.690314769744873, "logps/rejected": -3.925197124481201, "loss": 0.5146, "nll_loss": 0.49461841583251953, "rewards/accuracies": 1.0, "rewards/chosen": -0.16903147101402283, "rewards/margins": 0.22348825633525848, "rewards/rejected": -0.3925197422504425, "step": 6640 }, { "epoch": 18.182067077344286, "grad_norm": 5.181662559509277, "learning_rate": 9.06849315068493e-08, "log_odds_chosen": 1.8503673076629639, "log_odds_ratio": -0.17226900160312653, "logits/chosen": 0.6806893348693848, "logits/rejected": 0.7274082899093628, "logps/chosen": -2.3820958137512207, "logps/rejected": -4.0894269943237305, "loss": 0.5362, "nll_loss": 0.5189438462257385, "rewards/accuracies": 1.0, "rewards/chosen": -0.2382095754146576, "rewards/margins": 0.17073313891887665, "rewards/rejected": -0.40894269943237305, "step": 6641 }, { "epoch": 18.184804928131417, "grad_norm": 5.195359230041504, "learning_rate": 9.054794520547946e-08, "log_odds_chosen": 1.5724725723266602, "log_odds_ratio": -0.2666054368019104, "logits/chosen": 0.824892520904541, "logits/rejected": 0.734264612197876, "logps/chosen": -1.5160760879516602, "logps/rejected": -2.8910043239593506, "loss": 0.5677, "nll_loss": 0.5410076975822449, "rewards/accuracies": 1.0, "rewards/chosen": -0.15160761773586273, "rewards/margins": 0.1374928206205368, "rewards/rejected": -0.28910043835639954, "step": 6642 }, { "epoch": 18.187542778918548, "grad_norm": 8.312701225280762, "learning_rate": 9.041095890410958e-08, "log_odds_chosen": 0.7274771928787231, "log_odds_ratio": -0.46446073055267334, "logits/chosen": 1.0337116718292236, "logits/rejected": 1.015622854232788, "logps/chosen": -1.9924407005310059, "logps/rejected": -2.6282341480255127, "loss": 0.6622, "nll_loss": 0.6157271862030029, "rewards/accuracies": 0.625, "rewards/chosen": -0.19924408197402954, "rewards/margins": 0.06357933580875397, "rewards/rejected": -0.2628234028816223, "step": 6643 }, { "epoch": 18.190280629705683, "grad_norm": 5.426815032958984, "learning_rate": 9.027397260273972e-08, "log_odds_chosen": 3.6104345321655273, "log_odds_ratio": -0.09041889011859894, "logits/chosen": 0.9899447560310364, "logits/rejected": 1.005049467086792, "logps/chosen": -1.8656421899795532, "logps/rejected": -5.245279312133789, "loss": 0.6197, "nll_loss": 0.610676109790802, "rewards/accuracies": 1.0, "rewards/chosen": -0.18656420707702637, "rewards/margins": 0.33796370029449463, "rewards/rejected": -0.524527907371521, "step": 6644 }, { "epoch": 18.193018480492814, "grad_norm": 6.877242565155029, "learning_rate": 9.013698630136986e-08, "log_odds_chosen": 2.3497323989868164, "log_odds_ratio": -0.47688958048820496, "logits/chosen": 0.8035902380943298, "logits/rejected": 0.8080020546913147, "logps/chosen": -2.8770365715026855, "logps/rejected": -5.070353031158447, "loss": 0.6441, "nll_loss": 0.5963952541351318, "rewards/accuracies": 0.75, "rewards/chosen": -0.28770366311073303, "rewards/margins": 0.21933163702487946, "rewards/rejected": -0.5070353150367737, "step": 6645 }, { "epoch": 18.195756331279945, "grad_norm": 5.632380485534668, "learning_rate": 9e-08, "log_odds_chosen": 2.4813895225524902, "log_odds_ratio": -0.21218986809253693, "logits/chosen": 0.92668217420578, "logits/rejected": 0.9451946020126343, "logps/chosen": -1.864530324935913, "logps/rejected": -4.185912132263184, "loss": 0.6757, "nll_loss": 0.6544659733772278, "rewards/accuracies": 1.0, "rewards/chosen": -0.18645304441452026, "rewards/margins": 0.23213820159435272, "rewards/rejected": -0.4185912311077118, "step": 6646 }, { "epoch": 18.198494182067076, "grad_norm": 5.116313457489014, "learning_rate": 8.986301369863012e-08, "log_odds_chosen": 1.5460031032562256, "log_odds_ratio": -0.43658292293548584, "logits/chosen": 0.677828311920166, "logits/rejected": 0.7194200158119202, "logps/chosen": -2.1609086990356445, "logps/rejected": -3.639784812927246, "loss": 0.6069, "nll_loss": 0.5632280111312866, "rewards/accuracies": 0.75, "rewards/chosen": -0.21609088778495789, "rewards/margins": 0.14788761734962463, "rewards/rejected": -0.3639785051345825, "step": 6647 }, { "epoch": 18.20123203285421, "grad_norm": 5.969721794128418, "learning_rate": 8.972602739726028e-08, "log_odds_chosen": 1.7731733322143555, "log_odds_ratio": -0.3130456209182739, "logits/chosen": 0.8693338632583618, "logits/rejected": 0.9903890490531921, "logps/chosen": -2.7998108863830566, "logps/rejected": -4.5048370361328125, "loss": 0.6717, "nll_loss": 0.6403741836547852, "rewards/accuracies": 0.875, "rewards/chosen": -0.2799810767173767, "rewards/margins": 0.1705026775598526, "rewards/rejected": -0.4504837691783905, "step": 6648 }, { "epoch": 18.203969883641342, "grad_norm": 9.090422630310059, "learning_rate": 8.958904109589042e-08, "log_odds_chosen": 4.391719818115234, "log_odds_ratio": -0.2108113020658493, "logits/chosen": 1.034778118133545, "logits/rejected": 1.0788956880569458, "logps/chosen": -2.9518349170684814, "logps/rejected": -7.250744819641113, "loss": 0.8309, "nll_loss": 0.809848427772522, "rewards/accuracies": 0.875, "rewards/chosen": -0.2951834797859192, "rewards/margins": 0.42989102005958557, "rewards/rejected": -0.7250745296478271, "step": 6649 }, { "epoch": 18.206707734428473, "grad_norm": 5.54019832611084, "learning_rate": 8.945205479452054e-08, "log_odds_chosen": 1.885392665863037, "log_odds_ratio": -0.18440976738929749, "logits/chosen": 0.6329531669616699, "logits/rejected": 0.6273487210273743, "logps/chosen": -2.2860727310180664, "logps/rejected": -4.044994831085205, "loss": 0.6672, "nll_loss": 0.6487706899642944, "rewards/accuracies": 1.0, "rewards/chosen": -0.22860726714134216, "rewards/margins": 0.17589221894741058, "rewards/rejected": -0.40449950098991394, "step": 6650 }, { "epoch": 18.209445585215605, "grad_norm": 5.023853302001953, "learning_rate": 8.931506849315068e-08, "log_odds_chosen": 2.72821307182312, "log_odds_ratio": -0.30455663800239563, "logits/chosen": 0.696157693862915, "logits/rejected": 0.7678021192550659, "logps/chosen": -2.229264736175537, "logps/rejected": -4.867815017700195, "loss": 0.5946, "nll_loss": 0.564185380935669, "rewards/accuracies": 0.75, "rewards/chosen": -0.22292646765708923, "rewards/margins": 0.2638550102710724, "rewards/rejected": -0.4867814779281616, "step": 6651 }, { "epoch": 18.21218343600274, "grad_norm": 4.715953826904297, "learning_rate": 8.917808219178082e-08, "log_odds_chosen": 2.413362503051758, "log_odds_ratio": -0.18190878629684448, "logits/chosen": 0.8364582061767578, "logits/rejected": 0.8713587522506714, "logps/chosen": -2.1265006065368652, "logps/rejected": -4.333620548248291, "loss": 0.6152, "nll_loss": 0.596973717212677, "rewards/accuracies": 1.0, "rewards/chosen": -0.21265006065368652, "rewards/margins": 0.22071197628974915, "rewards/rejected": -0.43336203694343567, "step": 6652 }, { "epoch": 18.21492128678987, "grad_norm": 5.212221622467041, "learning_rate": 8.904109589041094e-08, "log_odds_chosen": 3.1465213298797607, "log_odds_ratio": -0.15998844802379608, "logits/chosen": 0.6833140254020691, "logits/rejected": 0.7409298419952393, "logps/chosen": -2.1673030853271484, "logps/rejected": -5.137459754943848, "loss": 0.6666, "nll_loss": 0.6505608558654785, "rewards/accuracies": 1.0, "rewards/chosen": -0.21673032641410828, "rewards/margins": 0.2970156669616699, "rewards/rejected": -0.5137460231781006, "step": 6653 }, { "epoch": 18.217659137577, "grad_norm": 6.897350788116455, "learning_rate": 8.89041095890411e-08, "log_odds_chosen": 2.3835582733154297, "log_odds_ratio": -0.11629313230514526, "logits/chosen": 0.8942856788635254, "logits/rejected": 0.9742757081985474, "logps/chosen": -2.671755790710449, "logps/rejected": -4.970294952392578, "loss": 0.7582, "nll_loss": 0.7465459108352661, "rewards/accuracies": 1.0, "rewards/chosen": -0.2671755850315094, "rewards/margins": 0.22985391318798065, "rewards/rejected": -0.49702954292297363, "step": 6654 }, { "epoch": 18.220396988364133, "grad_norm": 6.423734188079834, "learning_rate": 8.876712328767123e-08, "log_odds_chosen": 1.6555976867675781, "log_odds_ratio": -0.45466262102127075, "logits/chosen": 1.0076510906219482, "logits/rejected": 0.9825196862220764, "logps/chosen": -2.8454580307006836, "logps/rejected": -4.445821762084961, "loss": 0.7878, "nll_loss": 0.7423632144927979, "rewards/accuracies": 0.75, "rewards/chosen": -0.28454577922821045, "rewards/margins": 0.16003639996051788, "rewards/rejected": -0.4445822238922119, "step": 6655 }, { "epoch": 18.223134839151268, "grad_norm": 8.419344902038574, "learning_rate": 8.863013698630137e-08, "log_odds_chosen": 1.4239097833633423, "log_odds_ratio": -0.7366058230400085, "logits/chosen": 0.7365385890007019, "logits/rejected": 0.6852574348449707, "logps/chosen": -2.7866973876953125, "logps/rejected": -4.136098861694336, "loss": 0.6981, "nll_loss": 0.6244552135467529, "rewards/accuracies": 0.75, "rewards/chosen": -0.27866971492767334, "rewards/margins": 0.13494019210338593, "rewards/rejected": -0.41360992193222046, "step": 6656 }, { "epoch": 18.2258726899384, "grad_norm": 6.145329475402832, "learning_rate": 8.84931506849315e-08, "log_odds_chosen": 1.6354925632476807, "log_odds_ratio": -0.32099902629852295, "logits/chosen": 0.968532383441925, "logits/rejected": 1.0315310955047607, "logps/chosen": -2.148672580718994, "logps/rejected": -3.709414482116699, "loss": 0.7099, "nll_loss": 0.6778039932250977, "rewards/accuracies": 0.875, "rewards/chosen": -0.2148672640323639, "rewards/margins": 0.1560741811990738, "rewards/rejected": -0.3709414601325989, "step": 6657 }, { "epoch": 18.22861054072553, "grad_norm": 4.4677886962890625, "learning_rate": 8.835616438356164e-08, "log_odds_chosen": 3.3880910873413086, "log_odds_ratio": -0.12822693586349487, "logits/chosen": 0.9885337352752686, "logits/rejected": 1.0622479915618896, "logps/chosen": -2.1060614585876465, "logps/rejected": -5.337584018707275, "loss": 0.5711, "nll_loss": 0.5583095550537109, "rewards/accuracies": 1.0, "rewards/chosen": -0.2106061577796936, "rewards/margins": 0.3231523036956787, "rewards/rejected": -0.5337584614753723, "step": 6658 }, { "epoch": 18.23134839151266, "grad_norm": 5.153461933135986, "learning_rate": 8.821917808219178e-08, "log_odds_chosen": 1.1590144634246826, "log_odds_ratio": -0.31078410148620605, "logits/chosen": 0.8595237731933594, "logits/rejected": 0.8642297983169556, "logps/chosen": -2.083265542984009, "logps/rejected": -3.085228204727173, "loss": 0.6183, "nll_loss": 0.5872015953063965, "rewards/accuracies": 0.875, "rewards/chosen": -0.2083265632390976, "rewards/margins": 0.10019627213478088, "rewards/rejected": -0.3085228204727173, "step": 6659 }, { "epoch": 18.234086242299796, "grad_norm": 5.138965129852295, "learning_rate": 8.80821917808219e-08, "log_odds_chosen": 3.5893163681030273, "log_odds_ratio": -0.07975748926401138, "logits/chosen": 0.9290566444396973, "logits/rejected": 0.9402691721916199, "logps/chosen": -2.2598772048950195, "logps/rejected": -5.713594436645508, "loss": 0.5786, "nll_loss": 0.5706038475036621, "rewards/accuracies": 1.0, "rewards/chosen": -0.22598771750926971, "rewards/margins": 0.34537172317504883, "rewards/rejected": -0.5713594555854797, "step": 6660 }, { "epoch": 18.236824093086927, "grad_norm": 6.085325241088867, "learning_rate": 8.794520547945205e-08, "log_odds_chosen": 2.788679838180542, "log_odds_ratio": -0.17024663090705872, "logits/chosen": 0.991235613822937, "logits/rejected": 1.0051697492599487, "logps/chosen": -2.584582805633545, "logps/rejected": -5.292118072509766, "loss": 0.8093, "nll_loss": 0.7922399044036865, "rewards/accuracies": 1.0, "rewards/chosen": -0.25845828652381897, "rewards/margins": 0.27075350284576416, "rewards/rejected": -0.5292117595672607, "step": 6661 }, { "epoch": 18.239561943874058, "grad_norm": 7.141674995422363, "learning_rate": 8.78082191780822e-08, "log_odds_chosen": 2.197160243988037, "log_odds_ratio": -0.35972195863723755, "logits/chosen": 0.6628254652023315, "logits/rejected": 0.7017903923988342, "logps/chosen": -1.9433872699737549, "logps/rejected": -3.9457132816314697, "loss": 0.7404, "nll_loss": 0.7044373750686646, "rewards/accuracies": 0.75, "rewards/chosen": -0.19433873891830444, "rewards/margins": 0.200232595205307, "rewards/rejected": -0.39457133412361145, "step": 6662 }, { "epoch": 18.24229979466119, "grad_norm": 5.916985034942627, "learning_rate": 8.767123287671232e-08, "log_odds_chosen": 3.484375, "log_odds_ratio": -0.09344571828842163, "logits/chosen": 0.8182268142700195, "logits/rejected": 1.005051612854004, "logps/chosen": -1.7933385372161865, "logps/rejected": -5.101844310760498, "loss": 0.8761, "nll_loss": 0.8667455911636353, "rewards/accuracies": 1.0, "rewards/chosen": -0.1793338656425476, "rewards/margins": 0.33085066080093384, "rewards/rejected": -0.5101845264434814, "step": 6663 }, { "epoch": 18.245037645448324, "grad_norm": 7.804462909698486, "learning_rate": 8.753424657534246e-08, "log_odds_chosen": 2.1437745094299316, "log_odds_ratio": -0.2779476046562195, "logits/chosen": 0.7359583973884583, "logits/rejected": 0.7630784511566162, "logps/chosen": -2.2207753658294678, "logps/rejected": -4.243353843688965, "loss": 0.7706, "nll_loss": 0.7427636384963989, "rewards/accuracies": 1.0, "rewards/chosen": -0.22207754850387573, "rewards/margins": 0.20225785672664642, "rewards/rejected": -0.42433542013168335, "step": 6664 }, { "epoch": 18.247775496235455, "grad_norm": 4.828878402709961, "learning_rate": 8.73972602739726e-08, "log_odds_chosen": 2.2056896686553955, "log_odds_ratio": -0.19740204513072968, "logits/chosen": 0.8355239033699036, "logits/rejected": 0.9288315773010254, "logps/chosen": -1.9641141891479492, "logps/rejected": -4.030770301818848, "loss": 0.5326, "nll_loss": 0.5128468871116638, "rewards/accuracies": 1.0, "rewards/chosen": -0.19641143083572388, "rewards/margins": 0.20666560530662537, "rewards/rejected": -0.40307703614234924, "step": 6665 }, { "epoch": 18.250513347022586, "grad_norm": 5.606545925140381, "learning_rate": 8.726027397260274e-08, "log_odds_chosen": 1.9620847702026367, "log_odds_ratio": -0.23209354281425476, "logits/chosen": 0.6249313354492188, "logits/rejected": 0.654279351234436, "logps/chosen": -2.0679538249969482, "logps/rejected": -3.936182737350464, "loss": 0.5766, "nll_loss": 0.553364634513855, "rewards/accuracies": 0.875, "rewards/chosen": -0.20679539442062378, "rewards/margins": 0.18682289123535156, "rewards/rejected": -0.39361828565597534, "step": 6666 }, { "epoch": 18.253251197809718, "grad_norm": 5.218390941619873, "learning_rate": 8.712328767123287e-08, "log_odds_chosen": 1.6592974662780762, "log_odds_ratio": -0.22929705679416656, "logits/chosen": 0.7100405693054199, "logits/rejected": 0.7372654676437378, "logps/chosen": -1.7604715824127197, "logps/rejected": -3.2634081840515137, "loss": 0.5081, "nll_loss": 0.48512983322143555, "rewards/accuracies": 1.0, "rewards/chosen": -0.17604714632034302, "rewards/margins": 0.15029364824295044, "rewards/rejected": -0.32634082436561584, "step": 6667 }, { "epoch": 18.255989048596852, "grad_norm": 6.698179244995117, "learning_rate": 8.698630136986301e-08, "log_odds_chosen": 3.118502378463745, "log_odds_ratio": -0.14530135691165924, "logits/chosen": 0.7566086649894714, "logits/rejected": 0.796823263168335, "logps/chosen": -2.767016649246216, "logps/rejected": -5.706782817840576, "loss": 0.7324, "nll_loss": 0.7178998589515686, "rewards/accuracies": 1.0, "rewards/chosen": -0.2767016589641571, "rewards/margins": 0.29397663474082947, "rewards/rejected": -0.5706782937049866, "step": 6668 }, { "epoch": 18.258726899383984, "grad_norm": 5.678215980529785, "learning_rate": 8.684931506849315e-08, "log_odds_chosen": 2.9285264015197754, "log_odds_ratio": -0.39447104930877686, "logits/chosen": 0.9159888029098511, "logits/rejected": 0.9319755434989929, "logps/chosen": -2.217891216278076, "logps/rejected": -4.994375228881836, "loss": 0.6129, "nll_loss": 0.5734054446220398, "rewards/accuracies": 0.875, "rewards/chosen": -0.22178910672664642, "rewards/margins": 0.2776484489440918, "rewards/rejected": -0.4994375705718994, "step": 6669 }, { "epoch": 18.261464750171115, "grad_norm": 5.7054853439331055, "learning_rate": 8.671232876712328e-08, "log_odds_chosen": 1.201995611190796, "log_odds_ratio": -0.2944084405899048, "logits/chosen": 0.7853903770446777, "logits/rejected": 0.6939921975135803, "logps/chosen": -1.7906510829925537, "logps/rejected": -2.8366308212280273, "loss": 0.5789, "nll_loss": 0.549470067024231, "rewards/accuracies": 0.875, "rewards/chosen": -0.17906510829925537, "rewards/margins": 0.10459796339273453, "rewards/rejected": -0.2836630642414093, "step": 6670 }, { "epoch": 18.26420260095825, "grad_norm": 4.8549113273620605, "learning_rate": 8.657534246575342e-08, "log_odds_chosen": 1.4723248481750488, "log_odds_ratio": -0.3371427655220032, "logits/chosen": 1.0767488479614258, "logits/rejected": 1.0415239334106445, "logps/chosen": -1.6319116353988647, "logps/rejected": -2.982377052307129, "loss": 0.5379, "nll_loss": 0.5041366815567017, "rewards/accuracies": 0.875, "rewards/chosen": -0.16319116950035095, "rewards/margins": 0.13504652678966522, "rewards/rejected": -0.29823771119117737, "step": 6671 }, { "epoch": 18.26694045174538, "grad_norm": 5.608820915222168, "learning_rate": 8.643835616438356e-08, "log_odds_chosen": 2.7148399353027344, "log_odds_ratio": -0.17615048587322235, "logits/chosen": 0.7704626321792603, "logits/rejected": 0.7311692237854004, "logps/chosen": -2.638352870941162, "logps/rejected": -5.268957138061523, "loss": 0.779, "nll_loss": 0.7614043951034546, "rewards/accuracies": 1.0, "rewards/chosen": -0.2638353109359741, "rewards/margins": 0.26306039094924927, "rewards/rejected": -0.5268957018852234, "step": 6672 }, { "epoch": 18.269678302532512, "grad_norm": 5.127963542938232, "learning_rate": 8.630136986301371e-08, "log_odds_chosen": 2.4699182510375977, "log_odds_ratio": -0.1656261831521988, "logits/chosen": 1.0430989265441895, "logits/rejected": 1.0741368532180786, "logps/chosen": -2.065885066986084, "logps/rejected": -4.422770977020264, "loss": 0.5935, "nll_loss": 0.5769704580307007, "rewards/accuracies": 1.0, "rewards/chosen": -0.2065885066986084, "rewards/margins": 0.23568859696388245, "rewards/rejected": -0.44227710366249084, "step": 6673 }, { "epoch": 18.272416153319643, "grad_norm": 5.339622974395752, "learning_rate": 8.616438356164383e-08, "log_odds_chosen": 1.9461205005645752, "log_odds_ratio": -0.21140769124031067, "logits/chosen": 0.9345192313194275, "logits/rejected": 1.0105934143066406, "logps/chosen": -2.487001419067383, "logps/rejected": -4.305505752563477, "loss": 0.6229, "nll_loss": 0.6017946600914001, "rewards/accuracies": 1.0, "rewards/chosen": -0.24870014190673828, "rewards/margins": 0.18185041844844818, "rewards/rejected": -0.43055057525634766, "step": 6674 }, { "epoch": 18.275154004106778, "grad_norm": 6.073760509490967, "learning_rate": 8.602739726027397e-08, "log_odds_chosen": 2.318798303604126, "log_odds_ratio": -0.18649068474769592, "logits/chosen": 0.9439297318458557, "logits/rejected": 1.026069164276123, "logps/chosen": -2.256918430328369, "logps/rejected": -4.463081359863281, "loss": 0.6702, "nll_loss": 0.6515611410140991, "rewards/accuracies": 1.0, "rewards/chosen": -0.22569184005260468, "rewards/margins": 0.22061626613140106, "rewards/rejected": -0.44630807638168335, "step": 6675 }, { "epoch": 18.27789185489391, "grad_norm": 6.129307270050049, "learning_rate": 8.589041095890411e-08, "log_odds_chosen": 2.8337535858154297, "log_odds_ratio": -0.2862069010734558, "logits/chosen": 0.7150188684463501, "logits/rejected": 0.8137274384498596, "logps/chosen": -2.4791269302368164, "logps/rejected": -5.1897406578063965, "loss": 0.7311, "nll_loss": 0.7024792432785034, "rewards/accuracies": 0.875, "rewards/chosen": -0.2479127049446106, "rewards/margins": 0.27106136083602905, "rewards/rejected": -0.5189740657806396, "step": 6676 }, { "epoch": 18.28062970568104, "grad_norm": 4.8163042068481445, "learning_rate": 8.575342465753424e-08, "log_odds_chosen": 2.374207019805908, "log_odds_ratio": -0.26364865899086, "logits/chosen": 0.8667784929275513, "logits/rejected": 0.8310964703559875, "logps/chosen": -1.7461600303649902, "logps/rejected": -3.9847967624664307, "loss": 0.631, "nll_loss": 0.6046765446662903, "rewards/accuracies": 0.875, "rewards/chosen": -0.1746160089969635, "rewards/margins": 0.22386370599269867, "rewards/rejected": -0.39847972989082336, "step": 6677 }, { "epoch": 18.28336755646817, "grad_norm": 5.183533191680908, "learning_rate": 8.561643835616438e-08, "log_odds_chosen": 1.8480703830718994, "log_odds_ratio": -0.3454916179180145, "logits/chosen": 0.8510841727256775, "logits/rejected": 0.8398988842964172, "logps/chosen": -1.8363771438598633, "logps/rejected": -3.5150821208953857, "loss": 0.5851, "nll_loss": 0.5505449771881104, "rewards/accuracies": 0.75, "rewards/chosen": -0.18363770842552185, "rewards/margins": 0.16787050664424896, "rewards/rejected": -0.3515082001686096, "step": 6678 }, { "epoch": 18.286105407255306, "grad_norm": 6.177860260009766, "learning_rate": 8.547945205479453e-08, "log_odds_chosen": 1.79514741897583, "log_odds_ratio": -0.42433470487594604, "logits/chosen": 0.8560174107551575, "logits/rejected": 0.8767724633216858, "logps/chosen": -1.886065125465393, "logps/rejected": -3.539268970489502, "loss": 0.5788, "nll_loss": 0.5363436937332153, "rewards/accuracies": 0.75, "rewards/chosen": -0.18860651552677155, "rewards/margins": 0.16532039642333984, "rewards/rejected": -0.3539268970489502, "step": 6679 }, { "epoch": 18.288843258042437, "grad_norm": 5.634207248687744, "learning_rate": 8.534246575342465e-08, "log_odds_chosen": 1.5382194519042969, "log_odds_ratio": -0.24411556124687195, "logits/chosen": 1.0361154079437256, "logits/rejected": 1.0442700386047363, "logps/chosen": -2.404482841491699, "logps/rejected": -3.8389389514923096, "loss": 0.6798, "nll_loss": 0.6553693413734436, "rewards/accuracies": 1.0, "rewards/chosen": -0.24044829607009888, "rewards/margins": 0.14344562590122223, "rewards/rejected": -0.3838939070701599, "step": 6680 }, { "epoch": 18.29158110882957, "grad_norm": 5.351730823516846, "learning_rate": 8.520547945205479e-08, "log_odds_chosen": 2.0321831703186035, "log_odds_ratio": -0.19320714473724365, "logits/chosen": 0.9040233492851257, "logits/rejected": 0.9245685338973999, "logps/chosen": -2.2607643604278564, "logps/rejected": -4.155912399291992, "loss": 0.6179, "nll_loss": 0.5985949039459229, "rewards/accuracies": 1.0, "rewards/chosen": -0.22607645392417908, "rewards/margins": 0.18951483070850372, "rewards/rejected": -0.415591299533844, "step": 6681 }, { "epoch": 18.2943189596167, "grad_norm": 5.246670246124268, "learning_rate": 8.506849315068493e-08, "log_odds_chosen": 2.614565134048462, "log_odds_ratio": -0.24824023246765137, "logits/chosen": 0.8373059034347534, "logits/rejected": 0.8158130645751953, "logps/chosen": -1.6733055114746094, "logps/rejected": -4.094707012176514, "loss": 0.6826, "nll_loss": 0.6577881574630737, "rewards/accuracies": 0.875, "rewards/chosen": -0.1673305630683899, "rewards/margins": 0.24214014410972595, "rewards/rejected": -0.40947067737579346, "step": 6682 }, { "epoch": 18.297056810403834, "grad_norm": 5.94180965423584, "learning_rate": 8.493150684931506e-08, "log_odds_chosen": 3.1623473167419434, "log_odds_ratio": -0.29722583293914795, "logits/chosen": 0.7074058055877686, "logits/rejected": 0.7328017950057983, "logps/chosen": -1.9724516868591309, "logps/rejected": -5.016796588897705, "loss": 0.5747, "nll_loss": 0.5450081825256348, "rewards/accuracies": 0.875, "rewards/chosen": -0.19724516570568085, "rewards/margins": 0.30443447828292847, "rewards/rejected": -0.5016796588897705, "step": 6683 }, { "epoch": 18.299794661190965, "grad_norm": 4.996016979217529, "learning_rate": 8.47945205479452e-08, "log_odds_chosen": 2.0441102981567383, "log_odds_ratio": -0.30255770683288574, "logits/chosen": 0.8681594729423523, "logits/rejected": 0.9098992347717285, "logps/chosen": -1.8227708339691162, "logps/rejected": -3.6813156604766846, "loss": 0.5739, "nll_loss": 0.5436693429946899, "rewards/accuracies": 0.875, "rewards/chosen": -0.18227708339691162, "rewards/margins": 0.1858544945716858, "rewards/rejected": -0.3681315779685974, "step": 6684 }, { "epoch": 18.302532511978097, "grad_norm": 6.79517936706543, "learning_rate": 8.465753424657533e-08, "log_odds_chosen": 3.0002522468566895, "log_odds_ratio": -0.27209511399269104, "logits/chosen": 0.89687180519104, "logits/rejected": 0.916695237159729, "logps/chosen": -2.3215813636779785, "logps/rejected": -5.213684558868408, "loss": 0.7912, "nll_loss": 0.7639601230621338, "rewards/accuracies": 0.875, "rewards/chosen": -0.2321581095457077, "rewards/margins": 0.28921034932136536, "rewards/rejected": -0.5213684439659119, "step": 6685 }, { "epoch": 18.305270362765228, "grad_norm": 6.926538467407227, "learning_rate": 8.452054794520549e-08, "log_odds_chosen": 2.817979097366333, "log_odds_ratio": -0.17492498457431793, "logits/chosen": 0.7228183150291443, "logits/rejected": 0.8556271195411682, "logps/chosen": -1.97350013256073, "logps/rejected": -4.598060607910156, "loss": 0.6601, "nll_loss": 0.6426029205322266, "rewards/accuracies": 1.0, "rewards/chosen": -0.19735002517700195, "rewards/margins": 0.2624560296535492, "rewards/rejected": -0.45980605483055115, "step": 6686 }, { "epoch": 18.308008213552363, "grad_norm": 5.448905944824219, "learning_rate": 8.438356164383561e-08, "log_odds_chosen": 2.4229867458343506, "log_odds_ratio": -0.19124743342399597, "logits/chosen": 0.6969543695449829, "logits/rejected": 0.7539480328559875, "logps/chosen": -2.4786314964294434, "logps/rejected": -4.788637638092041, "loss": 0.6422, "nll_loss": 0.6230689287185669, "rewards/accuracies": 1.0, "rewards/chosen": -0.24786314368247986, "rewards/margins": 0.2310006022453308, "rewards/rejected": -0.47886374592781067, "step": 6687 }, { "epoch": 18.310746064339494, "grad_norm": 5.7303056716918945, "learning_rate": 8.424657534246575e-08, "log_odds_chosen": 1.1494272947311401, "log_odds_ratio": -0.3783774971961975, "logits/chosen": 0.8202512264251709, "logits/rejected": 0.7483812570571899, "logps/chosen": -2.3490586280822754, "logps/rejected": -3.443138599395752, "loss": 0.746, "nll_loss": 0.7081716656684875, "rewards/accuracies": 0.875, "rewards/chosen": -0.23490583896636963, "rewards/margins": 0.10940801352262497, "rewards/rejected": -0.3443138897418976, "step": 6688 }, { "epoch": 18.313483915126625, "grad_norm": 5.675592422485352, "learning_rate": 8.410958904109589e-08, "log_odds_chosen": 4.346658706665039, "log_odds_ratio": -0.12410946935415268, "logits/chosen": 0.846211314201355, "logits/rejected": 0.8386901617050171, "logps/chosen": -2.0817172527313232, "logps/rejected": -6.277300834655762, "loss": 0.7774, "nll_loss": 0.7650318741798401, "rewards/accuracies": 1.0, "rewards/chosen": -0.20817172527313232, "rewards/margins": 0.4195583760738373, "rewards/rejected": -0.627730131149292, "step": 6689 }, { "epoch": 18.316221765913756, "grad_norm": 6.148256778717041, "learning_rate": 8.397260273972601e-08, "log_odds_chosen": 1.1652061939239502, "log_odds_ratio": -0.35471564531326294, "logits/chosen": 0.8491493463516235, "logits/rejected": 0.8279444575309753, "logps/chosen": -2.3923349380493164, "logps/rejected": -3.4258203506469727, "loss": 0.6505, "nll_loss": 0.6150749325752258, "rewards/accuracies": 0.875, "rewards/chosen": -0.23923350870609283, "rewards/margins": 0.10334853827953339, "rewards/rejected": -0.3425820469856262, "step": 6690 }, { "epoch": 18.31895961670089, "grad_norm": 8.51775074005127, "learning_rate": 8.383561643835615e-08, "log_odds_chosen": 0.19147580862045288, "log_odds_ratio": -0.7565000057220459, "logits/chosen": 0.7611031532287598, "logits/rejected": 0.7846012115478516, "logps/chosen": -2.923482894897461, "logps/rejected": -3.1131927967071533, "loss": 0.845, "nll_loss": 0.7693678140640259, "rewards/accuracies": 0.625, "rewards/chosen": -0.29234829545021057, "rewards/margins": 0.01897096261382103, "rewards/rejected": -0.3113192915916443, "step": 6691 }, { "epoch": 18.321697467488022, "grad_norm": 5.490762710571289, "learning_rate": 8.36986301369863e-08, "log_odds_chosen": 2.867546796798706, "log_odds_ratio": -0.14410972595214844, "logits/chosen": 1.0013737678527832, "logits/rejected": 1.070696234703064, "logps/chosen": -2.3404130935668945, "logps/rejected": -5.06426477432251, "loss": 0.6305, "nll_loss": 0.6160399913787842, "rewards/accuracies": 1.0, "rewards/chosen": -0.23404130339622498, "rewards/margins": 0.2723851799964905, "rewards/rejected": -0.5064265131950378, "step": 6692 }, { "epoch": 18.324435318275153, "grad_norm": 4.782297134399414, "learning_rate": 8.356164383561644e-08, "log_odds_chosen": 2.456549882888794, "log_odds_ratio": -0.18003109097480774, "logits/chosen": 0.7665852904319763, "logits/rejected": 0.8051884174346924, "logps/chosen": -2.1357479095458984, "logps/rejected": -4.435510635375977, "loss": 0.5998, "nll_loss": 0.5817502737045288, "rewards/accuracies": 0.875, "rewards/chosen": -0.21357478201389313, "rewards/margins": 0.22997626662254333, "rewards/rejected": -0.44355106353759766, "step": 6693 }, { "epoch": 18.327173169062284, "grad_norm": 5.098522186279297, "learning_rate": 8.342465753424657e-08, "log_odds_chosen": 3.5904650688171387, "log_odds_ratio": -0.2038458287715912, "logits/chosen": 1.0290113687515259, "logits/rejected": 1.1120619773864746, "logps/chosen": -1.9019513130187988, "logps/rejected": -5.309215545654297, "loss": 0.6039, "nll_loss": 0.5835646986961365, "rewards/accuracies": 1.0, "rewards/chosen": -0.19019514322280884, "rewards/margins": 0.34072649478912354, "rewards/rejected": -0.5309215784072876, "step": 6694 }, { "epoch": 18.32991101984942, "grad_norm": 4.9999589920043945, "learning_rate": 8.328767123287671e-08, "log_odds_chosen": 1.3812601566314697, "log_odds_ratio": -0.34633463621139526, "logits/chosen": 0.7374310493469238, "logits/rejected": 0.7700618505477905, "logps/chosen": -2.6024651527404785, "logps/rejected": -3.926478862762451, "loss": 0.7105, "nll_loss": 0.6758720278739929, "rewards/accuracies": 0.875, "rewards/chosen": -0.26024651527404785, "rewards/margins": 0.13240139186382294, "rewards/rejected": -0.392647922039032, "step": 6695 }, { "epoch": 18.33264887063655, "grad_norm": 9.794615745544434, "learning_rate": 8.315068493150685e-08, "log_odds_chosen": 1.8647079467773438, "log_odds_ratio": -0.6478757262229919, "logits/chosen": 0.9585034251213074, "logits/rejected": 0.963505208492279, "logps/chosen": -2.6710145473480225, "logps/rejected": -4.43515682220459, "loss": 0.6959, "nll_loss": 0.6310876607894897, "rewards/accuracies": 0.75, "rewards/chosen": -0.2671014368534088, "rewards/margins": 0.17641423642635345, "rewards/rejected": -0.44351568818092346, "step": 6696 }, { "epoch": 18.33538672142368, "grad_norm": 4.5734477043151855, "learning_rate": 8.301369863013697e-08, "log_odds_chosen": 2.672238349914551, "log_odds_ratio": -0.15369080007076263, "logits/chosen": 0.9033748507499695, "logits/rejected": 0.920457124710083, "logps/chosen": -1.8658716678619385, "logps/rejected": -4.374910354614258, "loss": 0.5072, "nll_loss": 0.49180835485458374, "rewards/accuracies": 1.0, "rewards/chosen": -0.18658718466758728, "rewards/margins": 0.2509039044380188, "rewards/rejected": -0.4374910891056061, "step": 6697 }, { "epoch": 18.338124572210816, "grad_norm": 5.850107192993164, "learning_rate": 8.287671232876713e-08, "log_odds_chosen": 1.0432902574539185, "log_odds_ratio": -0.3294919729232788, "logits/chosen": 1.1194325685501099, "logits/rejected": 1.114454746246338, "logps/chosen": -1.7474979162216187, "logps/rejected": -2.6565773487091064, "loss": 0.5498, "nll_loss": 0.5168730616569519, "rewards/accuracies": 1.0, "rewards/chosen": -0.17474979162216187, "rewards/margins": 0.09090792387723923, "rewards/rejected": -0.2656577229499817, "step": 6698 }, { "epoch": 18.340862422997947, "grad_norm": 5.393611431121826, "learning_rate": 8.273972602739726e-08, "log_odds_chosen": 1.9124648571014404, "log_odds_ratio": -0.2333870679140091, "logits/chosen": 1.0808525085449219, "logits/rejected": 1.141739010810852, "logps/chosen": -2.351130485534668, "logps/rejected": -4.165160655975342, "loss": 0.6307, "nll_loss": 0.6073588132858276, "rewards/accuracies": 1.0, "rewards/chosen": -0.23511305451393127, "rewards/margins": 0.1814030110836029, "rewards/rejected": -0.41651609539985657, "step": 6699 }, { "epoch": 18.34360027378508, "grad_norm": 5.773099899291992, "learning_rate": 8.260273972602739e-08, "log_odds_chosen": 2.4615495204925537, "log_odds_ratio": -0.25019946694374084, "logits/chosen": 0.7289094924926758, "logits/rejected": 0.724301815032959, "logps/chosen": -1.8030925989151, "logps/rejected": -4.125807762145996, "loss": 0.5531, "nll_loss": 0.5281215310096741, "rewards/accuracies": 1.0, "rewards/chosen": -0.1803092658519745, "rewards/margins": 0.2322714924812317, "rewards/rejected": -0.41258078813552856, "step": 6700 }, { "epoch": 18.34633812457221, "grad_norm": 7.068443775177002, "learning_rate": 8.246575342465753e-08, "log_odds_chosen": 2.407980442047119, "log_odds_ratio": -0.18034642934799194, "logits/chosen": 1.0251588821411133, "logits/rejected": 1.0368304252624512, "logps/chosen": -1.6038696765899658, "logps/rejected": -3.8072521686553955, "loss": 0.4976, "nll_loss": 0.4795494079589844, "rewards/accuracies": 1.0, "rewards/chosen": -0.16038697957992554, "rewards/margins": 0.22033822536468506, "rewards/rejected": -0.3807252049446106, "step": 6701 }, { "epoch": 18.349075975359344, "grad_norm": 6.366843223571777, "learning_rate": 8.232876712328767e-08, "log_odds_chosen": 2.2699661254882812, "log_odds_ratio": -0.2566015124320984, "logits/chosen": 0.7695189118385315, "logits/rejected": 0.782850980758667, "logps/chosen": -2.18735408782959, "logps/rejected": -4.371175765991211, "loss": 0.7089, "nll_loss": 0.6832895874977112, "rewards/accuracies": 1.0, "rewards/chosen": -0.21873542666435242, "rewards/margins": 0.21838217973709106, "rewards/rejected": -0.4371175765991211, "step": 6702 }, { "epoch": 18.351813826146476, "grad_norm": 6.240811824798584, "learning_rate": 8.21917808219178e-08, "log_odds_chosen": 2.883544683456421, "log_odds_ratio": -0.2186771184206009, "logits/chosen": 0.9187527894973755, "logits/rejected": 1.0044934749603271, "logps/chosen": -2.4822707176208496, "logps/rejected": -5.304061412811279, "loss": 0.7347, "nll_loss": 0.7128514051437378, "rewards/accuracies": 1.0, "rewards/chosen": -0.248227059841156, "rewards/margins": 0.282179057598114, "rewards/rejected": -0.53040611743927, "step": 6703 }, { "epoch": 18.354551676933607, "grad_norm": 5.189530849456787, "learning_rate": 8.205479452054793e-08, "log_odds_chosen": 3.090801239013672, "log_odds_ratio": -0.1749972403049469, "logits/chosen": 0.7876036763191223, "logits/rejected": 0.7961779832839966, "logps/chosen": -2.039595127105713, "logps/rejected": -4.9801764488220215, "loss": 0.6225, "nll_loss": 0.605035662651062, "rewards/accuracies": 1.0, "rewards/chosen": -0.20395952463150024, "rewards/margins": 0.2940581440925598, "rewards/rejected": -0.49801766872406006, "step": 6704 }, { "epoch": 18.357289527720738, "grad_norm": 7.756996154785156, "learning_rate": 8.191780821917808e-08, "log_odds_chosen": 2.6306204795837402, "log_odds_ratio": -0.5119835138320923, "logits/chosen": 0.7301651835441589, "logits/rejected": 0.7626240253448486, "logps/chosen": -2.9126663208007812, "logps/rejected": -5.462751388549805, "loss": 0.8144, "nll_loss": 0.7631984353065491, "rewards/accuracies": 0.875, "rewards/chosen": -0.29126664996147156, "rewards/margins": 0.2550085186958313, "rewards/rejected": -0.5462751388549805, "step": 6705 }, { "epoch": 18.360027378507873, "grad_norm": 5.7062273025512695, "learning_rate": 8.178082191780822e-08, "log_odds_chosen": 1.4228006601333618, "log_odds_ratio": -0.2645682692527771, "logits/chosen": 0.8865656852722168, "logits/rejected": 0.8277486562728882, "logps/chosen": -2.1059229373931885, "logps/rejected": -3.4066524505615234, "loss": 0.5327, "nll_loss": 0.5062246322631836, "rewards/accuracies": 1.0, "rewards/chosen": -0.21059229969978333, "rewards/margins": 0.1300729364156723, "rewards/rejected": -0.34066522121429443, "step": 6706 }, { "epoch": 18.362765229295004, "grad_norm": 6.484023094177246, "learning_rate": 8.164383561643835e-08, "log_odds_chosen": 3.1452813148498535, "log_odds_ratio": -0.22339850664138794, "logits/chosen": 1.0517171621322632, "logits/rejected": 1.1599193811416626, "logps/chosen": -2.734405755996704, "logps/rejected": -5.7577033042907715, "loss": 0.6786, "nll_loss": 0.6562767028808594, "rewards/accuracies": 0.875, "rewards/chosen": -0.27344056963920593, "rewards/margins": 0.30232977867126465, "rewards/rejected": -0.575770378112793, "step": 6707 }, { "epoch": 18.365503080082135, "grad_norm": 6.905330657958984, "learning_rate": 8.150684931506849e-08, "log_odds_chosen": 1.30092453956604, "log_odds_ratio": -0.4256460964679718, "logits/chosen": 0.9408655762672424, "logits/rejected": 0.9520925283432007, "logps/chosen": -2.140265464782715, "logps/rejected": -3.3296422958374023, "loss": 0.586, "nll_loss": 0.5434575080871582, "rewards/accuracies": 0.75, "rewards/chosen": -0.214026540517807, "rewards/margins": 0.11893771588802338, "rewards/rejected": -0.3329642713069916, "step": 6708 }, { "epoch": 18.368240930869266, "grad_norm": 6.287791728973389, "learning_rate": 8.136986301369863e-08, "log_odds_chosen": 1.967294454574585, "log_odds_ratio": -0.3504260182380676, "logits/chosen": 0.9332831501960754, "logits/rejected": 0.9694440364837646, "logps/chosen": -2.2092273235321045, "logps/rejected": -4.0757670402526855, "loss": 0.6668, "nll_loss": 0.6318058967590332, "rewards/accuracies": 0.75, "rewards/chosen": -0.22092272341251373, "rewards/margins": 0.1866540014743805, "rewards/rejected": -0.40757670998573303, "step": 6709 }, { "epoch": 18.3709787816564, "grad_norm": 5.058628559112549, "learning_rate": 8.123287671232875e-08, "log_odds_chosen": 1.6732735633850098, "log_odds_ratio": -0.32521548867225647, "logits/chosen": 0.9751399755477905, "logits/rejected": 1.0833549499511719, "logps/chosen": -1.9281845092773438, "logps/rejected": -3.439950942993164, "loss": 0.5474, "nll_loss": 0.5148926377296448, "rewards/accuracies": 0.875, "rewards/chosen": -0.19281843304634094, "rewards/margins": 0.15117666125297546, "rewards/rejected": -0.3439950942993164, "step": 6710 }, { "epoch": 18.373716632443532, "grad_norm": 5.293423652648926, "learning_rate": 8.10958904109589e-08, "log_odds_chosen": 2.5304689407348633, "log_odds_ratio": -0.28269636631011963, "logits/chosen": 0.7491086721420288, "logits/rejected": 0.7295721769332886, "logps/chosen": -1.8396811485290527, "logps/rejected": -4.252534866333008, "loss": 0.5813, "nll_loss": 0.5530120134353638, "rewards/accuracies": 1.0, "rewards/chosen": -0.18396812677383423, "rewards/margins": 0.24128535389900208, "rewards/rejected": -0.4252535104751587, "step": 6711 }, { "epoch": 18.376454483230663, "grad_norm": 5.764980316162109, "learning_rate": 8.095890410958904e-08, "log_odds_chosen": 3.0240962505340576, "log_odds_ratio": -0.22323857247829437, "logits/chosen": 1.0350346565246582, "logits/rejected": 1.0575006008148193, "logps/chosen": -1.907344102859497, "logps/rejected": -4.741352081298828, "loss": 0.5413, "nll_loss": 0.5189780592918396, "rewards/accuracies": 1.0, "rewards/chosen": -0.190734401345253, "rewards/margins": 0.28340083360671997, "rewards/rejected": -0.47413522005081177, "step": 6712 }, { "epoch": 18.379192334017795, "grad_norm": 4.901844501495361, "learning_rate": 8.082191780821918e-08, "log_odds_chosen": 3.1064834594726562, "log_odds_ratio": -0.160866379737854, "logits/chosen": 0.69495689868927, "logits/rejected": 0.7351659536361694, "logps/chosen": -2.382732391357422, "logps/rejected": -5.380216121673584, "loss": 0.7516, "nll_loss": 0.7354851365089417, "rewards/accuracies": 1.0, "rewards/chosen": -0.2382732480764389, "rewards/margins": 0.29974836111068726, "rewards/rejected": -0.5380216836929321, "step": 6713 }, { "epoch": 18.38193018480493, "grad_norm": 4.84044075012207, "learning_rate": 8.068493150684931e-08, "log_odds_chosen": 2.3271963596343994, "log_odds_ratio": -0.32818403840065, "logits/chosen": 0.8358904719352722, "logits/rejected": 0.8427994847297668, "logps/chosen": -2.0945541858673096, "logps/rejected": -4.335722923278809, "loss": 0.5312, "nll_loss": 0.4984302222728729, "rewards/accuracies": 0.875, "rewards/chosen": -0.2094554305076599, "rewards/margins": 0.22411689162254333, "rewards/rejected": -0.43357232213020325, "step": 6714 }, { "epoch": 18.38466803559206, "grad_norm": 5.364277362823486, "learning_rate": 8.054794520547945e-08, "log_odds_chosen": 1.2436692714691162, "log_odds_ratio": -0.36102598905563354, "logits/chosen": 0.9148363471031189, "logits/rejected": 0.9653631448745728, "logps/chosen": -2.1656508445739746, "logps/rejected": -3.2791762351989746, "loss": 0.5647, "nll_loss": 0.5286219120025635, "rewards/accuracies": 0.875, "rewards/chosen": -0.2165651023387909, "rewards/margins": 0.11135251075029373, "rewards/rejected": -0.3279176354408264, "step": 6715 }, { "epoch": 18.38740588637919, "grad_norm": 4.981910228729248, "learning_rate": 8.041095890410959e-08, "log_odds_chosen": 4.109574317932129, "log_odds_ratio": -0.19196859002113342, "logits/chosen": 0.7890111207962036, "logits/rejected": 0.8228846192359924, "logps/chosen": -2.238379955291748, "logps/rejected": -6.244516849517822, "loss": 0.6954, "nll_loss": 0.6762219667434692, "rewards/accuracies": 1.0, "rewards/chosen": -0.2238379716873169, "rewards/margins": 0.4006137251853943, "rewards/rejected": -0.6244516968727112, "step": 6716 }, { "epoch": 18.390143737166323, "grad_norm": 6.686741828918457, "learning_rate": 8.027397260273972e-08, "log_odds_chosen": 1.3994578123092651, "log_odds_ratio": -0.3601107597351074, "logits/chosen": 0.7153149247169495, "logits/rejected": 0.837926983833313, "logps/chosen": -1.749018669128418, "logps/rejected": -2.9938840866088867, "loss": 0.6364, "nll_loss": 0.6004166007041931, "rewards/accuracies": 0.875, "rewards/chosen": -0.17490187287330627, "rewards/margins": 0.1244865134358406, "rewards/rejected": -0.2993883788585663, "step": 6717 }, { "epoch": 18.392881587953458, "grad_norm": 6.559828281402588, "learning_rate": 8.013698630136986e-08, "log_odds_chosen": 1.6067907810211182, "log_odds_ratio": -0.41129934787750244, "logits/chosen": 0.9777618050575256, "logits/rejected": 0.9956744313240051, "logps/chosen": -2.620211601257324, "logps/rejected": -4.199028015136719, "loss": 0.7794, "nll_loss": 0.7382463216781616, "rewards/accuracies": 0.75, "rewards/chosen": -0.26202118396759033, "rewards/margins": 0.15788161754608154, "rewards/rejected": -0.4199028015136719, "step": 6718 }, { "epoch": 18.39561943874059, "grad_norm": 5.063277721405029, "learning_rate": 8e-08, "log_odds_chosen": 1.5523678064346313, "log_odds_ratio": -0.27581703662872314, "logits/chosen": 0.8902558088302612, "logits/rejected": 0.937301754951477, "logps/chosen": -1.9716017246246338, "logps/rejected": -3.3905029296875, "loss": 0.6357, "nll_loss": 0.6080964803695679, "rewards/accuracies": 1.0, "rewards/chosen": -0.19716018438339233, "rewards/margins": 0.14189013838768005, "rewards/rejected": -0.33905029296875, "step": 6719 }, { "epoch": 18.39835728952772, "grad_norm": 5.123894691467285, "learning_rate": 7.986301369863013e-08, "log_odds_chosen": 2.5401268005371094, "log_odds_ratio": -0.19860854744911194, "logits/chosen": 0.7912245988845825, "logits/rejected": 0.8230555057525635, "logps/chosen": -2.0041344165802, "logps/rejected": -4.407651424407959, "loss": 0.8304, "nll_loss": 0.8105270266532898, "rewards/accuracies": 1.0, "rewards/chosen": -0.20041343569755554, "rewards/margins": 0.24035172164440155, "rewards/rejected": -0.4407651424407959, "step": 6720 }, { "epoch": 18.40109514031485, "grad_norm": 6.101592540740967, "learning_rate": 7.972602739726027e-08, "log_odds_chosen": 1.3978509902954102, "log_odds_ratio": -0.3646196126937866, "logits/chosen": 0.8742135167121887, "logits/rejected": 0.8915579319000244, "logps/chosen": -2.1532907485961914, "logps/rejected": -3.493194341659546, "loss": 0.6663, "nll_loss": 0.6297961473464966, "rewards/accuracies": 0.875, "rewards/chosen": -0.215329110622406, "rewards/margins": 0.1339903473854065, "rewards/rejected": -0.3493194282054901, "step": 6721 }, { "epoch": 18.403832991101986, "grad_norm": 5.9445648193359375, "learning_rate": 7.95890410958904e-08, "log_odds_chosen": 0.9269759654998779, "log_odds_ratio": -0.43461036682128906, "logits/chosen": 0.9077532291412354, "logits/rejected": 0.8971507549285889, "logps/chosen": -2.3456339836120605, "logps/rejected": -3.1904549598693848, "loss": 0.6168, "nll_loss": 0.5733853578567505, "rewards/accuracies": 0.875, "rewards/chosen": -0.2345634251832962, "rewards/margins": 0.08448205888271332, "rewards/rejected": -0.3190454840660095, "step": 6722 }, { "epoch": 18.406570841889117, "grad_norm": 5.9734320640563965, "learning_rate": 7.945205479452056e-08, "log_odds_chosen": 1.252614974975586, "log_odds_ratio": -0.30278193950653076, "logits/chosen": 0.7043459415435791, "logits/rejected": 0.7774385213851929, "logps/chosen": -2.507573366165161, "logps/rejected": -3.652297258377075, "loss": 0.6468, "nll_loss": 0.6165130138397217, "rewards/accuracies": 1.0, "rewards/chosen": -0.2507573366165161, "rewards/margins": 0.1144723892211914, "rewards/rejected": -0.3652297258377075, "step": 6723 }, { "epoch": 18.409308692676248, "grad_norm": 5.690271377563477, "learning_rate": 7.931506849315068e-08, "log_odds_chosen": 1.6503760814666748, "log_odds_ratio": -0.2011723667383194, "logits/chosen": 0.831954836845398, "logits/rejected": 0.9353002309799194, "logps/chosen": -3.0249171257019043, "logps/rejected": -4.591087341308594, "loss": 0.7532, "nll_loss": 0.7330948114395142, "rewards/accuracies": 1.0, "rewards/chosen": -0.302491694688797, "rewards/margins": 0.15661704540252686, "rewards/rejected": -0.45910876989364624, "step": 6724 }, { "epoch": 18.412046543463383, "grad_norm": 5.734854221343994, "learning_rate": 7.917808219178082e-08, "log_odds_chosen": 1.7112281322479248, "log_odds_ratio": -0.28351402282714844, "logits/chosen": 0.7681875228881836, "logits/rejected": 0.7627775073051453, "logps/chosen": -1.5720795392990112, "logps/rejected": -3.0569801330566406, "loss": 0.5997, "nll_loss": 0.5713176727294922, "rewards/accuracies": 1.0, "rewards/chosen": -0.15720796585083008, "rewards/margins": 0.1484900563955307, "rewards/rejected": -0.305698037147522, "step": 6725 }, { "epoch": 18.414784394250514, "grad_norm": 5.40477991104126, "learning_rate": 7.904109589041096e-08, "log_odds_chosen": 2.124268054962158, "log_odds_ratio": -0.1926848590373993, "logits/chosen": 0.9365533590316772, "logits/rejected": 0.91899174451828, "logps/chosen": -1.8762892484664917, "logps/rejected": -3.8336353302001953, "loss": 0.6197, "nll_loss": 0.60042405128479, "rewards/accuracies": 1.0, "rewards/chosen": -0.18762895464897156, "rewards/margins": 0.19573460519313812, "rewards/rejected": -0.3833635449409485, "step": 6726 }, { "epoch": 18.417522245037645, "grad_norm": 5.789877891540527, "learning_rate": 7.890410958904109e-08, "log_odds_chosen": 2.7045516967773438, "log_odds_ratio": -0.23272089660167694, "logits/chosen": 0.9221189022064209, "logits/rejected": 0.9500003457069397, "logps/chosen": -2.580620765686035, "logps/rejected": -5.16022253036499, "loss": 0.6559, "nll_loss": 0.6326375603675842, "rewards/accuracies": 0.875, "rewards/chosen": -0.25806206464767456, "rewards/margins": 0.2579602003097534, "rewards/rejected": -0.5160222053527832, "step": 6727 }, { "epoch": 18.420260095824776, "grad_norm": 4.758569240570068, "learning_rate": 7.876712328767122e-08, "log_odds_chosen": 3.637979030609131, "log_odds_ratio": -0.19030041992664337, "logits/chosen": 0.8943924903869629, "logits/rejected": 0.9213706851005554, "logps/chosen": -2.071413993835449, "logps/rejected": -5.57468318939209, "loss": 0.6149, "nll_loss": 0.5958593487739563, "rewards/accuracies": 1.0, "rewards/chosen": -0.20714139938354492, "rewards/margins": 0.3503269553184509, "rewards/rejected": -0.5574683547019958, "step": 6728 }, { "epoch": 18.42299794661191, "grad_norm": 7.0599870681762695, "learning_rate": 7.863013698630136e-08, "log_odds_chosen": 1.531079649925232, "log_odds_ratio": -0.3215852677822113, "logits/chosen": 0.9329233169555664, "logits/rejected": 0.947198212146759, "logps/chosen": -2.708979368209839, "logps/rejected": -4.205901622772217, "loss": 0.6133, "nll_loss": 0.5811722278594971, "rewards/accuracies": 0.875, "rewards/chosen": -0.27089792490005493, "rewards/margins": 0.14969222247600555, "rewards/rejected": -0.4205901324748993, "step": 6729 }, { "epoch": 18.425735797399042, "grad_norm": 5.840306758880615, "learning_rate": 7.84931506849315e-08, "log_odds_chosen": 2.0968189239501953, "log_odds_ratio": -0.36227285861968994, "logits/chosen": 0.7469832897186279, "logits/rejected": 0.7887312173843384, "logps/chosen": -2.169434070587158, "logps/rejected": -4.183313369750977, "loss": 0.6403, "nll_loss": 0.6040973663330078, "rewards/accuracies": 0.75, "rewards/chosen": -0.2169433981180191, "rewards/margins": 0.20138797163963318, "rewards/rejected": -0.4183313846588135, "step": 6730 }, { "epoch": 18.428473648186174, "grad_norm": 5.494467735290527, "learning_rate": 7.835616438356164e-08, "log_odds_chosen": 2.654339075088501, "log_odds_ratio": -0.16556330025196075, "logits/chosen": 0.6831325888633728, "logits/rejected": 0.7218988537788391, "logps/chosen": -2.2304000854492188, "logps/rejected": -4.787937164306641, "loss": 0.6396, "nll_loss": 0.6230121850967407, "rewards/accuracies": 1.0, "rewards/chosen": -0.22304001450538635, "rewards/margins": 0.255753755569458, "rewards/rejected": -0.478793740272522, "step": 6731 }, { "epoch": 18.431211498973305, "grad_norm": 5.050406455993652, "learning_rate": 7.821917808219178e-08, "log_odds_chosen": 2.947283983230591, "log_odds_ratio": -0.15071827173233032, "logits/chosen": 0.9438355565071106, "logits/rejected": 1.0540082454681396, "logps/chosen": -2.429445743560791, "logps/rejected": -5.235482692718506, "loss": 0.6118, "nll_loss": 0.5966867208480835, "rewards/accuracies": 1.0, "rewards/chosen": -0.24294456839561462, "rewards/margins": 0.28060370683670044, "rewards/rejected": -0.5235482454299927, "step": 6732 }, { "epoch": 18.43394934976044, "grad_norm": 5.3644890785217285, "learning_rate": 7.808219178082192e-08, "log_odds_chosen": 1.697303056716919, "log_odds_ratio": -0.33006900548934937, "logits/chosen": 0.8685526251792908, "logits/rejected": 0.8763327598571777, "logps/chosen": -2.0528976917266846, "logps/rejected": -3.663105010986328, "loss": 0.6292, "nll_loss": 0.596193790435791, "rewards/accuracies": 0.875, "rewards/chosen": -0.2052897810935974, "rewards/margins": 0.16102072596549988, "rewards/rejected": -0.3663105070590973, "step": 6733 }, { "epoch": 18.43668720054757, "grad_norm": 5.488085746765137, "learning_rate": 7.794520547945204e-08, "log_odds_chosen": 2.360252857208252, "log_odds_ratio": -0.2192290723323822, "logits/chosen": 0.9573619961738586, "logits/rejected": 1.0512471199035645, "logps/chosen": -2.666447877883911, "logps/rejected": -4.948699951171875, "loss": 0.6702, "nll_loss": 0.6482647657394409, "rewards/accuracies": 1.0, "rewards/chosen": -0.26664477586746216, "rewards/margins": 0.2282252460718155, "rewards/rejected": -0.49487003684043884, "step": 6734 }, { "epoch": 18.439425051334702, "grad_norm": 6.155900478363037, "learning_rate": 7.780821917808218e-08, "log_odds_chosen": 2.0835769176483154, "log_odds_ratio": -0.350615531206131, "logits/chosen": 0.8484897613525391, "logits/rejected": 0.8842746019363403, "logps/chosen": -3.116948127746582, "logps/rejected": -5.100485801696777, "loss": 0.7625, "nll_loss": 0.7274783253669739, "rewards/accuracies": 0.75, "rewards/chosen": -0.311694860458374, "rewards/margins": 0.19835375249385834, "rewards/rejected": -0.5100486278533936, "step": 6735 }, { "epoch": 18.442162902121833, "grad_norm": 4.95733642578125, "learning_rate": 7.767123287671234e-08, "log_odds_chosen": 2.4618067741394043, "log_odds_ratio": -0.4179551601409912, "logits/chosen": 0.794675886631012, "logits/rejected": 0.8765407800674438, "logps/chosen": -2.6317226886749268, "logps/rejected": -5.034266471862793, "loss": 0.7668, "nll_loss": 0.725054144859314, "rewards/accuracies": 0.875, "rewards/chosen": -0.2631722688674927, "rewards/margins": 0.24025440216064453, "rewards/rejected": -0.5034266710281372, "step": 6736 }, { "epoch": 18.444900752908968, "grad_norm": 6.360467910766602, "learning_rate": 7.753424657534246e-08, "log_odds_chosen": 1.52549147605896, "log_odds_ratio": -0.4933786392211914, "logits/chosen": 0.9130533933639526, "logits/rejected": 0.9286308288574219, "logps/chosen": -2.978029489517212, "logps/rejected": -4.43778657913208, "loss": 0.8381, "nll_loss": 0.7887290716171265, "rewards/accuracies": 0.875, "rewards/chosen": -0.29780295491218567, "rewards/margins": 0.1459757387638092, "rewards/rejected": -0.4437786936759949, "step": 6737 }, { "epoch": 18.4476386036961, "grad_norm": 9.737144470214844, "learning_rate": 7.73972602739726e-08, "log_odds_chosen": 2.502570629119873, "log_odds_ratio": -0.3440733551979065, "logits/chosen": 0.7749554514884949, "logits/rejected": 0.793428897857666, "logps/chosen": -3.5398881435394287, "logps/rejected": -5.98095703125, "loss": 0.7531, "nll_loss": 0.7186713814735413, "rewards/accuracies": 0.875, "rewards/chosen": -0.3539888262748718, "rewards/margins": 0.2441069334745407, "rewards/rejected": -0.598095715045929, "step": 6738 }, { "epoch": 18.45037645448323, "grad_norm": 7.010035037994385, "learning_rate": 7.726027397260274e-08, "log_odds_chosen": 1.6583988666534424, "log_odds_ratio": -0.30196863412857056, "logits/chosen": 0.7852351665496826, "logits/rejected": 0.7233861684799194, "logps/chosen": -1.4734975099563599, "logps/rejected": -2.9175286293029785, "loss": 0.6446, "nll_loss": 0.6144246459007263, "rewards/accuracies": 1.0, "rewards/chosen": -0.1473497450351715, "rewards/margins": 0.1444031000137329, "rewards/rejected": -0.2917528748512268, "step": 6739 }, { "epoch": 18.45311430527036, "grad_norm": 5.184445381164551, "learning_rate": 7.712328767123286e-08, "log_odds_chosen": 2.168745517730713, "log_odds_ratio": -0.2023693323135376, "logits/chosen": 0.8448166847229004, "logits/rejected": 0.8624322414398193, "logps/chosen": -1.7660908699035645, "logps/rejected": -3.761260986328125, "loss": 0.5471, "nll_loss": 0.5268217325210571, "rewards/accuracies": 0.875, "rewards/chosen": -0.1766090989112854, "rewards/margins": 0.19951701164245605, "rewards/rejected": -0.37612611055374146, "step": 6740 }, { "epoch": 18.455852156057496, "grad_norm": 5.891830921173096, "learning_rate": 7.6986301369863e-08, "log_odds_chosen": 3.423884391784668, "log_odds_ratio": -0.1249924823641777, "logits/chosen": 0.7814656496047974, "logits/rejected": 0.8642565011978149, "logps/chosen": -2.579246997833252, "logps/rejected": -5.912109851837158, "loss": 0.7419, "nll_loss": 0.729441225528717, "rewards/accuracies": 1.0, "rewards/chosen": -0.2579247057437897, "rewards/margins": 0.333286315202713, "rewards/rejected": -0.5912110209465027, "step": 6741 }, { "epoch": 18.458590006844627, "grad_norm": 4.814009666442871, "learning_rate": 7.684931506849316e-08, "log_odds_chosen": 1.4848638772964478, "log_odds_ratio": -0.33864253759384155, "logits/chosen": 0.8224129676818848, "logits/rejected": 0.8869380950927734, "logps/chosen": -2.2391114234924316, "logps/rejected": -3.6301519870758057, "loss": 0.6401, "nll_loss": 0.6062756776809692, "rewards/accuracies": 0.875, "rewards/chosen": -0.2239111214876175, "rewards/margins": 0.13910405337810516, "rewards/rejected": -0.36301517486572266, "step": 6742 }, { "epoch": 18.46132785763176, "grad_norm": 5.057220935821533, "learning_rate": 7.67123287671233e-08, "log_odds_chosen": 2.1496288776397705, "log_odds_ratio": -0.20204775035381317, "logits/chosen": 1.0143262147903442, "logits/rejected": 1.0626741647720337, "logps/chosen": -1.9196707010269165, "logps/rejected": -3.9478721618652344, "loss": 0.651, "nll_loss": 0.6307769417762756, "rewards/accuracies": 1.0, "rewards/chosen": -0.19196707010269165, "rewards/margins": 0.20282015204429626, "rewards/rejected": -0.3947872221469879, "step": 6743 }, { "epoch": 18.46406570841889, "grad_norm": 5.8109846115112305, "learning_rate": 7.657534246575342e-08, "log_odds_chosen": 0.9080074429512024, "log_odds_ratio": -0.4524697959423065, "logits/chosen": 0.8419750928878784, "logits/rejected": 0.8291100263595581, "logps/chosen": -2.0782318115234375, "logps/rejected": -2.8511786460876465, "loss": 0.5491, "nll_loss": 0.5038158893585205, "rewards/accuracies": 0.75, "rewards/chosen": -0.20782317221164703, "rewards/margins": 0.07729470729827881, "rewards/rejected": -0.28511789441108704, "step": 6744 }, { "epoch": 18.466803559206024, "grad_norm": 4.679601669311523, "learning_rate": 7.643835616438356e-08, "log_odds_chosen": 2.8976149559020996, "log_odds_ratio": -0.2031160295009613, "logits/chosen": 0.6805058717727661, "logits/rejected": 0.7513936161994934, "logps/chosen": -1.5222270488739014, "logps/rejected": -4.240930557250977, "loss": 0.5142, "nll_loss": 0.4939054548740387, "rewards/accuracies": 1.0, "rewards/chosen": -0.15222270786762238, "rewards/margins": 0.27187037467956543, "rewards/rejected": -0.424093097448349, "step": 6745 }, { "epoch": 18.469541409993155, "grad_norm": 5.981588363647461, "learning_rate": 7.63013698630137e-08, "log_odds_chosen": 3.333564281463623, "log_odds_ratio": -0.2624778151512146, "logits/chosen": 0.7782367467880249, "logits/rejected": 0.8741108775138855, "logps/chosen": -1.9805265665054321, "logps/rejected": -5.174858093261719, "loss": 0.6573, "nll_loss": 0.6310268640518188, "rewards/accuracies": 0.875, "rewards/chosen": -0.19805265963077545, "rewards/margins": 0.31943321228027344, "rewards/rejected": -0.5174858570098877, "step": 6746 }, { "epoch": 18.472279260780287, "grad_norm": 4.84327507019043, "learning_rate": 7.616438356164382e-08, "log_odds_chosen": 2.553046703338623, "log_odds_ratio": -0.17896851897239685, "logits/chosen": 0.7459697723388672, "logits/rejected": 0.8310622572898865, "logps/chosen": -2.4871506690979004, "logps/rejected": -4.88866662979126, "loss": 0.6042, "nll_loss": 0.5863295793533325, "rewards/accuracies": 1.0, "rewards/chosen": -0.2487150877714157, "rewards/margins": 0.24015159904956818, "rewards/rejected": -0.4888666868209839, "step": 6747 }, { "epoch": 18.47501711156742, "grad_norm": 7.503884792327881, "learning_rate": 7.602739726027398e-08, "log_odds_chosen": 0.7853525280952454, "log_odds_ratio": -0.5932996273040771, "logits/chosen": 0.7324714064598083, "logits/rejected": 0.690409779548645, "logps/chosen": -2.380760908126831, "logps/rejected": -3.095935821533203, "loss": 0.7269, "nll_loss": 0.6675881147384644, "rewards/accuracies": 0.75, "rewards/chosen": -0.23807606101036072, "rewards/margins": 0.07151751220226288, "rewards/rejected": -0.3095935881137848, "step": 6748 }, { "epoch": 18.477754962354553, "grad_norm": 5.25215482711792, "learning_rate": 7.589041095890411e-08, "log_odds_chosen": 1.9328768253326416, "log_odds_ratio": -0.3138059377670288, "logits/chosen": 0.7654517292976379, "logits/rejected": 0.7543290853500366, "logps/chosen": -1.5470092296600342, "logps/rejected": -3.266427516937256, "loss": 0.5519, "nll_loss": 0.5204917788505554, "rewards/accuracies": 0.75, "rewards/chosen": -0.15470091998577118, "rewards/margins": 0.1719418466091156, "rewards/rejected": -0.326642781496048, "step": 6749 }, { "epoch": 18.480492813141684, "grad_norm": 5.28604793548584, "learning_rate": 7.575342465753424e-08, "log_odds_chosen": 2.1754329204559326, "log_odds_ratio": -0.20032891631126404, "logits/chosen": 0.9813538193702698, "logits/rejected": 0.9843432903289795, "logps/chosen": -1.7708748579025269, "logps/rejected": -3.7893197536468506, "loss": 0.5197, "nll_loss": 0.4996908903121948, "rewards/accuracies": 1.0, "rewards/chosen": -0.17708748579025269, "rewards/margins": 0.2018444836139679, "rewards/rejected": -0.37893199920654297, "step": 6750 }, { "epoch": 18.483230663928815, "grad_norm": 7.7072625160217285, "learning_rate": 7.561643835616438e-08, "log_odds_chosen": 1.2153191566467285, "log_odds_ratio": -0.4050631821155548, "logits/chosen": 0.8054745197296143, "logits/rejected": 0.8376098275184631, "logps/chosen": -3.153834342956543, "logps/rejected": -4.310851097106934, "loss": 0.7407, "nll_loss": 0.7001850605010986, "rewards/accuracies": 0.875, "rewards/chosen": -0.3153834342956543, "rewards/margins": 0.11570169031620026, "rewards/rejected": -0.43108513951301575, "step": 6751 }, { "epoch": 18.48596851471595, "grad_norm": 5.315396785736084, "learning_rate": 7.547945205479452e-08, "log_odds_chosen": 2.3000330924987793, "log_odds_ratio": -0.12425698339939117, "logits/chosen": 0.5953062772750854, "logits/rejected": 0.6353240013122559, "logps/chosen": -2.167102098464966, "logps/rejected": -4.316863536834717, "loss": 0.7001, "nll_loss": 0.6876549124717712, "rewards/accuracies": 1.0, "rewards/chosen": -0.2167101949453354, "rewards/margins": 0.21497611701488495, "rewards/rejected": -0.4316863417625427, "step": 6752 }, { "epoch": 18.48870636550308, "grad_norm": 5.612210273742676, "learning_rate": 7.534246575342466e-08, "log_odds_chosen": 3.084653615951538, "log_odds_ratio": -0.14275287091732025, "logits/chosen": 0.9862174987792969, "logits/rejected": 1.049647331237793, "logps/chosen": -2.5468151569366455, "logps/rejected": -5.508317947387695, "loss": 0.6692, "nll_loss": 0.6549562215805054, "rewards/accuracies": 1.0, "rewards/chosen": -0.2546815276145935, "rewards/margins": 0.29615023732185364, "rewards/rejected": -0.5508317947387695, "step": 6753 }, { "epoch": 18.491444216290212, "grad_norm": 8.757193565368652, "learning_rate": 7.520547945205478e-08, "log_odds_chosen": 1.8358083963394165, "log_odds_ratio": -0.2354242354631424, "logits/chosen": 0.9140123128890991, "logits/rejected": 0.9987138509750366, "logps/chosen": -2.8829057216644287, "logps/rejected": -4.648434638977051, "loss": 0.8002, "nll_loss": 0.7766985893249512, "rewards/accuracies": 1.0, "rewards/chosen": -0.2882905602455139, "rewards/margins": 0.1765529215335846, "rewards/rejected": -0.4648434817790985, "step": 6754 }, { "epoch": 18.494182067077343, "grad_norm": 5.923051357269287, "learning_rate": 7.506849315068493e-08, "log_odds_chosen": 2.625460624694824, "log_odds_ratio": -0.2424028068780899, "logits/chosen": 0.997421145439148, "logits/rejected": 0.9410027861595154, "logps/chosen": -1.827655553817749, "logps/rejected": -4.324795722961426, "loss": 0.6194, "nll_loss": 0.5951475501060486, "rewards/accuracies": 0.875, "rewards/chosen": -0.18276555836200714, "rewards/margins": 0.24971401691436768, "rewards/rejected": -0.4324795603752136, "step": 6755 }, { "epoch": 18.496919917864478, "grad_norm": 5.797228813171387, "learning_rate": 7.493150684931507e-08, "log_odds_chosen": 3.1242294311523438, "log_odds_ratio": -0.3554135859012604, "logits/chosen": 0.6683405041694641, "logits/rejected": 0.7538906335830688, "logps/chosen": -2.634812355041504, "logps/rejected": -5.6363911628723145, "loss": 0.6978, "nll_loss": 0.66224205493927, "rewards/accuracies": 0.875, "rewards/chosen": -0.2634812295436859, "rewards/margins": 0.3001578748226166, "rewards/rejected": -0.5636391043663025, "step": 6756 }, { "epoch": 18.49965776865161, "grad_norm": 6.068838596343994, "learning_rate": 7.47945205479452e-08, "log_odds_chosen": 2.2089412212371826, "log_odds_ratio": -0.24634936451911926, "logits/chosen": 0.8234243392944336, "logits/rejected": 0.8358379602432251, "logps/chosen": -2.8436222076416016, "logps/rejected": -4.94612979888916, "loss": 0.6132, "nll_loss": 0.5885502099990845, "rewards/accuracies": 0.875, "rewards/chosen": -0.28436222672462463, "rewards/margins": 0.21025079488754272, "rewards/rejected": -0.49461299180984497, "step": 6757 }, { "epoch": 18.50239561943874, "grad_norm": 6.156576156616211, "learning_rate": 7.465753424657534e-08, "log_odds_chosen": 1.8974803686141968, "log_odds_ratio": -0.29074519872665405, "logits/chosen": 1.0371309518814087, "logits/rejected": 1.1093969345092773, "logps/chosen": -2.6080498695373535, "logps/rejected": -4.404135704040527, "loss": 0.685, "nll_loss": 0.6559586524963379, "rewards/accuracies": 0.875, "rewards/chosen": -0.2608049511909485, "rewards/margins": 0.179608553647995, "rewards/rejected": -0.44041353464126587, "step": 6758 }, { "epoch": 18.50513347022587, "grad_norm": 6.067608833312988, "learning_rate": 7.452054794520548e-08, "log_odds_chosen": 2.2852582931518555, "log_odds_ratio": -0.2771281599998474, "logits/chosen": 1.15084707736969, "logits/rejected": 1.1594209671020508, "logps/chosen": -2.100374221801758, "logps/rejected": -4.270103454589844, "loss": 0.7098, "nll_loss": 0.6821334362030029, "rewards/accuracies": 0.875, "rewards/chosen": -0.2100374400615692, "rewards/margins": 0.21697290241718292, "rewards/rejected": -0.42701035737991333, "step": 6759 }, { "epoch": 18.507871321013006, "grad_norm": 5.982435703277588, "learning_rate": 7.43835616438356e-08, "log_odds_chosen": 1.8909807205200195, "log_odds_ratio": -0.2878912389278412, "logits/chosen": 1.0519453287124634, "logits/rejected": 0.9918991923332214, "logps/chosen": -1.8904342651367188, "logps/rejected": -3.6567044258117676, "loss": 0.5321, "nll_loss": 0.503305196762085, "rewards/accuracies": 1.0, "rewards/chosen": -0.18904343247413635, "rewards/margins": 0.1766270250082016, "rewards/rejected": -0.36567047238349915, "step": 6760 }, { "epoch": 18.510609171800137, "grad_norm": 5.935518741607666, "learning_rate": 7.424657534246575e-08, "log_odds_chosen": 0.6654832363128662, "log_odds_ratio": -0.5745099186897278, "logits/chosen": 0.8662036657333374, "logits/rejected": 0.8743911981582642, "logps/chosen": -2.6759281158447266, "logps/rejected": -3.2948036193847656, "loss": 0.7435, "nll_loss": 0.6860536336898804, "rewards/accuracies": 0.625, "rewards/chosen": -0.26759278774261475, "rewards/margins": 0.06188756972551346, "rewards/rejected": -0.32948037981987, "step": 6761 }, { "epoch": 18.51334702258727, "grad_norm": 7.5124969482421875, "learning_rate": 7.410958904109589e-08, "log_odds_chosen": 1.5210987329483032, "log_odds_ratio": -0.5838070511817932, "logits/chosen": 0.8759576082229614, "logits/rejected": 0.8866920471191406, "logps/chosen": -2.037229061126709, "logps/rejected": -3.3948588371276855, "loss": 0.7295, "nll_loss": 0.6710997819900513, "rewards/accuracies": 0.75, "rewards/chosen": -0.20372292399406433, "rewards/margins": 0.13576292991638184, "rewards/rejected": -0.33948588371276855, "step": 6762 }, { "epoch": 18.5160848733744, "grad_norm": 5.07658052444458, "learning_rate": 7.397260273972603e-08, "log_odds_chosen": 2.232027292251587, "log_odds_ratio": -0.2605341970920563, "logits/chosen": 0.6965198516845703, "logits/rejected": 0.7344116568565369, "logps/chosen": -1.8180575370788574, "logps/rejected": -3.8668293952941895, "loss": 0.6545, "nll_loss": 0.6284516453742981, "rewards/accuracies": 1.0, "rewards/chosen": -0.18180572986602783, "rewards/margins": 0.20487719774246216, "rewards/rejected": -0.3866829574108124, "step": 6763 }, { "epoch": 18.518822724161534, "grad_norm": 5.885448932647705, "learning_rate": 7.383561643835616e-08, "log_odds_chosen": 3.390683174133301, "log_odds_ratio": -0.23833565413951874, "logits/chosen": 0.9777625799179077, "logits/rejected": 1.0170021057128906, "logps/chosen": -2.7763383388519287, "logps/rejected": -6.0286407470703125, "loss": 0.7436, "nll_loss": 0.7197240591049194, "rewards/accuracies": 0.75, "rewards/chosen": -0.2776338458061218, "rewards/margins": 0.325230211019516, "rewards/rejected": -0.6028640866279602, "step": 6764 }, { "epoch": 18.521560574948666, "grad_norm": 7.456974029541016, "learning_rate": 7.36986301369863e-08, "log_odds_chosen": 2.236801862716675, "log_odds_ratio": -0.3768005073070526, "logits/chosen": 1.0547404289245605, "logits/rejected": 1.0921419858932495, "logps/chosen": -2.705153226852417, "logps/rejected": -4.863222599029541, "loss": 0.7152, "nll_loss": 0.6775519251823425, "rewards/accuracies": 0.875, "rewards/chosen": -0.2705153226852417, "rewards/margins": 0.2158069610595703, "rewards/rejected": -0.486322283744812, "step": 6765 }, { "epoch": 18.524298425735797, "grad_norm": 5.540563106536865, "learning_rate": 7.356164383561643e-08, "log_odds_chosen": 1.6544808149337769, "log_odds_ratio": -0.30800706148147583, "logits/chosen": 0.8797564506530762, "logits/rejected": 0.8700698614120483, "logps/chosen": -1.6232068538665771, "logps/rejected": -3.0996923446655273, "loss": 0.5121, "nll_loss": 0.481253981590271, "rewards/accuracies": 0.875, "rewards/chosen": -0.16232070326805115, "rewards/margins": 0.14764851331710815, "rewards/rejected": -0.3099692165851593, "step": 6766 }, { "epoch": 18.527036276522928, "grad_norm": 5.966721057891846, "learning_rate": 7.342465753424657e-08, "log_odds_chosen": 2.382723569869995, "log_odds_ratio": -0.247523233294487, "logits/chosen": 0.6777847409248352, "logits/rejected": 0.6563432812690735, "logps/chosen": -2.5830719470977783, "logps/rejected": -4.830670356750488, "loss": 0.6686, "nll_loss": 0.6438643932342529, "rewards/accuracies": 1.0, "rewards/chosen": -0.25830721855163574, "rewards/margins": 0.22475983202457428, "rewards/rejected": -0.4830670654773712, "step": 6767 }, { "epoch": 18.529774127310063, "grad_norm": 6.442725658416748, "learning_rate": 7.328767123287671e-08, "log_odds_chosen": 1.6970497369766235, "log_odds_ratio": -0.2695852816104889, "logits/chosen": 0.7380576133728027, "logits/rejected": 0.769270658493042, "logps/chosen": -1.928361177444458, "logps/rejected": -3.444700002670288, "loss": 0.5902, "nll_loss": 0.5632542371749878, "rewards/accuracies": 0.875, "rewards/chosen": -0.19283612072467804, "rewards/margins": 0.15163388848304749, "rewards/rejected": -0.3444700241088867, "step": 6768 }, { "epoch": 18.532511978097194, "grad_norm": 5.046283721923828, "learning_rate": 7.315068493150685e-08, "log_odds_chosen": 2.4467062950134277, "log_odds_ratio": -0.1890733242034912, "logits/chosen": 0.9896843433380127, "logits/rejected": 0.9752607345581055, "logps/chosen": -1.7127609252929688, "logps/rejected": -4.008826732635498, "loss": 0.5677, "nll_loss": 0.5487502813339233, "rewards/accuracies": 1.0, "rewards/chosen": -0.17127610743045807, "rewards/margins": 0.22960656881332397, "rewards/rejected": -0.40088269114494324, "step": 6769 }, { "epoch": 18.535249828884325, "grad_norm": 7.233811378479004, "learning_rate": 7.301369863013698e-08, "log_odds_chosen": 2.389734983444214, "log_odds_ratio": -0.26768162846565247, "logits/chosen": 1.0523768663406372, "logits/rejected": 1.0869098901748657, "logps/chosen": -2.2911715507507324, "logps/rejected": -4.567820072174072, "loss": 0.7367, "nll_loss": 0.7099097967147827, "rewards/accuracies": 0.75, "rewards/chosen": -0.22911715507507324, "rewards/margins": 0.22766485810279846, "rewards/rejected": -0.4567820131778717, "step": 6770 }, { "epoch": 18.537987679671456, "grad_norm": 4.555473327636719, "learning_rate": 7.287671232876712e-08, "log_odds_chosen": 2.7311909198760986, "log_odds_ratio": -0.1669323593378067, "logits/chosen": 0.7361205220222473, "logits/rejected": 0.8184306621551514, "logps/chosen": -1.9867217540740967, "logps/rejected": -4.601078510284424, "loss": 0.5922, "nll_loss": 0.5755232572555542, "rewards/accuracies": 1.0, "rewards/chosen": -0.19867217540740967, "rewards/margins": 0.2614356577396393, "rewards/rejected": -0.46010786294937134, "step": 6771 }, { "epoch": 18.54072553045859, "grad_norm": 6.5864644050598145, "learning_rate": 7.273972602739725e-08, "log_odds_chosen": 3.0502817630767822, "log_odds_ratio": -0.1811818927526474, "logits/chosen": 0.874171257019043, "logits/rejected": 0.912268877029419, "logps/chosen": -2.2083206176757812, "logps/rejected": -5.124221324920654, "loss": 0.6989, "nll_loss": 0.68081134557724, "rewards/accuracies": 1.0, "rewards/chosen": -0.22083207964897156, "rewards/margins": 0.2915900945663452, "rewards/rejected": -0.5124222040176392, "step": 6772 }, { "epoch": 18.543463381245722, "grad_norm": 5.963667392730713, "learning_rate": 7.260273972602739e-08, "log_odds_chosen": 3.617039680480957, "log_odds_ratio": -0.13195490837097168, "logits/chosen": 0.7884234189987183, "logits/rejected": 0.7917709946632385, "logps/chosen": -2.170532464981079, "logps/rejected": -5.582705020904541, "loss": 0.6527, "nll_loss": 0.6395395994186401, "rewards/accuracies": 1.0, "rewards/chosen": -0.21705324947834015, "rewards/margins": 0.3412172198295593, "rewards/rejected": -0.5582704544067383, "step": 6773 }, { "epoch": 18.546201232032853, "grad_norm": 6.1207756996154785, "learning_rate": 7.246575342465753e-08, "log_odds_chosen": 2.032918691635132, "log_odds_ratio": -0.21729490160942078, "logits/chosen": 0.8301206827163696, "logits/rejected": 0.7884936928749084, "logps/chosen": -1.9389317035675049, "logps/rejected": -3.7931301593780518, "loss": 0.6403, "nll_loss": 0.6185435652732849, "rewards/accuracies": 1.0, "rewards/chosen": -0.193893164396286, "rewards/margins": 0.18541985750198364, "rewards/rejected": -0.37931302189826965, "step": 6774 }, { "epoch": 18.548939082819984, "grad_norm": 10.183124542236328, "learning_rate": 7.232876712328767e-08, "log_odds_chosen": 0.6965267658233643, "log_odds_ratio": -0.6723620891571045, "logits/chosen": 0.6798668503761292, "logits/rejected": 0.6903665065765381, "logps/chosen": -2.847729206085205, "logps/rejected": -3.436586380004883, "loss": 0.7015, "nll_loss": 0.6343103647232056, "rewards/accuracies": 0.75, "rewards/chosen": -0.28477293252944946, "rewards/margins": 0.05888569727540016, "rewards/rejected": -0.3436586558818817, "step": 6775 }, { "epoch": 18.55167693360712, "grad_norm": 5.707033157348633, "learning_rate": 7.219178082191781e-08, "log_odds_chosen": 3.665329933166504, "log_odds_ratio": -0.09925565123558044, "logits/chosen": 1.1011379957199097, "logits/rejected": 1.0947462320327759, "logps/chosen": -2.08011531829834, "logps/rejected": -5.605924606323242, "loss": 0.6709, "nll_loss": 0.6609561443328857, "rewards/accuracies": 1.0, "rewards/chosen": -0.20801155269145966, "rewards/margins": 0.3525809049606323, "rewards/rejected": -0.5605924129486084, "step": 6776 }, { "epoch": 18.55441478439425, "grad_norm": 8.831852912902832, "learning_rate": 7.205479452054794e-08, "log_odds_chosen": 2.34637451171875, "log_odds_ratio": -0.22581326961517334, "logits/chosen": 0.8034212589263916, "logits/rejected": 0.7907951474189758, "logps/chosen": -2.609285831451416, "logps/rejected": -4.837646961212158, "loss": 0.652, "nll_loss": 0.629410982131958, "rewards/accuracies": 0.875, "rewards/chosen": -0.26092860102653503, "rewards/margins": 0.22283609211444855, "rewards/rejected": -0.4837647080421448, "step": 6777 }, { "epoch": 18.55715263518138, "grad_norm": 5.4219746589660645, "learning_rate": 7.191780821917807e-08, "log_odds_chosen": 2.1796162128448486, "log_odds_ratio": -0.23464815318584442, "logits/chosen": 0.8762556314468384, "logits/rejected": 0.9435515403747559, "logps/chosen": -2.3657495975494385, "logps/rejected": -4.454049110412598, "loss": 0.6039, "nll_loss": 0.5804235339164734, "rewards/accuracies": 1.0, "rewards/chosen": -0.2365749627351761, "rewards/margins": 0.20882996916770935, "rewards/rejected": -0.44540494680404663, "step": 6778 }, { "epoch": 18.559890485968516, "grad_norm": 6.52736759185791, "learning_rate": 7.178082191780821e-08, "log_odds_chosen": 2.2424590587615967, "log_odds_ratio": -0.12330690771341324, "logits/chosen": 0.9031660556793213, "logits/rejected": 0.8552532196044922, "logps/chosen": -2.6139438152313232, "logps/rejected": -4.745095729827881, "loss": 0.7272, "nll_loss": 0.7148748636245728, "rewards/accuracies": 1.0, "rewards/chosen": -0.2613943815231323, "rewards/margins": 0.21311518549919128, "rewards/rejected": -0.4745096266269684, "step": 6779 }, { "epoch": 18.562628336755647, "grad_norm": 8.308996200561523, "learning_rate": 7.164383561643835e-08, "log_odds_chosen": 2.0195889472961426, "log_odds_ratio": -0.3738993704319, "logits/chosen": 0.9756146669387817, "logits/rejected": 1.0400127172470093, "logps/chosen": -2.879106044769287, "logps/rejected": -4.814292907714844, "loss": 0.7549, "nll_loss": 0.7175477147102356, "rewards/accuracies": 0.875, "rewards/chosen": -0.2879106402397156, "rewards/margins": 0.19351865351200104, "rewards/rejected": -0.4814292788505554, "step": 6780 }, { "epoch": 18.56536618754278, "grad_norm": 5.492630481719971, "learning_rate": 7.150684931506849e-08, "log_odds_chosen": 1.6381282806396484, "log_odds_ratio": -0.3239397406578064, "logits/chosen": 0.7641823291778564, "logits/rejected": 0.8861479163169861, "logps/chosen": -1.95937979221344, "logps/rejected": -3.5095129013061523, "loss": 0.6835, "nll_loss": 0.6511367559432983, "rewards/accuracies": 0.75, "rewards/chosen": -0.19593797624111176, "rewards/margins": 0.1550133377313614, "rewards/rejected": -0.35095131397247314, "step": 6781 }, { "epoch": 18.56810403832991, "grad_norm": 4.895239353179932, "learning_rate": 7.136986301369863e-08, "log_odds_chosen": 2.7084245681762695, "log_odds_ratio": -0.19606763124465942, "logits/chosen": 0.7441246509552002, "logits/rejected": 0.7573586702346802, "logps/chosen": -2.3790764808654785, "logps/rejected": -5.001755714416504, "loss": 0.643, "nll_loss": 0.6233550906181335, "rewards/accuracies": 1.0, "rewards/chosen": -0.23790764808654785, "rewards/margins": 0.26226791739463806, "rewards/rejected": -0.5001755952835083, "step": 6782 }, { "epoch": 18.570841889117045, "grad_norm": 9.180669784545898, "learning_rate": 7.123287671232877e-08, "log_odds_chosen": 1.4205572605133057, "log_odds_ratio": -0.41752246022224426, "logits/chosen": 0.684272825717926, "logits/rejected": 0.6056971549987793, "logps/chosen": -2.436295509338379, "logps/rejected": -3.6912176609039307, "loss": 0.6376, "nll_loss": 0.595805823802948, "rewards/accuracies": 0.75, "rewards/chosen": -0.2436295598745346, "rewards/margins": 0.1254921853542328, "rewards/rejected": -0.3691217601299286, "step": 6783 }, { "epoch": 18.573579739904176, "grad_norm": 7.092604160308838, "learning_rate": 7.10958904109589e-08, "log_odds_chosen": 4.216346263885498, "log_odds_ratio": -0.252442866563797, "logits/chosen": 0.6969498991966248, "logits/rejected": 0.7313452959060669, "logps/chosen": -2.7609071731567383, "logps/rejected": -6.873979568481445, "loss": 0.7163, "nll_loss": 0.691027045249939, "rewards/accuracies": 0.875, "rewards/chosen": -0.27609074115753174, "rewards/margins": 0.4113072454929352, "rewards/rejected": -0.6873980164527893, "step": 6784 }, { "epoch": 18.576317590691307, "grad_norm": 7.190361499786377, "learning_rate": 7.095890410958903e-08, "log_odds_chosen": 2.837291955947876, "log_odds_ratio": -0.31751227378845215, "logits/chosen": 0.8080903887748718, "logits/rejected": 0.7950117588043213, "logps/chosen": -2.336134910583496, "logps/rejected": -4.992790222167969, "loss": 0.6505, "nll_loss": 0.6187887787818909, "rewards/accuracies": 0.75, "rewards/chosen": -0.233613520860672, "rewards/margins": 0.2656655013561249, "rewards/rejected": -0.49927905201911926, "step": 6785 }, { "epoch": 18.579055441478438, "grad_norm": 7.626194953918457, "learning_rate": 7.082191780821918e-08, "log_odds_chosen": 0.4040081799030304, "log_odds_ratio": -0.8651031255722046, "logits/chosen": 0.7573323249816895, "logits/rejected": 0.8458921909332275, "logps/chosen": -2.989248514175415, "logps/rejected": -3.3071956634521484, "loss": 0.7481, "nll_loss": 0.6615929007530212, "rewards/accuracies": 0.5, "rewards/chosen": -0.29892483353614807, "rewards/margins": 0.0317947119474411, "rewards/rejected": -0.33071956038475037, "step": 6786 }, { "epoch": 18.581793292265573, "grad_norm": 4.116944789886475, "learning_rate": 7.068493150684931e-08, "log_odds_chosen": 2.8282573223114014, "log_odds_ratio": -0.16624218225479126, "logits/chosen": 1.0045238733291626, "logits/rejected": 1.009299397468567, "logps/chosen": -2.0922820568084717, "logps/rejected": -4.774225234985352, "loss": 0.5987, "nll_loss": 0.5821204781532288, "rewards/accuracies": 1.0, "rewards/chosen": -0.20922820270061493, "rewards/margins": 0.268194317817688, "rewards/rejected": -0.4774225354194641, "step": 6787 }, { "epoch": 18.584531143052704, "grad_norm": 5.19211483001709, "learning_rate": 7.054794520547945e-08, "log_odds_chosen": 2.6670196056365967, "log_odds_ratio": -0.16543704271316528, "logits/chosen": 0.9341732263565063, "logits/rejected": 0.8709347248077393, "logps/chosen": -2.032064437866211, "logps/rejected": -4.533045291900635, "loss": 0.6396, "nll_loss": 0.6230980157852173, "rewards/accuracies": 1.0, "rewards/chosen": -0.20320641994476318, "rewards/margins": 0.2500980794429779, "rewards/rejected": -0.4533045291900635, "step": 6788 }, { "epoch": 18.587268993839835, "grad_norm": 5.753561019897461, "learning_rate": 7.041095890410959e-08, "log_odds_chosen": 2.7078473567962646, "log_odds_ratio": -0.1968250423669815, "logits/chosen": 1.0155112743377686, "logits/rejected": 1.0342825651168823, "logps/chosen": -1.5642238855361938, "logps/rejected": -4.079191207885742, "loss": 0.5516, "nll_loss": 0.5319148898124695, "rewards/accuracies": 1.0, "rewards/chosen": -0.15642239153385162, "rewards/margins": 0.2514967620372772, "rewards/rejected": -0.40791916847229004, "step": 6789 }, { "epoch": 18.590006844626966, "grad_norm": 5.7060394287109375, "learning_rate": 7.027397260273971e-08, "log_odds_chosen": 2.666822910308838, "log_odds_ratio": -0.2272007018327713, "logits/chosen": 0.8148388862609863, "logits/rejected": 0.7800459861755371, "logps/chosen": -1.7448124885559082, "logps/rejected": -4.2451043128967285, "loss": 0.6893, "nll_loss": 0.6665685176849365, "rewards/accuracies": 0.875, "rewards/chosen": -0.17448124289512634, "rewards/margins": 0.25002914667129517, "rewards/rejected": -0.4245104193687439, "step": 6790 }, { "epoch": 18.5927446954141, "grad_norm": 5.003942966461182, "learning_rate": 7.013698630136985e-08, "log_odds_chosen": 1.7151432037353516, "log_odds_ratio": -0.3196420967578888, "logits/chosen": 0.8625501394271851, "logits/rejected": 0.8297438025474548, "logps/chosen": -1.688340663909912, "logps/rejected": -3.258263349533081, "loss": 0.5285, "nll_loss": 0.49649348855018616, "rewards/accuracies": 0.875, "rewards/chosen": -0.16883407533168793, "rewards/margins": 0.15699227154254913, "rewards/rejected": -0.32582634687423706, "step": 6791 }, { "epoch": 18.595482546201232, "grad_norm": 5.427639484405518, "learning_rate": 7e-08, "log_odds_chosen": 2.7612733840942383, "log_odds_ratio": -0.36338019371032715, "logits/chosen": 0.8440770506858826, "logits/rejected": 0.7857714891433716, "logps/chosen": -2.0416853427886963, "logps/rejected": -4.7180891036987305, "loss": 0.6669, "nll_loss": 0.6305270791053772, "rewards/accuracies": 0.75, "rewards/chosen": -0.20416854321956635, "rewards/margins": 0.2676404118537903, "rewards/rejected": -0.47180894017219543, "step": 6792 }, { "epoch": 18.598220396988363, "grad_norm": 7.460017681121826, "learning_rate": 6.986301369863014e-08, "log_odds_chosen": 2.7567524909973145, "log_odds_ratio": -0.2630270719528198, "logits/chosen": 1.036716103553772, "logits/rejected": 1.1234371662139893, "logps/chosen": -3.2571873664855957, "logps/rejected": -5.949155330657959, "loss": 0.7022, "nll_loss": 0.6759291887283325, "rewards/accuracies": 1.0, "rewards/chosen": -0.3257187604904175, "rewards/margins": 0.2691967487335205, "rewards/rejected": -0.594915509223938, "step": 6793 }, { "epoch": 18.600958247775495, "grad_norm": 8.390812873840332, "learning_rate": 6.972602739726027e-08, "log_odds_chosen": 2.3503026962280273, "log_odds_ratio": -0.24214749038219452, "logits/chosen": 0.8457472324371338, "logits/rejected": 0.8580895662307739, "logps/chosen": -2.662491798400879, "logps/rejected": -4.92165470123291, "loss": 0.8599, "nll_loss": 0.8356596827507019, "rewards/accuracies": 1.0, "rewards/chosen": -0.2662491798400879, "rewards/margins": 0.22591634094715118, "rewards/rejected": -0.4921655058860779, "step": 6794 }, { "epoch": 18.60369609856263, "grad_norm": 5.320406913757324, "learning_rate": 6.958904109589041e-08, "log_odds_chosen": 2.4188313484191895, "log_odds_ratio": -0.23120510578155518, "logits/chosen": 0.8950555324554443, "logits/rejected": 0.9220926761627197, "logps/chosen": -2.2265877723693848, "logps/rejected": -4.53038215637207, "loss": 0.6853, "nll_loss": 0.6621929407119751, "rewards/accuracies": 0.875, "rewards/chosen": -0.22265878319740295, "rewards/margins": 0.23037943243980408, "rewards/rejected": -0.45303821563720703, "step": 6795 }, { "epoch": 18.60643394934976, "grad_norm": 5.035250186920166, "learning_rate": 6.945205479452055e-08, "log_odds_chosen": 2.4385173320770264, "log_odds_ratio": -0.1669168919324875, "logits/chosen": 0.9051364660263062, "logits/rejected": 0.9271437525749207, "logps/chosen": -2.2518539428710938, "logps/rejected": -4.547327041625977, "loss": 0.6451, "nll_loss": 0.6283769607543945, "rewards/accuracies": 1.0, "rewards/chosen": -0.22518539428710938, "rewards/margins": 0.22954732179641724, "rewards/rejected": -0.4547327160835266, "step": 6796 }, { "epoch": 18.60917180013689, "grad_norm": 5.517700672149658, "learning_rate": 6.931506849315067e-08, "log_odds_chosen": 4.39143180847168, "log_odds_ratio": -0.08263082802295685, "logits/chosen": 0.9393951892852783, "logits/rejected": 1.0070648193359375, "logps/chosen": -2.27174711227417, "logps/rejected": -6.454729080200195, "loss": 0.7797, "nll_loss": 0.7714780569076538, "rewards/accuracies": 1.0, "rewards/chosen": -0.22717469930648804, "rewards/margins": 0.41829824447631836, "rewards/rejected": -0.6454729437828064, "step": 6797 }, { "epoch": 18.611909650924023, "grad_norm": 7.27223539352417, "learning_rate": 6.917808219178081e-08, "log_odds_chosen": 1.1950788497924805, "log_odds_ratio": -0.4365313947200775, "logits/chosen": 0.7192499041557312, "logits/rejected": 0.7009720206260681, "logps/chosen": -2.4658870697021484, "logps/rejected": -3.5599558353424072, "loss": 0.6081, "nll_loss": 0.5644868016242981, "rewards/accuracies": 0.875, "rewards/chosen": -0.24658870697021484, "rewards/margins": 0.10940688848495483, "rewards/rejected": -0.3559955954551697, "step": 6798 }, { "epoch": 18.614647501711158, "grad_norm": 5.974014759063721, "learning_rate": 6.904109589041096e-08, "log_odds_chosen": 2.852499008178711, "log_odds_ratio": -0.20348913967609406, "logits/chosen": 0.7973380088806152, "logits/rejected": 0.8730864524841309, "logps/chosen": -2.1101818084716797, "logps/rejected": -4.803299903869629, "loss": 0.6942, "nll_loss": 0.6738516092300415, "rewards/accuracies": 0.875, "rewards/chosen": -0.2110181748867035, "rewards/margins": 0.2693118453025818, "rewards/rejected": -0.4803299903869629, "step": 6799 }, { "epoch": 18.61738535249829, "grad_norm": 5.155965805053711, "learning_rate": 6.890410958904109e-08, "log_odds_chosen": 2.9502744674682617, "log_odds_ratio": -0.1776391714811325, "logits/chosen": 0.8162800073623657, "logits/rejected": 0.8801120519638062, "logps/chosen": -2.2368743419647217, "logps/rejected": -5.025167942047119, "loss": 0.7429, "nll_loss": 0.7250910997390747, "rewards/accuracies": 1.0, "rewards/chosen": -0.22368744015693665, "rewards/margins": 0.2788293659687042, "rewards/rejected": -0.5025168061256409, "step": 6800 }, { "epoch": 18.62012320328542, "grad_norm": 5.195374011993408, "learning_rate": 6.876712328767123e-08, "log_odds_chosen": 1.9277355670928955, "log_odds_ratio": -0.2570662200450897, "logits/chosen": 0.886888861656189, "logits/rejected": 0.9357402324676514, "logps/chosen": -1.7384730577468872, "logps/rejected": -3.521001100540161, "loss": 0.5544, "nll_loss": 0.5287432670593262, "rewards/accuracies": 0.875, "rewards/chosen": -0.1738472878932953, "rewards/margins": 0.17825280129909515, "rewards/rejected": -0.352100133895874, "step": 6801 }, { "epoch": 18.622861054072555, "grad_norm": 9.088397026062012, "learning_rate": 6.863013698630137e-08, "log_odds_chosen": 1.2615649700164795, "log_odds_ratio": -0.40293338894844055, "logits/chosen": 0.8470370173454285, "logits/rejected": 0.7757019996643066, "logps/chosen": -2.6005067825317383, "logps/rejected": -3.73606014251709, "loss": 0.79, "nll_loss": 0.7497191429138184, "rewards/accuracies": 0.75, "rewards/chosen": -0.2600506842136383, "rewards/margins": 0.11355536431074142, "rewards/rejected": -0.37360602617263794, "step": 6802 }, { "epoch": 18.625598904859686, "grad_norm": 5.786431789398193, "learning_rate": 6.84931506849315e-08, "log_odds_chosen": 4.535215377807617, "log_odds_ratio": -0.042716555297374725, "logits/chosen": 0.7268505096435547, "logits/rejected": 0.7280969023704529, "logps/chosen": -2.409501075744629, "logps/rejected": -6.805400371551514, "loss": 0.8733, "nll_loss": 0.8690752387046814, "rewards/accuracies": 1.0, "rewards/chosen": -0.2409501075744629, "rewards/margins": 0.4395899772644043, "rewards/rejected": -0.6805400848388672, "step": 6803 }, { "epoch": 18.628336755646817, "grad_norm": 7.282278060913086, "learning_rate": 6.835616438356163e-08, "log_odds_chosen": 2.7524986267089844, "log_odds_ratio": -0.2723454236984253, "logits/chosen": 0.728664219379425, "logits/rejected": 0.6971510648727417, "logps/chosen": -2.7688395977020264, "logps/rejected": -5.357834815979004, "loss": 0.7327, "nll_loss": 0.7054641246795654, "rewards/accuracies": 0.875, "rewards/chosen": -0.27688395977020264, "rewards/margins": 0.2588995695114136, "rewards/rejected": -0.5357835292816162, "step": 6804 }, { "epoch": 18.63107460643395, "grad_norm": 5.74306058883667, "learning_rate": 6.821917808219178e-08, "log_odds_chosen": 3.5821774005889893, "log_odds_ratio": -0.20646515488624573, "logits/chosen": 0.7600576877593994, "logits/rejected": 0.7631524801254272, "logps/chosen": -1.9891002178192139, "logps/rejected": -5.449178695678711, "loss": 0.6649, "nll_loss": 0.6442462801933289, "rewards/accuracies": 1.0, "rewards/chosen": -0.19891004264354706, "rewards/margins": 0.3460078239440918, "rewards/rejected": -0.5449178814888, "step": 6805 }, { "epoch": 18.633812457221083, "grad_norm": 7.325642108917236, "learning_rate": 6.808219178082192e-08, "log_odds_chosen": 1.2052927017211914, "log_odds_ratio": -0.4027326703071594, "logits/chosen": 0.6876499056816101, "logits/rejected": 0.6640833616256714, "logps/chosen": -2.347121000289917, "logps/rejected": -3.360250234603882, "loss": 0.5974, "nll_loss": 0.5570880174636841, "rewards/accuracies": 0.75, "rewards/chosen": -0.2347121238708496, "rewards/margins": 0.10131291300058365, "rewards/rejected": -0.33602502942085266, "step": 6806 }, { "epoch": 18.636550308008214, "grad_norm": 6.33980655670166, "learning_rate": 6.794520547945205e-08, "log_odds_chosen": 1.577070713043213, "log_odds_ratio": -0.38114064931869507, "logits/chosen": 0.8088399171829224, "logits/rejected": 0.8089296221733093, "logps/chosen": -2.185795307159424, "logps/rejected": -3.6526050567626953, "loss": 0.7667, "nll_loss": 0.728595495223999, "rewards/accuracies": 0.75, "rewards/chosen": -0.21857953071594238, "rewards/margins": 0.14668099582195282, "rewards/rejected": -0.3652605414390564, "step": 6807 }, { "epoch": 18.639288158795345, "grad_norm": 5.929781436920166, "learning_rate": 6.780821917808219e-08, "log_odds_chosen": 3.0938539505004883, "log_odds_ratio": -0.1958352029323578, "logits/chosen": 0.7118398547172546, "logits/rejected": 0.6526466608047485, "logps/chosen": -1.9163323640823364, "logps/rejected": -4.859874725341797, "loss": 0.587, "nll_loss": 0.5674638748168945, "rewards/accuracies": 1.0, "rewards/chosen": -0.19163325428962708, "rewards/margins": 0.29435423016548157, "rewards/rejected": -0.48598745465278625, "step": 6808 }, { "epoch": 18.642026009582477, "grad_norm": 6.629166126251221, "learning_rate": 6.767123287671233e-08, "log_odds_chosen": 1.9716846942901611, "log_odds_ratio": -0.2878553867340088, "logits/chosen": 0.5796233415603638, "logits/rejected": 0.6200712323188782, "logps/chosen": -2.190382480621338, "logps/rejected": -4.056008815765381, "loss": 0.5896, "nll_loss": 0.5607686042785645, "rewards/accuracies": 0.875, "rewards/chosen": -0.2190382480621338, "rewards/margins": 0.1865626573562622, "rewards/rejected": -0.405600905418396, "step": 6809 }, { "epoch": 18.64476386036961, "grad_norm": 4.927224159240723, "learning_rate": 6.753424657534245e-08, "log_odds_chosen": 0.9370247721672058, "log_odds_ratio": -0.5372875928878784, "logits/chosen": 0.6764464974403381, "logits/rejected": 0.6578108668327332, "logps/chosen": -2.158090591430664, "logps/rejected": -3.023144006729126, "loss": 0.7567, "nll_loss": 0.7029750347137451, "rewards/accuracies": 0.75, "rewards/chosen": -0.21580907702445984, "rewards/margins": 0.08650532364845276, "rewards/rejected": -0.3023144006729126, "step": 6810 }, { "epoch": 18.647501711156742, "grad_norm": 4.611063480377197, "learning_rate": 6.73972602739726e-08, "log_odds_chosen": 2.879509449005127, "log_odds_ratio": -0.2245766818523407, "logits/chosen": 0.8907728791236877, "logits/rejected": 0.9209630489349365, "logps/chosen": -2.0692920684814453, "logps/rejected": -4.836418151855469, "loss": 0.7221, "nll_loss": 0.6996078491210938, "rewards/accuracies": 0.875, "rewards/chosen": -0.20692920684814453, "rewards/margins": 0.2767125964164734, "rewards/rejected": -0.4836418032646179, "step": 6811 }, { "epoch": 18.650239561943874, "grad_norm": 6.864272594451904, "learning_rate": 6.726027397260274e-08, "log_odds_chosen": 2.3563649654388428, "log_odds_ratio": -0.42283880710601807, "logits/chosen": 1.001753330230713, "logits/rejected": 1.0186645984649658, "logps/chosen": -2.1217598915100098, "logps/rejected": -4.352105617523193, "loss": 0.5884, "nll_loss": 0.5460687875747681, "rewards/accuracies": 0.75, "rewards/chosen": -0.21217599511146545, "rewards/margins": 0.2230345457792282, "rewards/rejected": -0.43521052598953247, "step": 6812 }, { "epoch": 18.652977412731005, "grad_norm": 5.413885593414307, "learning_rate": 6.712328767123288e-08, "log_odds_chosen": 2.790513038635254, "log_odds_ratio": -0.17636384069919586, "logits/chosen": 0.9703721404075623, "logits/rejected": 1.003960132598877, "logps/chosen": -2.661783218383789, "logps/rejected": -5.361868858337402, "loss": 0.7245, "nll_loss": 0.7068991661071777, "rewards/accuracies": 0.875, "rewards/chosen": -0.26617830991744995, "rewards/margins": 0.2700085937976837, "rewards/rejected": -0.536186933517456, "step": 6813 }, { "epoch": 18.65571526351814, "grad_norm": 6.251461029052734, "learning_rate": 6.6986301369863e-08, "log_odds_chosen": 1.6626299619674683, "log_odds_ratio": -0.3331429064273834, "logits/chosen": 0.7138586640357971, "logits/rejected": 0.8456737399101257, "logps/chosen": -2.6124231815338135, "logps/rejected": -4.237307071685791, "loss": 0.8356, "nll_loss": 0.8022956252098083, "rewards/accuracies": 0.875, "rewards/chosen": -0.2612423300743103, "rewards/margins": 0.1624884009361267, "rewards/rejected": -0.4237307012081146, "step": 6814 }, { "epoch": 18.65845311430527, "grad_norm": 5.424983024597168, "learning_rate": 6.684931506849315e-08, "log_odds_chosen": 2.3245277404785156, "log_odds_ratio": -0.1677023470401764, "logits/chosen": 0.8333562612533569, "logits/rejected": 0.7659820318222046, "logps/chosen": -1.7733033895492554, "logps/rejected": -3.85781192779541, "loss": 0.6137, "nll_loss": 0.5968841910362244, "rewards/accuracies": 1.0, "rewards/chosen": -0.17733034491539001, "rewards/margins": 0.20845085382461548, "rewards/rejected": -0.3857812285423279, "step": 6815 }, { "epoch": 18.661190965092402, "grad_norm": 6.0676374435424805, "learning_rate": 6.671232876712328e-08, "log_odds_chosen": 4.126145839691162, "log_odds_ratio": -0.18267741799354553, "logits/chosen": 0.9964042901992798, "logits/rejected": 1.0751389265060425, "logps/chosen": -2.817293643951416, "logps/rejected": -6.873344421386719, "loss": 0.7176, "nll_loss": 0.6992964148521423, "rewards/accuracies": 0.875, "rewards/chosen": -0.2817293405532837, "rewards/margins": 0.40560510754585266, "rewards/rejected": -0.687334418296814, "step": 6816 }, { "epoch": 18.663928815879533, "grad_norm": 5.633976936340332, "learning_rate": 6.657534246575342e-08, "log_odds_chosen": 2.3864128589630127, "log_odds_ratio": -0.2499457448720932, "logits/chosen": 1.0842374563217163, "logits/rejected": 1.1013315916061401, "logps/chosen": -2.8047802448272705, "logps/rejected": -5.006608963012695, "loss": 0.6291, "nll_loss": 0.604145348072052, "rewards/accuracies": 0.875, "rewards/chosen": -0.28047803044319153, "rewards/margins": 0.2201828807592392, "rewards/rejected": -0.5006608963012695, "step": 6817 }, { "epoch": 18.666666666666668, "grad_norm": 5.9519476890563965, "learning_rate": 6.643835616438356e-08, "log_odds_chosen": 3.238063097000122, "log_odds_ratio": -0.22670212388038635, "logits/chosen": 0.9288874864578247, "logits/rejected": 0.9604482650756836, "logps/chosen": -2.3964107036590576, "logps/rejected": -5.522537708282471, "loss": 0.6111, "nll_loss": 0.5884488821029663, "rewards/accuracies": 0.875, "rewards/chosen": -0.23964108526706696, "rewards/margins": 0.3126126527786255, "rewards/rejected": -0.552253782749176, "step": 6818 }, { "epoch": 18.6694045174538, "grad_norm": 5.785163879394531, "learning_rate": 6.63013698630137e-08, "log_odds_chosen": 2.4860734939575195, "log_odds_ratio": -0.32306256890296936, "logits/chosen": 0.9472464919090271, "logits/rejected": 0.9402492046356201, "logps/chosen": -2.6839170455932617, "logps/rejected": -5.0731682777404785, "loss": 0.6969, "nll_loss": 0.6645795702934265, "rewards/accuracies": 0.875, "rewards/chosen": -0.2683917284011841, "rewards/margins": 0.23892508447170258, "rewards/rejected": -0.5073168277740479, "step": 6819 }, { "epoch": 18.67214236824093, "grad_norm": 6.00881814956665, "learning_rate": 6.616438356164384e-08, "log_odds_chosen": 1.9444923400878906, "log_odds_ratio": -0.20075547695159912, "logits/chosen": 1.0483310222625732, "logits/rejected": 1.0265522003173828, "logps/chosen": -2.0261380672454834, "logps/rejected": -3.843130111694336, "loss": 0.607, "nll_loss": 0.5868869423866272, "rewards/accuracies": 1.0, "rewards/chosen": -0.20261380076408386, "rewards/margins": 0.18169918656349182, "rewards/rejected": -0.3843129873275757, "step": 6820 }, { "epoch": 18.67488021902806, "grad_norm": 5.258277416229248, "learning_rate": 6.602739726027397e-08, "log_odds_chosen": 1.0378553867340088, "log_odds_ratio": -0.4005008935928345, "logits/chosen": 0.6907577514648438, "logits/rejected": 0.7132340669631958, "logps/chosen": -1.6648482084274292, "logps/rejected": -2.5853915214538574, "loss": 0.592, "nll_loss": 0.5519359111785889, "rewards/accuracies": 0.875, "rewards/chosen": -0.16648483276367188, "rewards/margins": 0.0920543223619461, "rewards/rejected": -0.2585391402244568, "step": 6821 }, { "epoch": 18.677618069815196, "grad_norm": 5.969480514526367, "learning_rate": 6.58904109589041e-08, "log_odds_chosen": 2.117525815963745, "log_odds_ratio": -0.22478891909122467, "logits/chosen": 1.0489311218261719, "logits/rejected": 1.0855414867401123, "logps/chosen": -2.593052625656128, "logps/rejected": -4.629364013671875, "loss": 0.6351, "nll_loss": 0.6126211285591125, "rewards/accuracies": 1.0, "rewards/chosen": -0.25930526852607727, "rewards/margins": 0.203631192445755, "rewards/rejected": -0.4629364609718323, "step": 6822 }, { "epoch": 18.680355920602327, "grad_norm": 6.416171550750732, "learning_rate": 6.575342465753424e-08, "log_odds_chosen": 3.0705442428588867, "log_odds_ratio": -0.2300862818956375, "logits/chosen": 0.9036377668380737, "logits/rejected": 0.8485932350158691, "logps/chosen": -2.0521798133850098, "logps/rejected": -4.977864742279053, "loss": 0.6084, "nll_loss": 0.5854138731956482, "rewards/accuracies": 1.0, "rewards/chosen": -0.20521798729896545, "rewards/margins": 0.29256850481033325, "rewards/rejected": -0.4977864623069763, "step": 6823 }, { "epoch": 18.68309377138946, "grad_norm": 7.383396148681641, "learning_rate": 6.561643835616438e-08, "log_odds_chosen": 3.863830804824829, "log_odds_ratio": -0.05270916223526001, "logits/chosen": 1.1385583877563477, "logits/rejected": 1.233844518661499, "logps/chosen": -2.415292263031006, "logps/rejected": -6.168923854827881, "loss": 0.688, "nll_loss": 0.6826930046081543, "rewards/accuracies": 1.0, "rewards/chosen": -0.24152922630310059, "rewards/margins": 0.3753631114959717, "rewards/rejected": -0.616892397403717, "step": 6824 }, { "epoch": 18.68583162217659, "grad_norm": 5.525391578674316, "learning_rate": 6.547945205479452e-08, "log_odds_chosen": 1.666689395904541, "log_odds_ratio": -0.47020307183265686, "logits/chosen": 0.8790977001190186, "logits/rejected": 0.8672149181365967, "logps/chosen": -1.9549305438995361, "logps/rejected": -3.5592122077941895, "loss": 0.6117, "nll_loss": 0.5646882653236389, "rewards/accuracies": 0.75, "rewards/chosen": -0.19549304246902466, "rewards/margins": 0.16042818129062653, "rewards/rejected": -0.35592120885849, "step": 6825 }, { "epoch": 18.688569472963724, "grad_norm": 4.922483444213867, "learning_rate": 6.534246575342466e-08, "log_odds_chosen": 2.839876651763916, "log_odds_ratio": -0.14269588887691498, "logits/chosen": 0.6714425086975098, "logits/rejected": 0.7072629332542419, "logps/chosen": -2.2834250926971436, "logps/rejected": -5.013155937194824, "loss": 0.5961, "nll_loss": 0.581834077835083, "rewards/accuracies": 1.0, "rewards/chosen": -0.22834251821041107, "rewards/margins": 0.27297309041023254, "rewards/rejected": -0.5013156533241272, "step": 6826 }, { "epoch": 18.691307323750856, "grad_norm": 13.546442031860352, "learning_rate": 6.520547945205478e-08, "log_odds_chosen": 0.9081777334213257, "log_odds_ratio": -1.0865204334259033, "logits/chosen": 0.8339664936065674, "logits/rejected": 0.813927173614502, "logps/chosen": -3.4196081161499023, "logps/rejected": -4.174625396728516, "loss": 0.6959, "nll_loss": 0.5872725248336792, "rewards/accuracies": 0.75, "rewards/chosen": -0.3419608175754547, "rewards/margins": 0.0755016952753067, "rewards/rejected": -0.4174625277519226, "step": 6827 }, { "epoch": 18.694045174537987, "grad_norm": 5.490377902984619, "learning_rate": 6.506849315068492e-08, "log_odds_chosen": 2.794969320297241, "log_odds_ratio": -0.17725400626659393, "logits/chosen": 0.9443804025650024, "logits/rejected": 0.9007406234741211, "logps/chosen": -2.5995543003082275, "logps/rejected": -5.326692581176758, "loss": 0.722, "nll_loss": 0.7042919993400574, "rewards/accuracies": 0.875, "rewards/chosen": -0.2599554657936096, "rewards/margins": 0.2727137804031372, "rewards/rejected": -0.532669186592102, "step": 6828 }, { "epoch": 18.69678302532512, "grad_norm": 4.591362953186035, "learning_rate": 6.493150684931506e-08, "log_odds_chosen": 3.232107639312744, "log_odds_ratio": -0.1781664937734604, "logits/chosen": 0.8515761494636536, "logits/rejected": 0.8305668830871582, "logps/chosen": -2.1735379695892334, "logps/rejected": -5.275596618652344, "loss": 0.6516, "nll_loss": 0.6337644457817078, "rewards/accuracies": 1.0, "rewards/chosen": -0.21735380589962006, "rewards/margins": 0.31020587682724, "rewards/rejected": -0.5275596976280212, "step": 6829 }, { "epoch": 18.699520876112253, "grad_norm": 5.585035800933838, "learning_rate": 6.479452054794521e-08, "log_odds_chosen": 2.403989315032959, "log_odds_ratio": -0.20185992121696472, "logits/chosen": 0.5526005029678345, "logits/rejected": 0.6151773929595947, "logps/chosen": -2.2287003993988037, "logps/rejected": -4.514762878417969, "loss": 0.6592, "nll_loss": 0.6390131711959839, "rewards/accuracies": 1.0, "rewards/chosen": -0.22287003695964813, "rewards/margins": 0.22860628366470337, "rewards/rejected": -0.4514762759208679, "step": 6830 }, { "epoch": 18.702258726899384, "grad_norm": 6.363738059997559, "learning_rate": 6.465753424657534e-08, "log_odds_chosen": 2.046398401260376, "log_odds_ratio": -0.24730046093463898, "logits/chosen": 0.8379478454589844, "logits/rejected": 0.8664092421531677, "logps/chosen": -2.578763246536255, "logps/rejected": -4.508862018585205, "loss": 0.5893, "nll_loss": 0.5645254850387573, "rewards/accuracies": 0.875, "rewards/chosen": -0.25787630677223206, "rewards/margins": 0.1930098831653595, "rewards/rejected": -0.45088621973991394, "step": 6831 }, { "epoch": 18.704996577686515, "grad_norm": 5.279110431671143, "learning_rate": 6.452054794520548e-08, "log_odds_chosen": 2.9129703044891357, "log_odds_ratio": -0.1658993810415268, "logits/chosen": 0.7494176030158997, "logits/rejected": 0.7126469612121582, "logps/chosen": -1.5894711017608643, "logps/rejected": -4.3009443283081055, "loss": 0.5689, "nll_loss": 0.5522963404655457, "rewards/accuracies": 1.0, "rewards/chosen": -0.15894711017608643, "rewards/margins": 0.27114731073379517, "rewards/rejected": -0.4300944209098816, "step": 6832 }, { "epoch": 18.70773442847365, "grad_norm": 4.849647045135498, "learning_rate": 6.438356164383562e-08, "log_odds_chosen": 3.057713270187378, "log_odds_ratio": -0.1827443689107895, "logits/chosen": 0.6744446754455566, "logits/rejected": 0.7145818471908569, "logps/chosen": -2.5138871669769287, "logps/rejected": -5.474620819091797, "loss": 0.677, "nll_loss": 0.6587468981742859, "rewards/accuracies": 1.0, "rewards/chosen": -0.2513887584209442, "rewards/margins": 0.2960733473300934, "rewards/rejected": -0.5474621057510376, "step": 6833 }, { "epoch": 18.71047227926078, "grad_norm": 4.969546794891357, "learning_rate": 6.424657534246574e-08, "log_odds_chosen": 1.483588457107544, "log_odds_ratio": -0.28318366408348083, "logits/chosen": 0.7868566513061523, "logits/rejected": 0.8239008188247681, "logps/chosen": -1.72834312915802, "logps/rejected": -3.094078540802002, "loss": 0.5521, "nll_loss": 0.5237684845924377, "rewards/accuracies": 1.0, "rewards/chosen": -0.17283430695533752, "rewards/margins": 0.13657353818416595, "rewards/rejected": -0.3094078600406647, "step": 6834 }, { "epoch": 18.713210130047912, "grad_norm": 4.845910549163818, "learning_rate": 6.410958904109588e-08, "log_odds_chosen": 2.278264284133911, "log_odds_ratio": -0.2268088012933731, "logits/chosen": 0.9730477333068848, "logits/rejected": 0.9934091567993164, "logps/chosen": -2.2310028076171875, "logps/rejected": -4.3866705894470215, "loss": 0.5923, "nll_loss": 0.5696082711219788, "rewards/accuracies": 0.875, "rewards/chosen": -0.22310027480125427, "rewards/margins": 0.21556681394577026, "rewards/rejected": -0.43866708874702454, "step": 6835 }, { "epoch": 18.715947980835043, "grad_norm": 5.422255039215088, "learning_rate": 6.397260273972603e-08, "log_odds_chosen": 1.65480375289917, "log_odds_ratio": -0.29131755232810974, "logits/chosen": 0.7839549779891968, "logits/rejected": 0.7584186792373657, "logps/chosen": -1.9438714981079102, "logps/rejected": -3.481189250946045, "loss": 0.643, "nll_loss": 0.6139015555381775, "rewards/accuracies": 0.875, "rewards/chosen": -0.19438715279102325, "rewards/margins": 0.15373177826404572, "rewards/rejected": -0.34811893105506897, "step": 6836 }, { "epoch": 18.718685831622178, "grad_norm": 7.848330974578857, "learning_rate": 6.383561643835616e-08, "log_odds_chosen": 2.033111572265625, "log_odds_ratio": -0.22291132807731628, "logits/chosen": 0.6698859930038452, "logits/rejected": 0.6790215969085693, "logps/chosen": -2.019686460494995, "logps/rejected": -3.919955253601074, "loss": 0.5999, "nll_loss": 0.5775748491287231, "rewards/accuracies": 1.0, "rewards/chosen": -0.20196864008903503, "rewards/margins": 0.1900268793106079, "rewards/rejected": -0.39199554920196533, "step": 6837 }, { "epoch": 18.72142368240931, "grad_norm": 5.731382846832275, "learning_rate": 6.36986301369863e-08, "log_odds_chosen": 1.6707100868225098, "log_odds_ratio": -0.2971906363964081, "logits/chosen": 0.8537124395370483, "logits/rejected": 0.8938798904418945, "logps/chosen": -2.310235023498535, "logps/rejected": -3.8457493782043457, "loss": 0.6963, "nll_loss": 0.6665701270103455, "rewards/accuracies": 0.875, "rewards/chosen": -0.23102350533008575, "rewards/margins": 0.15355144441127777, "rewards/rejected": -0.3845749795436859, "step": 6838 }, { "epoch": 18.72416153319644, "grad_norm": 5.644087791442871, "learning_rate": 6.356164383561644e-08, "log_odds_chosen": 1.4864208698272705, "log_odds_ratio": -0.26945263147354126, "logits/chosen": 0.8584496974945068, "logits/rejected": 0.8410983085632324, "logps/chosen": -2.3329503536224365, "logps/rejected": -3.7322216033935547, "loss": 0.6652, "nll_loss": 0.6382828950881958, "rewards/accuracies": 1.0, "rewards/chosen": -0.23329505324363708, "rewards/margins": 0.13992710411548615, "rewards/rejected": -0.3732221722602844, "step": 6839 }, { "epoch": 18.72689938398357, "grad_norm": 6.362524509429932, "learning_rate": 6.342465753424658e-08, "log_odds_chosen": 1.8299710750579834, "log_odds_ratio": -0.3613124489784241, "logits/chosen": 0.8002179861068726, "logits/rejected": 0.874276876449585, "logps/chosen": -2.2112984657287598, "logps/rejected": -3.932272434234619, "loss": 0.7719, "nll_loss": 0.7357191443443298, "rewards/accuracies": 0.875, "rewards/chosen": -0.2211298644542694, "rewards/margins": 0.17209741473197937, "rewards/rejected": -0.3932272791862488, "step": 6840 }, { "epoch": 18.729637234770706, "grad_norm": 6.551129341125488, "learning_rate": 6.32876712328767e-08, "log_odds_chosen": 2.841660976409912, "log_odds_ratio": -0.1298903077840805, "logits/chosen": 0.9253287315368652, "logits/rejected": 0.9496561884880066, "logps/chosen": -2.4240517616271973, "logps/rejected": -5.154382705688477, "loss": 0.5851, "nll_loss": 0.5721526145935059, "rewards/accuracies": 1.0, "rewards/chosen": -0.24240520596504211, "rewards/margins": 0.273033082485199, "rewards/rejected": -0.5154382586479187, "step": 6841 }, { "epoch": 18.732375085557837, "grad_norm": 5.595092296600342, "learning_rate": 6.315068493150684e-08, "log_odds_chosen": 3.6283788681030273, "log_odds_ratio": -0.18558251857757568, "logits/chosen": 0.9165291786193848, "logits/rejected": 0.928602933883667, "logps/chosen": -3.27774977684021, "logps/rejected": -6.837104797363281, "loss": 0.6752, "nll_loss": 0.6566481590270996, "rewards/accuracies": 0.875, "rewards/chosen": -0.3277750015258789, "rewards/margins": 0.35593554377555847, "rewards/rejected": -0.6837105751037598, "step": 6842 }, { "epoch": 18.73511293634497, "grad_norm": 5.345863342285156, "learning_rate": 6.301369863013699e-08, "log_odds_chosen": 2.729109287261963, "log_odds_ratio": -0.1342294067144394, "logits/chosen": 1.016266107559204, "logits/rejected": 1.0403406620025635, "logps/chosen": -2.6722655296325684, "logps/rejected": -5.310403823852539, "loss": 0.7207, "nll_loss": 0.7072443962097168, "rewards/accuracies": 1.0, "rewards/chosen": -0.26722657680511475, "rewards/margins": 0.2638137936592102, "rewards/rejected": -0.531040370464325, "step": 6843 }, { "epoch": 18.7378507871321, "grad_norm": 5.151991844177246, "learning_rate": 6.287671232876712e-08, "log_odds_chosen": 1.6205592155456543, "log_odds_ratio": -0.3386717438697815, "logits/chosen": 0.8832122087478638, "logits/rejected": 0.898389995098114, "logps/chosen": -1.6646876335144043, "logps/rejected": -3.099614381790161, "loss": 0.601, "nll_loss": 0.5671532154083252, "rewards/accuracies": 0.875, "rewards/chosen": -0.1664687544107437, "rewards/margins": 0.1434926986694336, "rewards/rejected": -0.3099614381790161, "step": 6844 }, { "epoch": 18.740588637919235, "grad_norm": 6.319535732269287, "learning_rate": 6.273972602739726e-08, "log_odds_chosen": 2.180162191390991, "log_odds_ratio": -0.26508867740631104, "logits/chosen": 0.7363930940628052, "logits/rejected": 0.6401986479759216, "logps/chosen": -1.752323865890503, "logps/rejected": -3.809788227081299, "loss": 0.5927, "nll_loss": 0.5661723017692566, "rewards/accuracies": 1.0, "rewards/chosen": -0.17523238062858582, "rewards/margins": 0.20574644207954407, "rewards/rejected": -0.3809788227081299, "step": 6845 }, { "epoch": 18.743326488706366, "grad_norm": 6.111156463623047, "learning_rate": 6.26027397260274e-08, "log_odds_chosen": 0.6251408457756042, "log_odds_ratio": -0.608039915561676, "logits/chosen": 0.8133411407470703, "logits/rejected": 0.8725719451904297, "logps/chosen": -2.108052968978882, "logps/rejected": -2.634223699569702, "loss": 0.5883, "nll_loss": 0.5275242924690247, "rewards/accuracies": 0.875, "rewards/chosen": -0.21080531179904938, "rewards/margins": 0.052617065608501434, "rewards/rejected": -0.2634223699569702, "step": 6846 }, { "epoch": 18.746064339493497, "grad_norm": 5.2363433837890625, "learning_rate": 6.246575342465754e-08, "log_odds_chosen": 2.6195216178894043, "log_odds_ratio": -0.20524314045906067, "logits/chosen": 0.9775918126106262, "logits/rejected": 0.9578791856765747, "logps/chosen": -1.6365060806274414, "logps/rejected": -3.923776865005493, "loss": 0.5684, "nll_loss": 0.5478705167770386, "rewards/accuracies": 0.875, "rewards/chosen": -0.16365061700344086, "rewards/margins": 0.22872710227966309, "rewards/rejected": -0.39237770438194275, "step": 6847 }, { "epoch": 18.748802190280628, "grad_norm": 5.389712333679199, "learning_rate": 6.232876712328767e-08, "log_odds_chosen": 1.8982489109039307, "log_odds_ratio": -0.2921881377696991, "logits/chosen": 1.0447970628738403, "logits/rejected": 1.0556108951568604, "logps/chosen": -2.2783455848693848, "logps/rejected": -4.095287799835205, "loss": 0.5786, "nll_loss": 0.5493356585502625, "rewards/accuracies": 0.875, "rewards/chosen": -0.227834552526474, "rewards/margins": 0.18169423937797546, "rewards/rejected": -0.40952879190444946, "step": 6848 }, { "epoch": 18.751540041067763, "grad_norm": 5.4201436042785645, "learning_rate": 6.21917808219178e-08, "log_odds_chosen": 2.1209030151367188, "log_odds_ratio": -0.17062868177890778, "logits/chosen": 1.0876901149749756, "logits/rejected": 1.130336880683899, "logps/chosen": -1.6093056201934814, "logps/rejected": -3.528308391571045, "loss": 0.5213, "nll_loss": 0.5042412877082825, "rewards/accuracies": 1.0, "rewards/chosen": -0.1609305739402771, "rewards/margins": 0.1919003129005432, "rewards/rejected": -0.3528308868408203, "step": 6849 }, { "epoch": 18.754277891854894, "grad_norm": 6.539206504821777, "learning_rate": 6.205479452054795e-08, "log_odds_chosen": 1.542598009109497, "log_odds_ratio": -0.31578218936920166, "logits/chosen": 0.8462724685668945, "logits/rejected": 0.9721187353134155, "logps/chosen": -2.918611526489258, "logps/rejected": -4.404272556304932, "loss": 0.8053, "nll_loss": 0.7737168073654175, "rewards/accuracies": 1.0, "rewards/chosen": -0.2918611466884613, "rewards/margins": 0.1485661119222641, "rewards/rejected": -0.4404272437095642, "step": 6850 }, { "epoch": 18.757015742642025, "grad_norm": 4.874863624572754, "learning_rate": 6.191780821917808e-08, "log_odds_chosen": 2.7907938957214355, "log_odds_ratio": -0.22798292338848114, "logits/chosen": 0.7294139266014099, "logits/rejected": 0.836449146270752, "logps/chosen": -1.840482234954834, "logps/rejected": -4.457077503204346, "loss": 0.5937, "nll_loss": 0.5708966255187988, "rewards/accuracies": 1.0, "rewards/chosen": -0.18404823541641235, "rewards/margins": 0.26165953278541565, "rewards/rejected": -0.4457077383995056, "step": 6851 }, { "epoch": 18.75975359342916, "grad_norm": 5.478113174438477, "learning_rate": 6.178082191780822e-08, "log_odds_chosen": 2.0083885192871094, "log_odds_ratio": -0.33150893449783325, "logits/chosen": 0.8706220388412476, "logits/rejected": 0.912796139717102, "logps/chosen": -2.275771141052246, "logps/rejected": -4.206146717071533, "loss": 0.6698, "nll_loss": 0.6366820335388184, "rewards/accuracies": 0.875, "rewards/chosen": -0.2275771200656891, "rewards/margins": 0.19303753972053528, "rewards/rejected": -0.42061468958854675, "step": 6852 }, { "epoch": 18.76249144421629, "grad_norm": 5.324517726898193, "learning_rate": 6.164383561643836e-08, "log_odds_chosen": 1.8217387199401855, "log_odds_ratio": -0.2754698395729065, "logits/chosen": 0.7898925542831421, "logits/rejected": 0.8794133067131042, "logps/chosen": -2.1552186012268066, "logps/rejected": -3.855700969696045, "loss": 0.7589, "nll_loss": 0.731376588344574, "rewards/accuracies": 1.0, "rewards/chosen": -0.21552184224128723, "rewards/margins": 0.17004825174808502, "rewards/rejected": -0.38557010889053345, "step": 6853 }, { "epoch": 18.765229295003422, "grad_norm": 5.112183570861816, "learning_rate": 6.15068493150685e-08, "log_odds_chosen": 1.6771401166915894, "log_odds_ratio": -0.26087599992752075, "logits/chosen": 0.9591479897499084, "logits/rejected": 1.0201616287231445, "logps/chosen": -1.6491913795471191, "logps/rejected": -3.131960391998291, "loss": 0.518, "nll_loss": 0.49191877245903015, "rewards/accuracies": 1.0, "rewards/chosen": -0.16491913795471191, "rewards/margins": 0.1482768952846527, "rewards/rejected": -0.3131960332393646, "step": 6854 }, { "epoch": 18.767967145790553, "grad_norm": 9.76388931274414, "learning_rate": 6.136986301369863e-08, "log_odds_chosen": 2.0274243354797363, "log_odds_ratio": -0.5080066919326782, "logits/chosen": 0.977990984916687, "logits/rejected": 1.048250436782837, "logps/chosen": -2.612351417541504, "logps/rejected": -4.555269241333008, "loss": 0.6706, "nll_loss": 0.6198148727416992, "rewards/accuracies": 0.75, "rewards/chosen": -0.26123517751693726, "rewards/margins": 0.19429177045822144, "rewards/rejected": -0.4555269479751587, "step": 6855 }, { "epoch": 18.770704996577688, "grad_norm": 5.855244159698486, "learning_rate": 6.123287671232876e-08, "log_odds_chosen": 0.8064206838607788, "log_odds_ratio": -0.3981669545173645, "logits/chosen": 0.8354663848876953, "logits/rejected": 0.8142980337142944, "logps/chosen": -1.911097764968872, "logps/rejected": -2.6155426502227783, "loss": 0.579, "nll_loss": 0.5391689538955688, "rewards/accuracies": 0.875, "rewards/chosen": -0.1911097764968872, "rewards/margins": 0.07044447213411331, "rewards/rejected": -0.2615542411804199, "step": 6856 }, { "epoch": 18.77344284736482, "grad_norm": 6.23704719543457, "learning_rate": 6.10958904109589e-08, "log_odds_chosen": 2.5725913047790527, "log_odds_ratio": -0.2797622084617615, "logits/chosen": 0.9715478420257568, "logits/rejected": 1.028169870376587, "logps/chosen": -2.4697329998016357, "logps/rejected": -4.971591472625732, "loss": 0.7368, "nll_loss": 0.708781898021698, "rewards/accuracies": 1.0, "rewards/chosen": -0.24697330594062805, "rewards/margins": 0.25018584728240967, "rewards/rejected": -0.49715912342071533, "step": 6857 }, { "epoch": 18.77618069815195, "grad_norm": 4.869017601013184, "learning_rate": 6.095890410958904e-08, "log_odds_chosen": 2.1726176738739014, "log_odds_ratio": -0.20442809164524078, "logits/chosen": 0.8942879438400269, "logits/rejected": 0.9033382534980774, "logps/chosen": -2.1158900260925293, "logps/rejected": -4.183088779449463, "loss": 0.6558, "nll_loss": 0.6353917717933655, "rewards/accuracies": 1.0, "rewards/chosen": -0.2115890085697174, "rewards/margins": 0.20671986043453217, "rewards/rejected": -0.41830888390541077, "step": 6858 }, { "epoch": 18.77891854893908, "grad_norm": 13.837848663330078, "learning_rate": 6.082191780821917e-08, "log_odds_chosen": 0.9388275742530823, "log_odds_ratio": -0.7524381279945374, "logits/chosen": 0.9913859367370605, "logits/rejected": 0.996418833732605, "logps/chosen": -3.402752637863159, "logps/rejected": -4.303689002990723, "loss": 0.7464, "nll_loss": 0.6711846590042114, "rewards/accuracies": 0.75, "rewards/chosen": -0.34027525782585144, "rewards/margins": 0.09009367972612381, "rewards/rejected": -0.43036895990371704, "step": 6859 }, { "epoch": 18.781656399726216, "grad_norm": 6.94567346572876, "learning_rate": 6.068493150684931e-08, "log_odds_chosen": 2.1514744758605957, "log_odds_ratio": -0.28064367175102234, "logits/chosen": 0.9443405270576477, "logits/rejected": 0.9569723010063171, "logps/chosen": -1.8085873126983643, "logps/rejected": -3.838479995727539, "loss": 0.6086, "nll_loss": 0.5804874897003174, "rewards/accuracies": 0.875, "rewards/chosen": -0.18085873126983643, "rewards/margins": 0.20298923552036285, "rewards/rejected": -0.3838479816913605, "step": 6860 }, { "epoch": 18.784394250513348, "grad_norm": 5.542767524719238, "learning_rate": 6.054794520547945e-08, "log_odds_chosen": 1.8773226737976074, "log_odds_ratio": -0.29466497898101807, "logits/chosen": 0.914225697517395, "logits/rejected": 0.9210264086723328, "logps/chosen": -2.5371992588043213, "logps/rejected": -4.341894149780273, "loss": 0.671, "nll_loss": 0.6415822505950928, "rewards/accuracies": 0.875, "rewards/chosen": -0.25371992588043213, "rewards/margins": 0.18046948313713074, "rewards/rejected": -0.43418940901756287, "step": 6861 }, { "epoch": 18.78713210130048, "grad_norm": 7.23264741897583, "learning_rate": 6.041095890410958e-08, "log_odds_chosen": 1.6021252870559692, "log_odds_ratio": -0.288760781288147, "logits/chosen": 0.7178409099578857, "logits/rejected": 0.6822808980941772, "logps/chosen": -2.0102171897888184, "logps/rejected": -3.508589267730713, "loss": 0.7329, "nll_loss": 0.7039933800697327, "rewards/accuracies": 1.0, "rewards/chosen": -0.2010217308998108, "rewards/margins": 0.1498371809720993, "rewards/rejected": -0.3508589267730713, "step": 6862 }, { "epoch": 18.78986995208761, "grad_norm": 5.9473958015441895, "learning_rate": 6.027397260273973e-08, "log_odds_chosen": 1.7746543884277344, "log_odds_ratio": -0.24681119620800018, "logits/chosen": 0.8641623854637146, "logits/rejected": 0.9138798713684082, "logps/chosen": -1.8161804676055908, "logps/rejected": -3.4119515419006348, "loss": 0.7399, "nll_loss": 0.715197741985321, "rewards/accuracies": 1.0, "rewards/chosen": -0.18161804974079132, "rewards/margins": 0.1595771163702011, "rewards/rejected": -0.34119516611099243, "step": 6863 }, { "epoch": 18.792607802874745, "grad_norm": 4.96299934387207, "learning_rate": 6.013698630136986e-08, "log_odds_chosen": 1.845601201057434, "log_odds_ratio": -0.32684165239334106, "logits/chosen": 1.1283243894577026, "logits/rejected": 1.1097500324249268, "logps/chosen": -1.919055700302124, "logps/rejected": -3.6162524223327637, "loss": 0.5412, "nll_loss": 0.5085485577583313, "rewards/accuracies": 1.0, "rewards/chosen": -0.19190555810928345, "rewards/margins": 0.16971969604492188, "rewards/rejected": -0.3616252541542053, "step": 6864 }, { "epoch": 18.795345653661876, "grad_norm": 5.620733261108398, "learning_rate": 6e-08, "log_odds_chosen": 2.569033145904541, "log_odds_ratio": -0.20772430300712585, "logits/chosen": 0.6463249325752258, "logits/rejected": 0.68155837059021, "logps/chosen": -2.147608518600464, "logps/rejected": -4.60269832611084, "loss": 0.6686, "nll_loss": 0.6478298902511597, "rewards/accuracies": 1.0, "rewards/chosen": -0.21476085484027863, "rewards/margins": 0.24550898373126984, "rewards/rejected": -0.46026986837387085, "step": 6865 }, { "epoch": 18.798083504449007, "grad_norm": 7.255871772766113, "learning_rate": 5.986301369863013e-08, "log_odds_chosen": 1.0308294296264648, "log_odds_ratio": -0.3954745829105377, "logits/chosen": 0.6521709561347961, "logits/rejected": 0.7229133248329163, "logps/chosen": -2.344169855117798, "logps/rejected": -3.317776679992676, "loss": 0.6884, "nll_loss": 0.6488949060440063, "rewards/accuracies": 0.875, "rewards/chosen": -0.23441699147224426, "rewards/margins": 0.09736065566539764, "rewards/rejected": -0.3317776620388031, "step": 6866 }, { "epoch": 18.80082135523614, "grad_norm": 5.209933757781982, "learning_rate": 5.972602739726027e-08, "log_odds_chosen": 2.5283572673797607, "log_odds_ratio": -0.1665586531162262, "logits/chosen": 0.7862107157707214, "logits/rejected": 0.770767867565155, "logps/chosen": -2.057515859603882, "logps/rejected": -4.4257283210754395, "loss": 0.6735, "nll_loss": 0.65688157081604, "rewards/accuracies": 1.0, "rewards/chosen": -0.20575156807899475, "rewards/margins": 0.23682129383087158, "rewards/rejected": -0.44257286190986633, "step": 6867 }, { "epoch": 18.803559206023273, "grad_norm": 4.544558525085449, "learning_rate": 5.9589041095890405e-08, "log_odds_chosen": 2.9042277336120605, "log_odds_ratio": -0.2673528492450714, "logits/chosen": 0.9522314071655273, "logits/rejected": 0.9674729108810425, "logps/chosen": -2.0186126232147217, "logps/rejected": -4.592881202697754, "loss": 0.6309, "nll_loss": 0.6042069792747498, "rewards/accuracies": 0.75, "rewards/chosen": -0.20186129212379456, "rewards/margins": 0.25742682814598083, "rewards/rejected": -0.4592881202697754, "step": 6868 }, { "epoch": 18.806297056810404, "grad_norm": 5.017947673797607, "learning_rate": 5.945205479452055e-08, "log_odds_chosen": 3.697854995727539, "log_odds_ratio": -0.1971731185913086, "logits/chosen": 0.9350740909576416, "logits/rejected": 0.9052848815917969, "logps/chosen": -2.3789496421813965, "logps/rejected": -5.9827117919921875, "loss": 0.6667, "nll_loss": 0.6469692587852478, "rewards/accuracies": 1.0, "rewards/chosen": -0.2378949522972107, "rewards/margins": 0.3603762090206146, "rewards/rejected": -0.5982711911201477, "step": 6869 }, { "epoch": 18.809034907597535, "grad_norm": 8.603202819824219, "learning_rate": 5.931506849315068e-08, "log_odds_chosen": 0.5640988349914551, "log_odds_ratio": -0.8266271352767944, "logits/chosen": 0.7405775189399719, "logits/rejected": 0.737997829914093, "logps/chosen": -2.524718999862671, "logps/rejected": -2.958280086517334, "loss": 0.6466, "nll_loss": 0.563955545425415, "rewards/accuracies": 0.75, "rewards/chosen": -0.252471923828125, "rewards/margins": 0.04335608705878258, "rewards/rejected": -0.2958279848098755, "step": 6870 }, { "epoch": 18.811772758384667, "grad_norm": 5.198703289031982, "learning_rate": 5.9178082191780814e-08, "log_odds_chosen": 1.700270414352417, "log_odds_ratio": -0.24961793422698975, "logits/chosen": 0.6679136753082275, "logits/rejected": 0.763259768486023, "logps/chosen": -2.325793743133545, "logps/rejected": -3.922832489013672, "loss": 0.5437, "nll_loss": 0.5187608599662781, "rewards/accuracies": 1.0, "rewards/chosen": -0.23257938027381897, "rewards/margins": 0.15970386564731598, "rewards/rejected": -0.39228323101997375, "step": 6871 }, { "epoch": 18.8145106091718, "grad_norm": 7.646890640258789, "learning_rate": 5.904109589041096e-08, "log_odds_chosen": 2.569943904876709, "log_odds_ratio": -0.1152036190032959, "logits/chosen": 0.9826420545578003, "logits/rejected": 1.052140474319458, "logps/chosen": -2.281385898590088, "logps/rejected": -4.751650810241699, "loss": 0.5815, "nll_loss": 0.5699543952941895, "rewards/accuracies": 1.0, "rewards/chosen": -0.22813859581947327, "rewards/margins": 0.2470264583826065, "rewards/rejected": -0.47516506910324097, "step": 6872 }, { "epoch": 18.817248459958932, "grad_norm": 5.285590648651123, "learning_rate": 5.890410958904109e-08, "log_odds_chosen": 3.09039568901062, "log_odds_ratio": -0.17819064855575562, "logits/chosen": 0.7575647234916687, "logits/rejected": 0.7646560668945312, "logps/chosen": -2.0231335163116455, "logps/rejected": -4.876589775085449, "loss": 0.577, "nll_loss": 0.5592023730278015, "rewards/accuracies": 0.875, "rewards/chosen": -0.2023133635520935, "rewards/margins": 0.2853456139564514, "rewards/rejected": -0.48765891790390015, "step": 6873 }, { "epoch": 18.819986310746064, "grad_norm": 6.0561113357543945, "learning_rate": 5.876712328767123e-08, "log_odds_chosen": 1.5256175994873047, "log_odds_ratio": -0.2489364743232727, "logits/chosen": 0.9088840484619141, "logits/rejected": 0.8882836103439331, "logps/chosen": -1.8871886730194092, "logps/rejected": -3.25935697555542, "loss": 0.5067, "nll_loss": 0.4817627966403961, "rewards/accuracies": 1.0, "rewards/chosen": -0.18871885538101196, "rewards/margins": 0.13721683621406555, "rewards/rejected": -0.3259356915950775, "step": 6874 }, { "epoch": 18.822724161533195, "grad_norm": 6.180656433105469, "learning_rate": 5.863013698630137e-08, "log_odds_chosen": 3.0552539825439453, "log_odds_ratio": -0.3990040719509125, "logits/chosen": 0.8434901237487793, "logits/rejected": 0.9094300270080566, "logps/chosen": -2.0955772399902344, "logps/rejected": -4.952896595001221, "loss": 0.7783, "nll_loss": 0.738426685333252, "rewards/accuracies": 0.875, "rewards/chosen": -0.20955771207809448, "rewards/margins": 0.2857319116592407, "rewards/rejected": -0.4952896237373352, "step": 6875 }, { "epoch": 18.82546201232033, "grad_norm": 5.016424655914307, "learning_rate": 5.849315068493151e-08, "log_odds_chosen": 1.7938841581344604, "log_odds_ratio": -0.3416438102722168, "logits/chosen": 0.8878183364868164, "logits/rejected": 0.9302608966827393, "logps/chosen": -2.119244337081909, "logps/rejected": -3.6804287433624268, "loss": 0.6339, "nll_loss": 0.5997428894042969, "rewards/accuracies": 0.875, "rewards/chosen": -0.2119244635105133, "rewards/margins": 0.15611843764781952, "rewards/rejected": -0.368042916059494, "step": 6876 }, { "epoch": 18.82819986310746, "grad_norm": 7.358127593994141, "learning_rate": 5.835616438356164e-08, "log_odds_chosen": 2.306389331817627, "log_odds_ratio": -0.23321861028671265, "logits/chosen": 0.8483994007110596, "logits/rejected": 0.8995029330253601, "logps/chosen": -2.1077053546905518, "logps/rejected": -4.2662882804870605, "loss": 0.5945, "nll_loss": 0.5711830854415894, "rewards/accuracies": 0.75, "rewards/chosen": -0.21077054738998413, "rewards/margins": 0.2158583104610443, "rewards/rejected": -0.42662885785102844, "step": 6877 }, { "epoch": 18.830937713894592, "grad_norm": 9.881024360656738, "learning_rate": 5.821917808219177e-08, "log_odds_chosen": 2.8385303020477295, "log_odds_ratio": -0.22679458558559418, "logits/chosen": 0.6990038156509399, "logits/rejected": 0.7873729467391968, "logps/chosen": -2.2393102645874023, "logps/rejected": -5.003706455230713, "loss": 0.6616, "nll_loss": 0.6388864517211914, "rewards/accuracies": 0.875, "rewards/chosen": -0.22393101453781128, "rewards/margins": 0.2764396667480469, "rewards/rejected": -0.5003706812858582, "step": 6878 }, { "epoch": 18.833675564681727, "grad_norm": 8.849723815917969, "learning_rate": 5.808219178082192e-08, "log_odds_chosen": 1.9112639427185059, "log_odds_ratio": -0.29092809557914734, "logits/chosen": 1.0492379665374756, "logits/rejected": 1.1253186464309692, "logps/chosen": -2.5547587871551514, "logps/rejected": -4.261983394622803, "loss": 0.6315, "nll_loss": 0.602445125579834, "rewards/accuracies": 0.875, "rewards/chosen": -0.25547587871551514, "rewards/margins": 0.17072241008281708, "rewards/rejected": -0.426198273897171, "step": 6879 }, { "epoch": 18.836413415468858, "grad_norm": 5.615962505340576, "learning_rate": 5.794520547945205e-08, "log_odds_chosen": 3.2710800170898438, "log_odds_ratio": -0.3769022226333618, "logits/chosen": 0.7265424728393555, "logits/rejected": 0.6936769485473633, "logps/chosen": -2.2206406593322754, "logps/rejected": -5.436485290527344, "loss": 0.7528, "nll_loss": 0.7150732278823853, "rewards/accuracies": 0.75, "rewards/chosen": -0.2220640778541565, "rewards/margins": 0.32158443331718445, "rewards/rejected": -0.5436485409736633, "step": 6880 }, { "epoch": 18.83915126625599, "grad_norm": 6.53967809677124, "learning_rate": 5.780821917808218e-08, "log_odds_chosen": 2.1480655670166016, "log_odds_ratio": -0.1978936493396759, "logits/chosen": 0.7375568151473999, "logits/rejected": 0.6275889277458191, "logps/chosen": -1.4109560251235962, "logps/rejected": -3.3162457942962646, "loss": 0.712, "nll_loss": 0.6921659111976624, "rewards/accuracies": 1.0, "rewards/chosen": -0.1410956084728241, "rewards/margins": 0.1905289739370346, "rewards/rejected": -0.3316245675086975, "step": 6881 }, { "epoch": 18.84188911704312, "grad_norm": 5.088184833526611, "learning_rate": 5.767123287671233e-08, "log_odds_chosen": 4.1579179763793945, "log_odds_ratio": -0.04339353367686272, "logits/chosen": 1.0249311923980713, "logits/rejected": 1.0859453678131104, "logps/chosen": -2.113774061203003, "logps/rejected": -6.088761329650879, "loss": 0.5335, "nll_loss": 0.529154360294342, "rewards/accuracies": 1.0, "rewards/chosen": -0.21137742698192596, "rewards/margins": 0.3974987864494324, "rewards/rejected": -0.6088762283325195, "step": 6882 }, { "epoch": 18.844626967830255, "grad_norm": 7.670748710632324, "learning_rate": 5.753424657534246e-08, "log_odds_chosen": 0.9353013634681702, "log_odds_ratio": -0.42238643765449524, "logits/chosen": 0.8525329232215881, "logits/rejected": 0.919105052947998, "logps/chosen": -4.394961833953857, "logps/rejected": -5.255614280700684, "loss": 0.904, "nll_loss": 0.8617146611213684, "rewards/accuracies": 0.75, "rewards/chosen": -0.4394961893558502, "rewards/margins": 0.08606527000665665, "rewards/rejected": -0.5255614519119263, "step": 6883 }, { "epoch": 18.847364818617386, "grad_norm": 5.516499042510986, "learning_rate": 5.73972602739726e-08, "log_odds_chosen": 2.2356786727905273, "log_odds_ratio": -0.18015329539775848, "logits/chosen": 0.8159183859825134, "logits/rejected": 0.8833856582641602, "logps/chosen": -2.0803442001342773, "logps/rejected": -4.202391147613525, "loss": 0.5992, "nll_loss": 0.5811709761619568, "rewards/accuracies": 1.0, "rewards/chosen": -0.20803441107273102, "rewards/margins": 0.2122047394514084, "rewards/rejected": -0.4202391505241394, "step": 6884 }, { "epoch": 18.850102669404517, "grad_norm": 5.502604007720947, "learning_rate": 5.726027397260274e-08, "log_odds_chosen": 2.7193095684051514, "log_odds_ratio": -0.13753092288970947, "logits/chosen": 0.9548522233963013, "logits/rejected": 0.9274879097938538, "logps/chosen": -1.7226214408874512, "logps/rejected": -4.217957019805908, "loss": 0.5894, "nll_loss": 0.5756832361221313, "rewards/accuracies": 1.0, "rewards/chosen": -0.17226214706897736, "rewards/margins": 0.24953356385231018, "rewards/rejected": -0.42179572582244873, "step": 6885 }, { "epoch": 18.85284052019165, "grad_norm": 7.153244972229004, "learning_rate": 5.712328767123288e-08, "log_odds_chosen": 1.6780284643173218, "log_odds_ratio": -0.28886860609054565, "logits/chosen": 0.9876236319541931, "logits/rejected": 1.018599271774292, "logps/chosen": -2.629868268966675, "logps/rejected": -4.245611190795898, "loss": 0.689, "nll_loss": 0.6600679159164429, "rewards/accuracies": 1.0, "rewards/chosen": -0.26298680901527405, "rewards/margins": 0.16157427430152893, "rewards/rejected": -0.42456111311912537, "step": 6886 }, { "epoch": 18.855578370978783, "grad_norm": 5.732990264892578, "learning_rate": 5.698630136986301e-08, "log_odds_chosen": 2.0852255821228027, "log_odds_ratio": -0.20431222021579742, "logits/chosen": 0.7956732511520386, "logits/rejected": 0.8517128229141235, "logps/chosen": -2.3478848934173584, "logps/rejected": -4.286433696746826, "loss": 0.6736, "nll_loss": 0.6531679630279541, "rewards/accuracies": 1.0, "rewards/chosen": -0.23478850722312927, "rewards/margins": 0.19385486841201782, "rewards/rejected": -0.4286433458328247, "step": 6887 }, { "epoch": 18.858316221765914, "grad_norm": 5.58608865737915, "learning_rate": 5.684931506849315e-08, "log_odds_chosen": 2.32033634185791, "log_odds_ratio": -0.18781913816928864, "logits/chosen": 1.0334455966949463, "logits/rejected": 0.9943580031394958, "logps/chosen": -1.9623494148254395, "logps/rejected": -4.0976104736328125, "loss": 0.5472, "nll_loss": 0.5284454822540283, "rewards/accuracies": 1.0, "rewards/chosen": -0.19623494148254395, "rewards/margins": 0.21352604031562805, "rewards/rejected": -0.4097610116004944, "step": 6888 }, { "epoch": 18.861054072553046, "grad_norm": 5.471319675445557, "learning_rate": 5.671232876712329e-08, "log_odds_chosen": 2.269291400909424, "log_odds_ratio": -0.23806743323802948, "logits/chosen": 0.8906526565551758, "logits/rejected": 0.9062260389328003, "logps/chosen": -2.5930941104888916, "logps/rejected": -4.746079444885254, "loss": 0.7838, "nll_loss": 0.759980320930481, "rewards/accuracies": 0.875, "rewards/chosen": -0.25930941104888916, "rewards/margins": 0.21529856324195862, "rewards/rejected": -0.4746079742908478, "step": 6889 }, { "epoch": 18.863791923340177, "grad_norm": 5.022931098937988, "learning_rate": 5.657534246575342e-08, "log_odds_chosen": 2.682138442993164, "log_odds_ratio": -0.22715416550636292, "logits/chosen": 0.6460101008415222, "logits/rejected": 0.706977367401123, "logps/chosen": -1.9031529426574707, "logps/rejected": -4.402241230010986, "loss": 0.6448, "nll_loss": 0.6220364570617676, "rewards/accuracies": 0.875, "rewards/chosen": -0.19031530618667603, "rewards/margins": 0.24990878999233246, "rewards/rejected": -0.4402240812778473, "step": 6890 }, { "epoch": 18.86652977412731, "grad_norm": 6.038357257843018, "learning_rate": 5.6438356164383565e-08, "log_odds_chosen": 2.180819034576416, "log_odds_ratio": -0.3188713788986206, "logits/chosen": 0.8366938233375549, "logits/rejected": 0.7807372212409973, "logps/chosen": -2.4466521739959717, "logps/rejected": -4.5255537033081055, "loss": 0.7707, "nll_loss": 0.7388195991516113, "rewards/accuracies": 0.75, "rewards/chosen": -0.2446652352809906, "rewards/margins": 0.20789018273353577, "rewards/rejected": -0.45255541801452637, "step": 6891 }, { "epoch": 18.869267624914443, "grad_norm": 6.352615833282471, "learning_rate": 5.63013698630137e-08, "log_odds_chosen": 2.362415075302124, "log_odds_ratio": -0.2771230638027191, "logits/chosen": 0.8629900217056274, "logits/rejected": 0.8312175273895264, "logps/chosen": -2.0062203407287598, "logps/rejected": -4.243801593780518, "loss": 0.6169, "nll_loss": 0.5892003178596497, "rewards/accuracies": 0.875, "rewards/chosen": -0.20062202215194702, "rewards/margins": 0.22375816106796265, "rewards/rejected": -0.42438018321990967, "step": 6892 }, { "epoch": 18.872005475701574, "grad_norm": 6.271656036376953, "learning_rate": 5.616438356164383e-08, "log_odds_chosen": 2.4113428592681885, "log_odds_ratio": -0.17131438851356506, "logits/chosen": 0.8175173401832581, "logits/rejected": 0.9187425374984741, "logps/chosen": -3.149186134338379, "logps/rejected": -5.4823527336120605, "loss": 0.6932, "nll_loss": 0.6760641932487488, "rewards/accuracies": 1.0, "rewards/chosen": -0.3149186372756958, "rewards/margins": 0.23331665992736816, "rewards/rejected": -0.548235297203064, "step": 6893 }, { "epoch": 18.874743326488705, "grad_norm": 4.77911901473999, "learning_rate": 5.6027397260273975e-08, "log_odds_chosen": 1.7236557006835938, "log_odds_ratio": -0.2670763432979584, "logits/chosen": 0.7427082061767578, "logits/rejected": 0.7156059145927429, "logps/chosen": -2.5233185291290283, "logps/rejected": -4.1579484939575195, "loss": 0.6548, "nll_loss": 0.6280823945999146, "rewards/accuracies": 1.0, "rewards/chosen": -0.25233185291290283, "rewards/margins": 0.16346296668052673, "rewards/rejected": -0.41579484939575195, "step": 6894 }, { "epoch": 18.87748117727584, "grad_norm": 5.651379585266113, "learning_rate": 5.589041095890411e-08, "log_odds_chosen": 3.240907669067383, "log_odds_ratio": -0.2198019027709961, "logits/chosen": 0.8018872737884521, "logits/rejected": 0.8469170331954956, "logps/chosen": -2.33286714553833, "logps/rejected": -5.4789299964904785, "loss": 0.7265, "nll_loss": 0.7045641541481018, "rewards/accuracies": 0.875, "rewards/chosen": -0.23328670859336853, "rewards/margins": 0.31460627913475037, "rewards/rejected": -0.5478929877281189, "step": 6895 }, { "epoch": 18.88021902806297, "grad_norm": 5.865577697753906, "learning_rate": 5.5753424657534246e-08, "log_odds_chosen": 2.8389391899108887, "log_odds_ratio": -0.10373139381408691, "logits/chosen": 0.8727014660835266, "logits/rejected": 0.9654969573020935, "logps/chosen": -3.0868313312530518, "logps/rejected": -5.8322553634643555, "loss": 0.8605, "nll_loss": 0.8501613736152649, "rewards/accuracies": 1.0, "rewards/chosen": -0.3086831569671631, "rewards/margins": 0.2745424509048462, "rewards/rejected": -0.5832256078720093, "step": 6896 }, { "epoch": 18.882956878850102, "grad_norm": 6.076688766479492, "learning_rate": 5.5616438356164385e-08, "log_odds_chosen": 2.453740119934082, "log_odds_ratio": -0.158135324716568, "logits/chosen": 0.9153465628623962, "logits/rejected": 0.92225581407547, "logps/chosen": -1.92313551902771, "logps/rejected": -4.235368251800537, "loss": 0.5572, "nll_loss": 0.5413753986358643, "rewards/accuracies": 1.0, "rewards/chosen": -0.192313551902771, "rewards/margins": 0.23122328519821167, "rewards/rejected": -0.42353683710098267, "step": 6897 }, { "epoch": 18.885694729637233, "grad_norm": 7.498085975646973, "learning_rate": 5.547945205479452e-08, "log_odds_chosen": 1.197017788887024, "log_odds_ratio": -0.4947563409805298, "logits/chosen": 0.771712601184845, "logits/rejected": 0.7330340147018433, "logps/chosen": -2.1432571411132812, "logps/rejected": -3.141888380050659, "loss": 0.6008, "nll_loss": 0.5513056516647339, "rewards/accuracies": 0.875, "rewards/chosen": -0.21432572603225708, "rewards/margins": 0.09986314177513123, "rewards/rejected": -0.3141888678073883, "step": 6898 }, { "epoch": 18.888432580424368, "grad_norm": 6.707089900970459, "learning_rate": 5.5342465753424656e-08, "log_odds_chosen": 1.213805079460144, "log_odds_ratio": -0.41079938411712646, "logits/chosen": 0.8860071897506714, "logits/rejected": 0.8227683305740356, "logps/chosen": -2.270477533340454, "logps/rejected": -3.3914794921875, "loss": 0.7018, "nll_loss": 0.6607009172439575, "rewards/accuracies": 0.875, "rewards/chosen": -0.22704775631427765, "rewards/margins": 0.11210017651319504, "rewards/rejected": -0.3391479253768921, "step": 6899 }, { "epoch": 18.8911704312115, "grad_norm": 5.10225248336792, "learning_rate": 5.520547945205479e-08, "log_odds_chosen": 1.9248546361923218, "log_odds_ratio": -0.24307484924793243, "logits/chosen": 0.7859569787979126, "logits/rejected": 0.8121220469474792, "logps/chosen": -1.8959083557128906, "logps/rejected": -3.641289710998535, "loss": 0.6003, "nll_loss": 0.5760379433631897, "rewards/accuracies": 1.0, "rewards/chosen": -0.18959084153175354, "rewards/margins": 0.17453815042972565, "rewards/rejected": -0.3641290068626404, "step": 6900 }, { "epoch": 18.89390828199863, "grad_norm": 5.381389141082764, "learning_rate": 5.5068493150684933e-08, "log_odds_chosen": 2.2924892902374268, "log_odds_ratio": -0.31859415769577026, "logits/chosen": 1.0165302753448486, "logits/rejected": 1.028557300567627, "logps/chosen": -2.014378070831299, "logps/rejected": -4.149642467498779, "loss": 0.5594, "nll_loss": 0.527525782585144, "rewards/accuracies": 0.875, "rewards/chosen": -0.2014378160238266, "rewards/margins": 0.21352644264698029, "rewards/rejected": -0.4149642586708069, "step": 6901 }, { "epoch": 18.89664613278576, "grad_norm": 5.868229389190674, "learning_rate": 5.4931506849315066e-08, "log_odds_chosen": 1.3744196891784668, "log_odds_ratio": -0.4335393011569977, "logits/chosen": 0.9770140051841736, "logits/rejected": 0.9465606808662415, "logps/chosen": -2.3451361656188965, "logps/rejected": -3.6269826889038086, "loss": 0.6098, "nll_loss": 0.5664472579956055, "rewards/accuracies": 0.875, "rewards/chosen": -0.23451361060142517, "rewards/margins": 0.12818464636802673, "rewards/rejected": -0.3626982569694519, "step": 6902 }, { "epoch": 18.899383983572896, "grad_norm": 5.837346076965332, "learning_rate": 5.47945205479452e-08, "log_odds_chosen": 1.3495492935180664, "log_odds_ratio": -0.2798137366771698, "logits/chosen": 0.8084733486175537, "logits/rejected": 0.7979558110237122, "logps/chosen": -1.4538720846176147, "logps/rejected": -2.619208335876465, "loss": 0.5077, "nll_loss": 0.47974491119384766, "rewards/accuracies": 0.875, "rewards/chosen": -0.145387202501297, "rewards/margins": 0.11653363704681396, "rewards/rejected": -0.26192086935043335, "step": 6903 }, { "epoch": 18.902121834360027, "grad_norm": 5.18686580657959, "learning_rate": 5.465753424657534e-08, "log_odds_chosen": 3.9680802822113037, "log_odds_ratio": -0.1262809783220291, "logits/chosen": 0.9476062655448914, "logits/rejected": 0.8975479602813721, "logps/chosen": -1.8871580362319946, "logps/rejected": -5.665026664733887, "loss": 0.6309, "nll_loss": 0.6183062791824341, "rewards/accuracies": 1.0, "rewards/chosen": -0.18871581554412842, "rewards/margins": 0.3777868449687958, "rewards/rejected": -0.5665026903152466, "step": 6904 }, { "epoch": 18.90485968514716, "grad_norm": 5.484930038452148, "learning_rate": 5.4520547945205476e-08, "log_odds_chosen": 2.0673649311065674, "log_odds_ratio": -0.20294857025146484, "logits/chosen": 0.8326682448387146, "logits/rejected": 0.7773179411888123, "logps/chosen": -2.0800282955169678, "logps/rejected": -4.001127243041992, "loss": 0.6629, "nll_loss": 0.6426491737365723, "rewards/accuracies": 0.875, "rewards/chosen": -0.20800283551216125, "rewards/margins": 0.1921098679304123, "rewards/rejected": -0.40011268854141235, "step": 6905 }, { "epoch": 18.907597535934293, "grad_norm": 6.1881232261657715, "learning_rate": 5.4383561643835614e-08, "log_odds_chosen": 2.8860251903533936, "log_odds_ratio": -0.19526557624340057, "logits/chosen": 0.8580312728881836, "logits/rejected": 0.9386339783668518, "logps/chosen": -2.1240625381469727, "logps/rejected": -4.889483451843262, "loss": 0.5953, "nll_loss": 0.5757966637611389, "rewards/accuracies": 1.0, "rewards/chosen": -0.21240626275539398, "rewards/margins": 0.27654212713241577, "rewards/rejected": -0.48894837498664856, "step": 6906 }, { "epoch": 18.910335386721425, "grad_norm": 5.287695407867432, "learning_rate": 5.424657534246575e-08, "log_odds_chosen": 2.5383803844451904, "log_odds_ratio": -0.4196893870830536, "logits/chosen": 0.8280404806137085, "logits/rejected": 0.8577708005905151, "logps/chosen": -2.387218952178955, "logps/rejected": -4.744701385498047, "loss": 0.7245, "nll_loss": 0.6825293898582458, "rewards/accuracies": 0.75, "rewards/chosen": -0.23872189223766327, "rewards/margins": 0.2357482612133026, "rewards/rejected": -0.4744701385498047, "step": 6907 }, { "epoch": 18.913073237508556, "grad_norm": 7.088992118835449, "learning_rate": 5.4109589041095885e-08, "log_odds_chosen": 1.124301552772522, "log_odds_ratio": -0.4877261817455292, "logits/chosen": 0.9260615110397339, "logits/rejected": 0.9515544176101685, "logps/chosen": -2.3053486347198486, "logps/rejected": -3.316135883331299, "loss": 0.6176, "nll_loss": 0.5687870979309082, "rewards/accuracies": 0.875, "rewards/chosen": -0.2305348664522171, "rewards/margins": 0.10107874870300293, "rewards/rejected": -0.33161360025405884, "step": 6908 }, { "epoch": 18.915811088295687, "grad_norm": 5.609760761260986, "learning_rate": 5.3972602739726024e-08, "log_odds_chosen": 2.2300662994384766, "log_odds_ratio": -0.307717889547348, "logits/chosen": 0.6159690618515015, "logits/rejected": 0.7373377084732056, "logps/chosen": -2.0108225345611572, "logps/rejected": -4.055298805236816, "loss": 0.6469, "nll_loss": 0.6161633133888245, "rewards/accuracies": 0.875, "rewards/chosen": -0.2010822594165802, "rewards/margins": 0.20444762706756592, "rewards/rejected": -0.40552985668182373, "step": 6909 }, { "epoch": 18.91854893908282, "grad_norm": 4.992595672607422, "learning_rate": 5.383561643835616e-08, "log_odds_chosen": 2.5013275146484375, "log_odds_ratio": -0.1550264209508896, "logits/chosen": 0.6120076775550842, "logits/rejected": 0.611922562122345, "logps/chosen": -2.0240983963012695, "logps/rejected": -4.376640319824219, "loss": 0.6282, "nll_loss": 0.6126475930213928, "rewards/accuracies": 1.0, "rewards/chosen": -0.20240983366966248, "rewards/margins": 0.2352542132139206, "rewards/rejected": -0.4376640319824219, "step": 6910 }, { "epoch": 18.921286789869953, "grad_norm": 4.948408603668213, "learning_rate": 5.36986301369863e-08, "log_odds_chosen": 2.570676803588867, "log_odds_ratio": -0.14118222892284393, "logits/chosen": 0.642216682434082, "logits/rejected": 0.6683550477027893, "logps/chosen": -1.3562445640563965, "logps/rejected": -3.6523544788360596, "loss": 0.5422, "nll_loss": 0.5281103849411011, "rewards/accuracies": 1.0, "rewards/chosen": -0.1356244683265686, "rewards/margins": 0.22961097955703735, "rewards/rejected": -0.36523544788360596, "step": 6911 }, { "epoch": 18.924024640657084, "grad_norm": 8.055973052978516, "learning_rate": 5.3561643835616434e-08, "log_odds_chosen": 1.3228217363357544, "log_odds_ratio": -0.4781426191329956, "logits/chosen": 1.0575997829437256, "logits/rejected": 1.0667675733566284, "logps/chosen": -2.579962730407715, "logps/rejected": -3.803834915161133, "loss": 0.6916, "nll_loss": 0.6437451839447021, "rewards/accuracies": 0.75, "rewards/chosen": -0.25799626111984253, "rewards/margins": 0.12238721549510956, "rewards/rejected": -0.3803834915161133, "step": 6912 }, { "epoch": 18.926762491444215, "grad_norm": 5.937282085418701, "learning_rate": 5.342465753424657e-08, "log_odds_chosen": 2.258981466293335, "log_odds_ratio": -0.19215336441993713, "logits/chosen": 1.10883367061615, "logits/rejected": 1.174198031425476, "logps/chosen": -2.2836689949035645, "logps/rejected": -4.4264912605285645, "loss": 0.5704, "nll_loss": 0.5512070059776306, "rewards/accuracies": 1.0, "rewards/chosen": -0.2283669114112854, "rewards/margins": 0.21428224444389343, "rewards/rejected": -0.44264912605285645, "step": 6913 }, { "epoch": 18.92950034223135, "grad_norm": 7.709859848022461, "learning_rate": 5.328767123287671e-08, "log_odds_chosen": 1.745088815689087, "log_odds_ratio": -0.4907335937023163, "logits/chosen": 0.7870059013366699, "logits/rejected": 0.7874410152435303, "logps/chosen": -2.3062644004821777, "logps/rejected": -3.9544219970703125, "loss": 0.7959, "nll_loss": 0.746865451335907, "rewards/accuracies": 0.75, "rewards/chosen": -0.2306264489889145, "rewards/margins": 0.164815753698349, "rewards/rejected": -0.3954421877861023, "step": 6914 }, { "epoch": 18.93223819301848, "grad_norm": 4.903132915496826, "learning_rate": 5.3150684931506844e-08, "log_odds_chosen": 2.4313647747039795, "log_odds_ratio": -0.15549302101135254, "logits/chosen": 0.8467850089073181, "logits/rejected": 0.781133770942688, "logps/chosen": -1.9811702966690063, "logps/rejected": -4.224400520324707, "loss": 0.6387, "nll_loss": 0.6231750845909119, "rewards/accuracies": 1.0, "rewards/chosen": -0.19811704754829407, "rewards/margins": 0.22432301938533783, "rewards/rejected": -0.4224400520324707, "step": 6915 }, { "epoch": 18.934976043805612, "grad_norm": 6.228487014770508, "learning_rate": 5.301369863013699e-08, "log_odds_chosen": 1.1652376651763916, "log_odds_ratio": -0.3592683970928192, "logits/chosen": 0.7777478694915771, "logits/rejected": 0.8079230785369873, "logps/chosen": -2.2631866931915283, "logps/rejected": -3.2917633056640625, "loss": 0.6027, "nll_loss": 0.5668156147003174, "rewards/accuracies": 0.875, "rewards/chosen": -0.22631867229938507, "rewards/margins": 0.10285766422748566, "rewards/rejected": -0.32917630672454834, "step": 6916 }, { "epoch": 18.937713894592743, "grad_norm": 5.9733452796936035, "learning_rate": 5.287671232876712e-08, "log_odds_chosen": 3.3020718097686768, "log_odds_ratio": -0.14407889544963837, "logits/chosen": 0.8720763921737671, "logits/rejected": 0.9630382061004639, "logps/chosen": -2.4594600200653076, "logps/rejected": -5.62617301940918, "loss": 0.8701, "nll_loss": 0.8556429147720337, "rewards/accuracies": 0.875, "rewards/chosen": -0.245946004986763, "rewards/margins": 0.31667131185531616, "rewards/rejected": -0.562617301940918, "step": 6917 }, { "epoch": 18.940451745379878, "grad_norm": 5.0223798751831055, "learning_rate": 5.2739726027397254e-08, "log_odds_chosen": 1.674742579460144, "log_odds_ratio": -0.28187814354896545, "logits/chosen": 1.025010585784912, "logits/rejected": 1.1058270931243896, "logps/chosen": -2.683443546295166, "logps/rejected": -4.3030476570129395, "loss": 0.7316, "nll_loss": 0.7033981084823608, "rewards/accuracies": 1.0, "rewards/chosen": -0.26834437251091003, "rewards/margins": 0.1619604229927063, "rewards/rejected": -0.43030479550361633, "step": 6918 }, { "epoch": 18.94318959616701, "grad_norm": 5.497658729553223, "learning_rate": 5.26027397260274e-08, "log_odds_chosen": 2.458487033843994, "log_odds_ratio": -0.27747976779937744, "logits/chosen": 1.0224785804748535, "logits/rejected": 0.9940013885498047, "logps/chosen": -1.9213310480117798, "logps/rejected": -4.278809070587158, "loss": 0.6043, "nll_loss": 0.5765283107757568, "rewards/accuracies": 1.0, "rewards/chosen": -0.1921330988407135, "rewards/margins": 0.2357478141784668, "rewards/rejected": -0.4278809428215027, "step": 6919 }, { "epoch": 18.94592744695414, "grad_norm": 5.046404838562012, "learning_rate": 5.246575342465753e-08, "log_odds_chosen": 2.3044183254241943, "log_odds_ratio": -0.17486271262168884, "logits/chosen": 0.7703589200973511, "logits/rejected": 0.7874240279197693, "logps/chosen": -1.6667776107788086, "logps/rejected": -3.7831003665924072, "loss": 0.5369, "nll_loss": 0.5194286704063416, "rewards/accuracies": 1.0, "rewards/chosen": -0.16667775809764862, "rewards/margins": 0.21163226664066315, "rewards/rejected": -0.37831002473831177, "step": 6920 }, { "epoch": 18.94866529774127, "grad_norm": 7.062991142272949, "learning_rate": 5.232876712328767e-08, "log_odds_chosen": 2.3784897327423096, "log_odds_ratio": -0.44208255410194397, "logits/chosen": 1.0163620710372925, "logits/rejected": 1.0026566982269287, "logps/chosen": -2.4295895099639893, "logps/rejected": -4.6892266273498535, "loss": 0.7012, "nll_loss": 0.6570003628730774, "rewards/accuracies": 0.875, "rewards/chosen": -0.24295896291732788, "rewards/margins": 0.22596372663974762, "rewards/rejected": -0.4689226746559143, "step": 6921 }, { "epoch": 18.951403148528406, "grad_norm": 8.337421417236328, "learning_rate": 5.21917808219178e-08, "log_odds_chosen": 1.9782705307006836, "log_odds_ratio": -0.33986398577690125, "logits/chosen": 0.9696090221405029, "logits/rejected": 1.029271125793457, "logps/chosen": -2.6252903938293457, "logps/rejected": -4.483767986297607, "loss": 0.6349, "nll_loss": 0.6009167432785034, "rewards/accuracies": 0.875, "rewards/chosen": -0.26252907514572144, "rewards/margins": 0.18584774434566498, "rewards/rejected": -0.4483768045902252, "step": 6922 }, { "epoch": 18.954140999315538, "grad_norm": 8.346556663513184, "learning_rate": 5.205479452054794e-08, "log_odds_chosen": 1.8371564149856567, "log_odds_ratio": -0.32770678400993347, "logits/chosen": 0.9684436321258545, "logits/rejected": 0.9654648303985596, "logps/chosen": -2.758925437927246, "logps/rejected": -4.453226566314697, "loss": 0.6703, "nll_loss": 0.6375570297241211, "rewards/accuracies": 0.875, "rewards/chosen": -0.27589255571365356, "rewards/margins": 0.16943010687828064, "rewards/rejected": -0.4453226625919342, "step": 6923 }, { "epoch": 18.95687885010267, "grad_norm": 8.40075397491455, "learning_rate": 5.191780821917808e-08, "log_odds_chosen": 0.49200570583343506, "log_odds_ratio": -0.6166993379592896, "logits/chosen": 0.7645917534828186, "logits/rejected": 0.7499695420265198, "logps/chosen": -2.5339529514312744, "logps/rejected": -2.955587387084961, "loss": 0.8164, "nll_loss": 0.7547780871391296, "rewards/accuracies": 0.75, "rewards/chosen": -0.25339531898498535, "rewards/margins": 0.04216345399618149, "rewards/rejected": -0.29555875062942505, "step": 6924 }, { "epoch": 18.9596167008898, "grad_norm": 5.925360202789307, "learning_rate": 5.178082191780821e-08, "log_odds_chosen": 1.5932817459106445, "log_odds_ratio": -0.3031447231769562, "logits/chosen": 0.7360315918922424, "logits/rejected": 0.7680450677871704, "logps/chosen": -2.5875542163848877, "logps/rejected": -4.0920586585998535, "loss": 0.7424, "nll_loss": 0.7120895385742188, "rewards/accuracies": 0.875, "rewards/chosen": -0.2587554156780243, "rewards/margins": 0.1504504680633545, "rewards/rejected": -0.40920591354370117, "step": 6925 }, { "epoch": 18.962354551676935, "grad_norm": 5.197385311126709, "learning_rate": 5.164383561643836e-08, "log_odds_chosen": 2.7478504180908203, "log_odds_ratio": -0.19309231638908386, "logits/chosen": 0.7979527711868286, "logits/rejected": 0.7603415250778198, "logps/chosen": -1.7193702459335327, "logps/rejected": -4.294547080993652, "loss": 0.7508, "nll_loss": 0.7315029501914978, "rewards/accuracies": 1.0, "rewards/chosen": -0.17193703353405, "rewards/margins": 0.25751763582229614, "rewards/rejected": -0.4294546842575073, "step": 6926 }, { "epoch": 18.965092402464066, "grad_norm": 5.271354675292969, "learning_rate": 5.150684931506849e-08, "log_odds_chosen": 1.6754425764083862, "log_odds_ratio": -0.30871662497520447, "logits/chosen": 0.5430669188499451, "logits/rejected": 0.5403586030006409, "logps/chosen": -1.6387664079666138, "logps/rejected": -3.1526389122009277, "loss": 0.6401, "nll_loss": 0.6092122197151184, "rewards/accuracies": 0.875, "rewards/chosen": -0.16387665271759033, "rewards/margins": 0.15138722956180573, "rewards/rejected": -0.31526386737823486, "step": 6927 }, { "epoch": 18.967830253251197, "grad_norm": 4.874391555786133, "learning_rate": 5.136986301369862e-08, "log_odds_chosen": 2.243788003921509, "log_odds_ratio": -0.21603207290172577, "logits/chosen": 0.7399359941482544, "logits/rejected": 0.7241889238357544, "logps/chosen": -1.9876824617385864, "logps/rejected": -4.060055255889893, "loss": 0.6338, "nll_loss": 0.6121800541877747, "rewards/accuracies": 1.0, "rewards/chosen": -0.1987682431936264, "rewards/margins": 0.20723728835582733, "rewards/rejected": -0.4060055613517761, "step": 6928 }, { "epoch": 18.97056810403833, "grad_norm": 8.186129570007324, "learning_rate": 5.123287671232877e-08, "log_odds_chosen": 1.2038071155548096, "log_odds_ratio": -0.6127539873123169, "logits/chosen": 0.8924062252044678, "logits/rejected": 0.9030771255493164, "logps/chosen": -2.4977636337280273, "logps/rejected": -3.5996992588043213, "loss": 0.712, "nll_loss": 0.6507592797279358, "rewards/accuracies": 0.625, "rewards/chosen": -0.24977636337280273, "rewards/margins": 0.11019358038902283, "rewards/rejected": -0.35996994376182556, "step": 6929 }, { "epoch": 18.973305954825463, "grad_norm": 4.842983245849609, "learning_rate": 5.10958904109589e-08, "log_odds_chosen": 4.697388648986816, "log_odds_ratio": -0.0589073970913887, "logits/chosen": 1.0868504047393799, "logits/rejected": 1.162672758102417, "logps/chosen": -2.2279109954833984, "logps/rejected": -6.786736011505127, "loss": 0.5961, "nll_loss": 0.590189516544342, "rewards/accuracies": 1.0, "rewards/chosen": -0.22279107570648193, "rewards/margins": 0.4558824896812439, "rewards/rejected": -0.6786735653877258, "step": 6930 }, { "epoch": 18.976043805612594, "grad_norm": 5.198413372039795, "learning_rate": 5.095890410958904e-08, "log_odds_chosen": 3.0391385555267334, "log_odds_ratio": -0.18946480751037598, "logits/chosen": 0.801487922668457, "logits/rejected": 0.8326563835144043, "logps/chosen": -1.8856732845306396, "logps/rejected": -4.722117900848389, "loss": 0.6373, "nll_loss": 0.61834317445755, "rewards/accuracies": 1.0, "rewards/chosen": -0.18856734037399292, "rewards/margins": 0.2836444675922394, "rewards/rejected": -0.4722118079662323, "step": 6931 }, { "epoch": 18.978781656399725, "grad_norm": 4.5667595863342285, "learning_rate": 5.082191780821918e-08, "log_odds_chosen": 2.2292327880859375, "log_odds_ratio": -0.1955648958683014, "logits/chosen": 1.0355010032653809, "logits/rejected": 1.0127604007720947, "logps/chosen": -1.4033843278884888, "logps/rejected": -3.401993989944458, "loss": 0.6174, "nll_loss": 0.5978330969810486, "rewards/accuracies": 1.0, "rewards/chosen": -0.1403384506702423, "rewards/margins": 0.19986093044281006, "rewards/rejected": -0.34019935131073, "step": 6932 }, { "epoch": 18.98151950718686, "grad_norm": 5.790486812591553, "learning_rate": 5.068493150684931e-08, "log_odds_chosen": 1.942886233329773, "log_odds_ratio": -0.2074946165084839, "logits/chosen": 0.6843260526657104, "logits/rejected": 0.6113314032554626, "logps/chosen": -1.7615222930908203, "logps/rejected": -3.500805616378784, "loss": 0.6024, "nll_loss": 0.5816536545753479, "rewards/accuracies": 1.0, "rewards/chosen": -0.17615222930908203, "rewards/margins": 0.17392833530902863, "rewards/rejected": -0.35008054971694946, "step": 6933 }, { "epoch": 18.98425735797399, "grad_norm": 14.202045440673828, "learning_rate": 5.054794520547945e-08, "log_odds_chosen": 1.853897213935852, "log_odds_ratio": -0.435479074716568, "logits/chosen": 0.9423480033874512, "logits/rejected": 0.8727929592132568, "logps/chosen": -2.7094078063964844, "logps/rejected": -4.3780412673950195, "loss": 0.7599, "nll_loss": 0.7163357734680176, "rewards/accuracies": 0.75, "rewards/chosen": -0.2709408104419708, "rewards/margins": 0.1668633669614792, "rewards/rejected": -0.4378041625022888, "step": 6934 }, { "epoch": 18.986995208761122, "grad_norm": 5.513906955718994, "learning_rate": 5.041095890410959e-08, "log_odds_chosen": 2.2678005695343018, "log_odds_ratio": -0.21554823219776154, "logits/chosen": 0.7687432169914246, "logits/rejected": 0.7370595335960388, "logps/chosen": -2.204230308532715, "logps/rejected": -4.31460428237915, "loss": 0.6035, "nll_loss": 0.5819676518440247, "rewards/accuracies": 1.0, "rewards/chosen": -0.22042302787303925, "rewards/margins": 0.21103742718696594, "rewards/rejected": -0.431460440158844, "step": 6935 }, { "epoch": 18.989733059548254, "grad_norm": 6.37723445892334, "learning_rate": 5.0273972602739727e-08, "log_odds_chosen": 2.3108410835266113, "log_odds_ratio": -0.1981692761182785, "logits/chosen": 0.9543552994728088, "logits/rejected": 0.8525049686431885, "logps/chosen": -1.5727853775024414, "logps/rejected": -3.6718058586120605, "loss": 0.5275, "nll_loss": 0.5077163577079773, "rewards/accuracies": 1.0, "rewards/chosen": -0.15727853775024414, "rewards/margins": 0.20990203320980072, "rewards/rejected": -0.36718055605888367, "step": 6936 }, { "epoch": 18.99247091033539, "grad_norm": 5.489384651184082, "learning_rate": 5.013698630136986e-08, "log_odds_chosen": 3.7178499698638916, "log_odds_ratio": -0.16743972897529602, "logits/chosen": 0.6403109431266785, "logits/rejected": 0.6618613600730896, "logps/chosen": -2.126523494720459, "logps/rejected": -5.715288162231445, "loss": 0.5971, "nll_loss": 0.5803714990615845, "rewards/accuracies": 1.0, "rewards/chosen": -0.21265235543251038, "rewards/margins": 0.358876496553421, "rewards/rejected": -0.5715289115905762, "step": 6937 }, { "epoch": 18.99520876112252, "grad_norm": 6.617684841156006, "learning_rate": 5e-08, "log_odds_chosen": 1.254073977470398, "log_odds_ratio": -0.4276295304298401, "logits/chosen": 0.9486229419708252, "logits/rejected": 0.8858174085617065, "logps/chosen": -1.893763542175293, "logps/rejected": -3.0449423789978027, "loss": 0.5638, "nll_loss": 0.5210681557655334, "rewards/accuracies": 0.75, "rewards/chosen": -0.1893763542175293, "rewards/margins": 0.11511792987585068, "rewards/rejected": -0.3044942617416382, "step": 6938 }, { "epoch": 18.99794661190965, "grad_norm": 4.65286111831665, "learning_rate": 4.9863013698630137e-08, "log_odds_chosen": 5.089609146118164, "log_odds_ratio": -0.08414080739021301, "logits/chosen": 0.9154491424560547, "logits/rejected": 0.9590319395065308, "logps/chosen": -2.000247001647949, "logps/rejected": -6.945621967315674, "loss": 0.6081, "nll_loss": 0.599692702293396, "rewards/accuracies": 1.0, "rewards/chosen": -0.20002469420433044, "rewards/margins": 0.49453750252723694, "rewards/rejected": -0.6945621967315674, "step": 6939 }, { "epoch": 19.000684462696782, "grad_norm": 6.794179439544678, "learning_rate": 4.972602739726027e-08, "log_odds_chosen": 2.8098931312561035, "log_odds_ratio": -0.23667405545711517, "logits/chosen": 1.188525915145874, "logits/rejected": 1.2619813680648804, "logps/chosen": -2.5668931007385254, "logps/rejected": -5.246286869049072, "loss": 0.583, "nll_loss": 0.5592884421348572, "rewards/accuracies": 0.875, "rewards/chosen": -0.25668931007385254, "rewards/margins": 0.26793941855430603, "rewards/rejected": -0.524628758430481, "step": 6940 }, { "epoch": 19.003422313483917, "grad_norm": 5.5437541007995605, "learning_rate": 4.9589041095890414e-08, "log_odds_chosen": 2.0692193508148193, "log_odds_ratio": -0.19777286052703857, "logits/chosen": 0.7814936637878418, "logits/rejected": 0.7879511117935181, "logps/chosen": -2.0128989219665527, "logps/rejected": -3.8747966289520264, "loss": 0.5719, "nll_loss": 0.5520996451377869, "rewards/accuracies": 1.0, "rewards/chosen": -0.20128989219665527, "rewards/margins": 0.18618977069854736, "rewards/rejected": -0.38747966289520264, "step": 6941 }, { "epoch": 19.006160164271048, "grad_norm": 12.400460243225098, "learning_rate": 4.9452054794520546e-08, "log_odds_chosen": 2.206899881362915, "log_odds_ratio": -0.375490665435791, "logits/chosen": 1.0184954404830933, "logits/rejected": 1.0030832290649414, "logps/chosen": -2.608954668045044, "logps/rejected": -4.75515079498291, "loss": 0.6256, "nll_loss": 0.5880458354949951, "rewards/accuracies": 0.875, "rewards/chosen": -0.2608954906463623, "rewards/margins": 0.21461962163448334, "rewards/rejected": -0.47551506757736206, "step": 6942 }, { "epoch": 19.00889801505818, "grad_norm": 5.173172950744629, "learning_rate": 4.931506849315068e-08, "log_odds_chosen": 3.9331886768341064, "log_odds_ratio": -0.0754152238368988, "logits/chosen": 0.544536828994751, "logits/rejected": 0.5752761363983154, "logps/chosen": -2.3572633266448975, "logps/rejected": -6.102090835571289, "loss": 0.5877, "nll_loss": 0.5801583528518677, "rewards/accuracies": 1.0, "rewards/chosen": -0.23572635650634766, "rewards/margins": 0.37448278069496155, "rewards/rejected": -0.6102091073989868, "step": 6943 }, { "epoch": 19.01163586584531, "grad_norm": 7.23781156539917, "learning_rate": 4.9178082191780824e-08, "log_odds_chosen": 1.5565993785858154, "log_odds_ratio": -0.3877275288105011, "logits/chosen": 0.5062142610549927, "logits/rejected": 0.4800437390804291, "logps/chosen": -1.9867656230926514, "logps/rejected": -3.3939123153686523, "loss": 0.6711, "nll_loss": 0.6323033571243286, "rewards/accuracies": 0.875, "rewards/chosen": -0.19867657124996185, "rewards/margins": 0.14071467518806458, "rewards/rejected": -0.33939123153686523, "step": 6944 }, { "epoch": 19.014373716632445, "grad_norm": 5.108233451843262, "learning_rate": 4.9041095890410956e-08, "log_odds_chosen": 1.600553274154663, "log_odds_ratio": -0.22966696321964264, "logits/chosen": 0.77137291431427, "logits/rejected": 0.8483403921127319, "logps/chosen": -2.023404121398926, "logps/rejected": -3.501492500305176, "loss": 0.6602, "nll_loss": 0.6372287273406982, "rewards/accuracies": 1.0, "rewards/chosen": -0.20234040915966034, "rewards/margins": 0.14780887961387634, "rewards/rejected": -0.3501492738723755, "step": 6945 }, { "epoch": 19.017111567419576, "grad_norm": 8.64572811126709, "learning_rate": 4.8904109589041095e-08, "log_odds_chosen": 0.2684366703033447, "log_odds_ratio": -0.9855453968048096, "logits/chosen": 0.9229651689529419, "logits/rejected": 0.9354018568992615, "logps/chosen": -3.061366558074951, "logps/rejected": -3.2690045833587646, "loss": 0.7487, "nll_loss": 0.6501822471618652, "rewards/accuracies": 0.5, "rewards/chosen": -0.3061366677284241, "rewards/margins": 0.020763806998729706, "rewards/rejected": -0.3269004821777344, "step": 6946 }, { "epoch": 19.019849418206707, "grad_norm": 6.716402053833008, "learning_rate": 4.876712328767123e-08, "log_odds_chosen": 1.9312000274658203, "log_odds_ratio": -0.20075735449790955, "logits/chosen": 0.6066538095474243, "logits/rejected": 0.6087346076965332, "logps/chosen": -2.1622660160064697, "logps/rejected": -3.957970142364502, "loss": 0.6525, "nll_loss": 0.6324141621589661, "rewards/accuracies": 1.0, "rewards/chosen": -0.21622662246227264, "rewards/margins": 0.17957043647766113, "rewards/rejected": -0.3957970440387726, "step": 6947 }, { "epoch": 19.02258726899384, "grad_norm": 6.193398952484131, "learning_rate": 4.8630136986301366e-08, "log_odds_chosen": 3.0189175605773926, "log_odds_ratio": -0.09214967489242554, "logits/chosen": 0.8844454884529114, "logits/rejected": 0.9138240814208984, "logps/chosen": -2.1171305179595947, "logps/rejected": -4.961023807525635, "loss": 0.5555, "nll_loss": 0.5462629795074463, "rewards/accuracies": 1.0, "rewards/chosen": -0.211713045835495, "rewards/margins": 0.28438931703567505, "rewards/rejected": -0.49610236287117004, "step": 6948 }, { "epoch": 19.025325119780973, "grad_norm": 6.201473712921143, "learning_rate": 4.8493150684931505e-08, "log_odds_chosen": 1.8584179878234863, "log_odds_ratio": -0.3621038794517517, "logits/chosen": 0.8735858201980591, "logits/rejected": 0.8151630163192749, "logps/chosen": -1.9228503704071045, "logps/rejected": -3.627103805541992, "loss": 0.6407, "nll_loss": 0.6044853925704956, "rewards/accuracies": 0.875, "rewards/chosen": -0.19228504598140717, "rewards/margins": 0.17042534053325653, "rewards/rejected": -0.3627103865146637, "step": 6949 }, { "epoch": 19.028062970568104, "grad_norm": 6.771303176879883, "learning_rate": 4.835616438356164e-08, "log_odds_chosen": 0.9979456067085266, "log_odds_ratio": -0.45400309562683105, "logits/chosen": 0.9945371747016907, "logits/rejected": 0.9854021072387695, "logps/chosen": -2.2022342681884766, "logps/rejected": -3.1137325763702393, "loss": 0.6153, "nll_loss": 0.5699408650398254, "rewards/accuracies": 0.75, "rewards/chosen": -0.22022342681884766, "rewards/margins": 0.09114985167980194, "rewards/rejected": -0.3113732933998108, "step": 6950 }, { "epoch": 19.030800821355236, "grad_norm": 4.271973133087158, "learning_rate": 4.821917808219178e-08, "log_odds_chosen": 2.3505501747131348, "log_odds_ratio": -0.15682752430438995, "logits/chosen": 0.8333508968353271, "logits/rejected": 0.8880427479743958, "logps/chosen": -1.5595839023590088, "logps/rejected": -3.6942098140716553, "loss": 0.5256, "nll_loss": 0.5098869204521179, "rewards/accuracies": 1.0, "rewards/chosen": -0.1559583991765976, "rewards/margins": 0.21346259117126465, "rewards/rejected": -0.36942100524902344, "step": 6951 }, { "epoch": 19.033538672142367, "grad_norm": 5.996988773345947, "learning_rate": 4.8082191780821915e-08, "log_odds_chosen": 0.8495995998382568, "log_odds_ratio": -0.40232977271080017, "logits/chosen": 0.7453678250312805, "logits/rejected": 0.6797809600830078, "logps/chosen": -1.8002936840057373, "logps/rejected": -2.5162107944488525, "loss": 0.6154, "nll_loss": 0.5751926898956299, "rewards/accuracies": 0.875, "rewards/chosen": -0.18002936244010925, "rewards/margins": 0.07159171253442764, "rewards/rejected": -0.2516210675239563, "step": 6952 }, { "epoch": 19.0362765229295, "grad_norm": 5.828370094299316, "learning_rate": 4.794520547945205e-08, "log_odds_chosen": 4.100905895233154, "log_odds_ratio": -0.1310785412788391, "logits/chosen": 0.7787412405014038, "logits/rejected": 0.7939844727516174, "logps/chosen": -2.331378936767578, "logps/rejected": -6.273525714874268, "loss": 0.6041, "nll_loss": 0.5909713506698608, "rewards/accuracies": 1.0, "rewards/chosen": -0.23313789069652557, "rewards/margins": 0.3942146897315979, "rewards/rejected": -0.6273525953292847, "step": 6953 }, { "epoch": 19.039014373716633, "grad_norm": 6.999913215637207, "learning_rate": 4.780821917808219e-08, "log_odds_chosen": 1.5354444980621338, "log_odds_ratio": -0.3127385377883911, "logits/chosen": 0.7195695042610168, "logits/rejected": 0.7273013591766357, "logps/chosen": -1.8782541751861572, "logps/rejected": -3.29276704788208, "loss": 0.6024, "nll_loss": 0.5711188316345215, "rewards/accuracies": 0.75, "rewards/chosen": -0.18782541155815125, "rewards/margins": 0.14145129919052124, "rewards/rejected": -0.3292766809463501, "step": 6954 }, { "epoch": 19.041752224503764, "grad_norm": 5.464462757110596, "learning_rate": 4.7671232876712325e-08, "log_odds_chosen": 1.2220613956451416, "log_odds_ratio": -0.4301832616329193, "logits/chosen": 0.9701122045516968, "logits/rejected": 0.9477849006652832, "logps/chosen": -1.8302549123764038, "logps/rejected": -2.972057342529297, "loss": 0.5807, "nll_loss": 0.5376869440078735, "rewards/accuracies": 0.75, "rewards/chosen": -0.18302549421787262, "rewards/margins": 0.11418022215366364, "rewards/rejected": -0.29720571637153625, "step": 6955 }, { "epoch": 19.044490075290895, "grad_norm": 6.586067199707031, "learning_rate": 4.7534246575342464e-08, "log_odds_chosen": 2.754140853881836, "log_odds_ratio": -0.3910277187824249, "logits/chosen": 0.7100204825401306, "logits/rejected": 0.7507781982421875, "logps/chosen": -2.3843812942504883, "logps/rejected": -5.048277854919434, "loss": 0.7481, "nll_loss": 0.7089974880218506, "rewards/accuracies": 0.75, "rewards/chosen": -0.23843812942504883, "rewards/margins": 0.2663896679878235, "rewards/rejected": -0.5048277974128723, "step": 6956 }, { "epoch": 19.04722792607803, "grad_norm": 5.612931728363037, "learning_rate": 4.73972602739726e-08, "log_odds_chosen": 2.0421652793884277, "log_odds_ratio": -0.23848296701908112, "logits/chosen": 0.7807935476303101, "logits/rejected": 0.791172981262207, "logps/chosen": -1.8682315349578857, "logps/rejected": -3.750579595565796, "loss": 0.628, "nll_loss": 0.6041363477706909, "rewards/accuracies": 0.875, "rewards/chosen": -0.18682315945625305, "rewards/margins": 0.18823479115962982, "rewards/rejected": -0.37505799531936646, "step": 6957 }, { "epoch": 19.04996577686516, "grad_norm": 6.523785591125488, "learning_rate": 4.7260273972602735e-08, "log_odds_chosen": 0.8655847311019897, "log_odds_ratio": -0.6670442223548889, "logits/chosen": 0.7870301604270935, "logits/rejected": 0.8302517533302307, "logps/chosen": -2.1815545558929443, "logps/rejected": -2.931749105453491, "loss": 0.6464, "nll_loss": 0.5797214508056641, "rewards/accuracies": 0.75, "rewards/chosen": -0.21815544366836548, "rewards/margins": 0.0750194638967514, "rewards/rejected": -0.2931749224662781, "step": 6958 }, { "epoch": 19.052703627652292, "grad_norm": 4.723414421081543, "learning_rate": 4.7123287671232874e-08, "log_odds_chosen": 2.594416618347168, "log_odds_ratio": -0.16579166054725647, "logits/chosen": 0.7041968107223511, "logits/rejected": 0.7516059875488281, "logps/chosen": -1.639314889907837, "logps/rejected": -3.9449374675750732, "loss": 0.5226, "nll_loss": 0.5060491561889648, "rewards/accuracies": 1.0, "rewards/chosen": -0.1639314889907837, "rewards/margins": 0.2305622547864914, "rewards/rejected": -0.3944937586784363, "step": 6959 }, { "epoch": 19.055441478439427, "grad_norm": 5.720627307891846, "learning_rate": 4.698630136986301e-08, "log_odds_chosen": 2.1280136108398438, "log_odds_ratio": -0.2469581812620163, "logits/chosen": 0.5168261528015137, "logits/rejected": 0.5168455243110657, "logps/chosen": -1.7465038299560547, "logps/rejected": -3.7248294353485107, "loss": 0.6876, "nll_loss": 0.6629319190979004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1746503859758377, "rewards/margins": 0.19783258438110352, "rewards/rejected": -0.37248295545578003, "step": 6960 }, { "epoch": 19.058179329226558, "grad_norm": 6.277311325073242, "learning_rate": 4.684931506849315e-08, "log_odds_chosen": 3.9365124702453613, "log_odds_ratio": -0.3300169110298157, "logits/chosen": 0.839191198348999, "logits/rejected": 0.8653591871261597, "logps/chosen": -2.486100196838379, "logps/rejected": -6.3827900886535645, "loss": 0.7873, "nll_loss": 0.754277765750885, "rewards/accuracies": 0.75, "rewards/chosen": -0.24861003458499908, "rewards/margins": 0.3896689713001251, "rewards/rejected": -0.6382790207862854, "step": 6961 }, { "epoch": 19.06091718001369, "grad_norm": 5.883565425872803, "learning_rate": 4.6712328767123284e-08, "log_odds_chosen": 2.0624122619628906, "log_odds_ratio": -0.36783215403556824, "logits/chosen": 0.7282186150550842, "logits/rejected": 0.7020793557167053, "logps/chosen": -2.15952730178833, "logps/rejected": -4.1359357833862305, "loss": 0.6298, "nll_loss": 0.5929815173149109, "rewards/accuracies": 0.875, "rewards/chosen": -0.21595269441604614, "rewards/margins": 0.19764086604118347, "rewards/rejected": -0.413593590259552, "step": 6962 }, { "epoch": 19.06365503080082, "grad_norm": 5.5194172859191895, "learning_rate": 4.657534246575342e-08, "log_odds_chosen": 1.1883395910263062, "log_odds_ratio": -0.39016789197921753, "logits/chosen": 0.7003843784332275, "logits/rejected": 0.7085134983062744, "logps/chosen": -2.3671035766601562, "logps/rejected": -3.468254566192627, "loss": 0.7055, "nll_loss": 0.666501522064209, "rewards/accuracies": 0.875, "rewards/chosen": -0.23671038448810577, "rewards/margins": 0.11011509597301483, "rewards/rejected": -0.3468254804611206, "step": 6963 }, { "epoch": 19.066392881587955, "grad_norm": 6.776454448699951, "learning_rate": 4.643835616438356e-08, "log_odds_chosen": 1.1705152988433838, "log_odds_ratio": -0.5111011266708374, "logits/chosen": 0.7868737578392029, "logits/rejected": 0.7758309841156006, "logps/chosen": -3.051450252532959, "logps/rejected": -4.200199604034424, "loss": 0.7968, "nll_loss": 0.7457127571105957, "rewards/accuracies": 0.75, "rewards/chosen": -0.3051450550556183, "rewards/margins": 0.11487491428852081, "rewards/rejected": -0.4200199842453003, "step": 6964 }, { "epoch": 19.069130732375086, "grad_norm": 4.889810562133789, "learning_rate": 4.6301369863013694e-08, "log_odds_chosen": 2.688572645187378, "log_odds_ratio": -0.2844105362892151, "logits/chosen": 0.8602039813995361, "logits/rejected": 0.8851376175880432, "logps/chosen": -1.8797410726547241, "logps/rejected": -4.4731950759887695, "loss": 0.5801, "nll_loss": 0.551616907119751, "rewards/accuracies": 1.0, "rewards/chosen": -0.18797409534454346, "rewards/margins": 0.2593454122543335, "rewards/rejected": -0.44731950759887695, "step": 6965 }, { "epoch": 19.071868583162217, "grad_norm": 5.281848430633545, "learning_rate": 4.616438356164384e-08, "log_odds_chosen": 2.0261948108673096, "log_odds_ratio": -0.4437449276447296, "logits/chosen": 0.8816747665405273, "logits/rejected": 0.8935823440551758, "logps/chosen": -2.340874671936035, "logps/rejected": -4.224346160888672, "loss": 0.5713, "nll_loss": 0.5269321799278259, "rewards/accuracies": 0.875, "rewards/chosen": -0.23408746719360352, "rewards/margins": 0.18834719061851501, "rewards/rejected": -0.42243462800979614, "step": 6966 }, { "epoch": 19.07460643394935, "grad_norm": 6.457535743713379, "learning_rate": 4.602739726027397e-08, "log_odds_chosen": 3.1946189403533936, "log_odds_ratio": -0.2787898778915405, "logits/chosen": 0.9535381197929382, "logits/rejected": 1.0202174186706543, "logps/chosen": -2.816654682159424, "logps/rejected": -5.941159248352051, "loss": 0.7237, "nll_loss": 0.6958198547363281, "rewards/accuracies": 0.875, "rewards/chosen": -0.28166550397872925, "rewards/margins": 0.3124504089355469, "rewards/rejected": -0.5941159129142761, "step": 6967 }, { "epoch": 19.077344284736483, "grad_norm": 5.481917381286621, "learning_rate": 4.5890410958904103e-08, "log_odds_chosen": 2.3183956146240234, "log_odds_ratio": -0.19095760583877563, "logits/chosen": 0.8960970640182495, "logits/rejected": 1.018228530883789, "logps/chosen": -2.9693048000335693, "logps/rejected": -5.219270706176758, "loss": 0.7448, "nll_loss": 0.7256871461868286, "rewards/accuracies": 1.0, "rewards/chosen": -0.2969304919242859, "rewards/margins": 0.22499661147594452, "rewards/rejected": -0.5219271183013916, "step": 6968 }, { "epoch": 19.080082135523615, "grad_norm": 7.02787446975708, "learning_rate": 4.575342465753424e-08, "log_odds_chosen": 1.4532192945480347, "log_odds_ratio": -0.40320509672164917, "logits/chosen": 0.772961437702179, "logits/rejected": 0.863924503326416, "logps/chosen": -2.98789119720459, "logps/rejected": -4.418853759765625, "loss": 0.6566, "nll_loss": 0.6162499189376831, "rewards/accuracies": 0.875, "rewards/chosen": -0.2987891435623169, "rewards/margins": 0.14309626817703247, "rewards/rejected": -0.441885381937027, "step": 6969 }, { "epoch": 19.082819986310746, "grad_norm": 5.186437129974365, "learning_rate": 4.561643835616438e-08, "log_odds_chosen": 3.612942695617676, "log_odds_ratio": -0.17295783758163452, "logits/chosen": 0.8179242610931396, "logits/rejected": 0.8049758672714233, "logps/chosen": -2.0099639892578125, "logps/rejected": -5.465580940246582, "loss": 0.582, "nll_loss": 0.5647138357162476, "rewards/accuracies": 1.0, "rewards/chosen": -0.20099636912345886, "rewards/margins": 0.3455617427825928, "rewards/rejected": -0.546558141708374, "step": 6970 }, { "epoch": 19.085557837097877, "grad_norm": 9.418669700622559, "learning_rate": 4.547945205479452e-08, "log_odds_chosen": 1.9648886919021606, "log_odds_ratio": -0.39203664660453796, "logits/chosen": 0.963540256023407, "logits/rejected": 0.9027441143989563, "logps/chosen": -2.765767812728882, "logps/rejected": -4.634202003479004, "loss": 0.6744, "nll_loss": 0.6351973414421082, "rewards/accuracies": 0.75, "rewards/chosen": -0.27657678723335266, "rewards/margins": 0.18684346973896027, "rewards/rejected": -0.46342024207115173, "step": 6971 }, { "epoch": 19.08829568788501, "grad_norm": 6.1417646408081055, "learning_rate": 4.534246575342465e-08, "log_odds_chosen": 3.67728590965271, "log_odds_ratio": -0.2140551209449768, "logits/chosen": 0.8489447832107544, "logits/rejected": 0.8246586322784424, "logps/chosen": -2.197723388671875, "logps/rejected": -5.7793684005737305, "loss": 0.6854, "nll_loss": 0.6640026569366455, "rewards/accuracies": 0.875, "rewards/chosen": -0.2197723388671875, "rewards/margins": 0.358164519071579, "rewards/rejected": -0.5779368877410889, "step": 6972 }, { "epoch": 19.091033538672143, "grad_norm": 5.343761444091797, "learning_rate": 4.520547945205479e-08, "log_odds_chosen": 3.406017541885376, "log_odds_ratio": -0.26877570152282715, "logits/chosen": 0.8025913238525391, "logits/rejected": 0.8498321175575256, "logps/chosen": -2.312457799911499, "logps/rejected": -5.607300281524658, "loss": 0.664, "nll_loss": 0.6371536254882812, "rewards/accuracies": 0.875, "rewards/chosen": -0.23124578595161438, "rewards/margins": 0.3294841945171356, "rewards/rejected": -0.5607300400733948, "step": 6973 }, { "epoch": 19.093771389459274, "grad_norm": 8.024456024169922, "learning_rate": 4.506849315068493e-08, "log_odds_chosen": 1.5141379833221436, "log_odds_ratio": -0.5173624753952026, "logits/chosen": 0.8738508224487305, "logits/rejected": 0.8589478731155396, "logps/chosen": -2.8598265647888184, "logps/rejected": -4.204234600067139, "loss": 0.8956, "nll_loss": 0.8438814878463745, "rewards/accuracies": 0.75, "rewards/chosen": -0.2859826385974884, "rewards/margins": 0.1344408243894577, "rewards/rejected": -0.4204234480857849, "step": 6974 }, { "epoch": 19.096509240246405, "grad_norm": 6.673205852508545, "learning_rate": 4.493150684931506e-08, "log_odds_chosen": 1.098968505859375, "log_odds_ratio": -0.5150275230407715, "logits/chosen": 0.8108471632003784, "logits/rejected": 0.8089165687561035, "logps/chosen": -2.6353886127471924, "logps/rejected": -3.671816825866699, "loss": 0.6632, "nll_loss": 0.6117250323295593, "rewards/accuracies": 0.625, "rewards/chosen": -0.26353883743286133, "rewards/margins": 0.10364282876253128, "rewards/rejected": -0.367181658744812, "step": 6975 }, { "epoch": 19.09924709103354, "grad_norm": 5.229751110076904, "learning_rate": 4.479452054794521e-08, "log_odds_chosen": 3.9991252422332764, "log_odds_ratio": -0.05788873881101608, "logits/chosen": 0.8201625347137451, "logits/rejected": 0.8656420111656189, "logps/chosen": -1.9055774211883545, "logps/rejected": -5.651436805725098, "loss": 0.7539, "nll_loss": 0.7481287121772766, "rewards/accuracies": 1.0, "rewards/chosen": -0.19055774807929993, "rewards/margins": 0.37458592653274536, "rewards/rejected": -0.5651437044143677, "step": 6976 }, { "epoch": 19.10198494182067, "grad_norm": 5.420191764831543, "learning_rate": 4.465753424657534e-08, "log_odds_chosen": 1.9600627422332764, "log_odds_ratio": -0.22488345205783844, "logits/chosen": 0.8641549348831177, "logits/rejected": 0.9234740734100342, "logps/chosen": -2.796130895614624, "logps/rejected": -4.6570539474487305, "loss": 0.828, "nll_loss": 0.8055142164230347, "rewards/accuracies": 1.0, "rewards/chosen": -0.27961307764053345, "rewards/margins": 0.186092346906662, "rewards/rejected": -0.46570539474487305, "step": 6977 }, { "epoch": 19.104722792607802, "grad_norm": 6.085092067718506, "learning_rate": 4.452054794520547e-08, "log_odds_chosen": 2.646599292755127, "log_odds_ratio": -0.32288938760757446, "logits/chosen": 1.0725713968276978, "logits/rejected": 1.122145175933838, "logps/chosen": -2.3563637733459473, "logps/rejected": -4.946438312530518, "loss": 0.588, "nll_loss": 0.5557501316070557, "rewards/accuracies": 0.75, "rewards/chosen": -0.2356363832950592, "rewards/margins": 0.25900745391845703, "rewards/rejected": -0.49464383721351624, "step": 6978 }, { "epoch": 19.107460643394933, "grad_norm": 4.9785943031311035, "learning_rate": 4.438356164383562e-08, "log_odds_chosen": 1.378076195716858, "log_odds_ratio": -0.3235206604003906, "logits/chosen": 0.7981255054473877, "logits/rejected": 0.7909725904464722, "logps/chosen": -2.2202863693237305, "logps/rejected": -3.5046534538269043, "loss": 0.5806, "nll_loss": 0.5482754111289978, "rewards/accuracies": 0.875, "rewards/chosen": -0.22202864289283752, "rewards/margins": 0.12843669950962067, "rewards/rejected": -0.3504653573036194, "step": 6979 }, { "epoch": 19.110198494182068, "grad_norm": 5.0356245040893555, "learning_rate": 4.424657534246575e-08, "log_odds_chosen": 1.5371594429016113, "log_odds_ratio": -0.263126939535141, "logits/chosen": 0.7345585823059082, "logits/rejected": 0.7896314263343811, "logps/chosen": -2.1159605979919434, "logps/rejected": -3.5674397945404053, "loss": 0.6791, "nll_loss": 0.6527938842773438, "rewards/accuracies": 1.0, "rewards/chosen": -0.2115960568189621, "rewards/margins": 0.1451479196548462, "rewards/rejected": -0.3567439913749695, "step": 6980 }, { "epoch": 19.1129363449692, "grad_norm": 5.4510626792907715, "learning_rate": 4.410958904109589e-08, "log_odds_chosen": 1.6013820171356201, "log_odds_ratio": -0.29119595885276794, "logits/chosen": 0.7795752882957458, "logits/rejected": 0.7410104274749756, "logps/chosen": -1.6222723722457886, "logps/rejected": -3.0013387203216553, "loss": 0.6021, "nll_loss": 0.572943925857544, "rewards/accuracies": 1.0, "rewards/chosen": -0.16222722828388214, "rewards/margins": 0.13790664076805115, "rewards/rejected": -0.3001338839530945, "step": 6981 }, { "epoch": 19.11567419575633, "grad_norm": 5.834455490112305, "learning_rate": 4.397260273972603e-08, "log_odds_chosen": 2.785968780517578, "log_odds_ratio": -0.1318768858909607, "logits/chosen": 0.8454006910324097, "logits/rejected": 0.7567660808563232, "logps/chosen": -2.4997525215148926, "logps/rejected": -5.148704528808594, "loss": 0.6638, "nll_loss": 0.650580108165741, "rewards/accuracies": 1.0, "rewards/chosen": -0.2499752640724182, "rewards/margins": 0.2648952007293701, "rewards/rejected": -0.5148704648017883, "step": 6982 }, { "epoch": 19.11841204654346, "grad_norm": 7.449141979217529, "learning_rate": 4.383561643835616e-08, "log_odds_chosen": 1.998715877532959, "log_odds_ratio": -0.2933291494846344, "logits/chosen": 0.981520414352417, "logits/rejected": 1.0912261009216309, "logps/chosen": -3.3080544471740723, "logps/rejected": -5.271731853485107, "loss": 0.8247, "nll_loss": 0.7953318357467651, "rewards/accuracies": 0.75, "rewards/chosen": -0.3308054804801941, "rewards/margins": 0.19636771082878113, "rewards/rejected": -0.5271731615066528, "step": 6983 }, { "epoch": 19.121149897330596, "grad_norm": 6.348412036895752, "learning_rate": 4.36986301369863e-08, "log_odds_chosen": 1.2333956956863403, "log_odds_ratio": -0.3159193992614746, "logits/chosen": 0.7697440981864929, "logits/rejected": 0.7370537519454956, "logps/chosen": -1.9646315574645996, "logps/rejected": -3.053481101989746, "loss": 0.605, "nll_loss": 0.573421835899353, "rewards/accuracies": 1.0, "rewards/chosen": -0.19646315276622772, "rewards/margins": 0.10888493806123734, "rewards/rejected": -0.30534809827804565, "step": 6984 }, { "epoch": 19.123887748117728, "grad_norm": 8.966023445129395, "learning_rate": 4.356164383561644e-08, "log_odds_chosen": 1.4287760257720947, "log_odds_ratio": -0.5599353909492493, "logits/chosen": 1.0355174541473389, "logits/rejected": 1.0418601036071777, "logps/chosen": -3.9774317741394043, "logps/rejected": -5.363703727722168, "loss": 0.92, "nll_loss": 0.8639814257621765, "rewards/accuracies": 0.5, "rewards/chosen": -0.3977431356906891, "rewards/margins": 0.13862720131874084, "rewards/rejected": -0.5363703966140747, "step": 6985 }, { "epoch": 19.12662559890486, "grad_norm": 5.206175804138184, "learning_rate": 4.3424657534246576e-08, "log_odds_chosen": 2.2415316104888916, "log_odds_ratio": -0.22449396550655365, "logits/chosen": 0.8604610562324524, "logits/rejected": 0.9190946817398071, "logps/chosen": -2.08638072013855, "logps/rejected": -4.176513671875, "loss": 0.6009, "nll_loss": 0.5784595608711243, "rewards/accuracies": 0.875, "rewards/chosen": -0.20863808691501617, "rewards/margins": 0.20901328325271606, "rewards/rejected": -0.41765138506889343, "step": 6986 }, { "epoch": 19.129363449691994, "grad_norm": 5.348015785217285, "learning_rate": 4.328767123287671e-08, "log_odds_chosen": 2.108334541320801, "log_odds_ratio": -0.2552625238895416, "logits/chosen": 1.0211520195007324, "logits/rejected": 0.9996104836463928, "logps/chosen": -2.1735994815826416, "logps/rejected": -4.155900955200195, "loss": 0.6669, "nll_loss": 0.6413989067077637, "rewards/accuracies": 1.0, "rewards/chosen": -0.21735994517803192, "rewards/margins": 0.19823017716407776, "rewards/rejected": -0.4155901372432709, "step": 6987 }, { "epoch": 19.132101300479125, "grad_norm": 5.750479221343994, "learning_rate": 4.3150684931506854e-08, "log_odds_chosen": 1.5927461385726929, "log_odds_ratio": -0.26988768577575684, "logits/chosen": 0.8925954103469849, "logits/rejected": 0.8258638381958008, "logps/chosen": -2.223039388656616, "logps/rejected": -3.6842188835144043, "loss": 0.7255, "nll_loss": 0.6985476016998291, "rewards/accuracies": 0.875, "rewards/chosen": -0.22230394184589386, "rewards/margins": 0.1461179554462433, "rewards/rejected": -0.36842191219329834, "step": 6988 }, { "epoch": 19.134839151266256, "grad_norm": 5.558426856994629, "learning_rate": 4.3013698630136986e-08, "log_odds_chosen": 2.8106749057769775, "log_odds_ratio": -0.11208496242761612, "logits/chosen": 0.9175055623054504, "logits/rejected": 0.9966937303543091, "logps/chosen": -2.438836097717285, "logps/rejected": -5.123135566711426, "loss": 0.7134, "nll_loss": 0.7021441459655762, "rewards/accuracies": 1.0, "rewards/chosen": -0.2438836246728897, "rewards/margins": 0.2684299647808075, "rewards/rejected": -0.5123136043548584, "step": 6989 }, { "epoch": 19.137577002053387, "grad_norm": 6.56647253036499, "learning_rate": 4.287671232876712e-08, "log_odds_chosen": 1.224471926689148, "log_odds_ratio": -0.5160062313079834, "logits/chosen": 0.9112403988838196, "logits/rejected": 0.960804283618927, "logps/chosen": -2.6072263717651367, "logps/rejected": -3.7871148586273193, "loss": 0.728, "nll_loss": 0.6763554811477661, "rewards/accuracies": 0.875, "rewards/chosen": -0.26072263717651367, "rewards/margins": 0.11798885464668274, "rewards/rejected": -0.3787115216255188, "step": 6990 }, { "epoch": 19.140314852840522, "grad_norm": 5.8907790184021, "learning_rate": 4.2739726027397264e-08, "log_odds_chosen": 1.7265347242355347, "log_odds_ratio": -0.3686618208885193, "logits/chosen": 0.8928122520446777, "logits/rejected": 0.906920313835144, "logps/chosen": -1.922372579574585, "logps/rejected": -3.5508880615234375, "loss": 0.6604, "nll_loss": 0.6235048174858093, "rewards/accuracies": 0.875, "rewards/chosen": -0.1922372728586197, "rewards/margins": 0.16285155713558197, "rewards/rejected": -0.3550888001918793, "step": 6991 }, { "epoch": 19.143052703627653, "grad_norm": 6.453355312347412, "learning_rate": 4.2602739726027396e-08, "log_odds_chosen": 1.7650654315948486, "log_odds_ratio": -0.2586313486099243, "logits/chosen": 0.9365071654319763, "logits/rejected": 0.9078865647315979, "logps/chosen": -2.2317168712615967, "logps/rejected": -3.9066038131713867, "loss": 0.6704, "nll_loss": 0.6445848941802979, "rewards/accuracies": 1.0, "rewards/chosen": -0.22317169606685638, "rewards/margins": 0.1674886792898178, "rewards/rejected": -0.3906603753566742, "step": 6992 }, { "epoch": 19.145790554414784, "grad_norm": 4.888993740081787, "learning_rate": 4.246575342465753e-08, "log_odds_chosen": 2.6759533882141113, "log_odds_ratio": -0.16247089207172394, "logits/chosen": 0.8116064071655273, "logits/rejected": 0.8242050409317017, "logps/chosen": -1.54014253616333, "logps/rejected": -4.018275737762451, "loss": 0.6001, "nll_loss": 0.5838630199432373, "rewards/accuracies": 1.0, "rewards/chosen": -0.1540142446756363, "rewards/margins": 0.2478133738040924, "rewards/rejected": -0.4018275737762451, "step": 6993 }, { "epoch": 19.148528405201915, "grad_norm": 5.3086466789245605, "learning_rate": 4.232876712328767e-08, "log_odds_chosen": 1.39755117893219, "log_odds_ratio": -0.2915745973587036, "logits/chosen": 0.7672632336616516, "logits/rejected": 0.7852078676223755, "logps/chosen": -1.685881495475769, "logps/rejected": -2.8998892307281494, "loss": 0.6668, "nll_loss": 0.6376701593399048, "rewards/accuracies": 1.0, "rewards/chosen": -0.16858816146850586, "rewards/margins": 0.12140077352523804, "rewards/rejected": -0.2899889349937439, "step": 6994 }, { "epoch": 19.15126625598905, "grad_norm": 5.427674293518066, "learning_rate": 4.2191780821917806e-08, "log_odds_chosen": 1.6320836544036865, "log_odds_ratio": -0.23283591866493225, "logits/chosen": 0.7286681532859802, "logits/rejected": 0.7499625086784363, "logps/chosen": -1.8552300930023193, "logps/rejected": -3.3465592861175537, "loss": 0.6405, "nll_loss": 0.6171759963035583, "rewards/accuracies": 1.0, "rewards/chosen": -0.18552303314208984, "rewards/margins": 0.14913290739059448, "rewards/rejected": -0.3346559405326843, "step": 6995 }, { "epoch": 19.15400410677618, "grad_norm": 7.029776096343994, "learning_rate": 4.2054794520547945e-08, "log_odds_chosen": 1.8842309713363647, "log_odds_ratio": -0.2961430549621582, "logits/chosen": 0.8229170441627502, "logits/rejected": 0.8983350992202759, "logps/chosen": -3.20686936378479, "logps/rejected": -4.959008693695068, "loss": 0.7182, "nll_loss": 0.6885897517204285, "rewards/accuracies": 0.875, "rewards/chosen": -0.320686936378479, "rewards/margins": 0.17521396279335022, "rewards/rejected": -0.4959008991718292, "step": 6996 }, { "epoch": 19.156741957563312, "grad_norm": Infinity, "learning_rate": 4.2054794520547945e-08, "log_odds_chosen": 1.8219285011291504, "log_odds_ratio": -0.5777183771133423, "logits/chosen": 0.8786295056343079, "logits/rejected": 0.9030296802520752, "logps/chosen": -2.7244322299957275, "logps/rejected": -4.449680805206299, "loss": 0.621, "nll_loss": 0.5632501840591431, "rewards/accuracies": 0.875, "rewards/chosen": -0.2724432349205017, "rewards/margins": 0.17252488434314728, "rewards/rejected": -0.4449681341648102, "step": 6997 }, { "epoch": 19.159479808350444, "grad_norm": 9.565679550170898, "learning_rate": 4.191780821917808e-08, "log_odds_chosen": 2.3277149200439453, "log_odds_ratio": -0.3077493906021118, "logits/chosen": 0.7767140865325928, "logits/rejected": 0.7112665176391602, "logps/chosen": -2.7587730884552, "logps/rejected": -4.956031322479248, "loss": 0.7467, "nll_loss": 0.7159708142280579, "rewards/accuracies": 0.875, "rewards/chosen": -0.27587732672691345, "rewards/margins": 0.2197258174419403, "rewards/rejected": -0.49560311436653137, "step": 6998 }, { "epoch": 19.16221765913758, "grad_norm": 6.42960262298584, "learning_rate": 4.178082191780822e-08, "log_odds_chosen": 2.6725292205810547, "log_odds_ratio": -0.20589487254619598, "logits/chosen": 0.9674606323242188, "logits/rejected": 0.976865291595459, "logps/chosen": -2.351278066635132, "logps/rejected": -4.899126052856445, "loss": 0.7259, "nll_loss": 0.7053078413009644, "rewards/accuracies": 0.875, "rewards/chosen": -0.23512782156467438, "rewards/margins": 0.25478482246398926, "rewards/rejected": -0.48991262912750244, "step": 6999 }, { "epoch": 19.16495550992471, "grad_norm": 5.303807258605957, "learning_rate": 4.1643835616438355e-08, "log_odds_chosen": 2.1542255878448486, "log_odds_ratio": -0.35830825567245483, "logits/chosen": 0.944707453250885, "logits/rejected": 0.9477528929710388, "logps/chosen": -2.1056694984436035, "logps/rejected": -4.159881591796875, "loss": 0.6039, "nll_loss": 0.5681084394454956, "rewards/accuracies": 0.875, "rewards/chosen": -0.2105669379234314, "rewards/margins": 0.20542126893997192, "rewards/rejected": -0.4159882068634033, "step": 7000 }, { "epoch": 19.16769336071184, "grad_norm": 5.821852684020996, "learning_rate": 4.150684931506849e-08, "log_odds_chosen": 2.0062806606292725, "log_odds_ratio": -0.3056863844394684, "logits/chosen": 0.8299802541732788, "logits/rejected": 0.9040729999542236, "logps/chosen": -2.485002279281616, "logps/rejected": -4.410036087036133, "loss": 0.6509, "nll_loss": 0.6203173995018005, "rewards/accuracies": 0.875, "rewards/chosen": -0.24850022792816162, "rewards/margins": 0.19250337779521942, "rewards/rejected": -0.44100359082221985, "step": 7001 }, { "epoch": 19.170431211498972, "grad_norm": 4.923877716064453, "learning_rate": 4.136986301369863e-08, "log_odds_chosen": 2.3512587547302246, "log_odds_ratio": -0.1842050999403, "logits/chosen": 0.8655501008033752, "logits/rejected": 0.8721745014190674, "logps/chosen": -2.0635902881622314, "logps/rejected": -4.256232738494873, "loss": 0.6439, "nll_loss": 0.6255142688751221, "rewards/accuracies": 1.0, "rewards/chosen": -0.20635904371738434, "rewards/margins": 0.21926425397396088, "rewards/rejected": -0.4256232678890228, "step": 7002 }, { "epoch": 19.173169062286107, "grad_norm": 6.090606689453125, "learning_rate": 4.1232876712328764e-08, "log_odds_chosen": 2.0960183143615723, "log_odds_ratio": -0.22349226474761963, "logits/chosen": 0.8975681662559509, "logits/rejected": 0.8746397495269775, "logps/chosen": -1.697716236114502, "logps/rejected": -3.6083459854125977, "loss": 0.5532, "nll_loss": 0.5308834314346313, "rewards/accuracies": 1.0, "rewards/chosen": -0.16977162659168243, "rewards/margins": 0.19106295704841614, "rewards/rejected": -0.36083459854125977, "step": 7003 }, { "epoch": 19.175906913073238, "grad_norm": 5.3841986656188965, "learning_rate": 4.10958904109589e-08, "log_odds_chosen": 2.0745677947998047, "log_odds_ratio": -0.273648202419281, "logits/chosen": 0.9052079916000366, "logits/rejected": 0.9573733806610107, "logps/chosen": -1.7691164016723633, "logps/rejected": -3.6939377784729004, "loss": 0.5551, "nll_loss": 0.5277611613273621, "rewards/accuracies": 0.875, "rewards/chosen": -0.17691165208816528, "rewards/margins": 0.1924821436405182, "rewards/rejected": -0.36939379572868347, "step": 7004 }, { "epoch": 19.17864476386037, "grad_norm": 6.9704461097717285, "learning_rate": 4.095890410958904e-08, "log_odds_chosen": 2.0474565029144287, "log_odds_ratio": -0.28754228353500366, "logits/chosen": 0.8488600254058838, "logits/rejected": 0.8045223951339722, "logps/chosen": -2.5908656120300293, "logps/rejected": -4.531346321105957, "loss": 0.8192, "nll_loss": 0.7904343605041504, "rewards/accuracies": 1.0, "rewards/chosen": -0.259086549282074, "rewards/margins": 0.19404807686805725, "rewards/rejected": -0.4531346559524536, "step": 7005 }, { "epoch": 19.1813826146475, "grad_norm": 5.923290729522705, "learning_rate": 4.0821917808219174e-08, "log_odds_chosen": 1.4968065023422241, "log_odds_ratio": -0.2967790961265564, "logits/chosen": 0.9410054683685303, "logits/rejected": 1.01163911819458, "logps/chosen": -2.065168857574463, "logps/rejected": -3.447204828262329, "loss": 0.5556, "nll_loss": 0.5258723497390747, "rewards/accuracies": 0.875, "rewards/chosen": -0.20651690661907196, "rewards/margins": 0.13820360600948334, "rewards/rejected": -0.3447205126285553, "step": 7006 }, { "epoch": 19.184120465434635, "grad_norm": 5.764991760253906, "learning_rate": 4.068493150684931e-08, "log_odds_chosen": 2.074521541595459, "log_odds_ratio": -0.25917690992355347, "logits/chosen": 0.811000406742096, "logits/rejected": 0.8596939444541931, "logps/chosen": -2.3277995586395264, "logps/rejected": -4.3172807693481445, "loss": 0.7179, "nll_loss": 0.6919634342193604, "rewards/accuracies": 1.0, "rewards/chosen": -0.23277997970581055, "rewards/margins": 0.19894811511039734, "rewards/rejected": -0.4317280948162079, "step": 7007 }, { "epoch": 19.186858316221766, "grad_norm": 5.941504001617432, "learning_rate": 4.054794520547945e-08, "log_odds_chosen": 0.6894323229789734, "log_odds_ratio": -0.5365188717842102, "logits/chosen": 0.8919175267219543, "logits/rejected": 0.9287813901901245, "logps/chosen": -1.5084342956542969, "logps/rejected": -2.106870412826538, "loss": 0.6177, "nll_loss": 0.5640609860420227, "rewards/accuracies": 0.75, "rewards/chosen": -0.15084344148635864, "rewards/margins": 0.05984359607100487, "rewards/rejected": -0.21068702638149261, "step": 7008 }, { "epoch": 19.189596167008897, "grad_norm": 6.183027267456055, "learning_rate": 4.041095890410959e-08, "log_odds_chosen": 1.8204641342163086, "log_odds_ratio": -0.37851017713546753, "logits/chosen": 0.9336406588554382, "logits/rejected": 1.0168834924697876, "logps/chosen": -2.9555482864379883, "logps/rejected": -4.7232747077941895, "loss": 0.7505, "nll_loss": 0.7126981019973755, "rewards/accuracies": 0.75, "rewards/chosen": -0.2955548167228699, "rewards/margins": 0.17677265405654907, "rewards/rejected": -0.47232747077941895, "step": 7009 }, { "epoch": 19.19233401779603, "grad_norm": 4.577503204345703, "learning_rate": 4.027397260273972e-08, "log_odds_chosen": 4.097665786743164, "log_odds_ratio": -0.11943104863166809, "logits/chosen": 0.8464659452438354, "logits/rejected": 0.9003704786300659, "logps/chosen": -2.2291765213012695, "logps/rejected": -6.229044437408447, "loss": 0.6432, "nll_loss": 0.6312251091003418, "rewards/accuracies": 1.0, "rewards/chosen": -0.22291767597198486, "rewards/margins": 0.39998680353164673, "rewards/rejected": -0.6229044795036316, "step": 7010 }, { "epoch": 19.195071868583163, "grad_norm": 5.200440883636475, "learning_rate": 4.013698630136986e-08, "log_odds_chosen": 1.7113008499145508, "log_odds_ratio": -0.38099101185798645, "logits/chosen": 0.7941117286682129, "logits/rejected": 0.8021267652511597, "logps/chosen": -2.0889244079589844, "logps/rejected": -3.735941171646118, "loss": 0.6024, "nll_loss": 0.5642671585083008, "rewards/accuracies": 0.75, "rewards/chosen": -0.20889243483543396, "rewards/margins": 0.1647017002105713, "rewards/rejected": -0.37359413504600525, "step": 7011 }, { "epoch": 19.197809719370294, "grad_norm": 5.069666385650635, "learning_rate": 4e-08, "log_odds_chosen": 2.707368850708008, "log_odds_ratio": -0.1933947652578354, "logits/chosen": 0.9587909579277039, "logits/rejected": 0.9973883628845215, "logps/chosen": -1.750221848487854, "logps/rejected": -4.253913879394531, "loss": 0.5806, "nll_loss": 0.5612717866897583, "rewards/accuracies": 1.0, "rewards/chosen": -0.1750221848487854, "rewards/margins": 0.25036919116973877, "rewards/rejected": -0.42539137601852417, "step": 7012 }, { "epoch": 19.200547570157426, "grad_norm": 7.320611476898193, "learning_rate": 3.986301369863013e-08, "log_odds_chosen": 0.5468673706054688, "log_odds_ratio": -0.6366113424301147, "logits/chosen": 0.5106719732284546, "logits/rejected": 0.5431402921676636, "logps/chosen": -2.686166763305664, "logps/rejected": -3.177488088607788, "loss": 0.6739, "nll_loss": 0.6102325916290283, "rewards/accuracies": 0.875, "rewards/chosen": -0.2686166763305664, "rewards/margins": 0.04913214594125748, "rewards/rejected": -0.3177488446235657, "step": 7013 }, { "epoch": 19.20328542094456, "grad_norm": 5.172562599182129, "learning_rate": 3.972602739726028e-08, "log_odds_chosen": 2.3585031032562256, "log_odds_ratio": -0.17590449750423431, "logits/chosen": 0.6792213916778564, "logits/rejected": 0.770346999168396, "logps/chosen": -1.7578144073486328, "logps/rejected": -3.931499719619751, "loss": 0.5352, "nll_loss": 0.5176548361778259, "rewards/accuracies": 1.0, "rewards/chosen": -0.17578144371509552, "rewards/margins": 0.21736851334571838, "rewards/rejected": -0.3931499421596527, "step": 7014 }, { "epoch": 19.20602327173169, "grad_norm": 4.620347499847412, "learning_rate": 3.958904109589041e-08, "log_odds_chosen": 2.048264503479004, "log_odds_ratio": -0.2682587504386902, "logits/chosen": 0.9818508625030518, "logits/rejected": 0.9574608206748962, "logps/chosen": -2.1369566917419434, "logps/rejected": -4.060906410217285, "loss": 0.7226, "nll_loss": 0.695728600025177, "rewards/accuracies": 1.0, "rewards/chosen": -0.2136956751346588, "rewards/margins": 0.19239497184753418, "rewards/rejected": -0.406090646982193, "step": 7015 }, { "epoch": 19.208761122518823, "grad_norm": 4.653479099273682, "learning_rate": 3.945205479452054e-08, "log_odds_chosen": 3.1862897872924805, "log_odds_ratio": -0.15034309029579163, "logits/chosen": 0.9895406365394592, "logits/rejected": 0.9710392951965332, "logps/chosen": -1.8515866994857788, "logps/rejected": -4.86723518371582, "loss": 0.7, "nll_loss": 0.6849311590194702, "rewards/accuracies": 1.0, "rewards/chosen": -0.18515866994857788, "rewards/margins": 0.3015648126602173, "rewards/rejected": -0.48672348260879517, "step": 7016 }, { "epoch": 19.211498973305954, "grad_norm": 7.065744400024414, "learning_rate": 3.931506849315068e-08, "log_odds_chosen": 2.2861101627349854, "log_odds_ratio": -0.5306819677352905, "logits/chosen": 0.9538245797157288, "logits/rejected": 0.8979301452636719, "logps/chosen": -1.8689912557601929, "logps/rejected": -3.9383835792541504, "loss": 0.6305, "nll_loss": 0.5774059295654297, "rewards/accuracies": 0.875, "rewards/chosen": -0.1868991255760193, "rewards/margins": 0.206939235329628, "rewards/rejected": -0.3938383460044861, "step": 7017 }, { "epoch": 19.21423682409309, "grad_norm": 4.857422351837158, "learning_rate": 3.917808219178082e-08, "log_odds_chosen": 2.8254261016845703, "log_odds_ratio": -0.18183523416519165, "logits/chosen": 0.67277991771698, "logits/rejected": 0.7395251989364624, "logps/chosen": -1.8909122943878174, "logps/rejected": -4.55649995803833, "loss": 0.5237, "nll_loss": 0.5055127143859863, "rewards/accuracies": 1.0, "rewards/chosen": -0.18909123539924622, "rewards/margins": 0.26655876636505127, "rewards/rejected": -0.4556499719619751, "step": 7018 }, { "epoch": 19.21697467488022, "grad_norm": 6.636419296264648, "learning_rate": 3.904109589041096e-08, "log_odds_chosen": 0.6096845269203186, "log_odds_ratio": -0.6353341341018677, "logits/chosen": 0.6941835880279541, "logits/rejected": 0.7807505130767822, "logps/chosen": -2.7132744789123535, "logps/rejected": -3.2588346004486084, "loss": 0.6668, "nll_loss": 0.6032462120056152, "rewards/accuracies": 0.75, "rewards/chosen": -0.2713274657726288, "rewards/margins": 0.05455601587891579, "rewards/rejected": -0.3258834779262543, "step": 7019 }, { "epoch": 19.21971252566735, "grad_norm": 6.4387383460998535, "learning_rate": 3.890410958904109e-08, "log_odds_chosen": 0.5347347855567932, "log_odds_ratio": -0.5918465852737427, "logits/chosen": 0.8003182411193848, "logits/rejected": 0.7711076736450195, "logps/chosen": -1.7886431217193604, "logps/rejected": -2.201674461364746, "loss": 0.6129, "nll_loss": 0.553675651550293, "rewards/accuracies": 0.875, "rewards/chosen": -0.17886433005332947, "rewards/margins": 0.0413031168282032, "rewards/rejected": -0.22016744315624237, "step": 7020 }, { "epoch": 19.222450376454482, "grad_norm": 5.00521993637085, "learning_rate": 3.876712328767123e-08, "log_odds_chosen": 3.51357364654541, "log_odds_ratio": -0.2341960072517395, "logits/chosen": 0.6993122100830078, "logits/rejected": 0.7477480173110962, "logps/chosen": -2.1549549102783203, "logps/rejected": -5.514404296875, "loss": 0.7609, "nll_loss": 0.7375192046165466, "rewards/accuracies": 1.0, "rewards/chosen": -0.2154955118894577, "rewards/margins": 0.3359449803829193, "rewards/rejected": -0.5514404773712158, "step": 7021 }, { "epoch": 19.225188227241617, "grad_norm": 5.727939128875732, "learning_rate": 3.863013698630137e-08, "log_odds_chosen": 1.909425973892212, "log_odds_ratio": -0.2979387044906616, "logits/chosen": 0.9776355028152466, "logits/rejected": 0.9715617895126343, "logps/chosen": -1.8603880405426025, "logps/rejected": -3.6564764976501465, "loss": 0.6751, "nll_loss": 0.6452836394309998, "rewards/accuracies": 1.0, "rewards/chosen": -0.1860388219356537, "rewards/margins": 0.17960882186889648, "rewards/rejected": -0.36564764380455017, "step": 7022 }, { "epoch": 19.227926078028748, "grad_norm": 4.920504570007324, "learning_rate": 3.84931506849315e-08, "log_odds_chosen": 2.0566153526306152, "log_odds_ratio": -0.24858683347702026, "logits/chosen": 0.9303398132324219, "logits/rejected": 0.8822453022003174, "logps/chosen": -2.2436718940734863, "logps/rejected": -4.221688270568848, "loss": 0.6347, "nll_loss": 0.6098006963729858, "rewards/accuracies": 0.875, "rewards/chosen": -0.2243671864271164, "rewards/margins": 0.1978016346693039, "rewards/rejected": -0.4221688508987427, "step": 7023 }, { "epoch": 19.23066392881588, "grad_norm": 5.224585056304932, "learning_rate": 3.835616438356165e-08, "log_odds_chosen": 1.819701910018921, "log_odds_ratio": -0.24686968326568604, "logits/chosen": 0.8516120910644531, "logits/rejected": 0.8668018579483032, "logps/chosen": -1.7806602716445923, "logps/rejected": -3.4343411922454834, "loss": 0.6111, "nll_loss": 0.5863662362098694, "rewards/accuracies": 1.0, "rewards/chosen": -0.17806601524353027, "rewards/margins": 0.16536808013916016, "rewards/rejected": -0.3434341251850128, "step": 7024 }, { "epoch": 19.23340177960301, "grad_norm": 5.370528221130371, "learning_rate": 3.821917808219178e-08, "log_odds_chosen": 2.2467775344848633, "log_odds_ratio": -0.2567657232284546, "logits/chosen": 0.7684014439582825, "logits/rejected": 0.7517645359039307, "logps/chosen": -1.879908561706543, "logps/rejected": -3.9808554649353027, "loss": 0.5813, "nll_loss": 0.5555814504623413, "rewards/accuracies": 1.0, "rewards/chosen": -0.18799087405204773, "rewards/margins": 0.2100946605205536, "rewards/rejected": -0.3980855345726013, "step": 7025 }, { "epoch": 19.236139630390145, "grad_norm": 6.710797309875488, "learning_rate": 3.808219178082191e-08, "log_odds_chosen": 1.1031386852264404, "log_odds_ratio": -0.44668206572532654, "logits/chosen": 0.9519293904304504, "logits/rejected": 1.099635362625122, "logps/chosen": -2.3517584800720215, "logps/rejected": -3.343750476837158, "loss": 0.6958, "nll_loss": 0.651174008846283, "rewards/accuracies": 0.875, "rewards/chosen": -0.23517584800720215, "rewards/margins": 0.09919921308755875, "rewards/rejected": -0.3343750536441803, "step": 7026 }, { "epoch": 19.238877481177276, "grad_norm": 4.298032760620117, "learning_rate": 3.794520547945206e-08, "log_odds_chosen": 2.5368666648864746, "log_odds_ratio": -0.22614900767803192, "logits/chosen": 0.9093558192253113, "logits/rejected": 0.9041032791137695, "logps/chosen": -1.8377310037612915, "logps/rejected": -4.218477249145508, "loss": 0.6563, "nll_loss": 0.633734941482544, "rewards/accuracies": 1.0, "rewards/chosen": -0.18377311527729034, "rewards/margins": 0.23807461559772491, "rewards/rejected": -0.42184776067733765, "step": 7027 }, { "epoch": 19.241615331964407, "grad_norm": 5.8425750732421875, "learning_rate": 3.780821917808219e-08, "log_odds_chosen": 3.226212501525879, "log_odds_ratio": -0.1545342355966568, "logits/chosen": 0.7380545735359192, "logits/rejected": 0.821861743927002, "logps/chosen": -2.0552124977111816, "logps/rejected": -5.152054786682129, "loss": 0.6383, "nll_loss": 0.6228342652320862, "rewards/accuracies": 1.0, "rewards/chosen": -0.20552122592926025, "rewards/margins": 0.30968427658081055, "rewards/rejected": -0.5152055621147156, "step": 7028 }, { "epoch": 19.24435318275154, "grad_norm": 6.347040176391602, "learning_rate": 3.767123287671233e-08, "log_odds_chosen": 3.099686622619629, "log_odds_ratio": -0.24379462003707886, "logits/chosen": 0.9103974103927612, "logits/rejected": 0.9098849296569824, "logps/chosen": -2.233839988708496, "logps/rejected": -5.191949367523193, "loss": 0.6622, "nll_loss": 0.6378599405288696, "rewards/accuracies": 0.875, "rewards/chosen": -0.22338399291038513, "rewards/margins": 0.2958109676837921, "rewards/rejected": -0.5191949605941772, "step": 7029 }, { "epoch": 19.247091033538673, "grad_norm": 8.154090881347656, "learning_rate": 3.753424657534247e-08, "log_odds_chosen": 0.623532772064209, "log_odds_ratio": -0.5168135762214661, "logits/chosen": 0.8403362035751343, "logits/rejected": 0.8729803562164307, "logps/chosen": -2.43011474609375, "logps/rejected": -2.995328187942505, "loss": 0.6751, "nll_loss": 0.6233916282653809, "rewards/accuracies": 0.875, "rewards/chosen": -0.243011474609375, "rewards/margins": 0.05652135610580444, "rewards/rejected": -0.29953283071517944, "step": 7030 }, { "epoch": 19.249828884325805, "grad_norm": 5.960306167602539, "learning_rate": 3.73972602739726e-08, "log_odds_chosen": 4.192813396453857, "log_odds_ratio": -0.2795885503292084, "logits/chosen": 1.0007153749465942, "logits/rejected": 1.0826284885406494, "logps/chosen": -2.5019450187683105, "logps/rejected": -6.592099666595459, "loss": 0.6506, "nll_loss": 0.6226757764816284, "rewards/accuracies": 0.875, "rewards/chosen": -0.2501945197582245, "rewards/margins": 0.4090154469013214, "rewards/rejected": -0.6592100262641907, "step": 7031 }, { "epoch": 19.252566735112936, "grad_norm": 5.548079967498779, "learning_rate": 3.726027397260274e-08, "log_odds_chosen": 1.5235615968704224, "log_odds_ratio": -0.2367401272058487, "logits/chosen": 0.69383704662323, "logits/rejected": 0.7002387642860413, "logps/chosen": -1.8285274505615234, "logps/rejected": -3.1137242317199707, "loss": 0.5156, "nll_loss": 0.4919113516807556, "rewards/accuracies": 1.0, "rewards/chosen": -0.18285274505615234, "rewards/margins": 0.128519669175148, "rewards/rejected": -0.31137242913246155, "step": 7032 }, { "epoch": 19.255304585900067, "grad_norm": 6.216207027435303, "learning_rate": 3.712328767123288e-08, "log_odds_chosen": 1.6593804359436035, "log_odds_ratio": -0.272818386554718, "logits/chosen": 0.744111180305481, "logits/rejected": 0.6532139182090759, "logps/chosen": -2.1442160606384277, "logps/rejected": -3.6770715713500977, "loss": 0.6472, "nll_loss": 0.6199063658714294, "rewards/accuracies": 0.875, "rewards/chosen": -0.2144216001033783, "rewards/margins": 0.15328557789325714, "rewards/rejected": -0.36770716309547424, "step": 7033 }, { "epoch": 19.2580424366872, "grad_norm": 6.749453544616699, "learning_rate": 3.6986301369863016e-08, "log_odds_chosen": 1.9076957702636719, "log_odds_ratio": -0.3696579337120056, "logits/chosen": 0.9127101302146912, "logits/rejected": 0.9250122904777527, "logps/chosen": -2.668008804321289, "logps/rejected": -4.49724817276001, "loss": 0.6935, "nll_loss": 0.6565216183662415, "rewards/accuracies": 0.875, "rewards/chosen": -0.2668009102344513, "rewards/margins": 0.18292392790317535, "rewards/rejected": -0.44972485303878784, "step": 7034 }, { "epoch": 19.260780287474333, "grad_norm": 6.59414529800415, "learning_rate": 3.684931506849315e-08, "log_odds_chosen": 2.773310899734497, "log_odds_ratio": -0.25803425908088684, "logits/chosen": 0.7823748588562012, "logits/rejected": 0.8183621168136597, "logps/chosen": -2.2421979904174805, "logps/rejected": -4.871616840362549, "loss": 0.8343, "nll_loss": 0.8084905743598938, "rewards/accuracies": 0.875, "rewards/chosen": -0.22421976923942566, "rewards/margins": 0.2629419267177582, "rewards/rejected": -0.4871617257595062, "step": 7035 }, { "epoch": 19.263518138261464, "grad_norm": 5.377628803253174, "learning_rate": 3.671232876712329e-08, "log_odds_chosen": 1.321467399597168, "log_odds_ratio": -0.28841453790664673, "logits/chosen": 0.7489780187606812, "logits/rejected": 0.821260929107666, "logps/chosen": -2.028804302215576, "logps/rejected": -3.244917631149292, "loss": 0.6227, "nll_loss": 0.5938886404037476, "rewards/accuracies": 1.0, "rewards/chosen": -0.20288042724132538, "rewards/margins": 0.1216113418340683, "rewards/rejected": -0.3244917690753937, "step": 7036 }, { "epoch": 19.266255989048595, "grad_norm": 5.113180637359619, "learning_rate": 3.6575342465753426e-08, "log_odds_chosen": 2.7439169883728027, "log_odds_ratio": -0.18873471021652222, "logits/chosen": 0.9808001518249512, "logits/rejected": 1.0577319860458374, "logps/chosen": -2.5592620372772217, "logps/rejected": -5.221878528594971, "loss": 0.7428, "nll_loss": 0.7239649295806885, "rewards/accuracies": 1.0, "rewards/chosen": -0.2559261918067932, "rewards/margins": 0.26626166701316833, "rewards/rejected": -0.5221878886222839, "step": 7037 }, { "epoch": 19.26899383983573, "grad_norm": 4.572550296783447, "learning_rate": 3.643835616438356e-08, "log_odds_chosen": 2.462348699569702, "log_odds_ratio": -0.13163746893405914, "logits/chosen": 0.5905644297599792, "logits/rejected": 0.6710665225982666, "logps/chosen": -2.1124486923217773, "logps/rejected": -4.427859306335449, "loss": 0.6463, "nll_loss": 0.6331191658973694, "rewards/accuracies": 1.0, "rewards/chosen": -0.2112448811531067, "rewards/margins": 0.23154106736183167, "rewards/rejected": -0.44278591871261597, "step": 7038 }, { "epoch": 19.27173169062286, "grad_norm": 5.688426971435547, "learning_rate": 3.6301369863013697e-08, "log_odds_chosen": 0.8960092067718506, "log_odds_ratio": -0.5131751298904419, "logits/chosen": 0.6619997024536133, "logits/rejected": 0.7449167966842651, "logps/chosen": -2.4091877937316895, "logps/rejected": -3.239871025085449, "loss": 0.693, "nll_loss": 0.6417176127433777, "rewards/accuracies": 0.5, "rewards/chosen": -0.24091877043247223, "rewards/margins": 0.08306834101676941, "rewards/rejected": -0.32398712635040283, "step": 7039 }, { "epoch": 19.274469541409992, "grad_norm": 4.543326377868652, "learning_rate": 3.6164383561643835e-08, "log_odds_chosen": 2.0023152828216553, "log_odds_ratio": -0.2081070989370346, "logits/chosen": 0.9673051834106445, "logits/rejected": 0.9863492250442505, "logps/chosen": -2.217221260070801, "logps/rejected": -4.1199212074279785, "loss": 0.6301, "nll_loss": 0.6093251705169678, "rewards/accuracies": 1.0, "rewards/chosen": -0.2217220962047577, "rewards/margins": 0.19027002155780792, "rewards/rejected": -0.4119921326637268, "step": 7040 }, { "epoch": 19.277207392197127, "grad_norm": 5.2680344581604, "learning_rate": 3.602739726027397e-08, "log_odds_chosen": 2.766756296157837, "log_odds_ratio": -0.13952623307704926, "logits/chosen": 0.6455157995223999, "logits/rejected": 0.7077628970146179, "logps/chosen": -2.0058536529541016, "logps/rejected": -4.617608070373535, "loss": 0.5617, "nll_loss": 0.5477630496025085, "rewards/accuracies": 1.0, "rewards/chosen": -0.20058536529541016, "rewards/margins": 0.2611754536628723, "rewards/rejected": -0.4617607891559601, "step": 7041 }, { "epoch": 19.279945242984258, "grad_norm": 5.4018707275390625, "learning_rate": 3.5890410958904107e-08, "log_odds_chosen": 2.3699240684509277, "log_odds_ratio": -0.20784991979599, "logits/chosen": 0.741227388381958, "logits/rejected": 0.8015137910842896, "logps/chosen": -1.7069437503814697, "logps/rejected": -3.9322450160980225, "loss": 0.5549, "nll_loss": 0.5341471433639526, "rewards/accuracies": 1.0, "rewards/chosen": -0.17069438099861145, "rewards/margins": 0.22253012657165527, "rewards/rejected": -0.3932245075702667, "step": 7042 }, { "epoch": 19.28268309377139, "grad_norm": 6.2845659255981445, "learning_rate": 3.5753424657534245e-08, "log_odds_chosen": 2.3377718925476074, "log_odds_ratio": -0.15567678213119507, "logits/chosen": 0.7488459944725037, "logits/rejected": 0.7897841334342957, "logps/chosen": -2.294510841369629, "logps/rejected": -4.536370277404785, "loss": 0.7257, "nll_loss": 0.7101268172264099, "rewards/accuracies": 1.0, "rewards/chosen": -0.22945109009742737, "rewards/margins": 0.224185973405838, "rewards/rejected": -0.4536370635032654, "step": 7043 }, { "epoch": 19.28542094455852, "grad_norm": 5.977526664733887, "learning_rate": 3.5616438356164384e-08, "log_odds_chosen": 1.2665525674819946, "log_odds_ratio": -0.39223170280456543, "logits/chosen": 0.9718151092529297, "logits/rejected": 0.9413694739341736, "logps/chosen": -2.1568586826324463, "logps/rejected": -3.3107352256774902, "loss": 0.5701, "nll_loss": 0.5309191942214966, "rewards/accuracies": 0.75, "rewards/chosen": -0.2156859040260315, "rewards/margins": 0.11538765579462051, "rewards/rejected": -0.3310735523700714, "step": 7044 }, { "epoch": 19.288158795345655, "grad_norm": 10.688945770263672, "learning_rate": 3.5479452054794516e-08, "log_odds_chosen": 2.406191825866699, "log_odds_ratio": -0.602307915687561, "logits/chosen": 0.9285145998001099, "logits/rejected": 0.958592414855957, "logps/chosen": -2.795642137527466, "logps/rejected": -5.017148971557617, "loss": 0.7933, "nll_loss": 0.7330644726753235, "rewards/accuracies": 0.875, "rewards/chosen": -0.27956423163414, "rewards/margins": 0.22215062379837036, "rewards/rejected": -0.5017148852348328, "step": 7045 }, { "epoch": 19.290896646132786, "grad_norm": 5.844061851501465, "learning_rate": 3.5342465753424655e-08, "log_odds_chosen": 2.4801063537597656, "log_odds_ratio": -0.1735600233078003, "logits/chosen": 0.9631973505020142, "logits/rejected": 0.9196287989616394, "logps/chosen": -1.7189722061157227, "logps/rejected": -4.006495475769043, "loss": 0.5791, "nll_loss": 0.5616989731788635, "rewards/accuracies": 1.0, "rewards/chosen": -0.17189723253250122, "rewards/margins": 0.22875232994556427, "rewards/rejected": -0.4006495475769043, "step": 7046 }, { "epoch": 19.293634496919918, "grad_norm": 4.81687068939209, "learning_rate": 3.5205479452054794e-08, "log_odds_chosen": 2.12496018409729, "log_odds_ratio": -0.2625064551830292, "logits/chosen": 0.7693251371383667, "logits/rejected": 0.8250238299369812, "logps/chosen": -2.078883647918701, "logps/rejected": -4.017579078674316, "loss": 0.6457, "nll_loss": 0.6194674968719482, "rewards/accuracies": 0.875, "rewards/chosen": -0.20788836479187012, "rewards/margins": 0.19386953115463257, "rewards/rejected": -0.4017578959465027, "step": 7047 }, { "epoch": 19.29637234770705, "grad_norm": 5.175039291381836, "learning_rate": 3.5068493150684926e-08, "log_odds_chosen": 2.5928783416748047, "log_odds_ratio": -0.21209478378295898, "logits/chosen": 0.7920750975608826, "logits/rejected": 0.7186208963394165, "logps/chosen": -1.691293478012085, "logps/rejected": -4.127351760864258, "loss": 0.6366, "nll_loss": 0.6154196262359619, "rewards/accuracies": 1.0, "rewards/chosen": -0.16912934184074402, "rewards/margins": 0.243605837225914, "rewards/rejected": -0.4127351939678192, "step": 7048 }, { "epoch": 19.299110198494184, "grad_norm": 5.546895980834961, "learning_rate": 3.493150684931507e-08, "log_odds_chosen": 1.9203627109527588, "log_odds_ratio": -0.30044612288475037, "logits/chosen": 0.7177965641021729, "logits/rejected": 0.7673923969268799, "logps/chosen": -2.803814172744751, "logps/rejected": -4.657869338989258, "loss": 0.7038, "nll_loss": 0.6737473011016846, "rewards/accuracies": 0.875, "rewards/chosen": -0.280381441116333, "rewards/margins": 0.18540549278259277, "rewards/rejected": -0.4657869338989258, "step": 7049 }, { "epoch": 19.301848049281315, "grad_norm": 5.211030006408691, "learning_rate": 3.4794520547945204e-08, "log_odds_chosen": 1.4390215873718262, "log_odds_ratio": -0.28107312321662903, "logits/chosen": 0.6552337408065796, "logits/rejected": 0.6633884906768799, "logps/chosen": -1.9360206127166748, "logps/rejected": -3.2159628868103027, "loss": 0.6, "nll_loss": 0.5718741416931152, "rewards/accuracies": 1.0, "rewards/chosen": -0.1936020702123642, "rewards/margins": 0.12799422442913055, "rewards/rejected": -0.32159629464149475, "step": 7050 }, { "epoch": 19.304585900068446, "grad_norm": 7.6118597984313965, "learning_rate": 3.4657534246575336e-08, "log_odds_chosen": 2.0655980110168457, "log_odds_ratio": -0.2406110167503357, "logits/chosen": 1.0024453401565552, "logits/rejected": 1.0584211349487305, "logps/chosen": -2.7749733924865723, "logps/rejected": -4.710570812225342, "loss": 0.6708, "nll_loss": 0.6467251181602478, "rewards/accuracies": 0.875, "rewards/chosen": -0.2774973511695862, "rewards/margins": 0.19355973601341248, "rewards/rejected": -0.47105708718299866, "step": 7051 }, { "epoch": 19.307323750855577, "grad_norm": 4.850823879241943, "learning_rate": 3.452054794520548e-08, "log_odds_chosen": 2.548755168914795, "log_odds_ratio": -0.17886734008789062, "logits/chosen": 0.7485359907150269, "logits/rejected": 0.783893346786499, "logps/chosen": -1.9768750667572021, "logps/rejected": -4.3992486000061035, "loss": 0.6032, "nll_loss": 0.5852715969085693, "rewards/accuracies": 0.875, "rewards/chosen": -0.19768750667572021, "rewards/margins": 0.242237389087677, "rewards/rejected": -0.4399248957633972, "step": 7052 }, { "epoch": 19.310061601642712, "grad_norm": 6.338279724121094, "learning_rate": 3.4383561643835614e-08, "log_odds_chosen": 2.6534321308135986, "log_odds_ratio": -0.13165290653705597, "logits/chosen": 0.7901954650878906, "logits/rejected": 0.8328719735145569, "logps/chosen": -2.374814510345459, "logps/rejected": -4.902936935424805, "loss": 0.6187, "nll_loss": 0.6055558919906616, "rewards/accuracies": 1.0, "rewards/chosen": -0.2374814748764038, "rewards/margins": 0.2528122365474701, "rewards/rejected": -0.4902937114238739, "step": 7053 }, { "epoch": 19.312799452429843, "grad_norm": 5.145993709564209, "learning_rate": 3.424657534246575e-08, "log_odds_chosen": 3.6042234897613525, "log_odds_ratio": -0.1154567301273346, "logits/chosen": 0.8033103346824646, "logits/rejected": 0.8332640528678894, "logps/chosen": -1.663362741470337, "logps/rejected": -4.990616798400879, "loss": 0.6432, "nll_loss": 0.6316378116607666, "rewards/accuracies": 1.0, "rewards/chosen": -0.1663362681865692, "rewards/margins": 0.3327254056930542, "rewards/rejected": -0.4990617036819458, "step": 7054 }, { "epoch": 19.315537303216974, "grad_norm": 6.04508638381958, "learning_rate": 3.410958904109589e-08, "log_odds_chosen": 1.0616737604141235, "log_odds_ratio": -0.5832285284996033, "logits/chosen": 0.63456130027771, "logits/rejected": 0.622535765171051, "logps/chosen": -2.3946192264556885, "logps/rejected": -3.37385630607605, "loss": 0.7163, "nll_loss": 0.6579339504241943, "rewards/accuracies": 0.625, "rewards/chosen": -0.23946191370487213, "rewards/margins": 0.09792372584342957, "rewards/rejected": -0.3373856544494629, "step": 7055 }, { "epoch": 19.318275154004105, "grad_norm": 5.033380508422852, "learning_rate": 3.3972602739726024e-08, "log_odds_chosen": 2.228614330291748, "log_odds_ratio": -0.27010536193847656, "logits/chosen": 0.715829074382782, "logits/rejected": 0.7161762714385986, "logps/chosen": -1.9546329975128174, "logps/rejected": -3.9857325553894043, "loss": 0.6337, "nll_loss": 0.6066768169403076, "rewards/accuracies": 0.75, "rewards/chosen": -0.19546329975128174, "rewards/margins": 0.2031099498271942, "rewards/rejected": -0.39857324957847595, "step": 7056 }, { "epoch": 19.32101300479124, "grad_norm": 5.239751815795898, "learning_rate": 3.383561643835616e-08, "log_odds_chosen": 3.1695470809936523, "log_odds_ratio": -0.15238511562347412, "logits/chosen": 0.7113831043243408, "logits/rejected": 0.7233411073684692, "logps/chosen": -2.020503044128418, "logps/rejected": -5.017024040222168, "loss": 0.5342, "nll_loss": 0.519010066986084, "rewards/accuracies": 1.0, "rewards/chosen": -0.20205029845237732, "rewards/margins": 0.2996521294116974, "rewards/rejected": -0.5017024278640747, "step": 7057 }, { "epoch": 19.32375085557837, "grad_norm": 5.785078525543213, "learning_rate": 3.36986301369863e-08, "log_odds_chosen": 1.403385877609253, "log_odds_ratio": -0.3168128728866577, "logits/chosen": 1.0530468225479126, "logits/rejected": 1.015770435333252, "logps/chosen": -1.8004528284072876, "logps/rejected": -3.0585720539093018, "loss": 0.5236, "nll_loss": 0.49191814661026, "rewards/accuracies": 0.875, "rewards/chosen": -0.18004529178142548, "rewards/margins": 0.12581193447113037, "rewards/rejected": -0.30585721135139465, "step": 7058 }, { "epoch": 19.326488706365502, "grad_norm": 5.213419437408447, "learning_rate": 3.356164383561644e-08, "log_odds_chosen": 2.3196094036102295, "log_odds_ratio": -0.21820110082626343, "logits/chosen": 0.7334664463996887, "logits/rejected": 0.7806510925292969, "logps/chosen": -1.617113709449768, "logps/rejected": -3.7417874336242676, "loss": 0.5379, "nll_loss": 0.5161243677139282, "rewards/accuracies": 1.0, "rewards/chosen": -0.16171136498451233, "rewards/margins": 0.21246738731861115, "rewards/rejected": -0.37417876720428467, "step": 7059 }, { "epoch": 19.329226557152634, "grad_norm": 4.824667453765869, "learning_rate": 3.342465753424657e-08, "log_odds_chosen": 3.563135862350464, "log_odds_ratio": -0.07430464029312134, "logits/chosen": 0.9192962646484375, "logits/rejected": 0.9172980785369873, "logps/chosen": -1.7614221572875977, "logps/rejected": -5.098933219909668, "loss": 0.7137, "nll_loss": 0.7062480449676514, "rewards/accuracies": 1.0, "rewards/chosen": -0.17614221572875977, "rewards/margins": 0.3337511122226715, "rewards/rejected": -0.5098932981491089, "step": 7060 }, { "epoch": 19.33196440793977, "grad_norm": 4.993113994598389, "learning_rate": 3.328767123287671e-08, "log_odds_chosen": 1.5940754413604736, "log_odds_ratio": -0.2684188485145569, "logits/chosen": 0.7337287664413452, "logits/rejected": 0.7296869158744812, "logps/chosen": -2.0815112590789795, "logps/rejected": -3.528146266937256, "loss": 0.607, "nll_loss": 0.5801823139190674, "rewards/accuracies": 0.875, "rewards/chosen": -0.20815111696720123, "rewards/margins": 0.1446635127067566, "rewards/rejected": -0.352814644575119, "step": 7061 }, { "epoch": 19.3347022587269, "grad_norm": 5.990269660949707, "learning_rate": 3.315068493150685e-08, "log_odds_chosen": 1.5297385454177856, "log_odds_ratio": -0.3431442975997925, "logits/chosen": 0.57501220703125, "logits/rejected": 0.6064679622650146, "logps/chosen": -1.9960339069366455, "logps/rejected": -3.429133176803589, "loss": 0.6331, "nll_loss": 0.5988113284111023, "rewards/accuracies": 0.875, "rewards/chosen": -0.19960340857505798, "rewards/margins": 0.14330992102622986, "rewards/rejected": -0.34291332960128784, "step": 7062 }, { "epoch": 19.33744010951403, "grad_norm": 5.394108772277832, "learning_rate": 3.301369863013698e-08, "log_odds_chosen": 1.6697252988815308, "log_odds_ratio": -0.25299978256225586, "logits/chosen": 0.7180152535438538, "logits/rejected": 0.6989168524742126, "logps/chosen": -1.815361738204956, "logps/rejected": -3.3650918006896973, "loss": 0.5781, "nll_loss": 0.5528115034103394, "rewards/accuracies": 1.0, "rewards/chosen": -0.18153618276119232, "rewards/margins": 0.15497303009033203, "rewards/rejected": -0.33650922775268555, "step": 7063 }, { "epoch": 19.340177960301162, "grad_norm": 6.190735816955566, "learning_rate": 3.287671232876712e-08, "log_odds_chosen": 2.709615468978882, "log_odds_ratio": -0.16278935968875885, "logits/chosen": 0.5970456600189209, "logits/rejected": 0.5639387369155884, "logps/chosen": -1.5004401206970215, "logps/rejected": -4.008101463317871, "loss": 0.6035, "nll_loss": 0.5871895551681519, "rewards/accuracies": 1.0, "rewards/chosen": -0.1500440090894699, "rewards/margins": 0.25076615810394287, "rewards/rejected": -0.4008101522922516, "step": 7064 }, { "epoch": 19.342915811088297, "grad_norm": 5.1888651847839355, "learning_rate": 3.273972602739726e-08, "log_odds_chosen": 3.0738446712493896, "log_odds_ratio": -0.31045544147491455, "logits/chosen": 0.9301625490188599, "logits/rejected": 0.9422904253005981, "logps/chosen": -2.3419766426086426, "logps/rejected": -5.2852091789245605, "loss": 0.6014, "nll_loss": 0.570350170135498, "rewards/accuracies": 0.875, "rewards/chosen": -0.23419766128063202, "rewards/margins": 0.29432326555252075, "rewards/rejected": -0.528520941734314, "step": 7065 }, { "epoch": 19.345653661875428, "grad_norm": 5.779733657836914, "learning_rate": 3.260273972602739e-08, "log_odds_chosen": 3.6225030422210693, "log_odds_ratio": -0.1470252424478531, "logits/chosen": 0.7129209041595459, "logits/rejected": 0.7114392518997192, "logps/chosen": -1.9154553413391113, "logps/rejected": -5.211060523986816, "loss": 0.6116, "nll_loss": 0.5969209671020508, "rewards/accuracies": 1.0, "rewards/chosen": -0.1915455460548401, "rewards/margins": 0.3295605182647705, "rewards/rejected": -0.5211060643196106, "step": 7066 }, { "epoch": 19.34839151266256, "grad_norm": 5.245322227478027, "learning_rate": 3.246575342465753e-08, "log_odds_chosen": 1.827195405960083, "log_odds_ratio": -0.2374165952205658, "logits/chosen": 1.1181679964065552, "logits/rejected": 1.161781907081604, "logps/chosen": -1.7640469074249268, "logps/rejected": -3.389101028442383, "loss": 0.5619, "nll_loss": 0.5381389260292053, "rewards/accuracies": 1.0, "rewards/chosen": -0.176404669880867, "rewards/margins": 0.16250543296337128, "rewards/rejected": -0.33891013264656067, "step": 7067 }, { "epoch": 19.351129363449694, "grad_norm": 4.989710330963135, "learning_rate": 3.232876712328767e-08, "log_odds_chosen": 3.340329885482788, "log_odds_ratio": -0.14393293857574463, "logits/chosen": 1.0460082292556763, "logits/rejected": 1.0855228900909424, "logps/chosen": -2.487834930419922, "logps/rejected": -5.695789337158203, "loss": 0.6665, "nll_loss": 0.6521263122558594, "rewards/accuracies": 1.0, "rewards/chosen": -0.24878349900245667, "rewards/margins": 0.320795476436615, "rewards/rejected": -0.569579005241394, "step": 7068 }, { "epoch": 19.353867214236825, "grad_norm": 6.909136772155762, "learning_rate": 3.219178082191781e-08, "log_odds_chosen": 2.196917772293091, "log_odds_ratio": -0.3128502368927002, "logits/chosen": 0.8708579540252686, "logits/rejected": 0.9396109580993652, "logps/chosen": -3.104931592941284, "logps/rejected": -5.256473541259766, "loss": 0.8097, "nll_loss": 0.7783753871917725, "rewards/accuracies": 0.875, "rewards/chosen": -0.3104931712150574, "rewards/margins": 0.21515420079231262, "rewards/rejected": -0.5256473422050476, "step": 7069 }, { "epoch": 19.356605065023956, "grad_norm": 5.5725932121276855, "learning_rate": 3.205479452054794e-08, "log_odds_chosen": 2.5539093017578125, "log_odds_ratio": -0.15652549266815186, "logits/chosen": 0.8070346713066101, "logits/rejected": 0.8492075204849243, "logps/chosen": -2.050811529159546, "logps/rejected": -4.4806976318359375, "loss": 0.5827, "nll_loss": 0.5670496225357056, "rewards/accuracies": 1.0, "rewards/chosen": -0.20508116483688354, "rewards/margins": 0.24298863112926483, "rewards/rejected": -0.44806981086730957, "step": 7070 }, { "epoch": 19.359342915811087, "grad_norm": 4.790813446044922, "learning_rate": 3.191780821917808e-08, "log_odds_chosen": 3.9815711975097656, "log_odds_ratio": -0.16749131679534912, "logits/chosen": 0.9337356090545654, "logits/rejected": 0.9804641008377075, "logps/chosen": -1.9861687421798706, "logps/rejected": -5.839798450469971, "loss": 0.6961, "nll_loss": 0.6793136596679688, "rewards/accuracies": 0.875, "rewards/chosen": -0.1986168771982193, "rewards/margins": 0.3853629231452942, "rewards/rejected": -0.5839798450469971, "step": 7071 }, { "epoch": 19.362080766598222, "grad_norm": 6.689871311187744, "learning_rate": 3.178082191780822e-08, "log_odds_chosen": 1.1564797163009644, "log_odds_ratio": -0.4057447612285614, "logits/chosen": 0.8746067881584167, "logits/rejected": 0.9346021413803101, "logps/chosen": -2.5171475410461426, "logps/rejected": -3.61017107963562, "loss": 0.5819, "nll_loss": 0.541297435760498, "rewards/accuracies": 0.75, "rewards/chosen": -0.2517147660255432, "rewards/margins": 0.1093023493885994, "rewards/rejected": -0.3610171377658844, "step": 7072 }, { "epoch": 19.364818617385353, "grad_norm": 4.855797290802002, "learning_rate": 3.164383561643835e-08, "log_odds_chosen": 3.1989457607269287, "log_odds_ratio": -0.14190945029258728, "logits/chosen": 0.746906578540802, "logits/rejected": 0.7347415685653687, "logps/chosen": -2.424983024597168, "logps/rejected": -5.480145454406738, "loss": 0.5648, "nll_loss": 0.5505861043930054, "rewards/accuracies": 1.0, "rewards/chosen": -0.24249830842018127, "rewards/margins": 0.30551621317863464, "rewards/rejected": -0.5480145215988159, "step": 7073 }, { "epoch": 19.367556468172484, "grad_norm": 6.2280049324035645, "learning_rate": 3.1506849315068497e-08, "log_odds_chosen": 2.5100865364074707, "log_odds_ratio": -0.2660168409347534, "logits/chosen": 1.047157883644104, "logits/rejected": 1.069960355758667, "logps/chosen": -2.3036580085754395, "logps/rejected": -4.71466588973999, "loss": 0.6445, "nll_loss": 0.6178500652313232, "rewards/accuracies": 0.875, "rewards/chosen": -0.2303657829761505, "rewards/margins": 0.24110080301761627, "rewards/rejected": -0.471466600894928, "step": 7074 }, { "epoch": 19.370294318959616, "grad_norm": 5.392657279968262, "learning_rate": 3.136986301369863e-08, "log_odds_chosen": 2.8444745540618896, "log_odds_ratio": -0.21335236728191376, "logits/chosen": 0.8912383317947388, "logits/rejected": 0.9606644511222839, "logps/chosen": -2.2278754711151123, "logps/rejected": -4.883322715759277, "loss": 0.6363, "nll_loss": 0.6149939298629761, "rewards/accuracies": 0.875, "rewards/chosen": -0.22278755903244019, "rewards/margins": 0.26554471254348755, "rewards/rejected": -0.48833224177360535, "step": 7075 }, { "epoch": 19.37303216974675, "grad_norm": 5.445693016052246, "learning_rate": 3.123287671232877e-08, "log_odds_chosen": 2.515795946121216, "log_odds_ratio": -0.2359423041343689, "logits/chosen": 0.9418731331825256, "logits/rejected": 0.9636925458908081, "logps/chosen": -2.402015209197998, "logps/rejected": -4.820485591888428, "loss": 0.6309, "nll_loss": 0.6073205471038818, "rewards/accuracies": 1.0, "rewards/chosen": -0.24020151793956757, "rewards/margins": 0.24184706807136536, "rewards/rejected": -0.48204857110977173, "step": 7076 }, { "epoch": 19.37577002053388, "grad_norm": 5.624946117401123, "learning_rate": 3.10958904109589e-08, "log_odds_chosen": 2.1291518211364746, "log_odds_ratio": -0.2050013691186905, "logits/chosen": 0.9737167358398438, "logits/rejected": 1.0862046480178833, "logps/chosen": -2.179049491882324, "logps/rejected": -4.133727550506592, "loss": 0.5888, "nll_loss": 0.5682721734046936, "rewards/accuracies": 0.875, "rewards/chosen": -0.2179049551486969, "rewards/margins": 0.1954677850008011, "rewards/rejected": -0.4133727550506592, "step": 7077 }, { "epoch": 19.378507871321013, "grad_norm": 7.020151138305664, "learning_rate": 3.095890410958904e-08, "log_odds_chosen": 1.0350532531738281, "log_odds_ratio": -0.5152268409729004, "logits/chosen": 0.8844622373580933, "logits/rejected": 0.9113777279853821, "logps/chosen": -2.5660645961761475, "logps/rejected": -3.5886192321777344, "loss": 0.7847, "nll_loss": 0.7331416606903076, "rewards/accuracies": 0.625, "rewards/chosen": -0.25660645961761475, "rewards/margins": 0.1022554561495781, "rewards/rejected": -0.35886192321777344, "step": 7078 }, { "epoch": 19.381245722108144, "grad_norm": 5.493243217468262, "learning_rate": 3.082191780821918e-08, "log_odds_chosen": 0.869867205619812, "log_odds_ratio": -0.4902566075325012, "logits/chosen": 0.6542472839355469, "logits/rejected": 0.7076250910758972, "logps/chosen": -2.642336368560791, "logps/rejected": -3.4563450813293457, "loss": 0.6433, "nll_loss": 0.5942946076393127, "rewards/accuracies": 0.625, "rewards/chosen": -0.26423364877700806, "rewards/margins": 0.08140087872743607, "rewards/rejected": -0.3456345200538635, "step": 7079 }, { "epoch": 19.38398357289528, "grad_norm": 5.070518493652344, "learning_rate": 3.0684931506849316e-08, "log_odds_chosen": 1.6450722217559814, "log_odds_ratio": -0.3000521659851074, "logits/chosen": 0.9089624881744385, "logits/rejected": 0.9533964991569519, "logps/chosen": -1.893967628479004, "logps/rejected": -3.406078815460205, "loss": 0.5709, "nll_loss": 0.540895402431488, "rewards/accuracies": 0.875, "rewards/chosen": -0.18939676880836487, "rewards/margins": 0.15121111273765564, "rewards/rejected": -0.3406078815460205, "step": 7080 }, { "epoch": 19.38672142368241, "grad_norm": 6.104625701904297, "learning_rate": 3.054794520547945e-08, "log_odds_chosen": 2.3478946685791016, "log_odds_ratio": -0.2288765013217926, "logits/chosen": 1.0191543102264404, "logits/rejected": 1.0653568506240845, "logps/chosen": -2.341224193572998, "logps/rejected": -4.5851359367370605, "loss": 0.7451, "nll_loss": 0.7222574949264526, "rewards/accuracies": 0.875, "rewards/chosen": -0.23412242531776428, "rewards/margins": 0.2243911772966385, "rewards/rejected": -0.4585135579109192, "step": 7081 }, { "epoch": 19.38945927446954, "grad_norm": 5.880022048950195, "learning_rate": 3.041095890410959e-08, "log_odds_chosen": 2.3437962532043457, "log_odds_ratio": -0.26467207074165344, "logits/chosen": 0.8368462920188904, "logits/rejected": 0.9073938131332397, "logps/chosen": -2.3644700050354004, "logps/rejected": -4.616013526916504, "loss": 0.6846, "nll_loss": 0.6581120491027832, "rewards/accuracies": 1.0, "rewards/chosen": -0.23644699156284332, "rewards/margins": 0.2251543402671814, "rewards/rejected": -0.4616013169288635, "step": 7082 }, { "epoch": 19.392197125256672, "grad_norm": 5.692754745483398, "learning_rate": 3.0273972602739726e-08, "log_odds_chosen": 2.763937473297119, "log_odds_ratio": -0.18272270262241364, "logits/chosen": 0.7448593378067017, "logits/rejected": 0.8046771883964539, "logps/chosen": -2.525545597076416, "logps/rejected": -5.197112083435059, "loss": 0.7362, "nll_loss": 0.717896044254303, "rewards/accuracies": 1.0, "rewards/chosen": -0.2525545656681061, "rewards/margins": 0.2671566605567932, "rewards/rejected": -0.5197112560272217, "step": 7083 }, { "epoch": 19.394934976043807, "grad_norm": 5.843564510345459, "learning_rate": 3.0136986301369865e-08, "log_odds_chosen": 3.648327350616455, "log_odds_ratio": -0.04193494841456413, "logits/chosen": 0.9222757816314697, "logits/rejected": 0.932604968547821, "logps/chosen": -1.7842319011688232, "logps/rejected": -5.224109649658203, "loss": 0.6033, "nll_loss": 0.5991452932357788, "rewards/accuracies": 1.0, "rewards/chosen": -0.1784231811761856, "rewards/margins": 0.3439877927303314, "rewards/rejected": -0.5224109888076782, "step": 7084 }, { "epoch": 19.397672826830938, "grad_norm": 5.256205081939697, "learning_rate": 3e-08, "log_odds_chosen": 3.29455304145813, "log_odds_ratio": -0.06347452849149704, "logits/chosen": 0.7004236578941345, "logits/rejected": 0.6054282188415527, "logps/chosen": -1.9933719635009766, "logps/rejected": -5.138467788696289, "loss": 0.7699, "nll_loss": 0.7635896801948547, "rewards/accuracies": 1.0, "rewards/chosen": -0.1993371993303299, "rewards/margins": 0.31450963020324707, "rewards/rejected": -0.5138468146324158, "step": 7085 }, { "epoch": 19.40041067761807, "grad_norm": 6.140570163726807, "learning_rate": 2.9863013698630136e-08, "log_odds_chosen": 2.478041410446167, "log_odds_ratio": -0.2532329857349396, "logits/chosen": 0.8430967330932617, "logits/rejected": 0.9119123816490173, "logps/chosen": -2.5755856037139893, "logps/rejected": -4.965912818908691, "loss": 0.6168, "nll_loss": 0.5914377570152283, "rewards/accuracies": 0.875, "rewards/chosen": -0.25755858421325684, "rewards/margins": 0.23903274536132812, "rewards/rejected": -0.49659135937690735, "step": 7086 }, { "epoch": 19.4031485284052, "grad_norm": 6.561319828033447, "learning_rate": 2.9726027397260275e-08, "log_odds_chosen": 2.1544101238250732, "log_odds_ratio": -0.2249300181865692, "logits/chosen": 0.9353585839271545, "logits/rejected": 1.0086227655410767, "logps/chosen": -2.361492156982422, "logps/rejected": -4.409337520599365, "loss": 0.6966, "nll_loss": 0.6741313934326172, "rewards/accuracies": 1.0, "rewards/chosen": -0.23614923655986786, "rewards/margins": 0.2047845721244812, "rewards/rejected": -0.44093379378318787, "step": 7087 }, { "epoch": 19.405886379192335, "grad_norm": 5.106583118438721, "learning_rate": 2.9589041095890407e-08, "log_odds_chosen": 2.6036581993103027, "log_odds_ratio": -0.211882546544075, "logits/chosen": 0.824610710144043, "logits/rejected": 0.8825144171714783, "logps/chosen": -2.4145240783691406, "logps/rejected": -4.909489631652832, "loss": 0.6906, "nll_loss": 0.6694355607032776, "rewards/accuracies": 0.875, "rewards/chosen": -0.2414524257183075, "rewards/margins": 0.24949653446674347, "rewards/rejected": -0.49094897508621216, "step": 7088 }, { "epoch": 19.408624229979466, "grad_norm": 5.4068498611450195, "learning_rate": 2.9452054794520546e-08, "log_odds_chosen": 1.3634107112884521, "log_odds_ratio": -0.27579545974731445, "logits/chosen": 0.6347370743751526, "logits/rejected": 0.6512196063995361, "logps/chosen": -2.0914788246154785, "logps/rejected": -3.317111015319824, "loss": 0.6749, "nll_loss": 0.6472874879837036, "rewards/accuracies": 1.0, "rewards/chosen": -0.20914790034294128, "rewards/margins": 0.1225631982088089, "rewards/rejected": -0.331711083650589, "step": 7089 }, { "epoch": 19.411362080766597, "grad_norm": 6.966084957122803, "learning_rate": 2.9315068493150685e-08, "log_odds_chosen": 1.5376331806182861, "log_odds_ratio": -0.25621297955513, "logits/chosen": 0.9205570220947266, "logits/rejected": 0.9240586757659912, "logps/chosen": -2.6446778774261475, "logps/rejected": -4.0639967918396, "loss": 0.6662, "nll_loss": 0.6406257748603821, "rewards/accuracies": 1.0, "rewards/chosen": -0.2644678056240082, "rewards/margins": 0.14193187654018402, "rewards/rejected": -0.406399667263031, "step": 7090 }, { "epoch": 19.41409993155373, "grad_norm": 5.1879119873046875, "learning_rate": 2.917808219178082e-08, "log_odds_chosen": 1.8499053716659546, "log_odds_ratio": -0.23940686881542206, "logits/chosen": 0.9432412385940552, "logits/rejected": 0.961164116859436, "logps/chosen": -2.142341136932373, "logps/rejected": -3.830801010131836, "loss": 0.5538, "nll_loss": 0.5298831462860107, "rewards/accuracies": 0.875, "rewards/chosen": -0.2142341285943985, "rewards/margins": 0.1688459813594818, "rewards/rejected": -0.38308006525039673, "step": 7091 }, { "epoch": 19.416837782340863, "grad_norm": 5.36239767074585, "learning_rate": 2.904109589041096e-08, "log_odds_chosen": 2.9371180534362793, "log_odds_ratio": -0.22055432200431824, "logits/chosen": 0.5838105082511902, "logits/rejected": 0.6152318716049194, "logps/chosen": -1.9924044609069824, "logps/rejected": -4.774919033050537, "loss": 0.5376, "nll_loss": 0.5155914425849915, "rewards/accuracies": 1.0, "rewards/chosen": -0.19924044609069824, "rewards/margins": 0.2782514691352844, "rewards/rejected": -0.47749191522598267, "step": 7092 }, { "epoch": 19.419575633127995, "grad_norm": 7.826541900634766, "learning_rate": 2.890410958904109e-08, "log_odds_chosen": 1.1736180782318115, "log_odds_ratio": -0.4606468677520752, "logits/chosen": 0.7100982666015625, "logits/rejected": 0.6720292568206787, "logps/chosen": -2.5216617584228516, "logps/rejected": -3.660940647125244, "loss": 0.6179, "nll_loss": 0.5718672275543213, "rewards/accuracies": 0.875, "rewards/chosen": -0.25216615200042725, "rewards/margins": 0.11392788589000702, "rewards/rejected": -0.36609405279159546, "step": 7093 }, { "epoch": 19.422313483915126, "grad_norm": 5.770288467407227, "learning_rate": 2.876712328767123e-08, "log_odds_chosen": 3.4778828620910645, "log_odds_ratio": -0.12025389820337296, "logits/chosen": 0.8871182799339294, "logits/rejected": 0.9186630845069885, "logps/chosen": -1.928647518157959, "logps/rejected": -5.2350263595581055, "loss": 0.6299, "nll_loss": 0.6178508996963501, "rewards/accuracies": 1.0, "rewards/chosen": -0.19286474585533142, "rewards/margins": 0.33063793182373047, "rewards/rejected": -0.5235026478767395, "step": 7094 }, { "epoch": 19.42505133470226, "grad_norm": 5.191310882568359, "learning_rate": 2.863013698630137e-08, "log_odds_chosen": 1.2229892015457153, "log_odds_ratio": -0.350369930267334, "logits/chosen": 0.8365023136138916, "logits/rejected": 0.8591704368591309, "logps/chosen": -2.0404250621795654, "logps/rejected": -3.1552462577819824, "loss": 0.6481, "nll_loss": 0.6130391359329224, "rewards/accuracies": 1.0, "rewards/chosen": -0.2040424942970276, "rewards/margins": 0.11148209124803543, "rewards/rejected": -0.3155246078968048, "step": 7095 }, { "epoch": 19.42778918548939, "grad_norm": 5.229720592498779, "learning_rate": 2.8493150684931505e-08, "log_odds_chosen": 1.5753546953201294, "log_odds_ratio": -0.26948633790016174, "logits/chosen": 0.7090948820114136, "logits/rejected": 0.7019912004470825, "logps/chosen": -1.9689983129501343, "logps/rejected": -3.447786331176758, "loss": 0.5496, "nll_loss": 0.5226635932922363, "rewards/accuracies": 1.0, "rewards/chosen": -0.19689983129501343, "rewards/margins": 0.14787881076335907, "rewards/rejected": -0.3447786271572113, "step": 7096 }, { "epoch": 19.430527036276523, "grad_norm": 6.199950695037842, "learning_rate": 2.8356164383561644e-08, "log_odds_chosen": 3.3931503295898438, "log_odds_ratio": -0.1485980749130249, "logits/chosen": 0.9230238199234009, "logits/rejected": 1.0079007148742676, "logps/chosen": -2.496126174926758, "logps/rejected": -5.696523666381836, "loss": 0.6662, "nll_loss": 0.6512951850891113, "rewards/accuracies": 0.875, "rewards/chosen": -0.24961259961128235, "rewards/margins": 0.3200397789478302, "rewards/rejected": -0.5696523785591125, "step": 7097 }, { "epoch": 19.433264887063654, "grad_norm": 5.415976047515869, "learning_rate": 2.8219178082191782e-08, "log_odds_chosen": 3.583256244659424, "log_odds_ratio": -0.06651200354099274, "logits/chosen": 1.1827502250671387, "logits/rejected": 1.1770274639129639, "logps/chosen": -2.3580145835876465, "logps/rejected": -5.8301591873168945, "loss": 0.6975, "nll_loss": 0.6908564567565918, "rewards/accuracies": 1.0, "rewards/chosen": -0.23580147325992584, "rewards/margins": 0.3472144603729248, "rewards/rejected": -0.5830159187316895, "step": 7098 }, { "epoch": 19.43600273785079, "grad_norm": 5.789994239807129, "learning_rate": 2.8082191780821915e-08, "log_odds_chosen": 2.370704412460327, "log_odds_ratio": -0.242689847946167, "logits/chosen": 1.2045824527740479, "logits/rejected": 1.2518625259399414, "logps/chosen": -2.409590244293213, "logps/rejected": -4.631979942321777, "loss": 0.5936, "nll_loss": 0.5693804621696472, "rewards/accuracies": 1.0, "rewards/chosen": -0.2409590184688568, "rewards/margins": 0.22223900258541107, "rewards/rejected": -0.4631980061531067, "step": 7099 }, { "epoch": 19.43874058863792, "grad_norm": 4.609948635101318, "learning_rate": 2.7945205479452053e-08, "log_odds_chosen": 2.105870246887207, "log_odds_ratio": -0.1863602101802826, "logits/chosen": 0.857387900352478, "logits/rejected": 0.9359465837478638, "logps/chosen": -2.1154215335845947, "logps/rejected": -4.104156017303467, "loss": 0.5902, "nll_loss": 0.5715861320495605, "rewards/accuracies": 1.0, "rewards/chosen": -0.21154214441776276, "rewards/margins": 0.19887344539165497, "rewards/rejected": -0.4104155898094177, "step": 7100 }, { "epoch": 19.44147843942505, "grad_norm": 6.357501029968262, "learning_rate": 2.7808219178082192e-08, "log_odds_chosen": 2.780032157897949, "log_odds_ratio": -0.23758310079574585, "logits/chosen": 0.6037949919700623, "logits/rejected": 0.6363964080810547, "logps/chosen": -1.949876070022583, "logps/rejected": -4.556743621826172, "loss": 0.6259, "nll_loss": 0.6021827459335327, "rewards/accuracies": 1.0, "rewards/chosen": -0.19498760998249054, "rewards/margins": 0.2606867253780365, "rewards/rejected": -0.45567435026168823, "step": 7101 }, { "epoch": 19.444216290212182, "grad_norm": 6.39138126373291, "learning_rate": 2.7671232876712328e-08, "log_odds_chosen": 1.6350492238998413, "log_odds_ratio": -0.2874336838722229, "logits/chosen": 1.0147435665130615, "logits/rejected": 0.9788340926170349, "logps/chosen": -2.0724716186523438, "logps/rejected": -3.5954933166503906, "loss": 0.6012, "nll_loss": 0.5724079608917236, "rewards/accuracies": 1.0, "rewards/chosen": -0.20724718272686005, "rewards/margins": 0.15230217576026917, "rewards/rejected": -0.359549343585968, "step": 7102 }, { "epoch": 19.446954140999317, "grad_norm": 5.665502071380615, "learning_rate": 2.7534246575342467e-08, "log_odds_chosen": 1.7682480812072754, "log_odds_ratio": -0.21036680042743683, "logits/chosen": 0.6847739219665527, "logits/rejected": 0.6905270218849182, "logps/chosen": -1.9558601379394531, "logps/rejected": -3.5873284339904785, "loss": 0.5442, "nll_loss": 0.523163914680481, "rewards/accuracies": 1.0, "rewards/chosen": -0.19558602571487427, "rewards/margins": 0.16314685344696045, "rewards/rejected": -0.3587328791618347, "step": 7103 }, { "epoch": 19.449691991786448, "grad_norm": 6.983078956604004, "learning_rate": 2.73972602739726e-08, "log_odds_chosen": 1.165693759918213, "log_odds_ratio": -0.3751473128795624, "logits/chosen": 1.070408582687378, "logits/rejected": 1.1003828048706055, "logps/chosen": -2.5122733116149902, "logps/rejected": -3.585641860961914, "loss": 0.7553, "nll_loss": 0.7177387475967407, "rewards/accuracies": 0.875, "rewards/chosen": -0.25122731924057007, "rewards/margins": 0.1073368713259697, "rewards/rejected": -0.35856419801712036, "step": 7104 }, { "epoch": 19.45242984257358, "grad_norm": 5.05204963684082, "learning_rate": 2.7260273972602738e-08, "log_odds_chosen": 2.2431321144104004, "log_odds_ratio": -0.28685128688812256, "logits/chosen": 0.8283282518386841, "logits/rejected": 0.8267799615859985, "logps/chosen": -2.245579242706299, "logps/rejected": -4.430307388305664, "loss": 0.6843, "nll_loss": 0.655587911605835, "rewards/accuracies": 0.875, "rewards/chosen": -0.22455792129039764, "rewards/margins": 0.21847277879714966, "rewards/rejected": -0.4430307149887085, "step": 7105 }, { "epoch": 19.45516769336071, "grad_norm": 5.309350967407227, "learning_rate": 2.7123287671232877e-08, "log_odds_chosen": 3.216264247894287, "log_odds_ratio": -0.2174530178308487, "logits/chosen": 0.8922173380851746, "logits/rejected": 0.9241556525230408, "logps/chosen": -1.7769855260849, "logps/rejected": -4.803396224975586, "loss": 0.6882, "nll_loss": 0.6664518713951111, "rewards/accuracies": 0.875, "rewards/chosen": -0.17769856750965118, "rewards/margins": 0.3026410937309265, "rewards/rejected": -0.4803396463394165, "step": 7106 }, { "epoch": 19.457905544147845, "grad_norm": 8.525679588317871, "learning_rate": 2.6986301369863012e-08, "log_odds_chosen": 0.9118001461029053, "log_odds_ratio": -0.5180738568305969, "logits/chosen": 0.8945729732513428, "logits/rejected": 0.8528484106063843, "logps/chosen": -2.0376968383789062, "logps/rejected": -2.801318407058716, "loss": 0.5792, "nll_loss": 0.5274269580841064, "rewards/accuracies": 0.75, "rewards/chosen": -0.20376966893672943, "rewards/margins": 0.07636214792728424, "rewards/rejected": -0.28013184666633606, "step": 7107 }, { "epoch": 19.460643394934976, "grad_norm": 6.197831630706787, "learning_rate": 2.684931506849315e-08, "log_odds_chosen": 2.469330072402954, "log_odds_ratio": -0.28478237986564636, "logits/chosen": 0.9625564217567444, "logits/rejected": 0.9658464193344116, "logps/chosen": -2.1495492458343506, "logps/rejected": -4.416165351867676, "loss": 0.6477, "nll_loss": 0.6192048192024231, "rewards/accuracies": 0.875, "rewards/chosen": -0.2149549424648285, "rewards/margins": 0.22666160762310028, "rewards/rejected": -0.4416165351867676, "step": 7108 }, { "epoch": 19.463381245722108, "grad_norm": 7.81067419052124, "learning_rate": 2.6712328767123287e-08, "log_odds_chosen": -0.002307899296283722, "log_odds_ratio": -0.7698072791099548, "logits/chosen": 0.7529305219650269, "logits/rejected": 0.8279760479927063, "logps/chosen": -2.263248920440674, "logps/rejected": -2.1873769760131836, "loss": 0.7067, "nll_loss": 0.629734456539154, "rewards/accuracies": 0.625, "rewards/chosen": -0.2263248860836029, "rewards/margins": -0.007587173953652382, "rewards/rejected": -0.21873770654201508, "step": 7109 }, { "epoch": 19.46611909650924, "grad_norm": 6.275543689727783, "learning_rate": 2.6575342465753422e-08, "log_odds_chosen": 3.0890042781829834, "log_odds_ratio": -0.11800668388605118, "logits/chosen": 0.6836516261100769, "logits/rejected": 0.6946923136711121, "logps/chosen": -2.4376778602600098, "logps/rejected": -5.40296745300293, "loss": 0.8592, "nll_loss": 0.8473960161209106, "rewards/accuracies": 1.0, "rewards/chosen": -0.24376779794692993, "rewards/margins": 0.29652902483940125, "rewards/rejected": -0.5402967929840088, "step": 7110 }, { "epoch": 19.468856947296374, "grad_norm": 7.2182722091674805, "learning_rate": 2.643835616438356e-08, "log_odds_chosen": 2.219221353530884, "log_odds_ratio": -0.3157452642917633, "logits/chosen": 0.8500528931617737, "logits/rejected": 0.8264973163604736, "logps/chosen": -2.0619680881500244, "logps/rejected": -4.193777561187744, "loss": 0.5674, "nll_loss": 0.5358455181121826, "rewards/accuracies": 0.875, "rewards/chosen": -0.20619681477546692, "rewards/margins": 0.21318094432353973, "rewards/rejected": -0.41937780380249023, "step": 7111 }, { "epoch": 19.471594798083505, "grad_norm": 5.67108678817749, "learning_rate": 2.63013698630137e-08, "log_odds_chosen": 3.934795618057251, "log_odds_ratio": -0.05759581923484802, "logits/chosen": 1.0579798221588135, "logits/rejected": 1.1269426345825195, "logps/chosen": -2.333186149597168, "logps/rejected": -6.100065231323242, "loss": 0.6312, "nll_loss": 0.6254447102546692, "rewards/accuracies": 1.0, "rewards/chosen": -0.23331865668296814, "rewards/margins": 0.37668782472610474, "rewards/rejected": -0.6100064516067505, "step": 7112 }, { "epoch": 19.474332648870636, "grad_norm": 7.032048225402832, "learning_rate": 2.6164383561643835e-08, "log_odds_chosen": 1.613093376159668, "log_odds_ratio": -0.5947421789169312, "logits/chosen": 0.9246363639831543, "logits/rejected": 1.0064482688903809, "logps/chosen": -2.67757511138916, "logps/rejected": -4.22760534286499, "loss": 0.72, "nll_loss": 0.6605517864227295, "rewards/accuracies": 0.75, "rewards/chosen": -0.2677575349807739, "rewards/margins": 0.15500304102897644, "rewards/rejected": -0.422760546207428, "step": 7113 }, { "epoch": 19.477070499657767, "grad_norm": 5.436582565307617, "learning_rate": 2.602739726027397e-08, "log_odds_chosen": 3.1041295528411865, "log_odds_ratio": -0.15655216574668884, "logits/chosen": 0.885653555393219, "logits/rejected": 0.9113926887512207, "logps/chosen": -2.1203742027282715, "logps/rejected": -5.0417327880859375, "loss": 0.645, "nll_loss": 0.6293385624885559, "rewards/accuracies": 1.0, "rewards/chosen": -0.21203742921352386, "rewards/margins": 0.2921358644962311, "rewards/rejected": -0.5041732788085938, "step": 7114 }, { "epoch": 19.479808350444902, "grad_norm": 6.623655796051025, "learning_rate": 2.5890410958904106e-08, "log_odds_chosen": 2.7742350101470947, "log_odds_ratio": -0.2912573516368866, "logits/chosen": 0.9631614685058594, "logits/rejected": 0.954169511795044, "logps/chosen": -1.7688405513763428, "logps/rejected": -4.430505752563477, "loss": 0.5558, "nll_loss": 0.5266410112380981, "rewards/accuracies": 1.0, "rewards/chosen": -0.17688405513763428, "rewards/margins": 0.2661665678024292, "rewards/rejected": -0.4430506229400635, "step": 7115 }, { "epoch": 19.482546201232033, "grad_norm": 5.760500907897949, "learning_rate": 2.5753424657534245e-08, "log_odds_chosen": 2.741290330886841, "log_odds_ratio": -0.15943750739097595, "logits/chosen": 0.9352970123291016, "logits/rejected": 0.9972386360168457, "logps/chosen": -1.8694345951080322, "logps/rejected": -4.444967269897461, "loss": 0.6191, "nll_loss": 0.6031717658042908, "rewards/accuracies": 1.0, "rewards/chosen": -0.1869434416294098, "rewards/margins": 0.2575532793998718, "rewards/rejected": -0.4444967210292816, "step": 7116 }, { "epoch": 19.485284052019164, "grad_norm": 4.581900119781494, "learning_rate": 2.5616438356164384e-08, "log_odds_chosen": 2.5888943672180176, "log_odds_ratio": -0.16319188475608826, "logits/chosen": 0.9824544191360474, "logits/rejected": 0.9810584783554077, "logps/chosen": -1.936585545539856, "logps/rejected": -4.332738876342773, "loss": 0.6075, "nll_loss": 0.5912164449691772, "rewards/accuracies": 1.0, "rewards/chosen": -0.19365854561328888, "rewards/margins": 0.23961536586284637, "rewards/rejected": -0.43327391147613525, "step": 7117 }, { "epoch": 19.488021902806295, "grad_norm": 5.255423545837402, "learning_rate": 2.547945205479452e-08, "log_odds_chosen": 2.2032079696655273, "log_odds_ratio": -0.19029675424098969, "logits/chosen": 0.7986385822296143, "logits/rejected": 0.8866857290267944, "logps/chosen": -2.4126007556915283, "logps/rejected": -4.500972270965576, "loss": 0.6232, "nll_loss": 0.6041395664215088, "rewards/accuracies": 1.0, "rewards/chosen": -0.2412600815296173, "rewards/margins": 0.20883715152740479, "rewards/rejected": -0.4500972628593445, "step": 7118 }, { "epoch": 19.49075975359343, "grad_norm": 5.500422477722168, "learning_rate": 2.5342465753424655e-08, "log_odds_chosen": 2.8227219581604004, "log_odds_ratio": -0.15531796216964722, "logits/chosen": 0.8249732255935669, "logits/rejected": 0.8353606462478638, "logps/chosen": -1.5547429323196411, "logps/rejected": -4.073410511016846, "loss": 0.6673, "nll_loss": 0.6517350673675537, "rewards/accuracies": 1.0, "rewards/chosen": -0.15547429025173187, "rewards/margins": 0.25186675786972046, "rewards/rejected": -0.4073410630226135, "step": 7119 }, { "epoch": 19.49349760438056, "grad_norm": 5.525050163269043, "learning_rate": 2.5205479452054794e-08, "log_odds_chosen": 3.571138620376587, "log_odds_ratio": -0.19391870498657227, "logits/chosen": 0.8294240832328796, "logits/rejected": 0.86009681224823, "logps/chosen": -2.5843420028686523, "logps/rejected": -6.007990837097168, "loss": 0.7115, "nll_loss": 0.6921097040176392, "rewards/accuracies": 1.0, "rewards/chosen": -0.2584342062473297, "rewards/margins": 0.3423649072647095, "rewards/rejected": -0.6007990837097168, "step": 7120 }, { "epoch": 19.496235455167692, "grad_norm": 6.223099231719971, "learning_rate": 2.506849315068493e-08, "log_odds_chosen": 1.7132635116577148, "log_odds_ratio": -0.25140467286109924, "logits/chosen": 0.7432140111923218, "logits/rejected": 0.7281999588012695, "logps/chosen": -2.2282917499542236, "logps/rejected": -3.8098645210266113, "loss": 0.7342, "nll_loss": 0.7090574502944946, "rewards/accuracies": 1.0, "rewards/chosen": -0.2228291630744934, "rewards/margins": 0.15815728902816772, "rewards/rejected": -0.38098645210266113, "step": 7121 }, { "epoch": 19.498973305954827, "grad_norm": 6.217625617980957, "learning_rate": 2.4931506849315068e-08, "log_odds_chosen": 1.457120656967163, "log_odds_ratio": -0.36262357234954834, "logits/chosen": 0.9947465658187866, "logits/rejected": 1.0111169815063477, "logps/chosen": -2.4558792114257812, "logps/rejected": -3.8320963382720947, "loss": 0.6475, "nll_loss": 0.6111999750137329, "rewards/accuracies": 0.75, "rewards/chosen": -0.24558791518211365, "rewards/margins": 0.1376217007637024, "rewards/rejected": -0.38320961594581604, "step": 7122 }, { "epoch": 19.50171115674196, "grad_norm": 5.558147430419922, "learning_rate": 2.4794520547945207e-08, "log_odds_chosen": 1.72111177444458, "log_odds_ratio": -0.2977756857872009, "logits/chosen": 0.975911021232605, "logits/rejected": 1.0516446828842163, "logps/chosen": -2.113832950592041, "logps/rejected": -3.756411552429199, "loss": 0.5784, "nll_loss": 0.5485790371894836, "rewards/accuracies": 0.875, "rewards/chosen": -0.21138328313827515, "rewards/margins": 0.16425786912441254, "rewards/rejected": -0.3756411671638489, "step": 7123 }, { "epoch": 19.50444900752909, "grad_norm": 7.190802097320557, "learning_rate": 2.465753424657534e-08, "log_odds_chosen": 3.5202643871307373, "log_odds_ratio": -0.17655178904533386, "logits/chosen": 0.9624282717704773, "logits/rejected": 0.9862983822822571, "logps/chosen": -2.7684528827667236, "logps/rejected": -6.211120128631592, "loss": 0.6467, "nll_loss": 0.6290913820266724, "rewards/accuracies": 1.0, "rewards/chosen": -0.2768452763557434, "rewards/margins": 0.34426674246788025, "rewards/rejected": -0.6211119890213013, "step": 7124 }, { "epoch": 19.50718685831622, "grad_norm": 6.504516124725342, "learning_rate": 2.4520547945205478e-08, "log_odds_chosen": 1.0910325050354004, "log_odds_ratio": -0.37953081727027893, "logits/chosen": 0.9630645513534546, "logits/rejected": 0.970579981803894, "logps/chosen": -1.867840051651001, "logps/rejected": -2.8481392860412598, "loss": 0.5662, "nll_loss": 0.5282019376754761, "rewards/accuracies": 0.875, "rewards/chosen": -0.186784029006958, "rewards/margins": 0.09802994132041931, "rewards/rejected": -0.2848139703273773, "step": 7125 }, { "epoch": 19.509924709103355, "grad_norm": 5.596220016479492, "learning_rate": 2.4383561643835614e-08, "log_odds_chosen": 4.226232528686523, "log_odds_ratio": -0.06286559253931046, "logits/chosen": 0.9426831603050232, "logits/rejected": 0.9529659748077393, "logps/chosen": -2.386601448059082, "logps/rejected": -6.488369941711426, "loss": 0.7149, "nll_loss": 0.7085863351821899, "rewards/accuracies": 1.0, "rewards/chosen": -0.23866015672683716, "rewards/margins": 0.4101768732070923, "rewards/rejected": -0.6488369703292847, "step": 7126 }, { "epoch": 19.512662559890487, "grad_norm": 5.17608118057251, "learning_rate": 2.4246575342465753e-08, "log_odds_chosen": 2.398980140686035, "log_odds_ratio": -0.27604973316192627, "logits/chosen": 0.6074768900871277, "logits/rejected": 0.6141762137413025, "logps/chosen": -2.1391806602478027, "logps/rejected": -4.4108452796936035, "loss": 0.5832, "nll_loss": 0.5556043982505798, "rewards/accuracies": 0.875, "rewards/chosen": -0.213918074965477, "rewards/margins": 0.22716647386550903, "rewards/rejected": -0.4410845637321472, "step": 7127 }, { "epoch": 19.515400410677618, "grad_norm": 5.089300155639648, "learning_rate": 2.410958904109589e-08, "log_odds_chosen": 2.436277389526367, "log_odds_ratio": -0.11525608599185944, "logits/chosen": 0.910938560962677, "logits/rejected": 1.0223841667175293, "logps/chosen": -2.1555285453796387, "logps/rejected": -4.433955192565918, "loss": 0.6776, "nll_loss": 0.6660809516906738, "rewards/accuracies": 1.0, "rewards/chosen": -0.21555286645889282, "rewards/margins": 0.22784267365932465, "rewards/rejected": -0.4433955252170563, "step": 7128 }, { "epoch": 19.51813826146475, "grad_norm": 6.716917037963867, "learning_rate": 2.3972602739726024e-08, "log_odds_chosen": 3.849174976348877, "log_odds_ratio": -0.17362400889396667, "logits/chosen": 1.2883752584457397, "logits/rejected": 1.304818868637085, "logps/chosen": -2.4205312728881836, "logps/rejected": -6.13643741607666, "loss": 0.5924, "nll_loss": 0.5750000476837158, "rewards/accuracies": 0.875, "rewards/chosen": -0.24205312132835388, "rewards/margins": 0.37159058451652527, "rewards/rejected": -0.6136437654495239, "step": 7129 }, { "epoch": 19.520876112251884, "grad_norm": 5.8583574295043945, "learning_rate": 2.3835616438356162e-08, "log_odds_chosen": 2.2787137031555176, "log_odds_ratio": -0.20913705229759216, "logits/chosen": 0.8329150676727295, "logits/rejected": 0.8294062614440918, "logps/chosen": -1.9830408096313477, "logps/rejected": -4.128530502319336, "loss": 0.6539, "nll_loss": 0.6330075263977051, "rewards/accuracies": 1.0, "rewards/chosen": -0.19830408692359924, "rewards/margins": 0.2145490050315857, "rewards/rejected": -0.41285309195518494, "step": 7130 }, { "epoch": 19.523613963039015, "grad_norm": 5.914322376251221, "learning_rate": 2.36986301369863e-08, "log_odds_chosen": 2.0732128620147705, "log_odds_ratio": -0.23746809363365173, "logits/chosen": 0.8683455586433411, "logits/rejected": 0.8838909268379211, "logps/chosen": -3.3077051639556885, "logps/rejected": -5.340128421783447, "loss": 0.7201, "nll_loss": 0.6963382959365845, "rewards/accuracies": 1.0, "rewards/chosen": -0.3307705521583557, "rewards/margins": 0.20324234664440155, "rewards/rejected": -0.5340129137039185, "step": 7131 }, { "epoch": 19.526351813826146, "grad_norm": 6.052206039428711, "learning_rate": 2.3561643835616437e-08, "log_odds_chosen": 2.646883249282837, "log_odds_ratio": -0.2708047032356262, "logits/chosen": 0.7661927938461304, "logits/rejected": 0.7505699396133423, "logps/chosen": -2.255403757095337, "logps/rejected": -4.811503887176514, "loss": 0.6534, "nll_loss": 0.6263327598571777, "rewards/accuracies": 0.875, "rewards/chosen": -0.2255403846502304, "rewards/margins": 0.25561001896858215, "rewards/rejected": -0.48115041851997375, "step": 7132 }, { "epoch": 19.529089664613277, "grad_norm": 6.635960102081299, "learning_rate": 2.3424657534246576e-08, "log_odds_chosen": 3.620638370513916, "log_odds_ratio": -0.06884884834289551, "logits/chosen": 0.8915339112281799, "logits/rejected": 0.9640394449234009, "logps/chosen": -2.0169100761413574, "logps/rejected": -5.368278503417969, "loss": 0.9131, "nll_loss": 0.9062584042549133, "rewards/accuracies": 1.0, "rewards/chosen": -0.20169103145599365, "rewards/margins": 0.3351368010044098, "rewards/rejected": -0.536827802658081, "step": 7133 }, { "epoch": 19.531827515400412, "grad_norm": 6.495954990386963, "learning_rate": 2.328767123287671e-08, "log_odds_chosen": 2.7626938819885254, "log_odds_ratio": -0.2827093303203583, "logits/chosen": 1.0449382066726685, "logits/rejected": 1.1261742115020752, "logps/chosen": -2.956996202468872, "logps/rejected": -5.663344383239746, "loss": 0.7594, "nll_loss": 0.7311729788780212, "rewards/accuracies": 1.0, "rewards/chosen": -0.2956996560096741, "rewards/margins": 0.27063480019569397, "rewards/rejected": -0.5663344264030457, "step": 7134 }, { "epoch": 19.534565366187543, "grad_norm": 5.903524398803711, "learning_rate": 2.3150684931506847e-08, "log_odds_chosen": 1.5875566005706787, "log_odds_ratio": -0.2867693603038788, "logits/chosen": 1.0234220027923584, "logits/rejected": 0.9843562841415405, "logps/chosen": -2.353837490081787, "logps/rejected": -3.808168649673462, "loss": 0.7026, "nll_loss": 0.6738781929016113, "rewards/accuracies": 1.0, "rewards/chosen": -0.2353837639093399, "rewards/margins": 0.14543311297893524, "rewards/rejected": -0.38081687688827515, "step": 7135 }, { "epoch": 19.537303216974674, "grad_norm": 7.6527886390686035, "learning_rate": 2.3013698630136986e-08, "log_odds_chosen": 2.025486469268799, "log_odds_ratio": -0.2879277765750885, "logits/chosen": 0.8962898254394531, "logits/rejected": 0.9502828121185303, "logps/chosen": -2.399531364440918, "logps/rejected": -4.257818222045898, "loss": 0.576, "nll_loss": 0.5472407937049866, "rewards/accuracies": 0.875, "rewards/chosen": -0.2399531453847885, "rewards/margins": 0.18582865595817566, "rewards/rejected": -0.42578181624412537, "step": 7136 }, { "epoch": 19.540041067761805, "grad_norm": 6.4553680419921875, "learning_rate": 2.287671232876712e-08, "log_odds_chosen": 1.708345890045166, "log_odds_ratio": -0.34825068712234497, "logits/chosen": 0.7053426504135132, "logits/rejected": 0.7653895616531372, "logps/chosen": -2.8639371395111084, "logps/rejected": -4.4962358474731445, "loss": 0.7175, "nll_loss": 0.6826841831207275, "rewards/accuracies": 0.875, "rewards/chosen": -0.2863937020301819, "rewards/margins": 0.16322985291481018, "rewards/rejected": -0.44962358474731445, "step": 7137 }, { "epoch": 19.54277891854894, "grad_norm": 4.801875114440918, "learning_rate": 2.273972602739726e-08, "log_odds_chosen": 2.7313570976257324, "log_odds_ratio": -0.17704594135284424, "logits/chosen": 0.9142111539840698, "logits/rejected": 0.9886670112609863, "logps/chosen": -1.8985623121261597, "logps/rejected": -4.461176872253418, "loss": 0.5658, "nll_loss": 0.5480502843856812, "rewards/accuracies": 0.875, "rewards/chosen": -0.18985623121261597, "rewards/margins": 0.2562614381313324, "rewards/rejected": -0.44611769914627075, "step": 7138 }, { "epoch": 19.54551676933607, "grad_norm": 8.360750198364258, "learning_rate": 2.2602739726027396e-08, "log_odds_chosen": 2.0280609130859375, "log_odds_ratio": -0.5191869139671326, "logits/chosen": 0.9607465863227844, "logits/rejected": 1.0571446418762207, "logps/chosen": -2.929637908935547, "logps/rejected": -4.882254600524902, "loss": 0.7675, "nll_loss": 0.7155518531799316, "rewards/accuracies": 0.75, "rewards/chosen": -0.29296380281448364, "rewards/margins": 0.19526168704032898, "rewards/rejected": -0.48822546005249023, "step": 7139 }, { "epoch": 19.548254620123203, "grad_norm": 7.621191024780273, "learning_rate": 2.246575342465753e-08, "log_odds_chosen": 1.9991999864578247, "log_odds_ratio": -0.2789176404476166, "logits/chosen": 0.9802473783493042, "logits/rejected": 0.9578017592430115, "logps/chosen": -1.7684450149536133, "logps/rejected": -3.5962252616882324, "loss": 0.5639, "nll_loss": 0.5360286831855774, "rewards/accuracies": 0.875, "rewards/chosen": -0.1768445074558258, "rewards/margins": 0.1827780157327652, "rewards/rejected": -0.3596225082874298, "step": 7140 }, { "epoch": 19.550992470910334, "grad_norm": 6.071606159210205, "learning_rate": 2.232876712328767e-08, "log_odds_chosen": 0.7918411493301392, "log_odds_ratio": -0.5541095733642578, "logits/chosen": 0.9535832405090332, "logits/rejected": 1.007678508758545, "logps/chosen": -2.49444580078125, "logps/rejected": -3.1719512939453125, "loss": 0.7795, "nll_loss": 0.7240960597991943, "rewards/accuracies": 0.625, "rewards/chosen": -0.24944457411766052, "rewards/margins": 0.06775054335594177, "rewards/rejected": -0.3171951174736023, "step": 7141 }, { "epoch": 19.55373032169747, "grad_norm": 5.75046443939209, "learning_rate": 2.219178082191781e-08, "log_odds_chosen": 2.3882861137390137, "log_odds_ratio": -0.24414131045341492, "logits/chosen": 0.8248271346092224, "logits/rejected": 0.8106262683868408, "logps/chosen": -1.8919497728347778, "logps/rejected": -4.134807586669922, "loss": 0.6923, "nll_loss": 0.6678370237350464, "rewards/accuracies": 1.0, "rewards/chosen": -0.18919499218463898, "rewards/margins": 0.2242857813835144, "rewards/rejected": -0.4134807586669922, "step": 7142 }, { "epoch": 19.5564681724846, "grad_norm": 6.241616249084473, "learning_rate": 2.2054794520547944e-08, "log_odds_chosen": 1.7041690349578857, "log_odds_ratio": -0.2621138095855713, "logits/chosen": 0.7949495911598206, "logits/rejected": 0.8640948534011841, "logps/chosen": -2.0833699703216553, "logps/rejected": -3.6112709045410156, "loss": 0.5615, "nll_loss": 0.5353226661682129, "rewards/accuracies": 1.0, "rewards/chosen": -0.20833700895309448, "rewards/margins": 0.15279006958007812, "rewards/rejected": -0.3611270785331726, "step": 7143 }, { "epoch": 19.55920602327173, "grad_norm": 5.303228855133057, "learning_rate": 2.191780821917808e-08, "log_odds_chosen": 3.4275143146514893, "log_odds_ratio": -0.13099394738674164, "logits/chosen": 1.04750394821167, "logits/rejected": 1.1052078008651733, "logps/chosen": -2.0817251205444336, "logps/rejected": -5.281313896179199, "loss": 0.6433, "nll_loss": 0.6301645040512085, "rewards/accuracies": 1.0, "rewards/chosen": -0.2081725150346756, "rewards/margins": 0.3199588358402252, "rewards/rejected": -0.528131365776062, "step": 7144 }, { "epoch": 19.561943874058862, "grad_norm": 6.823891639709473, "learning_rate": 2.178082191780822e-08, "log_odds_chosen": 2.063042402267456, "log_odds_ratio": -0.6161394119262695, "logits/chosen": 0.7521860003471375, "logits/rejected": 0.8747661113739014, "logps/chosen": -3.2659401893615723, "logps/rejected": -5.259973049163818, "loss": 0.7804, "nll_loss": 0.7187910079956055, "rewards/accuracies": 0.875, "rewards/chosen": -0.3265940248966217, "rewards/margins": 0.1994032859802246, "rewards/rejected": -0.5259973406791687, "step": 7145 }, { "epoch": 19.564681724845997, "grad_norm": 10.938176155090332, "learning_rate": 2.1643835616438354e-08, "log_odds_chosen": 2.6285667419433594, "log_odds_ratio": -0.22809697687625885, "logits/chosen": 0.987337589263916, "logits/rejected": 0.9674843549728394, "logps/chosen": -2.7388014793395996, "logps/rejected": -5.258223056793213, "loss": 0.7625, "nll_loss": 0.7397052645683289, "rewards/accuracies": 0.875, "rewards/chosen": -0.27388012409210205, "rewards/margins": 0.2519422173500061, "rewards/rejected": -0.5258223414421082, "step": 7146 }, { "epoch": 19.567419575633128, "grad_norm": 4.865209102630615, "learning_rate": 2.1506849315068493e-08, "log_odds_chosen": 2.3000991344451904, "log_odds_ratio": -0.13533160090446472, "logits/chosen": 0.6469199061393738, "logits/rejected": 0.6491202712059021, "logps/chosen": -1.6025445461273193, "logps/rejected": -3.681002140045166, "loss": 0.5175, "nll_loss": 0.5039324760437012, "rewards/accuracies": 1.0, "rewards/chosen": -0.16025446355342865, "rewards/margins": 0.20784573256969452, "rewards/rejected": -0.36810022592544556, "step": 7147 }, { "epoch": 19.57015742642026, "grad_norm": 5.522884368896484, "learning_rate": 2.1369863013698632e-08, "log_odds_chosen": 2.9357926845550537, "log_odds_ratio": -0.29609620571136475, "logits/chosen": 0.7294914722442627, "logits/rejected": 0.7498772740364075, "logps/chosen": -1.7571816444396973, "logps/rejected": -4.573195457458496, "loss": 0.7215, "nll_loss": 0.6918478012084961, "rewards/accuracies": 0.75, "rewards/chosen": -0.17571817338466644, "rewards/margins": 0.2816013693809509, "rewards/rejected": -0.45731955766677856, "step": 7148 }, { "epoch": 19.572895277207394, "grad_norm": 7.576552391052246, "learning_rate": 2.1232876712328764e-08, "log_odds_chosen": 1.491614818572998, "log_odds_ratio": -0.39112672209739685, "logits/chosen": 0.7985514402389526, "logits/rejected": 0.776665210723877, "logps/chosen": -2.487687349319458, "logps/rejected": -3.8107852935791016, "loss": 0.6587, "nll_loss": 0.6196100115776062, "rewards/accuracies": 0.75, "rewards/chosen": -0.24876874685287476, "rewards/margins": 0.13230979442596436, "rewards/rejected": -0.3810785412788391, "step": 7149 }, { "epoch": 19.575633127994525, "grad_norm": 5.968871116638184, "learning_rate": 2.1095890410958903e-08, "log_odds_chosen": 3.60722279548645, "log_odds_ratio": -0.09289561957120895, "logits/chosen": 0.8453941345214844, "logits/rejected": 0.9452805519104004, "logps/chosen": -2.428152084350586, "logps/rejected": -5.820342063903809, "loss": 0.6742, "nll_loss": 0.6649385690689087, "rewards/accuracies": 1.0, "rewards/chosen": -0.24281522631645203, "rewards/margins": 0.33921897411346436, "rewards/rejected": -0.582034170627594, "step": 7150 }, { "epoch": 19.578370978781656, "grad_norm": 5.390163898468018, "learning_rate": 2.095890410958904e-08, "log_odds_chosen": 2.3147032260894775, "log_odds_ratio": -0.2247277796268463, "logits/chosen": 0.6531720161437988, "logits/rejected": 0.6405133605003357, "logps/chosen": -1.6432621479034424, "logps/rejected": -3.783571481704712, "loss": 0.5728, "nll_loss": 0.5503630042076111, "rewards/accuracies": 0.875, "rewards/chosen": -0.16432620584964752, "rewards/margins": 0.214030921459198, "rewards/rejected": -0.3783571422100067, "step": 7151 }, { "epoch": 19.581108829568787, "grad_norm": 6.242719650268555, "learning_rate": 2.0821917808219177e-08, "log_odds_chosen": 3.452785015106201, "log_odds_ratio": -0.30142948031425476, "logits/chosen": 1.1106758117675781, "logits/rejected": 1.1785179376602173, "logps/chosen": -2.4377455711364746, "logps/rejected": -5.765117645263672, "loss": 0.6404, "nll_loss": 0.610293447971344, "rewards/accuracies": 0.875, "rewards/chosen": -0.24377459287643433, "rewards/margins": 0.33273717761039734, "rewards/rejected": -0.5765117406845093, "step": 7152 }, { "epoch": 19.583846680355922, "grad_norm": 4.872828960418701, "learning_rate": 2.0684931506849316e-08, "log_odds_chosen": 3.5715081691741943, "log_odds_ratio": -0.20307284593582153, "logits/chosen": 0.7008501291275024, "logits/rejected": 0.6775681376457214, "logps/chosen": -2.242609739303589, "logps/rejected": -5.646437168121338, "loss": 0.6351, "nll_loss": 0.6148177981376648, "rewards/accuracies": 1.0, "rewards/chosen": -0.22426098585128784, "rewards/margins": 0.34038275480270386, "rewards/rejected": -0.5646437406539917, "step": 7153 }, { "epoch": 19.586584531143053, "grad_norm": 7.144113540649414, "learning_rate": 2.054794520547945e-08, "log_odds_chosen": 1.9088531732559204, "log_odds_ratio": -0.3212038576602936, "logits/chosen": 0.7778928279876709, "logits/rejected": 0.8079003095626831, "logps/chosen": -2.6939001083374023, "logps/rejected": -4.507778167724609, "loss": 0.8395, "nll_loss": 0.8073376417160034, "rewards/accuracies": 0.75, "rewards/chosen": -0.2693900167942047, "rewards/margins": 0.1813877820968628, "rewards/rejected": -0.4507777988910675, "step": 7154 }, { "epoch": 19.589322381930184, "grad_norm": 6.3803181648254395, "learning_rate": 2.0410958904109587e-08, "log_odds_chosen": 1.5738763809204102, "log_odds_ratio": -0.5632268786430359, "logits/chosen": 0.9263983368873596, "logits/rejected": 0.9456443190574646, "logps/chosen": -3.0225977897644043, "logps/rejected": -4.5744099617004395, "loss": 0.729, "nll_loss": 0.6726623773574829, "rewards/accuracies": 0.75, "rewards/chosen": -0.30225980281829834, "rewards/margins": 0.15518121421337128, "rewards/rejected": -0.4574410319328308, "step": 7155 }, { "epoch": 19.592060232717316, "grad_norm": 5.870506286621094, "learning_rate": 2.0273972602739726e-08, "log_odds_chosen": 2.696178436279297, "log_odds_ratio": -0.33145225048065186, "logits/chosen": 0.9958219528198242, "logits/rejected": 1.0627460479736328, "logps/chosen": -1.6383860111236572, "logps/rejected": -4.218182563781738, "loss": 0.5663, "nll_loss": 0.5331618785858154, "rewards/accuracies": 0.875, "rewards/chosen": -0.16383862495422363, "rewards/margins": 0.2579796612262726, "rewards/rejected": -0.4218182861804962, "step": 7156 }, { "epoch": 19.59479808350445, "grad_norm": 5.100085258483887, "learning_rate": 2.013698630136986e-08, "log_odds_chosen": 2.8811941146850586, "log_odds_ratio": -0.23559324443340302, "logits/chosen": 0.6583268642425537, "logits/rejected": 0.6032588481903076, "logps/chosen": -2.1811325550079346, "logps/rejected": -4.975210189819336, "loss": 0.6949, "nll_loss": 0.6713533997535706, "rewards/accuracies": 0.875, "rewards/chosen": -0.2181132584810257, "rewards/margins": 0.2794077396392822, "rewards/rejected": -0.4975210130214691, "step": 7157 }, { "epoch": 19.59753593429158, "grad_norm": 9.431849479675293, "learning_rate": 2e-08, "log_odds_chosen": 0.38303765654563904, "log_odds_ratio": -0.8048955202102661, "logits/chosen": 0.9687694311141968, "logits/rejected": 0.8766963481903076, "logps/chosen": -2.3762331008911133, "logps/rejected": -2.6104841232299805, "loss": 0.6863, "nll_loss": 0.6058470606803894, "rewards/accuracies": 0.875, "rewards/chosen": -0.23762333393096924, "rewards/margins": 0.023425087332725525, "rewards/rejected": -0.26104843616485596, "step": 7158 }, { "epoch": 19.600273785078713, "grad_norm": 11.138382911682129, "learning_rate": 1.986301369863014e-08, "log_odds_chosen": 1.5179176330566406, "log_odds_ratio": -0.3578624725341797, "logits/chosen": 0.8657358884811401, "logits/rejected": 0.7525377869606018, "logps/chosen": -2.491419792175293, "logps/rejected": -3.838650941848755, "loss": 0.5694, "nll_loss": 0.5335755348205566, "rewards/accuracies": 0.875, "rewards/chosen": -0.24914199113845825, "rewards/margins": 0.13472314178943634, "rewards/rejected": -0.3838651180267334, "step": 7159 }, { "epoch": 19.603011635865844, "grad_norm": 11.73438835144043, "learning_rate": 1.972602739726027e-08, "log_odds_chosen": 0.9699840545654297, "log_odds_ratio": -0.4495154619216919, "logits/chosen": 0.7587530612945557, "logits/rejected": 0.6942044496536255, "logps/chosen": -2.130476474761963, "logps/rejected": -2.987841844558716, "loss": 0.7587, "nll_loss": 0.7137086987495422, "rewards/accuracies": 0.875, "rewards/chosen": -0.21304763853549957, "rewards/margins": 0.08573655784130096, "rewards/rejected": -0.29878419637680054, "step": 7160 }, { "epoch": 19.60574948665298, "grad_norm": 6.279201030731201, "learning_rate": 1.958904109589041e-08, "log_odds_chosen": 1.9318259954452515, "log_odds_ratio": -0.3806763291358948, "logits/chosen": 0.6359233856201172, "logits/rejected": 0.6400230526924133, "logps/chosen": -2.480891227722168, "logps/rejected": -4.3515143394470215, "loss": 0.7933, "nll_loss": 0.7552731037139893, "rewards/accuracies": 0.875, "rewards/chosen": -0.24808913469314575, "rewards/margins": 0.18706229329109192, "rewards/rejected": -0.4351513981819153, "step": 7161 }, { "epoch": 19.60848733744011, "grad_norm": 5.271137237548828, "learning_rate": 1.9452054794520546e-08, "log_odds_chosen": 2.519355058670044, "log_odds_ratio": -0.27308523654937744, "logits/chosen": 0.9482307434082031, "logits/rejected": 0.9454537034034729, "logps/chosen": -2.1521682739257812, "logps/rejected": -4.562930107116699, "loss": 0.5874, "nll_loss": 0.5600703954696655, "rewards/accuracies": 1.0, "rewards/chosen": -0.21521683037281036, "rewards/margins": 0.24107618629932404, "rewards/rejected": -0.456292986869812, "step": 7162 }, { "epoch": 19.61122518822724, "grad_norm": 6.3436198234558105, "learning_rate": 1.9315068493150685e-08, "log_odds_chosen": 2.706883192062378, "log_odds_ratio": -0.2570352256298065, "logits/chosen": 1.008750081062317, "logits/rejected": 0.9839169979095459, "logps/chosen": -1.883346438407898, "logps/rejected": -4.404819965362549, "loss": 0.6662, "nll_loss": 0.6405410766601562, "rewards/accuracies": 1.0, "rewards/chosen": -0.1883346438407898, "rewards/margins": 0.2521473467350006, "rewards/rejected": -0.4404820203781128, "step": 7163 }, { "epoch": 19.613963039014372, "grad_norm": 5.409829616546631, "learning_rate": 1.9178082191780824e-08, "log_odds_chosen": 3.2208456993103027, "log_odds_ratio": -0.2646285593509674, "logits/chosen": 0.7878725528717041, "logits/rejected": 0.8421536087989807, "logps/chosen": -2.209489345550537, "logps/rejected": -5.24824857711792, "loss": 0.5984, "nll_loss": 0.5719541311264038, "rewards/accuracies": 0.875, "rewards/chosen": -0.22094890475273132, "rewards/margins": 0.30387598276138306, "rewards/rejected": -0.524824857711792, "step": 7164 }, { "epoch": 19.616700889801507, "grad_norm": 5.391021251678467, "learning_rate": 1.9041095890410956e-08, "log_odds_chosen": 1.6750690937042236, "log_odds_ratio": -0.310828298330307, "logits/chosen": 1.1362162828445435, "logits/rejected": 1.1700772047042847, "logps/chosen": -2.244547128677368, "logps/rejected": -3.860624074935913, "loss": 0.6376, "nll_loss": 0.606535792350769, "rewards/accuracies": 0.875, "rewards/chosen": -0.22445470094680786, "rewards/margins": 0.16160768270492554, "rewards/rejected": -0.3860623836517334, "step": 7165 }, { "epoch": 19.619438740588638, "grad_norm": 7.854589462280273, "learning_rate": 1.8904109589041095e-08, "log_odds_chosen": 1.410921573638916, "log_odds_ratio": -0.4307529032230377, "logits/chosen": 0.9873043298721313, "logits/rejected": 0.9847738742828369, "logps/chosen": -2.588494300842285, "logps/rejected": -3.903674602508545, "loss": 0.6196, "nll_loss": 0.5765143632888794, "rewards/accuracies": 0.75, "rewards/chosen": -0.25884944200515747, "rewards/margins": 0.13151802122592926, "rewards/rejected": -0.39036744832992554, "step": 7166 }, { "epoch": 19.62217659137577, "grad_norm": 5.733322620391846, "learning_rate": 1.8767123287671233e-08, "log_odds_chosen": 2.1784095764160156, "log_odds_ratio": -0.22847580909729004, "logits/chosen": 1.0034937858581543, "logits/rejected": 1.0599416494369507, "logps/chosen": -2.7503647804260254, "logps/rejected": -4.846172332763672, "loss": 0.6749, "nll_loss": 0.6520026326179504, "rewards/accuracies": 1.0, "rewards/chosen": -0.275036484003067, "rewards/margins": 0.20958074927330017, "rewards/rejected": -0.4846172332763672, "step": 7167 }, { "epoch": 19.6249144421629, "grad_norm": 7.629558563232422, "learning_rate": 1.863013698630137e-08, "log_odds_chosen": 0.824675440788269, "log_odds_ratio": -0.5465794801712036, "logits/chosen": 0.8160562515258789, "logits/rejected": 0.7605851292610168, "logps/chosen": -2.105577230453491, "logps/rejected": -2.8076634407043457, "loss": 0.5891, "nll_loss": 0.5344730615615845, "rewards/accuracies": 0.875, "rewards/chosen": -0.2105577290058136, "rewards/margins": 0.07020862400531769, "rewards/rejected": -0.2807663679122925, "step": 7168 }, { "epoch": 19.627652292950035, "grad_norm": 5.93735933303833, "learning_rate": 1.8493150684931508e-08, "log_odds_chosen": 1.5163774490356445, "log_odds_ratio": -0.44912105798721313, "logits/chosen": 0.7852702140808105, "logits/rejected": 0.8978271484375, "logps/chosen": -2.5334129333496094, "logps/rejected": -3.948068380355835, "loss": 0.6799, "nll_loss": 0.6349914073944092, "rewards/accuracies": 0.875, "rewards/chosen": -0.25334131717681885, "rewards/margins": 0.14146554470062256, "rewards/rejected": -0.3948068618774414, "step": 7169 }, { "epoch": 19.630390143737166, "grad_norm": 7.840579032897949, "learning_rate": 1.8356164383561643e-08, "log_odds_chosen": 2.0671567916870117, "log_odds_ratio": -0.2131180763244629, "logits/chosen": 0.9003133773803711, "logits/rejected": 0.9710123538970947, "logps/chosen": -2.6914539337158203, "logps/rejected": -4.653871059417725, "loss": 0.8664, "nll_loss": 0.8450984954833984, "rewards/accuracies": 1.0, "rewards/chosen": -0.2691453695297241, "rewards/margins": 0.19624173641204834, "rewards/rejected": -0.46538710594177246, "step": 7170 }, { "epoch": 19.633127994524298, "grad_norm": 4.982269287109375, "learning_rate": 1.821917808219178e-08, "log_odds_chosen": 1.7670273780822754, "log_odds_ratio": -0.22641631960868835, "logits/chosen": 0.8392469882965088, "logits/rejected": 0.8816711902618408, "logps/chosen": -1.998764991760254, "logps/rejected": -3.6003756523132324, "loss": 0.5301, "nll_loss": 0.5074974298477173, "rewards/accuracies": 1.0, "rewards/chosen": -0.19987650215625763, "rewards/margins": 0.1601610779762268, "rewards/rejected": -0.36003756523132324, "step": 7171 }, { "epoch": 19.63586584531143, "grad_norm": 5.259109973907471, "learning_rate": 1.8082191780821918e-08, "log_odds_chosen": 1.658519983291626, "log_odds_ratio": -0.25454312562942505, "logits/chosen": 0.9012998938560486, "logits/rejected": 0.8416778445243835, "logps/chosen": -2.032099485397339, "logps/rejected": -3.552286148071289, "loss": 0.5847, "nll_loss": 0.5592399835586548, "rewards/accuracies": 0.875, "rewards/chosen": -0.20320992171764374, "rewards/margins": 0.1520186811685562, "rewards/rejected": -0.35522860288619995, "step": 7172 }, { "epoch": 19.638603696098563, "grad_norm": 5.834939956665039, "learning_rate": 1.7945205479452053e-08, "log_odds_chosen": 2.084092855453491, "log_odds_ratio": -0.1908542513847351, "logits/chosen": 0.8839132189750671, "logits/rejected": 0.8416999578475952, "logps/chosen": -2.2420713901519775, "logps/rejected": -4.196585655212402, "loss": 0.6797, "nll_loss": 0.6605696678161621, "rewards/accuracies": 1.0, "rewards/chosen": -0.22420713305473328, "rewards/margins": 0.19545146822929382, "rewards/rejected": -0.4196586012840271, "step": 7173 }, { "epoch": 19.641341546885695, "grad_norm": 5.253055095672607, "learning_rate": 1.7808219178082192e-08, "log_odds_chosen": 1.9080779552459717, "log_odds_ratio": -0.30122122168540955, "logits/chosen": 0.765691876411438, "logits/rejected": 0.8462209701538086, "logps/chosen": -2.309971570968628, "logps/rejected": -4.1244096755981445, "loss": 0.6782, "nll_loss": 0.6480966210365295, "rewards/accuracies": 0.875, "rewards/chosen": -0.23099717497825623, "rewards/margins": 0.18144382536411285, "rewards/rejected": -0.4124409556388855, "step": 7174 }, { "epoch": 19.644079397672826, "grad_norm": 7.555360794067383, "learning_rate": 1.7671232876712328e-08, "log_odds_chosen": 2.4967212677001953, "log_odds_ratio": -0.39827898144721985, "logits/chosen": 0.9027543067932129, "logits/rejected": 0.971249520778656, "logps/chosen": -2.4780242443084717, "logps/rejected": -4.8490214347839355, "loss": 0.7439, "nll_loss": 0.704085111618042, "rewards/accuracies": 0.75, "rewards/chosen": -0.24780242145061493, "rewards/margins": 0.23709970712661743, "rewards/rejected": -0.48490214347839355, "step": 7175 }, { "epoch": 19.64681724845996, "grad_norm": 5.726337909698486, "learning_rate": 1.7534246575342463e-08, "log_odds_chosen": 2.8901712894439697, "log_odds_ratio": -0.16461239755153656, "logits/chosen": 0.8277485370635986, "logits/rejected": 0.8533694744110107, "logps/chosen": -2.6257753372192383, "logps/rejected": -5.395895004272461, "loss": 0.7064, "nll_loss": 0.6899400353431702, "rewards/accuracies": 0.875, "rewards/chosen": -0.26257753372192383, "rewards/margins": 0.2770119607448578, "rewards/rejected": -0.539589524269104, "step": 7176 }, { "epoch": 19.64955509924709, "grad_norm": 5.268200874328613, "learning_rate": 1.7397260273972602e-08, "log_odds_chosen": 1.7218011617660522, "log_odds_ratio": -0.2860986590385437, "logits/chosen": 0.7487218379974365, "logits/rejected": 0.7398381233215332, "logps/chosen": -2.567291259765625, "logps/rejected": -4.188225746154785, "loss": 0.6238, "nll_loss": 0.5952286720275879, "rewards/accuracies": 0.875, "rewards/chosen": -0.2567291557788849, "rewards/margins": 0.16209343075752258, "rewards/rejected": -0.41882258653640747, "step": 7177 }, { "epoch": 19.652292950034223, "grad_norm": 5.827692985534668, "learning_rate": 1.726027397260274e-08, "log_odds_chosen": 1.7657756805419922, "log_odds_ratio": -0.2576466202735901, "logits/chosen": 0.6609431505203247, "logits/rejected": 0.6788166761398315, "logps/chosen": -2.4651429653167725, "logps/rejected": -4.136078834533691, "loss": 0.6527, "nll_loss": 0.6269136071205139, "rewards/accuracies": 1.0, "rewards/chosen": -0.24651429057121277, "rewards/margins": 0.16709356009960175, "rewards/rejected": -0.4136078655719757, "step": 7178 }, { "epoch": 19.655030800821354, "grad_norm": 5.305169582366943, "learning_rate": 1.7123287671232876e-08, "log_odds_chosen": 2.326084852218628, "log_odds_ratio": -0.19779518246650696, "logits/chosen": 0.7006084322929382, "logits/rejected": 0.7676732540130615, "logps/chosen": -2.0624570846557617, "logps/rejected": -4.233285903930664, "loss": 0.6928, "nll_loss": 0.6730660796165466, "rewards/accuracies": 1.0, "rewards/chosen": -0.20624573528766632, "rewards/margins": 0.2170829027891159, "rewards/rejected": -0.4233286380767822, "step": 7179 }, { "epoch": 19.65776865160849, "grad_norm": 7.980969429016113, "learning_rate": 1.6986301369863012e-08, "log_odds_chosen": 1.675679326057434, "log_odds_ratio": -0.6206679940223694, "logits/chosen": 1.0477230548858643, "logits/rejected": 1.0452791452407837, "logps/chosen": -2.0884928703308105, "logps/rejected": -3.628303050994873, "loss": 0.588, "nll_loss": 0.5259812474250793, "rewards/accuracies": 0.75, "rewards/chosen": -0.20884928107261658, "rewards/margins": 0.1539810299873352, "rewards/rejected": -0.3628303110599518, "step": 7180 }, { "epoch": 19.66050650239562, "grad_norm": 5.249754905700684, "learning_rate": 1.684931506849315e-08, "log_odds_chosen": 3.3346645832061768, "log_odds_ratio": -0.15302906930446625, "logits/chosen": 1.051581859588623, "logits/rejected": 1.096897006034851, "logps/chosen": -2.0741450786590576, "logps/rejected": -5.265621662139893, "loss": 0.6352, "nll_loss": 0.6199106574058533, "rewards/accuracies": 1.0, "rewards/chosen": -0.20741450786590576, "rewards/margins": 0.31914764642715454, "rewards/rejected": -0.5265621542930603, "step": 7181 }, { "epoch": 19.66324435318275, "grad_norm": 6.047196865081787, "learning_rate": 1.6712328767123286e-08, "log_odds_chosen": 1.6046198606491089, "log_odds_ratio": -0.4553568959236145, "logits/chosen": 0.8823164105415344, "logits/rejected": 0.8425759077072144, "logps/chosen": -1.960303544998169, "logps/rejected": -3.4641757011413574, "loss": 0.7356, "nll_loss": 0.6900281310081482, "rewards/accuracies": 0.875, "rewards/chosen": -0.1960303634405136, "rewards/margins": 0.1503872126340866, "rewards/rejected": -0.3464176058769226, "step": 7182 }, { "epoch": 19.665982203969882, "grad_norm": 7.579598903656006, "learning_rate": 1.6575342465753425e-08, "log_odds_chosen": 1.340727686882019, "log_odds_ratio": -0.4586367905139923, "logits/chosen": 0.695270299911499, "logits/rejected": 0.6571088433265686, "logps/chosen": -2.850950002670288, "logps/rejected": -4.095339298248291, "loss": 0.6602, "nll_loss": 0.6142937541007996, "rewards/accuracies": 0.75, "rewards/chosen": -0.2850950062274933, "rewards/margins": 0.12443894147872925, "rewards/rejected": -0.40953391790390015, "step": 7183 }, { "epoch": 19.668720054757017, "grad_norm": 6.52509880065918, "learning_rate": 1.643835616438356e-08, "log_odds_chosen": 1.790719747543335, "log_odds_ratio": -0.25596293807029724, "logits/chosen": 0.9322549700737, "logits/rejected": 1.0080631971359253, "logps/chosen": -2.5204474925994873, "logps/rejected": -4.191074371337891, "loss": 0.67, "nll_loss": 0.6444457769393921, "rewards/accuracies": 1.0, "rewards/chosen": -0.2520447373390198, "rewards/margins": 0.16706272959709167, "rewards/rejected": -0.41910746693611145, "step": 7184 }, { "epoch": 19.67145790554415, "grad_norm": 5.34831428527832, "learning_rate": 1.6301369863013696e-08, "log_odds_chosen": 2.327544689178467, "log_odds_ratio": -0.28525882959365845, "logits/chosen": 0.9537680745124817, "logits/rejected": 0.9125681519508362, "logps/chosen": -1.802110195159912, "logps/rejected": -4.006422996520996, "loss": 0.6314, "nll_loss": 0.6028860807418823, "rewards/accuracies": 0.875, "rewards/chosen": -0.18021102249622345, "rewards/margins": 0.22043125331401825, "rewards/rejected": -0.4006422758102417, "step": 7185 }, { "epoch": 19.67419575633128, "grad_norm": 5.08060359954834, "learning_rate": 1.6164383561643835e-08, "log_odds_chosen": 2.788317918777466, "log_odds_ratio": -0.18994171917438507, "logits/chosen": 1.0385044813156128, "logits/rejected": 1.0727159976959229, "logps/chosen": -1.5108894109725952, "logps/rejected": -4.025615692138672, "loss": 0.5658, "nll_loss": 0.5468274354934692, "rewards/accuracies": 1.0, "rewards/chosen": -0.15108895301818848, "rewards/margins": 0.2514726519584656, "rewards/rejected": -0.40256160497665405, "step": 7186 }, { "epoch": 19.67693360711841, "grad_norm": 4.925084114074707, "learning_rate": 1.602739726027397e-08, "log_odds_chosen": 3.5379490852355957, "log_odds_ratio": -0.21208882331848145, "logits/chosen": 0.6919680833816528, "logits/rejected": 0.7447068691253662, "logps/chosen": -1.969433069229126, "logps/rejected": -5.370128631591797, "loss": 0.6336, "nll_loss": 0.6124391555786133, "rewards/accuracies": 1.0, "rewards/chosen": -0.19694331288337708, "rewards/margins": 0.34006959199905396, "rewards/rejected": -0.5370128750801086, "step": 7187 }, { "epoch": 19.679671457905545, "grad_norm": 5.763513565063477, "learning_rate": 1.589041095890411e-08, "log_odds_chosen": 1.6860437393188477, "log_odds_ratio": -0.44441717863082886, "logits/chosen": 0.8251340985298157, "logits/rejected": 0.9496007561683655, "logps/chosen": -2.650515556335449, "logps/rejected": -4.285867214202881, "loss": 0.6548, "nll_loss": 0.610336422920227, "rewards/accuracies": 0.625, "rewards/chosen": -0.26505154371261597, "rewards/margins": 0.16353517770767212, "rewards/rejected": -0.4285867214202881, "step": 7188 }, { "epoch": 19.682409308692677, "grad_norm": 5.335723876953125, "learning_rate": 1.5753424657534248e-08, "log_odds_chosen": 2.4701879024505615, "log_odds_ratio": -0.22536662220954895, "logits/chosen": 0.7553192377090454, "logits/rejected": 0.7803736925125122, "logps/chosen": -2.153714656829834, "logps/rejected": -4.481320381164551, "loss": 0.5876, "nll_loss": 0.5650371313095093, "rewards/accuracies": 1.0, "rewards/chosen": -0.21537145972251892, "rewards/margins": 0.23276057839393616, "rewards/rejected": -0.4481320381164551, "step": 7189 }, { "epoch": 19.685147159479808, "grad_norm": 7.789052963256836, "learning_rate": 1.5616438356164384e-08, "log_odds_chosen": 1.0857863426208496, "log_odds_ratio": -0.45690739154815674, "logits/chosen": 0.9406248927116394, "logits/rejected": 0.9924880266189575, "logps/chosen": -2.5035388469696045, "logps/rejected": -3.543450355529785, "loss": 0.6807, "nll_loss": 0.634996771812439, "rewards/accuracies": 0.75, "rewards/chosen": -0.2503538727760315, "rewards/margins": 0.10399114340543747, "rewards/rejected": -0.35434502363204956, "step": 7190 }, { "epoch": 19.68788501026694, "grad_norm": 4.900646686553955, "learning_rate": 1.547945205479452e-08, "log_odds_chosen": 3.037877321243286, "log_odds_ratio": -0.19402842223644257, "logits/chosen": 0.8572777509689331, "logits/rejected": 0.881534218788147, "logps/chosen": -2.1174306869506836, "logps/rejected": -5.025728702545166, "loss": 0.6829, "nll_loss": 0.6634632349014282, "rewards/accuracies": 1.0, "rewards/chosen": -0.2117430865764618, "rewards/margins": 0.2908298075199127, "rewards/rejected": -0.5025728940963745, "step": 7191 }, { "epoch": 19.690622861054074, "grad_norm": 5.412469863891602, "learning_rate": 1.5342465753424658e-08, "log_odds_chosen": 1.973379135131836, "log_odds_ratio": -0.28989189863204956, "logits/chosen": 0.7673134207725525, "logits/rejected": 0.807769775390625, "logps/chosen": -2.2262930870056152, "logps/rejected": -4.090938091278076, "loss": 0.6634, "nll_loss": 0.634424090385437, "rewards/accuracies": 0.875, "rewards/chosen": -0.22262930870056152, "rewards/margins": 0.18646448850631714, "rewards/rejected": -0.40909379720687866, "step": 7192 }, { "epoch": 19.693360711841205, "grad_norm": 5.086306571960449, "learning_rate": 1.5205479452054794e-08, "log_odds_chosen": 3.3477444648742676, "log_odds_ratio": -0.1964026391506195, "logits/chosen": 0.7985039949417114, "logits/rejected": 0.8372163772583008, "logps/chosen": -2.2390239238739014, "logps/rejected": -5.427029132843018, "loss": 0.6712, "nll_loss": 0.6515723466873169, "rewards/accuracies": 1.0, "rewards/chosen": -0.2239024043083191, "rewards/margins": 0.31880053877830505, "rewards/rejected": -0.5427029132843018, "step": 7193 }, { "epoch": 19.696098562628336, "grad_norm": 4.569184303283691, "learning_rate": 1.5068493150684933e-08, "log_odds_chosen": 3.024451971054077, "log_odds_ratio": -0.1317649483680725, "logits/chosen": 0.6105729341506958, "logits/rejected": 0.5339264869689941, "logps/chosen": -2.070713996887207, "logps/rejected": -4.933332443237305, "loss": 0.7289, "nll_loss": 0.7157490253448486, "rewards/accuracies": 1.0, "rewards/chosen": -0.20707140862941742, "rewards/margins": 0.2862618565559387, "rewards/rejected": -0.49333328008651733, "step": 7194 }, { "epoch": 19.698836413415467, "grad_norm": 5.194117069244385, "learning_rate": 1.4931506849315068e-08, "log_odds_chosen": 2.046473503112793, "log_odds_ratio": -0.3598254323005676, "logits/chosen": 0.9331389665603638, "logits/rejected": 0.9764624834060669, "logps/chosen": -1.7506563663482666, "logps/rejected": -3.6537232398986816, "loss": 0.5938, "nll_loss": 0.5578001141548157, "rewards/accuracies": 0.75, "rewards/chosen": -0.17506563663482666, "rewards/margins": 0.19030669331550598, "rewards/rejected": -0.36537232995033264, "step": 7195 }, { "epoch": 19.701574264202602, "grad_norm": 6.724704265594482, "learning_rate": 1.4794520547945204e-08, "log_odds_chosen": 4.281174659729004, "log_odds_ratio": -0.0609719417989254, "logits/chosen": 1.060188889503479, "logits/rejected": 1.1378508806228638, "logps/chosen": -2.823788642883301, "logps/rejected": -6.947827339172363, "loss": 0.7444, "nll_loss": 0.7382925152778625, "rewards/accuracies": 1.0, "rewards/chosen": -0.2823788523674011, "rewards/margins": 0.4124038517475128, "rewards/rejected": -0.6947827339172363, "step": 7196 }, { "epoch": 19.704312114989733, "grad_norm": 5.8968729972839355, "learning_rate": 1.4657534246575342e-08, "log_odds_chosen": 3.959062099456787, "log_odds_ratio": -0.12271194159984589, "logits/chosen": 0.7797293066978455, "logits/rejected": 0.8289089798927307, "logps/chosen": -2.331791877746582, "logps/rejected": -6.195694923400879, "loss": 0.8267, "nll_loss": 0.8144683837890625, "rewards/accuracies": 1.0, "rewards/chosen": -0.23317918181419373, "rewards/margins": 0.3863902986049652, "rewards/rejected": -0.6195694804191589, "step": 7197 }, { "epoch": 19.707049965776864, "grad_norm": 4.659570217132568, "learning_rate": 1.452054794520548e-08, "log_odds_chosen": 2.584482192993164, "log_odds_ratio": -0.1681007295846939, "logits/chosen": 0.9768295288085938, "logits/rejected": 1.0224337577819824, "logps/chosen": -1.7313246726989746, "logps/rejected": -4.152353286743164, "loss": 0.6452, "nll_loss": 0.6283464431762695, "rewards/accuracies": 1.0, "rewards/chosen": -0.17313244938850403, "rewards/margins": 0.24210286140441895, "rewards/rejected": -0.41523534059524536, "step": 7198 }, { "epoch": 19.709787816563995, "grad_norm": 8.069238662719727, "learning_rate": 1.4383561643835615e-08, "log_odds_chosen": 1.9241502285003662, "log_odds_ratio": -0.5235568284988403, "logits/chosen": 0.8099099397659302, "logits/rejected": 0.7997772693634033, "logps/chosen": -2.5634396076202393, "logps/rejected": -4.351400852203369, "loss": 0.7871, "nll_loss": 0.7347725629806519, "rewards/accuracies": 0.875, "rewards/chosen": -0.2563439607620239, "rewards/margins": 0.17879612743854523, "rewards/rejected": -0.43514010310173035, "step": 7199 }, { "epoch": 19.71252566735113, "grad_norm": 5.305311679840088, "learning_rate": 1.4246575342465752e-08, "log_odds_chosen": 1.4858986139297485, "log_odds_ratio": -0.32059532403945923, "logits/chosen": 1.0164960622787476, "logits/rejected": 1.054776906967163, "logps/chosen": -2.0165696144104004, "logps/rejected": -3.387239456176758, "loss": 0.6122, "nll_loss": 0.5801498889923096, "rewards/accuracies": 1.0, "rewards/chosen": -0.20165696740150452, "rewards/margins": 0.13706696033477783, "rewards/rejected": -0.33872395753860474, "step": 7200 }, { "epoch": 19.71526351813826, "grad_norm": 5.146012783050537, "learning_rate": 1.4109589041095891e-08, "log_odds_chosen": 2.3453311920166016, "log_odds_ratio": -0.30572474002838135, "logits/chosen": 0.6960639953613281, "logits/rejected": 0.7138023972511292, "logps/chosen": -2.5754270553588867, "logps/rejected": -4.805359363555908, "loss": 0.6029, "nll_loss": 0.5723549127578735, "rewards/accuracies": 0.75, "rewards/chosen": -0.2575427293777466, "rewards/margins": 0.22299322485923767, "rewards/rejected": -0.48053595423698425, "step": 7201 }, { "epoch": 19.718001368925393, "grad_norm": 6.542351722717285, "learning_rate": 1.3972602739726027e-08, "log_odds_chosen": 1.9723037481307983, "log_odds_ratio": -0.2709495425224304, "logits/chosen": 0.799041748046875, "logits/rejected": 0.8105816841125488, "logps/chosen": -2.8733937740325928, "logps/rejected": -4.708075523376465, "loss": 0.6531, "nll_loss": 0.6259987354278564, "rewards/accuracies": 0.875, "rewards/chosen": -0.28733938932418823, "rewards/margins": 0.18346819281578064, "rewards/rejected": -0.47080761194229126, "step": 7202 }, { "epoch": 19.720739219712527, "grad_norm": 5.021443843841553, "learning_rate": 1.3835616438356164e-08, "log_odds_chosen": 2.7770743370056152, "log_odds_ratio": -0.11859843879938126, "logits/chosen": 0.7419145703315735, "logits/rejected": 0.7703227996826172, "logps/chosen": -2.360720634460449, "logps/rejected": -4.984891414642334, "loss": 0.7867, "nll_loss": 0.7748587727546692, "rewards/accuracies": 1.0, "rewards/chosen": -0.2360720932483673, "rewards/margins": 0.2624170780181885, "rewards/rejected": -0.4984891414642334, "step": 7203 }, { "epoch": 19.72347707049966, "grad_norm": 5.256523132324219, "learning_rate": 1.36986301369863e-08, "log_odds_chosen": 2.3627617359161377, "log_odds_ratio": -0.27185308933258057, "logits/chosen": 0.8726872205734253, "logits/rejected": 0.8723238110542297, "logps/chosen": -1.8547496795654297, "logps/rejected": -4.11467981338501, "loss": 0.551, "nll_loss": 0.5238328576087952, "rewards/accuracies": 0.875, "rewards/chosen": -0.18547497689723969, "rewards/margins": 0.22599294781684875, "rewards/rejected": -0.41146790981292725, "step": 7204 }, { "epoch": 19.72621492128679, "grad_norm": 6.39723539352417, "learning_rate": 1.3561643835616438e-08, "log_odds_chosen": 1.7565076351165771, "log_odds_ratio": -0.3384045362472534, "logits/chosen": 0.8318162560462952, "logits/rejected": 0.8517725467681885, "logps/chosen": -2.949676036834717, "logps/rejected": -4.613104820251465, "loss": 0.6786, "nll_loss": 0.6447787284851074, "rewards/accuracies": 0.875, "rewards/chosen": -0.2949675917625427, "rewards/margins": 0.16634288430213928, "rewards/rejected": -0.461310476064682, "step": 7205 }, { "epoch": 19.72895277207392, "grad_norm": 7.3276777267456055, "learning_rate": 1.3424657534246575e-08, "log_odds_chosen": 3.2685303688049316, "log_odds_ratio": -0.1552959680557251, "logits/chosen": 0.9042153358459473, "logits/rejected": 0.9034811854362488, "logps/chosen": -3.0252861976623535, "logps/rejected": -6.14408016204834, "loss": 0.6985, "nll_loss": 0.6829632520675659, "rewards/accuracies": 1.0, "rewards/chosen": -0.30252858996391296, "rewards/margins": 0.311879426240921, "rewards/rejected": -0.614408016204834, "step": 7206 }, { "epoch": 19.731690622861056, "grad_norm": 5.814390659332275, "learning_rate": 1.3287671232876711e-08, "log_odds_chosen": 2.646481513977051, "log_odds_ratio": -0.14448490738868713, "logits/chosen": 0.7992870807647705, "logits/rejected": 0.8615280389785767, "logps/chosen": -1.6667206287384033, "logps/rejected": -4.126216888427734, "loss": 0.5587, "nll_loss": 0.544212281703949, "rewards/accuracies": 1.0, "rewards/chosen": -0.16667206585407257, "rewards/margins": 0.2459496110677719, "rewards/rejected": -0.4126216769218445, "step": 7207 }, { "epoch": 19.734428473648187, "grad_norm": 6.396930694580078, "learning_rate": 1.315068493150685e-08, "log_odds_chosen": 1.5681867599487305, "log_odds_ratio": -0.4117792248725891, "logits/chosen": 0.9420466423034668, "logits/rejected": 0.9453306794166565, "logps/chosen": -2.2457380294799805, "logps/rejected": -3.626704454421997, "loss": 0.6046, "nll_loss": 0.5634104013442993, "rewards/accuracies": 0.875, "rewards/chosen": -0.22457382082939148, "rewards/margins": 0.1380966603755951, "rewards/rejected": -0.3626704812049866, "step": 7208 }, { "epoch": 19.737166324435318, "grad_norm": 5.04703426361084, "learning_rate": 1.3013698630136985e-08, "log_odds_chosen": 3.6329421997070312, "log_odds_ratio": -0.0671996995806694, "logits/chosen": 1.0630390644073486, "logits/rejected": 1.122962236404419, "logps/chosen": -2.3346927165985107, "logps/rejected": -5.841683387756348, "loss": 0.6031, "nll_loss": 0.5964205861091614, "rewards/accuracies": 1.0, "rewards/chosen": -0.23346927762031555, "rewards/margins": 0.3506990373134613, "rewards/rejected": -0.5841683149337769, "step": 7209 }, { "epoch": 19.73990417522245, "grad_norm": 5.18274450302124, "learning_rate": 1.2876712328767123e-08, "log_odds_chosen": 1.9315309524536133, "log_odds_ratio": -0.1920832395553589, "logits/chosen": 0.9621598720550537, "logits/rejected": 0.956360399723053, "logps/chosen": -1.9521347284317017, "logps/rejected": -3.743194341659546, "loss": 0.6063, "nll_loss": 0.5871016383171082, "rewards/accuracies": 1.0, "rewards/chosen": -0.19521349668502808, "rewards/margins": 0.1791059672832489, "rewards/rejected": -0.3743194341659546, "step": 7210 }, { "epoch": 19.742642026009584, "grad_norm": 6.029755592346191, "learning_rate": 1.273972602739726e-08, "log_odds_chosen": 1.1788891553878784, "log_odds_ratio": -0.38445842266082764, "logits/chosen": 0.8256710171699524, "logits/rejected": 0.8165417909622192, "logps/chosen": -1.7964801788330078, "logps/rejected": -2.799039602279663, "loss": 0.5599, "nll_loss": 0.5214725136756897, "rewards/accuracies": 0.875, "rewards/chosen": -0.1796480119228363, "rewards/margins": 0.10025595128536224, "rewards/rejected": -0.27990397810935974, "step": 7211 }, { "epoch": 19.745379876796715, "grad_norm": 8.313549995422363, "learning_rate": 1.2602739726027397e-08, "log_odds_chosen": 1.593749761581421, "log_odds_ratio": -0.5144060850143433, "logits/chosen": 0.7470248341560364, "logits/rejected": 0.7654708027839661, "logps/chosen": -1.9067775011062622, "logps/rejected": -3.214916944503784, "loss": 0.5634, "nll_loss": 0.5120072364807129, "rewards/accuracies": 0.875, "rewards/chosen": -0.19067774713039398, "rewards/margins": 0.13081392645835876, "rewards/rejected": -0.32149168848991394, "step": 7212 }, { "epoch": 19.748117727583846, "grad_norm": 4.712382793426514, "learning_rate": 1.2465753424657534e-08, "log_odds_chosen": 2.36377215385437, "log_odds_ratio": -0.20498599112033844, "logits/chosen": 0.7754312753677368, "logits/rejected": 0.8751890659332275, "logps/chosen": -1.7183647155761719, "logps/rejected": -3.921217203140259, "loss": 0.5291, "nll_loss": 0.5085646510124207, "rewards/accuracies": 1.0, "rewards/chosen": -0.1718364655971527, "rewards/margins": 0.22028523683547974, "rewards/rejected": -0.39212170243263245, "step": 7213 }, { "epoch": 19.750855578370977, "grad_norm": 5.66620397567749, "learning_rate": 1.232876712328767e-08, "log_odds_chosen": 2.1224470138549805, "log_odds_ratio": -0.17426010966300964, "logits/chosen": 0.8855290412902832, "logits/rejected": 0.9746377468109131, "logps/chosen": -2.051650285720825, "logps/rejected": -3.9475269317626953, "loss": 0.5247, "nll_loss": 0.5073121786117554, "rewards/accuracies": 1.0, "rewards/chosen": -0.20516502857208252, "rewards/margins": 0.18958766758441925, "rewards/rejected": -0.3947526812553406, "step": 7214 }, { "epoch": 19.753593429158112, "grad_norm": 5.103238582611084, "learning_rate": 1.2191780821917807e-08, "log_odds_chosen": 2.7471866607666016, "log_odds_ratio": -0.1374198943376541, "logits/chosen": 0.9557803869247437, "logits/rejected": 1.0345404148101807, "logps/chosen": -2.3821845054626465, "logps/rejected": -5.013129234313965, "loss": 0.7048, "nll_loss": 0.6910655498504639, "rewards/accuracies": 1.0, "rewards/chosen": -0.23821842670440674, "rewards/margins": 0.2630945146083832, "rewards/rejected": -0.5013129711151123, "step": 7215 }, { "epoch": 19.756331279945243, "grad_norm": 5.717951774597168, "learning_rate": 1.2054794520547946e-08, "log_odds_chosen": 1.7369625568389893, "log_odds_ratio": -0.25677403807640076, "logits/chosen": 1.0245370864868164, "logits/rejected": 1.0686380863189697, "logps/chosen": -2.3955471515655518, "logps/rejected": -4.070030212402344, "loss": 0.6451, "nll_loss": 0.6194706559181213, "rewards/accuracies": 1.0, "rewards/chosen": -0.23955470323562622, "rewards/margins": 0.16744831204414368, "rewards/rejected": -0.4070030152797699, "step": 7216 }, { "epoch": 19.759069130732374, "grad_norm": 5.05703592300415, "learning_rate": 1.1917808219178081e-08, "log_odds_chosen": 3.9179141521453857, "log_odds_ratio": -0.11405313014984131, "logits/chosen": 0.957089900970459, "logits/rejected": 0.9626188278198242, "logps/chosen": -2.1226189136505127, "logps/rejected": -5.883477687835693, "loss": 0.6163, "nll_loss": 0.6048678159713745, "rewards/accuracies": 1.0, "rewards/chosen": -0.2122618854045868, "rewards/margins": 0.37608587741851807, "rewards/rejected": -0.5883477926254272, "step": 7217 }, { "epoch": 19.761806981519506, "grad_norm": 6.218739032745361, "learning_rate": 1.1780821917808218e-08, "log_odds_chosen": 1.3344709873199463, "log_odds_ratio": -0.33108624815940857, "logits/chosen": 0.9582095742225647, "logits/rejected": 1.0339274406433105, "logps/chosen": -2.5565357208251953, "logps/rejected": -3.7949681282043457, "loss": 0.6047, "nll_loss": 0.5716395378112793, "rewards/accuracies": 0.875, "rewards/chosen": -0.2556535601615906, "rewards/margins": 0.123843252658844, "rewards/rejected": -0.37949681282043457, "step": 7218 }, { "epoch": 19.76454483230664, "grad_norm": 6.281233787536621, "learning_rate": 1.1643835616438356e-08, "log_odds_chosen": 1.7790238857269287, "log_odds_ratio": -0.2821875214576721, "logits/chosen": 1.0106866359710693, "logits/rejected": 0.9913774132728577, "logps/chosen": -1.8683180809020996, "logps/rejected": -3.5196244716644287, "loss": 0.5697, "nll_loss": 0.5414382815361023, "rewards/accuracies": 1.0, "rewards/chosen": -0.18683180212974548, "rewards/margins": 0.1651306450366974, "rewards/rejected": -0.35196244716644287, "step": 7219 }, { "epoch": 19.76728268309377, "grad_norm": 4.8072099685668945, "learning_rate": 1.1506849315068493e-08, "log_odds_chosen": 2.4765989780426025, "log_odds_ratio": -0.182599738240242, "logits/chosen": 1.0022943019866943, "logits/rejected": 0.9429200887680054, "logps/chosen": -1.9879090785980225, "logps/rejected": -4.335886001586914, "loss": 0.6008, "nll_loss": 0.5825891494750977, "rewards/accuracies": 0.875, "rewards/chosen": -0.19879090785980225, "rewards/margins": 0.2347976565361023, "rewards/rejected": -0.43358856439590454, "step": 7220 }, { "epoch": 19.770020533880903, "grad_norm": 10.943231582641602, "learning_rate": 1.136986301369863e-08, "log_odds_chosen": 2.762336254119873, "log_odds_ratio": -0.5263785123825073, "logits/chosen": 0.9083763360977173, "logits/rejected": 0.8606027960777283, "logps/chosen": -3.4200844764709473, "logps/rejected": -6.0655927658081055, "loss": 0.9467, "nll_loss": 0.8941065073013306, "rewards/accuracies": 0.75, "rewards/chosen": -0.34200844168663025, "rewards/margins": 0.2645508944988251, "rewards/rejected": -0.6065592765808105, "step": 7221 }, { "epoch": 19.772758384668034, "grad_norm": 5.296141624450684, "learning_rate": 1.1232876712328766e-08, "log_odds_chosen": 2.0915064811706543, "log_odds_ratio": -0.2044093906879425, "logits/chosen": 0.7918918132781982, "logits/rejected": 0.8484134674072266, "logps/chosen": -1.80631422996521, "logps/rejected": -3.721893787384033, "loss": 0.5883, "nll_loss": 0.567898154258728, "rewards/accuracies": 1.0, "rewards/chosen": -0.18063142895698547, "rewards/margins": 0.19155795872211456, "rewards/rejected": -0.37218937277793884, "step": 7222 }, { "epoch": 19.77549623545517, "grad_norm": 4.840867519378662, "learning_rate": 1.1095890410958904e-08, "log_odds_chosen": 3.589590549468994, "log_odds_ratio": -0.14399638772010803, "logits/chosen": 0.7655056715011597, "logits/rejected": 0.8322533369064331, "logps/chosen": -2.6057450771331787, "logps/rejected": -6.064280986785889, "loss": 0.6396, "nll_loss": 0.625243067741394, "rewards/accuracies": 1.0, "rewards/chosen": -0.2605745196342468, "rewards/margins": 0.3458535671234131, "rewards/rejected": -0.6064280271530151, "step": 7223 }, { "epoch": 19.7782340862423, "grad_norm": 5.896134853363037, "learning_rate": 1.095890410958904e-08, "log_odds_chosen": 0.9175606966018677, "log_odds_ratio": -0.3527719974517822, "logits/chosen": 0.9193757772445679, "logits/rejected": 0.9052683115005493, "logps/chosen": -1.8579059839248657, "logps/rejected": -2.6230061054229736, "loss": 0.5557, "nll_loss": 0.5203887224197388, "rewards/accuracies": 1.0, "rewards/chosen": -0.18579059839248657, "rewards/margins": 0.07651003450155258, "rewards/rejected": -0.26230061054229736, "step": 7224 }, { "epoch": 19.78097193702943, "grad_norm": 6.3515400886535645, "learning_rate": 1.0821917808219177e-08, "log_odds_chosen": 1.9551666975021362, "log_odds_ratio": -0.36129188537597656, "logits/chosen": 1.003301978111267, "logits/rejected": 1.0031201839447021, "logps/chosen": -2.3203394412994385, "logps/rejected": -4.192113876342773, "loss": 0.7053, "nll_loss": 0.669174313545227, "rewards/accuracies": 0.75, "rewards/chosen": -0.23203396797180176, "rewards/margins": 0.18717744946479797, "rewards/rejected": -0.41921138763427734, "step": 7225 }, { "epoch": 19.783709787816562, "grad_norm": 4.6854119300842285, "learning_rate": 1.0684931506849316e-08, "log_odds_chosen": 2.322425127029419, "log_odds_ratio": -0.20826350152492523, "logits/chosen": 0.9070520997047424, "logits/rejected": 1.0096536874771118, "logps/chosen": -2.1539721488952637, "logps/rejected": -4.3420891761779785, "loss": 0.5877, "nll_loss": 0.5668609142303467, "rewards/accuracies": 1.0, "rewards/chosen": -0.2153972089290619, "rewards/margins": 0.21881172060966492, "rewards/rejected": -0.4342089295387268, "step": 7226 }, { "epoch": 19.786447638603697, "grad_norm": 6.453251361846924, "learning_rate": 1.0547945205479451e-08, "log_odds_chosen": 1.9424535036087036, "log_odds_ratio": -0.3138681650161743, "logits/chosen": 0.8619256019592285, "logits/rejected": 0.9577182531356812, "logps/chosen": -2.3151628971099854, "logps/rejected": -4.150216579437256, "loss": 0.6022, "nll_loss": 0.570848822593689, "rewards/accuracies": 0.75, "rewards/chosen": -0.2315162867307663, "rewards/margins": 0.18350538611412048, "rewards/rejected": -0.4150216579437256, "step": 7227 }, { "epoch": 19.789185489390828, "grad_norm": 5.775331974029541, "learning_rate": 1.0410958904109589e-08, "log_odds_chosen": 2.8267924785614014, "log_odds_ratio": -0.24167925119400024, "logits/chosen": 0.8853664398193359, "logits/rejected": 0.9341782331466675, "logps/chosen": -2.439481019973755, "logps/rejected": -5.102022647857666, "loss": 0.6213, "nll_loss": 0.5971312522888184, "rewards/accuracies": 0.75, "rewards/chosen": -0.2439481019973755, "rewards/margins": 0.26625415682792664, "rewards/rejected": -0.5102022886276245, "step": 7228 }, { "epoch": 19.79192334017796, "grad_norm": 4.690515995025635, "learning_rate": 1.0273972602739724e-08, "log_odds_chosen": 3.7891664505004883, "log_odds_ratio": -0.1143760159611702, "logits/chosen": 0.8258046507835388, "logits/rejected": 0.8201256990432739, "logps/chosen": -2.0041286945343018, "logps/rejected": -5.649636268615723, "loss": 0.6262, "nll_loss": 0.6147175431251526, "rewards/accuracies": 1.0, "rewards/chosen": -0.20041286945343018, "rewards/margins": 0.36455070972442627, "rewards/rejected": -0.5649635791778564, "step": 7229 }, { "epoch": 19.794661190965094, "grad_norm": 5.725708961486816, "learning_rate": 1.0136986301369863e-08, "log_odds_chosen": 2.724353313446045, "log_odds_ratio": -0.1945820301771164, "logits/chosen": 0.8312427401542664, "logits/rejected": 0.7928892970085144, "logps/chosen": -1.9918173551559448, "logps/rejected": -4.563589572906494, "loss": 0.5842, "nll_loss": 0.5647544860839844, "rewards/accuracies": 0.875, "rewards/chosen": -0.19918173551559448, "rewards/margins": 0.2571772336959839, "rewards/rejected": -0.45635896921157837, "step": 7230 }, { "epoch": 19.797399041752225, "grad_norm": 6.100582122802734, "learning_rate": 1e-08, "log_odds_chosen": 3.495605945587158, "log_odds_ratio": -0.10106077790260315, "logits/chosen": 1.128645420074463, "logits/rejected": 1.1674613952636719, "logps/chosen": -2.168419361114502, "logps/rejected": -5.515326023101807, "loss": 0.6676, "nll_loss": 0.6574791669845581, "rewards/accuracies": 1.0, "rewards/chosen": -0.2168419361114502, "rewards/margins": 0.3346906900405884, "rewards/rejected": -0.5515326261520386, "step": 7231 }, { "epoch": 19.800136892539356, "grad_norm": 4.926807403564453, "learning_rate": 9.863013698630136e-09, "log_odds_chosen": 2.234346628189087, "log_odds_ratio": -0.28383487462997437, "logits/chosen": 0.8946605920791626, "logits/rejected": 0.9117270112037659, "logps/chosen": -2.3797192573547363, "logps/rejected": -4.513402462005615, "loss": 0.6077, "nll_loss": 0.5793552994728088, "rewards/accuracies": 0.875, "rewards/chosen": -0.2379719465970993, "rewards/margins": 0.21336831152439117, "rewards/rejected": -0.4513402581214905, "step": 7232 }, { "epoch": 19.802874743326488, "grad_norm": 5.781956672668457, "learning_rate": 9.726027397260273e-09, "log_odds_chosen": 3.4234721660614014, "log_odds_ratio": -0.14539749920368195, "logits/chosen": 0.8755404949188232, "logits/rejected": 0.9550754427909851, "logps/chosen": -2.355809450149536, "logps/rejected": -5.655974864959717, "loss": 0.7583, "nll_loss": 0.7437997460365295, "rewards/accuracies": 1.0, "rewards/chosen": -0.2355809509754181, "rewards/margins": 0.3300165832042694, "rewards/rejected": -0.5655975341796875, "step": 7233 }, { "epoch": 19.805612594113622, "grad_norm": 12.76053524017334, "learning_rate": 9.589041095890412e-09, "log_odds_chosen": 1.6009176969528198, "log_odds_ratio": -0.7573508620262146, "logits/chosen": 0.9726158380508423, "logits/rejected": 0.9258589744567871, "logps/chosen": -3.038759469985962, "logps/rejected": -4.519953727722168, "loss": 0.7879, "nll_loss": 0.7122018933296204, "rewards/accuracies": 0.75, "rewards/chosen": -0.30387595295906067, "rewards/margins": 0.14811941981315613, "rewards/rejected": -0.4519953727722168, "step": 7234 }, { "epoch": 19.808350444900753, "grad_norm": 4.962778568267822, "learning_rate": 9.452054794520547e-09, "log_odds_chosen": 1.5415452718734741, "log_odds_ratio": -0.36993855237960815, "logits/chosen": 0.6838128566741943, "logits/rejected": 0.702329158782959, "logps/chosen": -2.9990928173065186, "logps/rejected": -4.440690517425537, "loss": 0.705, "nll_loss": 0.6680381298065186, "rewards/accuracies": 0.75, "rewards/chosen": -0.2999092936515808, "rewards/margins": 0.14415977895259857, "rewards/rejected": -0.4440690577030182, "step": 7235 }, { "epoch": 19.811088295687885, "grad_norm": 5.233080863952637, "learning_rate": 9.315068493150684e-09, "log_odds_chosen": 2.2083353996276855, "log_odds_ratio": -0.18100836873054504, "logits/chosen": 0.7920557260513306, "logits/rejected": 0.7717994451522827, "logps/chosen": -1.8269903659820557, "logps/rejected": -3.870154857635498, "loss": 0.5666, "nll_loss": 0.5484552383422852, "rewards/accuracies": 1.0, "rewards/chosen": -0.1826990246772766, "rewards/margins": 0.20431646704673767, "rewards/rejected": -0.3870154917240143, "step": 7236 }, { "epoch": 19.813826146475016, "grad_norm": 5.675954818725586, "learning_rate": 9.178082191780822e-09, "log_odds_chosen": 1.397740125656128, "log_odds_ratio": -0.3358991742134094, "logits/chosen": 0.8289281129837036, "logits/rejected": 0.803429365158081, "logps/chosen": -2.12933349609375, "logps/rejected": -3.3998539447784424, "loss": 0.6855, "nll_loss": 0.6519054770469666, "rewards/accuracies": 0.875, "rewards/chosen": -0.21293336153030396, "rewards/margins": 0.12705205380916595, "rewards/rejected": -0.3399854302406311, "step": 7237 }, { "epoch": 19.81656399726215, "grad_norm": 4.997766971588135, "learning_rate": 9.041095890410959e-09, "log_odds_chosen": 1.8752084970474243, "log_odds_ratio": -0.2216661274433136, "logits/chosen": 0.6126261949539185, "logits/rejected": 0.6443068981170654, "logps/chosen": -1.7886654138565063, "logps/rejected": -3.4399213790893555, "loss": 0.6655, "nll_loss": 0.6433289051055908, "rewards/accuracies": 1.0, "rewards/chosen": -0.17886653542518616, "rewards/margins": 0.16512557864189148, "rewards/rejected": -0.3439921438694, "step": 7238 }, { "epoch": 19.81930184804928, "grad_norm": 6.746885776519775, "learning_rate": 8.904109589041096e-09, "log_odds_chosen": 2.112020969390869, "log_odds_ratio": -0.18500185012817383, "logits/chosen": 0.8053778409957886, "logits/rejected": 0.8073214888572693, "logps/chosen": -2.001718759536743, "logps/rejected": -3.931283473968506, "loss": 0.6035, "nll_loss": 0.584953784942627, "rewards/accuracies": 1.0, "rewards/chosen": -0.20017187297344208, "rewards/margins": 0.19295649230480194, "rewards/rejected": -0.3931283950805664, "step": 7239 }, { "epoch": 19.822039698836413, "grad_norm": 4.7480363845825195, "learning_rate": 8.767123287671232e-09, "log_odds_chosen": 2.635971784591675, "log_odds_ratio": -0.19437482953071594, "logits/chosen": 0.7410531044006348, "logits/rejected": 0.7053423523902893, "logps/chosen": -1.941290020942688, "logps/rejected": -4.411128997802734, "loss": 0.5995, "nll_loss": 0.5800471901893616, "rewards/accuracies": 1.0, "rewards/chosen": -0.19412901997566223, "rewards/margins": 0.24698391556739807, "rewards/rejected": -0.4411129355430603, "step": 7240 }, { "epoch": 19.824777549623544, "grad_norm": 5.369676113128662, "learning_rate": 8.63013698630137e-09, "log_odds_chosen": 1.7809431552886963, "log_odds_ratio": -0.31810837984085083, "logits/chosen": 0.9075721502304077, "logits/rejected": 0.8629493117332458, "logps/chosen": -1.8657464981079102, "logps/rejected": -3.548954725265503, "loss": 0.6088, "nll_loss": 0.5770354270935059, "rewards/accuracies": 0.875, "rewards/chosen": -0.18657466769218445, "rewards/margins": 0.16832080483436584, "rewards/rejected": -0.3548954427242279, "step": 7241 }, { "epoch": 19.82751540041068, "grad_norm": 5.249059677124023, "learning_rate": 8.493150684931506e-09, "log_odds_chosen": 2.0672202110290527, "log_odds_ratio": -0.2955532670021057, "logits/chosen": 0.9174180626869202, "logits/rejected": 0.9917839765548706, "logps/chosen": -1.9336459636688232, "logps/rejected": -3.8199844360351562, "loss": 0.5689, "nll_loss": 0.5393579006195068, "rewards/accuracies": 0.875, "rewards/chosen": -0.19336460530757904, "rewards/margins": 0.18863385915756226, "rewards/rejected": -0.3819984495639801, "step": 7242 }, { "epoch": 19.83025325119781, "grad_norm": 4.729578495025635, "learning_rate": 8.356164383561643e-09, "log_odds_chosen": 2.4631152153015137, "log_odds_ratio": -0.22623814642429352, "logits/chosen": 0.7138035893440247, "logits/rejected": 0.7641419768333435, "logps/chosen": -2.022840976715088, "logps/rejected": -4.3483076095581055, "loss": 0.5337, "nll_loss": 0.5110982656478882, "rewards/accuracies": 1.0, "rewards/chosen": -0.2022840976715088, "rewards/margins": 0.2325466275215149, "rewards/rejected": -0.4348307251930237, "step": 7243 }, { "epoch": 19.83299110198494, "grad_norm": 5.329409599304199, "learning_rate": 8.21917808219178e-09, "log_odds_chosen": 2.1339187622070312, "log_odds_ratio": -0.21532917022705078, "logits/chosen": 1.122316598892212, "logits/rejected": 1.1648893356323242, "logps/chosen": -3.151440143585205, "logps/rejected": -5.215651512145996, "loss": 0.6591, "nll_loss": 0.63752681016922, "rewards/accuracies": 1.0, "rewards/chosen": -0.31514400243759155, "rewards/margins": 0.2064211368560791, "rewards/rejected": -0.5215651392936707, "step": 7244 }, { "epoch": 19.835728952772072, "grad_norm": 5.5111165046691895, "learning_rate": 8.082191780821918e-09, "log_odds_chosen": 2.4419782161712646, "log_odds_ratio": -0.41475653648376465, "logits/chosen": 0.8208458423614502, "logits/rejected": 0.7891395092010498, "logps/chosen": -2.189812660217285, "logps/rejected": -4.492741584777832, "loss": 0.6824, "nll_loss": 0.6409525871276855, "rewards/accuracies": 0.625, "rewards/chosen": -0.21898126602172852, "rewards/margins": 0.2302929162979126, "rewards/rejected": -0.4492741823196411, "step": 7245 }, { "epoch": 19.838466803559207, "grad_norm": 6.619520664215088, "learning_rate": 7.945205479452055e-09, "log_odds_chosen": 1.556849718093872, "log_odds_ratio": -0.37985196709632874, "logits/chosen": 0.7553983330726624, "logits/rejected": 0.7023948431015015, "logps/chosen": -2.1851422786712646, "logps/rejected": -3.61434006690979, "loss": 0.5799, "nll_loss": 0.5419064164161682, "rewards/accuracies": 0.75, "rewards/chosen": -0.21851421892642975, "rewards/margins": 0.14291979372501373, "rewards/rejected": -0.3614340126514435, "step": 7246 }, { "epoch": 19.84120465434634, "grad_norm": 6.235913276672363, "learning_rate": 7.808219178082192e-09, "log_odds_chosen": 1.1070070266723633, "log_odds_ratio": -0.37431758642196655, "logits/chosen": 0.9757412075996399, "logits/rejected": 1.0386617183685303, "logps/chosen": -2.605358839035034, "logps/rejected": -3.631866216659546, "loss": 0.5747, "nll_loss": 0.5372196435928345, "rewards/accuracies": 1.0, "rewards/chosen": -0.2605358958244324, "rewards/margins": 0.1026507318019867, "rewards/rejected": -0.36318662762641907, "step": 7247 }, { "epoch": 19.84394250513347, "grad_norm": 5.691141128540039, "learning_rate": 7.671232876712329e-09, "log_odds_chosen": 2.915581464767456, "log_odds_ratio": -0.16074472665786743, "logits/chosen": 0.6545538902282715, "logits/rejected": 0.6584224700927734, "logps/chosen": -1.7492507696151733, "logps/rejected": -4.473371505737305, "loss": 0.556, "nll_loss": 0.5399301052093506, "rewards/accuracies": 1.0, "rewards/chosen": -0.1749250888824463, "rewards/margins": 0.2724120616912842, "rewards/rejected": -0.44733718037605286, "step": 7248 }, { "epoch": 19.8466803559206, "grad_norm": 7.148889064788818, "learning_rate": 7.534246575342466e-09, "log_odds_chosen": 0.7817210555076599, "log_odds_ratio": -0.5002468228340149, "logits/chosen": 0.8088564276695251, "logits/rejected": 0.7859086990356445, "logps/chosen": -2.935065746307373, "logps/rejected": -3.6553499698638916, "loss": 0.6979, "nll_loss": 0.6479204893112183, "rewards/accuracies": 0.75, "rewards/chosen": -0.29350659251213074, "rewards/margins": 0.07202840596437454, "rewards/rejected": -0.3655349612236023, "step": 7249 }, { "epoch": 19.849418206707735, "grad_norm": 5.147767543792725, "learning_rate": 7.397260273972602e-09, "log_odds_chosen": 2.3587992191314697, "log_odds_ratio": -0.1884021759033203, "logits/chosen": 0.7793905138969421, "logits/rejected": 0.8159634470939636, "logps/chosen": -2.6297507286071777, "logps/rejected": -4.863039493560791, "loss": 0.7014, "nll_loss": 0.6825534105300903, "rewards/accuracies": 0.875, "rewards/chosen": -0.2629750669002533, "rewards/margins": 0.2233288586139679, "rewards/rejected": -0.4863039255142212, "step": 7250 }, { "epoch": 19.852156057494867, "grad_norm": 5.632201671600342, "learning_rate": 7.26027397260274e-09, "log_odds_chosen": 2.147857189178467, "log_odds_ratio": -0.3175051212310791, "logits/chosen": 0.7559363842010498, "logits/rejected": 0.7114053964614868, "logps/chosen": -2.408003807067871, "logps/rejected": -4.4744696617126465, "loss": 0.7536, "nll_loss": 0.7218688726425171, "rewards/accuracies": 0.875, "rewards/chosen": -0.2408003807067871, "rewards/margins": 0.20664659142494202, "rewards/rejected": -0.44744694232940674, "step": 7251 }, { "epoch": 19.854893908281998, "grad_norm": 5.854885578155518, "learning_rate": 7.123287671232876e-09, "log_odds_chosen": 2.929419994354248, "log_odds_ratio": -0.22270053625106812, "logits/chosen": 0.8204346895217896, "logits/rejected": 0.8942254781723022, "logps/chosen": -2.04537296295166, "logps/rejected": -4.855208396911621, "loss": 0.526, "nll_loss": 0.5037468075752258, "rewards/accuracies": 0.875, "rewards/chosen": -0.2045372873544693, "rewards/margins": 0.280983567237854, "rewards/rejected": -0.4855208694934845, "step": 7252 }, { "epoch": 19.85763175906913, "grad_norm": 5.486149787902832, "learning_rate": 6.986301369863013e-09, "log_odds_chosen": 2.699185848236084, "log_odds_ratio": -0.20026029646396637, "logits/chosen": 0.6940855383872986, "logits/rejected": 0.8191332817077637, "logps/chosen": -1.8990767002105713, "logps/rejected": -4.442511081695557, "loss": 0.5454, "nll_loss": 0.5253434777259827, "rewards/accuracies": 1.0, "rewards/chosen": -0.18990767002105713, "rewards/margins": 0.2543434500694275, "rewards/rejected": -0.444251149892807, "step": 7253 }, { "epoch": 19.860369609856264, "grad_norm": 5.380649089813232, "learning_rate": 6.84931506849315e-09, "log_odds_chosen": 3.303839921951294, "log_odds_ratio": -0.16166895627975464, "logits/chosen": 0.9985990524291992, "logits/rejected": 0.9808046817779541, "logps/chosen": -1.9039872884750366, "logps/rejected": -5.014925956726074, "loss": 0.6382, "nll_loss": 0.6220752000808716, "rewards/accuracies": 1.0, "rewards/chosen": -0.19039873778820038, "rewards/margins": 0.31109389662742615, "rewards/rejected": -0.5014926195144653, "step": 7254 }, { "epoch": 19.863107460643395, "grad_norm": 5.0071187019348145, "learning_rate": 6.712328767123288e-09, "log_odds_chosen": 1.7618660926818848, "log_odds_ratio": -0.2882682979106903, "logits/chosen": 0.6952773928642273, "logits/rejected": 0.7167129516601562, "logps/chosen": -2.0028841495513916, "logps/rejected": -3.6740059852600098, "loss": 0.5542, "nll_loss": 0.5253505110740662, "rewards/accuracies": 1.0, "rewards/chosen": -0.20028841495513916, "rewards/margins": 0.16711214184761047, "rewards/rejected": -0.36740055680274963, "step": 7255 }, { "epoch": 19.865845311430526, "grad_norm": 5.583727836608887, "learning_rate": 6.575342465753425e-09, "log_odds_chosen": 3.0820767879486084, "log_odds_ratio": -0.18350011110305786, "logits/chosen": 1.125396490097046, "logits/rejected": 1.1116113662719727, "logps/chosen": -1.5280187129974365, "logps/rejected": -4.393220901489258, "loss": 0.5738, "nll_loss": 0.555415153503418, "rewards/accuracies": 1.0, "rewards/chosen": -0.15280187129974365, "rewards/margins": 0.28652024269104004, "rewards/rejected": -0.4393221139907837, "step": 7256 }, { "epoch": 19.86858316221766, "grad_norm": 7.379864692687988, "learning_rate": 6.438356164383561e-09, "log_odds_chosen": 1.2966454029083252, "log_odds_ratio": -0.453659325838089, "logits/chosen": 0.7727538347244263, "logits/rejected": 0.7917109727859497, "logps/chosen": -2.4548003673553467, "logps/rejected": -3.672760248184204, "loss": 0.6442, "nll_loss": 0.5988666415214539, "rewards/accuracies": 0.875, "rewards/chosen": -0.2454800307750702, "rewards/margins": 0.12179598212242126, "rewards/rejected": -0.36727601289749146, "step": 7257 }, { "epoch": 19.871321013004792, "grad_norm": 6.121297359466553, "learning_rate": 6.3013698630136985e-09, "log_odds_chosen": 1.7534455060958862, "log_odds_ratio": -0.30767491459846497, "logits/chosen": 1.202021598815918, "logits/rejected": 1.19568932056427, "logps/chosen": -1.9684680700302124, "logps/rejected": -3.5976078510284424, "loss": 0.5531, "nll_loss": 0.5223044753074646, "rewards/accuracies": 0.875, "rewards/chosen": -0.19684679806232452, "rewards/margins": 0.162913978099823, "rewards/rejected": -0.3597607910633087, "step": 7258 }, { "epoch": 19.874058863791923, "grad_norm": 5.3208441734313965, "learning_rate": 6.164383561643835e-09, "log_odds_chosen": 4.129696369171143, "log_odds_ratio": -0.11271613091230392, "logits/chosen": 0.8138279914855957, "logits/rejected": 0.8088957071304321, "logps/chosen": -2.402698040008545, "logps/rejected": -6.3669939041137695, "loss": 0.746, "nll_loss": 0.734698474407196, "rewards/accuracies": 1.0, "rewards/chosen": -0.24026982486248016, "rewards/margins": 0.39642956852912903, "rewards/rejected": -0.636699378490448, "step": 7259 }, { "epoch": 19.876796714579054, "grad_norm": 6.077150344848633, "learning_rate": 6.027397260273973e-09, "log_odds_chosen": 1.90837824344635, "log_odds_ratio": -0.21209239959716797, "logits/chosen": 0.8037589192390442, "logits/rejected": 0.8512548804283142, "logps/chosen": -2.2013795375823975, "logps/rejected": -4.00330924987793, "loss": 0.5831, "nll_loss": 0.5618503093719482, "rewards/accuracies": 1.0, "rewards/chosen": -0.22013795375823975, "rewards/margins": 0.1801930069923401, "rewards/rejected": -0.40033096075057983, "step": 7260 }, { "epoch": 19.87953456536619, "grad_norm": 5.679853916168213, "learning_rate": 5.890410958904109e-09, "log_odds_chosen": 2.27473521232605, "log_odds_ratio": -0.20514142513275146, "logits/chosen": 1.0989794731140137, "logits/rejected": 1.0899126529693604, "logps/chosen": -2.6280882358551025, "logps/rejected": -4.792776107788086, "loss": 0.5634, "nll_loss": 0.5428568720817566, "rewards/accuracies": 1.0, "rewards/chosen": -0.26280879974365234, "rewards/margins": 0.21646884083747864, "rewards/rejected": -0.47927767038345337, "step": 7261 }, { "epoch": 19.88227241615332, "grad_norm": 5.649180889129639, "learning_rate": 5.753424657534246e-09, "log_odds_chosen": 2.027923583984375, "log_odds_ratio": -0.20307156443595886, "logits/chosen": 1.1129286289215088, "logits/rejected": 1.1092627048492432, "logps/chosen": -1.7011289596557617, "logps/rejected": -3.5525083541870117, "loss": 0.5082, "nll_loss": 0.48790037631988525, "rewards/accuracies": 1.0, "rewards/chosen": -0.17011290788650513, "rewards/margins": 0.18513792753219604, "rewards/rejected": -0.35525083541870117, "step": 7262 }, { "epoch": 19.88501026694045, "grad_norm": 5.123351573944092, "learning_rate": 5.616438356164383e-09, "log_odds_chosen": 2.3751001358032227, "log_odds_ratio": -0.26330262422561646, "logits/chosen": 0.983153223991394, "logits/rejected": 0.9273428320884705, "logps/chosen": -1.6110203266143799, "logps/rejected": -3.7974092960357666, "loss": 0.6298, "nll_loss": 0.6034855842590332, "rewards/accuracies": 0.875, "rewards/chosen": -0.1611020267009735, "rewards/margins": 0.21863891184329987, "rewards/rejected": -0.3797409236431122, "step": 7263 }, { "epoch": 19.887748117727583, "grad_norm": 5.131428241729736, "learning_rate": 5.47945205479452e-09, "log_odds_chosen": 3.9427034854888916, "log_odds_ratio": -0.05456038936972618, "logits/chosen": 0.8951569199562073, "logits/rejected": 0.9661002159118652, "logps/chosen": -1.8975372314453125, "logps/rejected": -5.649334907531738, "loss": 0.6832, "nll_loss": 0.6777284741401672, "rewards/accuracies": 1.0, "rewards/chosen": -0.18975374102592468, "rewards/margins": 0.37517982721328735, "rewards/rejected": -0.5649335384368896, "step": 7264 }, { "epoch": 19.890485968514717, "grad_norm": 6.311830520629883, "learning_rate": 5.342465753424658e-09, "log_odds_chosen": 2.4340226650238037, "log_odds_ratio": -0.36164391040802, "logits/chosen": 0.6727046966552734, "logits/rejected": 0.8029763102531433, "logps/chosen": -2.5030405521392822, "logps/rejected": -4.884881019592285, "loss": 0.8887, "nll_loss": 0.8525662422180176, "rewards/accuracies": 0.875, "rewards/chosen": -0.25030407309532166, "rewards/margins": 0.23818407952785492, "rewards/rejected": -0.4884881377220154, "step": 7265 }, { "epoch": 19.89322381930185, "grad_norm": 5.534486770629883, "learning_rate": 5.205479452054794e-09, "log_odds_chosen": 2.3656320571899414, "log_odds_ratio": -0.2140962779521942, "logits/chosen": 0.723146915435791, "logits/rejected": 0.7162635922431946, "logps/chosen": -1.7386353015899658, "logps/rejected": -3.9224061965942383, "loss": 0.5881, "nll_loss": 0.5666589736938477, "rewards/accuracies": 1.0, "rewards/chosen": -0.17386353015899658, "rewards/margins": 0.21837708353996277, "rewards/rejected": -0.39224061369895935, "step": 7266 }, { "epoch": 19.89596167008898, "grad_norm": 5.5756516456604, "learning_rate": 5.0684931506849315e-09, "log_odds_chosen": 1.065713882446289, "log_odds_ratio": -0.438567578792572, "logits/chosen": 0.7819539308547974, "logits/rejected": 0.7528472542762756, "logps/chosen": -1.6678309440612793, "logps/rejected": -2.649078845977783, "loss": 0.6299, "nll_loss": 0.5860294103622437, "rewards/accuracies": 0.875, "rewards/chosen": -0.16678309440612793, "rewards/margins": 0.09812480211257935, "rewards/rejected": -0.2649078965187073, "step": 7267 }, { "epoch": 19.89869952087611, "grad_norm": 5.8100266456604, "learning_rate": 4.931506849315068e-09, "log_odds_chosen": 3.1786611080169678, "log_odds_ratio": -0.2146454006433487, "logits/chosen": 0.6241635084152222, "logits/rejected": 0.5576352477073669, "logps/chosen": -2.3247811794281006, "logps/rejected": -5.404301643371582, "loss": 0.7177, "nll_loss": 0.696231484413147, "rewards/accuracies": 1.0, "rewards/chosen": -0.23247812688350677, "rewards/margins": 0.3079521059989929, "rewards/rejected": -0.5404301881790161, "step": 7268 }, { "epoch": 19.901437371663246, "grad_norm": 5.745066165924072, "learning_rate": 4.794520547945206e-09, "log_odds_chosen": 2.9090042114257812, "log_odds_ratio": -0.15548519790172577, "logits/chosen": 0.7233315110206604, "logits/rejected": 0.7525105476379395, "logps/chosen": -1.4831082820892334, "logps/rejected": -4.128662586212158, "loss": 0.5558, "nll_loss": 0.5402644276618958, "rewards/accuracies": 1.0, "rewards/chosen": -0.1483108401298523, "rewards/margins": 0.2645554542541504, "rewards/rejected": -0.4128662645816803, "step": 7269 }, { "epoch": 19.904175222450377, "grad_norm": 7.666417598724365, "learning_rate": 4.657534246575342e-09, "log_odds_chosen": 2.2350969314575195, "log_odds_ratio": -0.5451566576957703, "logits/chosen": 1.0245667695999146, "logits/rejected": 1.1219353675842285, "logps/chosen": -2.598599672317505, "logps/rejected": -4.756101131439209, "loss": 0.6835, "nll_loss": 0.6290156841278076, "rewards/accuracies": 0.75, "rewards/chosen": -0.25985994935035706, "rewards/margins": 0.21575015783309937, "rewards/rejected": -0.4756101369857788, "step": 7270 }, { "epoch": 19.906913073237508, "grad_norm": 4.615571975708008, "learning_rate": 4.5205479452054794e-09, "log_odds_chosen": 3.218334197998047, "log_odds_ratio": -0.29973429441452026, "logits/chosen": 0.8297216892242432, "logits/rejected": 0.8954567909240723, "logps/chosen": -2.551466464996338, "logps/rejected": -5.652298450469971, "loss": 0.7648, "nll_loss": 0.7348140478134155, "rewards/accuracies": 0.875, "rewards/chosen": -0.25514668226242065, "rewards/margins": 0.31008315086364746, "rewards/rejected": -0.5652298331260681, "step": 7271 }, { "epoch": 19.90965092402464, "grad_norm": 5.55401611328125, "learning_rate": 4.383561643835616e-09, "log_odds_chosen": 2.0456387996673584, "log_odds_ratio": -0.1914445161819458, "logits/chosen": 0.8686176538467407, "logits/rejected": 0.9465190172195435, "logps/chosen": -2.2278294563293457, "logps/rejected": -4.1583123207092285, "loss": 0.5926, "nll_loss": 0.5734376907348633, "rewards/accuracies": 1.0, "rewards/chosen": -0.2227829545736313, "rewards/margins": 0.19304831326007843, "rewards/rejected": -0.41583123803138733, "step": 7272 }, { "epoch": 19.912388774811774, "grad_norm": 8.186509132385254, "learning_rate": 4.246575342465753e-09, "log_odds_chosen": 2.4458320140838623, "log_odds_ratio": -0.5382714867591858, "logits/chosen": 0.8699194192886353, "logits/rejected": 0.946239173412323, "logps/chosen": -3.3533856868743896, "logps/rejected": -5.7003560066223145, "loss": 0.7792, "nll_loss": 0.7253584265708923, "rewards/accuracies": 0.875, "rewards/chosen": -0.3353385925292969, "rewards/margins": 0.23469698429107666, "rewards/rejected": -0.5700355768203735, "step": 7273 }, { "epoch": 19.915126625598905, "grad_norm": 8.613600730895996, "learning_rate": 4.10958904109589e-09, "log_odds_chosen": 3.223292827606201, "log_odds_ratio": -0.3949868083000183, "logits/chosen": 0.9866374135017395, "logits/rejected": 0.9804053902626038, "logps/chosen": -2.6462323665618896, "logps/rejected": -5.756991386413574, "loss": 0.8237, "nll_loss": 0.7842465043067932, "rewards/accuracies": 0.875, "rewards/chosen": -0.26462322473526, "rewards/margins": 0.31107592582702637, "rewards/rejected": -0.5756991505622864, "step": 7274 }, { "epoch": 19.917864476386036, "grad_norm": 9.605100631713867, "learning_rate": 3.972602739726027e-09, "log_odds_chosen": 1.1595335006713867, "log_odds_ratio": -0.39363765716552734, "logits/chosen": 0.7675201892852783, "logits/rejected": 0.728654146194458, "logps/chosen": -2.566511869430542, "logps/rejected": -3.6088707447052, "loss": 0.749, "nll_loss": 0.7096419334411621, "rewards/accuracies": 0.875, "rewards/chosen": -0.25665122270584106, "rewards/margins": 0.10423587262630463, "rewards/rejected": -0.3608870804309845, "step": 7275 }, { "epoch": 19.920602327173167, "grad_norm": 5.0467848777771, "learning_rate": 3.8356164383561645e-09, "log_odds_chosen": 1.7945380210876465, "log_odds_ratio": -0.29907524585723877, "logits/chosen": 0.6764822602272034, "logits/rejected": 0.7394185662269592, "logps/chosen": -2.376535177230835, "logps/rejected": -4.082215309143066, "loss": 0.5823, "nll_loss": 0.5524255037307739, "rewards/accuracies": 0.875, "rewards/chosen": -0.23765350878238678, "rewards/margins": 0.17056804895401, "rewards/rejected": -0.408221572637558, "step": 7276 }, { "epoch": 19.923340177960302, "grad_norm": 5.380558967590332, "learning_rate": 3.698630136986301e-09, "log_odds_chosen": 2.1558682918548584, "log_odds_ratio": -0.2700665295124054, "logits/chosen": 0.7626713514328003, "logits/rejected": 0.6955432295799255, "logps/chosen": -2.478085994720459, "logps/rejected": -4.552294731140137, "loss": 0.6588, "nll_loss": 0.6318050622940063, "rewards/accuracies": 0.875, "rewards/chosen": -0.24780859053134918, "rewards/margins": 0.20742084085941315, "rewards/rejected": -0.4552294611930847, "step": 7277 }, { "epoch": 19.926078028747433, "grad_norm": 6.5699357986450195, "learning_rate": 3.561643835616438e-09, "log_odds_chosen": 1.5737636089324951, "log_odds_ratio": -0.3850848078727722, "logits/chosen": 0.7553640604019165, "logits/rejected": 0.7582642436027527, "logps/chosen": -2.1668853759765625, "logps/rejected": -3.670438051223755, "loss": 0.6639, "nll_loss": 0.6253826022148132, "rewards/accuracies": 0.75, "rewards/chosen": -0.21668852865695953, "rewards/margins": 0.1503552794456482, "rewards/rejected": -0.36704379320144653, "step": 7278 }, { "epoch": 19.928815879534564, "grad_norm": 4.985336780548096, "learning_rate": 3.424657534246575e-09, "log_odds_chosen": 2.8741161823272705, "log_odds_ratio": -0.22812649607658386, "logits/chosen": 0.7679317593574524, "logits/rejected": 0.7695373296737671, "logps/chosen": -1.8978261947631836, "logps/rejected": -4.600327491760254, "loss": 0.5162, "nll_loss": 0.49341636896133423, "rewards/accuracies": 0.875, "rewards/chosen": -0.18978261947631836, "rewards/margins": 0.2702501118183136, "rewards/rejected": -0.46003276109695435, "step": 7279 }, { "epoch": 19.931553730321696, "grad_norm": 7.268202781677246, "learning_rate": 3.2876712328767125e-09, "log_odds_chosen": 1.6697838306427002, "log_odds_ratio": -0.4939446449279785, "logits/chosen": 0.7132423520088196, "logits/rejected": 0.7416423559188843, "logps/chosen": -2.6390509605407715, "logps/rejected": -4.23460578918457, "loss": 0.8115, "nll_loss": 0.7620758414268494, "rewards/accuracies": 0.75, "rewards/chosen": -0.2639051079750061, "rewards/margins": 0.15955550968647003, "rewards/rejected": -0.42346060276031494, "step": 7280 }, { "epoch": 19.93429158110883, "grad_norm": 6.249073505401611, "learning_rate": 3.1506849315068492e-09, "log_odds_chosen": 2.357008457183838, "log_odds_ratio": -0.2552262842655182, "logits/chosen": 1.0535575151443481, "logits/rejected": 1.0614092350006104, "logps/chosen": -2.1636927127838135, "logps/rejected": -4.431567192077637, "loss": 0.5927, "nll_loss": 0.5672104954719543, "rewards/accuracies": 0.875, "rewards/chosen": -0.21636928617954254, "rewards/margins": 0.22678741812705994, "rewards/rejected": -0.44315671920776367, "step": 7281 }, { "epoch": 19.93702943189596, "grad_norm": 6.337828159332275, "learning_rate": 3.0136986301369864e-09, "log_odds_chosen": 1.7858655452728271, "log_odds_ratio": -0.2740512192249298, "logits/chosen": 0.7792927026748657, "logits/rejected": 0.8179754614830017, "logps/chosen": -2.3371472358703613, "logps/rejected": -4.042984962463379, "loss": 0.6595, "nll_loss": 0.6320736408233643, "rewards/accuracies": 1.0, "rewards/chosen": -0.23371471464633942, "rewards/margins": 0.17058375477790833, "rewards/rejected": -0.40429848432540894, "step": 7282 }, { "epoch": 19.939767282683093, "grad_norm": 5.087053298950195, "learning_rate": 2.876712328767123e-09, "log_odds_chosen": 1.6806432008743286, "log_odds_ratio": -0.26269131898880005, "logits/chosen": 0.8286134004592896, "logits/rejected": 0.7737189531326294, "logps/chosen": -1.8568146228790283, "logps/rejected": -3.376077651977539, "loss": 0.5576, "nll_loss": 0.5313290357589722, "rewards/accuracies": 1.0, "rewards/chosen": -0.18568146228790283, "rewards/margins": 0.15192630887031555, "rewards/rejected": -0.337607741355896, "step": 7283 }, { "epoch": 19.942505133470227, "grad_norm": 5.331258773803711, "learning_rate": 2.73972602739726e-09, "log_odds_chosen": 2.898406744003296, "log_odds_ratio": -0.1420406997203827, "logits/chosen": 0.7781802415847778, "logits/rejected": 0.808059811592102, "logps/chosen": -2.3599460124969482, "logps/rejected": -5.089462757110596, "loss": 0.7218, "nll_loss": 0.7075859904289246, "rewards/accuracies": 1.0, "rewards/chosen": -0.2359946072101593, "rewards/margins": 0.2729516923427582, "rewards/rejected": -0.5089462995529175, "step": 7284 }, { "epoch": 19.94524298425736, "grad_norm": 6.107455253601074, "learning_rate": 2.602739726027397e-09, "log_odds_chosen": 2.2089290618896484, "log_odds_ratio": -0.2838178277015686, "logits/chosen": 0.9652178883552551, "logits/rejected": 1.0240007638931274, "logps/chosen": -1.996042251586914, "logps/rejected": -4.096859455108643, "loss": 0.6107, "nll_loss": 0.5823137760162354, "rewards/accuracies": 1.0, "rewards/chosen": -0.19960421323776245, "rewards/margins": 0.21008171141147614, "rewards/rejected": -0.4096859395503998, "step": 7285 }, { "epoch": 19.94798083504449, "grad_norm": 4.997766971588135, "learning_rate": 2.465753424657534e-09, "log_odds_chosen": 3.321902275085449, "log_odds_ratio": -0.11878850311040878, "logits/chosen": 1.0401356220245361, "logits/rejected": 1.0070722103118896, "logps/chosen": -1.8029322624206543, "logps/rejected": -4.9549560546875, "loss": 0.5157, "nll_loss": 0.503848671913147, "rewards/accuracies": 1.0, "rewards/chosen": -0.18029320240020752, "rewards/margins": 0.31520241498947144, "rewards/rejected": -0.49549564719200134, "step": 7286 }, { "epoch": 19.95071868583162, "grad_norm": 5.30078649520874, "learning_rate": 2.328767123287671e-09, "log_odds_chosen": 2.524540662765503, "log_odds_ratio": -0.24347546696662903, "logits/chosen": 0.9297879934310913, "logits/rejected": 0.9805557727813721, "logps/chosen": -2.3871824741363525, "logps/rejected": -4.8163557052612305, "loss": 0.6347, "nll_loss": 0.6103414297103882, "rewards/accuracies": 0.875, "rewards/chosen": -0.23871825635433197, "rewards/margins": 0.24291731417179108, "rewards/rejected": -0.48163560032844543, "step": 7287 }, { "epoch": 19.953456536618756, "grad_norm": 7.463265895843506, "learning_rate": 2.191780821917808e-09, "log_odds_chosen": 2.9606924057006836, "log_odds_ratio": -0.26609742641448975, "logits/chosen": 1.0220775604248047, "logits/rejected": 1.0736287832260132, "logps/chosen": -2.4928157329559326, "logps/rejected": -5.361902236938477, "loss": 0.6625, "nll_loss": 0.6358976364135742, "rewards/accuracies": 0.75, "rewards/chosen": -0.24928157031536102, "rewards/margins": 0.28690868616104126, "rewards/rejected": -0.5361902713775635, "step": 7288 }, { "epoch": 19.956194387405887, "grad_norm": 6.30015230178833, "learning_rate": 2.054794520547945e-09, "log_odds_chosen": 2.658287525177002, "log_odds_ratio": -0.5124968886375427, "logits/chosen": 0.8554383516311646, "logits/rejected": 0.9099058508872986, "logps/chosen": -2.4472012519836426, "logps/rejected": -5.016209125518799, "loss": 0.7119, "nll_loss": 0.6606965065002441, "rewards/accuracies": 0.875, "rewards/chosen": -0.24472013115882874, "rewards/margins": 0.25690075755119324, "rewards/rejected": -0.501620888710022, "step": 7289 }, { "epoch": 19.958932238193018, "grad_norm": 5.1680498123168945, "learning_rate": 1.9178082191780823e-09, "log_odds_chosen": 1.9727954864501953, "log_odds_ratio": -0.27402544021606445, "logits/chosen": 0.8020360469818115, "logits/rejected": 0.7756956219673157, "logps/chosen": -1.4061975479125977, "logps/rejected": -3.1782431602478027, "loss": 0.5469, "nll_loss": 0.5195057392120361, "rewards/accuracies": 1.0, "rewards/chosen": -0.14061975479125977, "rewards/margins": 0.17720456421375275, "rewards/rejected": -0.3178243339061737, "step": 7290 }, { "epoch": 19.96167008898015, "grad_norm": 7.577070713043213, "learning_rate": 1.780821917808219e-09, "log_odds_chosen": 1.5659689903259277, "log_odds_ratio": -0.5725278258323669, "logits/chosen": 0.7885111570358276, "logits/rejected": 0.9086637496948242, "logps/chosen": -3.4645886421203613, "logps/rejected": -4.998415946960449, "loss": 0.8893, "nll_loss": 0.832054615020752, "rewards/accuracies": 0.75, "rewards/chosen": -0.3464588522911072, "rewards/margins": 0.15338271856307983, "rewards/rejected": -0.499841570854187, "step": 7291 }, { "epoch": 19.964407939767284, "grad_norm": 10.970486640930176, "learning_rate": 1.6438356164383562e-09, "log_odds_chosen": 0.6917904615402222, "log_odds_ratio": -0.6144506931304932, "logits/chosen": 1.031857967376709, "logits/rejected": 1.016832709312439, "logps/chosen": -2.2263824939727783, "logps/rejected": -2.8556385040283203, "loss": 0.6922, "nll_loss": 0.6307348608970642, "rewards/accuracies": 0.625, "rewards/chosen": -0.22263824939727783, "rewards/margins": 0.06292562186717987, "rewards/rejected": -0.2855638563632965, "step": 7292 }, { "epoch": 19.967145790554415, "grad_norm": 5.725015640258789, "learning_rate": 1.5068493150684932e-09, "log_odds_chosen": 2.52663516998291, "log_odds_ratio": -0.18749035894870758, "logits/chosen": 1.0325974225997925, "logits/rejected": 1.0227806568145752, "logps/chosen": -1.4850118160247803, "logps/rejected": -3.757761240005493, "loss": 0.5878, "nll_loss": 0.5690316557884216, "rewards/accuracies": 1.0, "rewards/chosen": -0.1485011726617813, "rewards/margins": 0.22727492451667786, "rewards/rejected": -0.37577611207962036, "step": 7293 }, { "epoch": 19.969883641341546, "grad_norm": 4.578065872192383, "learning_rate": 1.36986301369863e-09, "log_odds_chosen": 2.579643726348877, "log_odds_ratio": -0.22368879616260529, "logits/chosen": 0.7106360197067261, "logits/rejected": 0.6917645335197449, "logps/chosen": -2.596177101135254, "logps/rejected": -4.968076229095459, "loss": 0.6187, "nll_loss": 0.59628826379776, "rewards/accuracies": 0.875, "rewards/chosen": -0.25961771607398987, "rewards/margins": 0.23718993365764618, "rewards/rejected": -0.49680763483047485, "step": 7294 }, { "epoch": 19.972621492128678, "grad_norm": 5.753918647766113, "learning_rate": 1.232876712328767e-09, "log_odds_chosen": 2.338796377182007, "log_odds_ratio": -0.21318301558494568, "logits/chosen": 0.5570317506790161, "logits/rejected": 0.5916208028793335, "logps/chosen": -1.8657875061035156, "logps/rejected": -4.076245307922363, "loss": 0.6263, "nll_loss": 0.604944109916687, "rewards/accuracies": 0.875, "rewards/chosen": -0.18657876551151276, "rewards/margins": 0.22104579210281372, "rewards/rejected": -0.40762460231781006, "step": 7295 }, { "epoch": 19.975359342915812, "grad_norm": 4.540257453918457, "learning_rate": 1.095890410958904e-09, "log_odds_chosen": 3.048471450805664, "log_odds_ratio": -0.178314670920372, "logits/chosen": 0.7571278810501099, "logits/rejected": 0.7367595434188843, "logps/chosen": -1.5042322874069214, "logps/rejected": -4.3459038734436035, "loss": 0.5792, "nll_loss": 0.5613313317298889, "rewards/accuracies": 1.0, "rewards/chosen": -0.15042322874069214, "rewards/margins": 0.28416717052459717, "rewards/rejected": -0.4345903992652893, "step": 7296 }, { "epoch": 19.978097193702943, "grad_norm": 7.113174915313721, "learning_rate": 9.589041095890411e-10, "log_odds_chosen": 2.8920915126800537, "log_odds_ratio": -0.13914310932159424, "logits/chosen": 0.867761492729187, "logits/rejected": 0.9268158674240112, "logps/chosen": -2.1304514408111572, "logps/rejected": -4.90423059463501, "loss": 0.5863, "nll_loss": 0.5723491907119751, "rewards/accuracies": 1.0, "rewards/chosen": -0.213045135140419, "rewards/margins": 0.2773779332637787, "rewards/rejected": -0.4904230833053589, "step": 7297 }, { "epoch": 19.980835044490075, "grad_norm": 5.692424774169922, "learning_rate": 8.219178082191781e-10, "log_odds_chosen": 2.508049249649048, "log_odds_ratio": -0.21150577068328857, "logits/chosen": 1.0514793395996094, "logits/rejected": 1.0667359828948975, "logps/chosen": -2.2318918704986572, "logps/rejected": -4.608162879943848, "loss": 0.5763, "nll_loss": 0.5551763772964478, "rewards/accuracies": 1.0, "rewards/chosen": -0.22318919003009796, "rewards/margins": 0.23762710392475128, "rewards/rejected": -0.46081629395484924, "step": 7298 }, { "epoch": 19.983572895277206, "grad_norm": 7.751111030578613, "learning_rate": 6.84931506849315e-10, "log_odds_chosen": 1.3390827178955078, "log_odds_ratio": -0.32986360788345337, "logits/chosen": 0.8430178761482239, "logits/rejected": 0.9344521760940552, "logps/chosen": -2.421731472015381, "logps/rejected": -3.6599924564361572, "loss": 0.6265, "nll_loss": 0.59354567527771, "rewards/accuracies": 0.875, "rewards/chosen": -0.24217312037944794, "rewards/margins": 0.12382614612579346, "rewards/rejected": -0.3659992814064026, "step": 7299 }, { "epoch": 19.98631074606434, "grad_norm": 7.644182205200195, "learning_rate": 5.47945205479452e-10, "log_odds_chosen": 0.9751604795455933, "log_odds_ratio": -0.42672276496887207, "logits/chosen": 0.9561497569084167, "logits/rejected": 0.8889648914337158, "logps/chosen": -2.815711498260498, "logps/rejected": -3.721054792404175, "loss": 0.7505, "nll_loss": 0.707865834236145, "rewards/accuracies": 0.75, "rewards/chosen": -0.2815711498260498, "rewards/margins": 0.09053432941436768, "rewards/rejected": -0.3721054792404175, "step": 7300 } ], "logging_steps": 1, "max_steps": 7300, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }