{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 1, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "grad_norm": 18.625, "learning_rate": 7.599999999999999e-06, "log_odds_chosen": 0.3293280005455017, "log_odds_ratio": -0.545608401298523, "logits/chosen": -0.22181883454322815, "logits/rejected": -0.2948111891746521, "logps/chosen": -1.9494528770446777, "logps/rejected": -2.2380290031433105, "loss": 1.8976, "nll_loss": 1.8430625200271606, "rewards/accuracies": 0.984375, "rewards/chosen": -0.1949452906847, "rewards/margins": 0.028857626020908356, "rewards/rejected": -0.22380293905735016, "step": 1 }, { "epoch": 0.4, "eval_log_odds_chosen": 0.3650469183921814, "eval_log_odds_ratio": -0.5312943458557129, "eval_logits/chosen": -0.11938808858394623, "eval_logits/rejected": -0.15210816264152527, "eval_logps/chosen": -1.7196087837219238, "eval_logps/rejected": -2.026923179626465, "eval_loss": 1.6382209062576294, "eval_nll_loss": 1.585091471672058, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.17196084558963776, "eval_rewards/margins": 0.030731473118066788, "eval_rewards/rejected": -0.20269232988357544, "eval_runtime": 0.9093, "eval_samples_per_second": 19.795, "eval_steps_per_second": 9.898, "step": 1 }, { "epoch": 0.8, "grad_norm": 8.125, "learning_rate": 7.2e-06, "log_odds_chosen": 0.3380122482776642, "log_odds_ratio": -0.5430496335029602, "logits/chosen": -0.07896450906991959, "logits/rejected": -0.11844252794981003, "logps/chosen": -1.65217125415802, "logps/rejected": -1.9326075315475464, "loss": 1.5573, "nll_loss": 1.5030204057693481, "rewards/accuracies": 0.96875, "rewards/chosen": -0.16521713137626648, "rewards/margins": 0.02804364264011383, "rewards/rejected": -0.19326075911521912, "step": 2 }, { "epoch": 0.8, "eval_log_odds_chosen": 0.39069631695747375, "eval_log_odds_ratio": -0.5206953883171082, "eval_logits/chosen": -0.10498537868261337, "eval_logits/rejected": -0.136393204331398, "eval_logps/chosen": -1.5854017734527588, "eval_logps/rejected": -1.9058791399002075, "eval_loss": 1.5333527326583862, "eval_nll_loss": 1.4812833070755005, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.15854017436504364, "eval_rewards/margins": 0.032047729939222336, "eval_rewards/rejected": -0.19058789312839508, "eval_runtime": 0.9117, "eval_samples_per_second": 19.744, "eval_steps_per_second": 9.872, "step": 2 }, { "epoch": 1.2, "grad_norm": 7.875, "learning_rate": 6.799999999999999e-06, "log_odds_chosen": 0.36618566513061523, "log_odds_ratio": -0.5316208004951477, "logits/chosen": -0.045309849083423615, "logits/rejected": -0.09037788212299347, "logps/chosen": -1.4718542098999023, "logps/rejected": -1.7647374868392944, "loss": 1.4427, "nll_loss": 1.3894941806793213, "rewards/accuracies": 0.984375, "rewards/chosen": -0.14718542993068695, "rewards/margins": 0.029288342222571373, "rewards/rejected": -0.17647376656532288, "step": 3 }, { "epoch": 1.2, "eval_log_odds_chosen": 0.41840454936027527, "eval_log_odds_ratio": -0.5097466111183167, "eval_logits/chosen": -0.10623180121183395, "eval_logits/rejected": -0.13603489100933075, "eval_logps/chosen": -1.4396543502807617, "eval_logps/rejected": -1.7711676359176636, "eval_loss": 1.4337514638900757, "eval_nll_loss": 1.3827767372131348, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.1439654380083084, "eval_rewards/margins": 0.03315134346485138, "eval_rewards/rejected": -0.1771167814731598, "eval_runtime": 0.9101, "eval_samples_per_second": 19.778, "eval_steps_per_second": 9.889, "step": 3 }, { "epoch": 1.6, "grad_norm": 7.78125, "learning_rate": 6.4e-06, "log_odds_chosen": 0.4042999744415283, "log_odds_ratio": -0.5167055726051331, "logits/chosen": -0.0431833378970623, "logits/rejected": -0.07951641082763672, "logps/chosen": -1.3549946546554565, "logps/rejected": -1.6684811115264893, "loss": 1.3493, "nll_loss": 1.2975877523422241, "rewards/accuracies": 0.984375, "rewards/chosen": -0.13549946248531342, "rewards/margins": 0.03134865313768387, "rewards/rejected": -0.1668480932712555, "step": 4 }, { "epoch": 1.6, "eval_log_odds_chosen": 0.44134366512298584, "eval_log_odds_ratio": -0.5008935332298279, "eval_logits/chosen": -0.104909747838974, "eval_logits/rejected": -0.13491111993789673, "eval_logps/chosen": -1.340553641319275, "eval_logps/rejected": -1.6810719966888428, "eval_loss": 1.3429497480392456, "eval_nll_loss": 1.292860507965088, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.13405534625053406, "eval_rewards/margins": 0.03405185043811798, "eval_rewards/rejected": -0.16810721158981323, "eval_runtime": 0.9119, "eval_samples_per_second": 19.738, "eval_steps_per_second": 9.869, "step": 4 }, { "epoch": 2.0, "grad_norm": 6.96875, "learning_rate": 6e-06, "log_odds_chosen": 0.4279482960700989, "log_odds_ratio": -0.5070147514343262, "logits/chosen": -0.034176260232925415, "logits/rejected": -0.07206660509109497, "logps/chosen": -1.2959173917770386, "logps/rejected": -1.6209981441497803, "loss": 1.2683, "nll_loss": 1.2175886631011963, "rewards/accuracies": 0.96875, "rewards/chosen": -0.12959173321723938, "rewards/margins": 0.03250807896256447, "rewards/rejected": -0.16209980845451355, "step": 5 }, { "epoch": 2.0, "eval_log_odds_chosen": 0.46249422430992126, "eval_log_odds_ratio": -0.49292659759521484, "eval_logits/chosen": -0.11007735878229141, "eval_logits/rejected": -0.14086602628231049, "eval_logps/chosen": -1.2711644172668457, "eval_logps/rejected": -1.6204769611358643, "eval_loss": 1.2642947435379028, "eval_nll_loss": 1.215002179145813, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.12711645662784576, "eval_rewards/margins": 0.034931257367134094, "eval_rewards/rejected": -0.16204769909381866, "eval_runtime": 0.9131, "eval_samples_per_second": 19.712, "eval_steps_per_second": 9.856, "step": 5 }, { "epoch": 2.4, "grad_norm": 6.65625, "learning_rate": 5.6e-06, "log_odds_chosen": 0.4413740336894989, "log_odds_ratio": -0.5025829672813416, "logits/chosen": -0.026908639818429947, "logits/rejected": -0.06323603540658951, "logps/chosen": -1.2035048007965088, "logps/rejected": -1.5302585363388062, "loss": 1.1736, "nll_loss": 1.1233787536621094, "rewards/accuracies": 0.96875, "rewards/chosen": -0.12035048753023148, "rewards/margins": 0.03267538174986839, "rewards/rejected": -0.15302586555480957, "step": 6 }, { "epoch": 2.4, "eval_log_odds_chosen": 0.4787115454673767, "eval_log_odds_ratio": -0.4869447946548462, "eval_logits/chosen": -0.11934076249599457, "eval_logits/rejected": -0.15017718076705933, "eval_logps/chosen": -1.2190965414047241, "eval_logps/rejected": -1.5743210315704346, "eval_loss": 1.1919617652893066, "eval_nll_loss": 1.1432671546936035, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.12190967053174973, "eval_rewards/margins": 0.03552243858575821, "eval_rewards/rejected": -0.15743210911750793, "eval_runtime": 0.9096, "eval_samples_per_second": 19.79, "eval_steps_per_second": 9.895, "step": 6 }, { "epoch": 2.8, "grad_norm": 6.96875, "learning_rate": 5.2e-06, "log_odds_chosen": 0.46652132272720337, "log_odds_ratio": -0.4945680797100067, "logits/chosen": -0.0492391437292099, "logits/rejected": -0.0838259607553482, "logps/chosen": -1.1840012073516846, "logps/rejected": -1.525024652481079, "loss": 1.1212, "nll_loss": 1.0717414617538452, "rewards/accuracies": 0.984375, "rewards/chosen": -0.11840011179447174, "rewards/margins": 0.03410235792398453, "rewards/rejected": -0.15250247716903687, "step": 7 }, { "epoch": 2.8, "eval_log_odds_chosen": 0.4977823495864868, "eval_log_odds_ratio": -0.4801054000854492, "eval_logits/chosen": -0.12862297892570496, "eval_logits/rejected": -0.16006678342819214, "eval_logps/chosen": -1.1753649711608887, "eval_logps/rejected": -1.5391558408737183, "eval_loss": 1.1233677864074707, "eval_nll_loss": 1.0753573179244995, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.1175365075469017, "eval_rewards/margins": 0.03637908399105072, "eval_rewards/rejected": -0.15391558408737183, "eval_runtime": 0.9109, "eval_samples_per_second": 19.76, "eval_steps_per_second": 9.88, "step": 7 }, { "epoch": 3.2, "grad_norm": 6.84375, "learning_rate": 4.8e-06, "log_odds_chosen": 0.4919642210006714, "log_odds_ratio": -0.48277872800827026, "logits/chosen": -0.05812246352434158, "logits/rejected": -0.0962948203086853, "logps/chosen": -1.1183116436004639, "logps/rejected": -1.4710171222686768, "loss": 1.0518, "nll_loss": 1.0035254955291748, "rewards/accuracies": 0.984375, "rewards/chosen": -0.11183115839958191, "rewards/margins": 0.03527054935693741, "rewards/rejected": -0.14710170030593872, "step": 8 }, { "epoch": 3.2, "eval_log_odds_chosen": 0.5149489045143127, "eval_log_odds_ratio": -0.47412246465682983, "eval_logits/chosen": -0.14218762516975403, "eval_logits/rejected": -0.17368356883525848, "eval_logps/chosen": -1.1381663084030151, "eval_logps/rejected": -1.5092874765396118, "eval_loss": 1.0610299110412598, "eval_nll_loss": 1.013617753982544, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11381663382053375, "eval_rewards/margins": 0.03711211308836937, "eval_rewards/rejected": -0.15092875063419342, "eval_runtime": 0.9136, "eval_samples_per_second": 19.703, "eval_steps_per_second": 9.852, "step": 8 }, { "epoch": 3.6, "grad_norm": 6.71875, "learning_rate": 4.4e-06, "log_odds_chosen": 0.5135414004325867, "log_odds_ratio": -0.47593823075294495, "logits/chosen": -0.06834974884986877, "logits/rejected": -0.11046632379293442, "logps/chosen": -1.0532824993133545, "logps/rejected": -1.4117248058319092, "loss": 0.9805, "nll_loss": 0.9329336285591125, "rewards/accuracies": 0.984375, "rewards/chosen": -0.1053282618522644, "rewards/margins": 0.035844214260578156, "rewards/rejected": -0.14117246866226196, "step": 9 }, { "epoch": 3.6, "eval_log_odds_chosen": 0.5320238471031189, "eval_log_odds_ratio": -0.4681590497493744, "eval_logits/chosen": -0.1644686907529831, "eval_logits/rejected": -0.19686466455459595, "eval_logps/chosen": -1.104932427406311, "eval_logps/rejected": -1.4836636781692505, "eval_loss": 1.0012433528900146, "eval_nll_loss": 0.9544275999069214, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.1104932650923729, "eval_rewards/margins": 0.03787311539053917, "eval_rewards/rejected": -0.14836637675762177, "eval_runtime": 0.9099, "eval_samples_per_second": 19.783, "eval_steps_per_second": 9.892, "step": 9 }, { "epoch": 4.0, "grad_norm": 6.75, "learning_rate": 4e-06, "log_odds_chosen": 0.5470705032348633, "log_odds_ratio": -0.4658868908882141, "logits/chosen": -0.09963471442461014, "logits/rejected": -0.13952209055423737, "logps/chosen": -1.0679322481155396, "logps/rejected": -1.4557496309280396, "loss": 0.9299, "nll_loss": 0.8832955956459045, "rewards/accuracies": 1.0, "rewards/chosen": -0.10679321736097336, "rewards/margins": 0.038781747221946716, "rewards/rejected": -0.14557495713233948, "step": 10 }, { "epoch": 4.0, "eval_log_odds_chosen": 0.547681450843811, "eval_log_odds_ratio": -0.46276018023490906, "eval_logits/chosen": -0.1875133067369461, "eval_logits/rejected": -0.22008682787418365, "eval_logps/chosen": -1.07937490940094, "eval_logps/rejected": -1.4652737379074097, "eval_loss": 0.9495540261268616, "eval_nll_loss": 0.9032779335975647, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10793750733137131, "eval_rewards/margins": 0.03858988359570503, "eval_rewards/rejected": -0.14652739465236664, "eval_runtime": 0.9118, "eval_samples_per_second": 19.742, "eval_steps_per_second": 9.871, "step": 10 }, { "epoch": 4.4, "grad_norm": 6.5625, "learning_rate": 3.6e-06, "log_odds_chosen": 0.5115205645561218, "log_odds_ratio": -0.4781632423400879, "logits/chosen": -0.11988667398691177, "logits/rejected": -0.15675179660320282, "logps/chosen": -1.0269582271575928, "logps/rejected": -1.3790360689163208, "loss": 0.8761, "nll_loss": 0.8282526135444641, "rewards/accuracies": 0.984375, "rewards/chosen": -0.10269583761692047, "rewards/margins": 0.03520777449011803, "rewards/rejected": -0.1379036009311676, "step": 11 }, { "epoch": 4.4, "eval_log_odds_chosen": 0.560771107673645, "eval_log_odds_ratio": -0.45837968587875366, "eval_logits/chosen": -0.21046772599220276, "eval_logits/rejected": -0.2431277632713318, "eval_logps/chosen": -1.0591222047805786, "eval_logps/rejected": -1.4509668350219727, "eval_loss": 0.9070050120353699, "eval_nll_loss": 0.8611669540405273, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10591220110654831, "eval_rewards/margins": 0.03918447345495224, "eval_rewards/rejected": -0.14509668946266174, "eval_runtime": 0.9119, "eval_samples_per_second": 19.739, "eval_steps_per_second": 9.869, "step": 11 }, { "epoch": 4.8, "grad_norm": 6.5625, "learning_rate": 3.2e-06, "log_odds_chosen": 0.6133227944374084, "log_odds_ratio": -0.4412252902984619, "logits/chosen": -0.14904728531837463, "logits/rejected": -0.1963028907775879, "logps/chosen": -0.9898001551628113, "logps/rejected": -1.4105660915374756, "loss": 0.8337, "nll_loss": 0.7895629405975342, "rewards/accuracies": 1.0, "rewards/chosen": -0.09898000955581665, "rewards/margins": 0.04207659140229225, "rewards/rejected": -0.1410566121339798, "step": 12 }, { "epoch": 4.8, "eval_log_odds_chosen": 0.566861093044281, "eval_log_odds_ratio": -0.45656245946884155, "eval_logits/chosen": -0.22315236926078796, "eval_logits/rejected": -0.2561546862125397, "eval_logps/chosen": -1.0492280721664429, "eval_logps/rejected": -1.443569302558899, "eval_loss": 0.8863641023635864, "eval_nll_loss": 0.8407078981399536, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.1049228087067604, "eval_rewards/margins": 0.03943413123488426, "eval_rewards/rejected": -0.14435693621635437, "eval_runtime": 0.9164, "eval_samples_per_second": 19.642, "eval_steps_per_second": 9.821, "step": 12 }, { "epoch": 5.2, "grad_norm": 6.5, "learning_rate": 2.8e-06, "log_odds_chosen": 0.49919161200523376, "log_odds_ratio": -0.4824044108390808, "logits/chosen": -0.15154853463172913, "logits/rejected": -0.18894340097904205, "logps/chosen": -1.0274688005447388, "logps/rejected": -1.3725156784057617, "loss": 0.7975, "nll_loss": 0.7492961883544922, "rewards/accuracies": 0.953125, "rewards/chosen": -0.1027468740940094, "rewards/margins": 0.034504685550928116, "rewards/rejected": -0.13725155591964722, "step": 13 }, { "epoch": 5.2, "eval_log_odds_chosen": 0.57403564453125, "eval_log_odds_ratio": -0.45407184958457947, "eval_logits/chosen": -0.23968791961669922, "eval_logits/rejected": -0.2726818025112152, "eval_logps/chosen": -1.0405869483947754, "eval_logps/rejected": -1.4385521411895752, "eval_loss": 0.8664105534553528, "eval_nll_loss": 0.8210033774375916, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10405868291854858, "eval_rewards/margins": 0.03979651629924774, "eval_rewards/rejected": -0.14385519921779633, "eval_runtime": 0.9125, "eval_samples_per_second": 19.726, "eval_steps_per_second": 9.863, "step": 13 }, { "epoch": 5.6, "grad_norm": 6.40625, "learning_rate": 2.4e-06, "log_odds_chosen": 0.6314184665679932, "log_odds_ratio": -0.4354270100593567, "logits/chosen": -0.1857105791568756, "logits/rejected": -0.22656874358654022, "logps/chosen": -0.9448862075805664, "logps/rejected": -1.3670051097869873, "loss": 0.788, "nll_loss": 0.7444556951522827, "rewards/accuracies": 1.0, "rewards/chosen": -0.09448862820863724, "rewards/margins": 0.04221189767122269, "rewards/rejected": -0.13670052587985992, "step": 14 }, { "epoch": 5.6, "eval_log_odds_chosen": 0.5794126987457275, "eval_log_odds_ratio": -0.4523561894893646, "eval_logits/chosen": -0.2507224678993225, "eval_logits/rejected": -0.2837482690811157, "eval_logps/chosen": -1.0325850248336792, "eval_logps/rejected": -1.4329302310943604, "eval_loss": 0.8492475152015686, "eval_nll_loss": 0.8040118217468262, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10325851291418076, "eval_rewards/margins": 0.04003452509641647, "eval_rewards/rejected": -0.14329302310943604, "eval_runtime": 0.9113, "eval_samples_per_second": 19.752, "eval_steps_per_second": 9.876, "step": 14 }, { "epoch": 6.0, "grad_norm": 6.25, "learning_rate": 2e-06, "log_odds_chosen": 0.5837588906288147, "log_odds_ratio": -0.45432496070861816, "logits/chosen": -0.18676218390464783, "logits/rejected": -0.23436766862869263, "logps/chosen": -1.0190256834030151, "logps/rejected": -1.4252986907958984, "loss": 0.78, "nll_loss": 0.7345477342605591, "rewards/accuracies": 0.984375, "rewards/chosen": -0.1019025593996048, "rewards/margins": 0.040627315640449524, "rewards/rejected": -0.14252987504005432, "step": 15 }, { "epoch": 6.0, "eval_log_odds_chosen": 0.5839331150054932, "eval_log_odds_ratio": -0.45080792903900146, "eval_logits/chosen": -0.26143890619277954, "eval_logits/rejected": -0.29441285133361816, "eval_logps/chosen": -1.0263959169387817, "eval_logps/rejected": -1.4286550283432007, "eval_loss": 0.8333800435066223, "eval_nll_loss": 0.7882992625236511, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10263960063457489, "eval_rewards/margins": 0.040225885808467865, "eval_rewards/rejected": -0.14286547899246216, "eval_runtime": 0.9128, "eval_samples_per_second": 19.72, "eval_steps_per_second": 9.86, "step": 15 }, { "epoch": 6.4, "grad_norm": 6.25, "learning_rate": 1.6e-06, "log_odds_chosen": 0.6216727495193481, "log_odds_ratio": -0.4399999976158142, "logits/chosen": -0.19661211967468262, "logits/rejected": -0.24193710088729858, "logps/chosen": -0.949080228805542, "logps/rejected": -1.3682817220687866, "loss": 0.7395, "nll_loss": 0.6955283880233765, "rewards/accuracies": 1.0, "rewards/chosen": -0.09490802884101868, "rewards/margins": 0.04192016273736954, "rewards/rejected": -0.13682818412780762, "step": 16 }, { "epoch": 6.4, "eval_log_odds_chosen": 0.5865243673324585, "eval_log_odds_ratio": -0.44995343685150146, "eval_logits/chosen": -0.2722480595111847, "eval_logits/rejected": -0.30540305376052856, "eval_logps/chosen": -1.0213567018508911, "eval_logps/rejected": -1.4244111776351929, "eval_loss": 0.821089506149292, "eval_nll_loss": 0.7760941386222839, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10213566571474075, "eval_rewards/margins": 0.040305450558662415, "eval_rewards/rejected": -0.14244110882282257, "eval_runtime": 0.9109, "eval_samples_per_second": 19.76, "eval_steps_per_second": 9.88, "step": 16 }, { "epoch": 6.8, "grad_norm": 6.15625, "learning_rate": 1.2e-06, "log_odds_chosen": 0.5441011786460876, "log_odds_ratio": -0.4661071002483368, "logits/chosen": -0.20644214749336243, "logits/rejected": -0.2489599585533142, "logps/chosen": -1.002626657485962, "logps/rejected": -1.3740018606185913, "loss": 0.7446, "nll_loss": 0.6980130076408386, "rewards/accuracies": 0.96875, "rewards/chosen": -0.10026266425848007, "rewards/margins": 0.037137530744075775, "rewards/rejected": -0.13740019500255585, "step": 17 }, { "epoch": 6.8, "eval_log_odds_chosen": 0.5888115763664246, "eval_log_odds_ratio": -0.44922754168510437, "eval_logits/chosen": -0.2722025513648987, "eval_logits/rejected": -0.30542224645614624, "eval_logps/chosen": -1.0186693668365479, "eval_logps/rejected": -1.4229329824447632, "eval_loss": 0.8164036870002747, "eval_nll_loss": 0.7714808583259583, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10186693072319031, "eval_rewards/margins": 0.040426358580589294, "eval_rewards/rejected": -0.1422932893037796, "eval_runtime": 0.9099, "eval_samples_per_second": 19.782, "eval_steps_per_second": 9.891, "step": 17 }, { "epoch": 7.2, "grad_norm": 6.125, "learning_rate": 8e-07, "log_odds_chosen": 0.603852391242981, "log_odds_ratio": -0.44559258222579956, "logits/chosen": -0.21281519532203674, "logits/rejected": -0.2561994791030884, "logps/chosen": -0.9362643957138062, "logps/rejected": -1.3377454280853271, "loss": 0.7518, "nll_loss": 0.7072104811668396, "rewards/accuracies": 1.0, "rewards/chosen": -0.09362644702196121, "rewards/margins": 0.040148116648197174, "rewards/rejected": -0.1337745636701584, "step": 18 }, { "epoch": 7.2, "eval_log_odds_chosen": 0.5903448462486267, "eval_log_odds_ratio": -0.4487246870994568, "eval_logits/chosen": -0.2774750292301178, "eval_logits/rejected": -0.3105829358100891, "eval_logps/chosen": -1.0175344944000244, "eval_logps/rejected": -1.4226274490356445, "eval_loss": 0.812524139881134, "eval_nll_loss": 0.7676517963409424, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10175344347953796, "eval_rewards/margins": 0.040509287267923355, "eval_rewards/rejected": -0.1422627568244934, "eval_runtime": 0.9145, "eval_samples_per_second": 19.683, "eval_steps_per_second": 9.841, "step": 18 }, { "epoch": 7.6, "grad_norm": 6.21875, "learning_rate": 4e-07, "log_odds_chosen": 0.6179953217506409, "log_odds_ratio": -0.4413047730922699, "logits/chosen": -0.22949087619781494, "logits/rejected": -0.27671945095062256, "logps/chosen": -0.9877333045005798, "logps/rejected": -1.4103630781173706, "loss": 0.7431, "nll_loss": 0.6990159749984741, "rewards/accuracies": 0.984375, "rewards/chosen": -0.09877333790063858, "rewards/margins": 0.0422629676759243, "rewards/rejected": -0.14103631675243378, "step": 19 }, { "epoch": 7.6, "eval_log_odds_chosen": 0.5911502838134766, "eval_log_odds_ratio": -0.44839149713516235, "eval_logits/chosen": -0.276792049407959, "eval_logits/rejected": -0.31038832664489746, "eval_logps/chosen": -1.0161813497543335, "eval_logps/rejected": -1.4216675758361816, "eval_loss": 0.8106683492660522, "eval_nll_loss": 0.7658291459083557, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10161812603473663, "eval_rewards/margins": 0.04054862633347511, "eval_rewards/rejected": -0.14216677844524384, "eval_runtime": 0.9137, "eval_samples_per_second": 19.701, "eval_steps_per_second": 9.85, "step": 19 }, { "epoch": 8.0, "grad_norm": 6.15625, "learning_rate": 0.0, "log_odds_chosen": 0.578275740146637, "log_odds_ratio": -0.45558279752731323, "logits/chosen": -0.210488423705101, "logits/rejected": -0.24888469278812408, "logps/chosen": -0.9616943001747131, "logps/rejected": -1.3508220911026, "loss": 0.726, "nll_loss": 0.6804530620574951, "rewards/accuracies": 0.984375, "rewards/chosen": -0.09616944193840027, "rewards/margins": 0.038912780582904816, "rewards/rejected": -0.1350822150707245, "step": 20 }, { "epoch": 8.0, "eval_log_odds_chosen": 0.5904660224914551, "eval_log_odds_ratio": -0.44868627190589905, "eval_logits/chosen": -0.27975308895111084, "eval_logits/rejected": -0.3132215440273285, "eval_logps/chosen": -1.0163097381591797, "eval_logps/rejected": -1.4213618040084839, "eval_loss": 0.8110137581825256, "eval_nll_loss": 0.7661450505256653, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10163097083568573, "eval_rewards/margins": 0.04050520807504654, "eval_rewards/rejected": -0.14213618636131287, "eval_runtime": 0.9114, "eval_samples_per_second": 19.75, "eval_steps_per_second": 9.875, "step": 20 }, { "epoch": 8.0, "step": 20, "total_flos": 0.0, "train_loss": 1.0276277005672454, "train_runtime": 261.791, "train_samples_per_second": 6.074, "train_steps_per_second": 0.076 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }