{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997038791827065, "eval_steps": 500, "global_step": 1688, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005922416345869114, "grad_norm": 25.375, "learning_rate": 2.9585798816568044e-08, "log_odds_chosen": -0.4997142255306244, "log_odds_ratio": -1.0621646642684937, "logits/chosen": -2.2295050621032715, "logits/rejected": -2.215860366821289, "logps/chosen": -0.7159513235092163, "logps/rejected": -0.47170203924179077, "loss": 1.2686, "nll_loss": 1.285839319229126, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 10 }, { "epoch": 0.011844832691738229, "grad_norm": 25.25, "learning_rate": 5.917159763313609e-08, "log_odds_chosen": -0.6078722476959229, "log_odds_ratio": -1.1548207998275757, "logits/chosen": -2.1872293949127197, "logits/rejected": -2.1639022827148438, "logps/chosen": -0.8250460624694824, "logps/rejected": -0.4715689718723297, "loss": 1.2301, "nll_loss": 1.2283066511154175, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 20 }, { "epoch": 0.017767249037607343, "grad_norm": 27.125, "learning_rate": 8.875739644970414e-08, "log_odds_chosen": -0.5964034199714661, "log_odds_ratio": -1.1720728874206543, "logits/chosen": -2.155057191848755, "logits/rejected": -2.146630048751831, "logps/chosen": -0.8543933033943176, "logps/rejected": -0.4923427104949951, "loss": 1.2398, "nll_loss": 1.313323736190796, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 30 }, { "epoch": 0.023689665383476458, "grad_norm": 28.25, "learning_rate": 1.1834319526627217e-07, "log_odds_chosen": -0.5351605415344238, "log_odds_ratio": -1.0927046537399292, "logits/chosen": -2.2190463542938232, "logits/rejected": -2.206223964691162, "logps/chosen": -0.7575310468673706, "logps/rejected": -0.4635254740715027, "loss": 1.2735, "nll_loss": 1.2356092929840088, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 40 }, { "epoch": 0.029612081729345572, "grad_norm": 26.0, "learning_rate": 1.4792899408284022e-07, "log_odds_chosen": -0.4550475478172302, "log_odds_ratio": -1.0262255668640137, "logits/chosen": -2.163825750350952, "logits/rejected": -2.148223400115967, "logps/chosen": -0.7005314826965332, "logps/rejected": -0.47106480598449707, "loss": 1.2103, "nll_loss": 1.2403192520141602, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 50 }, { "epoch": 0.035534498075214686, "grad_norm": 21.375, "learning_rate": 1.7751479289940827e-07, "log_odds_chosen": -0.6598173975944519, "log_odds_ratio": -1.2315865755081177, "logits/chosen": -2.2192461490631104, "logits/rejected": -2.1879701614379883, "logps/chosen": -0.8897407650947571, "logps/rejected": -0.4609861969947815, "loss": 1.2416, "nll_loss": 1.2300336360931396, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 60 }, { "epoch": 0.041456914421083804, "grad_norm": 27.375, "learning_rate": 2.0710059171597633e-07, "log_odds_chosen": -0.5370969772338867, "log_odds_ratio": -1.1101651191711426, "logits/chosen": -2.233755588531494, "logits/rejected": -2.201343297958374, "logps/chosen": -0.7967244386672974, "logps/rejected": -0.4630069136619568, "loss": 1.2546, "nll_loss": 1.2548679113388062, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 70 }, { "epoch": 0.047379330766952915, "grad_norm": 26.625, "learning_rate": 2.3668639053254435e-07, "log_odds_chosen": -0.5750253796577454, "log_odds_ratio": -1.1556330919265747, "logits/chosen": -2.19846773147583, "logits/rejected": -2.187711715698242, "logps/chosen": -0.7946293950080872, "logps/rejected": -0.4594718813896179, "loss": 1.2238, "nll_loss": 1.2166999578475952, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 80 }, { "epoch": 0.05330174711282203, "grad_norm": 22.375, "learning_rate": 2.662721893491124e-07, "log_odds_chosen": -0.502492368221283, "log_odds_ratio": -1.0737704038619995, "logits/chosen": -2.18656063079834, "logits/rejected": -2.1636054515838623, "logps/chosen": -0.7198958992958069, "logps/rejected": -0.4653542935848236, "loss": 1.1987, "nll_loss": 1.2203375101089478, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 90 }, { "epoch": 0.059224163458691144, "grad_norm": 22.375, "learning_rate": 2.9585798816568045e-07, "log_odds_chosen": -0.41660839319229126, "log_odds_ratio": -0.9962056279182434, "logits/chosen": -2.247572422027588, "logits/rejected": -2.2023332118988037, "logps/chosen": -0.6892199516296387, "logps/rejected": -0.4913715422153473, "loss": 1.1737, "nll_loss": 1.2142550945281982, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 100 }, { "epoch": 0.06514657980456026, "grad_norm": 17.25, "learning_rate": 3.254437869822485e-07, "log_odds_chosen": -0.4817970395088196, "log_odds_ratio": -1.0484726428985596, "logits/chosen": -2.1959776878356934, "logits/rejected": -2.172440767288208, "logps/chosen": -0.7387205958366394, "logps/rejected": -0.47952842712402344, "loss": 1.1196, "nll_loss": 1.1109485626220703, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 110 }, { "epoch": 0.07106899615042937, "grad_norm": 17.75, "learning_rate": 3.5502958579881655e-07, "log_odds_chosen": -0.5072614550590515, "log_odds_ratio": -1.073188066482544, "logits/chosen": -2.2234084606170654, "logits/rejected": -2.212110996246338, "logps/chosen": -0.7518635988235474, "logps/rejected": -0.4725222587585449, "loss": 1.1538, "nll_loss": 1.1456319093704224, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 120 }, { "epoch": 0.07699141249629848, "grad_norm": 22.875, "learning_rate": 3.8461538461538463e-07, "log_odds_chosen": -0.6446342468261719, "log_odds_ratio": -1.2178680896759033, "logits/chosen": -2.2080233097076416, "logits/rejected": -2.1998672485351562, "logps/chosen": -0.8730036020278931, "logps/rejected": -0.4481457769870758, "loss": 1.1644, "nll_loss": 1.1509124040603638, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 130 }, { "epoch": 0.08291382884216761, "grad_norm": 16.25, "learning_rate": 4.1420118343195265e-07, "log_odds_chosen": -0.45662721991539, "log_odds_ratio": -1.0147430896759033, "logits/chosen": -2.237990617752075, "logits/rejected": -2.2128589153289795, "logps/chosen": -0.6646671295166016, "logps/rejected": -0.44373393058776855, "loss": 1.0906, "nll_loss": 1.0673267841339111, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 140 }, { "epoch": 0.08883624518803672, "grad_norm": 26.5, "learning_rate": 4.437869822485207e-07, "log_odds_chosen": -0.46678367257118225, "log_odds_ratio": -1.0147194862365723, "logits/chosen": -2.167670488357544, "logits/rejected": -2.1592793464660645, "logps/chosen": -0.7025789618492126, "logps/rejected": -0.47203493118286133, "loss": 1.1008, "nll_loss": 1.1650502681732178, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 150 }, { "epoch": 0.09475866153390583, "grad_norm": 12.0625, "learning_rate": 4.733727810650887e-07, "log_odds_chosen": -0.3221941888332367, "log_odds_ratio": -0.9352226257324219, "logits/chosen": -2.247824192047119, "logits/rejected": -2.2287344932556152, "logps/chosen": -0.6016725301742554, "logps/rejected": -0.4531864523887634, "loss": 1.0947, "nll_loss": 1.0781590938568115, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 160 }, { "epoch": 0.10068107787977496, "grad_norm": 11.5625, "learning_rate": 4.999994653198566e-07, "log_odds_chosen": -0.4564700722694397, "log_odds_ratio": -1.0602452754974365, "logits/chosen": -2.2789835929870605, "logits/rejected": -2.2523741722106934, "logps/chosen": -0.7461049556732178, "logps/rejected": -0.48730534315109253, "loss": 1.1159, "nll_loss": 1.0658115148544312, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 170 }, { "epoch": 0.10660349422564407, "grad_norm": 8.9375, "learning_rate": 4.999353064699471e-07, "log_odds_chosen": -0.5452951192855835, "log_odds_ratio": -1.1454532146453857, "logits/chosen": -2.237121820449829, "logits/rejected": -2.202718496322632, "logps/chosen": -0.827674388885498, "logps/rejected": -0.49005183577537537, "loss": 0.9748, "nll_loss": 1.0014435052871704, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 180 }, { "epoch": 0.11252591057151318, "grad_norm": 10.375, "learning_rate": 4.99764243036258e-07, "log_odds_chosen": -0.4207037091255188, "log_odds_ratio": -0.9974331855773926, "logits/chosen": -2.27175235748291, "logits/rejected": -2.242116689682007, "logps/chosen": -0.6407202482223511, "logps/rejected": -0.4429788589477539, "loss": 1.0095, "nll_loss": 1.017865777015686, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 190 }, { "epoch": 0.11844832691738229, "grad_norm": 9.5, "learning_rate": 4.994863481875841e-07, "log_odds_chosen": -0.4031923711299896, "log_odds_ratio": -0.973800003528595, "logits/chosen": -2.221717119216919, "logits/rejected": -2.18719482421875, "logps/chosen": -0.6306296586990356, "logps/rejected": -0.43233147263526917, "loss": 1.0045, "nll_loss": 0.9697571992874146, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 200 }, { "epoch": 0.12437074326325141, "grad_norm": 9.8125, "learning_rate": 4.991017407876165e-07, "log_odds_chosen": -0.4411424994468689, "log_odds_ratio": -1.0120642185211182, "logits/chosen": -2.238583564758301, "logits/rejected": -2.1919620037078857, "logps/chosen": -0.7006498575210571, "logps/rejected": -0.4852658808231354, "loss": 0.9832, "nll_loss": 1.0057976245880127, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 210 }, { "epoch": 0.13029315960912052, "grad_norm": 9.0, "learning_rate": 4.98610585344102e-07, "log_odds_chosen": -0.25588923692703247, "log_odds_ratio": -0.9158498048782349, "logits/chosen": -2.258283853530884, "logits/rejected": -2.2223126888275146, "logps/chosen": -0.5977104306221008, "logps/rejected": -0.4761990010738373, "loss": 1.02, "nll_loss": 1.0466753244400024, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 220 }, { "epoch": 0.13621557595498965, "grad_norm": 8.4375, "learning_rate": 4.980130919384768e-07, "log_odds_chosen": -0.5824810266494751, "log_odds_ratio": -1.1220190525054932, "logits/chosen": -2.2531580924987793, "logits/rejected": -2.2409615516662598, "logps/chosen": -0.7504315972328186, "logps/rejected": -0.43684881925582886, "loss": 1.0183, "nll_loss": 1.0061722993850708, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 230 }, { "epoch": 0.14213799230085875, "grad_norm": 8.6875, "learning_rate": 4.973095161360105e-07, "log_odds_chosen": -0.44555410742759705, "log_odds_ratio": -1.0208032131195068, "logits/chosen": -2.2470836639404297, "logits/rejected": -2.214434862136841, "logps/chosen": -0.6731461882591248, "logps/rejected": -0.4670758843421936, "loss": 1.0354, "nll_loss": 1.0512316226959229, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 240 }, { "epoch": 0.14806040864672787, "grad_norm": 8.875, "learning_rate": 4.965001588764913e-07, "log_odds_chosen": -0.4621347486972809, "log_odds_ratio": -1.0333962440490723, "logits/chosen": -2.274649143218994, "logits/rejected": -2.241596221923828, "logps/chosen": -0.6809024214744568, "logps/rejected": -0.4347941279411316, "loss": 1.0076, "nll_loss": 1.007010817527771, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 250 }, { "epoch": 0.15398282499259697, "grad_norm": 8.8125, "learning_rate": 4.955853663455072e-07, "log_odds_chosen": -0.3350891172885895, "log_odds_ratio": -0.9613872766494751, "logits/chosen": -2.260413885116577, "logits/rejected": -2.2278614044189453, "logps/chosen": -0.6426165699958801, "logps/rejected": -0.45530933141708374, "loss": 0.9607, "nll_loss": 0.9523956179618835, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 260 }, { "epoch": 0.1599052413384661, "grad_norm": 9.125, "learning_rate": 4.945655298263713e-07, "log_odds_chosen": -0.4467865824699402, "log_odds_ratio": -1.0078147649765015, "logits/chosen": -2.2099037170410156, "logits/rejected": -2.183701992034912, "logps/chosen": -0.6576748490333557, "logps/rejected": -0.4429934620857239, "loss": 1.0429, "nll_loss": 1.075627326965332, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 270 }, { "epoch": 0.16582765768433522, "grad_norm": 7.96875, "learning_rate": 4.934410855327585e-07, "log_odds_chosen": -0.38402479887008667, "log_odds_ratio": -0.9679163098335266, "logits/chosen": -2.292367458343506, "logits/rejected": -2.2721431255340576, "logps/chosen": -0.6379308104515076, "logps/rejected": -0.442401647567749, "loss": 0.9621, "nll_loss": 1.016234278678894, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 280 }, { "epoch": 0.1717500740302043, "grad_norm": 9.0, "learning_rate": 4.922125144221252e-07, "log_odds_chosen": -0.4171718955039978, "log_odds_ratio": -0.9991844296455383, "logits/chosen": -2.259284257888794, "logits/rejected": -2.205514430999756, "logps/chosen": -0.6329622268676758, "logps/rejected": -0.43889325857162476, "loss": 1.014, "nll_loss": 1.0359452962875366, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 290 }, { "epoch": 0.17767249037607344, "grad_norm": 8.125, "learning_rate": 4.90880341989989e-07, "log_odds_chosen": -0.33935636281967163, "log_odds_ratio": -0.9420417547225952, "logits/chosen": -2.2610156536102295, "logits/rejected": -2.2359061241149902, "logps/chosen": -0.6213563084602356, "logps/rejected": -0.44430437684059143, "loss": 0.9833, "nll_loss": 0.9867600202560425, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 300 }, { "epoch": 0.18359490672194256, "grad_norm": 8.375, "learning_rate": 4.894451380451589e-07, "log_odds_chosen": -0.5468162298202515, "log_odds_ratio": -1.0870132446289062, "logits/chosen": -2.241508722305298, "logits/rejected": -2.22690749168396, "logps/chosen": -0.7115592360496521, "logps/rejected": -0.43017569184303284, "loss": 1.0006, "nll_loss": 0.994620680809021, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 310 }, { "epoch": 0.18951732306781166, "grad_norm": 9.25, "learning_rate": 4.879075164660124e-07, "log_odds_chosen": -0.3401740491390228, "log_odds_ratio": -0.9383065104484558, "logits/chosen": -2.2438132762908936, "logits/rejected": -2.209188938140869, "logps/chosen": -0.5985551476478577, "logps/rejected": -0.43559733033180237, "loss": 0.94, "nll_loss": 0.9133344888687134, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 320 }, { "epoch": 0.19543973941368079, "grad_norm": 10.5, "learning_rate": 4.862681349379212e-07, "log_odds_chosen": -0.3794914484024048, "log_odds_ratio": -0.9728193283081055, "logits/chosen": -2.2533066272735596, "logits/rejected": -2.1980607509613037, "logps/chosen": -0.6138342022895813, "logps/rejected": -0.44097796082496643, "loss": 1.0041, "nll_loss": 1.0256803035736084, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 330 }, { "epoch": 0.2013621557595499, "grad_norm": 7.6875, "learning_rate": 4.8452769467194e-07, "log_odds_chosen": -0.40433868765830994, "log_odds_ratio": -0.9825445413589478, "logits/chosen": -2.2585511207580566, "logits/rejected": -2.233630657196045, "logps/chosen": -0.6160660982131958, "logps/rejected": -0.4248103201389313, "loss": 0.9778, "nll_loss": 0.9514611959457397, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 340 }, { "epoch": 0.207284572105419, "grad_norm": 7.625, "learning_rate": 4.82686940104879e-07, "log_odds_chosen": -0.4215853214263916, "log_odds_ratio": -1.01924729347229, "logits/chosen": -2.30430269241333, "logits/rejected": -2.272357702255249, "logps/chosen": -0.645369291305542, "logps/rejected": -0.4133967459201813, "loss": 0.9287, "nll_loss": 0.9160087704658508, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 350 }, { "epoch": 0.21320698845128813, "grad_norm": 8.125, "learning_rate": 4.807466585808856e-07, "log_odds_chosen": -0.3686332702636719, "log_odds_ratio": -0.9627587199211121, "logits/chosen": -2.282811403274536, "logits/rejected": -2.2714035511016846, "logps/chosen": -0.5806415677070618, "logps/rejected": -0.4163896143436432, "loss": 0.987, "nll_loss": 0.9767228960990906, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 360 }, { "epoch": 0.21912940479715723, "grad_norm": 8.3125, "learning_rate": 4.787076800146752e-07, "log_odds_chosen": -0.34714585542678833, "log_odds_ratio": -0.9853572845458984, "logits/chosen": -2.2601521015167236, "logits/rejected": -2.2084286212921143, "logps/chosen": -0.6458638906478882, "logps/rejected": -0.4349249005317688, "loss": 0.908, "nll_loss": 0.8895160555839539, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 370 }, { "epoch": 0.22505182114302635, "grad_norm": 8.1875, "learning_rate": 4.765708765365526e-07, "log_odds_chosen": -0.30534738302230835, "log_odds_ratio": -0.9373781085014343, "logits/chosen": -2.2653586864471436, "logits/rejected": -2.254210948944092, "logps/chosen": -0.576322615146637, "logps/rejected": -0.4294815957546234, "loss": 1.0005, "nll_loss": 0.958886981010437, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 380 }, { "epoch": 0.23097423748889548, "grad_norm": 9.875, "learning_rate": 4.7433716211937587e-07, "log_odds_chosen": -0.5105515122413635, "log_odds_ratio": -1.0566070079803467, "logits/chosen": -2.328101396560669, "logits/rejected": -2.302281141281128, "logps/chosen": -0.6501199007034302, "logps/rejected": -0.404310941696167, "loss": 0.9396, "nll_loss": 0.9967532157897949, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 390 }, { "epoch": 0.23689665383476458, "grad_norm": 7.8125, "learning_rate": 4.720074921876245e-07, "log_odds_chosen": -0.45067232847213745, "log_odds_ratio": -1.0197547674179077, "logits/chosen": -2.340407133102417, "logits/rejected": -2.293402910232544, "logps/chosen": -0.6130900382995605, "logps/rejected": -0.41540417075157166, "loss": 0.9442, "nll_loss": 0.9423254132270813, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 400 }, { "epoch": 0.2428190701806337, "grad_norm": 8.0625, "learning_rate": 4.6958286320873593e-07, "log_odds_chosen": -0.43627676367759705, "log_odds_ratio": -0.9863921403884888, "logits/chosen": -2.2813560962677, "logits/rejected": -2.275886058807373, "logps/chosen": -0.6022886633872986, "logps/rejected": -0.4021386504173279, "loss": 0.9658, "nll_loss": 0.9948114156723022, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 410 }, { "epoch": 0.24874148652650283, "grad_norm": 8.3125, "learning_rate": 4.6706431226688804e-07, "log_odds_chosen": -0.3637348413467407, "log_odds_ratio": -0.9635465741157532, "logits/chosen": -2.2663254737854004, "logits/rejected": -2.2325570583343506, "logps/chosen": -0.6079740524291992, "logps/rejected": -0.42877498269081116, "loss": 0.971, "nll_loss": 0.9684462547302246, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 420 }, { "epoch": 0.25466390287237195, "grad_norm": 7.71875, "learning_rate": 4.6445291661940777e-07, "log_odds_chosen": -0.29998743534088135, "log_odds_ratio": -0.9151178598403931, "logits/chosen": -2.288652181625366, "logits/rejected": -2.28438138961792, "logps/chosen": -0.5727067589759827, "logps/rejected": -0.43537649512290955, "loss": 0.9344, "nll_loss": 0.8895971179008484, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 430 }, { "epoch": 0.26058631921824105, "grad_norm": 9.6875, "learning_rate": 4.6174979323599715e-07, "log_odds_chosen": -0.5159381031990051, "log_odds_ratio": -1.0749253034591675, "logits/chosen": -2.2701315879821777, "logits/rejected": -2.2190845012664795, "logps/chosen": -0.7043232321739197, "logps/rejected": -0.4317665696144104, "loss": 0.9929, "nll_loss": 1.0871878862380981, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 440 }, { "epoch": 0.26650873556411014, "grad_norm": 7.9375, "learning_rate": 4.5895609832097277e-07, "log_odds_chosen": -0.38775309920310974, "log_odds_ratio": -1.0040466785430908, "logits/chosen": -2.2794992923736572, "logits/rejected": -2.2638792991638184, "logps/chosen": -0.6565039157867432, "logps/rejected": -0.43878334760665894, "loss": 0.9716, "nll_loss": 0.9555328488349915, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 450 }, { "epoch": 0.2724311519099793, "grad_norm": 8.25, "learning_rate": 4.560730268187236e-07, "log_odds_chosen": -0.3349025249481201, "log_odds_ratio": -0.9378219842910767, "logits/chosen": -2.282761812210083, "logits/rejected": -2.244011878967285, "logps/chosen": -0.5650533437728882, "logps/rejected": -0.4169080853462219, "loss": 0.9547, "nll_loss": 0.9367356300354004, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 460 }, { "epoch": 0.2783535682558484, "grad_norm": 8.9375, "learning_rate": 4.531018119025989e-07, "log_odds_chosen": -0.24693968892097473, "log_odds_ratio": -0.9230139851570129, "logits/chosen": -2.338200807571411, "logits/rejected": -2.3114407062530518, "logps/chosen": -0.5866008996963501, "logps/rejected": -0.498542845249176, "loss": 0.9863, "nll_loss": 1.0312178134918213, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 470 }, { "epoch": 0.2842759846017175, "grad_norm": 7.59375, "learning_rate": 4.5004372444744376e-07, "log_odds_chosen": -0.259705126285553, "log_odds_ratio": -0.9033578634262085, "logits/chosen": -2.281229257583618, "logits/rejected": -2.259384870529175, "logps/chosen": -0.6026913523674011, "logps/rejected": -0.46954187750816345, "loss": 0.9559, "nll_loss": 0.9717810750007629, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 480 }, { "epoch": 0.2901984009475866, "grad_norm": 9.0, "learning_rate": 4.4690007248600967e-07, "log_odds_chosen": -0.3773840069770813, "log_odds_ratio": -0.9825248718261719, "logits/chosen": -2.2721426486968994, "logits/rejected": -2.2558834552764893, "logps/chosen": -0.629915714263916, "logps/rejected": -0.43304410576820374, "loss": 0.954, "nll_loss": 0.9644275903701782, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 490 }, { "epoch": 0.29612081729345574, "grad_norm": 7.71875, "learning_rate": 4.436722006494701e-07, "log_odds_chosen": -0.5259193778038025, "log_odds_ratio": -1.1190059185028076, "logits/chosen": -2.266916275024414, "logits/rejected": -2.243081569671631, "logps/chosen": -0.7579408884048462, "logps/rejected": -0.4302619397640228, "loss": 0.9695, "nll_loss": 0.9956067204475403, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 500 }, { "epoch": 0.30204323363932484, "grad_norm": 8.5, "learning_rate": 4.4036148959228356e-07, "log_odds_chosen": -0.4430968165397644, "log_odds_ratio": -1.0375418663024902, "logits/chosen": -2.300400733947754, "logits/rejected": -2.2604432106018066, "logps/chosen": -0.6584800481796265, "logps/rejected": -0.4138873517513275, "loss": 0.9756, "nll_loss": 0.9368442296981812, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 510 }, { "epoch": 0.30796564998519393, "grad_norm": 9.4375, "learning_rate": 4.3696935540164705e-07, "log_odds_chosen": -0.3859555423259735, "log_odds_ratio": -0.9752845764160156, "logits/chosen": -2.2633957862854004, "logits/rejected": -2.2417874336242676, "logps/chosen": -0.6037057638168335, "logps/rejected": -0.41955527663230896, "loss": 0.9235, "nll_loss": 0.9441665410995483, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 520 }, { "epoch": 0.3138880663310631, "grad_norm": 7.71875, "learning_rate": 4.334972489917947e-07, "log_odds_chosen": -0.29654431343078613, "log_odds_ratio": -0.9258224368095398, "logits/chosen": -2.3264002799987793, "logits/rejected": -2.269259214401245, "logps/chosen": -0.5935055017471313, "logps/rejected": -0.4429333806037903, "loss": 0.9497, "nll_loss": 0.9263819456100464, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 530 }, { "epoch": 0.3198104826769322, "grad_norm": 9.0, "learning_rate": 4.299466554833997e-07, "log_odds_chosen": -0.400839239358902, "log_odds_ratio": -0.9843107461929321, "logits/chosen": -2.30580472946167, "logits/rejected": -2.256434440612793, "logps/chosen": -0.5819273591041565, "logps/rejected": -0.408183753490448, "loss": 0.9515, "nll_loss": 0.92247474193573, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 540 }, { "epoch": 0.3257328990228013, "grad_norm": 7.46875, "learning_rate": 4.263190935683449e-07, "log_odds_chosen": -0.32894009351730347, "log_odds_ratio": -0.9418984651565552, "logits/chosen": -2.282500743865967, "logits/rejected": -2.24668025970459, "logps/chosen": -0.5584912896156311, "logps/rejected": -0.4048989713191986, "loss": 0.8853, "nll_loss": 0.8602296113967896, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 550 }, { "epoch": 0.33165531536867043, "grad_norm": 9.0, "learning_rate": 4.2261611486013437e-07, "log_odds_chosen": -0.39398467540740967, "log_odds_ratio": -0.9864169955253601, "logits/chosen": -2.3277463912963867, "logits/rejected": -2.2908778190612793, "logps/chosen": -0.618613064289093, "logps/rejected": -0.4362561106681824, "loss": 0.961, "nll_loss": 0.9670404195785522, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 560 }, { "epoch": 0.33757773171453953, "grad_norm": 9.4375, "learning_rate": 4.188393032302233e-07, "log_odds_chosen": -0.2161109894514084, "log_odds_ratio": -0.8888469934463501, "logits/chosen": -2.266890048980713, "logits/rejected": -2.2078969478607178, "logps/chosen": -0.5593982934951782, "logps/rejected": -0.48008909821510315, "loss": 0.9285, "nll_loss": 0.9204473495483398, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 570 }, { "epoch": 0.3435001480604086, "grad_norm": 10.75, "learning_rate": 4.1499027413055e-07, "log_odds_chosen": -0.41526442766189575, "log_odds_ratio": -0.9975423812866211, "logits/chosen": -2.2734172344207764, "logits/rejected": -2.2457797527313232, "logps/chosen": -0.6249933838844299, "logps/rejected": -0.42492228746414185, "loss": 0.9404, "nll_loss": 0.9193958044052124, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 580 }, { "epoch": 0.3494225644062778, "grad_norm": 9.3125, "learning_rate": 4.1107067390256056e-07, "log_odds_chosen": -0.45963993668556213, "log_odds_ratio": -1.0648995637893677, "logits/chosen": -2.3240678310394287, "logits/rejected": -2.2981557846069336, "logps/chosen": -0.723495364189148, "logps/rejected": -0.454792320728302, "loss": 0.9656, "nll_loss": 1.0240063667297363, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 590 }, { "epoch": 0.3553449807521469, "grad_norm": 9.875, "learning_rate": 4.0708217907302047e-07, "log_odds_chosen": -0.4009949564933777, "log_odds_ratio": -0.9855114221572876, "logits/chosen": -2.2710177898406982, "logits/rejected": -2.237403392791748, "logps/chosen": -0.6120108366012573, "logps/rejected": -0.43240681290626526, "loss": 0.9575, "nll_loss": 0.9712766408920288, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 600 }, { "epoch": 0.361267397098016, "grad_norm": 12.0, "learning_rate": 4.030264956369157e-07, "log_odds_chosen": -0.39438915252685547, "log_odds_ratio": -0.9749253988265991, "logits/chosen": -2.31217098236084, "logits/rejected": -2.273338794708252, "logps/chosen": -0.5822636485099792, "logps/rejected": -0.4064372181892395, "loss": 0.9477, "nll_loss": 0.9778239130973816, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 610 }, { "epoch": 0.3671898134438851, "grad_norm": 9.375, "learning_rate": 3.989053583277492e-07, "log_odds_chosen": -0.5915114879608154, "log_odds_ratio": -1.1409562826156616, "logits/chosen": -2.3212368488311768, "logits/rejected": -2.3024001121520996, "logps/chosen": -0.7573744654655457, "logps/rejected": -0.4205297827720642, "loss": 0.9491, "nll_loss": 0.9616823196411133, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 620 }, { "epoch": 0.3731122297897542, "grad_norm": 8.125, "learning_rate": 3.947205298755447e-07, "log_odds_chosen": -0.32023632526397705, "log_odds_ratio": -0.9460951685905457, "logits/chosen": -2.2791507244110107, "logits/rejected": -2.2480525970458984, "logps/chosen": -0.6137298345565796, "logps/rejected": -0.4523869454860687, "loss": 0.9577, "nll_loss": 0.9420009851455688, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 630 }, { "epoch": 0.3790346461356233, "grad_norm": 8.3125, "learning_rate": 3.9047380025287634e-07, "log_odds_chosen": -0.31926944851875305, "log_odds_ratio": -0.9340398907661438, "logits/chosen": -2.288464069366455, "logits/rejected": -2.257875442504883, "logps/chosen": -0.5796951055526733, "logps/rejected": -0.43441399931907654, "loss": 0.9511, "nll_loss": 0.935884952545166, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 640 }, { "epoch": 0.3849570624814925, "grad_norm": 10.125, "learning_rate": 3.8616698590924523e-07, "log_odds_chosen": -0.3541373610496521, "log_odds_ratio": -0.9547072649002075, "logits/chosen": -2.3075475692749023, "logits/rejected": -2.261488437652588, "logps/chosen": -0.6222845315933228, "logps/rejected": -0.4457763135433197, "loss": 0.9292, "nll_loss": 0.941017746925354, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 650 }, { "epoch": 0.39087947882736157, "grad_norm": 8.6875, "learning_rate": 3.8180192899413123e-07, "log_odds_chosen": -0.373871773481369, "log_odds_ratio": -0.963890552520752, "logits/chosen": -2.3060686588287354, "logits/rejected": -2.2961385250091553, "logps/chosen": -0.5887154936790466, "logps/rejected": -0.41268324851989746, "loss": 0.9644, "nll_loss": 0.9328317642211914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 660 }, { "epoch": 0.39680189517323067, "grad_norm": 8.1875, "learning_rate": 3.7738049656905225e-07, "log_odds_chosen": -0.3005954623222351, "log_odds_ratio": -0.9146180152893066, "logits/chosen": -2.241210460662842, "logits/rejected": -2.197197437286377, "logps/chosen": -0.5695523023605347, "logps/rejected": -0.4374919533729553, "loss": 0.9635, "nll_loss": 0.9454113841056824, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 670 }, { "epoch": 0.4027243115190998, "grad_norm": 8.25, "learning_rate": 3.7290457980896787e-07, "log_odds_chosen": -0.2508184611797333, "log_odds_ratio": -0.8950401544570923, "logits/chosen": -2.310917377471924, "logits/rejected": -2.2810654640197754, "logps/chosen": -0.5575405955314636, "logps/rejected": -0.44296175241470337, "loss": 0.9245, "nll_loss": 0.9060578346252441, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 680 }, { "epoch": 0.4086467278649689, "grad_norm": 8.375, "learning_rate": 3.68376093193369e-07, "log_odds_chosen": -0.35061341524124146, "log_odds_ratio": -0.9449998140335083, "logits/chosen": -2.3210480213165283, "logits/rejected": -2.281230926513672, "logps/chosen": -0.5540003776550293, "logps/rejected": -0.4034114480018616, "loss": 0.9207, "nll_loss": 0.9037810564041138, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 690 }, { "epoch": 0.414569144210838, "grad_norm": 8.4375, "learning_rate": 3.637969736873992e-07, "log_odds_chosen": -0.29555535316467285, "log_odds_ratio": -0.9367197155952454, "logits/chosen": -2.2944698333740234, "logits/rejected": -2.2611544132232666, "logps/chosen": -0.5592113733291626, "logps/rejected": -0.42597031593322754, "loss": 0.9637, "nll_loss": 0.9748933911323547, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 700 }, { "epoch": 0.4204915605567071, "grad_norm": 8.6875, "learning_rate": 3.591691799133587e-07, "log_odds_chosen": -0.27811819314956665, "log_odds_ratio": -0.8972823023796082, "logits/chosen": -2.3404221534729004, "logits/rejected": -2.3104233741760254, "logps/chosen": -0.5548882484436035, "logps/rejected": -0.42241740226745605, "loss": 0.9489, "nll_loss": 0.9462203979492188, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 710 }, { "epoch": 0.42641397690257626, "grad_norm": 7.8125, "learning_rate": 3.5449469131294476e-07, "log_odds_chosen": -0.282146155834198, "log_odds_ratio": -0.9153865575790405, "logits/chosen": -2.3050596714019775, "logits/rejected": -2.2582859992980957, "logps/chosen": -0.5491407513618469, "logps/rejected": -0.4175952970981598, "loss": 0.9258, "nll_loss": 0.9185633659362793, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 720 }, { "epoch": 0.43233639324844536, "grad_norm": 7.78125, "learning_rate": 3.497755073005868e-07, "log_odds_chosen": -0.17704807221889496, "log_odds_ratio": -0.8550702333450317, "logits/chosen": -2.304471254348755, "logits/rejected": -2.2704811096191406, "logps/chosen": -0.5407411456108093, "logps/rejected": -0.43452388048171997, "loss": 0.9148, "nll_loss": 0.8776341676712036, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 730 }, { "epoch": 0.43825880959431446, "grad_norm": 14.0, "learning_rate": 3.4501364640823926e-07, "log_odds_chosen": -0.4160383343696594, "log_odds_ratio": -0.9982725381851196, "logits/chosen": -2.3177871704101562, "logits/rejected": -2.291195869445801, "logps/chosen": -0.6620553135871887, "logps/rejected": -0.4509620666503906, "loss": 0.9449, "nll_loss": 0.9611420631408691, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 740 }, { "epoch": 0.4441812259401836, "grad_norm": 8.4375, "learning_rate": 3.402111454219966e-07, "log_odds_chosen": -0.2541792690753937, "log_odds_ratio": -0.8975493311882019, "logits/chosen": -2.3212180137634277, "logits/rejected": -2.2709405422210693, "logps/chosen": -0.5664907693862915, "logps/rejected": -0.4353105127811432, "loss": 0.9301, "nll_loss": 0.9432824850082397, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 750 }, { "epoch": 0.4501036422860527, "grad_norm": 7.28125, "learning_rate": 3.353700585109005e-07, "log_odds_chosen": -0.2790587842464447, "log_odds_ratio": -0.9118951559066772, "logits/chosen": -2.3148138523101807, "logits/rejected": -2.2849326133728027, "logps/chosen": -0.5668213963508606, "logps/rejected": -0.4337525963783264, "loss": 0.9239, "nll_loss": 0.9522818326950073, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 760 }, { "epoch": 0.4560260586319218, "grad_norm": 7.90625, "learning_rate": 3.304924563483129e-07, "log_odds_chosen": -0.31332454085350037, "log_odds_ratio": -0.9554667472839355, "logits/chosen": -2.329709053039551, "logits/rejected": -2.3159825801849365, "logps/chosen": -0.6328242421150208, "logps/rejected": -0.4525510370731354, "loss": 0.9725, "nll_loss": 0.9982641935348511, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 770 }, { "epoch": 0.46194847497779096, "grad_norm": 8.6875, "learning_rate": 3.255804252262283e-07, "log_odds_chosen": -0.26954448223114014, "log_odds_ratio": -0.9041155576705933, "logits/chosen": -2.26902437210083, "logits/rejected": -2.2395756244659424, "logps/chosen": -0.5438047647476196, "logps/rejected": -0.4186398386955261, "loss": 0.9454, "nll_loss": 0.9862927198410034, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 780 }, { "epoch": 0.46787089132366005, "grad_norm": 6.78125, "learning_rate": 3.2063606616290626e-07, "log_odds_chosen": -0.40437692403793335, "log_odds_ratio": -0.991305947303772, "logits/chosen": -2.2474241256713867, "logits/rejected": -2.2076640129089355, "logps/chosen": -0.5990616083145142, "logps/rejected": -0.40946364402770996, "loss": 0.8641, "nll_loss": 0.827691376209259, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 790 }, { "epoch": 0.47379330766952915, "grad_norm": 12.25, "learning_rate": 3.1566149400420523e-07, "log_odds_chosen": -0.3424193859100342, "log_odds_ratio": -0.9442498087882996, "logits/chosen": -2.300968885421753, "logits/rejected": -2.289825201034546, "logps/chosen": -0.6068278551101685, "logps/rejected": -0.4422214925289154, "loss": 0.9572, "nll_loss": 0.9411390423774719, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 800 }, { "epoch": 0.4797157240153983, "grad_norm": 8.75, "learning_rate": 3.1065883651900087e-07, "log_odds_chosen": -0.3020106852054596, "log_odds_ratio": -0.9359525442123413, "logits/chosen": -2.288480281829834, "logits/rejected": -2.246896982192993, "logps/chosen": -0.5823680758476257, "logps/rejected": -0.44429031014442444, "loss": 0.9677, "nll_loss": 0.9093478918075562, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 810 }, { "epoch": 0.4856381403612674, "grad_norm": 8.0625, "learning_rate": 3.056302334890786e-07, "log_odds_chosen": -0.38523969054222107, "log_odds_ratio": -0.9780759811401367, "logits/chosen": -2.294841766357422, "logits/rejected": -2.2723891735076904, "logps/chosen": -0.6043334603309631, "logps/rejected": -0.4185991883277893, "loss": 0.9121, "nll_loss": 0.9030720591545105, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 820 }, { "epoch": 0.4915605567071365, "grad_norm": 8.75, "learning_rate": 3.0057783579388586e-07, "log_odds_chosen": -0.24561011791229248, "log_odds_ratio": -0.8836873173713684, "logits/chosen": -2.2996482849121094, "logits/rejected": -2.258457660675049, "logps/chosen": -0.5478182435035706, "logps/rejected": -0.440875768661499, "loss": 0.928, "nll_loss": 0.9274915456771851, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 830 }, { "epoch": 0.49748297305300565, "grad_norm": 7.8125, "learning_rate": 2.9550380449053907e-07, "log_odds_chosen": -0.26652732491493225, "log_odds_ratio": -0.9008363485336304, "logits/chosen": -2.2529563903808594, "logits/rejected": -2.2309823036193848, "logps/chosen": -0.5522275567054749, "logps/rejected": -0.42188987135887146, "loss": 0.914, "nll_loss": 0.8120133280754089, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 840 }, { "epoch": 0.5034053893988747, "grad_norm": 6.4375, "learning_rate": 2.904103098894767e-07, "log_odds_chosen": -0.3553586006164551, "log_odds_ratio": -0.9903032183647156, "logits/chosen": -2.291224956512451, "logits/rejected": -2.2469000816345215, "logps/chosen": -0.6393681764602661, "logps/rejected": -0.42175260186195374, "loss": 0.9129, "nll_loss": 0.9170019030570984, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 850 }, { "epoch": 0.5093278057447439, "grad_norm": 9.75, "learning_rate": 2.852995306261545e-07, "log_odds_chosen": -0.2889431416988373, "log_odds_ratio": -0.9156063795089722, "logits/chosen": -2.318115472793579, "logits/rejected": -2.2808139324188232, "logps/chosen": -0.5667640566825867, "logps/rejected": -0.44533196091651917, "loss": 0.9772, "nll_loss": 0.9936600923538208, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 860 }, { "epoch": 0.515250222090613, "grad_norm": 8.75, "learning_rate": 2.801736527291797e-07, "log_odds_chosen": -0.3678986728191376, "log_odds_ratio": -0.9755579233169556, "logits/chosen": -2.2834322452545166, "logits/rejected": -2.2398197650909424, "logps/chosen": -0.6312032341957092, "logps/rejected": -0.4318135380744934, "loss": 0.9391, "nll_loss": 0.8974191546440125, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 870 }, { "epoch": 0.5211726384364821, "grad_norm": 7.84375, "learning_rate": 2.750348686852836e-07, "log_odds_chosen": -0.40664905309677124, "log_odds_ratio": -0.9781969785690308, "logits/chosen": -2.3389241695404053, "logits/rejected": -2.2737958431243896, "logps/chosen": -0.6092024445533752, "logps/rejected": -0.4250633120536804, "loss": 0.971, "nll_loss": 0.9957748651504517, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 880 }, { "epoch": 0.5270950547823512, "grad_norm": 9.5, "learning_rate": 2.69885376501531e-07, "log_odds_chosen": -0.31569716334342957, "log_odds_ratio": -0.9389151334762573, "logits/chosen": -2.2705588340759277, "logits/rejected": -2.2545580863952637, "logps/chosen": -0.6091697812080383, "logps/rejected": -0.45014920830726624, "loss": 0.9568, "nll_loss": 0.9439749717712402, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 890 }, { "epoch": 0.5330174711282203, "grad_norm": 10.0625, "learning_rate": 2.647273787651687e-07, "log_odds_chosen": -0.27334731817245483, "log_odds_ratio": -0.8902351260185242, "logits/chosen": -2.3029747009277344, "logits/rejected": -2.2809951305389404, "logps/chosen": -0.5580970644950867, "logps/rejected": -0.43120306730270386, "loss": 0.9333, "nll_loss": 0.9487207531929016, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 900 }, { "epoch": 0.5389398874740894, "grad_norm": 11.5, "learning_rate": 2.5956308170151526e-07, "log_odds_chosen": -0.5443618893623352, "log_odds_ratio": -1.1218284368515015, "logits/chosen": -2.275094985961914, "logits/rejected": -2.2452805042266846, "logps/chosen": -0.7426999807357788, "logps/rejected": -0.4159156382083893, "loss": 1.0105, "nll_loss": 0.9728318452835083, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 910 }, { "epoch": 0.5448623038199586, "grad_norm": 8.0625, "learning_rate": 2.543946942302944e-07, "log_odds_chosen": -0.30027318000793457, "log_odds_ratio": -0.9199014902114868, "logits/chosen": -2.265780210494995, "logits/rejected": -2.2284467220306396, "logps/chosen": -0.5695372819900513, "logps/rejected": -0.4176994264125824, "loss": 0.9145, "nll_loss": 0.9305332899093628, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 920 }, { "epoch": 0.5507847201658277, "grad_norm": 9.375, "learning_rate": 2.492244270208158e-07, "log_odds_chosen": -0.2501292824745178, "log_odds_ratio": -0.8865777850151062, "logits/chosen": -2.273998260498047, "logits/rejected": -2.247119665145874, "logps/chosen": -0.562545895576477, "logps/rejected": -0.44244521856307983, "loss": 0.9004, "nll_loss": 0.9349013566970825, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 930 }, { "epoch": 0.5567071365116968, "grad_norm": 8.25, "learning_rate": 2.440544915464078e-07, "log_odds_chosen": -0.29280886054039, "log_odds_ratio": -0.9178652763366699, "logits/chosen": -2.304103374481201, "logits/rejected": -2.2641754150390625, "logps/chosen": -0.5466963052749634, "logps/rejected": -0.41387224197387695, "loss": 0.9221, "nll_loss": 0.9058642387390137, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 940 }, { "epoch": 0.5626295528575659, "grad_norm": 7.875, "learning_rate": 2.3888709913850593e-07, "log_odds_chosen": -0.30006080865859985, "log_odds_ratio": -0.9208847880363464, "logits/chosen": -2.350160837173462, "logits/rejected": -2.3101677894592285, "logps/chosen": -0.5762113928794861, "logps/rejected": -0.43552321195602417, "loss": 0.9856, "nll_loss": 0.938804030418396, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 950 }, { "epoch": 0.568551969203435, "grad_norm": 7.8125, "learning_rate": 2.337244600408025e-07, "log_odds_chosen": -0.39082369208335876, "log_odds_ratio": -0.9911519289016724, "logits/chosen": -2.3191308975219727, "logits/rejected": -2.2875494956970215, "logps/chosen": -0.6351069211959839, "logps/rejected": -0.4366016387939453, "loss": 0.9507, "nll_loss": 0.9739691615104675, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 960 }, { "epoch": 0.5744743855493041, "grad_norm": 9.1875, "learning_rate": 2.2856878246386085e-07, "log_odds_chosen": -0.2834867537021637, "log_odds_ratio": -0.9153987765312195, "logits/chosen": -2.3153960704803467, "logits/rejected": -2.2916574478149414, "logps/chosen": -0.5750494003295898, "logps/rejected": -0.4384193420410156, "loss": 0.9944, "nll_loss": 1.0159144401550293, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 970 }, { "epoch": 0.5803968018951732, "grad_norm": 8.8125, "learning_rate": 2.2342227164060035e-07, "log_odds_chosen": -0.37823957204818726, "log_odds_ratio": -0.9799555540084839, "logits/chosen": -2.2767786979675293, "logits/rejected": -2.2212002277374268, "logps/chosen": -0.6210430860519409, "logps/rejected": -0.4381546378135681, "loss": 0.9342, "nll_loss": 0.9048612713813782, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 980 }, { "epoch": 0.5863192182410424, "grad_norm": 7.625, "learning_rate": 2.182871288830533e-07, "log_odds_chosen": -0.3980916738510132, "log_odds_ratio": -0.9920517206192017, "logits/chosen": -2.3002982139587402, "logits/rejected": -2.237112522125244, "logps/chosen": -0.6255283951759338, "logps/rejected": -0.4390384256839752, "loss": 0.9667, "nll_loss": 0.9580439329147339, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 990 }, { "epoch": 0.5922416345869115, "grad_norm": 7.8125, "learning_rate": 2.131655506408007e-07, "log_odds_chosen": -0.3284297287464142, "log_odds_ratio": -0.9404581785202026, "logits/chosen": -2.3054287433624268, "logits/rejected": -2.263627767562866, "logps/chosen": -0.5948997139930725, "logps/rejected": -0.4438301622867584, "loss": 0.9222, "nll_loss": 0.9223626852035522, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1000 }, { "epoch": 0.5981640509327806, "grad_norm": 7.65625, "learning_rate": 2.0805972756148643e-07, "log_odds_chosen": -0.48507261276245117, "log_odds_ratio": -1.0847915410995483, "logits/chosen": -2.2977192401885986, "logits/rejected": -2.2829782962799072, "logps/chosen": -0.748909592628479, "logps/rejected": -0.43740910291671753, "loss": 0.9675, "nll_loss": 0.9915729761123657, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1010 }, { "epoch": 0.6040864672786497, "grad_norm": 7.59375, "learning_rate": 2.0297184355381432e-07, "log_odds_chosen": -0.3442012667655945, "log_odds_ratio": -0.9447819590568542, "logits/chosen": -2.3124001026153564, "logits/rejected": -2.2729249000549316, "logps/chosen": -0.5671228170394897, "logps/rejected": -0.42450952529907227, "loss": 0.9345, "nll_loss": 0.9476312398910522, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1020 }, { "epoch": 0.6100088836245188, "grad_norm": 7.71875, "learning_rate": 1.9790407485342638e-07, "log_odds_chosen": -0.5493720769882202, "log_odds_ratio": -1.1290843486785889, "logits/chosen": -2.3353946208953857, "logits/rejected": -2.2935006618499756, "logps/chosen": -0.7500286102294922, "logps/rejected": -0.4041396975517273, "loss": 0.9066, "nll_loss": 0.930087685585022, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1030 }, { "epoch": 0.6159312999703879, "grad_norm": 9.125, "learning_rate": 1.928585890920641e-07, "log_odds_chosen": -0.2760196626186371, "log_odds_ratio": -0.9122495651245117, "logits/chosen": -2.303187608718872, "logits/rejected": -2.267937183380127, "logps/chosen": -0.5642871856689453, "logps/rejected": -0.42797571420669556, "loss": 0.9504, "nll_loss": 0.9056134223937988, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1040 }, { "epoch": 0.6218537163162571, "grad_norm": 8.25, "learning_rate": 1.8783754437040902e-07, "log_odds_chosen": -0.34286069869995117, "log_odds_ratio": -0.9555438756942749, "logits/chosen": -2.282454013824463, "logits/rejected": -2.247560739517212, "logps/chosen": -0.5602587461471558, "logps/rejected": -0.41198721528053284, "loss": 0.9096, "nll_loss": 0.8938838243484497, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1050 }, { "epoch": 0.6277761326621262, "grad_norm": 7.53125, "learning_rate": 1.8284308833500118e-07, "log_odds_chosen": -0.30499863624572754, "log_odds_ratio": -0.9316195249557495, "logits/chosen": -2.2879326343536377, "logits/rejected": -2.2628307342529297, "logps/chosen": -0.5727280378341675, "logps/rejected": -0.4329405725002289, "loss": 0.9415, "nll_loss": 0.9282618761062622, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1060 }, { "epoch": 0.6336985490079953, "grad_norm": 9.0, "learning_rate": 1.7787735725962756e-07, "log_odds_chosen": -0.371854692697525, "log_odds_ratio": -0.9633452296257019, "logits/chosen": -2.295264482498169, "logits/rejected": -2.2589457035064697, "logps/chosen": -0.608909010887146, "logps/rejected": -0.43556636571884155, "loss": 0.9874, "nll_loss": 0.9842734336853027, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1070 }, { "epoch": 0.6396209653538644, "grad_norm": 7.4375, "learning_rate": 1.7294247513157616e-07, "log_odds_chosen": -0.30945563316345215, "log_odds_ratio": -0.9170244932174683, "logits/chosen": -2.32027530670166, "logits/rejected": -2.2682743072509766, "logps/chosen": -0.5618667006492615, "logps/rejected": -0.42478686571121216, "loss": 0.9273, "nll_loss": 0.9494869112968445, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1080 }, { "epoch": 0.6455433816997335, "grad_norm": 8.25, "learning_rate": 1.6804055274314494e-07, "log_odds_chosen": -0.28144484758377075, "log_odds_ratio": -0.9044340252876282, "logits/chosen": -2.2794411182403564, "logits/rejected": -2.256417989730835, "logps/chosen": -0.5532391667366028, "logps/rejected": -0.43312329053878784, "loss": 0.9267, "nll_loss": 0.8960529565811157, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1090 }, { "epoch": 0.6514657980456026, "grad_norm": 7.8125, "learning_rate": 1.6317368678879496e-07, "log_odds_chosen": -0.28822919726371765, "log_odds_ratio": -0.9030183553695679, "logits/chosen": -2.3176181316375732, "logits/rejected": -2.2776379585266113, "logps/chosen": -0.5691734552383423, "logps/rejected": -0.4426758885383606, "loss": 0.9774, "nll_loss": 0.9514939188957214, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1100 }, { "epoch": 0.6573882143914718, "grad_norm": 10.0, "learning_rate": 1.5834395896833281e-07, "log_odds_chosen": -0.3918454051017761, "log_odds_ratio": -0.9793996810913086, "logits/chosen": -2.3301963806152344, "logits/rejected": -2.274294376373291, "logps/chosen": -0.6070703864097595, "logps/rejected": -0.42063984274864197, "loss": 0.9521, "nll_loss": 0.956030547618866, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1110 }, { "epoch": 0.6633106307373409, "grad_norm": 8.3125, "learning_rate": 1.535534350965075e-07, "log_odds_chosen": -0.3459760546684265, "log_odds_ratio": -0.9434119462966919, "logits/chosen": -2.3291070461273193, "logits/rejected": -2.3154056072235107, "logps/chosen": -0.5571088790893555, "logps/rejected": -0.3965280055999756, "loss": 0.9099, "nll_loss": 0.9126838445663452, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1120 }, { "epoch": 0.66923304708321, "grad_norm": 9.1875, "learning_rate": 1.4880416421940154e-07, "log_odds_chosen": -0.35412847995758057, "log_odds_ratio": -0.9552983045578003, "logits/chosen": -2.2702512741088867, "logits/rejected": -2.246307849884033, "logps/chosen": -0.6233401298522949, "logps/rejected": -0.4397760033607483, "loss": 1.0082, "nll_loss": 1.0226290225982666, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1130 }, { "epoch": 0.6751554634290791, "grad_norm": 10.8125, "learning_rate": 1.4409817773799459e-07, "log_odds_chosen": -0.30409640073776245, "log_odds_ratio": -0.933831512928009, "logits/chosen": -2.2992305755615234, "logits/rejected": -2.2570438385009766, "logps/chosen": -0.6016424894332886, "logps/rejected": -0.44892677664756775, "loss": 0.9551, "nll_loss": 0.9244022369384766, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1140 }, { "epoch": 0.6810778797749482, "grad_norm": 8.8125, "learning_rate": 1.3943748853927385e-07, "log_odds_chosen": -0.41090458631515503, "log_odds_ratio": -1.001075029373169, "logits/chosen": -2.299729824066162, "logits/rejected": -2.2916903495788574, "logps/chosen": -0.6525920033454895, "logps/rejected": -0.4317931532859802, "loss": 0.9303, "nll_loss": 0.9261299967765808, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1150 }, { "epoch": 0.6870002961208173, "grad_norm": 8.3125, "learning_rate": 1.3482409013526436e-07, "log_odds_chosen": -0.42632365226745605, "log_odds_ratio": -1.0060193538665771, "logits/chosen": -2.2864601612091064, "logits/rejected": -2.2797439098358154, "logps/chosen": -0.615047037601471, "logps/rejected": -0.4228528141975403, "loss": 0.9621, "nll_loss": 0.9737777709960938, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1160 }, { "epoch": 0.6929227124666865, "grad_norm": 7.65625, "learning_rate": 1.302599558103456e-07, "log_odds_chosen": -0.3416286110877991, "log_odds_ratio": -0.971416175365448, "logits/chosen": -2.3440544605255127, "logits/rejected": -2.3086702823638916, "logps/chosen": -0.6329351663589478, "logps/rejected": -0.44783586263656616, "loss": 0.9418, "nll_loss": 0.9567440152168274, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1170 }, { "epoch": 0.6988451288125556, "grad_norm": 9.1875, "learning_rate": 1.257470377772214e-07, "log_odds_chosen": -0.37471523880958557, "log_odds_ratio": -0.9742682576179504, "logits/chosen": -2.320568323135376, "logits/rejected": -2.2932517528533936, "logps/chosen": -0.5956822633743286, "logps/rejected": -0.410876601934433, "loss": 0.9619, "nll_loss": 0.9405835270881653, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1180 }, { "epoch": 0.7047675451584247, "grad_norm": 7.96875, "learning_rate": 1.2128726634190046e-07, "log_odds_chosen": -0.3462384343147278, "log_odds_ratio": -0.9395328760147095, "logits/chosen": -2.3269436359405518, "logits/rejected": -2.2818374633789062, "logps/chosen": -0.5803397297859192, "logps/rejected": -0.4099668860435486, "loss": 0.9172, "nll_loss": 0.9042008519172668, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1190 }, { "epoch": 0.7106899615042938, "grad_norm": 9.5625, "learning_rate": 1.1688254907804992e-07, "log_odds_chosen": -0.3338465392589569, "log_odds_ratio": -0.9492910504341125, "logits/chosen": -2.282212495803833, "logits/rejected": -2.2418830394744873, "logps/chosen": -0.6125479340553284, "logps/rejected": -0.45005935430526733, "loss": 0.9674, "nll_loss": 0.9735835194587708, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1200 }, { "epoch": 0.7166123778501629, "grad_norm": 7.5, "learning_rate": 1.1253477001106956e-07, "log_odds_chosen": -0.26436474919319153, "log_odds_ratio": -0.8985050916671753, "logits/chosen": -2.259978771209717, "logits/rejected": -2.223177433013916, "logps/chosen": -0.5662964582443237, "logps/rejected": -0.4440518915653229, "loss": 0.9408, "nll_loss": 0.90367591381073, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1210 }, { "epoch": 0.722534794196032, "grad_norm": 10.4375, "learning_rate": 1.0824578881224065e-07, "log_odds_chosen": -0.24436886608600616, "log_odds_ratio": -0.8822824358940125, "logits/chosen": -2.332968235015869, "logits/rejected": -2.3182759284973145, "logps/chosen": -0.539296567440033, "logps/rejected": -0.42076578736305237, "loss": 0.8899, "nll_loss": 0.8598573803901672, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1220 }, { "epoch": 0.728457210541901, "grad_norm": 8.125, "learning_rate": 1.0401744000328918e-07, "log_odds_chosen": -0.28977444767951965, "log_odds_ratio": -0.9303587675094604, "logits/chosen": -2.2798142433166504, "logits/rejected": -2.2761147022247314, "logps/chosen": -0.5908230543136597, "logps/rejected": -0.449887216091156, "loss": 0.9195, "nll_loss": 0.9145529866218567, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1230 }, { "epoch": 0.7343796268877703, "grad_norm": 9.375, "learning_rate": 9.985153217170902e-08, "log_odds_chosen": -0.357065886259079, "log_odds_ratio": -0.9588850140571594, "logits/chosen": -2.3385989665985107, "logits/rejected": -2.323024034500122, "logps/chosen": -0.6093414425849915, "logps/rejected": -0.4347008168697357, "loss": 1.008, "nll_loss": 1.0087924003601074, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1240 }, { "epoch": 0.7403020432336394, "grad_norm": 10.5, "learning_rate": 9.574984719717553e-08, "log_odds_chosen": -0.3260490894317627, "log_odds_ratio": -0.9441172480583191, "logits/chosen": -2.321216344833374, "logits/rejected": -2.302063226699829, "logps/chosen": -0.5826759338378906, "logps/rejected": -0.4284025728702545, "loss": 0.9308, "nll_loss": 0.9714264869689941, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1250 }, { "epoch": 0.7462244595795084, "grad_norm": 8.25, "learning_rate": 9.171413948938459e-08, "log_odds_chosen": -0.3101581037044525, "log_odds_ratio": -0.929049015045166, "logits/chosen": -2.310981273651123, "logits/rejected": -2.2564587593078613, "logps/chosen": -0.6019686460494995, "logps/rejected": -0.45062392950057983, "loss": 0.9576, "nll_loss": 0.9787800908088684, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1260 }, { "epoch": 0.7521468759253775, "grad_norm": 7.53125, "learning_rate": 8.774613523764049e-08, "log_odds_chosen": -0.369983971118927, "log_odds_ratio": -0.9563344120979309, "logits/chosen": -2.2968955039978027, "logits/rejected": -2.248944044113159, "logps/chosen": -0.5875356197357178, "logps/rejected": -0.4163094162940979, "loss": 0.9305, "nll_loss": 0.900018572807312, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1270 }, { "epoch": 0.7580692922712466, "grad_norm": 9.125, "learning_rate": 8.384753167251412e-08, "log_odds_chosen": -0.32507094740867615, "log_odds_ratio": -0.9397505521774292, "logits/chosen": -2.2516260147094727, "logits/rejected": -2.226477861404419, "logps/chosen": -0.5629323124885559, "logps/rejected": -0.41151052713394165, "loss": 0.8916, "nll_loss": 0.8657590746879578, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1280 }, { "epoch": 0.7639917086171157, "grad_norm": 8.375, "learning_rate": 8.001999633988942e-08, "log_odds_chosen": -0.3611569106578827, "log_odds_ratio": -0.957983672618866, "logits/chosen": -2.3322999477386475, "logits/rejected": -2.283409833908081, "logps/chosen": -0.5784574747085571, "logps/rejected": -0.4157342314720154, "loss": 0.8973, "nll_loss": 0.8929991722106934, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1290 }, { "epoch": 0.769914124962985, "grad_norm": 8.6875, "learning_rate": 7.62651663877042e-08, "log_odds_chosen": -0.26533371210098267, "log_odds_ratio": -0.9089031219482422, "logits/chosen": -2.2688136100769043, "logits/rejected": -2.2409274578094482, "logps/chosen": -0.5685082077980042, "logps/rejected": -0.4421761631965637, "loss": 0.9965, "nll_loss": 0.9676351547241211, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1300 }, { "epoch": 0.775836541308854, "grad_norm": 7.1875, "learning_rate": 7.258464786569549e-08, "log_odds_chosen": -0.28731244802474976, "log_odds_ratio": -0.9186748266220093, "logits/chosen": -2.3306045532226562, "logits/rejected": -2.2782888412475586, "logps/chosen": -0.5666372776031494, "logps/rejected": -0.43749627470970154, "loss": 0.9659, "nll_loss": 0.9544159770011902, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1310 }, { "epoch": 0.7817589576547231, "grad_norm": 9.75, "learning_rate": 6.898001503844483e-08, "log_odds_chosen": -0.5405977964401245, "log_odds_ratio": -1.1180508136749268, "logits/chosen": -2.3619232177734375, "logits/rejected": -2.3188281059265137, "logps/chosen": -0.7727476358413696, "logps/rejected": -0.4373859465122223, "loss": 0.9429, "nll_loss": 0.9921876192092896, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1320 }, { "epoch": 0.7876813740005922, "grad_norm": 9.3125, "learning_rate": 6.545280971202014e-08, "log_odds_chosen": -0.2534041702747345, "log_odds_ratio": -0.8981307744979858, "logits/chosen": -2.320126533508301, "logits/rejected": -2.289376974105835, "logps/chosen": -0.5534666180610657, "logps/rejected": -0.4292474687099457, "loss": 0.9168, "nll_loss": 0.9440558552742004, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1330 }, { "epoch": 0.7936037903464613, "grad_norm": 8.6875, "learning_rate": 6.200454057450022e-08, "log_odds_chosen": -0.36177825927734375, "log_odds_ratio": -0.9495649337768555, "logits/chosen": -2.2736241817474365, "logits/rejected": -2.226933479309082, "logps/chosen": -0.59937584400177, "logps/rejected": -0.42542099952697754, "loss": 0.9704, "nll_loss": 0.9059191942214966, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1340 }, { "epoch": 0.7995262066923304, "grad_norm": 9.0, "learning_rate": 5.863668255066492e-08, "log_odds_chosen": -0.313324511051178, "log_odds_ratio": -0.919102668762207, "logits/chosen": -2.270073890686035, "logits/rejected": -2.2372827529907227, "logps/chosen": -0.5768779516220093, "logps/rejected": -0.43518179655075073, "loss": 0.9124, "nll_loss": 0.9345908164978027, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1350 }, { "epoch": 0.8054486230381996, "grad_norm": 8.375, "learning_rate": 5.53506761711274e-08, "log_odds_chosen": -0.2887657880783081, "log_odds_ratio": -0.912114143371582, "logits/chosen": -2.305987596511841, "logits/rejected": -2.2752127647399902, "logps/chosen": -0.587549090385437, "logps/rejected": -0.4469973146915436, "loss": 0.9423, "nll_loss": 0.9892560243606567, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1360 }, { "epoch": 0.8113710393840687, "grad_norm": 8.6875, "learning_rate": 5.2147926956177174e-08, "log_odds_chosen": -0.45511436462402344, "log_odds_ratio": -1.0450800657272339, "logits/chosen": -2.294468641281128, "logits/rejected": -2.283860445022583, "logps/chosen": -0.6607165336608887, "logps/rejected": -0.4220455586910248, "loss": 0.9395, "nll_loss": 0.9603630304336548, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1370 }, { "epoch": 0.8172934557299378, "grad_norm": 15.3125, "learning_rate": 4.902980481459834e-08, "log_odds_chosen": -0.26191025972366333, "log_odds_ratio": -0.9043244123458862, "logits/chosen": -2.277843475341797, "logits/rejected": -2.248347520828247, "logps/chosen": -0.5755423307418823, "logps/rejected": -0.4490273594856262, "loss": 0.9003, "nll_loss": 0.9170975685119629, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1380 }, { "epoch": 0.8232158720758069, "grad_norm": 9.25, "learning_rate": 4.5997643457719646e-08, "log_odds_chosen": -0.35434719920158386, "log_odds_ratio": -0.9506848454475403, "logits/chosen": -2.295780658721924, "logits/rejected": -2.2894127368927, "logps/chosen": -0.5851874351501465, "logps/rejected": -0.4212135672569275, "loss": 0.8958, "nll_loss": 0.9056984186172485, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1390 }, { "epoch": 0.829138288421676, "grad_norm": 8.75, "learning_rate": 4.305273982894772e-08, "log_odds_chosen": -0.33616143465042114, "log_odds_ratio": -0.9503694772720337, "logits/chosen": -2.3287465572357178, "logits/rejected": -2.286414623260498, "logps/chosen": -0.6181541681289673, "logps/rejected": -0.4454525113105774, "loss": 0.9425, "nll_loss": 0.9352006912231445, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1400 }, { "epoch": 0.8350607047675451, "grad_norm": 9.8125, "learning_rate": 4.0196353549026786e-08, "log_odds_chosen": -0.30044835805892944, "log_odds_ratio": -0.9167086482048035, "logits/chosen": -2.298393487930298, "logits/rejected": -2.26066255569458, "logps/chosen": -0.5805574655532837, "logps/rejected": -0.43902960419654846, "loss": 0.9712, "nll_loss": 1.0039526224136353, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1410 }, { "epoch": 0.8409831211134142, "grad_norm": 7.75, "learning_rate": 3.742970637726181e-08, "log_odds_chosen": -0.179987370967865, "log_odds_ratio": -0.8582404255867004, "logits/chosen": -2.3168177604675293, "logits/rejected": -2.269207000732422, "logps/chosen": -0.5285545587539673, "logps/rejected": -0.44142407178878784, "loss": 0.9201, "nll_loss": 0.9034355878829956, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1420 }, { "epoch": 0.8469055374592834, "grad_norm": 8.3125, "learning_rate": 3.4753981688937284e-08, "log_odds_chosen": -0.3474799394607544, "log_odds_ratio": -0.9500767588615417, "logits/chosen": -2.2948362827301025, "logits/rejected": -2.2666220664978027, "logps/chosen": -0.5843050479888916, "logps/rejected": -0.4246344566345215, "loss": 0.9549, "nll_loss": 0.9555429220199585, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1430 }, { "epoch": 0.8528279538051525, "grad_norm": 8.25, "learning_rate": 3.217032396915265e-08, "log_odds_chosen": -0.40568438172340393, "log_odds_ratio": -1.0082272291183472, "logits/chosen": -2.3024380207061768, "logits/rejected": -2.268986701965332, "logps/chosen": -0.6786967515945435, "logps/rejected": -0.44073349237442017, "loss": 0.9636, "nll_loss": 0.9871211051940918, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1440 }, { "epoch": 0.8587503701510216, "grad_norm": 9.75, "learning_rate": 2.9679838323293404e-08, "log_odds_chosen": -0.4226885437965393, "log_odds_ratio": -1.0269486904144287, "logits/chosen": -2.2947676181793213, "logits/rejected": -2.2655680179595947, "logps/chosen": -0.6772679090499878, "logps/rejected": -0.44576793909072876, "loss": 0.9064, "nll_loss": 0.9240104556083679, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1450 }, { "epoch": 0.8646727864968907, "grad_norm": 9.6875, "learning_rate": 2.728359000434488e-08, "log_odds_chosen": -0.34337377548217773, "log_odds_ratio": -0.9470311403274536, "logits/chosen": -2.3264529705047607, "logits/rejected": -2.290132761001587, "logps/chosen": -0.5465956926345825, "logps/rejected": -0.41503897309303284, "loss": 0.9481, "nll_loss": 0.8961936831474304, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1460 }, { "epoch": 0.8705952028427598, "grad_norm": 8.8125, "learning_rate": 2.498260395725302e-08, "log_odds_chosen": -0.3448273539543152, "log_odds_ratio": -0.9480770230293274, "logits/chosen": -2.293290376663208, "logits/rejected": -2.278653144836426, "logps/chosen": -0.5983850955963135, "logps/rejected": -0.44902753829956055, "loss": 0.9406, "nll_loss": 0.93559330701828, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1470 }, { "epoch": 0.8765176191886289, "grad_norm": 8.5, "learning_rate": 2.2777864380525426e-08, "log_odds_chosen": -0.29847949743270874, "log_odds_ratio": -0.9293072819709778, "logits/chosen": -2.2964632511138916, "logits/rejected": -2.2744767665863037, "logps/chosen": -0.59266597032547, "logps/rejected": -0.4337979853153229, "loss": 0.9143, "nll_loss": 0.8765565752983093, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1480 }, { "epoch": 0.8824400355344981, "grad_norm": 8.4375, "learning_rate": 2.0670314305261423e-08, "log_odds_chosen": -0.2956581115722656, "log_odds_ratio": -0.914827823638916, "logits/chosen": -2.312617540359497, "logits/rejected": -2.2852249145507812, "logps/chosen": -0.5551884770393372, "logps/rejected": -0.42470401525497437, "loss": 0.8906, "nll_loss": 0.9051159620285034, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1490 }, { "epoch": 0.8883624518803672, "grad_norm": 8.5, "learning_rate": 1.866085519178995e-08, "log_odds_chosen": -0.3115543723106384, "log_odds_ratio": -0.9432921409606934, "logits/chosen": -2.294912815093994, "logits/rejected": -2.2731730937957764, "logps/chosen": -0.6179423332214355, "logps/rejected": -0.47528520226478577, "loss": 0.9807, "nll_loss": 0.9959957003593445, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1500 }, { "epoch": 0.8942848682262363, "grad_norm": 8.0625, "learning_rate": 1.675034654408894e-08, "log_odds_chosen": -0.3954925239086151, "log_odds_ratio": -0.966839611530304, "logits/chosen": -2.331923007965088, "logits/rejected": -2.3019633293151855, "logps/chosen": -0.5650435090065002, "logps/rejected": -0.40297931432724, "loss": 0.9211, "nll_loss": 0.9447514414787292, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1510 }, { "epoch": 0.9002072845721054, "grad_norm": 8.5625, "learning_rate": 1.4939605542150595e-08, "log_odds_chosen": -0.2961687445640564, "log_odds_ratio": -0.9432598352432251, "logits/chosen": -2.31669545173645, "logits/rejected": -2.2790303230285645, "logps/chosen": -0.6368409395217896, "logps/rejected": -0.466596782207489, "loss": 0.9972, "nll_loss": 0.9845758676528931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1520 }, { "epoch": 0.9061297009179745, "grad_norm": 8.8125, "learning_rate": 1.3229406692449791e-08, "log_odds_chosen": -0.22676777839660645, "log_odds_ratio": -0.8955879211425781, "logits/chosen": -2.2553787231445312, "logits/rejected": -2.228121280670166, "logps/chosen": -0.5664690136909485, "logps/rejected": -0.45375269651412964, "loss": 0.9651, "nll_loss": 0.9422292709350586, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1530 }, { "epoch": 0.9120521172638436, "grad_norm": 7.6875, "learning_rate": 1.162048149666503e-08, "log_odds_chosen": -0.28153032064437866, "log_odds_ratio": -0.9496873617172241, "logits/chosen": -2.3055193424224854, "logits/rejected": -2.2621009349823, "logps/chosen": -0.6041845083236694, "logps/rejected": -0.4735984802246094, "loss": 0.9464, "nll_loss": 0.9381273984909058, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1540 }, { "epoch": 0.9179745336097128, "grad_norm": 8.3125, "learning_rate": 1.0113518138794047e-08, "log_odds_chosen": -0.3274211287498474, "log_odds_ratio": -0.9445363283157349, "logits/chosen": -2.2565391063690186, "logits/rejected": -2.233027935028076, "logps/chosen": -0.5867388844490051, "logps/rejected": -0.4352657198905945, "loss": 0.9594, "nll_loss": 0.9287152290344238, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1550 }, { "epoch": 0.9238969499555819, "grad_norm": 8.625, "learning_rate": 8.709161190797565e-09, "log_odds_chosen": -0.23092766106128693, "log_odds_ratio": -0.8911072611808777, "logits/chosen": -2.3257815837860107, "logits/rejected": -2.29530668258667, "logps/chosen": -0.5526595115661621, "logps/rejected": -0.4391084611415863, "loss": 0.9337, "nll_loss": 0.9090098142623901, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1560 }, { "epoch": 0.929819366301451, "grad_norm": 9.0, "learning_rate": 7.408011336897141e-09, "log_odds_chosen": -0.4632336497306824, "log_odds_ratio": -1.089444875717163, "logits/chosen": -2.346909284591675, "logits/rejected": -2.334372043609619, "logps/chosen": -0.7571093440055847, "logps/rejected": -0.4586968421936035, "loss": 0.9736, "nll_loss": 1.0062029361724854, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1570 }, { "epoch": 0.9357417826473201, "grad_norm": 7.46875, "learning_rate": 6.210625116645135e-09, "log_odds_chosen": -0.41088682413101196, "log_odds_ratio": -0.9913327097892761, "logits/chosen": -2.3505208492279053, "logits/rejected": -2.3092150688171387, "logps/chosen": -0.6176980137825012, "logps/rejected": -0.42517074942588806, "loss": 0.9028, "nll_loss": 0.8675041198730469, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1580 }, { "epoch": 0.9416641989931892, "grad_norm": 8.9375, "learning_rate": 5.117514686876378e-09, "log_odds_chosen": -0.29983749985694885, "log_odds_ratio": -0.9341946840286255, "logits/chosen": -2.3163905143737793, "logits/rejected": -2.281881809234619, "logps/chosen": -0.569345235824585, "logps/rejected": -0.4335504174232483, "loss": 0.946, "nll_loss": 0.9458128809928894, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1590 }, { "epoch": 0.9475866153390583, "grad_norm": 8.25, "learning_rate": 4.1291476026441565e-09, "log_odds_chosen": -0.21767720580101013, "log_odds_ratio": -0.8741191029548645, "logits/chosen": -2.2778666019439697, "logits/rejected": -2.2554242610931396, "logps/chosen": -0.560379147529602, "logps/rejected": -0.44631558656692505, "loss": 0.897, "nll_loss": 0.8650028109550476, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1600 }, { "epoch": 0.9535090316849274, "grad_norm": 7.96875, "learning_rate": 3.2459466172331253e-09, "log_odds_chosen": -0.35443753004074097, "log_odds_ratio": -0.9958807229995728, "logits/chosen": -2.2850985527038574, "logits/rejected": -2.264432430267334, "logps/chosen": -0.6543992757797241, "logps/rejected": -0.436093807220459, "loss": 0.9838, "nll_loss": 0.9750429391860962, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1610 }, { "epoch": 0.9594314480307966, "grad_norm": 10.25, "learning_rate": 2.4682895013354854e-09, "log_odds_chosen": -0.3622002899646759, "log_odds_ratio": -0.9864064455032349, "logits/chosen": -2.287553548812866, "logits/rejected": -2.2655410766601562, "logps/chosen": -0.6380153894424438, "logps/rejected": -0.4244503378868103, "loss": 0.9213, "nll_loss": 0.9489747881889343, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1620 }, { "epoch": 0.9653538643766657, "grad_norm": 9.1875, "learning_rate": 1.7965088814675677e-09, "log_odds_chosen": -0.4782753586769104, "log_odds_ratio": -1.0607492923736572, "logits/chosen": -2.290717840194702, "logits/rejected": -2.272459030151367, "logps/chosen": -0.6781035661697388, "logps/rejected": -0.4276870787143707, "loss": 0.9333, "nll_loss": 0.9644565582275391, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1630 }, { "epoch": 0.9712762807225348, "grad_norm": 9.125, "learning_rate": 1.2308920976958348e-09, "log_odds_chosen": -0.29858607053756714, "log_odds_ratio": -0.9460258483886719, "logits/chosen": -2.269747734069824, "logits/rejected": -2.247730016708374, "logps/chosen": -0.6245580911636353, "logps/rejected": -0.45077449083328247, "loss": 0.906, "nll_loss": 0.9039252996444702, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1640 }, { "epoch": 0.9771986970684039, "grad_norm": 9.5625, "learning_rate": 7.716810807330276e-10, "log_odds_chosen": -0.4411376416683197, "log_odds_ratio": -1.0094521045684814, "logits/chosen": -2.2869138717651367, "logits/rejected": -2.24787974357605, "logps/chosen": -0.6290577054023743, "logps/rejected": -0.4051317572593689, "loss": 0.9304, "nll_loss": 0.9296571016311646, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1650 }, { "epoch": 0.983121113414273, "grad_norm": 10.125, "learning_rate": 4.190722484575804e-10, "log_odds_chosen": -0.3509382903575897, "log_odds_ratio": -0.9882933497428894, "logits/chosen": -2.2927916049957275, "logits/rejected": -2.262193202972412, "logps/chosen": -0.6660831570625305, "logps/rejected": -0.4522073268890381, "loss": 0.9544, "nll_loss": 0.9784467816352844, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1660 }, { "epoch": 0.9890435297601421, "grad_norm": 7.71875, "learning_rate": 1.732164218998522e-10, "log_odds_chosen": -0.35314035415649414, "log_odds_ratio": -0.9515780210494995, "logits/chosen": -2.265188694000244, "logits/rejected": -2.2218968868255615, "logps/chosen": -0.5966526865959167, "logps/rejected": -0.4354891777038574, "loss": 0.9111, "nll_loss": 0.9058610796928406, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1670 }, { "epoch": 0.9949659461060113, "grad_norm": 9.6875, "learning_rate": 3.4218760731730136e-11, "log_odds_chosen": -0.3034502863883972, "log_odds_ratio": -0.9285211563110352, "logits/chosen": -2.339616298675537, "logits/rejected": -2.2971951961517334, "logps/chosen": -0.5794862508773804, "logps/rejected": -0.43610063195228577, "loss": 0.9681, "nll_loss": 0.9744264483451843, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1680 }, { "epoch": 0.9997038791827065, "step": 1688, "total_flos": 0.0, "train_loss": 0.9736523162132191, "train_runtime": 25409.1611, "train_samples_per_second": 2.126, "train_steps_per_second": 0.066 } ], "logging_steps": 10, "max_steps": 1688, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }