|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 2907, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.498650312423706, |
|
"logits/rejected": -2.6091811656951904, |
|
"logps/chosen": -96.33251953125, |
|
"logps/rejected": -92.86735534667969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -3.101959228515625, |
|
"logits/rejected": -2.9548017978668213, |
|
"logps/chosen": -224.08120727539062, |
|
"logps/rejected": -199.21075439453125, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.005350981839001179, |
|
"rewards/margins": 0.0050353375263512135, |
|
"rewards/rejected": 0.000315645185764879, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.9904165267944336, |
|
"logits/rejected": -2.987039566040039, |
|
"logps/chosen": -278.6884765625, |
|
"logps/rejected": -236.29263305664062, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007055492140352726, |
|
"rewards/margins": 0.013753985986113548, |
|
"rewards/rejected": -0.006698492914438248, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -3.097510814666748, |
|
"logits/rejected": -3.1187407970428467, |
|
"logps/chosen": -298.73443603515625, |
|
"logps/rejected": -228.46926879882812, |
|
"loss": 0.6756, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.02428005076944828, |
|
"rewards/margins": 0.059538763016462326, |
|
"rewards/rejected": -0.035258710384368896, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -3.0823495388031006, |
|
"logits/rejected": -3.0725135803222656, |
|
"logps/chosen": -384.5050354003906, |
|
"logps/rejected": -236.9876708984375, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1019597053527832, |
|
"rewards/margins": 0.17022760212421417, |
|
"rewards/rejected": -0.06826789677143097, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.94450306892395, |
|
"logits/rejected": -2.8841047286987305, |
|
"logps/chosen": -278.482421875, |
|
"logps/rejected": -227.1036834716797, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06523530930280685, |
|
"rewards/margins": 0.1872607171535492, |
|
"rewards/rejected": -0.12202541530132294, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.963169574737549, |
|
"logits/rejected": -2.883527994155884, |
|
"logps/chosen": -304.78021240234375, |
|
"logps/rejected": -283.13482666015625, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0056328559294342995, |
|
"rewards/margins": 0.2135467529296875, |
|
"rewards/rejected": -0.21917958557605743, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.9630908966064453, |
|
"logits/rejected": -2.825876235961914, |
|
"logps/chosen": -303.33544921875, |
|
"logps/rejected": -160.35809326171875, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.044426899403333664, |
|
"rewards/margins": 0.4234214723110199, |
|
"rewards/rejected": -0.3789946138858795, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -3.0682828426361084, |
|
"logits/rejected": -2.9316720962524414, |
|
"logps/chosen": -249.2676239013672, |
|
"logps/rejected": -224.609619140625, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.14790545403957367, |
|
"rewards/margins": 0.6019492149353027, |
|
"rewards/rejected": -0.4540437161922455, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -3.0987212657928467, |
|
"logits/rejected": -3.0108442306518555, |
|
"logps/chosen": -216.3074188232422, |
|
"logps/rejected": -206.27590942382812, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.19666776061058044, |
|
"rewards/margins": 1.0184780359268188, |
|
"rewards/rejected": -0.821810245513916, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -3.091062068939209, |
|
"logits/rejected": -2.9374005794525146, |
|
"logps/chosen": -314.8296203613281, |
|
"logps/rejected": -176.48611450195312, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.22487859427928925, |
|
"rewards/margins": 0.7823053598403931, |
|
"rewards/rejected": -0.5574267506599426, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -3.054979085922241, |
|
"eval_logits/rejected": -3.0135693550109863, |
|
"eval_logps/chosen": -254.21786499023438, |
|
"eval_logps/rejected": -204.1269073486328, |
|
"eval_loss": 0.5208388566970825, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": 0.05637993663549423, |
|
"eval_rewards/margins": 0.8084549307823181, |
|
"eval_rewards/rejected": -0.7520750761032104, |
|
"eval_runtime": 42.4363, |
|
"eval_samples_per_second": 47.129, |
|
"eval_steps_per_second": 0.377, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.8181204795837402, |
|
"logits/rejected": -2.7204036712646484, |
|
"logps/chosen": -312.5907897949219, |
|
"logps/rejected": -214.4507293701172, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1282820850610733, |
|
"rewards/margins": 0.89714515209198, |
|
"rewards/rejected": -1.025427222251892, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.9315900802612305, |
|
"logits/rejected": -2.943131923675537, |
|
"logps/chosen": -306.6047668457031, |
|
"logps/rejected": -288.87103271484375, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.17532110214233398, |
|
"rewards/margins": 0.4478148818016052, |
|
"rewards/rejected": -0.6231359839439392, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -3.0673885345458984, |
|
"logits/rejected": -2.946303606033325, |
|
"logps/chosen": -288.3648986816406, |
|
"logps/rejected": -202.46240234375, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.5023866891860962, |
|
"rewards/margins": 1.2662431001663208, |
|
"rewards/rejected": -0.7638564705848694, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.935978412628174, |
|
"logits/rejected": -2.9787638187408447, |
|
"logps/chosen": -225.0258331298828, |
|
"logps/rejected": -197.45791625976562, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.20422056317329407, |
|
"rewards/margins": 0.8866212964057922, |
|
"rewards/rejected": -0.6824007630348206, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.891925573348999, |
|
"logits/rejected": -3.011976480484009, |
|
"logps/chosen": -375.8110046386719, |
|
"logps/rejected": -204.3079376220703, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.01360023021697998, |
|
"rewards/margins": 0.7446385025978088, |
|
"rewards/rejected": -0.7582387924194336, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -3.1294617652893066, |
|
"logits/rejected": -3.121156692504883, |
|
"logps/chosen": -448.22021484375, |
|
"logps/rejected": -326.61553955078125, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09536926448345184, |
|
"rewards/margins": 1.3267143964767456, |
|
"rewards/rejected": -1.4220836162567139, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.906979560852051, |
|
"logits/rejected": -2.9241394996643066, |
|
"logps/chosen": -279.4745178222656, |
|
"logps/rejected": -243.3614501953125, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1525634229183197, |
|
"rewards/margins": 0.8320217132568359, |
|
"rewards/rejected": -0.984585165977478, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -3.15456485748291, |
|
"logits/rejected": -3.120079517364502, |
|
"logps/chosen": -283.2970886230469, |
|
"logps/rejected": -260.8895568847656, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04973051697015762, |
|
"rewards/margins": 0.9862112998962402, |
|
"rewards/rejected": -1.0359418392181396, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -3.1225669384002686, |
|
"logits/rejected": -3.066546678543091, |
|
"logps/chosen": -312.17938232421875, |
|
"logps/rejected": -226.7977752685547, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.060521602630615234, |
|
"rewards/margins": 1.1039137840270996, |
|
"rewards/rejected": -1.0433921813964844, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -3.0691170692443848, |
|
"logits/rejected": -3.0096538066864014, |
|
"logps/chosen": -269.39080810546875, |
|
"logps/rejected": -207.4473114013672, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.38108959794044495, |
|
"rewards/margins": 0.5996788740158081, |
|
"rewards/rejected": -0.9807685017585754, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -3.140662908554077, |
|
"eval_logits/rejected": -3.105587959289551, |
|
"eval_logps/chosen": -254.91360473632812, |
|
"eval_logps/rejected": -209.2888946533203, |
|
"eval_loss": 0.4881684482097626, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -0.013194993138313293, |
|
"eval_rewards/margins": 1.2550796270370483, |
|
"eval_rewards/rejected": -1.2682745456695557, |
|
"eval_runtime": 43.0581, |
|
"eval_samples_per_second": 46.449, |
|
"eval_steps_per_second": 0.372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -3.2085659503936768, |
|
"logits/rejected": -3.0949082374572754, |
|
"logps/chosen": -386.8721008300781, |
|
"logps/rejected": -247.3134307861328, |
|
"loss": 0.4477, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13939574360847473, |
|
"rewards/margins": 1.158414602279663, |
|
"rewards/rejected": -1.2978103160858154, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.9630062580108643, |
|
"logits/rejected": -2.9715027809143066, |
|
"logps/chosen": -173.3898468017578, |
|
"logps/rejected": -256.95513916015625, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14368362724781036, |
|
"rewards/margins": 0.8015314340591431, |
|
"rewards/rejected": -0.945215106010437, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.9683520793914795, |
|
"logits/rejected": -3.0452380180358887, |
|
"logps/chosen": -347.9017028808594, |
|
"logps/rejected": -276.4244079589844, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.010433333925902843, |
|
"rewards/margins": 1.247714877128601, |
|
"rewards/rejected": -1.2581483125686646, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -3.1391446590423584, |
|
"logits/rejected": -3.1013171672821045, |
|
"logps/chosen": -374.7413024902344, |
|
"logps/rejected": -270.0039367675781, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08338235318660736, |
|
"rewards/margins": 0.9544317126274109, |
|
"rewards/rejected": -1.0378139019012451, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -3.069174289703369, |
|
"logits/rejected": -2.962937831878662, |
|
"logps/chosen": -285.2818908691406, |
|
"logps/rejected": -281.72015380859375, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09429286420345306, |
|
"rewards/margins": 1.3244707584381104, |
|
"rewards/rejected": -1.4187636375427246, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.8843207359313965, |
|
"logits/rejected": -2.8770415782928467, |
|
"logps/chosen": -211.5966033935547, |
|
"logps/rejected": -242.7068328857422, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.43082481622695923, |
|
"rewards/margins": 1.3341903686523438, |
|
"rewards/rejected": -1.7650152444839478, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -3.0725252628326416, |
|
"logits/rejected": -2.915465831756592, |
|
"logps/chosen": -245.35208129882812, |
|
"logps/rejected": -256.63916015625, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.32778286933898926, |
|
"rewards/margins": 1.0782946348190308, |
|
"rewards/rejected": -1.40607750415802, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -3.064030885696411, |
|
"logits/rejected": -3.022933006286621, |
|
"logps/chosen": -342.7411804199219, |
|
"logps/rejected": -256.2796936035156, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2716239094734192, |
|
"rewards/margins": 1.2219887971878052, |
|
"rewards/rejected": -1.4936127662658691, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -3.0413384437561035, |
|
"logits/rejected": -2.969083070755005, |
|
"logps/chosen": -289.0894470214844, |
|
"logps/rejected": -221.3031463623047, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3141915202140808, |
|
"rewards/margins": 1.3370590209960938, |
|
"rewards/rejected": -1.6512506008148193, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982798165137615e-07, |
|
"logits/chosen": -2.8817899227142334, |
|
"logits/rejected": -2.924848794937134, |
|
"logps/chosen": -220.947021484375, |
|
"logps/rejected": -265.0274658203125, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5559160113334656, |
|
"rewards/margins": 0.9888324737548828, |
|
"rewards/rejected": -1.5447485446929932, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -3.13276743888855, |
|
"eval_logits/rejected": -3.0809109210968018, |
|
"eval_logps/chosen": -255.81625366210938, |
|
"eval_logps/rejected": -210.61862182617188, |
|
"eval_loss": 0.5038384795188904, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -0.10346230119466782, |
|
"eval_rewards/margins": 1.297783613204956, |
|
"eval_rewards/rejected": -1.4012458324432373, |
|
"eval_runtime": 42.7501, |
|
"eval_samples_per_second": 46.784, |
|
"eval_steps_per_second": 0.374, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963685015290519e-07, |
|
"logits/chosen": -3.099226951599121, |
|
"logits/rejected": -2.9154460430145264, |
|
"logps/chosen": -370.572998046875, |
|
"logps/rejected": -260.42279052734375, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08727478235960007, |
|
"rewards/margins": 1.1489875316619873, |
|
"rewards/rejected": -1.0617127418518066, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944571865443424e-07, |
|
"logits/chosen": -2.9195303916931152, |
|
"logits/rejected": -2.822550058364868, |
|
"logps/chosen": -222.0548553466797, |
|
"logps/rejected": -160.9757080078125, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.027837419882416725, |
|
"rewards/margins": 1.3940068483352661, |
|
"rewards/rejected": -1.421844244003296, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.92545871559633e-07, |
|
"logits/chosen": -3.008601188659668, |
|
"logits/rejected": -3.1163241863250732, |
|
"logps/chosen": -331.88385009765625, |
|
"logps/rejected": -305.1783142089844, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.007843529805541039, |
|
"rewards/margins": 0.9688261151313782, |
|
"rewards/rejected": -0.960982620716095, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906345565749235e-07, |
|
"logits/chosen": -2.98130464553833, |
|
"logits/rejected": -2.839319944381714, |
|
"logps/chosen": -245.6178741455078, |
|
"logps/rejected": -155.1374969482422, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3822270333766937, |
|
"rewards/margins": 0.9632396697998047, |
|
"rewards/rejected": -1.3454667329788208, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.88723241590214e-07, |
|
"logits/chosen": -2.945957899093628, |
|
"logits/rejected": -2.7072620391845703, |
|
"logps/chosen": -395.33709716796875, |
|
"logps/rejected": -254.1487274169922, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.023961830884218216, |
|
"rewards/margins": 1.7189300060272217, |
|
"rewards/rejected": -1.6949679851531982, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868119266055046e-07, |
|
"logits/chosen": -2.8966143131256104, |
|
"logits/rejected": -2.9680020809173584, |
|
"logps/chosen": -226.3685760498047, |
|
"logps/rejected": -311.12890625, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.773408055305481, |
|
"rewards/margins": 1.1420116424560547, |
|
"rewards/rejected": -1.9154198169708252, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.849006116207951e-07, |
|
"logits/chosen": -3.101799964904785, |
|
"logits/rejected": -2.958263874053955, |
|
"logps/chosen": -321.9137878417969, |
|
"logps/rejected": -279.8211364746094, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.48245924711227417, |
|
"rewards/margins": 1.1618621349334717, |
|
"rewards/rejected": -1.6443214416503906, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.829892966360856e-07, |
|
"logits/chosen": -3.0152487754821777, |
|
"logits/rejected": -2.990954637527466, |
|
"logps/chosen": -284.4853820800781, |
|
"logps/rejected": -275.23974609375, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.22872860729694366, |
|
"rewards/margins": 1.912491798400879, |
|
"rewards/rejected": -1.6837631464004517, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810779816513762e-07, |
|
"logits/chosen": -3.0833258628845215, |
|
"logits/rejected": -3.007781982421875, |
|
"logps/chosen": -208.7422332763672, |
|
"logps/rejected": -203.84396362304688, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5863692760467529, |
|
"rewards/margins": 1.30623459815979, |
|
"rewards/rejected": -1.8926036357879639, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791666666666667e-07, |
|
"logits/chosen": -3.0465681552886963, |
|
"logits/rejected": -2.8939976692199707, |
|
"logps/chosen": -295.1468811035156, |
|
"logps/rejected": -208.3681182861328, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09109281003475189, |
|
"rewards/margins": 1.9571037292480469, |
|
"rewards/rejected": -1.8660109043121338, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -2.9643757343292236, |
|
"eval_logits/rejected": -2.9103996753692627, |
|
"eval_logps/chosen": -256.70428466796875, |
|
"eval_logps/rejected": -215.38914489746094, |
|
"eval_loss": 0.5154134631156921, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -0.19226306676864624, |
|
"eval_rewards/margins": 1.6860344409942627, |
|
"eval_rewards/rejected": -1.8782974481582642, |
|
"eval_runtime": 42.6041, |
|
"eval_samples_per_second": 46.944, |
|
"eval_steps_per_second": 0.376, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772553516819572e-07, |
|
"logits/chosen": -2.90427827835083, |
|
"logits/rejected": -2.846705675125122, |
|
"logps/chosen": -133.34495544433594, |
|
"logps/rejected": -192.99078369140625, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8221723437309265, |
|
"rewards/margins": 0.5950149297714233, |
|
"rewards/rejected": -1.417187213897705, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753440366972477e-07, |
|
"logits/chosen": -3.146049976348877, |
|
"logits/rejected": -3.0406510829925537, |
|
"logps/chosen": -287.80450439453125, |
|
"logps/rejected": -296.94378662109375, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.45591288805007935, |
|
"rewards/margins": 0.4107814431190491, |
|
"rewards/rejected": -0.8666942715644836, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7343272171253825e-07, |
|
"logits/chosen": -2.9290690422058105, |
|
"logits/rejected": -2.854919910430908, |
|
"logps/chosen": -266.37091064453125, |
|
"logps/rejected": -265.41650390625, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8489448428153992, |
|
"rewards/margins": 1.4326685667037964, |
|
"rewards/rejected": -2.28161358833313, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.715214067278288e-07, |
|
"logits/chosen": -2.983694076538086, |
|
"logits/rejected": -2.8835182189941406, |
|
"logps/chosen": -241.74203491210938, |
|
"logps/rejected": -232.90682983398438, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07152794301509857, |
|
"rewards/margins": 1.2356297969818115, |
|
"rewards/rejected": -1.3071579933166504, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.696100917431192e-07, |
|
"logits/chosen": -3.0933094024658203, |
|
"logits/rejected": -2.934081554412842, |
|
"logps/chosen": -261.3104553222656, |
|
"logps/rejected": -273.6822509765625, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0025750964414328337, |
|
"rewards/margins": 1.066935658454895, |
|
"rewards/rejected": -1.064360499382019, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6769877675840974e-07, |
|
"logits/chosen": -2.9390621185302734, |
|
"logits/rejected": -2.9396934509277344, |
|
"logps/chosen": -233.00534057617188, |
|
"logps/rejected": -216.82998657226562, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2104685753583908, |
|
"rewards/margins": 1.035353660583496, |
|
"rewards/rejected": -1.2458221912384033, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6578746177370027e-07, |
|
"logits/chosen": -3.032357692718506, |
|
"logits/rejected": -2.9270567893981934, |
|
"logps/chosen": -294.4527893066406, |
|
"logps/rejected": -158.98008728027344, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18893463909626007, |
|
"rewards/margins": 1.4661831855773926, |
|
"rewards/rejected": -1.6551177501678467, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.638761467889908e-07, |
|
"logits/chosen": -2.896667957305908, |
|
"logits/rejected": -2.9291975498199463, |
|
"logps/chosen": -211.07479858398438, |
|
"logps/rejected": -226.3457489013672, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.218103289604187, |
|
"rewards/margins": 1.827314019203186, |
|
"rewards/rejected": -2.045417308807373, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6196483180428133e-07, |
|
"logits/chosen": -2.929793119430542, |
|
"logits/rejected": -2.8522984981536865, |
|
"logps/chosen": -258.91400146484375, |
|
"logps/rejected": -175.8672637939453, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1329614520072937, |
|
"rewards/margins": 1.482403039932251, |
|
"rewards/rejected": -1.615364670753479, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.600535168195718e-07, |
|
"logits/chosen": -2.7829298973083496, |
|
"logits/rejected": -2.8831605911254883, |
|
"logps/chosen": -204.31448364257812, |
|
"logps/rejected": -230.2319793701172, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.32916760444641113, |
|
"rewards/margins": 0.9852824211120605, |
|
"rewards/rejected": -1.3144500255584717, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -3.0656890869140625, |
|
"eval_logits/rejected": -3.0061020851135254, |
|
"eval_logps/chosen": -254.57415771484375, |
|
"eval_logps/rejected": -213.1681671142578, |
|
"eval_loss": 0.49787724018096924, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": 0.02074863389134407, |
|
"eval_rewards/margins": 1.6769486665725708, |
|
"eval_rewards/rejected": -1.6561999320983887, |
|
"eval_runtime": 42.8458, |
|
"eval_samples_per_second": 46.679, |
|
"eval_steps_per_second": 0.373, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5814220183486234e-07, |
|
"logits/chosen": -2.8923087120056152, |
|
"logits/rejected": -2.8640570640563965, |
|
"logps/chosen": -368.39154052734375, |
|
"logps/rejected": -338.0500793457031, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.49800175428390503, |
|
"rewards/margins": 1.331751823425293, |
|
"rewards/rejected": -1.8297535181045532, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.562308868501529e-07, |
|
"logits/chosen": -2.9221696853637695, |
|
"logits/rejected": -2.7648494243621826, |
|
"logps/chosen": -258.91290283203125, |
|
"logps/rejected": -301.06298828125, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4446481168270111, |
|
"rewards/margins": 1.80561101436615, |
|
"rewards/rejected": -2.2502589225769043, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.543195718654434e-07, |
|
"logits/chosen": -2.8445491790771484, |
|
"logits/rejected": -2.7915587425231934, |
|
"logps/chosen": -322.16131591796875, |
|
"logps/rejected": -219.9912567138672, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5867956876754761, |
|
"rewards/margins": 0.7803589105606079, |
|
"rewards/rejected": -1.367154598236084, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5240825688073394e-07, |
|
"logits/chosen": -3.095763683319092, |
|
"logits/rejected": -2.850975513458252, |
|
"logps/chosen": -260.27935791015625, |
|
"logps/rejected": -244.0723419189453, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.33804479241371155, |
|
"rewards/margins": 1.2927238941192627, |
|
"rewards/rejected": -1.6307685375213623, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.504969418960244e-07, |
|
"logits/chosen": -3.1108663082122803, |
|
"logits/rejected": -2.967893123626709, |
|
"logps/chosen": -323.0700378417969, |
|
"logps/rejected": -306.6070251464844, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5680254101753235, |
|
"rewards/margins": 1.1552374362945557, |
|
"rewards/rejected": -1.7232627868652344, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4858562691131495e-07, |
|
"logits/chosen": -3.050872325897217, |
|
"logits/rejected": -2.9496700763702393, |
|
"logps/chosen": -264.33990478515625, |
|
"logps/rejected": -256.26849365234375, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7357984781265259, |
|
"rewards/margins": 0.9639188647270203, |
|
"rewards/rejected": -1.6997172832489014, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.466743119266055e-07, |
|
"logits/chosen": -2.926020383834839, |
|
"logits/rejected": -2.8725242614746094, |
|
"logps/chosen": -343.10809326171875, |
|
"logps/rejected": -339.7952880859375, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.068613052368164, |
|
"rewards/margins": 0.6752510070800781, |
|
"rewards/rejected": -1.743863821029663, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.44762996941896e-07, |
|
"logits/chosen": -2.9404196739196777, |
|
"logits/rejected": -2.867061138153076, |
|
"logps/chosen": -307.84832763671875, |
|
"logps/rejected": -239.6619110107422, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06973789632320404, |
|
"rewards/margins": 2.0842127799987793, |
|
"rewards/rejected": -2.1539506912231445, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4285168195718655e-07, |
|
"logits/chosen": -2.954078435897827, |
|
"logits/rejected": -2.972050189971924, |
|
"logps/chosen": -240.4013214111328, |
|
"logps/rejected": -257.27490234375, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3049514889717102, |
|
"rewards/margins": 1.0277297496795654, |
|
"rewards/rejected": -1.33268141746521, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.40940366972477e-07, |
|
"logits/chosen": -3.0263705253601074, |
|
"logits/rejected": -2.9350860118865967, |
|
"logps/chosen": -196.01318359375, |
|
"logps/rejected": -212.79452514648438, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.42911848425865173, |
|
"rewards/margins": 0.917349636554718, |
|
"rewards/rejected": -1.346468210220337, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -3.016953468322754, |
|
"eval_logits/rejected": -2.93742299079895, |
|
"eval_logps/chosen": -255.72560119628906, |
|
"eval_logps/rejected": -212.45274353027344, |
|
"eval_loss": 0.4907330274581909, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -0.09439453482627869, |
|
"eval_rewards/margins": 1.4902656078338623, |
|
"eval_rewards/rejected": -1.584660291671753, |
|
"eval_runtime": 42.0993, |
|
"eval_samples_per_second": 47.507, |
|
"eval_steps_per_second": 0.38, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3902905198776756e-07, |
|
"logits/chosen": -3.165318250656128, |
|
"logits/rejected": -2.9908506870269775, |
|
"logps/chosen": -407.2035217285156, |
|
"logps/rejected": -344.78912353515625, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.24180400371551514, |
|
"rewards/margins": 1.6740745306015015, |
|
"rewards/rejected": -1.9158786535263062, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.371177370030581e-07, |
|
"logits/chosen": -3.012429714202881, |
|
"logits/rejected": -2.929919958114624, |
|
"logps/chosen": -232.5025177001953, |
|
"logps/rejected": -212.27389526367188, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3227713704109192, |
|
"rewards/margins": 1.730791687965393, |
|
"rewards/rejected": -2.053563117980957, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.352064220183486e-07, |
|
"logits/chosen": -2.7762598991394043, |
|
"logits/rejected": -2.8393187522888184, |
|
"logps/chosen": -231.7938690185547, |
|
"logps/rejected": -236.4832763671875, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.46596893668174744, |
|
"rewards/margins": 1.896152138710022, |
|
"rewards/rejected": -2.362121343612671, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3329510703363915e-07, |
|
"logits/chosen": -3.0272417068481445, |
|
"logits/rejected": -2.8095943927764893, |
|
"logps/chosen": -315.4911804199219, |
|
"logps/rejected": -260.47344970703125, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6075493097305298, |
|
"rewards/margins": 1.673418402671814, |
|
"rewards/rejected": -2.2809677124023438, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313837920489297e-07, |
|
"logits/chosen": -2.8686397075653076, |
|
"logits/rejected": -2.8369593620300293, |
|
"logps/chosen": -298.10809326171875, |
|
"logps/rejected": -342.1612243652344, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6422223448753357, |
|
"rewards/margins": 1.362321138381958, |
|
"rewards/rejected": -2.0045435428619385, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2947247706422016e-07, |
|
"logits/chosen": -2.868638753890991, |
|
"logits/rejected": -2.918509006500244, |
|
"logps/chosen": -231.58792114257812, |
|
"logps/rejected": -275.41357421875, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6644395589828491, |
|
"rewards/margins": 1.1484898328781128, |
|
"rewards/rejected": -1.8129297494888306, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.275611620795107e-07, |
|
"logits/chosen": -2.945974349975586, |
|
"logits/rejected": -2.932396411895752, |
|
"logps/chosen": -334.83758544921875, |
|
"logps/rejected": -211.2024688720703, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9673671722412109, |
|
"rewards/margins": 1.478623628616333, |
|
"rewards/rejected": -2.445990562438965, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2564984709480123e-07, |
|
"logits/chosen": -2.999311923980713, |
|
"logits/rejected": -2.986499309539795, |
|
"logps/chosen": -356.9630432128906, |
|
"logps/rejected": -250.017822265625, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9130574464797974, |
|
"rewards/margins": 1.1842575073242188, |
|
"rewards/rejected": -2.0973148345947266, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2373853211009176e-07, |
|
"logits/chosen": -2.953401803970337, |
|
"logits/rejected": -2.949079990386963, |
|
"logps/chosen": -329.53131103515625, |
|
"logps/rejected": -244.694091796875, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6074694395065308, |
|
"rewards/margins": 1.2550690174102783, |
|
"rewards/rejected": -1.8625385761260986, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2182721712538224e-07, |
|
"logits/chosen": -2.9399592876434326, |
|
"logits/rejected": -2.97617506980896, |
|
"logps/chosen": -245.843505859375, |
|
"logps/rejected": -201.2354736328125, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6896957755088806, |
|
"rewards/margins": 0.7539043426513672, |
|
"rewards/rejected": -1.4435999393463135, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -3.0127830505371094, |
|
"eval_logits/rejected": -2.947491407394409, |
|
"eval_logps/chosen": -259.0303649902344, |
|
"eval_logps/rejected": -213.84410095214844, |
|
"eval_loss": 0.4927924573421478, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -0.42487117648124695, |
|
"eval_rewards/margins": 1.2989236116409302, |
|
"eval_rewards/rejected": -1.7237945795059204, |
|
"eval_runtime": 42.9706, |
|
"eval_samples_per_second": 46.543, |
|
"eval_steps_per_second": 0.372, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199159021406727e-07, |
|
"logits/chosen": -2.9642043113708496, |
|
"logits/rejected": -2.843035936355591, |
|
"logps/chosen": -224.43093872070312, |
|
"logps/rejected": -250.59646606445312, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9030290842056274, |
|
"rewards/margins": 0.5707172155380249, |
|
"rewards/rejected": -1.4737461805343628, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1800458715596325e-07, |
|
"logits/chosen": -2.856870174407959, |
|
"logits/rejected": -2.928572654724121, |
|
"logps/chosen": -238.6852569580078, |
|
"logps/rejected": -312.410400390625, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6243971586227417, |
|
"rewards/margins": 1.3136012554168701, |
|
"rewards/rejected": -1.9379985332489014, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.160932721712538e-07, |
|
"logits/chosen": -2.917513608932495, |
|
"logits/rejected": -2.7057008743286133, |
|
"logps/chosen": -279.5296630859375, |
|
"logps/rejected": -293.60882568359375, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.43406224250793457, |
|
"rewards/margins": 1.3185964822769165, |
|
"rewards/rejected": -1.752658486366272, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.141819571865443e-07, |
|
"logits/chosen": -2.76603364944458, |
|
"logits/rejected": -2.7033543586730957, |
|
"logps/chosen": -257.63519287109375, |
|
"logps/rejected": -247.8678436279297, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7212907075881958, |
|
"rewards/margins": 1.1663872003555298, |
|
"rewards/rejected": -1.887677788734436, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1227064220183485e-07, |
|
"logits/chosen": -2.826500654220581, |
|
"logits/rejected": -2.8519866466522217, |
|
"logps/chosen": -221.1510009765625, |
|
"logps/rejected": -242.4302215576172, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5560463666915894, |
|
"rewards/margins": 1.6448535919189453, |
|
"rewards/rejected": -2.200899839401245, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.103593272171253e-07, |
|
"logits/chosen": -2.749040126800537, |
|
"logits/rejected": -2.7938055992126465, |
|
"logps/chosen": -357.89544677734375, |
|
"logps/rejected": -372.37030029296875, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.965776801109314, |
|
"rewards/margins": 0.8319292068481445, |
|
"rewards/rejected": -1.7977060079574585, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0844801223241586e-07, |
|
"logits/chosen": -2.931018829345703, |
|
"logits/rejected": -2.884331703186035, |
|
"logps/chosen": -241.74319458007812, |
|
"logps/rejected": -216.0192108154297, |
|
"loss": 0.4671, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7199130058288574, |
|
"rewards/margins": 0.7224114537239075, |
|
"rewards/rejected": -1.4423243999481201, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.065366972477064e-07, |
|
"logits/chosen": -2.8741250038146973, |
|
"logits/rejected": -2.789846658706665, |
|
"logps/chosen": -280.8083801269531, |
|
"logps/rejected": -227.48275756835938, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22569909691810608, |
|
"rewards/margins": 1.9187443256378174, |
|
"rewards/rejected": -2.1444432735443115, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.046253822629969e-07, |
|
"logits/chosen": -2.7253146171569824, |
|
"logits/rejected": -2.700835704803467, |
|
"logps/chosen": -168.32371520996094, |
|
"logps/rejected": -203.12583923339844, |
|
"loss": 0.4728, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5355602502822876, |
|
"rewards/margins": 0.9603021740913391, |
|
"rewards/rejected": -1.495862364768982, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0271406727828745e-07, |
|
"logits/chosen": -2.994239091873169, |
|
"logits/rejected": -2.8887484073638916, |
|
"logps/chosen": -285.24359130859375, |
|
"logps/rejected": -230.2656707763672, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.39388689398765564, |
|
"rewards/margins": 1.406872034072876, |
|
"rewards/rejected": -1.800758957862854, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -2.9182941913604736, |
|
"eval_logits/rejected": -2.8454627990722656, |
|
"eval_logps/chosen": -256.34844970703125, |
|
"eval_logps/rejected": -215.71998596191406, |
|
"eval_loss": 0.4767328202724457, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -0.15668097138404846, |
|
"eval_rewards/margins": 1.754701018333435, |
|
"eval_rewards/rejected": -1.9113819599151611, |
|
"eval_runtime": 42.7022, |
|
"eval_samples_per_second": 46.836, |
|
"eval_steps_per_second": 0.375, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.00802752293578e-07, |
|
"logits/chosen": -2.85206937789917, |
|
"logits/rejected": -2.8010294437408447, |
|
"logps/chosen": -251.4945068359375, |
|
"logps/rejected": -256.6982421875, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5882171392440796, |
|
"rewards/margins": 1.6525434255599976, |
|
"rewards/rejected": -2.240760564804077, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9889143730886847e-07, |
|
"logits/chosen": -2.8327362537384033, |
|
"logits/rejected": -2.714212656021118, |
|
"logps/chosen": -290.8333740234375, |
|
"logps/rejected": -215.6833953857422, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06578992307186127, |
|
"rewards/margins": 2.0252296924591064, |
|
"rewards/rejected": -2.09101939201355, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.96980122324159e-07, |
|
"logits/chosen": -2.7779641151428223, |
|
"logits/rejected": -2.794778347015381, |
|
"logps/chosen": -239.24026489257812, |
|
"logps/rejected": -245.0064697265625, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.22896571457386017, |
|
"rewards/margins": 1.9688221216201782, |
|
"rewards/rejected": -2.1977877616882324, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9506880733944953e-07, |
|
"logits/chosen": -2.826106548309326, |
|
"logits/rejected": -2.7670514583587646, |
|
"logps/chosen": -257.3904113769531, |
|
"logps/rejected": -216.4410858154297, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7895628213882446, |
|
"rewards/margins": 1.1692781448364258, |
|
"rewards/rejected": -1.9588409662246704, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9315749235474006e-07, |
|
"logits/chosen": -2.93815016746521, |
|
"logits/rejected": -2.9787683486938477, |
|
"logps/chosen": -214.74887084960938, |
|
"logps/rejected": -249.5219268798828, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9767111539840698, |
|
"rewards/margins": 0.8558231592178345, |
|
"rewards/rejected": -1.8325341939926147, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.912461773700306e-07, |
|
"logits/chosen": -2.8489022254943848, |
|
"logits/rejected": -2.7496302127838135, |
|
"logps/chosen": -400.4466857910156, |
|
"logps/rejected": -282.9391174316406, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4085620045661926, |
|
"rewards/margins": 1.7454227209091187, |
|
"rewards/rejected": -2.153984785079956, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8933486238532107e-07, |
|
"logits/chosen": -2.911019802093506, |
|
"logits/rejected": -2.745748996734619, |
|
"logps/chosen": -374.48126220703125, |
|
"logps/rejected": -262.03973388671875, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.41163554787635803, |
|
"rewards/margins": 1.7663021087646484, |
|
"rewards/rejected": -2.1779379844665527, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874235474006116e-07, |
|
"logits/chosen": -2.762225389480591, |
|
"logits/rejected": -2.7744991779327393, |
|
"logps/chosen": -216.38070678710938, |
|
"logps/rejected": -269.47991943359375, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5625228881835938, |
|
"rewards/margins": 1.6768239736557007, |
|
"rewards/rejected": -2.239346981048584, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8551223241590214e-07, |
|
"logits/chosen": -2.912327766418457, |
|
"logits/rejected": -2.978773593902588, |
|
"logps/chosen": -230.28842163085938, |
|
"logps/rejected": -241.50076293945312, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5826612710952759, |
|
"rewards/margins": 2.0513291358947754, |
|
"rewards/rejected": -2.6339900493621826, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8360091743119267e-07, |
|
"logits/chosen": -2.8714957237243652, |
|
"logits/rejected": -2.774735689163208, |
|
"logps/chosen": -286.7504577636719, |
|
"logps/rejected": -263.4833984375, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15738129615783691, |
|
"rewards/margins": 1.1751208305358887, |
|
"rewards/rejected": -1.3325022459030151, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": -2.909325361251831, |
|
"eval_logits/rejected": -2.8294677734375, |
|
"eval_logps/chosen": -255.66744995117188, |
|
"eval_logps/rejected": -213.5056915283203, |
|
"eval_loss": 0.48539167642593384, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.08858150988817215, |
|
"eval_rewards/margins": 1.6013730764389038, |
|
"eval_rewards/rejected": -1.6899546384811401, |
|
"eval_runtime": 42.9119, |
|
"eval_samples_per_second": 46.607, |
|
"eval_steps_per_second": 0.373, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.816896024464832e-07, |
|
"logits/chosen": -2.864403247833252, |
|
"logits/rejected": -2.858276128768921, |
|
"logps/chosen": -178.23683166503906, |
|
"logps/rejected": -204.80947875976562, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1971687376499176, |
|
"rewards/margins": 1.8712667226791382, |
|
"rewards/rejected": -2.0684351921081543, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797782874617737e-07, |
|
"logits/chosen": -2.890958070755005, |
|
"logits/rejected": -2.8029327392578125, |
|
"logps/chosen": -315.40106201171875, |
|
"logps/rejected": -253.34005737304688, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.26785898208618164, |
|
"rewards/margins": 1.1954081058502197, |
|
"rewards/rejected": -1.463267207145691, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778669724770642e-07, |
|
"logits/chosen": -2.864638090133667, |
|
"logits/rejected": -2.8815560340881348, |
|
"logps/chosen": -327.3506774902344, |
|
"logps/rejected": -269.56695556640625, |
|
"loss": 0.4877, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4065053462982178, |
|
"rewards/margins": 1.4708530902862549, |
|
"rewards/rejected": -1.8773584365844727, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7595565749235474e-07, |
|
"logits/chosen": -2.8528895378112793, |
|
"logits/rejected": -2.884323835372925, |
|
"logps/chosen": -277.66949462890625, |
|
"logps/rejected": -217.2276153564453, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3980627954006195, |
|
"rewards/margins": 1.872488260269165, |
|
"rewards/rejected": -2.2705509662628174, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.740443425076452e-07, |
|
"logits/chosen": -2.8920092582702637, |
|
"logits/rejected": -2.8774821758270264, |
|
"logps/chosen": -274.6985778808594, |
|
"logps/rejected": -294.6683654785156, |
|
"loss": 0.4798, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3751869201660156, |
|
"rewards/margins": 1.2973663806915283, |
|
"rewards/rejected": -1.6725534200668335, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7213302752293575e-07, |
|
"logits/chosen": -2.9467930793762207, |
|
"logits/rejected": -2.8368477821350098, |
|
"logps/chosen": -275.72119140625, |
|
"logps/rejected": -213.9237060546875, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.3335360884666443, |
|
"rewards/margins": 0.5510531663894653, |
|
"rewards/rejected": -0.8845891952514648, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.702217125382263e-07, |
|
"logits/chosen": -2.8036694526672363, |
|
"logits/rejected": -2.8077778816223145, |
|
"logps/chosen": -280.42120361328125, |
|
"logps/rejected": -295.61383056640625, |
|
"loss": 0.398, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2956100106239319, |
|
"rewards/margins": 1.6340839862823486, |
|
"rewards/rejected": -1.9296939373016357, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6831039755351677e-07, |
|
"logits/chosen": -2.758172035217285, |
|
"logits/rejected": -2.8651859760284424, |
|
"logps/chosen": -231.38320922851562, |
|
"logps/rejected": -276.5523986816406, |
|
"loss": 0.088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8953015208244324, |
|
"rewards/margins": 4.328494071960449, |
|
"rewards/rejected": -3.433192729949951, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.663990825688073e-07, |
|
"logits/chosen": -2.7469801902770996, |
|
"logits/rejected": -2.6729702949523926, |
|
"logps/chosen": -243.54397583007812, |
|
"logps/rejected": -240.7235107421875, |
|
"loss": 0.1085, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.0088516473770142, |
|
"rewards/margins": 5.048202991485596, |
|
"rewards/rejected": -4.039350986480713, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6448776758409783e-07, |
|
"logits/chosen": -2.770702600479126, |
|
"logits/rejected": -2.7252144813537598, |
|
"logps/chosen": -246.9398956298828, |
|
"logps/rejected": -216.59854125976562, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3230278789997101, |
|
"rewards/margins": 3.6204140186309814, |
|
"rewards/rejected": -3.2973856925964355, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/chosen": -2.8436782360076904, |
|
"eval_logits/rejected": -2.757986545562744, |
|
"eval_logps/chosen": -259.6300048828125, |
|
"eval_logps/rejected": -221.89271545410156, |
|
"eval_loss": 0.49383220076560974, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -0.4848347306251526, |
|
"eval_rewards/margins": 2.0438199043273926, |
|
"eval_rewards/rejected": -2.5286545753479004, |
|
"eval_runtime": 43.0225, |
|
"eval_samples_per_second": 46.487, |
|
"eval_steps_per_second": 0.372, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6257645259938836e-07, |
|
"logits/chosen": -2.832165002822876, |
|
"logits/rejected": -2.765643835067749, |
|
"logps/chosen": -281.77423095703125, |
|
"logps/rejected": -261.037841796875, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.658333957195282, |
|
"rewards/margins": 5.224586009979248, |
|
"rewards/rejected": -4.5662522315979, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.606651376146789e-07, |
|
"logits/chosen": -2.770781993865967, |
|
"logits/rejected": -2.5953280925750732, |
|
"logps/chosen": -224.37307739257812, |
|
"logps/rejected": -231.11038208007812, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.5244539380073547, |
|
"rewards/margins": 4.876093864440918, |
|
"rewards/rejected": -4.351639747619629, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5875382262996937e-07, |
|
"logits/chosen": -2.531419038772583, |
|
"logits/rejected": -2.5288774967193604, |
|
"logps/chosen": -202.06593322753906, |
|
"logps/rejected": -248.90481567382812, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.05567428469657898, |
|
"rewards/margins": 4.294209957122803, |
|
"rewards/rejected": -4.349884033203125, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568425076452599e-07, |
|
"logits/chosen": -2.777543783187866, |
|
"logits/rejected": -2.808948040008545, |
|
"logps/chosen": -216.7978973388672, |
|
"logps/rejected": -242.88247680664062, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09309720993041992, |
|
"rewards/margins": 4.671463966369629, |
|
"rewards/rejected": -4.578366279602051, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5493119266055044e-07, |
|
"logits/chosen": -2.6939737796783447, |
|
"logits/rejected": -2.6256613731384277, |
|
"logps/chosen": -260.4227294921875, |
|
"logps/rejected": -225.3296356201172, |
|
"loss": 0.093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5441755056381226, |
|
"rewards/margins": 5.277462482452393, |
|
"rewards/rejected": -4.7332868576049805, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5301987767584097e-07, |
|
"logits/chosen": -3.0205957889556885, |
|
"logits/rejected": -2.9231088161468506, |
|
"logps/chosen": -247.9834442138672, |
|
"logps/rejected": -340.5820007324219, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.32120850682258606, |
|
"rewards/margins": 4.978722095489502, |
|
"rewards/rejected": -4.657513618469238, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.511085626911315e-07, |
|
"logits/chosen": -2.9255290031433105, |
|
"logits/rejected": -2.868518114089966, |
|
"logps/chosen": -375.0356140136719, |
|
"logps/rejected": -345.5308837890625, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.7949947118759155, |
|
"rewards/margins": 6.140542030334473, |
|
"rewards/rejected": -5.345546722412109, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.49197247706422e-07, |
|
"logits/chosen": -2.9496195316314697, |
|
"logits/rejected": -2.848146438598633, |
|
"logps/chosen": -289.2176208496094, |
|
"logps/rejected": -232.7750701904297, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6688757538795471, |
|
"rewards/margins": 5.5320024490356445, |
|
"rewards/rejected": -4.8631272315979, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.472859327217125e-07, |
|
"logits/chosen": -2.7657835483551025, |
|
"logits/rejected": -2.740086555480957, |
|
"logps/chosen": -369.2336120605469, |
|
"logps/rejected": -343.9190979003906, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.10366223007440567, |
|
"rewards/margins": 3.979884624481201, |
|
"rewards/rejected": -3.876222610473633, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4537461773700304e-07, |
|
"logits/chosen": -2.8481802940368652, |
|
"logits/rejected": -2.714881181716919, |
|
"logps/chosen": -180.77984619140625, |
|
"logps/rejected": -169.2972412109375, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.22425577044487, |
|
"rewards/margins": 4.330163955688477, |
|
"rewards/rejected": -4.55441951751709, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_logits/chosen": -2.8857600688934326, |
|
"eval_logits/rejected": -2.803550958633423, |
|
"eval_logps/chosen": -265.58172607421875, |
|
"eval_logps/rejected": -229.02471923828125, |
|
"eval_loss": 0.5071350336074829, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -1.0800076723098755, |
|
"eval_rewards/margins": 2.1618502140045166, |
|
"eval_rewards/rejected": -3.2418575286865234, |
|
"eval_runtime": 43.4425, |
|
"eval_samples_per_second": 46.038, |
|
"eval_steps_per_second": 0.368, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.434633027522936e-07, |
|
"logits/chosen": -2.6898512840270996, |
|
"logits/rejected": -2.666327953338623, |
|
"logps/chosen": -280.2970275878906, |
|
"logps/rejected": -263.78839111328125, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.7688434720039368, |
|
"rewards/margins": 5.902011394500732, |
|
"rewards/rejected": -5.1331682205200195, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.415519877675841e-07, |
|
"logits/chosen": -2.860400915145874, |
|
"logits/rejected": -2.8594813346862793, |
|
"logps/chosen": -216.53561401367188, |
|
"logps/rejected": -322.01275634765625, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3255351185798645, |
|
"rewards/margins": 4.399062633514404, |
|
"rewards/rejected": -4.724597930908203, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3964067278287464e-07, |
|
"logits/chosen": -2.682889461517334, |
|
"logits/rejected": -2.781895399093628, |
|
"logps/chosen": -315.12042236328125, |
|
"logps/rejected": -330.3970947265625, |
|
"loss": 0.102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.041519224643707275, |
|
"rewards/margins": 5.329067230224609, |
|
"rewards/rejected": -5.370586395263672, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.377293577981651e-07, |
|
"logits/chosen": -2.815574884414673, |
|
"logits/rejected": -2.6275439262390137, |
|
"logps/chosen": -235.42666625976562, |
|
"logps/rejected": -239.9461669921875, |
|
"loss": 0.0884, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18244703114032745, |
|
"rewards/margins": 4.990113258361816, |
|
"rewards/rejected": -5.172560691833496, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3581804281345565e-07, |
|
"logits/chosen": -3.0224831104278564, |
|
"logits/rejected": -2.7311410903930664, |
|
"logps/chosen": -346.58941650390625, |
|
"logps/rejected": -287.28424072265625, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4470561146736145, |
|
"rewards/margins": 5.634953022003174, |
|
"rewards/rejected": -5.187896728515625, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.339067278287462e-07, |
|
"logits/chosen": -2.6187033653259277, |
|
"logits/rejected": -2.674354076385498, |
|
"logps/chosen": -232.01919555664062, |
|
"logps/rejected": -265.0806884765625, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4214044511318207, |
|
"rewards/margins": 4.962105751037598, |
|
"rewards/rejected": -4.540700912475586, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.319954128440367e-07, |
|
"logits/chosen": -2.810262680053711, |
|
"logits/rejected": -2.6492209434509277, |
|
"logps/chosen": -365.208251953125, |
|
"logps/rejected": -247.2949676513672, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2072467803955078, |
|
"rewards/margins": 5.322917938232422, |
|
"rewards/rejected": -5.530165195465088, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3008409785932725e-07, |
|
"logits/chosen": -2.717909097671509, |
|
"logits/rejected": -2.684145450592041, |
|
"logps/chosen": -186.22238159179688, |
|
"logps/rejected": -240.98068237304688, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.022188162431120872, |
|
"rewards/margins": 5.249354362487793, |
|
"rewards/rejected": -5.227167129516602, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2817278287461773e-07, |
|
"logits/chosen": -2.7536282539367676, |
|
"logits/rejected": -2.809913158416748, |
|
"logps/chosen": -212.02871704101562, |
|
"logps/rejected": -356.77178955078125, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17943891882896423, |
|
"rewards/margins": 5.2806596755981445, |
|
"rewards/rejected": -5.460098743438721, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.262614678899082e-07, |
|
"logits/chosen": -2.661210536956787, |
|
"logits/rejected": -2.663743495941162, |
|
"logps/chosen": -237.6588134765625, |
|
"logps/rejected": -316.5496520996094, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.43801799416542053, |
|
"rewards/margins": 5.53380012512207, |
|
"rewards/rejected": -5.9718170166015625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_logits/chosen": -2.8707656860351562, |
|
"eval_logits/rejected": -2.7961297035217285, |
|
"eval_logps/chosen": -264.4635009765625, |
|
"eval_logps/rejected": -230.69354248046875, |
|
"eval_loss": 0.5158514380455017, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -0.9681856036186218, |
|
"eval_rewards/margins": 2.4405524730682373, |
|
"eval_rewards/rejected": -3.408737897872925, |
|
"eval_runtime": 42.8623, |
|
"eval_samples_per_second": 46.661, |
|
"eval_steps_per_second": 0.373, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2435015290519874e-07, |
|
"logits/chosen": -2.7133708000183105, |
|
"logits/rejected": -2.6246652603149414, |
|
"logps/chosen": -197.2749481201172, |
|
"logps/rejected": -306.0080871582031, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03982555866241455, |
|
"rewards/margins": 5.2729597091674805, |
|
"rewards/rejected": -5.312786102294922, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2243883792048927e-07, |
|
"logits/chosen": -2.7416293621063232, |
|
"logits/rejected": -2.7878434658050537, |
|
"logps/chosen": -231.7838592529297, |
|
"logps/rejected": -307.630615234375, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4765847623348236, |
|
"rewards/margins": 6.021305561065674, |
|
"rewards/rejected": -5.544720649719238, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.205275229357798e-07, |
|
"logits/chosen": -2.8276381492614746, |
|
"logits/rejected": -2.740967035293579, |
|
"logps/chosen": -268.0155334472656, |
|
"logps/rejected": -276.89300537109375, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23615550994873047, |
|
"rewards/margins": 5.408732891082764, |
|
"rewards/rejected": -5.172577857971191, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.186162079510703e-07, |
|
"logits/chosen": -2.8147940635681152, |
|
"logits/rejected": -2.8905491828918457, |
|
"logps/chosen": -262.6653747558594, |
|
"logps/rejected": -325.59783935546875, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3194983899593353, |
|
"rewards/margins": 5.201966285705566, |
|
"rewards/rejected": -5.521464824676514, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.167048929663608e-07, |
|
"logits/chosen": -2.7950730323791504, |
|
"logits/rejected": -2.7414755821228027, |
|
"logps/chosen": -225.0370635986328, |
|
"logps/rejected": -268.7679138183594, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6976510286331177, |
|
"rewards/margins": 4.719433784484863, |
|
"rewards/rejected": -5.417084693908691, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1479357798165134e-07, |
|
"logits/chosen": -2.813150405883789, |
|
"logits/rejected": -2.7929835319519043, |
|
"logps/chosen": -238.1572265625, |
|
"logps/rejected": -248.3897705078125, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08550971001386642, |
|
"rewards/margins": 3.972590684890747, |
|
"rewards/rejected": -4.05810022354126, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.128822629969419e-07, |
|
"logits/chosen": -2.989551067352295, |
|
"logits/rejected": -2.859724998474121, |
|
"logps/chosen": -285.981201171875, |
|
"logps/rejected": -308.9172058105469, |
|
"loss": 0.0952, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4108548164367676, |
|
"rewards/margins": 5.075130462646484, |
|
"rewards/rejected": -5.485985279083252, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.109709480122324e-07, |
|
"logits/chosen": -2.619586706161499, |
|
"logits/rejected": -2.7320122718811035, |
|
"logps/chosen": -277.3897399902344, |
|
"logps/rejected": -280.6328430175781, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4828956127166748, |
|
"rewards/margins": 7.261415958404541, |
|
"rewards/rejected": -5.778519630432129, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0905963302752294e-07, |
|
"logits/chosen": -2.643578052520752, |
|
"logits/rejected": -2.671351671218872, |
|
"logps/chosen": -221.6022186279297, |
|
"logps/rejected": -271.2128601074219, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.4258726239204407, |
|
"rewards/margins": 5.396482467651367, |
|
"rewards/rejected": -5.822355270385742, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.071483180428134e-07, |
|
"logits/chosen": -2.925110340118408, |
|
"logits/rejected": -2.775982618331909, |
|
"logps/chosen": -396.8591003417969, |
|
"logps/rejected": -287.55902099609375, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.8476167917251587, |
|
"rewards/margins": 5.994976997375488, |
|
"rewards/rejected": -5.147359371185303, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_logits/chosen": -2.8753504753112793, |
|
"eval_logits/rejected": -2.801880359649658, |
|
"eval_logps/chosen": -265.613525390625, |
|
"eval_logps/rejected": -232.14105224609375, |
|
"eval_loss": 0.5221606492996216, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -1.0831860303878784, |
|
"eval_rewards/margins": 2.4703056812286377, |
|
"eval_rewards/rejected": -3.5534915924072266, |
|
"eval_runtime": 42.8737, |
|
"eval_samples_per_second": 46.649, |
|
"eval_steps_per_second": 0.373, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0523700305810395e-07, |
|
"logits/chosen": -2.9040591716766357, |
|
"logits/rejected": -2.857766628265381, |
|
"logps/chosen": -282.7038879394531, |
|
"logps/rejected": -336.998779296875, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1848091334104538, |
|
"rewards/margins": 5.379525661468506, |
|
"rewards/rejected": -5.194716453552246, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.033256880733945e-07, |
|
"logits/chosen": -2.662896156311035, |
|
"logits/rejected": -2.662188768386841, |
|
"logps/chosen": -175.68914794921875, |
|
"logps/rejected": -254.7020263671875, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9065677523612976, |
|
"rewards/margins": 3.604799270629883, |
|
"rewards/rejected": -4.511366844177246, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.01414373088685e-07, |
|
"logits/chosen": -2.8199753761291504, |
|
"logits/rejected": -2.8096752166748047, |
|
"logps/chosen": -276.9725341796875, |
|
"logps/rejected": -313.1163330078125, |
|
"loss": 0.1005, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.162560373544693, |
|
"rewards/margins": 5.922631740570068, |
|
"rewards/rejected": -5.760071277618408, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9950305810397555e-07, |
|
"logits/chosen": -2.7838358879089355, |
|
"logits/rejected": -2.910243511199951, |
|
"logps/chosen": -271.54608154296875, |
|
"logps/rejected": -326.84429931640625, |
|
"loss": 0.0981, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.6067371964454651, |
|
"rewards/margins": 6.717789649963379, |
|
"rewards/rejected": -7.324526309967041, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9759174311926603e-07, |
|
"logits/chosen": -2.840893507003784, |
|
"logits/rejected": -2.637371063232422, |
|
"logps/chosen": -319.387451171875, |
|
"logps/rejected": -196.8367462158203, |
|
"loss": 0.0884, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.37663254141807556, |
|
"rewards/margins": 4.401788234710693, |
|
"rewards/rejected": -4.778420448303223, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9568042813455656e-07, |
|
"logits/chosen": -2.739501714706421, |
|
"logits/rejected": -2.7261953353881836, |
|
"logps/chosen": -247.82839965820312, |
|
"logps/rejected": -260.4093017578125, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5568360090255737, |
|
"rewards/margins": 4.246486663818359, |
|
"rewards/rejected": -4.803322792053223, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.937691131498471e-07, |
|
"logits/chosen": -2.8332924842834473, |
|
"logits/rejected": -2.639050245285034, |
|
"logps/chosen": -309.36627197265625, |
|
"logps/rejected": -355.98858642578125, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0330581665039062, |
|
"rewards/margins": 7.80413818359375, |
|
"rewards/rejected": -6.771079063415527, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.918577981651376e-07, |
|
"logits/chosen": -2.7822353839874268, |
|
"logits/rejected": -2.569251537322998, |
|
"logps/chosen": -317.1792297363281, |
|
"logps/rejected": -229.3711395263672, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5411940813064575, |
|
"rewards/margins": 6.266817569732666, |
|
"rewards/rejected": -5.72562313079834, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.8994648318042816e-07, |
|
"logits/chosen": -2.987076759338379, |
|
"logits/rejected": -2.8996262550354004, |
|
"logps/chosen": -253.8076629638672, |
|
"logps/rejected": -246.89614868164062, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.017121601849794388, |
|
"rewards/margins": 4.667881965637207, |
|
"rewards/rejected": -4.68500280380249, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8803516819571863e-07, |
|
"logits/chosen": -2.9189207553863525, |
|
"logits/rejected": -2.7888290882110596, |
|
"logps/chosen": -307.88287353515625, |
|
"logps/rejected": -285.6153564453125, |
|
"loss": 0.0965, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3171554207801819, |
|
"rewards/margins": 6.538872718811035, |
|
"rewards/rejected": -6.22171688079834, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_logits/chosen": -2.888350009918213, |
|
"eval_logits/rejected": -2.8058454990386963, |
|
"eval_logps/chosen": -266.73236083984375, |
|
"eval_logps/rejected": -232.28741455078125, |
|
"eval_loss": 0.5203580260276794, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -1.1950702667236328, |
|
"eval_rewards/margins": 2.373054027557373, |
|
"eval_rewards/rejected": -3.568124532699585, |
|
"eval_runtime": 43.2124, |
|
"eval_samples_per_second": 46.283, |
|
"eval_steps_per_second": 0.37, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8612385321100917e-07, |
|
"logits/chosen": -2.74180269241333, |
|
"logits/rejected": -2.743090867996216, |
|
"logps/chosen": -199.86988830566406, |
|
"logps/rejected": -292.73370361328125, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.6563189625740051, |
|
"rewards/margins": 6.341017246246338, |
|
"rewards/rejected": -5.684699058532715, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.842125382262997e-07, |
|
"logits/chosen": -2.712198495864868, |
|
"logits/rejected": -2.7786831855773926, |
|
"logps/chosen": -284.89263916015625, |
|
"logps/rejected": -333.57489013671875, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07811599969863892, |
|
"rewards/margins": 6.2695770263671875, |
|
"rewards/rejected": -6.191461563110352, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8230122324159023e-07, |
|
"logits/chosen": -2.4872963428497314, |
|
"logits/rejected": -2.634469747543335, |
|
"logps/chosen": -216.0610809326172, |
|
"logps/rejected": -366.69879150390625, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7169110178947449, |
|
"rewards/margins": 6.171887397766113, |
|
"rewards/rejected": -6.888798713684082, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.8038990825688076e-07, |
|
"logits/chosen": -2.914641857147217, |
|
"logits/rejected": -2.7360188961029053, |
|
"logps/chosen": -219.3689727783203, |
|
"logps/rejected": -247.0272216796875, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9662607312202454, |
|
"rewards/margins": 4.363907337188721, |
|
"rewards/rejected": -5.330168724060059, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.784785932721712e-07, |
|
"logits/chosen": -2.8227076530456543, |
|
"logits/rejected": -2.789245367050171, |
|
"logps/chosen": -270.40057373046875, |
|
"logps/rejected": -279.9286804199219, |
|
"loss": 0.1157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15661029517650604, |
|
"rewards/margins": 5.467320442199707, |
|
"rewards/rejected": -5.623930931091309, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.765672782874617e-07, |
|
"logits/chosen": -2.7216007709503174, |
|
"logits/rejected": -2.6529860496520996, |
|
"logps/chosen": -319.59832763671875, |
|
"logps/rejected": -283.9094543457031, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.5374232530593872, |
|
"rewards/margins": 7.195462226867676, |
|
"rewards/rejected": -5.65803861618042, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7465596330275225e-07, |
|
"logits/chosen": -2.7975354194641113, |
|
"logits/rejected": -2.7123022079467773, |
|
"logps/chosen": -226.4639129638672, |
|
"logps/rejected": -204.92037963867188, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1759016215801239, |
|
"rewards/margins": 4.392673015594482, |
|
"rewards/rejected": -4.568574905395508, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.727446483180428e-07, |
|
"logits/chosen": -2.9199295043945312, |
|
"logits/rejected": -2.9144251346588135, |
|
"logps/chosen": -325.1085205078125, |
|
"logps/rejected": -300.2225341796875, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7285654544830322, |
|
"rewards/margins": 5.159348011016846, |
|
"rewards/rejected": -5.887913227081299, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.708333333333333e-07, |
|
"logits/chosen": -2.852546453475952, |
|
"logits/rejected": -2.7731895446777344, |
|
"logps/chosen": -293.71307373046875, |
|
"logps/rejected": -298.82818603515625, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1010526418685913, |
|
"rewards/margins": 5.532158374786377, |
|
"rewards/rejected": -6.633211612701416, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6892201834862385e-07, |
|
"logits/chosen": -2.972316265106201, |
|
"logits/rejected": -2.7817482948303223, |
|
"logps/chosen": -322.0675354003906, |
|
"logps/rejected": -286.2783203125, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5001612901687622, |
|
"rewards/margins": 6.432877540588379, |
|
"rewards/rejected": -5.9327168464660645, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_logits/chosen": -2.886213779449463, |
|
"eval_logits/rejected": -2.797916889190674, |
|
"eval_logps/chosen": -271.3697204589844, |
|
"eval_logps/rejected": -237.4441375732422, |
|
"eval_loss": 0.538065493106842, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": -1.6588083505630493, |
|
"eval_rewards/margins": 2.424990177154541, |
|
"eval_rewards/rejected": -4.083798408508301, |
|
"eval_runtime": 42.6303, |
|
"eval_samples_per_second": 46.915, |
|
"eval_steps_per_second": 0.375, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6701070336391433e-07, |
|
"logits/chosen": -2.6193156242370605, |
|
"logits/rejected": -2.575343608856201, |
|
"logps/chosen": -286.7375793457031, |
|
"logps/rejected": -227.78652954101562, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46129971742630005, |
|
"rewards/margins": 5.609195232391357, |
|
"rewards/rejected": -5.147895812988281, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6509938837920486e-07, |
|
"logits/chosen": -2.908501148223877, |
|
"logits/rejected": -2.6500637531280518, |
|
"logps/chosen": -303.86737060546875, |
|
"logps/rejected": -269.2611999511719, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.5135625600814819, |
|
"rewards/margins": 5.414787292480469, |
|
"rewards/rejected": -4.901224613189697, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.631880733944954e-07, |
|
"logits/chosen": -2.8338589668273926, |
|
"logits/rejected": -2.70253586769104, |
|
"logps/chosen": -328.9350891113281, |
|
"logps/rejected": -259.4769287109375, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.37550514936447144, |
|
"rewards/margins": 4.032449245452881, |
|
"rewards/rejected": -4.407954692840576, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.612767584097859e-07, |
|
"logits/chosen": -2.8646657466888428, |
|
"logits/rejected": -2.771764039993286, |
|
"logps/chosen": -266.7498779296875, |
|
"logps/rejected": -286.897705078125, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10361538082361221, |
|
"rewards/margins": 5.30898380279541, |
|
"rewards/rejected": -5.205368995666504, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5936544342507646e-07, |
|
"logits/chosen": -2.8486104011535645, |
|
"logits/rejected": -2.7594268321990967, |
|
"logps/chosen": -289.22979736328125, |
|
"logps/rejected": -331.20989990234375, |
|
"loss": 0.1022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9251961708068848, |
|
"rewards/margins": 7.291400909423828, |
|
"rewards/rejected": -6.366204261779785, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5745412844036693e-07, |
|
"logits/chosen": -2.771709442138672, |
|
"logits/rejected": -2.580551862716675, |
|
"logps/chosen": -304.80816650390625, |
|
"logps/rejected": -289.4775085449219, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09966902434825897, |
|
"rewards/margins": 4.944860458374023, |
|
"rewards/rejected": -5.044528961181641, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5554281345565747e-07, |
|
"logits/chosen": -2.9294638633728027, |
|
"logits/rejected": -2.6105856895446777, |
|
"logps/chosen": -351.17864990234375, |
|
"logps/rejected": -303.7239685058594, |
|
"loss": 0.0834, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7888059616088867, |
|
"rewards/margins": 5.206669807434082, |
|
"rewards/rejected": -5.995476722717285, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.53631498470948e-07, |
|
"logits/chosen": -2.8216099739074707, |
|
"logits/rejected": -2.7941040992736816, |
|
"logps/chosen": -278.52447509765625, |
|
"logps/rejected": -272.856689453125, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4856332242488861, |
|
"rewards/margins": 6.397647857666016, |
|
"rewards/rejected": -5.912014961242676, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5172018348623853e-07, |
|
"logits/chosen": -2.9010136127471924, |
|
"logits/rejected": -2.6620776653289795, |
|
"logps/chosen": -338.17889404296875, |
|
"logps/rejected": -222.8809814453125, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6562628746032715, |
|
"rewards/margins": 5.940356254577637, |
|
"rewards/rejected": -5.284094333648682, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4980886850152906e-07, |
|
"logits/chosen": -2.8568649291992188, |
|
"logits/rejected": -2.7494237422943115, |
|
"logps/chosen": -379.17425537109375, |
|
"logps/rejected": -371.10675048828125, |
|
"loss": 0.0957, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17441731691360474, |
|
"rewards/margins": 6.062676429748535, |
|
"rewards/rejected": -5.888258934020996, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_logits/chosen": -2.897590398788452, |
|
"eval_logits/rejected": -2.7959723472595215, |
|
"eval_logps/chosen": -266.52783203125, |
|
"eval_logps/rejected": -234.0833740234375, |
|
"eval_loss": 0.5150620341300964, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.174619197845459, |
|
"eval_rewards/margins": 2.5731022357940674, |
|
"eval_rewards/rejected": -3.7477216720581055, |
|
"eval_runtime": 42.682, |
|
"eval_samples_per_second": 46.858, |
|
"eval_steps_per_second": 0.375, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.478975535168196e-07, |
|
"logits/chosen": -2.6691880226135254, |
|
"logits/rejected": -2.6619346141815186, |
|
"logps/chosen": -242.07443237304688, |
|
"logps/rejected": -234.34457397460938, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0017588138580322266, |
|
"rewards/margins": 4.478671550750732, |
|
"rewards/rejected": -4.480430603027344, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.459862385321101e-07, |
|
"logits/chosen": -2.891451835632324, |
|
"logits/rejected": -2.8165245056152344, |
|
"logps/chosen": -386.6529235839844, |
|
"logps/rejected": -339.13525390625, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.5312672257423401, |
|
"rewards/margins": 6.819835662841797, |
|
"rewards/rejected": -6.288567543029785, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.440749235474006e-07, |
|
"logits/chosen": -2.6342310905456543, |
|
"logits/rejected": -2.497105836868286, |
|
"logps/chosen": -298.6165771484375, |
|
"logps/rejected": -333.8529968261719, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11463097482919693, |
|
"rewards/margins": 6.6383843421936035, |
|
"rewards/rejected": -6.523752689361572, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.421636085626911e-07, |
|
"logits/chosen": -2.8128368854522705, |
|
"logits/rejected": -2.6747617721557617, |
|
"logps/chosen": -333.1402282714844, |
|
"logps/rejected": -336.7758483886719, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.7955392003059387, |
|
"rewards/margins": 6.469751834869385, |
|
"rewards/rejected": -5.674212455749512, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.402522935779816e-07, |
|
"logits/chosen": -2.743809223175049, |
|
"logits/rejected": -2.582720994949341, |
|
"logps/chosen": -312.7721862792969, |
|
"logps/rejected": -325.0949401855469, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18811270594596863, |
|
"rewards/margins": 7.560137748718262, |
|
"rewards/rejected": -7.372025489807129, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3834097859327215e-07, |
|
"logits/chosen": -2.766622304916382, |
|
"logits/rejected": -2.711757183074951, |
|
"logps/chosen": -232.3721466064453, |
|
"logps/rejected": -341.15313720703125, |
|
"loss": 0.0908, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0886591449379921, |
|
"rewards/margins": 7.887714385986328, |
|
"rewards/rejected": -7.976373195648193, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3642966360856268e-07, |
|
"logits/chosen": -2.8221402168273926, |
|
"logits/rejected": -2.8537559509277344, |
|
"logps/chosen": -290.0625, |
|
"logps/rejected": -268.5934753417969, |
|
"loss": 0.0952, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.37126991152763367, |
|
"rewards/margins": 4.8282670974731445, |
|
"rewards/rejected": -5.199536323547363, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.345183486238532e-07, |
|
"logits/chosen": -2.6492087841033936, |
|
"logits/rejected": -2.7734198570251465, |
|
"logps/chosen": -258.0906066894531, |
|
"logps/rejected": -298.752197265625, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40164145827293396, |
|
"rewards/margins": 5.819032192230225, |
|
"rewards/rejected": -5.4173903465271, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3260703363914372e-07, |
|
"logits/chosen": -2.8272266387939453, |
|
"logits/rejected": -2.8661937713623047, |
|
"logps/chosen": -342.8156433105469, |
|
"logps/rejected": -301.5121765136719, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8689159154891968, |
|
"rewards/margins": 5.930572032928467, |
|
"rewards/rejected": -5.0616559982299805, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3069571865443425e-07, |
|
"logits/chosen": -2.723177671432495, |
|
"logits/rejected": -2.4498984813690186, |
|
"logps/chosen": -235.50997924804688, |
|
"logps/rejected": -218.8724365234375, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9274832010269165, |
|
"rewards/margins": 4.0410966873168945, |
|
"rewards/rejected": -4.96858024597168, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_logits/chosen": -2.8591511249542236, |
|
"eval_logits/rejected": -2.748270034790039, |
|
"eval_logps/chosen": -272.372802734375, |
|
"eval_logps/rejected": -242.61672973632812, |
|
"eval_loss": 0.5392942428588867, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -1.7591183185577393, |
|
"eval_rewards/margins": 2.841939926147461, |
|
"eval_rewards/rejected": -4.601057529449463, |
|
"eval_runtime": 43.4291, |
|
"eval_samples_per_second": 46.052, |
|
"eval_steps_per_second": 0.368, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2878440366972476e-07, |
|
"logits/chosen": -2.917429208755493, |
|
"logits/rejected": -2.831324338912964, |
|
"logps/chosen": -285.4388122558594, |
|
"logps/rejected": -409.280517578125, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.4222320318222046, |
|
"rewards/margins": 4.949131488800049, |
|
"rewards/rejected": -6.371363162994385, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.268730886850153e-07, |
|
"logits/chosen": -2.5324103832244873, |
|
"logits/rejected": -2.5078229904174805, |
|
"logps/chosen": -213.4552459716797, |
|
"logps/rejected": -233.57766723632812, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24673457443714142, |
|
"rewards/margins": 6.043379783630371, |
|
"rewards/rejected": -5.796644687652588, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.249617737003058e-07, |
|
"logits/chosen": -2.5907716751098633, |
|
"logits/rejected": -2.6474852561950684, |
|
"logps/chosen": -296.5213317871094, |
|
"logps/rejected": -345.16925048828125, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4688749313354492, |
|
"rewards/margins": 7.06952428817749, |
|
"rewards/rejected": -6.600649833679199, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2305045871559633e-07, |
|
"logits/chosen": -2.7180168628692627, |
|
"logits/rejected": -2.6339097023010254, |
|
"logps/chosen": -273.3642578125, |
|
"logps/rejected": -363.3600769042969, |
|
"loss": 0.0755, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16731056571006775, |
|
"rewards/margins": 6.712351322174072, |
|
"rewards/rejected": -6.545041084289551, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2113914373088686e-07, |
|
"logits/chosen": -2.711622953414917, |
|
"logits/rejected": -2.8453264236450195, |
|
"logps/chosen": -166.16708374023438, |
|
"logps/rejected": -307.5210266113281, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6864116191864014, |
|
"rewards/margins": 5.068359375, |
|
"rewards/rejected": -5.754770755767822, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1922782874617736e-07, |
|
"logits/chosen": -2.812593460083008, |
|
"logits/rejected": -2.9017271995544434, |
|
"logps/chosen": -255.8525848388672, |
|
"logps/rejected": -350.1146240234375, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.615868091583252, |
|
"rewards/margins": 7.678023338317871, |
|
"rewards/rejected": -7.062155246734619, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1731651376146787e-07, |
|
"logits/chosen": -2.7244350910186768, |
|
"logits/rejected": -2.595527172088623, |
|
"logps/chosen": -258.34674072265625, |
|
"logps/rejected": -282.9205627441406, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06415136158466339, |
|
"rewards/margins": 4.976747035980225, |
|
"rewards/rejected": -4.912595272064209, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.154051987767584e-07, |
|
"logits/chosen": -2.7545602321624756, |
|
"logits/rejected": -2.7094874382019043, |
|
"logps/chosen": -297.81500244140625, |
|
"logps/rejected": -347.14471435546875, |
|
"loss": 0.0747, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3948817849159241, |
|
"rewards/margins": 4.995589733123779, |
|
"rewards/rejected": -5.390471935272217, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.134938837920489e-07, |
|
"logits/chosen": -2.6277823448181152, |
|
"logits/rejected": -2.582498550415039, |
|
"logps/chosen": -216.9835662841797, |
|
"logps/rejected": -271.9996337890625, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8243429064750671, |
|
"rewards/margins": 5.609129905700684, |
|
"rewards/rejected": -6.433472633361816, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1158256880733944e-07, |
|
"logits/chosen": -2.853558301925659, |
|
"logits/rejected": -2.6372504234313965, |
|
"logps/chosen": -347.44024658203125, |
|
"logps/rejected": -251.0436553955078, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.08665396273136139, |
|
"rewards/margins": 6.390503406524658, |
|
"rewards/rejected": -6.303849697113037, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": -2.8382647037506104, |
|
"eval_logits/rejected": -2.7311384677886963, |
|
"eval_logps/chosen": -271.38751220703125, |
|
"eval_logps/rejected": -241.25450134277344, |
|
"eval_loss": 0.5384629964828491, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -1.6605874300003052, |
|
"eval_rewards/margins": 2.804246425628662, |
|
"eval_rewards/rejected": -4.464834213256836, |
|
"eval_runtime": 42.7598, |
|
"eval_samples_per_second": 46.773, |
|
"eval_steps_per_second": 0.374, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0967125382262994e-07, |
|
"logits/chosen": -2.921271324157715, |
|
"logits/rejected": -2.6984505653381348, |
|
"logps/chosen": -233.25341796875, |
|
"logps/rejected": -301.328125, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09845151007175446, |
|
"rewards/margins": 6.211066246032715, |
|
"rewards/rejected": -6.112614631652832, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0775993883792048e-07, |
|
"logits/chosen": -2.722374677658081, |
|
"logits/rejected": -2.577514410018921, |
|
"logps/chosen": -351.37493896484375, |
|
"logps/rejected": -284.2919616699219, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9589091539382935, |
|
"rewards/margins": 5.428890228271484, |
|
"rewards/rejected": -6.3877997398376465, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.05848623853211e-07, |
|
"logits/chosen": -2.9829728603363037, |
|
"logits/rejected": -2.8239240646362305, |
|
"logps/chosen": -316.7395935058594, |
|
"logps/rejected": -299.128173828125, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.36243343353271484, |
|
"rewards/margins": 5.3592963218688965, |
|
"rewards/rejected": -5.7217302322387695, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0393730886850151e-07, |
|
"logits/chosen": -2.8860998153686523, |
|
"logits/rejected": -2.7891647815704346, |
|
"logps/chosen": -238.3377227783203, |
|
"logps/rejected": -254.6820831298828, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6590354442596436, |
|
"rewards/margins": 4.998110294342041, |
|
"rewards/rejected": -5.6571455001831055, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0202599388379205e-07, |
|
"logits/chosen": -2.9648146629333496, |
|
"logits/rejected": -2.9021639823913574, |
|
"logps/chosen": -235.9986114501953, |
|
"logps/rejected": -505.4097595214844, |
|
"loss": 0.0892, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.47126349806785583, |
|
"rewards/margins": 7.691262245178223, |
|
"rewards/rejected": -8.16252613067627, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0011467889908258e-07, |
|
"logits/chosen": -2.822788953781128, |
|
"logits/rejected": -2.631824254989624, |
|
"logps/chosen": -326.16778564453125, |
|
"logps/rejected": -283.84796142578125, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3089871108531952, |
|
"rewards/margins": 6.355311393737793, |
|
"rewards/rejected": -6.664299011230469, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9820336391437308e-07, |
|
"logits/chosen": -2.7996997833251953, |
|
"logits/rejected": -2.878431797027588, |
|
"logps/chosen": -291.4371337890625, |
|
"logps/rejected": -283.9778747558594, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3241414725780487, |
|
"rewards/margins": 5.393677711486816, |
|
"rewards/rejected": -5.7178192138671875, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9629204892966362e-07, |
|
"logits/chosen": -2.98240327835083, |
|
"logits/rejected": -2.914886951446533, |
|
"logps/chosen": -396.53271484375, |
|
"logps/rejected": -338.84820556640625, |
|
"loss": 0.0744, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0023688077926635742, |
|
"rewards/margins": 6.182188510894775, |
|
"rewards/rejected": -6.184557914733887, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.943807339449541e-07, |
|
"logits/chosen": -2.5186867713928223, |
|
"logits/rejected": -2.5948076248168945, |
|
"logps/chosen": -219.8833465576172, |
|
"logps/rejected": -241.77197265625, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.869981586933136, |
|
"rewards/margins": 5.099352836608887, |
|
"rewards/rejected": -5.969334602355957, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9246941896024463e-07, |
|
"logits/chosen": -2.75243878364563, |
|
"logits/rejected": -2.6535823345184326, |
|
"logps/chosen": -186.34742736816406, |
|
"logps/rejected": -197.3961181640625, |
|
"loss": 0.1106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7161014676094055, |
|
"rewards/margins": 6.270589351654053, |
|
"rewards/rejected": -6.986691951751709, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_logits/chosen": -2.9132931232452393, |
|
"eval_logits/rejected": -2.8193860054016113, |
|
"eval_logps/chosen": -270.4024963378906, |
|
"eval_logps/rejected": -236.38502502441406, |
|
"eval_loss": 0.5321760177612305, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -1.5620850324630737, |
|
"eval_rewards/margins": 2.4158010482788086, |
|
"eval_rewards/rejected": -3.977886199951172, |
|
"eval_runtime": 43.3233, |
|
"eval_samples_per_second": 46.164, |
|
"eval_steps_per_second": 0.369, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9055810397553516e-07, |
|
"logits/chosen": -2.850160598754883, |
|
"logits/rejected": -2.7610700130462646, |
|
"logps/chosen": -312.760498046875, |
|
"logps/rejected": -301.4549255371094, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.179014191031456, |
|
"rewards/margins": 6.159252166748047, |
|
"rewards/rejected": -6.338266849517822, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8864678899082566e-07, |
|
"logits/chosen": -2.8218185901641846, |
|
"logits/rejected": -2.8268847465515137, |
|
"logps/chosen": -292.2250061035156, |
|
"logps/rejected": -259.8616027832031, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9055793881416321, |
|
"rewards/margins": 4.949950218200684, |
|
"rewards/rejected": -5.85552978515625, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.867354740061162e-07, |
|
"logits/chosen": -2.836451530456543, |
|
"logits/rejected": -2.727323055267334, |
|
"logps/chosen": -257.83428955078125, |
|
"logps/rejected": -360.0964050292969, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.067908525466919, |
|
"rewards/margins": 5.707231044769287, |
|
"rewards/rejected": -6.775138854980469, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8482415902140673e-07, |
|
"logits/chosen": -2.6650755405426025, |
|
"logits/rejected": -2.539886474609375, |
|
"logps/chosen": -234.5982208251953, |
|
"logps/rejected": -312.34375, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.1976377964019775, |
|
"rewards/margins": 5.782257556915283, |
|
"rewards/rejected": -6.979895114898682, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8291284403669723e-07, |
|
"logits/chosen": -2.880758285522461, |
|
"logits/rejected": -2.8343312740325928, |
|
"logps/chosen": -205.7776336669922, |
|
"logps/rejected": -282.3731994628906, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2522627115249634, |
|
"rewards/margins": 5.962088584899902, |
|
"rewards/rejected": -5.709825038909912, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8100152905198777e-07, |
|
"logits/chosen": -2.331038475036621, |
|
"logits/rejected": -2.5355992317199707, |
|
"logps/chosen": -274.9292907714844, |
|
"logps/rejected": -410.0576171875, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.284432590007782, |
|
"rewards/margins": 8.246986389160156, |
|
"rewards/rejected": -8.53141975402832, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7909021406727827e-07, |
|
"logits/chosen": -2.6835944652557373, |
|
"logits/rejected": -2.727116107940674, |
|
"logps/chosen": -190.50384521484375, |
|
"logps/rejected": -258.0693664550781, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4610201120376587, |
|
"rewards/margins": 6.77774715423584, |
|
"rewards/rejected": -8.238768577575684, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.771788990825688e-07, |
|
"logits/chosen": -2.8552534580230713, |
|
"logits/rejected": -2.79771089553833, |
|
"logps/chosen": -312.20550537109375, |
|
"logps/rejected": -283.9766540527344, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4660851061344147, |
|
"rewards/margins": 7.324423313140869, |
|
"rewards/rejected": -7.7905073165893555, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7526758409785934e-07, |
|
"logits/chosen": -2.8241209983825684, |
|
"logits/rejected": -2.612579584121704, |
|
"logps/chosen": -366.5120849609375, |
|
"logps/rejected": -359.84820556640625, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40640372037887573, |
|
"rewards/margins": 8.778280258178711, |
|
"rewards/rejected": -9.184685707092285, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7335626911314984e-07, |
|
"logits/chosen": -2.6581811904907227, |
|
"logits/rejected": -2.6591739654541016, |
|
"logps/chosen": -254.0058135986328, |
|
"logps/rejected": -281.71539306640625, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3120492696762085, |
|
"rewards/margins": 7.992934226989746, |
|
"rewards/rejected": -9.304984092712402, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_logits/chosen": -2.863063097000122, |
|
"eval_logits/rejected": -2.7579469680786133, |
|
"eval_logps/chosen": -279.7497863769531, |
|
"eval_logps/rejected": -256.11993408203125, |
|
"eval_loss": 0.592135488986969, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -2.496814250946045, |
|
"eval_rewards/margins": 3.454561948776245, |
|
"eval_rewards/rejected": -5.951375961303711, |
|
"eval_runtime": 42.8121, |
|
"eval_samples_per_second": 46.716, |
|
"eval_steps_per_second": 0.374, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7144495412844037e-07, |
|
"logits/chosen": -3.115104913711548, |
|
"logits/rejected": -2.79913330078125, |
|
"logps/chosen": -401.72235107421875, |
|
"logps/rejected": -373.38885498046875, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9059139490127563, |
|
"rewards/margins": 8.271968841552734, |
|
"rewards/rejected": -9.177882194519043, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6953363914373088e-07, |
|
"logits/chosen": -2.8683724403381348, |
|
"logits/rejected": -2.759824752807617, |
|
"logps/chosen": -328.4763488769531, |
|
"logps/rejected": -324.87188720703125, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38512125611305237, |
|
"rewards/margins": 10.028793334960938, |
|
"rewards/rejected": -9.643671989440918, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6762232415902138e-07, |
|
"logits/chosen": -2.7730555534362793, |
|
"logits/rejected": -2.7145891189575195, |
|
"logps/chosen": -264.50897216796875, |
|
"logps/rejected": -298.73931884765625, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.688241958618164, |
|
"rewards/margins": 6.888033390045166, |
|
"rewards/rejected": -9.576274871826172, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6571100917431192e-07, |
|
"logits/chosen": -2.8545258045196533, |
|
"logits/rejected": -2.622382640838623, |
|
"logps/chosen": -253.060791015625, |
|
"logps/rejected": -260.3923645019531, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4856998026371002, |
|
"rewards/margins": 8.40421199798584, |
|
"rewards/rejected": -8.889912605285645, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6379969418960242e-07, |
|
"logits/chosen": -2.813387870788574, |
|
"logits/rejected": -2.8429861068725586, |
|
"logps/chosen": -388.98516845703125, |
|
"logps/rejected": -383.153076171875, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13710257411003113, |
|
"rewards/margins": 8.103338241577148, |
|
"rewards/rejected": -8.240442276000977, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6188837920489295e-07, |
|
"logits/chosen": -2.682894706726074, |
|
"logits/rejected": -2.447547197341919, |
|
"logps/chosen": -270.86126708984375, |
|
"logps/rejected": -295.6491394042969, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3708886504173279, |
|
"rewards/margins": 8.017080307006836, |
|
"rewards/rejected": -8.387969970703125, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5997706422018349e-07, |
|
"logits/chosen": -2.909653663635254, |
|
"logits/rejected": -2.926835536956787, |
|
"logps/chosen": -334.53753662109375, |
|
"logps/rejected": -379.61907958984375, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7136735916137695, |
|
"rewards/margins": 9.404067993164062, |
|
"rewards/rejected": -7.690394401550293, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.58065749235474e-07, |
|
"logits/chosen": -2.819035768508911, |
|
"logits/rejected": -2.7074294090270996, |
|
"logps/chosen": -294.74957275390625, |
|
"logps/rejected": -300.90716552734375, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25709009170532227, |
|
"rewards/margins": 7.958430290222168, |
|
"rewards/rejected": -8.215520858764648, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5615443425076452e-07, |
|
"logits/chosen": -2.8023552894592285, |
|
"logits/rejected": -2.638780355453491, |
|
"logps/chosen": -327.9793701171875, |
|
"logps/rejected": -352.86492919921875, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8785334825515747, |
|
"rewards/margins": 9.093118667602539, |
|
"rewards/rejected": -9.971652030944824, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5424311926605506e-07, |
|
"logits/chosen": -2.6098523139953613, |
|
"logits/rejected": -2.3775291442871094, |
|
"logps/chosen": -197.1171112060547, |
|
"logps/rejected": -261.989990234375, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.098048686981201, |
|
"rewards/margins": 7.947665214538574, |
|
"rewards/rejected": -10.045713424682617, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_logits/chosen": -2.8318982124328613, |
|
"eval_logits/rejected": -2.7316362857818604, |
|
"eval_logps/chosen": -283.78375244140625, |
|
"eval_logps/rejected": -260.8829345703125, |
|
"eval_loss": 0.624667227268219, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -2.9002106189727783, |
|
"eval_rewards/margins": 3.5274696350097656, |
|
"eval_rewards/rejected": -6.427680015563965, |
|
"eval_runtime": 43.0871, |
|
"eval_samples_per_second": 46.418, |
|
"eval_steps_per_second": 0.371, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5233180428134556e-07, |
|
"logits/chosen": -2.7438690662384033, |
|
"logits/rejected": -2.7524821758270264, |
|
"logps/chosen": -290.9019470214844, |
|
"logps/rejected": -347.4677734375, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.1246836185455322, |
|
"rewards/margins": 7.351356506347656, |
|
"rewards/rejected": -8.47603988647461, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.504204892966361e-07, |
|
"logits/chosen": -2.690046548843384, |
|
"logits/rejected": -2.62469482421875, |
|
"logps/chosen": -311.0474548339844, |
|
"logps/rejected": -298.30712890625, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0858474969863892, |
|
"rewards/margins": 8.117149353027344, |
|
"rewards/rejected": -9.202996253967285, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.485091743119266e-07, |
|
"logits/chosen": -2.8562185764312744, |
|
"logits/rejected": -2.830827236175537, |
|
"logps/chosen": -255.25137329101562, |
|
"logps/rejected": -300.23699951171875, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9563596844673157, |
|
"rewards/margins": 7.325954437255859, |
|
"rewards/rejected": -8.282313346862793, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.465978593272171e-07, |
|
"logits/chosen": -2.6940760612487793, |
|
"logits/rejected": -2.521862506866455, |
|
"logps/chosen": -249.3166961669922, |
|
"logps/rejected": -279.20245361328125, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.594799518585205, |
|
"rewards/margins": 8.052389144897461, |
|
"rewards/rejected": -9.64719009399414, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4468654434250764e-07, |
|
"logits/chosen": -2.561811923980713, |
|
"logits/rejected": -2.610355854034424, |
|
"logps/chosen": -373.43359375, |
|
"logps/rejected": -411.88275146484375, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.011890411376953125, |
|
"rewards/margins": 9.87723159790039, |
|
"rewards/rejected": -9.88912296295166, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4277522935779814e-07, |
|
"logits/chosen": -2.9392905235290527, |
|
"logits/rejected": -2.6764588356018066, |
|
"logps/chosen": -287.12225341796875, |
|
"logps/rejected": -307.7725524902344, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.639965832233429, |
|
"rewards/margins": 8.369826316833496, |
|
"rewards/rejected": -9.009793281555176, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4086391437308867e-07, |
|
"logits/chosen": -2.7557687759399414, |
|
"logits/rejected": -2.8420677185058594, |
|
"logps/chosen": -300.71356201171875, |
|
"logps/rejected": -359.48260498046875, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5477391481399536, |
|
"rewards/margins": 8.222277641296387, |
|
"rewards/rejected": -9.770015716552734, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.389525993883792e-07, |
|
"logits/chosen": -2.740018844604492, |
|
"logits/rejected": -2.632035970687866, |
|
"logps/chosen": -366.1664733886719, |
|
"logps/rejected": -380.9911193847656, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1040074825286865, |
|
"rewards/margins": 9.593006134033203, |
|
"rewards/rejected": -10.697013854980469, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.370412844036697e-07, |
|
"logits/chosen": -2.78129243850708, |
|
"logits/rejected": -2.6934008598327637, |
|
"logps/chosen": -292.3312072753906, |
|
"logps/rejected": -350.51715087890625, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5786378383636475, |
|
"rewards/margins": 9.707735061645508, |
|
"rewards/rejected": -10.286372184753418, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3512996941896024e-07, |
|
"logits/chosen": -2.6506826877593994, |
|
"logits/rejected": -2.67533016204834, |
|
"logps/chosen": -273.0309143066406, |
|
"logps/rejected": -347.6629333496094, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5998280048370361, |
|
"rewards/margins": 7.78562068939209, |
|
"rewards/rejected": -9.385449409484863, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_logits/chosen": -2.8063864707946777, |
|
"eval_logits/rejected": -2.6991143226623535, |
|
"eval_logps/chosen": -287.3020324707031, |
|
"eval_logps/rejected": -267.23297119140625, |
|
"eval_loss": 0.6402204632759094, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -3.2520391941070557, |
|
"eval_rewards/margins": 3.8106439113616943, |
|
"eval_rewards/rejected": -7.062682628631592, |
|
"eval_runtime": 42.9356, |
|
"eval_samples_per_second": 46.581, |
|
"eval_steps_per_second": 0.373, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3321865443425075e-07, |
|
"logits/chosen": -2.5518293380737305, |
|
"logits/rejected": -2.3668487071990967, |
|
"logps/chosen": -313.5392761230469, |
|
"logps/rejected": -260.40521240234375, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6749378442764282, |
|
"rewards/margins": 8.3352632522583, |
|
"rewards/rejected": -9.010199546813965, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3130733944954128e-07, |
|
"logits/chosen": -2.58048677444458, |
|
"logits/rejected": -2.2754364013671875, |
|
"logps/chosen": -338.11065673828125, |
|
"logps/rejected": -319.00518798828125, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6327890157699585, |
|
"rewards/margins": 8.703729629516602, |
|
"rewards/rejected": -9.336518287658691, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.293960244648318e-07, |
|
"logits/chosen": -2.8132143020629883, |
|
"logits/rejected": -2.7200472354888916, |
|
"logps/chosen": -389.08538818359375, |
|
"logps/rejected": -325.16363525390625, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.311343789100647, |
|
"rewards/margins": 8.575726509094238, |
|
"rewards/rejected": -9.887070655822754, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2748470948012232e-07, |
|
"logits/chosen": -2.630483865737915, |
|
"logits/rejected": -2.5980231761932373, |
|
"logps/chosen": -340.43365478515625, |
|
"logps/rejected": -373.0090637207031, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6955748796463013, |
|
"rewards/margins": 8.990997314453125, |
|
"rewards/rejected": -9.686572074890137, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2557339449541285e-07, |
|
"logits/chosen": -2.6748175621032715, |
|
"logits/rejected": -2.5872254371643066, |
|
"logps/chosen": -335.07373046875, |
|
"logps/rejected": -352.5212097167969, |
|
"loss": 0.0202, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3692783117294312, |
|
"rewards/margins": 9.16440200805664, |
|
"rewards/rejected": -10.53368091583252, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2366207951070336e-07, |
|
"logits/chosen": -2.8568575382232666, |
|
"logits/rejected": -2.7505245208740234, |
|
"logps/chosen": -275.38433837890625, |
|
"logps/rejected": -354.7310485839844, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.296015739440918, |
|
"rewards/margins": 11.606992721557617, |
|
"rewards/rejected": -12.903007507324219, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.217507645259939e-07, |
|
"logits/chosen": -2.8438689708709717, |
|
"logits/rejected": -2.772092342376709, |
|
"logps/chosen": -342.33770751953125, |
|
"logps/rejected": -421.205078125, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9183975458145142, |
|
"rewards/margins": 9.3962984085083, |
|
"rewards/rejected": -10.314695358276367, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.198394495412844e-07, |
|
"logits/chosen": -2.9177348613739014, |
|
"logits/rejected": -2.8442885875701904, |
|
"logps/chosen": -354.0315856933594, |
|
"logps/rejected": -350.19561767578125, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20095424354076385, |
|
"rewards/margins": 8.697885513305664, |
|
"rewards/rejected": -8.898839950561523, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1792813455657493e-07, |
|
"logits/chosen": -2.6511244773864746, |
|
"logits/rejected": -2.679464340209961, |
|
"logps/chosen": -277.9000244140625, |
|
"logps/rejected": -424.24658203125, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.517256736755371, |
|
"rewards/margins": 10.369354248046875, |
|
"rewards/rejected": -11.886610984802246, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1601681957186543e-07, |
|
"logits/chosen": -2.781261920928955, |
|
"logits/rejected": -2.7080893516540527, |
|
"logps/chosen": -325.17877197265625, |
|
"logps/rejected": -291.9747619628906, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1653848886489868, |
|
"rewards/margins": 8.179758071899414, |
|
"rewards/rejected": -9.34514331817627, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_logits/chosen": -2.799234628677368, |
|
"eval_logits/rejected": -2.6871373653411865, |
|
"eval_logps/chosen": -287.49615478515625, |
|
"eval_logps/rejected": -267.9088134765625, |
|
"eval_loss": 0.6562942862510681, |
|
"eval_rewards/accuracies": 0.828125, |
|
"eval_rewards/chosen": -3.2714524269104004, |
|
"eval_rewards/margins": 3.8588109016418457, |
|
"eval_rewards/rejected": -7.130263328552246, |
|
"eval_runtime": 42.8613, |
|
"eval_samples_per_second": 46.662, |
|
"eval_steps_per_second": 0.373, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1410550458715595e-07, |
|
"logits/chosen": -2.5371177196502686, |
|
"logits/rejected": -2.641981601715088, |
|
"logps/chosen": -329.0419006347656, |
|
"logps/rejected": -356.4971618652344, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5353990793228149, |
|
"rewards/margins": 8.938331604003906, |
|
"rewards/rejected": -9.473730087280273, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1219418960244648e-07, |
|
"logits/chosen": -2.671187400817871, |
|
"logits/rejected": -2.650254249572754, |
|
"logps/chosen": -236.1153564453125, |
|
"logps/rejected": -307.6558532714844, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7011358737945557, |
|
"rewards/margins": 8.071800231933594, |
|
"rewards/rejected": -9.77293586730957, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.10282874617737e-07, |
|
"logits/chosen": -2.7763831615448, |
|
"logits/rejected": -2.5476737022399902, |
|
"logps/chosen": -263.58221435546875, |
|
"logps/rejected": -328.7640075683594, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8821464776992798, |
|
"rewards/margins": 7.959317207336426, |
|
"rewards/rejected": -9.841463088989258, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0837155963302752e-07, |
|
"logits/chosen": -2.524266242980957, |
|
"logits/rejected": -2.485341787338257, |
|
"logps/chosen": -288.97955322265625, |
|
"logps/rejected": -307.2179870605469, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7493270635604858, |
|
"rewards/margins": 7.521073818206787, |
|
"rewards/rejected": -9.270401954650879, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0646024464831804e-07, |
|
"logits/chosen": -2.592519998550415, |
|
"logits/rejected": -2.5024571418762207, |
|
"logps/chosen": -289.25762939453125, |
|
"logps/rejected": -368.61883544921875, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.6334786415100098, |
|
"rewards/margins": 9.766633033752441, |
|
"rewards/rejected": -12.400110244750977, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0454892966360856e-07, |
|
"logits/chosen": -2.598473072052002, |
|
"logits/rejected": -2.5985465049743652, |
|
"logps/chosen": -300.30975341796875, |
|
"logps/rejected": -309.1223449707031, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0343241691589355, |
|
"rewards/margins": 9.686413764953613, |
|
"rewards/rejected": -11.720738410949707, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0263761467889908e-07, |
|
"logits/chosen": -2.6408867835998535, |
|
"logits/rejected": -2.503786087036133, |
|
"logps/chosen": -291.9281005859375, |
|
"logps/rejected": -322.25714111328125, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5202815532684326, |
|
"rewards/margins": 9.04098129272461, |
|
"rewards/rejected": -9.561263084411621, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.007262996941896e-07, |
|
"logits/chosen": -2.3971962928771973, |
|
"logits/rejected": -2.137141466140747, |
|
"logps/chosen": -314.1961364746094, |
|
"logps/rejected": -306.4624328613281, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.843491792678833, |
|
"rewards/margins": 8.445917129516602, |
|
"rewards/rejected": -10.289408683776855, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.881498470948011e-08, |
|
"logits/chosen": -2.681727409362793, |
|
"logits/rejected": -2.449688673019409, |
|
"logps/chosen": -270.8292541503906, |
|
"logps/rejected": -305.0626525878906, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.713470220565796, |
|
"rewards/margins": 7.436466217041016, |
|
"rewards/rejected": -10.14993667602539, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.690366972477065e-08, |
|
"logits/chosen": -2.7360880374908447, |
|
"logits/rejected": -2.639930248260498, |
|
"logps/chosen": -212.0613250732422, |
|
"logps/rejected": -236.4873504638672, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5846588611602783, |
|
"rewards/margins": 8.755882263183594, |
|
"rewards/rejected": -10.34054183959961, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_logits/chosen": -2.7717225551605225, |
|
"eval_logits/rejected": -2.655519723892212, |
|
"eval_logps/chosen": -287.77764892578125, |
|
"eval_logps/rejected": -268.86431884765625, |
|
"eval_loss": 0.6604524254798889, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -3.2996015548706055, |
|
"eval_rewards/margins": 3.926215648651123, |
|
"eval_rewards/rejected": -7.22581672668457, |
|
"eval_runtime": 42.9016, |
|
"eval_samples_per_second": 46.618, |
|
"eval_steps_per_second": 0.373, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.499235474006116e-08, |
|
"logits/chosen": -2.685112237930298, |
|
"logits/rejected": -2.5567049980163574, |
|
"logps/chosen": -453.57830810546875, |
|
"logps/rejected": -347.819091796875, |
|
"loss": 0.018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1265848875045776, |
|
"rewards/margins": 10.750268936157227, |
|
"rewards/rejected": -11.876853942871094, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.308103975535168e-08, |
|
"logits/chosen": -2.835233211517334, |
|
"logits/rejected": -2.7287702560424805, |
|
"logps/chosen": -341.2040710449219, |
|
"logps/rejected": -335.99957275390625, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6093537211418152, |
|
"rewards/margins": 8.289168357849121, |
|
"rewards/rejected": -8.898521423339844, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.116972477064219e-08, |
|
"logits/chosen": -2.782958507537842, |
|
"logits/rejected": -2.7175862789154053, |
|
"logps/chosen": -376.49114990234375, |
|
"logps/rejected": -467.7449645996094, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5592962503433228, |
|
"rewards/margins": 8.31694221496582, |
|
"rewards/rejected": -9.876237869262695, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.925840978593272e-08, |
|
"logits/chosen": -2.642646312713623, |
|
"logits/rejected": -2.5226454734802246, |
|
"logps/chosen": -199.7229461669922, |
|
"logps/rejected": -280.9407043457031, |
|
"loss": 0.0167, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0735208988189697, |
|
"rewards/margins": 8.644987106323242, |
|
"rewards/rejected": -9.718507766723633, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.734709480122324e-08, |
|
"logits/chosen": -2.500033378601074, |
|
"logits/rejected": -2.669753074645996, |
|
"logps/chosen": -184.5748748779297, |
|
"logps/rejected": -312.9557189941406, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.3214828968048096, |
|
"rewards/margins": 7.528273105621338, |
|
"rewards/rejected": -9.849756240844727, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.543577981651376e-08, |
|
"logits/chosen": -2.832641124725342, |
|
"logits/rejected": -2.600156545639038, |
|
"logps/chosen": -452.07177734375, |
|
"logps/rejected": -371.8671569824219, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0375783443450928, |
|
"rewards/margins": 9.188015937805176, |
|
"rewards/rejected": -10.225595474243164, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.352446483180428e-08, |
|
"logits/chosen": -2.801546096801758, |
|
"logits/rejected": -2.646188259124756, |
|
"logps/chosen": -281.76971435546875, |
|
"logps/rejected": -274.42327880859375, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7577645182609558, |
|
"rewards/margins": 8.514206886291504, |
|
"rewards/rejected": -9.271970748901367, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.161314984709481e-08, |
|
"logits/chosen": -2.8526995182037354, |
|
"logits/rejected": -2.7566418647766113, |
|
"logps/chosen": -354.4234924316406, |
|
"logps/rejected": -326.2621765136719, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5101653933525085, |
|
"rewards/margins": 9.834528923034668, |
|
"rewards/rejected": -10.344694137573242, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.970183486238531e-08, |
|
"logits/chosen": -2.714329957962036, |
|
"logits/rejected": -2.7505898475646973, |
|
"logps/chosen": -221.5650177001953, |
|
"logps/rejected": -294.24505615234375, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7582021951675415, |
|
"rewards/margins": 8.098190307617188, |
|
"rewards/rejected": -9.856393814086914, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.779051987767583e-08, |
|
"logits/chosen": -2.8804407119750977, |
|
"logits/rejected": -2.6511168479919434, |
|
"logps/chosen": -427.35406494140625, |
|
"logps/rejected": -327.26007080078125, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4930397272109985, |
|
"rewards/margins": 9.664579391479492, |
|
"rewards/rejected": -11.15761947631836, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_logits/chosen": -2.7902204990386963, |
|
"eval_logits/rejected": -2.6779799461364746, |
|
"eval_logps/chosen": -291.1807861328125, |
|
"eval_logps/rejected": -276.8377380371094, |
|
"eval_loss": 0.6934967041015625, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -3.639915704727173, |
|
"eval_rewards/margins": 4.383244514465332, |
|
"eval_rewards/rejected": -8.023159980773926, |
|
"eval_runtime": 43.4056, |
|
"eval_samples_per_second": 46.077, |
|
"eval_steps_per_second": 0.369, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.587920489296635e-08, |
|
"logits/chosen": -2.53355073928833, |
|
"logits/rejected": -2.469438076019287, |
|
"logps/chosen": -219.6329803466797, |
|
"logps/rejected": -303.95587158203125, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3920328617095947, |
|
"rewards/margins": 9.254063606262207, |
|
"rewards/rejected": -10.646096229553223, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.396788990825688e-08, |
|
"logits/chosen": -2.6555113792419434, |
|
"logits/rejected": -2.589629888534546, |
|
"logps/chosen": -236.41653442382812, |
|
"logps/rejected": -295.16082763671875, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.670567035675049, |
|
"rewards/margins": 7.430187225341797, |
|
"rewards/rejected": -10.100754737854004, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.20565749235474e-08, |
|
"logits/chosen": -2.6323156356811523, |
|
"logits/rejected": -2.664762258529663, |
|
"logps/chosen": -193.96356201171875, |
|
"logps/rejected": -328.59417724609375, |
|
"loss": 0.0171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1080195903778076, |
|
"rewards/margins": 8.095452308654785, |
|
"rewards/rejected": -9.203471183776855, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.014525993883792e-08, |
|
"logits/chosen": -2.6163525581359863, |
|
"logits/rejected": -2.47815203666687, |
|
"logps/chosen": -309.06378173828125, |
|
"logps/rejected": -324.7481689453125, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1383777856826782, |
|
"rewards/margins": 8.445047378540039, |
|
"rewards/rejected": -9.583425521850586, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.823394495412843e-08, |
|
"logits/chosen": -2.5601565837860107, |
|
"logits/rejected": -2.7058122158050537, |
|
"logps/chosen": -250.9035186767578, |
|
"logps/rejected": -373.5662536621094, |
|
"loss": 0.013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8016271591186523, |
|
"rewards/margins": 12.705131530761719, |
|
"rewards/rejected": -13.506759643554688, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.632262996941895e-08, |
|
"logits/chosen": -2.721140146255493, |
|
"logits/rejected": -2.4993348121643066, |
|
"logps/chosen": -321.29547119140625, |
|
"logps/rejected": -343.4897155761719, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9347482919692993, |
|
"rewards/margins": 9.293264389038086, |
|
"rewards/rejected": -11.228011131286621, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.441131498470948e-08, |
|
"logits/chosen": -2.821812391281128, |
|
"logits/rejected": -2.5831847190856934, |
|
"logps/chosen": -294.45684814453125, |
|
"logps/rejected": -276.64080810546875, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6778271198272705, |
|
"rewards/margins": 7.781607151031494, |
|
"rewards/rejected": -9.459434509277344, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.762652635574341, |
|
"logits/rejected": -2.630335569381714, |
|
"logps/chosen": -346.9800720214844, |
|
"logps/rejected": -324.47760009765625, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6068371534347534, |
|
"rewards/margins": 8.518617630004883, |
|
"rewards/rejected": -9.125454902648926, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.058868501529052e-08, |
|
"logits/chosen": -2.796215534210205, |
|
"logits/rejected": -2.7548739910125732, |
|
"logps/chosen": -265.49566650390625, |
|
"logps/rejected": -304.27313232421875, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0402780771255493, |
|
"rewards/margins": 9.19719123840332, |
|
"rewards/rejected": -10.237469673156738, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.8677370030581035e-08, |
|
"logits/chosen": -2.604027271270752, |
|
"logits/rejected": -2.4126858711242676, |
|
"logps/chosen": -307.8719787597656, |
|
"logps/rejected": -329.3260803222656, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9125175476074219, |
|
"rewards/margins": 9.744293212890625, |
|
"rewards/rejected": -10.65680980682373, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_logits/chosen": -2.7994275093078613, |
|
"eval_logits/rejected": -2.6884756088256836, |
|
"eval_logps/chosen": -289.603271484375, |
|
"eval_logps/rejected": -274.7880554199219, |
|
"eval_loss": 0.6772685647010803, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -3.4821622371673584, |
|
"eval_rewards/margins": 4.336028099060059, |
|
"eval_rewards/rejected": -7.818190574645996, |
|
"eval_runtime": 42.7413, |
|
"eval_samples_per_second": 46.793, |
|
"eval_steps_per_second": 0.374, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6766055045871554e-08, |
|
"logits/chosen": -2.764072895050049, |
|
"logits/rejected": -2.712106227874756, |
|
"logps/chosen": -356.59771728515625, |
|
"logps/rejected": -422.31121826171875, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.934819221496582, |
|
"rewards/margins": 10.432962417602539, |
|
"rewards/rejected": -11.367780685424805, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.485474006116208e-08, |
|
"logits/chosen": -2.730092763900757, |
|
"logits/rejected": -2.6763250827789307, |
|
"logps/chosen": -278.66619873046875, |
|
"logps/rejected": -317.0282287597656, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1639602184295654, |
|
"rewards/margins": 8.831220626831055, |
|
"rewards/rejected": -9.9951810836792, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.294342507645259e-08, |
|
"logits/chosen": -2.771960496902466, |
|
"logits/rejected": -2.5834813117980957, |
|
"logps/chosen": -342.79241943359375, |
|
"logps/rejected": -364.6011657714844, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05748433992266655, |
|
"rewards/margins": 10.371294975280762, |
|
"rewards/rejected": -10.313809394836426, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.1032110091743117e-08, |
|
"logits/chosen": -2.6307904720306396, |
|
"logits/rejected": -2.3652987480163574, |
|
"logps/chosen": -281.32806396484375, |
|
"logps/rejected": -276.0794677734375, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8738715648651123, |
|
"rewards/margins": 7.841960906982422, |
|
"rewards/rejected": -9.715832710266113, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.9120795107033635e-08, |
|
"logits/chosen": -2.741020441055298, |
|
"logits/rejected": -2.4758517742156982, |
|
"logps/chosen": -283.36956787109375, |
|
"logps/rejected": -374.82598876953125, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3181073665618896, |
|
"rewards/margins": 10.364333152770996, |
|
"rewards/rejected": -11.682439804077148, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.7209480122324154e-08, |
|
"logits/chosen": -2.8300743103027344, |
|
"logits/rejected": -2.601854085922241, |
|
"logps/chosen": -338.0480041503906, |
|
"logps/rejected": -443.51226806640625, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.905073881149292, |
|
"rewards/margins": 9.982545852661133, |
|
"rewards/rejected": -12.88762092590332, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.529816513761467e-08, |
|
"logits/chosen": -2.666491985321045, |
|
"logits/rejected": -2.4424288272857666, |
|
"logps/chosen": -267.314208984375, |
|
"logps/rejected": -384.9258117675781, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7637933492660522, |
|
"rewards/margins": 8.94968318939209, |
|
"rewards/rejected": -10.713478088378906, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.33868501529052e-08, |
|
"logits/chosen": -2.690896511077881, |
|
"logits/rejected": -2.5063717365264893, |
|
"logps/chosen": -264.0552978515625, |
|
"logps/rejected": -322.1650085449219, |
|
"loss": 0.0175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4952847957611084, |
|
"rewards/margins": 10.171586990356445, |
|
"rewards/rejected": -11.666872024536133, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.147553516819572e-08, |
|
"logits/chosen": -2.720097780227661, |
|
"logits/rejected": -2.623161792755127, |
|
"logps/chosen": -245.23593139648438, |
|
"logps/rejected": -336.7532653808594, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.500422477722168, |
|
"rewards/margins": 8.012624740600586, |
|
"rewards/rejected": -10.513047218322754, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.9564220183486236e-08, |
|
"logits/chosen": -2.734628438949585, |
|
"logits/rejected": -2.626295804977417, |
|
"logps/chosen": -258.6006164550781, |
|
"logps/rejected": -283.94329833984375, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5415168404579163, |
|
"rewards/margins": 9.378320693969727, |
|
"rewards/rejected": -9.919837951660156, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_logits/chosen": -2.797022819519043, |
|
"eval_logits/rejected": -2.687746286392212, |
|
"eval_logps/chosen": -290.6907958984375, |
|
"eval_logps/rejected": -274.7027587890625, |
|
"eval_loss": 0.6812653541564941, |
|
"eval_rewards/accuracies": 0.828125, |
|
"eval_rewards/chosen": -3.590911865234375, |
|
"eval_rewards/margins": 4.218749523162842, |
|
"eval_rewards/rejected": -7.809660911560059, |
|
"eval_runtime": 43.3751, |
|
"eval_samples_per_second": 46.109, |
|
"eval_steps_per_second": 0.369, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.7652905198776755e-08, |
|
"logits/chosen": -2.766024351119995, |
|
"logits/rejected": -2.5780766010284424, |
|
"logps/chosen": -317.5596618652344, |
|
"logps/rejected": -323.1799011230469, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4215131998062134, |
|
"rewards/margins": 7.726994514465332, |
|
"rewards/rejected": -9.148508071899414, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.574159021406728e-08, |
|
"logits/chosen": -2.7624149322509766, |
|
"logits/rejected": -2.6529033184051514, |
|
"logps/chosen": -281.3433837890625, |
|
"logps/rejected": -344.85369873046875, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8688713312149048, |
|
"rewards/margins": 9.345159530639648, |
|
"rewards/rejected": -11.214031219482422, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.383027522935779e-08, |
|
"logits/chosen": -2.845158338546753, |
|
"logits/rejected": -2.767922878265381, |
|
"logps/chosen": -278.7703857421875, |
|
"logps/rejected": -385.30291748046875, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0106799602508545, |
|
"rewards/margins": 10.258461952209473, |
|
"rewards/rejected": -11.26914119720459, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.191896024464832e-08, |
|
"logits/chosen": -2.7531659603118896, |
|
"logits/rejected": -2.599841594696045, |
|
"logps/chosen": -311.2244873046875, |
|
"logps/rejected": -315.21246337890625, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4379760026931763, |
|
"rewards/margins": 10.220308303833008, |
|
"rewards/rejected": -11.658284187316895, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.0007645259938836e-08, |
|
"logits/chosen": -2.6417484283447266, |
|
"logits/rejected": -2.6449437141418457, |
|
"logps/chosen": -268.1544189453125, |
|
"logps/rejected": -399.4880676269531, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2721059322357178, |
|
"rewards/margins": 14.419519424438477, |
|
"rewards/rejected": -12.147417068481445, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.809633027522936e-08, |
|
"logits/chosen": -2.5494630336761475, |
|
"logits/rejected": -2.4989705085754395, |
|
"logps/chosen": -230.28109741210938, |
|
"logps/rejected": -325.3707580566406, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.975464344024658, |
|
"rewards/margins": 7.614091396331787, |
|
"rewards/rejected": -10.589555740356445, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6185015290519877e-08, |
|
"logits/chosen": -2.411627769470215, |
|
"logits/rejected": -2.542217493057251, |
|
"logps/chosen": -201.55873107910156, |
|
"logps/rejected": -297.8031005859375, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.786314606666565, |
|
"rewards/margins": 9.065244674682617, |
|
"rewards/rejected": -10.85155963897705, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4273700305810396e-08, |
|
"logits/chosen": -2.826395034790039, |
|
"logits/rejected": -2.7339437007904053, |
|
"logps/chosen": -320.2567443847656, |
|
"logps/rejected": -365.04473876953125, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3468475341796875, |
|
"rewards/margins": 10.051078796386719, |
|
"rewards/rejected": -12.39792537689209, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2362385321100918e-08, |
|
"logits/chosen": -2.5493578910827637, |
|
"logits/rejected": -2.556107997894287, |
|
"logps/chosen": -293.81561279296875, |
|
"logps/rejected": -364.3607482910156, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6632717251777649, |
|
"rewards/margins": 10.876815795898438, |
|
"rewards/rejected": -11.540087699890137, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0451070336391437e-08, |
|
"logits/chosen": -2.2315673828125, |
|
"logits/rejected": -2.153177499771118, |
|
"logps/chosen": -197.33572387695312, |
|
"logps/rejected": -307.0080261230469, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3446338474750519, |
|
"rewards/margins": 11.82870864868164, |
|
"rewards/rejected": -11.484074592590332, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_logits/chosen": -2.786235809326172, |
|
"eval_logits/rejected": -2.6765427589416504, |
|
"eval_logps/chosen": -293.01751708984375, |
|
"eval_logps/rejected": -278.0957336425781, |
|
"eval_loss": 0.6891822218894958, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -3.8235886096954346, |
|
"eval_rewards/margins": 4.3253679275512695, |
|
"eval_rewards/rejected": -8.148956298828125, |
|
"eval_runtime": 43.4023, |
|
"eval_samples_per_second": 46.081, |
|
"eval_steps_per_second": 0.369, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8539755351681956e-08, |
|
"logits/chosen": -2.6543760299682617, |
|
"logits/rejected": -2.726107358932495, |
|
"logps/chosen": -423.95465087890625, |
|
"logps/rejected": -416.55419921875, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1608713865280151, |
|
"rewards/margins": 9.049515724182129, |
|
"rewards/rejected": -10.210386276245117, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6628440366972478e-08, |
|
"logits/chosen": -2.6333303451538086, |
|
"logits/rejected": -2.356459856033325, |
|
"logps/chosen": -344.8193054199219, |
|
"logps/rejected": -257.2393493652344, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.881445288658142, |
|
"rewards/margins": 8.154787063598633, |
|
"rewards/rejected": -10.036233901977539, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4717125382262997e-08, |
|
"logits/chosen": -2.818207025527954, |
|
"logits/rejected": -2.738051176071167, |
|
"logps/chosen": -335.53314208984375, |
|
"logps/rejected": -399.3725891113281, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7855017185211182, |
|
"rewards/margins": 8.13086223602295, |
|
"rewards/rejected": -9.916363716125488, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2805810397553517e-08, |
|
"logits/chosen": -2.5987372398376465, |
|
"logits/rejected": -2.5041844844818115, |
|
"logps/chosen": -395.9434509277344, |
|
"logps/rejected": -483.74322509765625, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.267432689666748, |
|
"rewards/margins": 10.311139106750488, |
|
"rewards/rejected": -12.578573226928711, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0894495412844038e-08, |
|
"logits/chosen": -2.592224597930908, |
|
"logits/rejected": -2.6307315826416016, |
|
"logps/chosen": -259.36126708984375, |
|
"logps/rejected": -329.85626220703125, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8243284225463867, |
|
"rewards/margins": 8.955255508422852, |
|
"rewards/rejected": -9.779583930969238, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.983180428134555e-09, |
|
"logits/chosen": -2.685051441192627, |
|
"logits/rejected": -2.527430295944214, |
|
"logps/chosen": -244.1125946044922, |
|
"logps/rejected": -238.81472778320312, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.916625738143921, |
|
"rewards/margins": 6.8498854637146, |
|
"rewards/rejected": -9.766510963439941, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.071865443425076e-09, |
|
"logits/chosen": -2.5415682792663574, |
|
"logits/rejected": -2.7124288082122803, |
|
"logps/chosen": -298.91815185546875, |
|
"logps/rejected": -437.7132873535156, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.020925998687744, |
|
"rewards/margins": 9.953208923339844, |
|
"rewards/rejected": -11.97413444519043, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.1605504587155965e-09, |
|
"logits/chosen": -2.8062963485717773, |
|
"logits/rejected": -2.5917065143585205, |
|
"logps/chosen": -269.49462890625, |
|
"logps/rejected": -275.78240966796875, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.393111228942871, |
|
"rewards/margins": 8.628837585449219, |
|
"rewards/rejected": -10.021946907043457, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.249235474006116e-09, |
|
"logits/chosen": -2.7617385387420654, |
|
"logits/rejected": -2.6802616119384766, |
|
"logps/chosen": -315.0368347167969, |
|
"logps/rejected": -349.5107116699219, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1777875423431396, |
|
"rewards/margins": 8.661017417907715, |
|
"rewards/rejected": -11.83880615234375, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.3379204892966359e-09, |
|
"logits/chosen": -2.701010227203369, |
|
"logits/rejected": -2.6551761627197266, |
|
"logps/chosen": -245.7960662841797, |
|
"logps/rejected": -332.9742736816406, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9432682991027832, |
|
"rewards/margins": 9.252389907836914, |
|
"rewards/rejected": -10.195657730102539, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_logits/chosen": -2.7830052375793457, |
|
"eval_logits/rejected": -2.6728432178497314, |
|
"eval_logps/chosen": -293.980224609375, |
|
"eval_logps/rejected": -279.6104431152344, |
|
"eval_loss": 0.6913489699363708, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -3.9198596477508545, |
|
"eval_rewards/margins": 4.380568027496338, |
|
"eval_rewards/rejected": -8.30042839050293, |
|
"eval_runtime": 43.3515, |
|
"eval_samples_per_second": 46.134, |
|
"eval_steps_per_second": 0.369, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2907, |
|
"total_flos": 0.0, |
|
"train_loss": 0.20427082364947516, |
|
"train_runtime": 9903.6907, |
|
"train_samples_per_second": 18.771, |
|
"train_steps_per_second": 0.294 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2907, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|