|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.6615384615384614, |
|
"eval_steps": 20, |
|
"global_step": 360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009230769230769232, |
|
"grad_norm": 52.40730345789634, |
|
"learning_rate": 2.2727272727272725e-08, |
|
"logits/chosen": -1.2901445627212524, |
|
"logits/rejected": -1.2963205575942993, |
|
"logps/chosen": -16.113027572631836, |
|
"logps/rejected": -27.10122299194336, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.018461538461538463, |
|
"grad_norm": 64.88802449206628, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": -1.3016295433044434, |
|
"logits/rejected": -1.3255655765533447, |
|
"logps/chosen": -20.355079650878906, |
|
"logps/rejected": -39.93232727050781, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.008816350251436234, |
|
"rewards/margins": 0.0047285472974181175, |
|
"rewards/rejected": 0.004087802488356829, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.027692307692307693, |
|
"grad_norm": 59.6800701771534, |
|
"learning_rate": 6.818181818181817e-08, |
|
"logits/chosen": -1.31508207321167, |
|
"logits/rejected": -1.3189733028411865, |
|
"logps/chosen": -23.069622039794922, |
|
"logps/rejected": -26.97477149963379, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -0.007372706197202206, |
|
"rewards/margins": -0.013017671182751656, |
|
"rewards/rejected": 0.0056449659168720245, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.036923076923076927, |
|
"grad_norm": 52.983511533208585, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/chosen": -1.277503252029419, |
|
"logits/rejected": -1.3002785444259644, |
|
"logps/chosen": -20.34660530090332, |
|
"logps/rejected": -31.0557861328125, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.020842621102929115, |
|
"rewards/margins": 0.020597590133547783, |
|
"rewards/rejected": 0.00024503222084604204, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.046153846153846156, |
|
"grad_norm": 66.2747581823961, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -1.3306350708007812, |
|
"logits/rejected": -1.3309379816055298, |
|
"logps/chosen": -26.48358917236328, |
|
"logps/rejected": -30.445173263549805, |
|
"loss": 0.7046, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.01246996782720089, |
|
"rewards/margins": -0.032543592154979706, |
|
"rewards/rejected": 0.020073626190423965, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.055384615384615386, |
|
"grad_norm": 57.271529486531605, |
|
"learning_rate": 1.3636363636363635e-07, |
|
"logits/chosen": -1.280084252357483, |
|
"logits/rejected": -1.295721411705017, |
|
"logps/chosen": -25.79343032836914, |
|
"logps/rejected": -36.58183288574219, |
|
"loss": 0.6956, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.023966560140252113, |
|
"rewards/margins": 0.030559096485376358, |
|
"rewards/rejected": -0.006592527963221073, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06461538461538462, |
|
"grad_norm": 67.94854888195144, |
|
"learning_rate": 1.5909090909090907e-07, |
|
"logits/chosen": -1.2790985107421875, |
|
"logits/rejected": -1.296931266784668, |
|
"logps/chosen": -24.833446502685547, |
|
"logps/rejected": -31.11182403564453, |
|
"loss": 0.7006, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": 0.013436201959848404, |
|
"rewards/margins": 0.002752000233158469, |
|
"rewards/rejected": 0.010684202425181866, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07384615384615385, |
|
"grad_norm": 49.36191286721225, |
|
"learning_rate": 1.818181818181818e-07, |
|
"logits/chosen": -1.301368236541748, |
|
"logits/rejected": -1.3136367797851562, |
|
"logps/chosen": -26.273963928222656, |
|
"logps/rejected": -35.63306427001953, |
|
"loss": 0.6949, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/chosen": 0.015296169556677341, |
|
"rewards/margins": 0.013788570649921894, |
|
"rewards/rejected": 0.0015075993724167347, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.08307692307692308, |
|
"grad_norm": 56.43976674406361, |
|
"learning_rate": 2.0454545454545456e-07, |
|
"logits/chosen": -1.3201720714569092, |
|
"logits/rejected": -1.3183202743530273, |
|
"logps/chosen": -25.70770263671875, |
|
"logps/rejected": -26.178009033203125, |
|
"loss": 0.7006, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.0011544560547918081, |
|
"rewards/margins": 0.01863468438386917, |
|
"rewards/rejected": -0.01748022995889187, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"grad_norm": 56.010590202518365, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -1.2482044696807861, |
|
"logits/rejected": -1.262031078338623, |
|
"logps/chosen": -28.337791442871094, |
|
"logps/rejected": -29.38203239440918, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.00024333276087418199, |
|
"rewards/margins": -0.0005785864195786417, |
|
"rewards/rejected": 0.000821918249130249, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"eval_logits/chosen": -1.3220677375793457, |
|
"eval_logits/rejected": -1.33245849609375, |
|
"eval_logps/chosen": -23.036666870117188, |
|
"eval_logps/rejected": -26.372356414794922, |
|
"eval_loss": 0.6916412115097046, |
|
"eval_rewards/accuracies": 0.4965437650680542, |
|
"eval_rewards/chosen": 0.00501647312194109, |
|
"eval_rewards/margins": 0.010797887109220028, |
|
"eval_rewards/rejected": -0.0057814153842628, |
|
"eval_runtime": 216.2201, |
|
"eval_samples_per_second": 8.02, |
|
"eval_steps_per_second": 2.007, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10153846153846154, |
|
"grad_norm": 67.30805212172523, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.2273086309432983, |
|
"logits/rejected": -1.2565299272537231, |
|
"logps/chosen": -21.540626525878906, |
|
"logps/rejected": -47.4769172668457, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.007773838937282562, |
|
"rewards/margins": 0.026619136333465576, |
|
"rewards/rejected": -0.018845297396183014, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.11076923076923077, |
|
"grad_norm": 51.29780655120263, |
|
"learning_rate": 2.727272727272727e-07, |
|
"logits/chosen": -1.219795823097229, |
|
"logits/rejected": -1.235877513885498, |
|
"logps/chosen": -30.82242774963379, |
|
"logps/rejected": -37.68511962890625, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.03086034394800663, |
|
"rewards/margins": 0.055920813232660294, |
|
"rewards/rejected": -0.025060458108782768, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 55.0939959360046, |
|
"learning_rate": 2.9545454545454545e-07, |
|
"logits/chosen": -1.258486270904541, |
|
"logits/rejected": -1.2752680778503418, |
|
"logps/chosen": -25.136966705322266, |
|
"logps/rejected": -43.23137664794922, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/chosen": 0.0216163769364357, |
|
"rewards/margins": 0.08480846881866455, |
|
"rewards/rejected": -0.06319208443164825, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.12923076923076923, |
|
"grad_norm": 48.332663649143974, |
|
"learning_rate": 3.1818181818181815e-07, |
|
"logits/chosen": -1.320160150527954, |
|
"logits/rejected": -1.330212950706482, |
|
"logps/chosen": -19.24217414855957, |
|
"logps/rejected": -27.22931671142578, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": 0.01321298535913229, |
|
"rewards/margins": 0.009595979005098343, |
|
"rewards/rejected": 0.0036170051898807287, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.13846153846153847, |
|
"grad_norm": 49.59877928678631, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -1.2795339822769165, |
|
"logits/rejected": -1.2929219007492065, |
|
"logps/chosen": -21.841049194335938, |
|
"logps/rejected": -28.89714813232422, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.013576723635196686, |
|
"rewards/margins": 0.05021868646144867, |
|
"rewards/rejected": -0.036641962826251984, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1476923076923077, |
|
"grad_norm": 51.02397460357053, |
|
"learning_rate": 3.636363636363636e-07, |
|
"logits/chosen": -1.2797447443008423, |
|
"logits/rejected": -1.3022751808166504, |
|
"logps/chosen": -24.65501594543457, |
|
"logps/rejected": -36.741573333740234, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.6527777910232544, |
|
"rewards/chosen": 0.04290567338466644, |
|
"rewards/margins": 0.09170582890510559, |
|
"rewards/rejected": -0.04880015552043915, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.15692307692307692, |
|
"grad_norm": 45.39524675384609, |
|
"learning_rate": 3.8636363636363636e-07, |
|
"logits/chosen": -1.2498574256896973, |
|
"logits/rejected": -1.2657580375671387, |
|
"logps/chosen": -21.32640838623047, |
|
"logps/rejected": -39.71310806274414, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.007356289308518171, |
|
"rewards/margins": 0.06605351716279984, |
|
"rewards/rejected": -0.058697231113910675, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.16615384615384615, |
|
"grad_norm": 52.58099443727954, |
|
"learning_rate": 4.090909090909091e-07, |
|
"logits/chosen": -1.2139866352081299, |
|
"logits/rejected": -1.2340948581695557, |
|
"logps/chosen": -18.409015655517578, |
|
"logps/rejected": -35.20015335083008, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.022290384396910667, |
|
"rewards/margins": 0.06140115484595299, |
|
"rewards/rejected": -0.03911077231168747, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.1753846153846154, |
|
"grad_norm": 53.938952453151614, |
|
"learning_rate": 4.318181818181818e-07, |
|
"logits/chosen": -1.2461514472961426, |
|
"logits/rejected": -1.2598522901535034, |
|
"logps/chosen": -27.248275756835938, |
|
"logps/rejected": -32.50380325317383, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.04994047060608864, |
|
"rewards/margins": 0.1001262366771698, |
|
"rewards/rejected": -0.05018576979637146, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"grad_norm": 46.949545804629956, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -1.2425076961517334, |
|
"logits/rejected": -1.2611976861953735, |
|
"logps/chosen": -14.459053993225098, |
|
"logps/rejected": -22.981327056884766, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 0.06477613002061844, |
|
"rewards/margins": 0.08449113368988037, |
|
"rewards/rejected": -0.019715001806616783, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"eval_logits/chosen": -1.3191018104553223, |
|
"eval_logits/rejected": -1.3294612169265747, |
|
"eval_logps/chosen": -22.93289566040039, |
|
"eval_logps/rejected": -26.52239418029785, |
|
"eval_loss": 0.6399217247962952, |
|
"eval_rewards/accuracies": 0.671658992767334, |
|
"eval_rewards/chosen": 0.05690104886889458, |
|
"eval_rewards/margins": 0.13770265877246857, |
|
"eval_rewards/rejected": -0.08080162853002548, |
|
"eval_runtime": 216.334, |
|
"eval_samples_per_second": 8.015, |
|
"eval_steps_per_second": 2.006, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19384615384615383, |
|
"grad_norm": 41.53559188167412, |
|
"learning_rate": 4.772727272727273e-07, |
|
"logits/chosen": -1.2119545936584473, |
|
"logits/rejected": -1.2175490856170654, |
|
"logps/chosen": -23.42240333557129, |
|
"logps/rejected": -29.862327575683594, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.059619419276714325, |
|
"rewards/margins": 0.15751110017299652, |
|
"rewards/rejected": -0.09789170324802399, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.20307692307692307, |
|
"grad_norm": 49.942474151893265, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.3206286430358887, |
|
"logits/rejected": -1.3300279378890991, |
|
"logps/chosen": -22.983713150024414, |
|
"logps/rejected": -23.000356674194336, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.7083333134651184, |
|
"rewards/chosen": 0.05540511757135391, |
|
"rewards/margins": 0.1078185960650444, |
|
"rewards/rejected": -0.05241347849369049, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2123076923076923, |
|
"grad_norm": 40.96104792630147, |
|
"learning_rate": 4.99967220916408e-07, |
|
"logits/chosen": -1.2594552040100098, |
|
"logits/rejected": -1.270306944847107, |
|
"logps/chosen": -19.131641387939453, |
|
"logps/rejected": -29.00514793395996, |
|
"loss": 0.617, |
|
"rewards/accuracies": 0.7083333134651184, |
|
"rewards/chosen": 0.09214716404676437, |
|
"rewards/margins": 0.24131464958190918, |
|
"rewards/rejected": -0.14916746318340302, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.22153846153846155, |
|
"grad_norm": 44.60792696333844, |
|
"learning_rate": 4.998688922613787e-07, |
|
"logits/chosen": -1.3020961284637451, |
|
"logits/rejected": -1.3101927042007446, |
|
"logps/chosen": -31.274911880493164, |
|
"logps/rejected": -32.11240005493164, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.7083333134651184, |
|
"rewards/chosen": 0.10760927200317383, |
|
"rewards/margins": 0.294413298368454, |
|
"rewards/rejected": -0.18680399656295776, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 43.17860095734465, |
|
"learning_rate": 4.997050398198976e-07, |
|
"logits/chosen": -1.291076421737671, |
|
"logits/rejected": -1.2982360124588013, |
|
"logps/chosen": -22.59940528869629, |
|
"logps/rejected": -22.504961013793945, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.14835722744464874, |
|
"rewards/margins": 0.3006143271923065, |
|
"rewards/rejected": -0.15225709974765778, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 40.923959372883246, |
|
"learning_rate": 4.994757065594279e-07, |
|
"logits/chosen": -1.2361193895339966, |
|
"logits/rejected": -1.2530244588851929, |
|
"logps/chosen": -19.440345764160156, |
|
"logps/rejected": -29.653764724731445, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.12961499392986298, |
|
"rewards/margins": 0.2747644782066345, |
|
"rewards/rejected": -0.14514949917793274, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.24923076923076923, |
|
"grad_norm": 40.97149688332116, |
|
"learning_rate": 4.991809526186423e-07, |
|
"logits/chosen": -1.2297606468200684, |
|
"logits/rejected": -1.25152587890625, |
|
"logps/chosen": -21.388309478759766, |
|
"logps/rejected": -44.34809112548828, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.151195228099823, |
|
"rewards/margins": 0.48822492361068726, |
|
"rewards/rejected": -0.33702969551086426, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.25846153846153846, |
|
"grad_norm": 41.37645783028047, |
|
"learning_rate": 4.988208552916535e-07, |
|
"logits/chosen": -1.2540967464447021, |
|
"logits/rejected": -1.2566981315612793, |
|
"logps/chosen": -22.95637321472168, |
|
"logps/rejected": -23.91745376586914, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.20567570626735687, |
|
"rewards/margins": 0.3446711003780365, |
|
"rewards/rejected": -0.13899540901184082, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2676923076923077, |
|
"grad_norm": 37.07709893155658, |
|
"learning_rate": 4.983955090077444e-07, |
|
"logits/chosen": -1.2924391031265259, |
|
"logits/rejected": -1.2913458347320557, |
|
"logps/chosen": -18.923715591430664, |
|
"logps/rejected": -22.57257843017578, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.16216707229614258, |
|
"rewards/margins": 0.27626025676727295, |
|
"rewards/rejected": -0.11409316956996918, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.27692307692307694, |
|
"grad_norm": 33.00415567764037, |
|
"learning_rate": 4.979050253066063e-07, |
|
"logits/chosen": -1.2263813018798828, |
|
"logits/rejected": -1.2465788125991821, |
|
"logps/chosen": -20.503381729125977, |
|
"logps/rejected": -37.98419189453125, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.7083333134651184, |
|
"rewards/chosen": 0.17731823027133942, |
|
"rewards/margins": 0.593184769153595, |
|
"rewards/rejected": -0.41586652398109436, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.27692307692307694, |
|
"eval_logits/chosen": -1.303908109664917, |
|
"eval_logits/rejected": -1.3140496015548706, |
|
"eval_logps/chosen": -22.596784591674805, |
|
"eval_logps/rejected": -26.880229949951172, |
|
"eval_loss": 0.5301286578178406, |
|
"eval_rewards/accuracies": 0.7718893885612488, |
|
"eval_rewards/chosen": 0.22495588660240173, |
|
"eval_rewards/margins": 0.484672486782074, |
|
"eval_rewards/rejected": -0.259716659784317, |
|
"eval_runtime": 215.7229, |
|
"eval_samples_per_second": 8.038, |
|
"eval_steps_per_second": 2.012, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28615384615384615, |
|
"grad_norm": 32.870504270075905, |
|
"learning_rate": 4.973495328090889e-07, |
|
"logits/chosen": -1.2028117179870605, |
|
"logits/rejected": -1.2163152694702148, |
|
"logps/chosen": -25.100025177001953, |
|
"logps/rejected": -35.97075653076172, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7083333134651184, |
|
"rewards/chosen": 0.20213226974010468, |
|
"rewards/margins": 0.5411441326141357, |
|
"rewards/rejected": -0.33901187777519226, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.2953846153846154, |
|
"grad_norm": 38.13033333375434, |
|
"learning_rate": 4.967291771834726e-07, |
|
"logits/chosen": -1.2682946920394897, |
|
"logits/rejected": -1.2830837965011597, |
|
"logps/chosen": -22.399858474731445, |
|
"logps/rejected": -35.47315979003906, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.24411238729953766, |
|
"rewards/margins": 0.7097706198692322, |
|
"rewards/rejected": -0.46565818786621094, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3046153846153846, |
|
"grad_norm": 34.6917991893696, |
|
"learning_rate": 4.960441211072685e-07, |
|
"logits/chosen": -1.240267038345337, |
|
"logits/rejected": -1.2494441270828247, |
|
"logps/chosen": -16.752328872680664, |
|
"logps/rejected": -21.625200271606445, |
|
"loss": 0.52, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.2749379575252533, |
|
"rewards/margins": 0.5106962323188782, |
|
"rewards/rejected": -0.23575833439826965, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.31384615384615383, |
|
"grad_norm": 32.938257449212315, |
|
"learning_rate": 4.952945442245597e-07, |
|
"logits/chosen": -1.282260775566101, |
|
"logits/rejected": -1.2961454391479492, |
|
"logps/chosen": -16.818540573120117, |
|
"logps/rejected": -31.804317474365234, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.20085500180721283, |
|
"rewards/margins": 0.6287386417388916, |
|
"rewards/rejected": -0.42788365483283997, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3230769230769231, |
|
"grad_norm": 36.12880857430109, |
|
"learning_rate": 4.944806430988927e-07, |
|
"logits/chosen": -1.2567392587661743, |
|
"logits/rejected": -1.263179063796997, |
|
"logps/chosen": -23.333267211914062, |
|
"logps/rejected": -24.862985610961914, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.7361111044883728, |
|
"rewards/chosen": 0.22914116084575653, |
|
"rewards/margins": 0.6000176668167114, |
|
"rewards/rejected": -0.3708764612674713, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3323076923076923, |
|
"grad_norm": 31.746333807337315, |
|
"learning_rate": 4.936026311617316e-07, |
|
"logits/chosen": -1.2413491010665894, |
|
"logits/rejected": -1.2490180730819702, |
|
"logps/chosen": -27.870990753173828, |
|
"logps/rejected": -28.86038589477539, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.35419517755508423, |
|
"rewards/margins": 0.7417442202568054, |
|
"rewards/rejected": -0.3875490427017212, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3415384615384615, |
|
"grad_norm": 31.965936446320438, |
|
"learning_rate": 4.926607386564898e-07, |
|
"logits/chosen": -1.3071357011795044, |
|
"logits/rejected": -1.3031624555587769, |
|
"logps/chosen": -24.66501808166504, |
|
"logps/rejected": -19.646629333496094, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.3141394257545471, |
|
"rewards/margins": 0.6052231788635254, |
|
"rewards/rejected": -0.29108375310897827, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.3507692307692308, |
|
"grad_norm": 30.420218056003396, |
|
"learning_rate": 4.916552125781528e-07, |
|
"logits/chosen": -1.2826448678970337, |
|
"logits/rejected": -1.2921828031539917, |
|
"logps/chosen": -21.71385955810547, |
|
"logps/rejected": -26.265592575073242, |
|
"loss": 0.443, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3491870164871216, |
|
"rewards/margins": 0.7558759450912476, |
|
"rewards/rejected": -0.4066888988018036, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 35.262762131347294, |
|
"learning_rate": 4.905863166085075e-07, |
|
"logits/chosen": -1.2882230281829834, |
|
"logits/rejected": -1.3004416227340698, |
|
"logps/chosen": -25.61620330810547, |
|
"logps/rejected": -26.73788833618164, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.29705706238746643, |
|
"rewards/margins": 0.6734262108802795, |
|
"rewards/rejected": -0.3763691484928131, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"grad_norm": 32.454214562336674, |
|
"learning_rate": 4.894543310469967e-07, |
|
"logits/chosen": -1.292490839958191, |
|
"logits/rejected": -1.3075741529464722, |
|
"logps/chosen": -24.23374366760254, |
|
"logps/rejected": -27.662269592285156, |
|
"loss": 0.4233, |
|
"rewards/accuracies": 0.7222222089767456, |
|
"rewards/chosen": 0.3347330093383789, |
|
"rewards/margins": 0.7462683320045471, |
|
"rewards/rejected": -0.4115353524684906, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"eval_logits/chosen": -1.2837809324264526, |
|
"eval_logits/rejected": -1.293448567390442, |
|
"eval_logps/chosen": -22.318069458007812, |
|
"eval_logps/rejected": -27.420156478881836, |
|
"eval_loss": 0.4364205598831177, |
|
"eval_rewards/accuracies": 0.7937787771224976, |
|
"eval_rewards/chosen": 0.3643138110637665, |
|
"eval_rewards/margins": 0.893993616104126, |
|
"eval_rewards/rejected": -0.5296797752380371, |
|
"eval_runtime": 215.7088, |
|
"eval_samples_per_second": 8.039, |
|
"eval_steps_per_second": 2.012, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.37846153846153846, |
|
"grad_norm": 30.100728508551764, |
|
"learning_rate": 4.882595527372152e-07, |
|
"logits/chosen": -1.219198226928711, |
|
"logits/rejected": -1.2316464185714722, |
|
"logps/chosen": -21.758522033691406, |
|
"logps/rejected": -32.21995544433594, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.33725497126579285, |
|
"rewards/margins": 0.9134353995323181, |
|
"rewards/rejected": -0.5761803984642029, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.38769230769230767, |
|
"grad_norm": 27.99260854977849, |
|
"learning_rate": 4.870022949890676e-07, |
|
"logits/chosen": -1.25475013256073, |
|
"logits/rejected": -1.258756160736084, |
|
"logps/chosen": -29.569332122802734, |
|
"logps/rejected": -32.13206481933594, |
|
"loss": 0.4048, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3496508300304413, |
|
"rewards/margins": 1.0080742835998535, |
|
"rewards/rejected": -0.6584234237670898, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.39692307692307693, |
|
"grad_norm": 28.434505768144174, |
|
"learning_rate": 4.856828874966086e-07, |
|
"logits/chosen": -1.2163680791854858, |
|
"logits/rejected": -1.2340407371520996, |
|
"logps/chosen": -18.534114837646484, |
|
"logps/rejected": -36.619850158691406, |
|
"loss": 0.422, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": 0.2995716333389282, |
|
"rewards/margins": 1.0983738899230957, |
|
"rewards/rejected": -0.7988021969795227, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.40615384615384614, |
|
"grad_norm": 28.794469436567187, |
|
"learning_rate": 4.843016762515859e-07, |
|
"logits/chosen": -1.2752939462661743, |
|
"logits/rejected": -1.285552978515625, |
|
"logps/chosen": -21.55384635925293, |
|
"logps/rejected": -30.397226333618164, |
|
"loss": 0.3905, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.37557560205459595, |
|
"rewards/margins": 1.0376694202423096, |
|
"rewards/rejected": -0.6620937585830688, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.4153846153846154, |
|
"grad_norm": 24.699190483704957, |
|
"learning_rate": 4.828590234527106e-07, |
|
"logits/chosen": -1.2076385021209717, |
|
"logits/rejected": -1.2378058433532715, |
|
"logps/chosen": -20.13502311706543, |
|
"logps/rejected": -49.50822067260742, |
|
"loss": 0.3616, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.29748064279556274, |
|
"rewards/margins": 1.576164722442627, |
|
"rewards/rejected": -1.2786839008331299, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4246153846153846, |
|
"grad_norm": 24.998257178693006, |
|
"learning_rate": 4.81355307410676e-07, |
|
"logits/chosen": -1.268651008605957, |
|
"logits/rejected": -1.2737505435943604, |
|
"logps/chosen": -21.684688568115234, |
|
"logps/rejected": -20.43457794189453, |
|
"loss": 0.3963, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.701554536819458, |
|
"rewards/margins": 1.2370011806488037, |
|
"rewards/rejected": -0.5354464650154114, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.4338461538461538, |
|
"grad_norm": 30.39233888946852, |
|
"learning_rate": 4.79790922448953e-07, |
|
"logits/chosen": -1.2319780588150024, |
|
"logits/rejected": -1.234665870666504, |
|
"logps/chosen": -22.746065139770508, |
|
"logps/rejected": -37.10270309448242, |
|
"loss": 0.4055, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.33227479457855225, |
|
"rewards/margins": 1.4662950038909912, |
|
"rewards/rejected": -1.1340200901031494, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4430769230769231, |
|
"grad_norm": 28.351607065877335, |
|
"learning_rate": 4.78166278800385e-07, |
|
"logits/chosen": -1.2103080749511719, |
|
"logits/rejected": -1.2216867208480835, |
|
"logps/chosen": -22.36292839050293, |
|
"logps/rejected": -36.19468307495117, |
|
"loss": 0.3633, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.46569257974624634, |
|
"rewards/margins": 1.3663029670715332, |
|
"rewards/rejected": -0.9006102681159973, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4523076923076923, |
|
"grad_norm": 27.63597035013981, |
|
"learning_rate": 4.7648180249961165e-07, |
|
"logits/chosen": -1.2609645128250122, |
|
"logits/rejected": -1.2675108909606934, |
|
"logps/chosen": -19.6772403717041, |
|
"logps/rejected": -22.703941345214844, |
|
"loss": 0.3425, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.42960312962532043, |
|
"rewards/margins": 1.3149679899215698, |
|
"rewards/rejected": -0.8853649497032166, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 27.095171417356656, |
|
"learning_rate": 4.747379352713488e-07, |
|
"logits/chosen": -1.2016191482543945, |
|
"logits/rejected": -1.212724208831787, |
|
"logps/chosen": -26.863676071166992, |
|
"logps/rejected": -35.31084442138672, |
|
"loss": 0.3626, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.43012529611587524, |
|
"rewards/margins": 1.344970703125, |
|
"rewards/rejected": -0.9148455858230591, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"eval_logits/chosen": -1.2631281614303589, |
|
"eval_logits/rejected": -1.2726249694824219, |
|
"eval_logps/chosen": -22.157392501831055, |
|
"eval_logps/rejected": -28.169017791748047, |
|
"eval_loss": 0.3646220564842224, |
|
"eval_rewards/accuracies": 0.7972350120544434, |
|
"eval_rewards/chosen": 0.4446515440940857, |
|
"eval_rewards/margins": 1.348763346672058, |
|
"eval_rewards/rejected": -0.904111921787262, |
|
"eval_runtime": 215.7885, |
|
"eval_samples_per_second": 8.036, |
|
"eval_steps_per_second": 2.011, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4707692307692308, |
|
"grad_norm": 32.35798457566701, |
|
"learning_rate": 4.7293513441455357e-07, |
|
"logits/chosen": -1.2197188138961792, |
|
"logits/rejected": -1.2320291996002197, |
|
"logps/chosen": -19.279041290283203, |
|
"logps/rejected": -35.00586700439453, |
|
"loss": 0.3714, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3205001652240753, |
|
"rewards/margins": 1.3763878345489502, |
|
"rewards/rejected": -1.0558876991271973, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 21.70119714606352, |
|
"learning_rate": 4.7107387268250586e-07, |
|
"logits/chosen": -1.1967614889144897, |
|
"logits/rejected": -1.220970630645752, |
|
"logps/chosen": -10.033695220947266, |
|
"logps/rejected": -38.51593017578125, |
|
"loss": 0.3835, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.4108971059322357, |
|
"rewards/margins": 1.6398005485534668, |
|
"rewards/rejected": -1.2289036512374878, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.48923076923076925, |
|
"grad_norm": 22.839162689384967, |
|
"learning_rate": 4.691546381588369e-07, |
|
"logits/chosen": -1.2221455574035645, |
|
"logits/rejected": -1.2347490787506104, |
|
"logps/chosen": -20.123445510864258, |
|
"logps/rejected": -34.73093032836914, |
|
"loss": 0.3528, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.3299613296985626, |
|
"rewards/margins": 1.6646933555603027, |
|
"rewards/rejected": -1.3347320556640625, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.49846153846153846, |
|
"grad_norm": 30.91989303041632, |
|
"learning_rate": 4.6717793412953776e-07, |
|
"logits/chosen": -1.2001112699508667, |
|
"logits/rejected": -1.2213759422302246, |
|
"logps/chosen": -18.639766693115234, |
|
"logps/rejected": -38.698211669921875, |
|
"loss": 0.3751, |
|
"rewards/accuracies": 0.7361111044883728, |
|
"rewards/chosen": 0.3170078694820404, |
|
"rewards/margins": 1.7733925580978394, |
|
"rewards/rejected": -1.456384539604187, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5076923076923077, |
|
"grad_norm": 22.21075058785491, |
|
"learning_rate": 4.651442789509813e-07, |
|
"logits/chosen": -1.172301173210144, |
|
"logits/rejected": -1.1873422861099243, |
|
"logps/chosen": -19.037778854370117, |
|
"logps/rejected": -35.6918830871582, |
|
"loss": 0.3632, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.44801807403564453, |
|
"rewards/margins": 1.6537230014801025, |
|
"rewards/rejected": -1.2057050466537476, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5169230769230769, |
|
"grad_norm": 22.23191382020911, |
|
"learning_rate": 4.630542059139923e-07, |
|
"logits/chosen": -1.1621766090393066, |
|
"logits/rejected": -1.1781913042068481, |
|
"logps/chosen": -26.200401306152344, |
|
"logps/rejected": -28.19536590576172, |
|
"loss": 0.3117, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.4852801561355591, |
|
"rewards/margins": 1.5631003379821777, |
|
"rewards/rejected": -1.0778203010559082, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5261538461538462, |
|
"grad_norm": 26.06519967082825, |
|
"learning_rate": 4.609082631040011e-07, |
|
"logits/chosen": -1.1710741519927979, |
|
"logits/rejected": -1.1770610809326172, |
|
"logps/chosen": -26.139328002929688, |
|
"logps/rejected": -38.44914627075195, |
|
"loss": 0.3191, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.42665359377861023, |
|
"rewards/margins": 1.9680951833724976, |
|
"rewards/rejected": -1.5414414405822754, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5353846153846153, |
|
"grad_norm": 23.76055177774163, |
|
"learning_rate": 4.5870701325731773e-07, |
|
"logits/chosen": -1.1841078996658325, |
|
"logits/rejected": -1.2016386985778809, |
|
"logps/chosen": -18.3129940032959, |
|
"logps/rejected": -38.7909049987793, |
|
"loss": 0.3422, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.31725624203681946, |
|
"rewards/margins": 1.8888146877288818, |
|
"rewards/rejected": -1.5715583562850952, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5446153846153846, |
|
"grad_norm": 22.451458526325442, |
|
"learning_rate": 4.5645103361356407e-07, |
|
"logits/chosen": -1.203595519065857, |
|
"logits/rejected": -1.1993364095687866, |
|
"logps/chosen": -29.456233978271484, |
|
"logps/rejected": -24.436891555786133, |
|
"loss": 0.3111, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.4006561040878296, |
|
"rewards/margins": 1.460686206817627, |
|
"rewards/rejected": -1.0600301027297974, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5538461538461539, |
|
"grad_norm": 20.899441336146108, |
|
"learning_rate": 4.541409157643027e-07, |
|
"logits/chosen": -1.113027811050415, |
|
"logits/rejected": -1.1339952945709229, |
|
"logps/chosen": -22.780738830566406, |
|
"logps/rejected": -37.4469108581543, |
|
"loss": 0.263, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5090766549110413, |
|
"rewards/margins": 2.038201332092285, |
|
"rewards/rejected": -1.5291246175765991, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5538461538461539, |
|
"eval_logits/chosen": -1.2401551008224487, |
|
"eval_logits/rejected": -1.249323844909668, |
|
"eval_logps/chosen": -22.120243072509766, |
|
"eval_logps/rejected": -28.963603973388672, |
|
"eval_loss": 0.32304224371910095, |
|
"eval_rewards/accuracies": 0.8122119903564453, |
|
"eval_rewards/chosen": 0.46322670578956604, |
|
"eval_rewards/margins": 1.764631986618042, |
|
"eval_rewards/rejected": -1.3014051914215088, |
|
"eval_runtime": 215.8398, |
|
"eval_samples_per_second": 8.034, |
|
"eval_steps_per_second": 2.011, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.563076923076923, |
|
"grad_norm": 25.722122527925197, |
|
"learning_rate": 4.517772654979023e-07, |
|
"logits/chosen": -1.1628613471984863, |
|
"logits/rejected": -1.1666890382766724, |
|
"logps/chosen": -28.28006935119629, |
|
"logps/rejected": -32.06778335571289, |
|
"loss": 0.2967, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.42497023940086365, |
|
"rewards/margins": 1.8420732021331787, |
|
"rewards/rejected": -1.4171031713485718, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5723076923076923, |
|
"grad_norm": 18.859437245079093, |
|
"learning_rate": 4.4936070264068016e-07, |
|
"logits/chosen": -1.097366452217102, |
|
"logits/rejected": -1.1257672309875488, |
|
"logps/chosen": -19.26881217956543, |
|
"logps/rejected": -50.698387145996094, |
|
"loss": 0.3122, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.3975294530391693, |
|
"rewards/margins": 2.376965045928955, |
|
"rewards/rejected": -1.9794355630874634, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5815384615384616, |
|
"grad_norm": 24.12611784808478, |
|
"learning_rate": 4.468918608943636e-07, |
|
"logits/chosen": -1.188425064086914, |
|
"logits/rejected": -1.2095468044281006, |
|
"logps/chosen": -22.594573974609375, |
|
"logps/rejected": -33.808677673339844, |
|
"loss": 0.2989, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.4648338854312897, |
|
"rewards/margins": 2.128401756286621, |
|
"rewards/rejected": -1.6635680198669434, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5907692307692308, |
|
"grad_norm": 21.121113872126465, |
|
"learning_rate": 4.443713876699123e-07, |
|
"logits/chosen": -1.176856279373169, |
|
"logits/rejected": -1.175789713859558, |
|
"logps/chosen": -31.682504653930664, |
|
"logps/rejected": -26.862850189208984, |
|
"loss": 0.2881, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.47753646969795227, |
|
"rewards/margins": 1.661524772644043, |
|
"rewards/rejected": -1.183988332748413, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 24.221092280098347, |
|
"learning_rate": 4.417999439177465e-07, |
|
"logits/chosen": -1.1786390542984009, |
|
"logits/rejected": -1.1881896257400513, |
|
"logps/chosen": -18.69803237915039, |
|
"logps/rejected": -28.687692642211914, |
|
"loss": 0.2737, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.5532296895980835, |
|
"rewards/margins": 2.0457603931427, |
|
"rewards/rejected": -1.4925308227539062, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6092307692307692, |
|
"grad_norm": 19.171893778962126, |
|
"learning_rate": 4.391782039544238e-07, |
|
"logits/chosen": -1.2097636461257935, |
|
"logits/rejected": -1.2146636247634888, |
|
"logps/chosen": -19.53115463256836, |
|
"logps/rejected": -19.350337982177734, |
|
"loss": 0.3284, |
|
"rewards/accuracies": 0.7083333134651184, |
|
"rewards/chosen": 0.28336918354034424, |
|
"rewards/margins": 1.5194146633148193, |
|
"rewards/rejected": -1.236045479774475, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6184615384615385, |
|
"grad_norm": 22.368959777821875, |
|
"learning_rate": 4.365068552858115e-07, |
|
"logits/chosen": -1.2042018175125122, |
|
"logits/rejected": -1.2163949012756348, |
|
"logps/chosen": -24.11139488220215, |
|
"logps/rejected": -33.35640335083008, |
|
"loss": 0.3137, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.1719236522912979, |
|
"rewards/margins": 1.7209672927856445, |
|
"rewards/rejected": -1.5490436553955078, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6276923076923077, |
|
"grad_norm": 17.354174303387865, |
|
"learning_rate": 4.337865984268001e-07, |
|
"logits/chosen": -1.1561534404754639, |
|
"logits/rejected": -1.1622954607009888, |
|
"logps/chosen": -15.14254093170166, |
|
"logps/rejected": -27.18238067626953, |
|
"loss": 0.2954, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.34695935249328613, |
|
"rewards/margins": 1.897645115852356, |
|
"rewards/rejected": -1.5506855249404907, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6369230769230769, |
|
"grad_norm": 14.475969356318869, |
|
"learning_rate": 4.310181467176054e-07, |
|
"logits/chosen": -1.1768825054168701, |
|
"logits/rejected": -1.1757102012634277, |
|
"logps/chosen": -25.93258285522461, |
|
"logps/rejected": -32.286590576171875, |
|
"loss": 0.2914, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.42600950598716736, |
|
"rewards/margins": 2.0175862312316895, |
|
"rewards/rejected": -1.5915768146514893, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6461538461538462, |
|
"grad_norm": 18.34569474287581, |
|
"learning_rate": 4.282022261367073e-07, |
|
"logits/chosen": -1.2166173458099365, |
|
"logits/rejected": -1.2223114967346191, |
|
"logps/chosen": -20.700721740722656, |
|
"logps/rejected": -25.006229400634766, |
|
"loss": 0.2717, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.5470355749130249, |
|
"rewards/margins": 1.990134358406067, |
|
"rewards/rejected": -1.4430986642837524, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6461538461538462, |
|
"eval_logits/chosen": -1.221505880355835, |
|
"eval_logits/rejected": -1.2305463552474976, |
|
"eval_logps/chosen": -22.114253997802734, |
|
"eval_logps/rejected": -29.54737663269043, |
|
"eval_loss": 0.29700523614883423, |
|
"eval_rewards/accuracies": 0.8179723620414734, |
|
"eval_rewards/chosen": 0.46622127294540405, |
|
"eval_rewards/margins": 2.0595133304595947, |
|
"eval_rewards/rejected": -1.5932921171188354, |
|
"eval_runtime": 215.9245, |
|
"eval_samples_per_second": 8.031, |
|
"eval_steps_per_second": 2.01, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6553846153846153, |
|
"grad_norm": 24.003361700026115, |
|
"learning_rate": 4.253395751104748e-07, |
|
"logits/chosen": -1.2128342390060425, |
|
"logits/rejected": -1.2202144861221313, |
|
"logps/chosen": -20.926525115966797, |
|
"logps/rejected": -33.759159088134766, |
|
"loss": 0.2796, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.4563888907432556, |
|
"rewards/margins": 2.332362413406372, |
|
"rewards/rejected": -1.8759733438491821, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6646153846153846, |
|
"grad_norm": 22.96956018291041, |
|
"learning_rate": 4.2243094431952607e-07, |
|
"logits/chosen": -1.1733120679855347, |
|
"logits/rejected": -1.1876205205917358, |
|
"logps/chosen": -20.787324905395508, |
|
"logps/rejected": -44.41487503051758, |
|
"loss": 0.2904, |
|
"rewards/accuracies": 0.7638888955116272, |
|
"rewards/chosen": 0.4227790832519531, |
|
"rewards/margins": 2.513406753540039, |
|
"rewards/rejected": -2.090627431869507, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6738461538461539, |
|
"grad_norm": 20.337910027315395, |
|
"learning_rate": 4.194770965018758e-07, |
|
"logits/chosen": -1.1829084157943726, |
|
"logits/rejected": -1.1901525259017944, |
|
"logps/chosen": -22.88217544555664, |
|
"logps/rejected": -40.51693344116211, |
|
"loss": 0.2982, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.32644984126091003, |
|
"rewards/margins": 2.2273294925689697, |
|
"rewards/rejected": -1.9008797407150269, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.683076923076923, |
|
"grad_norm": 16.955507402789948, |
|
"learning_rate": 4.1647880625292027e-07, |
|
"logits/chosen": -1.1585676670074463, |
|
"logits/rejected": -1.1673483848571777, |
|
"logps/chosen": -17.565954208374023, |
|
"logps/rejected": -30.01752471923828, |
|
"loss": 0.2381, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.6770419478416443, |
|
"rewards/margins": 2.5649421215057373, |
|
"rewards/rejected": -1.8879002332687378, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6923076923076923, |
|
"grad_norm": 16.268353553690783, |
|
"learning_rate": 4.1343685982231315e-07, |
|
"logits/chosen": -1.2300368547439575, |
|
"logits/rejected": -1.2412070035934448, |
|
"logps/chosen": -19.158246994018555, |
|
"logps/rejected": -30.00787353515625, |
|
"loss": 0.2576, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.28651073575019836, |
|
"rewards/margins": 2.1342878341674805, |
|
"rewards/rejected": -1.8477774858474731, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7015384615384616, |
|
"grad_norm": 22.707867679754226, |
|
"learning_rate": 4.1035205490778496e-07, |
|
"logits/chosen": -1.1675605773925781, |
|
"logits/rejected": -1.1745511293411255, |
|
"logps/chosen": -24.983802795410156, |
|
"logps/rejected": -32.00082015991211, |
|
"loss": 0.3007, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.4517359137535095, |
|
"rewards/margins": 2.2256662845611572, |
|
"rewards/rejected": -1.7739304304122925, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.7107692307692308, |
|
"grad_norm": 17.503865371681442, |
|
"learning_rate": 4.072252004459611e-07, |
|
"logits/chosen": -1.1371846199035645, |
|
"logits/rejected": -1.1358321905136108, |
|
"logps/chosen": -26.079011917114258, |
|
"logps/rejected": -27.951416015625, |
|
"loss": 0.2471, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.44966569542884827, |
|
"rewards/margins": 2.104396104812622, |
|
"rewards/rejected": -1.6547303199768066, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 15.32657259953523, |
|
"learning_rate": 4.040571164002318e-07, |
|
"logits/chosen": -1.189456820487976, |
|
"logits/rejected": -1.1948577165603638, |
|
"logps/chosen": -20.083751678466797, |
|
"logps/rejected": -30.10634994506836, |
|
"loss": 0.2351, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.42890670895576477, |
|
"rewards/margins": 2.341860771179199, |
|
"rewards/rejected": -1.9129540920257568, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.7292307692307692, |
|
"grad_norm": 17.946669808646828, |
|
"learning_rate": 4.0084863354573116e-07, |
|
"logits/chosen": -1.1215004920959473, |
|
"logits/rejected": -1.1300181150436401, |
|
"logps/chosen": -23.436655044555664, |
|
"logps/rejected": -34.97710418701172, |
|
"loss": 0.2706, |
|
"rewards/accuracies": 0.9444444179534912, |
|
"rewards/chosen": 0.22723568975925446, |
|
"rewards/margins": 2.1446826457977295, |
|
"rewards/rejected": -1.9174467325210571, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7384615384615385, |
|
"grad_norm": 16.72039592892195, |
|
"learning_rate": 3.9760059325148063e-07, |
|
"logits/chosen": -1.2237818241119385, |
|
"logits/rejected": -1.2211045026779175, |
|
"logps/chosen": -24.31806755065918, |
|
"logps/rejected": -25.250701904296875, |
|
"loss": 0.2351, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.4868224859237671, |
|
"rewards/margins": 2.124577522277832, |
|
"rewards/rejected": -1.637755274772644, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7384615384615385, |
|
"eval_logits/chosen": -1.2072025537490845, |
|
"eval_logits/rejected": -1.216115951538086, |
|
"eval_logps/chosen": -22.174776077270508, |
|
"eval_logps/rejected": -30.134973526000977, |
|
"eval_loss": 0.27949145436286926, |
|
"eval_rewards/accuracies": 0.8248847723007202, |
|
"eval_rewards/chosen": 0.4359608590602875, |
|
"eval_rewards/margins": 2.3230507373809814, |
|
"eval_rewards/rejected": -1.8870899677276611, |
|
"eval_runtime": 216.1181, |
|
"eval_samples_per_second": 8.023, |
|
"eval_steps_per_second": 2.008, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7476923076923077, |
|
"grad_norm": 16.877732796497064, |
|
"learning_rate": 3.9431384725975485e-07, |
|
"logits/chosen": -1.1728930473327637, |
|
"logits/rejected": -1.1828408241271973, |
|
"logps/chosen": -20.051979064941406, |
|
"logps/rejected": -30.078739166259766, |
|
"loss": 0.2806, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.4627165198326111, |
|
"rewards/margins": 2.1041107177734375, |
|
"rewards/rejected": -1.641394019126892, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7569230769230769, |
|
"grad_norm": 17.236677422360824, |
|
"learning_rate": 3.909892574627266e-07, |
|
"logits/chosen": -1.1840589046478271, |
|
"logits/rejected": -1.205323338508606, |
|
"logps/chosen": -20.25952911376953, |
|
"logps/rejected": -43.16006851196289, |
|
"loss": 0.267, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.34341666102409363, |
|
"rewards/margins": 2.8926875591278076, |
|
"rewards/rejected": -2.5492708683013916, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7661538461538462, |
|
"grad_norm": 15.084626056041332, |
|
"learning_rate": 3.876276956764509e-07, |
|
"logits/chosen": -1.172157883644104, |
|
"logits/rejected": -1.1869869232177734, |
|
"logps/chosen": -20.39401626586914, |
|
"logps/rejected": -35.54499816894531, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.533491313457489, |
|
"rewards/margins": 3.2933194637298584, |
|
"rewards/rejected": -2.7598280906677246, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7753846153846153, |
|
"grad_norm": 16.522846792297653, |
|
"learning_rate": 3.8423004341224595e-07, |
|
"logits/chosen": -1.1675995588302612, |
|
"logits/rejected": -1.1726378202438354, |
|
"logps/chosen": -22.266756057739258, |
|
"logps/rejected": -27.90992546081543, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3478531837463379, |
|
"rewards/margins": 2.3764336109161377, |
|
"rewards/rejected": -2.028580665588379, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7846153846153846, |
|
"grad_norm": 18.709310219062342, |
|
"learning_rate": 3.807971916455325e-07, |
|
"logits/chosen": -1.1257578134536743, |
|
"logits/rejected": -1.1353437900543213, |
|
"logps/chosen": -25.48769187927246, |
|
"logps/rejected": -37.34423065185547, |
|
"loss": 0.2439, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.30796098709106445, |
|
"rewards/margins": 2.5804708003997803, |
|
"rewards/rejected": -2.2725095748901367, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7938461538461539, |
|
"grad_norm": 18.811516964897933, |
|
"learning_rate": 3.773300405821908e-07, |
|
"logits/chosen": -1.2032923698425293, |
|
"logits/rejected": -1.1944453716278076, |
|
"logps/chosen": -22.42747688293457, |
|
"logps/rejected": -24.809179306030273, |
|
"loss": 0.2706, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.4598681628704071, |
|
"rewards/margins": 2.331010103225708, |
|
"rewards/rejected": -1.871142029762268, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.803076923076923, |
|
"grad_norm": 27.213611533570646, |
|
"learning_rate": 3.738294994224969e-07, |
|
"logits/chosen": -1.1406216621398926, |
|
"logits/rejected": -1.1456246376037598, |
|
"logps/chosen": -22.41916847229004, |
|
"logps/rejected": -25.79179573059082, |
|
"loss": 0.2525, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5410938858985901, |
|
"rewards/margins": 2.5380003452301025, |
|
"rewards/rejected": -1.9969062805175781, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.8123076923076923, |
|
"grad_norm": 22.120419375719585, |
|
"learning_rate": 3.7029648612270123e-07, |
|
"logits/chosen": -1.1604636907577515, |
|
"logits/rejected": -1.166500210762024, |
|
"logps/chosen": -23.140409469604492, |
|
"logps/rejected": -32.539859771728516, |
|
"loss": 0.2445, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.5552553534507751, |
|
"rewards/margins": 2.451958656311035, |
|
"rewards/rejected": -1.8967031240463257, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.8215384615384616, |
|
"grad_norm": 23.529456123726142, |
|
"learning_rate": 3.6673192715431014e-07, |
|
"logits/chosen": -1.172749638557434, |
|
"logits/rejected": -1.1873490810394287, |
|
"logps/chosen": -19.344928741455078, |
|
"logps/rejected": -46.30924987792969, |
|
"loss": 0.2576, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.3556906580924988, |
|
"rewards/margins": 3.198575973510742, |
|
"rewards/rejected": -2.8428850173950195, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.8307692307692308, |
|
"grad_norm": 16.07954647927614, |
|
"learning_rate": 3.6313675726113475e-07, |
|
"logits/chosen": -1.1696263551712036, |
|
"logits/rejected": -1.1719523668289185, |
|
"logps/chosen": -24.40313148498535, |
|
"logps/rejected": -30.179893493652344, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.5325056314468384, |
|
"rewards/margins": 2.6024298667907715, |
|
"rewards/rejected": -2.0699243545532227, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8307692307692308, |
|
"eval_logits/chosen": -1.1957546472549438, |
|
"eval_logits/rejected": -1.2044621706008911, |
|
"eval_logps/chosen": -22.226091384887695, |
|
"eval_logps/rejected": -30.679323196411133, |
|
"eval_loss": 0.2662460505962372, |
|
"eval_rewards/accuracies": 0.8271889686584473, |
|
"eval_rewards/chosen": 0.4103015661239624, |
|
"eval_rewards/margins": 2.569566011428833, |
|
"eval_rewards/rejected": -2.15926456451416, |
|
"eval_runtime": 216.1605, |
|
"eval_samples_per_second": 8.022, |
|
"eval_steps_per_second": 2.008, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 12.027824441881227, |
|
"learning_rate": 3.595119192141706e-07, |
|
"logits/chosen": -1.1798688173294067, |
|
"logits/rejected": -1.190478801727295, |
|
"logps/chosen": -23.84467315673828, |
|
"logps/rejected": -27.77214241027832, |
|
"loss": 0.1945, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5185620784759521, |
|
"rewards/margins": 2.7370386123657227, |
|
"rewards/rejected": -2.2184765338897705, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8492307692307692, |
|
"grad_norm": 21.657852790803656, |
|
"learning_rate": 3.558583635643726e-07, |
|
"logits/chosen": -1.1619257926940918, |
|
"logits/rejected": -1.1783702373504639, |
|
"logps/chosen": -20.357545852661133, |
|
"logps/rejected": -36.6799430847168, |
|
"loss": 0.2859, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.39101898670196533, |
|
"rewards/margins": 2.5226354598999023, |
|
"rewards/rejected": -2.1316165924072266, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8584615384615385, |
|
"grad_norm": 15.850729398525738, |
|
"learning_rate": 3.5217704839338905e-07, |
|
"logits/chosen": -1.2039780616760254, |
|
"logits/rejected": -1.2015321254730225, |
|
"logps/chosen": -25.71788787841797, |
|
"logps/rejected": -29.20301628112793, |
|
"loss": 0.2245, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.43592390418052673, |
|
"rewards/margins": 2.691300392150879, |
|
"rewards/rejected": -2.2553763389587402, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8676923076923077, |
|
"grad_norm": 20.33987602806827, |
|
"learning_rate": 3.484689390623218e-07, |
|
"logits/chosen": -1.173121452331543, |
|
"logits/rejected": -1.1853346824645996, |
|
"logps/chosen": -21.594472885131836, |
|
"logps/rejected": -36.92512130737305, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.22467082738876343, |
|
"rewards/margins": 2.8943564891815186, |
|
"rewards/rejected": -2.6696856021881104, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8769230769230769, |
|
"grad_norm": 15.456781978721555, |
|
"learning_rate": 3.447350079585767e-07, |
|
"logits/chosen": -1.20560884475708, |
|
"logits/rejected": -1.2095773220062256, |
|
"logps/chosen": -18.067840576171875, |
|
"logps/rejected": -24.3345890045166, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.2674013674259186, |
|
"rewards/margins": 2.3308472633361816, |
|
"rewards/rejected": -2.063445568084717, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8861538461538462, |
|
"grad_norm": 24.575966523755373, |
|
"learning_rate": 3.409762342408719e-07, |
|
"logits/chosen": -1.1767027378082275, |
|
"logits/rejected": -1.1829452514648438, |
|
"logps/chosen": -23.147159576416016, |
|
"logps/rejected": -38.63761901855469, |
|
"loss": 0.3063, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.2949807345867157, |
|
"rewards/margins": 2.8994204998016357, |
|
"rewards/rejected": -2.6044397354125977, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8953846153846153, |
|
"grad_norm": 13.903082439233941, |
|
"learning_rate": 3.3719360358247053e-07, |
|
"logits/chosen": -1.1678471565246582, |
|
"logits/rejected": -1.1855759620666504, |
|
"logps/chosen": -19.064098358154297, |
|
"logps/rejected": -36.09113693237305, |
|
"loss": 0.288, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.4278064966201782, |
|
"rewards/margins": 2.7983500957489014, |
|
"rewards/rejected": -2.3705434799194336, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.9046153846153846, |
|
"grad_norm": 16.97717210575951, |
|
"learning_rate": 3.3338810791270517e-07, |
|
"logits/chosen": -1.1488627195358276, |
|
"logits/rejected": -1.161072015762329, |
|
"logps/chosen": -16.16121482849121, |
|
"logps/rejected": -35.24711608886719, |
|
"loss": 0.2587, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.2668210566043854, |
|
"rewards/margins": 2.758829116821289, |
|
"rewards/rejected": -2.4920082092285156, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.9138461538461539, |
|
"grad_norm": 21.684346277519417, |
|
"learning_rate": 3.29560745156861e-07, |
|
"logits/chosen": -1.1681840419769287, |
|
"logits/rejected": -1.1707243919372559, |
|
"logps/chosen": -27.238510131835938, |
|
"logps/rejected": -29.843427658081055, |
|
"loss": 0.2945, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.487039715051651, |
|
"rewards/margins": 2.7937545776367188, |
|
"rewards/rejected": -2.3067147731781006, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 15.010044100424757, |
|
"learning_rate": 3.2571251897448763e-07, |
|
"logits/chosen": -1.1483420133590698, |
|
"logits/rejected": -1.172219157218933, |
|
"logps/chosen": -20.701204299926758, |
|
"logps/rejected": -47.092777252197266, |
|
"loss": 0.2393, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.5242102742195129, |
|
"rewards/margins": 3.446150302886963, |
|
"rewards/rejected": -2.9219398498535156, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_logits/chosen": -1.188868761062622, |
|
"eval_logits/rejected": -1.1974678039550781, |
|
"eval_logps/chosen": -22.205198287963867, |
|
"eval_logps/rejected": -30.90268325805664, |
|
"eval_loss": 0.25766730308532715, |
|
"eval_rewards/accuracies": 0.8306451439857483, |
|
"eval_rewards/chosen": 0.42075031995773315, |
|
"eval_rewards/margins": 2.6916959285736084, |
|
"eval_rewards/rejected": -2.2709455490112305, |
|
"eval_runtime": 216.204, |
|
"eval_samples_per_second": 8.02, |
|
"eval_steps_per_second": 2.007, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9323076923076923, |
|
"grad_norm": 24.918463307740545, |
|
"learning_rate": 3.218444384962071e-07, |
|
"logits/chosen": -1.1572585105895996, |
|
"logits/rejected": -1.1649041175842285, |
|
"logps/chosen": -20.337928771972656, |
|
"logps/rejected": -25.251022338867188, |
|
"loss": 0.2872, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.051526255905628204, |
|
"rewards/margins": 2.169602155685425, |
|
"rewards/rejected": -2.1180758476257324, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9415384615384615, |
|
"grad_norm": 17.132653548760572, |
|
"learning_rate": 3.179575180590857e-07, |
|
"logits/chosen": -1.1708558797836304, |
|
"logits/rejected": -1.1774191856384277, |
|
"logps/chosen": -16.72760772705078, |
|
"logps/rejected": -29.532522201538086, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.3555985391139984, |
|
"rewards/margins": 2.5367255210876465, |
|
"rewards/rejected": -2.1811270713806152, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9507692307692308, |
|
"grad_norm": 18.808695685272248, |
|
"learning_rate": 3.1405277694064305e-07, |
|
"logits/chosen": -1.13996422290802, |
|
"logits/rejected": -1.1603398323059082, |
|
"logps/chosen": -20.1070613861084, |
|
"logps/rejected": -43.8044319152832, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3765062689781189, |
|
"rewards/margins": 3.3217618465423584, |
|
"rewards/rejected": -2.9452552795410156, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 29.593271367025817, |
|
"learning_rate": 3.101312390915634e-07, |
|
"logits/chosen": -1.1117515563964844, |
|
"logits/rejected": -1.1254826784133911, |
|
"logps/chosen": -18.95772933959961, |
|
"logps/rejected": -38.70570373535156, |
|
"loss": 0.2626, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.19062408804893494, |
|
"rewards/margins": 2.819202423095703, |
|
"rewards/rejected": -2.6285784244537354, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.9692307692307692, |
|
"grad_norm": 19.2158248846026, |
|
"learning_rate": 3.0619393286718237e-07, |
|
"logits/chosen": -1.1758193969726562, |
|
"logits/rejected": -1.18528413772583, |
|
"logps/chosen": -25.30388069152832, |
|
"logps/rejected": -24.64061737060547, |
|
"loss": 0.2715, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.22175876796245575, |
|
"rewards/margins": 2.10679292678833, |
|
"rewards/rejected": -1.8850340843200684, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9784615384615385, |
|
"grad_norm": 23.720067200725047, |
|
"learning_rate": 3.022418907578188e-07, |
|
"logits/chosen": -1.1191242933273315, |
|
"logits/rejected": -1.1329889297485352, |
|
"logps/chosen": -25.677099227905273, |
|
"logps/rejected": -39.06088638305664, |
|
"loss": 0.2898, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.20263215899467468, |
|
"rewards/margins": 3.0255513191223145, |
|
"rewards/rejected": -2.8229193687438965, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9876923076923076, |
|
"grad_norm": 15.354779350521344, |
|
"learning_rate": 2.98276149118022e-07, |
|
"logits/chosen": -1.1088786125183105, |
|
"logits/rejected": -1.1292033195495605, |
|
"logps/chosen": -24.54433250427246, |
|
"logps/rejected": -38.054649353027344, |
|
"loss": 0.2164, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.5917240381240845, |
|
"rewards/margins": 3.370425224304199, |
|
"rewards/rejected": -2.7787015438079834, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9969230769230769, |
|
"grad_norm": 15.922459499539187, |
|
"learning_rate": 2.942977478948057e-07, |
|
"logits/chosen": -1.134361743927002, |
|
"logits/rejected": -1.1381641626358032, |
|
"logps/chosen": -29.736419677734375, |
|
"logps/rejected": -34.28538513183594, |
|
"loss": 0.209, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.47491705417633057, |
|
"rewards/margins": 3.0054473876953125, |
|
"rewards/rejected": -2.5305304527282715, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.0061538461538462, |
|
"grad_norm": 14.602088714669993, |
|
"learning_rate": 2.903077303549399e-07, |
|
"logits/chosen": -1.1926045417785645, |
|
"logits/rejected": -1.2005811929702759, |
|
"logps/chosen": -21.338937759399414, |
|
"logps/rejected": -31.98470115661621, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.49925586581230164, |
|
"rewards/margins": 3.034120559692383, |
|
"rewards/rejected": -2.534864664077759, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.0153846153846153, |
|
"grad_norm": 12.776565445469831, |
|
"learning_rate": 2.863071428113726e-07, |
|
"logits/chosen": -1.180498719215393, |
|
"logits/rejected": -1.1876842975616455, |
|
"logps/chosen": -21.977970123291016, |
|
"logps/rejected": -26.06908416748047, |
|
"loss": 0.2223, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.23817205429077148, |
|
"rewards/margins": 2.4826109409332275, |
|
"rewards/rejected": -2.244438409805298, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0153846153846153, |
|
"eval_logits/chosen": -1.1809991598129272, |
|
"eval_logits/rejected": -1.189637303352356, |
|
"eval_logps/chosen": -22.231857299804688, |
|
"eval_logps/rejected": -31.20700454711914, |
|
"eval_loss": 0.25129908323287964, |
|
"eval_rewards/accuracies": 0.8329492807388306, |
|
"eval_rewards/chosen": 0.4074196219444275, |
|
"eval_rewards/margins": 2.8305253982543945, |
|
"eval_rewards/rejected": -2.4231057167053223, |
|
"eval_runtime": 216.0555, |
|
"eval_samples_per_second": 8.026, |
|
"eval_steps_per_second": 2.009, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0246153846153847, |
|
"grad_norm": 14.54877776678067, |
|
"learning_rate": 2.822970343488516e-07, |
|
"logits/chosen": -1.1495935916900635, |
|
"logits/rejected": -1.1574082374572754, |
|
"logps/chosen": -25.172189712524414, |
|
"logps/rejected": -33.7739372253418, |
|
"loss": 0.224, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.4095478355884552, |
|
"rewards/margins": 2.9969334602355957, |
|
"rewards/rejected": -2.587385416030884, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.0338461538461539, |
|
"grad_norm": 12.987637533805088, |
|
"learning_rate": 2.782784565488211e-07, |
|
"logits/chosen": -1.09419846534729, |
|
"logits/rejected": -1.1150177717208862, |
|
"logps/chosen": -21.80037498474121, |
|
"logps/rejected": -47.742916107177734, |
|
"loss": 0.2056, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": 0.49535179138183594, |
|
"rewards/margins": 4.081587314605713, |
|
"rewards/rejected": -3.5862362384796143, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.043076923076923, |
|
"grad_norm": 12.537917774467841, |
|
"learning_rate": 2.7425246321366205e-07, |
|
"logits/chosen": -1.1532597541809082, |
|
"logits/rejected": -1.1558729410171509, |
|
"logps/chosen": -23.903770446777344, |
|
"logps/rejected": -22.89252471923828, |
|
"loss": 0.2188, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.5978649258613586, |
|
"rewards/margins": 2.4770026206970215, |
|
"rewards/rejected": -1.8791378736495972, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.0523076923076924, |
|
"grad_norm": 11.390266637295149, |
|
"learning_rate": 2.7022011009035107e-07, |
|
"logits/chosen": -1.1780048608779907, |
|
"logits/rejected": -1.1780657768249512, |
|
"logps/chosen": -20.99365997314453, |
|
"logps/rejected": -35.256507873535156, |
|
"loss": 0.1785, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.25620290637016296, |
|
"rewards/margins": 3.1927871704101562, |
|
"rewards/rejected": -2.936584234237671, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.0615384615384615, |
|
"grad_norm": 13.274197122497501, |
|
"learning_rate": 2.661824545936089e-07, |
|
"logits/chosen": -1.1301528215408325, |
|
"logits/rejected": -1.141854166984558, |
|
"logps/chosen": -22.90785789489746, |
|
"logps/rejected": -39.776309967041016, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": 0.23726129531860352, |
|
"rewards/margins": 3.6220147609710693, |
|
"rewards/rejected": -3.3847532272338867, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0707692307692307, |
|
"grad_norm": 11.899842789993972, |
|
"learning_rate": 2.621405555286121e-07, |
|
"logits/chosen": -1.1494054794311523, |
|
"logits/rejected": -1.158327579498291, |
|
"logps/chosen": -27.49151611328125, |
|
"logps/rejected": -33.164703369140625, |
|
"loss": 0.1801, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.473955363035202, |
|
"rewards/margins": 3.1881282329559326, |
|
"rewards/rejected": -2.7141730785369873, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 12.024964222481547, |
|
"learning_rate": 2.58095472813339e-07, |
|
"logits/chosen": -1.1302716732025146, |
|
"logits/rejected": -1.1499823331832886, |
|
"logps/chosen": -25.619178771972656, |
|
"logps/rejected": -35.781768798828125, |
|
"loss": 0.1808, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.593082070350647, |
|
"rewards/margins": 3.549994468688965, |
|
"rewards/rejected": -2.9569127559661865, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.0892307692307692, |
|
"grad_norm": 16.982420323384893, |
|
"learning_rate": 2.540482672006254e-07, |
|
"logits/chosen": -1.1983014345169067, |
|
"logits/rejected": -1.2088627815246582, |
|
"logps/chosen": -20.2447566986084, |
|
"logps/rejected": -33.8237419128418, |
|
"loss": 0.2502, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.2588607966899872, |
|
"rewards/margins": 2.6979219913482666, |
|
"rewards/rejected": -2.439061164855957, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.0984615384615384, |
|
"grad_norm": 14.78335151339772, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.1217488050460815, |
|
"logits/rejected": -1.126597285270691, |
|
"logps/chosen": -24.313417434692383, |
|
"logps/rejected": -32.5634880065918, |
|
"loss": 0.1857, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.4129423499107361, |
|
"rewards/margins": 3.0672991275787354, |
|
"rewards/rejected": -2.6543567180633545, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.1076923076923078, |
|
"grad_norm": 9.560418611995035, |
|
"learning_rate": 2.459517327993746e-07, |
|
"logits/chosen": -1.1439785957336426, |
|
"logits/rejected": -1.1501950025558472, |
|
"logps/chosen": -21.520601272583008, |
|
"logps/rejected": -36.128475189208984, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.16983138024806976, |
|
"rewards/margins": 3.335303544998169, |
|
"rewards/rejected": -3.1654722690582275, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1076923076923078, |
|
"eval_logits/chosen": -1.1771941184997559, |
|
"eval_logits/rejected": -1.1856648921966553, |
|
"eval_logps/chosen": -22.31366539001465, |
|
"eval_logps/rejected": -31.599573135375977, |
|
"eval_loss": 0.24783480167388916, |
|
"eval_rewards/accuracies": 0.8317972421646118, |
|
"eval_rewards/chosen": 0.3665139377117157, |
|
"eval_rewards/margins": 2.98590350151062, |
|
"eval_rewards/rejected": -2.619389295578003, |
|
"eval_runtime": 216.1562, |
|
"eval_samples_per_second": 8.022, |
|
"eval_steps_per_second": 2.008, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.116923076923077, |
|
"grad_norm": 13.013402968505392, |
|
"learning_rate": 2.4190452718666105e-07, |
|
"logits/chosen": -1.0899126529693604, |
|
"logits/rejected": -1.1027652025222778, |
|
"logps/chosen": -15.734682083129883, |
|
"logps/rejected": -27.53190803527832, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.5433827638626099, |
|
"rewards/margins": 3.0215795040130615, |
|
"rewards/rejected": -2.478196859359741, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.126153846153846, |
|
"grad_norm": 12.301318346382136, |
|
"learning_rate": 2.37859444471388e-07, |
|
"logits/chosen": -1.1361184120178223, |
|
"logits/rejected": -1.151028037071228, |
|
"logps/chosen": -24.852954864501953, |
|
"logps/rejected": -40.693912506103516, |
|
"loss": 0.1914, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.489397794008255, |
|
"rewards/margins": 3.448162794113159, |
|
"rewards/rejected": -2.9587647914886475, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.1353846153846154, |
|
"grad_norm": 13.708460236846275, |
|
"learning_rate": 2.3381754540639106e-07, |
|
"logits/chosen": -1.1237130165100098, |
|
"logits/rejected": -1.1399991512298584, |
|
"logps/chosen": -21.652952194213867, |
|
"logps/rejected": -30.665048599243164, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.5611749291419983, |
|
"rewards/margins": 3.155482292175293, |
|
"rewards/rejected": -2.5943074226379395, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.1446153846153846, |
|
"grad_norm": 11.563478452101487, |
|
"learning_rate": 2.2977988990964896e-07, |
|
"logits/chosen": -1.0979208946228027, |
|
"logits/rejected": -1.111803650856018, |
|
"logps/chosen": -21.861614227294922, |
|
"logps/rejected": -38.676361083984375, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.13799840211868286, |
|
"rewards/margins": 3.1060800552368164, |
|
"rewards/rejected": -2.968081474304199, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"grad_norm": 12.63303273344697, |
|
"learning_rate": 2.2574753678633798e-07, |
|
"logits/chosen": -1.2150633335113525, |
|
"logits/rejected": -1.2195019721984863, |
|
"logps/chosen": -19.639219284057617, |
|
"logps/rejected": -22.85377311706543, |
|
"loss": 0.2111, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.478664755821228, |
|
"rewards/margins": 2.8225910663604736, |
|
"rewards/rejected": -2.343926429748535, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1630769230769231, |
|
"grad_norm": 15.55104305702512, |
|
"learning_rate": 2.2172154345117894e-07, |
|
"logits/chosen": -1.1489689350128174, |
|
"logits/rejected": -1.1607710123062134, |
|
"logps/chosen": -22.335952758789062, |
|
"logps/rejected": -43.476783752441406, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6636537909507751, |
|
"rewards/margins": 4.2835187911987305, |
|
"rewards/rejected": -3.6198649406433105, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.1723076923076923, |
|
"grad_norm": 19.58611284576425, |
|
"learning_rate": 2.1770296565114846e-07, |
|
"logits/chosen": -1.174638271331787, |
|
"logits/rejected": -1.1910815238952637, |
|
"logps/chosen": -19.441059112548828, |
|
"logps/rejected": -23.29158592224121, |
|
"loss": 0.2382, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.15968316793441772, |
|
"rewards/margins": 2.543644428253174, |
|
"rewards/rejected": -2.3839612007141113, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.1815384615384614, |
|
"grad_norm": 14.83480005382789, |
|
"learning_rate": 2.1369285718862748e-07, |
|
"logits/chosen": -1.0653572082519531, |
|
"logits/rejected": -1.0726639032363892, |
|
"logps/chosen": -24.378429412841797, |
|
"logps/rejected": -48.50611877441406, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2468690127134323, |
|
"rewards/margins": 4.218470096588135, |
|
"rewards/rejected": -3.9716007709503174, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.1907692307692308, |
|
"grad_norm": 14.627626741140055, |
|
"learning_rate": 2.0969226964506005e-07, |
|
"logits/chosen": -1.1564842462539673, |
|
"logits/rejected": -1.1586439609527588, |
|
"logps/chosen": -25.08201789855957, |
|
"logps/rejected": -26.51468849182129, |
|
"loss": 0.2157, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.42589980363845825, |
|
"rewards/margins": 3.205916166305542, |
|
"rewards/rejected": -2.7800166606903076, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 13.600232617567109, |
|
"learning_rate": 2.0570225210519433e-07, |
|
"logits/chosen": -1.1147321462631226, |
|
"logits/rejected": -1.1307651996612549, |
|
"logps/chosen": -22.724639892578125, |
|
"logps/rejected": -38.13914489746094, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5592103004455566, |
|
"rewards/margins": 3.5806994438171387, |
|
"rewards/rejected": -3.021489143371582, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_logits/chosen": -1.1710869073867798, |
|
"eval_logits/rejected": -1.179579496383667, |
|
"eval_logps/chosen": -22.368024826049805, |
|
"eval_logps/rejected": -31.889461517333984, |
|
"eval_loss": 0.24438533186912537, |
|
"eval_rewards/accuracies": 0.8317972421646118, |
|
"eval_rewards/chosen": 0.33933624625205994, |
|
"eval_rewards/margins": 3.1036696434020996, |
|
"eval_rewards/rejected": -2.764333963394165, |
|
"eval_runtime": 216.3298, |
|
"eval_samples_per_second": 8.016, |
|
"eval_steps_per_second": 2.006, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.209230769230769, |
|
"grad_norm": 16.513762580218792, |
|
"learning_rate": 2.0172385088197803e-07, |
|
"logits/chosen": -1.14779531955719, |
|
"logits/rejected": -1.1652312278747559, |
|
"logps/chosen": -26.26132583618164, |
|
"logps/rejected": -40.5022087097168, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.7777777910232544, |
|
"rewards/chosen": 0.41341039538383484, |
|
"rewards/margins": 3.354189157485962, |
|
"rewards/rejected": -2.940778970718384, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.2184615384615385, |
|
"grad_norm": 11.212524578416895, |
|
"learning_rate": 1.977581092421812e-07, |
|
"logits/chosen": -1.1520088911056519, |
|
"logits/rejected": -1.1642160415649414, |
|
"logps/chosen": -20.592201232910156, |
|
"logps/rejected": -30.868377685546875, |
|
"loss": 0.1657, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.40944963693618774, |
|
"rewards/margins": 3.2444136142730713, |
|
"rewards/rejected": -2.8349640369415283, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.2276923076923076, |
|
"grad_norm": 11.01146404378747, |
|
"learning_rate": 1.9380606713281772e-07, |
|
"logits/chosen": -1.1583861112594604, |
|
"logits/rejected": -1.1652624607086182, |
|
"logps/chosen": -18.12959098815918, |
|
"logps/rejected": -34.5963134765625, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.3663688898086548, |
|
"rewards/margins": 3.504619836807251, |
|
"rewards/rejected": -3.1382510662078857, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.236923076923077, |
|
"grad_norm": 12.264405123220332, |
|
"learning_rate": 1.8986876090843664e-07, |
|
"logits/chosen": -1.13167142868042, |
|
"logits/rejected": -1.14499831199646, |
|
"logps/chosen": -20.43359375, |
|
"logps/rejected": -37.75240707397461, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.36272215843200684, |
|
"rewards/margins": 3.8877878189086914, |
|
"rewards/rejected": -3.5250654220581055, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.2461538461538462, |
|
"grad_norm": 11.919291580876626, |
|
"learning_rate": 1.859472230593569e-07, |
|
"logits/chosen": -1.1225872039794922, |
|
"logits/rejected": -1.1367418766021729, |
|
"logps/chosen": -26.361604690551758, |
|
"logps/rejected": -43.534812927246094, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.43198204040527344, |
|
"rewards/margins": 3.9310781955718994, |
|
"rewards/rejected": -3.499096155166626, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2553846153846153, |
|
"grad_norm": 12.440022575260326, |
|
"learning_rate": 1.8204248194091425e-07, |
|
"logits/chosen": -1.1526453495025635, |
|
"logits/rejected": -1.1696141958236694, |
|
"logps/chosen": -23.60825538635254, |
|
"logps/rejected": -57.63713836669922, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": 0.27740761637687683, |
|
"rewards/margins": 4.90004301071167, |
|
"rewards/rejected": -4.622635841369629, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.2646153846153847, |
|
"grad_norm": 8.286919730890018, |
|
"learning_rate": 1.7815556150379296e-07, |
|
"logits/chosen": -1.1683982610702515, |
|
"logits/rejected": -1.169435977935791, |
|
"logps/chosen": -22.41632652282715, |
|
"logps/rejected": -32.76851272583008, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.5325616002082825, |
|
"rewards/margins": 3.4823427200317383, |
|
"rewards/rejected": -2.9497809410095215, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.2738461538461539, |
|
"grad_norm": 11.685150583165354, |
|
"learning_rate": 1.7428748102551234e-07, |
|
"logits/chosen": -1.106712818145752, |
|
"logits/rejected": -1.1161227226257324, |
|
"logps/chosen": -20.291996002197266, |
|
"logps/rejected": -28.43364715576172, |
|
"loss": 0.1994, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.5047957897186279, |
|
"rewards/margins": 3.1466941833496094, |
|
"rewards/rejected": -2.6418981552124023, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.283076923076923, |
|
"grad_norm": 13.842054601252082, |
|
"learning_rate": 1.704392548431391e-07, |
|
"logits/chosen": -1.1573395729064941, |
|
"logits/rejected": -1.1763123273849487, |
|
"logps/chosen": -13.727288246154785, |
|
"logps/rejected": -40.552120208740234, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.0833960473537445, |
|
"rewards/margins": 3.4928784370422363, |
|
"rewards/rejected": -3.40948224067688, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.2923076923076924, |
|
"grad_norm": 19.81840697060037, |
|
"learning_rate": 1.6661189208729489e-07, |
|
"logits/chosen": -1.1369847059249878, |
|
"logits/rejected": -1.1503101587295532, |
|
"logps/chosen": -29.371524810791016, |
|
"logps/rejected": -31.74928092956543, |
|
"loss": 0.174, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.46892601251602173, |
|
"rewards/margins": 3.2968459129333496, |
|
"rewards/rejected": -2.8279199600219727, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2923076923076924, |
|
"eval_logits/chosen": -1.165863275527954, |
|
"eval_logits/rejected": -1.1743441820144653, |
|
"eval_logps/chosen": -22.31157875061035, |
|
"eval_logps/rejected": -31.91876792907715, |
|
"eval_loss": 0.23967565596103668, |
|
"eval_rewards/accuracies": 0.8341013789176941, |
|
"eval_rewards/chosen": 0.3675578236579895, |
|
"eval_rewards/margins": 3.146545171737671, |
|
"eval_rewards/rejected": -2.778987407684326, |
|
"eval_runtime": 216.3352, |
|
"eval_samples_per_second": 8.015, |
|
"eval_steps_per_second": 2.006, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3015384615384615, |
|
"grad_norm": 8.930251698810418, |
|
"learning_rate": 1.6280639641752942e-07, |
|
"logits/chosen": -1.1316086053848267, |
|
"logits/rejected": -1.1440240144729614, |
|
"logps/chosen": -20.34646987915039, |
|
"logps/rejected": -49.82673645019531, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.23807168006896973, |
|
"rewards/margins": 4.113887310028076, |
|
"rewards/rejected": -3.8758151531219482, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.3107692307692307, |
|
"grad_norm": 12.563220339411409, |
|
"learning_rate": 1.5902376575912814e-07, |
|
"logits/chosen": -1.11788809299469, |
|
"logits/rejected": -1.1216245889663696, |
|
"logps/chosen": -26.72078514099121, |
|
"logps/rejected": -35.561317443847656, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.3794720470905304, |
|
"rewards/margins": 3.400892734527588, |
|
"rewards/rejected": -3.021420478820801, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 12.663334489473607, |
|
"learning_rate": 1.552649920414233e-07, |
|
"logits/chosen": -1.1346993446350098, |
|
"logits/rejected": -1.135698676109314, |
|
"logps/chosen": -30.942975997924805, |
|
"logps/rejected": -28.223663330078125, |
|
"loss": 0.209, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.1739700883626938, |
|
"rewards/margins": 2.763653039932251, |
|
"rewards/rejected": -2.5896828174591064, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.3292307692307692, |
|
"grad_norm": 14.8989835155845, |
|
"learning_rate": 1.5153106093767825e-07, |
|
"logits/chosen": -1.0928491353988647, |
|
"logits/rejected": -1.115010142326355, |
|
"logps/chosen": -18.197795867919922, |
|
"logps/rejected": -37.05016326904297, |
|
"loss": 0.2571, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4650332033634186, |
|
"rewards/margins": 2.95278000831604, |
|
"rewards/rejected": -2.4877467155456543, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.3384615384615386, |
|
"grad_norm": 7.959815386261902, |
|
"learning_rate": 1.47822951606611e-07, |
|
"logits/chosen": -1.1016626358032227, |
|
"logits/rejected": -1.1072629690170288, |
|
"logps/chosen": -27.025487899780273, |
|
"logps/rejected": -32.04999923706055, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.37108778953552246, |
|
"rewards/margins": 3.5628809928894043, |
|
"rewards/rejected": -3.191793441772461, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.3476923076923077, |
|
"grad_norm": 9.883542506968235, |
|
"learning_rate": 1.4414163643562753e-07, |
|
"logits/chosen": -1.1510549783706665, |
|
"logits/rejected": -1.161637783050537, |
|
"logps/chosen": -26.81183433532715, |
|
"logps/rejected": -45.584022521972656, |
|
"loss": 0.1694, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.7165854573249817, |
|
"rewards/margins": 4.145462989807129, |
|
"rewards/rejected": -3.428877353668213, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.356923076923077, |
|
"grad_norm": 16.819884237605038, |
|
"learning_rate": 1.4048808078582942e-07, |
|
"logits/chosen": -1.156364917755127, |
|
"logits/rejected": -1.158648133277893, |
|
"logps/chosen": -25.07522964477539, |
|
"logps/rejected": -37.01847839355469, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": -0.062492769211530685, |
|
"rewards/margins": 3.447725534439087, |
|
"rewards/rejected": -3.5102179050445557, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.3661538461538463, |
|
"grad_norm": 9.730872259730013, |
|
"learning_rate": 1.3686324273886528e-07, |
|
"logits/chosen": -1.0902260541915894, |
|
"logits/rejected": -1.1149543523788452, |
|
"logps/chosen": -21.78764533996582, |
|
"logps/rejected": -47.82768249511719, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.9444444179534912, |
|
"rewards/chosen": 0.330030232667923, |
|
"rewards/margins": 4.0784478187561035, |
|
"rewards/rejected": -3.748418092727661, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.3753846153846154, |
|
"grad_norm": 11.017633003526004, |
|
"learning_rate": 1.3326807284568984e-07, |
|
"logits/chosen": -1.1744215488433838, |
|
"logits/rejected": -1.1781681776046753, |
|
"logps/chosen": -20.410446166992188, |
|
"logps/rejected": -33.22405242919922, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.266373872756958, |
|
"rewards/margins": 3.171236515045166, |
|
"rewards/rejected": -2.904862642288208, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.3846153846153846, |
|
"grad_norm": 12.616723945362331, |
|
"learning_rate": 1.2970351387729872e-07, |
|
"logits/chosen": -1.1809624433517456, |
|
"logits/rejected": -1.1951857805252075, |
|
"logps/chosen": -18.240955352783203, |
|
"logps/rejected": -40.42936706542969, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.6317293643951416, |
|
"rewards/margins": 3.926286458969116, |
|
"rewards/rejected": -3.2945568561553955, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3846153846153846, |
|
"eval_logits/chosen": -1.1625326871871948, |
|
"eval_logits/rejected": -1.1709260940551758, |
|
"eval_logps/chosen": -22.30373764038086, |
|
"eval_logps/rejected": -32.03895568847656, |
|
"eval_loss": 0.23691046237945557, |
|
"eval_rewards/accuracies": 0.8387096524238586, |
|
"eval_rewards/chosen": 0.3714797794818878, |
|
"eval_rewards/margins": 3.2105631828308105, |
|
"eval_rewards/rejected": -2.839083194732666, |
|
"eval_runtime": 216.5842, |
|
"eval_samples_per_second": 8.006, |
|
"eval_steps_per_second": 2.004, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.393846153846154, |
|
"grad_norm": 11.126146094324666, |
|
"learning_rate": 1.261705005775032e-07, |
|
"logits/chosen": -1.1696714162826538, |
|
"logits/rejected": -1.1861652135849, |
|
"logps/chosen": -22.42890167236328, |
|
"logps/rejected": -34.44594192504883, |
|
"loss": 0.1635, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": 0.2834773361682892, |
|
"rewards/margins": 3.5443296432495117, |
|
"rewards/rejected": -3.260852813720703, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.403076923076923, |
|
"grad_norm": 10.479052450533084, |
|
"learning_rate": 1.2266995941780933e-07, |
|
"logits/chosen": -1.130216121673584, |
|
"logits/rejected": -1.1414945125579834, |
|
"logps/chosen": -25.476299285888672, |
|
"logps/rejected": -40.09599304199219, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.3959537744522095, |
|
"rewards/margins": 3.8914499282836914, |
|
"rewards/rejected": -3.4954960346221924, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.4123076923076923, |
|
"grad_norm": 15.900407241334104, |
|
"learning_rate": 1.1920280835446748e-07, |
|
"logits/chosen": -1.1561819314956665, |
|
"logits/rejected": -1.160946011543274, |
|
"logps/chosen": -26.870162963867188, |
|
"logps/rejected": -45.102787017822266, |
|
"loss": 0.1771, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5023772120475769, |
|
"rewards/margins": 4.30380392074585, |
|
"rewards/rejected": -3.801426887512207, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.4215384615384616, |
|
"grad_norm": 10.845292151115956, |
|
"learning_rate": 1.1576995658775404e-07, |
|
"logits/chosen": -1.1523799896240234, |
|
"logits/rejected": -1.1634249687194824, |
|
"logps/chosen": -20.11031723022461, |
|
"logps/rejected": -28.449501037597656, |
|
"loss": 0.155, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.4131190776824951, |
|
"rewards/margins": 3.466240882873535, |
|
"rewards/rejected": -3.053121328353882, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.4307692307692308, |
|
"grad_norm": 13.811097447536184, |
|
"learning_rate": 1.123723043235491e-07, |
|
"logits/chosen": -1.1037707328796387, |
|
"logits/rejected": -1.1196866035461426, |
|
"logps/chosen": -22.25092315673828, |
|
"logps/rejected": -41.13553237915039, |
|
"loss": 0.2394, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.44290411472320557, |
|
"rewards/margins": 3.9364805221557617, |
|
"rewards/rejected": -3.4935765266418457, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 7.336736527232887, |
|
"learning_rate": 1.0901074253727336e-07, |
|
"logits/chosen": -1.132401943206787, |
|
"logits/rejected": -1.1375315189361572, |
|
"logps/chosen": -21.84718132019043, |
|
"logps/rejected": -32.056617736816406, |
|
"loss": 0.1639, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.543586015701294, |
|
"rewards/margins": 3.536188840866089, |
|
"rewards/rejected": -2.9926023483276367, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.4492307692307693, |
|
"grad_norm": 9.238298739154985, |
|
"learning_rate": 1.056861527402452e-07, |
|
"logits/chosen": -1.1301486492156982, |
|
"logits/rejected": -1.130847454071045, |
|
"logps/chosen": -30.35249137878418, |
|
"logps/rejected": -39.42829513549805, |
|
"loss": 0.1854, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/chosen": 0.6695830821990967, |
|
"rewards/margins": 3.61427903175354, |
|
"rewards/rejected": -2.9446957111358643, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.4584615384615385, |
|
"grad_norm": 13.901867549459764, |
|
"learning_rate": 1.0239940674851941e-07, |
|
"logits/chosen": -1.1156858205795288, |
|
"logits/rejected": -1.114392638206482, |
|
"logps/chosen": -24.01244354248047, |
|
"logps/rejected": -34.20494842529297, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.37583643198013306, |
|
"rewards/margins": 3.5291662216186523, |
|
"rewards/rejected": -3.153329610824585, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.4676923076923076, |
|
"grad_norm": 11.080424296345777, |
|
"learning_rate": 9.915136645426883e-08, |
|
"logits/chosen": -1.1818937063217163, |
|
"logits/rejected": -1.1808428764343262, |
|
"logps/chosen": -24.881999969482422, |
|
"logps/rejected": -28.97332763671875, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": 0.36953669786453247, |
|
"rewards/margins": 3.261909246444702, |
|
"rewards/rejected": -2.8923726081848145, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.476923076923077, |
|
"grad_norm": 15.189646270302608, |
|
"learning_rate": 9.594288359976815e-08, |
|
"logits/chosen": -1.1282167434692383, |
|
"logits/rejected": -1.1426851749420166, |
|
"logps/chosen": -17.99266815185547, |
|
"logps/rejected": -47.12626266479492, |
|
"loss": 0.2092, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.30799973011016846, |
|
"rewards/margins": 4.037694454193115, |
|
"rewards/rejected": -3.729694366455078, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.476923076923077, |
|
"eval_logits/chosen": -1.1610218286514282, |
|
"eval_logits/rejected": -1.1692686080932617, |
|
"eval_logps/chosen": -22.297130584716797, |
|
"eval_logps/rejected": -32.10142135620117, |
|
"eval_loss": 0.23491987586021423, |
|
"eval_rewards/accuracies": 0.8329492807388306, |
|
"eval_rewards/chosen": 0.3747842013835907, |
|
"eval_rewards/margins": 3.245098829269409, |
|
"eval_rewards/rejected": -2.870314836502075, |
|
"eval_runtime": 216.0919, |
|
"eval_samples_per_second": 8.024, |
|
"eval_steps_per_second": 2.008, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.4861538461538462, |
|
"grad_norm": 11.193355120949441, |
|
"learning_rate": 9.277479955403886e-08, |
|
"logits/chosen": -1.147449016571045, |
|
"logits/rejected": -1.1808828115463257, |
|
"logps/chosen": -19.78190040588379, |
|
"logps/rejected": -68.74774932861328, |
|
"loss": 0.1519, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.25842922925949097, |
|
"rewards/margins": 5.480890274047852, |
|
"rewards/rejected": -5.222461223602295, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.4953846153846153, |
|
"grad_norm": 11.257040825977688, |
|
"learning_rate": 8.964794509221507e-08, |
|
"logits/chosen": -1.1383910179138184, |
|
"logits/rejected": -1.148794412612915, |
|
"logps/chosen": -25.653322219848633, |
|
"logps/rejected": -34.04636001586914, |
|
"loss": 0.1653, |
|
"rewards/accuracies": 0.9305555820465088, |
|
"rewards/chosen": 0.33952367305755615, |
|
"rewards/margins": 3.5638911724090576, |
|
"rewards/rejected": -3.224367380142212, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.5046153846153847, |
|
"grad_norm": 14.248331413419937, |
|
"learning_rate": 8.656314017768693e-08, |
|
"logits/chosen": -1.1353636980056763, |
|
"logits/rejected": -1.1488914489746094, |
|
"logps/chosen": -23.45088768005371, |
|
"logps/rejected": -36.34320831298828, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.5625240802764893, |
|
"rewards/margins": 3.636873483657837, |
|
"rewards/rejected": -3.0743494033813477, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.5138461538461538, |
|
"grad_norm": 11.13430757826836, |
|
"learning_rate": 8.352119374707977e-08, |
|
"logits/chosen": -1.1736154556274414, |
|
"logits/rejected": -1.1819250583648682, |
|
"logps/chosen": -21.08655548095703, |
|
"logps/rejected": -31.81151580810547, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3814205825328827, |
|
"rewards/margins": 3.455685615539551, |
|
"rewards/rejected": -3.0742650032043457, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.523076923076923, |
|
"grad_norm": 9.775792350949882, |
|
"learning_rate": 8.052290349812419e-08, |
|
"logits/chosen": -1.1424063444137573, |
|
"logits/rejected": -1.1474817991256714, |
|
"logps/chosen": -21.133007049560547, |
|
"logps/rejected": -25.102752685546875, |
|
"loss": 0.2071, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4940270781517029, |
|
"rewards/margins": 2.9714784622192383, |
|
"rewards/rejected": -2.4774513244628906, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.5323076923076924, |
|
"grad_norm": 6.768309866947245, |
|
"learning_rate": 7.756905568047392e-08, |
|
"logits/chosen": -1.1152650117874146, |
|
"logits/rejected": -1.12236750125885, |
|
"logps/chosen": -17.50248146057129, |
|
"logps/rejected": -29.518686294555664, |
|
"loss": 0.159, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.6183215379714966, |
|
"rewards/margins": 3.7438418865203857, |
|
"rewards/rejected": -3.1255204677581787, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.5415384615384615, |
|
"grad_norm": 12.853827774295516, |
|
"learning_rate": 7.46604248895252e-08, |
|
"logits/chosen": -1.1082737445831299, |
|
"logits/rejected": -1.1175150871276855, |
|
"logps/chosen": -20.219505310058594, |
|
"logps/rejected": -28.43560218811035, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.34581294655799866, |
|
"rewards/margins": 3.1769955158233643, |
|
"rewards/rejected": -2.8311829566955566, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.5507692307692307, |
|
"grad_norm": 7.493668682648857, |
|
"learning_rate": 7.179777386329275e-08, |
|
"logits/chosen": -1.1045269966125488, |
|
"logits/rejected": -1.1183186769485474, |
|
"logps/chosen": -21.421226501464844, |
|
"logps/rejected": -39.41886901855469, |
|
"loss": 0.1748, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.5396389365196228, |
|
"rewards/margins": 3.9202401638031006, |
|
"rewards/rejected": -3.380601167678833, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 12.452229910069226, |
|
"learning_rate": 6.898185328239467e-08, |
|
"logits/chosen": -1.145583987236023, |
|
"logits/rejected": -1.1488795280456543, |
|
"logps/chosen": -22.65854263305664, |
|
"logps/rejected": -31.751142501831055, |
|
"loss": 0.1845, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.2917179465293884, |
|
"rewards/margins": 3.111690044403076, |
|
"rewards/rejected": -2.819972038269043, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.5692307692307692, |
|
"grad_norm": 10.84177308211244, |
|
"learning_rate": 6.621340157319996e-08, |
|
"logits/chosen": -1.1560921669006348, |
|
"logits/rejected": -1.1605477333068848, |
|
"logps/chosen": -16.325712203979492, |
|
"logps/rejected": -24.499792098999023, |
|
"loss": 0.2045, |
|
"rewards/accuracies": 0.9027777910232544, |
|
"rewards/chosen": 0.44531428813934326, |
|
"rewards/margins": 3.1462950706481934, |
|
"rewards/rejected": -2.7009804248809814, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5692307692307692, |
|
"eval_logits/chosen": -1.1584707498550415, |
|
"eval_logits/rejected": -1.1668710708618164, |
|
"eval_logps/chosen": -22.341110229492188, |
|
"eval_logps/rejected": -32.223533630371094, |
|
"eval_loss": 0.23495733737945557, |
|
"eval_rewards/accuracies": 0.8341013789176941, |
|
"eval_rewards/chosen": 0.35279345512390137, |
|
"eval_rewards/margins": 3.2841641902923584, |
|
"eval_rewards/rejected": -2.931370496749878, |
|
"eval_runtime": 216.2511, |
|
"eval_samples_per_second": 8.018, |
|
"eval_steps_per_second": 2.007, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5784615384615384, |
|
"grad_norm": 8.225696594197464, |
|
"learning_rate": 6.349314471418849e-08, |
|
"logits/chosen": -1.0857443809509277, |
|
"logits/rejected": -1.0922576189041138, |
|
"logps/chosen": -16.084243774414062, |
|
"logps/rejected": -30.81378173828125, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.5106647610664368, |
|
"rewards/margins": 3.7973814010620117, |
|
"rewards/rejected": -3.2867166996002197, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.5876923076923077, |
|
"grad_norm": 15.760247716168218, |
|
"learning_rate": 6.082179604557616e-08, |
|
"logits/chosen": -1.1193811893463135, |
|
"logits/rejected": -1.121721863746643, |
|
"logps/chosen": -22.19783592224121, |
|
"logps/rejected": -28.761178970336914, |
|
"loss": 0.197, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.40734562277793884, |
|
"rewards/margins": 3.452158212661743, |
|
"rewards/rejected": -3.0448129177093506, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.596923076923077, |
|
"grad_norm": 10.909974494088763, |
|
"learning_rate": 5.8200056082253453e-08, |
|
"logits/chosen": -1.125333547592163, |
|
"logits/rejected": -1.142914056777954, |
|
"logps/chosen": -19.27569007873535, |
|
"logps/rejected": -45.170040130615234, |
|
"loss": 0.1653, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.4003957509994507, |
|
"rewards/margins": 4.2396368980407715, |
|
"rewards/rejected": -3.839240550994873, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.606153846153846, |
|
"grad_norm": 10.855639719670084, |
|
"learning_rate": 5.5628612330087724e-08, |
|
"logits/chosen": -1.131655216217041, |
|
"logits/rejected": -1.1401116847991943, |
|
"logps/chosen": -17.995466232299805, |
|
"logps/rejected": -32.176475524902344, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.8472222089767456, |
|
"rewards/chosen": 0.4925755262374878, |
|
"rewards/margins": 3.6894967555999756, |
|
"rewards/rejected": -3.196920871734619, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.6153846153846154, |
|
"grad_norm": 16.085282454030374, |
|
"learning_rate": 5.310813910563644e-08, |
|
"logits/chosen": -1.0810273885726929, |
|
"logits/rejected": -1.0798935890197754, |
|
"logps/chosen": -22.392784118652344, |
|
"logps/rejected": -28.961748123168945, |
|
"loss": 0.2082, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": 0.39071983098983765, |
|
"rewards/margins": 2.970240592956543, |
|
"rewards/rejected": -2.5795204639434814, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.6246153846153846, |
|
"grad_norm": 16.9671493136513, |
|
"learning_rate": 5.0639297359319846e-08, |
|
"logits/chosen": -1.1683417558670044, |
|
"logits/rejected": -1.1672459840774536, |
|
"logps/chosen": -24.353551864624023, |
|
"logps/rejected": -27.454164505004883, |
|
"loss": 0.2106, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": 0.26455923914909363, |
|
"rewards/margins": 2.982168674468994, |
|
"rewards/rejected": -2.717609167098999, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.6338461538461537, |
|
"grad_norm": 10.455898381248911, |
|
"learning_rate": 4.8222734502097655e-08, |
|
"logits/chosen": -1.1433789730072021, |
|
"logits/rejected": -1.153548240661621, |
|
"logps/chosen": -24.5914363861084, |
|
"logps/rejected": -42.36714172363281, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.8194444179534912, |
|
"rewards/chosen": 0.36157724261283875, |
|
"rewards/margins": 3.6608800888061523, |
|
"rewards/rejected": -3.2993030548095703, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.643076923076923, |
|
"grad_norm": 19.280259828969186, |
|
"learning_rate": 4.5859084235697235e-08, |
|
"logits/chosen": -1.164656639099121, |
|
"logits/rejected": -1.1599383354187012, |
|
"logps/chosen": -19.223194122314453, |
|
"logps/rejected": -24.446197509765625, |
|
"loss": 0.2371, |
|
"rewards/accuracies": 0.8611111044883728, |
|
"rewards/chosen": 0.3862743377685547, |
|
"rewards/margins": 2.9600579738616943, |
|
"rewards/rejected": -2.5737838745117188, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.6523076923076923, |
|
"grad_norm": 8.14493222848995, |
|
"learning_rate": 4.35489663864359e-08, |
|
"logits/chosen": -1.0972024202346802, |
|
"logits/rejected": -1.1305886507034302, |
|
"logps/chosen": -17.79538345336914, |
|
"logps/rejected": -59.57120895385742, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.8888888955116272, |
|
"rewards/chosen": 0.5223473310470581, |
|
"rewards/margins": 4.91096830368042, |
|
"rewards/rejected": -4.388620853424072, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.6615384615384614, |
|
"grad_norm": 11.376614389062514, |
|
"learning_rate": 4.1292986742682254e-08, |
|
"logits/chosen": -1.140592098236084, |
|
"logits/rejected": -1.1457772254943848, |
|
"logps/chosen": -19.596229553222656, |
|
"logps/rejected": -32.57119369506836, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": 0.34850603342056274, |
|
"rewards/margins": 3.6875181198120117, |
|
"rewards/rejected": -3.339012622833252, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.6615384615384614, |
|
"eval_logits/chosen": -1.1585197448730469, |
|
"eval_logits/rejected": -1.1669610738754272, |
|
"eval_logps/chosen": -22.363513946533203, |
|
"eval_logps/rejected": -32.30293273925781, |
|
"eval_loss": 0.23404575884342194, |
|
"eval_rewards/accuracies": 0.8352534770965576, |
|
"eval_rewards/chosen": 0.3415912091732025, |
|
"eval_rewards/margins": 3.3126602172851562, |
|
"eval_rewards/rejected": -2.9710693359375, |
|
"eval_runtime": 216.0202, |
|
"eval_samples_per_second": 8.027, |
|
"eval_steps_per_second": 2.009, |
|
"step": 360 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 432, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 20, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |