phi3m0128-wds-0.85-kendall-onof-ofif-corr-max-2-simpo-max1500-default
/
checkpoint-1250
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 1.039962535123322, | |
"eval_steps": 50, | |
"global_step": 1250, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.00832552815069206, | |
"grad_norm": 0.04514288529753685, | |
"learning_rate": 4.999451708687114e-06, | |
"logits/chosen": 14.412135124206543, | |
"logits/rejected": 14.867518424987793, | |
"logps/chosen": -0.29279541969299316, | |
"logps/rejected": -0.33705300092697144, | |
"loss": 0.9248, | |
"rewards/accuracies": 0.512499988079071, | |
"rewards/chosen": -0.43919315934181213, | |
"rewards/margins": 0.066386379301548, | |
"rewards/rejected": -0.5055795311927795, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.01665105630138412, | |
"grad_norm": 0.05052826926112175, | |
"learning_rate": 4.997807075247147e-06, | |
"logits/chosen": 14.956459045410156, | |
"logits/rejected": 15.363263130187988, | |
"logps/chosen": -0.3096744120121002, | |
"logps/rejected": -0.36214715242385864, | |
"loss": 0.9355, | |
"rewards/accuracies": 0.5, | |
"rewards/chosen": -0.46451157331466675, | |
"rewards/margins": 0.07870914041996002, | |
"rewards/rejected": -0.5432207584381104, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.024976584452076178, | |
"grad_norm": 0.04879612475633621, | |
"learning_rate": 4.9950668210706795e-06, | |
"logits/chosen": 14.485757827758789, | |
"logits/rejected": 15.057507514953613, | |
"logps/chosen": -0.27136802673339844, | |
"logps/rejected": -0.31497400999069214, | |
"loss": 0.9268, | |
"rewards/accuracies": 0.4625000059604645, | |
"rewards/chosen": -0.4070519804954529, | |
"rewards/margins": 0.06540900468826294, | |
"rewards/rejected": -0.4724610447883606, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.03330211260276824, | |
"grad_norm": 0.05672155320644379, | |
"learning_rate": 4.9912321481237616e-06, | |
"logits/chosen": 14.529332160949707, | |
"logits/rejected": 14.814855575561523, | |
"logps/chosen": -0.29139184951782227, | |
"logps/rejected": -0.31259119510650635, | |
"loss": 0.9267, | |
"rewards/accuracies": 0.4625000059604645, | |
"rewards/chosen": -0.4370877742767334, | |
"rewards/margins": 0.03179898113012314, | |
"rewards/rejected": -0.46888676285743713, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.041627640753460295, | |
"grad_norm": 0.065071240067482, | |
"learning_rate": 4.986304738420684e-06, | |
"logits/chosen": 14.174386978149414, | |
"logits/rejected": 15.223234176635742, | |
"logps/chosen": -0.2745029330253601, | |
"logps/rejected": -0.37693315744400024, | |
"loss": 0.9243, | |
"rewards/accuracies": 0.5874999761581421, | |
"rewards/chosen": -0.41175442934036255, | |
"rewards/margins": 0.1536453813314438, | |
"rewards/rejected": -0.5653998255729675, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.041627640753460295, | |
"eval_logits/chosen": 14.56569766998291, | |
"eval_logits/rejected": 15.157320976257324, | |
"eval_logps/chosen": -0.27527979016304016, | |
"eval_logps/rejected": -0.3633999824523926, | |
"eval_loss": 0.9083622694015503, | |
"eval_rewards/accuracies": 0.5612244606018066, | |
"eval_rewards/chosen": -0.41291970014572144, | |
"eval_rewards/margins": 0.13218028843402863, | |
"eval_rewards/rejected": -0.5450999736785889, | |
"eval_runtime": 29.029, | |
"eval_samples_per_second": 26.766, | |
"eval_steps_per_second": 3.376, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.049953168904152356, | |
"grad_norm": 0.14002270996570587, | |
"learning_rate": 4.980286753286196e-06, | |
"logits/chosen": 14.408930778503418, | |
"logits/rejected": 14.791458129882812, | |
"logps/chosen": -0.285602867603302, | |
"logps/rejected": -0.3351826071739197, | |
"loss": 0.9177, | |
"rewards/accuracies": 0.5249999761581421, | |
"rewards/chosen": -0.4284043312072754, | |
"rewards/margins": 0.07436960190534592, | |
"rewards/rejected": -0.5027738809585571, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.05827869705484442, | |
"grad_norm": 0.05595069006085396, | |
"learning_rate": 4.973180832407471e-06, | |
"logits/chosen": 14.41168212890625, | |
"logits/rejected": 14.865121841430664, | |
"logps/chosen": -0.25851207971572876, | |
"logps/rejected": -0.32240185141563416, | |
"loss": 0.9168, | |
"rewards/accuracies": 0.5375000238418579, | |
"rewards/chosen": -0.3877681493759155, | |
"rewards/margins": 0.0958346277475357, | |
"rewards/rejected": -0.4836028218269348, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.06660422520553648, | |
"grad_norm": 0.058645494282245636, | |
"learning_rate": 4.964990092676263e-06, | |
"logits/chosen": 14.897825241088867, | |
"logits/rejected": 15.01073932647705, | |
"logps/chosen": -0.2668797969818115, | |
"logps/rejected": -0.3204379975795746, | |
"loss": 0.9242, | |
"rewards/accuracies": 0.5, | |
"rewards/chosen": -0.4003197252750397, | |
"rewards/margins": 0.08033724129199982, | |
"rewards/rejected": -0.4806569516658783, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.07492975335622853, | |
"grad_norm": 0.0597861111164093, | |
"learning_rate": 4.9557181268217225e-06, | |
"logits/chosen": 14.531021118164062, | |
"logits/rejected": 14.767858505249023, | |
"logps/chosen": -0.26787540316581726, | |
"logps/rejected": -0.32972821593284607, | |
"loss": 0.9077, | |
"rewards/accuracies": 0.48750001192092896, | |
"rewards/chosen": -0.4018131196498871, | |
"rewards/margins": 0.09277921915054321, | |
"rewards/rejected": -0.4945923686027527, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.08325528150692059, | |
"grad_norm": 0.0863095372915268, | |
"learning_rate": 4.9453690018345144e-06, | |
"logits/chosen": 14.179275512695312, | |
"logits/rejected": 14.909070014953613, | |
"logps/chosen": -0.2532978057861328, | |
"logps/rejected": -0.35474082827568054, | |
"loss": 0.903, | |
"rewards/accuracies": 0.5874999761581421, | |
"rewards/chosen": -0.3799467086791992, | |
"rewards/margins": 0.1521645337343216, | |
"rewards/rejected": -0.5321112275123596, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.08325528150692059, | |
"eval_logits/chosen": 14.326024055480957, | |
"eval_logits/rejected": 14.979863166809082, | |
"eval_logps/chosen": -0.2673422694206238, | |
"eval_logps/rejected": -0.3668619990348816, | |
"eval_loss": 0.8989922404289246, | |
"eval_rewards/accuracies": 0.6020408272743225, | |
"eval_rewards/chosen": -0.4010133445262909, | |
"eval_rewards/margins": 0.1492796391248703, | |
"eval_rewards/rejected": -0.5502930283546448, | |
"eval_runtime": 29.0209, | |
"eval_samples_per_second": 26.774, | |
"eval_steps_per_second": 3.377, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.09158080965761266, | |
"grad_norm": 0.07181967049837112, | |
"learning_rate": 4.933947257182901e-06, | |
"logits/chosen": 14.118756294250488, | |
"logits/rejected": 14.755918502807617, | |
"logps/chosen": -0.27995947003364563, | |
"logps/rejected": -0.3749552369117737, | |
"loss": 0.9097, | |
"rewards/accuracies": 0.5874999761581421, | |
"rewards/chosen": -0.41993919014930725, | |
"rewards/margins": 0.14249366521835327, | |
"rewards/rejected": -0.5624328255653381, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.09990633780830471, | |
"grad_norm": 0.08269819617271423, | |
"learning_rate": 4.921457902821578e-06, | |
"logits/chosen": 13.764413833618164, | |
"logits/rejected": 14.43315315246582, | |
"logps/chosen": -0.28177163004875183, | |
"logps/rejected": -0.3637630343437195, | |
"loss": 0.9075, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.42265743017196655, | |
"rewards/margins": 0.12298711389303207, | |
"rewards/rejected": -0.5456445813179016, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.10823186595899677, | |
"grad_norm": 1.9071497917175293, | |
"learning_rate": 4.907906416994146e-06, | |
"logits/chosen": 14.103793144226074, | |
"logits/rejected": 14.727777481079102, | |
"logps/chosen": -0.2665451765060425, | |
"logps/rejected": -0.3827117085456848, | |
"loss": 0.9217, | |
"rewards/accuracies": 0.574999988079071, | |
"rewards/chosen": -0.3998177647590637, | |
"rewards/margins": 0.1742497682571411, | |
"rewards/rejected": -0.5740675926208496, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.11655739410968884, | |
"grad_norm": 0.12107716500759125, | |
"learning_rate": 4.893298743830168e-06, | |
"logits/chosen": 13.517863273620605, | |
"logits/rejected": 14.42052173614502, | |
"logps/chosen": -0.26627904176712036, | |
"logps/rejected": -0.3745174705982208, | |
"loss": 0.904, | |
"rewards/accuracies": 0.5625, | |
"rewards/chosen": -0.39941853284835815, | |
"rewards/margins": 0.16235767304897308, | |
"rewards/rejected": -0.5617762207984924, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.12488292226038089, | |
"grad_norm": 0.1638205647468567, | |
"learning_rate": 4.8776412907378845e-06, | |
"logits/chosen": 12.83032512664795, | |
"logits/rejected": 13.673515319824219, | |
"logps/chosen": -0.24289576709270477, | |
"logps/rejected": -0.37163227796554565, | |
"loss": 0.8779, | |
"rewards/accuracies": 0.6499999761581421, | |
"rewards/chosen": -0.36434367299079895, | |
"rewards/margins": 0.19310477375984192, | |
"rewards/rejected": -0.5574483871459961, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.12488292226038089, | |
"eval_logits/chosen": 12.317696571350098, | |
"eval_logits/rejected": 13.164616584777832, | |
"eval_logps/chosen": -0.266156405210495, | |
"eval_logps/rejected": -0.4009220004081726, | |
"eval_loss": 0.8768696784973145, | |
"eval_rewards/accuracies": 0.6224489808082581, | |
"eval_rewards/chosen": -0.3992346227169037, | |
"eval_rewards/margins": 0.20214837789535522, | |
"eval_rewards/rejected": -0.6013829708099365, | |
"eval_runtime": 29.0257, | |
"eval_samples_per_second": 26.769, | |
"eval_steps_per_second": 3.376, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.13320845041107296, | |
"grad_norm": 0.1479438841342926, | |
"learning_rate": 4.860940925593703e-06, | |
"logits/chosen": 12.736433029174805, | |
"logits/rejected": 13.475964546203613, | |
"logps/chosen": -0.2913517355918884, | |
"logps/rejected": -0.36094629764556885, | |
"loss": 0.8756, | |
"rewards/accuracies": 0.4749999940395355, | |
"rewards/chosen": -0.43702763319015503, | |
"rewards/margins": 0.10439182817935944, | |
"rewards/rejected": -0.5414193868637085, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.141533978561765, | |
"grad_norm": 0.17609630525112152, | |
"learning_rate": 4.84320497372973e-06, | |
"logits/chosen": 10.606362342834473, | |
"logits/rejected": 11.537567138671875, | |
"logps/chosen": -0.2560296952724457, | |
"logps/rejected": -0.4312233328819275, | |
"loss": 0.8489, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -0.38404449820518494, | |
"rewards/margins": 0.2627905011177063, | |
"rewards/rejected": -0.6468349695205688, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.14985950671245707, | |
"grad_norm": 0.18054936826229095, | |
"learning_rate": 4.824441214720629e-06, | |
"logits/chosen": 10.13754653930664, | |
"logits/rejected": 10.914222717285156, | |
"logps/chosen": -0.29278701543807983, | |
"logps/rejected": -0.43448886275291443, | |
"loss": 0.8715, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -0.43918052315711975, | |
"rewards/margins": 0.21255281567573547, | |
"rewards/rejected": -0.6517333388328552, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.15818503486314914, | |
"grad_norm": 0.19739146530628204, | |
"learning_rate": 4.804657878971252e-06, | |
"logits/chosen": 8.077766418457031, | |
"logits/rejected": 9.669368743896484, | |
"logps/chosen": -0.2844889760017395, | |
"logps/rejected": -0.5050357580184937, | |
"loss": 0.8582, | |
"rewards/accuracies": 0.6499999761581421, | |
"rewards/chosen": -0.42673349380493164, | |
"rewards/margins": 0.3308201730251312, | |
"rewards/rejected": -0.7575536966323853, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.16651056301384118, | |
"grad_norm": 0.2397814244031906, | |
"learning_rate": 4.783863644106502e-06, | |
"logits/chosen": 6.790783882141113, | |
"logits/rejected": 7.849525451660156, | |
"logps/chosen": -0.2940555512905121, | |
"logps/rejected": -0.5699166059494019, | |
"loss": 0.8196, | |
"rewards/accuracies": 0.75, | |
"rewards/chosen": -0.4410833418369293, | |
"rewards/margins": 0.41379159688949585, | |
"rewards/rejected": -0.8548749089241028, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.16651056301384118, | |
"eval_logits/chosen": 6.290835857391357, | |
"eval_logits/rejected": 6.757873058319092, | |
"eval_logps/chosen": -0.317629337310791, | |
"eval_logps/rejected": -0.581989586353302, | |
"eval_loss": 0.8032433986663818, | |
"eval_rewards/accuracies": 0.6734693646430969, | |
"eval_rewards/chosen": -0.47644397616386414, | |
"eval_rewards/margins": 0.39654040336608887, | |
"eval_rewards/rejected": -0.8729843497276306, | |
"eval_runtime": 29.025, | |
"eval_samples_per_second": 26.77, | |
"eval_steps_per_second": 3.376, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.17483609116453325, | |
"grad_norm": 0.2858545184135437, | |
"learning_rate": 4.762067631165049e-06, | |
"logits/chosen": 6.875879764556885, | |
"logits/rejected": 6.691536903381348, | |
"logps/chosen": -0.37194910645484924, | |
"logps/rejected": -0.5639354586601257, | |
"loss": 0.8129, | |
"rewards/accuracies": 0.5625, | |
"rewards/chosen": -0.5579236745834351, | |
"rewards/margins": 0.2879795432090759, | |
"rewards/rejected": -0.8459032773971558, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.18316161931522532, | |
"grad_norm": 0.30206382274627686, | |
"learning_rate": 4.7392794005985324e-06, | |
"logits/chosen": 4.656112194061279, | |
"logits/rejected": 4.483086585998535, | |
"logps/chosen": -0.360150009393692, | |
"logps/rejected": -0.6204283833503723, | |
"loss": 0.7954, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -0.5402250289916992, | |
"rewards/margins": 0.39041754603385925, | |
"rewards/rejected": -0.9306427240371704, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.19148714746591736, | |
"grad_norm": 0.40204310417175293, | |
"learning_rate": 4.715508948078037e-06, | |
"logits/chosen": 3.9398162364959717, | |
"logits/rejected": 3.38537859916687, | |
"logps/chosen": -0.39010342955589294, | |
"logps/rejected": -0.7167688608169556, | |
"loss": 0.7664, | |
"rewards/accuracies": 0.6875, | |
"rewards/chosen": -0.5851551294326782, | |
"rewards/margins": 0.4899981617927551, | |
"rewards/rejected": -1.0751533508300781, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.19981267561660943, | |
"grad_norm": 0.48389795422554016, | |
"learning_rate": 4.690766700109659e-06, | |
"logits/chosen": 2.925476551055908, | |
"logits/rejected": 2.824068069458008, | |
"logps/chosen": -0.41053348779678345, | |
"logps/rejected": -0.8508625030517578, | |
"loss": 0.7606, | |
"rewards/accuracies": 0.7124999761581421, | |
"rewards/chosen": -0.6158002018928528, | |
"rewards/margins": 0.6604936718940735, | |
"rewards/rejected": -1.2762939929962158, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.2081382037673015, | |
"grad_norm": 0.6687452793121338, | |
"learning_rate": 4.665063509461098e-06, | |
"logits/chosen": 2.751737594604492, | |
"logits/rejected": 2.2424545288085938, | |
"logps/chosen": -0.4365699291229248, | |
"logps/rejected": -0.8550359606742859, | |
"loss": 0.7234, | |
"rewards/accuracies": 0.637499988079071, | |
"rewards/chosen": -0.6548548936843872, | |
"rewards/margins": 0.6276990175247192, | |
"rewards/rejected": -1.2825539112091064, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.2081382037673015, | |
"eval_logits/chosen": 2.1380228996276855, | |
"eval_logits/rejected": 1.3922746181488037, | |
"eval_logps/chosen": -0.48307570815086365, | |
"eval_logps/rejected": -1.0382359027862549, | |
"eval_loss": 0.668463945388794, | |
"eval_rewards/accuracies": 0.6938775777816772, | |
"eval_rewards/chosen": -0.7246134877204895, | |
"eval_rewards/margins": 0.8327403664588928, | |
"eval_rewards/rejected": -1.5573538541793823, | |
"eval_runtime": 29.0228, | |
"eval_samples_per_second": 26.772, | |
"eval_steps_per_second": 3.377, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.21646373191799353, | |
"grad_norm": 0.7085956335067749, | |
"learning_rate": 4.638410650401267e-06, | |
"logits/chosen": 1.7889283895492554, | |
"logits/rejected": 0.9420136213302612, | |
"logps/chosen": -0.5195389986038208, | |
"logps/rejected": -1.0534025430679321, | |
"loss": 0.6863, | |
"rewards/accuracies": 0.6499999761581421, | |
"rewards/chosen": -0.7793084979057312, | |
"rewards/margins": 0.8007953763008118, | |
"rewards/rejected": -1.580103874206543, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.2247892600686856, | |
"grad_norm": 0.4416671097278595, | |
"learning_rate": 4.610819813755038e-06, | |
"logits/chosen": 1.582745909690857, | |
"logits/rejected": 0.3820720911026001, | |
"logps/chosen": -0.5181297063827515, | |
"logps/rejected": -1.2198141813278198, | |
"loss": 0.5809, | |
"rewards/accuracies": 0.6625000238418579, | |
"rewards/chosen": -0.7771945595741272, | |
"rewards/margins": 1.0525267124176025, | |
"rewards/rejected": -1.8297210931777954, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.23311478821937767, | |
"grad_norm": 2.7746617794036865, | |
"learning_rate": 4.582303101775249e-06, | |
"logits/chosen": 1.2947760820388794, | |
"logits/rejected": 0.27237796783447266, | |
"logps/chosen": -0.643541693687439, | |
"logps/rejected": -1.7467323541641235, | |
"loss": 0.5775, | |
"rewards/accuracies": 0.6875, | |
"rewards/chosen": -0.9653124809265137, | |
"rewards/margins": 1.6547861099243164, | |
"rewards/rejected": -2.62009859085083, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.2414403163700697, | |
"grad_norm": 0.6444702744483948, | |
"learning_rate": 4.55287302283426e-06, | |
"logits/chosen": 1.2399464845657349, | |
"logits/rejected": 0.22667090594768524, | |
"logps/chosen": -0.7517040967941284, | |
"logps/rejected": -1.9010766744613647, | |
"loss": 0.5314, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -1.1275560855865479, | |
"rewards/margins": 1.724058747291565, | |
"rewards/rejected": -2.8516147136688232, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.24976584452076178, | |
"grad_norm": 0.5103917717933655, | |
"learning_rate": 4.522542485937369e-06, | |
"logits/chosen": 1.438954472541809, | |
"logits/rejected": 0.5288833379745483, | |
"logps/chosen": -0.7871009707450867, | |
"logps/rejected": -2.0329811573028564, | |
"loss": 0.5271, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -1.1806514263153076, | |
"rewards/margins": 1.8688204288482666, | |
"rewards/rejected": -3.049471616744995, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.24976584452076178, | |
"eval_logits/chosen": 1.3706706762313843, | |
"eval_logits/rejected": 0.8007871508598328, | |
"eval_logps/chosen": -0.7460500001907349, | |
"eval_logps/rejected": -2.209245443344116, | |
"eval_loss": 0.5008835792541504, | |
"eval_rewards/accuracies": 0.7244898080825806, | |
"eval_rewards/chosen": -1.1190749406814575, | |
"eval_rewards/margins": 2.194793224334717, | |
"eval_rewards/rejected": -3.313868284225464, | |
"eval_runtime": 29.0227, | |
"eval_samples_per_second": 26.772, | |
"eval_steps_per_second": 3.377, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.2580913726714538, | |
"grad_norm": 0.7984316945075989, | |
"learning_rate": 4.491324795060491e-06, | |
"logits/chosen": 0.9250973463058472, | |
"logits/rejected": 0.1887839138507843, | |
"logps/chosen": -0.8511486053466797, | |
"logps/rejected": -2.447072982788086, | |
"loss": 0.5506, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -1.2767229080200195, | |
"rewards/margins": 2.3938865661621094, | |
"rewards/rejected": -3.670609712600708, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.2664169008221459, | |
"grad_norm": 0.5243161916732788, | |
"learning_rate": 4.4592336433146e-06, | |
"logits/chosen": 2.437886953353882, | |
"logits/rejected": 1.6011940240859985, | |
"logps/chosen": -0.7107629776000977, | |
"logps/rejected": -2.132263422012329, | |
"loss": 0.5423, | |
"rewards/accuracies": 0.6000000238418579, | |
"rewards/chosen": -1.0661444664001465, | |
"rewards/margins": 2.1322507858276367, | |
"rewards/rejected": -3.198395013809204, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.27474242897283796, | |
"grad_norm": 0.4742359220981598, | |
"learning_rate": 4.426283106939474e-06, | |
"logits/chosen": 1.8433977365493774, | |
"logits/rejected": 1.199568748474121, | |
"logps/chosen": -0.8737133145332336, | |
"logps/rejected": -2.1652615070343018, | |
"loss": 0.5015, | |
"rewards/accuracies": 0.612500011920929, | |
"rewards/chosen": -1.3105700016021729, | |
"rewards/margins": 1.9373222589492798, | |
"rewards/rejected": -3.247892379760742, | |
"step": 330 | |
}, | |
{ | |
"epoch": 0.28306795712353, | |
"grad_norm": 0.5529736280441284, | |
"learning_rate": 4.3924876391293915e-06, | |
"logits/chosen": 2.0044589042663574, | |
"logits/rejected": 0.9263212084770203, | |
"logps/chosen": -0.9175036549568176, | |
"logps/rejected": -2.6408374309539795, | |
"loss": 0.4921, | |
"rewards/accuracies": 0.699999988079071, | |
"rewards/chosen": -1.3762553930282593, | |
"rewards/margins": 2.585000991821289, | |
"rewards/rejected": -3.961256504058838, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.2913934852742221, | |
"grad_norm": 0.7060612440109253, | |
"learning_rate": 4.357862063693486e-06, | |
"logits/chosen": 2.243232250213623, | |
"logits/rejected": 1.6251205205917358, | |
"logps/chosen": -0.9481338262557983, | |
"logps/rejected": -2.9519124031066895, | |
"loss": 0.4753, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -1.4222007989883423, | |
"rewards/margins": 3.0056674480438232, | |
"rewards/rejected": -4.427868366241455, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.2913934852742221, | |
"eval_logits/chosen": 1.7781500816345215, | |
"eval_logits/rejected": 1.412752628326416, | |
"eval_logps/chosen": -0.9692521095275879, | |
"eval_logps/rejected": -2.8247811794281006, | |
"eval_loss": 0.4446474015712738, | |
"eval_rewards/accuracies": 0.7346938848495483, | |
"eval_rewards/chosen": -1.4538781642913818, | |
"eval_rewards/margins": 2.7832937240600586, | |
"eval_rewards/rejected": -4.2371721267700195, | |
"eval_runtime": 29.0245, | |
"eval_samples_per_second": 26.77, | |
"eval_steps_per_second": 3.376, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.29971901342491414, | |
"grad_norm": 0.9664792418479919, | |
"learning_rate": 4.322421568553529e-06, | |
"logits/chosen": 1.7094570398330688, | |
"logits/rejected": 1.1617993116378784, | |
"logps/chosen": -0.992924690246582, | |
"logps/rejected": -2.7834811210632324, | |
"loss": 0.4972, | |
"rewards/accuracies": 0.675000011920929, | |
"rewards/chosen": -1.4893869161605835, | |
"rewards/margins": 2.6858346462249756, | |
"rewards/rejected": -4.1752214431762695, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.3080445415756062, | |
"grad_norm": 0.7800536155700684, | |
"learning_rate": 4.286181699082008e-06, | |
"logits/chosen": 2.9170143604278564, | |
"logits/rejected": 2.384690523147583, | |
"logps/chosen": -1.0323909521102905, | |
"logps/rejected": -2.726369857788086, | |
"loss": 0.4689, | |
"rewards/accuracies": 0.625, | |
"rewards/chosen": -1.548586368560791, | |
"rewards/margins": 2.540968418121338, | |
"rewards/rejected": -4.089555263519287, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.3163700697262983, | |
"grad_norm": 1.3163660764694214, | |
"learning_rate": 4.249158351283414e-06, | |
"logits/chosen": 2.780831813812256, | |
"logits/rejected": 1.753291130065918, | |
"logps/chosen": -1.0468894243240356, | |
"logps/rejected": -2.7425389289855957, | |
"loss": 0.4835, | |
"rewards/accuracies": 0.6499999761581421, | |
"rewards/chosen": -1.5703339576721191, | |
"rewards/margins": 2.5434746742248535, | |
"rewards/rejected": -4.113808631896973, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.3246955978769903, | |
"grad_norm": 0.6381780505180359, | |
"learning_rate": 4.211367764821722e-06, | |
"logits/chosen": 2.585071086883545, | |
"logits/rejected": 1.9254558086395264, | |
"logps/chosen": -1.2089946269989014, | |
"logps/rejected": -3.615030288696289, | |
"loss": 0.4518, | |
"rewards/accuracies": 0.737500011920929, | |
"rewards/chosen": -1.8134920597076416, | |
"rewards/margins": 3.6090526580810547, | |
"rewards/rejected": -5.422544956207275, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.33302112602768236, | |
"grad_norm": 0.9214782118797302, | |
"learning_rate": 4.172826515897146e-06, | |
"logits/chosen": 1.9765586853027344, | |
"logits/rejected": 1.1926987171173096, | |
"logps/chosen": -1.2852815389633179, | |
"logps/rejected": -3.786972761154175, | |
"loss": 0.4165, | |
"rewards/accuracies": 0.7250000238418579, | |
"rewards/chosen": -1.9279224872589111, | |
"rewards/margins": 3.7525367736816406, | |
"rewards/rejected": -5.680459022521973, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.33302112602768236, | |
"eval_logits/chosen": 2.6366844177246094, | |
"eval_logits/rejected": 2.394319534301758, | |
"eval_logps/chosen": -1.322396993637085, | |
"eval_logps/rejected": -3.686817169189453, | |
"eval_loss": 0.4065541923046112, | |
"eval_rewards/accuracies": 0.7551020383834839, | |
"eval_rewards/chosen": -1.9835957288742065, | |
"eval_rewards/margins": 3.5466296672821045, | |
"eval_rewards/rejected": -5.5302252769470215, | |
"eval_runtime": 29.025, | |
"eval_samples_per_second": 26.77, | |
"eval_steps_per_second": 3.376, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.34134665417837445, | |
"grad_norm": 1.5113208293914795, | |
"learning_rate": 4.133551509975264e-06, | |
"logits/chosen": 2.0068416595458984, | |
"logits/rejected": 1.5152744054794312, | |
"logps/chosen": -1.5090525150299072, | |
"logps/rejected": -3.9272122383117676, | |
"loss": 0.4004, | |
"rewards/accuracies": 0.7875000238418579, | |
"rewards/chosen": -2.2635788917541504, | |
"rewards/margins": 3.627239227294922, | |
"rewards/rejected": -5.890818119049072, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.3496721823290665, | |
"grad_norm": 11.516369819641113, | |
"learning_rate": 4.093559974371725e-06, | |
"logits/chosen": 3.343449115753174, | |
"logits/rejected": 2.920070171356201, | |
"logps/chosen": -1.8312532901763916, | |
"logps/rejected": -4.115124702453613, | |
"loss": 0.4045, | |
"rewards/accuracies": 0.7749999761581421, | |
"rewards/chosen": -2.746879816055298, | |
"rewards/margins": 3.425807476043701, | |
"rewards/rejected": -6.17268705368042, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.35799771047975854, | |
"grad_norm": 3.0497395992279053, | |
"learning_rate": 4.052869450695776e-06, | |
"logits/chosen": 2.5527279376983643, | |
"logits/rejected": 2.2495744228363037, | |
"logps/chosen": -2.2998366355895996, | |
"logps/rejected": -4.966278076171875, | |
"loss": 0.3758, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -3.4497551918029785, | |
"rewards/margins": 3.9996612071990967, | |
"rewards/rejected": -7.4494171142578125, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.36632323863045063, | |
"grad_norm": 3.900503158569336, | |
"learning_rate": 4.011497787155938e-06, | |
"logits/chosen": 2.4560112953186035, | |
"logits/rejected": 2.3936328887939453, | |
"logps/chosen": -2.563218593597412, | |
"logps/rejected": -5.063398838043213, | |
"loss": 0.3739, | |
"rewards/accuracies": 0.8125, | |
"rewards/chosen": -3.8448281288146973, | |
"rewards/margins": 3.750270366668701, | |
"rewards/rejected": -7.595097541809082, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.3746487667811427, | |
"grad_norm": 2.8846070766448975, | |
"learning_rate": 3.969463130731183e-06, | |
"logits/chosen": 2.5467796325683594, | |
"logits/rejected": 2.4370405673980713, | |
"logps/chosen": -2.4494822025299072, | |
"logps/rejected": -5.12601900100708, | |
"loss": 0.2905, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -3.6742234230041504, | |
"rewards/margins": 4.014804840087891, | |
"rewards/rejected": -7.689028263092041, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.3746487667811427, | |
"eval_logits/chosen": 2.922081232070923, | |
"eval_logits/rejected": 2.879075050354004, | |
"eval_logps/chosen": -2.352473020553589, | |
"eval_logps/rejected": -5.1224799156188965, | |
"eval_loss": 0.3302614390850067, | |
"eval_rewards/accuracies": 0.8673469424247742, | |
"eval_rewards/chosen": -3.5287091732025146, | |
"eval_rewards/margins": 4.155009746551514, | |
"eval_rewards/rejected": -7.683719635009766, | |
"eval_runtime": 29.0235, | |
"eval_samples_per_second": 26.771, | |
"eval_steps_per_second": 3.377, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.3829742949318347, | |
"grad_norm": 4.662614345550537, | |
"learning_rate": 3.92678391921108e-06, | |
"logits/chosen": 2.428154468536377, | |
"logits/rejected": 2.2403202056884766, | |
"logps/chosen": -2.5936172008514404, | |
"logps/rejected": -5.356133460998535, | |
"loss": 0.2881, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -3.89042592048645, | |
"rewards/margins": 4.143774509429932, | |
"rewards/rejected": -8.034199714660645, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.3912998230825268, | |
"grad_norm": 2.716899871826172, | |
"learning_rate": 3.88347887310836e-06, | |
"logits/chosen": 2.437295436859131, | |
"logits/rejected": 2.271914005279541, | |
"logps/chosen": -2.470245361328125, | |
"logps/rejected": -5.719494819641113, | |
"loss": 0.31, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -3.70536732673645, | |
"rewards/margins": 4.873874187469482, | |
"rewards/rejected": -8.579241752624512, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.39962535123321885, | |
"grad_norm": 3.343271255493164, | |
"learning_rate": 3.839566987447492e-06, | |
"logits/chosen": 2.144461154937744, | |
"logits/rejected": 2.0314810276031494, | |
"logps/chosen": -2.5805585384368896, | |
"logps/rejected": -5.418456077575684, | |
"loss": 0.3194, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -3.870838165283203, | |
"rewards/margins": 4.256844997406006, | |
"rewards/rejected": -8.12768268585205, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.4079508793839109, | |
"grad_norm": 6.411283493041992, | |
"learning_rate": 3.795067523432826e-06, | |
"logits/chosen": 2.408092498779297, | |
"logits/rejected": 2.2996156215667725, | |
"logps/chosen": -2.8846375942230225, | |
"logps/rejected": -5.957771301269531, | |
"loss": 0.3353, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -4.326956748962402, | |
"rewards/margins": 4.6097002029418945, | |
"rewards/rejected": -8.936657905578613, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.416276407534603, | |
"grad_norm": 3.2472238540649414, | |
"learning_rate": 3.7500000000000005e-06, | |
"logits/chosen": 3.0815653800964355, | |
"logits/rejected": 2.8496975898742676, | |
"logps/chosen": -3.061626434326172, | |
"logps/rejected": -5.966124534606934, | |
"loss": 0.3018, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -4.592440128326416, | |
"rewards/margins": 4.356747627258301, | |
"rewards/rejected": -8.949186325073242, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.416276407534603, | |
"eval_logits/chosen": 2.7115373611450195, | |
"eval_logits/rejected": 2.763493061065674, | |
"eval_logps/chosen": -2.85333251953125, | |
"eval_logps/rejected": -5.915884017944336, | |
"eval_loss": 0.3079966604709625, | |
"eval_rewards/accuracies": 0.8979591727256775, | |
"eval_rewards/chosen": -4.279998302459717, | |
"eval_rewards/margins": 4.593828201293945, | |
"eval_rewards/rejected": -8.873826026916504, | |
"eval_runtime": 29.0268, | |
"eval_samples_per_second": 26.768, | |
"eval_steps_per_second": 3.376, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.42460193568529503, | |
"grad_norm": 10.017457962036133, | |
"learning_rate": 3.7043841852542884e-06, | |
"logits/chosen": 2.775202989578247, | |
"logits/rejected": 2.6122496128082275, | |
"logps/chosen": -3.0054879188537598, | |
"logps/rejected": -6.258307456970215, | |
"loss": 0.3101, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -4.5082316398620605, | |
"rewards/margins": 4.879229545593262, | |
"rewards/rejected": -9.387460708618164, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.43292746383598707, | |
"grad_norm": 4.494226932525635, | |
"learning_rate": 3.658240087799655e-06, | |
"logits/chosen": 2.816701889038086, | |
"logits/rejected": 2.4107789993286133, | |
"logps/chosen": -3.2932097911834717, | |
"logps/rejected": -6.099677562713623, | |
"loss": 0.2925, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -4.939814567565918, | |
"rewards/margins": 4.209702014923096, | |
"rewards/rejected": -9.149517059326172, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.44125299198667917, | |
"grad_norm": 2.957486391067505, | |
"learning_rate": 3.611587947962319e-06, | |
"logits/chosen": 2.3626818656921387, | |
"logits/rejected": 2.4196550846099854, | |
"logps/chosen": -3.085209608078003, | |
"logps/rejected": -6.118277072906494, | |
"loss": 0.3169, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -4.627814292907715, | |
"rewards/margins": 4.549601078033447, | |
"rewards/rejected": -9.17741584777832, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.4495785201373712, | |
"grad_norm": 3.429408550262451, | |
"learning_rate": 3.564448228912682e-06, | |
"logits/chosen": 2.559816360473633, | |
"logits/rejected": 2.598250150680542, | |
"logps/chosen": -3.3060078620910645, | |
"logps/rejected": -6.124637126922607, | |
"loss": 0.3271, | |
"rewards/accuracies": 0.800000011920929, | |
"rewards/chosen": -4.959012031555176, | |
"rewards/margins": 4.227944850921631, | |
"rewards/rejected": -9.186956405639648, | |
"step": 540 | |
}, | |
{ | |
"epoch": 0.45790404828806325, | |
"grad_norm": 2.110722780227661, | |
"learning_rate": 3.516841607689501e-06, | |
"logits/chosen": 2.4487693309783936, | |
"logits/rejected": 2.0568625926971436, | |
"logps/chosen": -3.396770477294922, | |
"logps/rejected": -6.35222864151001, | |
"loss": 0.3172, | |
"rewards/accuracies": 0.824999988079071, | |
"rewards/chosen": -5.095156192779541, | |
"rewards/margins": 4.4331865310668945, | |
"rewards/rejected": -9.528343200683594, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.45790404828806325, | |
"eval_logits/chosen": 2.5644595623016357, | |
"eval_logits/rejected": 2.6437506675720215, | |
"eval_logps/chosen": -3.1958370208740234, | |
"eval_logps/rejected": -6.542325496673584, | |
"eval_loss": 0.28538385033607483, | |
"eval_rewards/accuracies": 0.918367326259613, | |
"eval_rewards/chosen": -4.793755054473877, | |
"eval_rewards/margins": 5.0197319984436035, | |
"eval_rewards/rejected": -9.813486099243164, | |
"eval_runtime": 29.0252, | |
"eval_samples_per_second": 26.77, | |
"eval_steps_per_second": 3.376, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.46622957643875534, | |
"grad_norm": 2.0929551124572754, | |
"learning_rate": 3.4687889661302577e-06, | |
"logits/chosen": 2.497122287750244, | |
"logits/rejected": 2.1119792461395264, | |
"logps/chosen": -3.586158037185669, | |
"logps/rejected": -6.939994812011719, | |
"loss": 0.2826, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -5.379237174987793, | |
"rewards/margins": 5.030755043029785, | |
"rewards/rejected": -10.409992218017578, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.4745551045894474, | |
"grad_norm": 3.344160556793213, | |
"learning_rate": 3.4203113817116955e-06, | |
"logits/chosen": 3.181488275527954, | |
"logits/rejected": 2.8188672065734863, | |
"logps/chosen": -3.465902328491211, | |
"logps/rejected": -6.737443447113037, | |
"loss": 0.3027, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.198853492736816, | |
"rewards/margins": 4.90731143951416, | |
"rewards/rejected": -10.106164932250977, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.4828806327401394, | |
"grad_norm": 6.381539344787598, | |
"learning_rate": 3.3714301183045382e-06, | |
"logits/chosen": 3.8848679065704346, | |
"logits/rejected": 3.54484224319458, | |
"logps/chosen": -3.321965456008911, | |
"logps/rejected": -6.796433448791504, | |
"loss": 0.2619, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -4.982948303222656, | |
"rewards/margins": 5.211700916290283, | |
"rewards/rejected": -10.194650650024414, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.4912061608908315, | |
"grad_norm": 3.058936834335327, | |
"learning_rate": 3.3221666168464584e-06, | |
"logits/chosen": 2.9645297527313232, | |
"logits/rejected": 2.7630581855773926, | |
"logps/chosen": -3.2019195556640625, | |
"logps/rejected": -6.635239601135254, | |
"loss": 0.2573, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -4.802879810333252, | |
"rewards/margins": 5.149979114532471, | |
"rewards/rejected": -9.952859878540039, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.49953168904152356, | |
"grad_norm": 4.1828155517578125, | |
"learning_rate": 3.272542485937369e-06, | |
"logits/chosen": 2.696993350982666, | |
"logits/rejected": 2.7842001914978027, | |
"logps/chosen": -3.3624558448791504, | |
"logps/rejected": -6.4542059898376465, | |
"loss": 0.2598, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.043683052062988, | |
"rewards/margins": 4.637625217437744, | |
"rewards/rejected": -9.68130874633789, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.49953168904152356, | |
"eval_logits/chosen": 2.9141366481781006, | |
"eval_logits/rejected": 2.9971513748168945, | |
"eval_logps/chosen": -3.1258208751678467, | |
"eval_logps/rejected": -6.787447452545166, | |
"eval_loss": 0.27035781741142273, | |
"eval_rewards/accuracies": 0.918367326259613, | |
"eval_rewards/chosen": -4.688731670379639, | |
"eval_rewards/margins": 5.492439270019531, | |
"eval_rewards/rejected": -10.181171417236328, | |
"eval_runtime": 29.0227, | |
"eval_samples_per_second": 26.772, | |
"eval_steps_per_second": 3.377, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.5078572171922157, | |
"grad_norm": 3.1104886531829834, | |
"learning_rate": 3.222579492361179e-06, | |
"logits/chosen": 2.582984447479248, | |
"logits/rejected": 2.424341917037964, | |
"logps/chosen": -3.0132031440734863, | |
"logps/rejected": -6.317469596862793, | |
"loss": 0.2598, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -4.519804954528809, | |
"rewards/margins": 4.956398963928223, | |
"rewards/rejected": -9.476203918457031, | |
"step": 610 | |
}, | |
{ | |
"epoch": 0.5161827453429076, | |
"grad_norm": 12.320380210876465, | |
"learning_rate": 3.1722995515381644e-06, | |
"logits/chosen": 2.1016178131103516, | |
"logits/rejected": 2.345324754714966, | |
"logps/chosen": -3.1399683952331543, | |
"logps/rejected": -7.096994876861572, | |
"loss": 0.2601, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -4.709952354431152, | |
"rewards/margins": 5.935539722442627, | |
"rewards/rejected": -10.645492553710938, | |
"step": 620 | |
}, | |
{ | |
"epoch": 0.5245082734935997, | |
"grad_norm": 2.704423189163208, | |
"learning_rate": 3.121724717912138e-06, | |
"logits/chosen": 2.108675718307495, | |
"logits/rejected": 2.369410991668701, | |
"logps/chosen": -3.6519737243652344, | |
"logps/rejected": -6.964946746826172, | |
"loss": 0.2351, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.477960586547852, | |
"rewards/margins": 4.96945858001709, | |
"rewards/rejected": -10.447419166564941, | |
"step": 630 | |
}, | |
{ | |
"epoch": 0.5328338016442918, | |
"grad_norm": 4.401206970214844, | |
"learning_rate": 3.0708771752766397e-06, | |
"logits/chosen": 2.3692595958709717, | |
"logits/rejected": 2.5313620567321777, | |
"logps/chosen": -4.0485663414001465, | |
"logps/rejected": -7.747661590576172, | |
"loss": 0.2265, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -6.072849750518799, | |
"rewards/margins": 5.548642635345459, | |
"rewards/rejected": -11.621491432189941, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.5411593297949838, | |
"grad_norm": 4.68662166595459, | |
"learning_rate": 3.019779227044398e-06, | |
"logits/chosen": 2.4383034706115723, | |
"logits/rejected": 2.4655585289001465, | |
"logps/chosen": -3.8650074005126953, | |
"logps/rejected": -7.987051963806152, | |
"loss": 0.263, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.797511100769043, | |
"rewards/margins": 6.183066368103027, | |
"rewards/rejected": -11.98057746887207, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.5411593297949838, | |
"eval_logits/chosen": 2.7321341037750244, | |
"eval_logits/rejected": 2.906801700592041, | |
"eval_logps/chosen": -3.7255136966705322, | |
"eval_logps/rejected": -7.620375633239746, | |
"eval_loss": 0.26394686102867126, | |
"eval_rewards/accuracies": 0.9285714030265808, | |
"eval_rewards/chosen": -5.5882697105407715, | |
"eval_rewards/margins": 5.8422932624816895, | |
"eval_rewards/rejected": -11.430564880371094, | |
"eval_runtime": 29.0258, | |
"eval_samples_per_second": 26.769, | |
"eval_steps_per_second": 3.376, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.5494848579456759, | |
"grad_norm": 4.704371929168701, | |
"learning_rate": 2.9684532864643123e-06, | |
"logits/chosen": 2.7277207374572754, | |
"logits/rejected": 2.7106287479400635, | |
"logps/chosen": -3.979590654373169, | |
"logps/rejected": -6.88008975982666, | |
"loss": 0.2933, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -5.969386100769043, | |
"rewards/margins": 4.350748062133789, | |
"rewards/rejected": -10.320135116577148, | |
"step": 660 | |
}, | |
{ | |
"epoch": 0.557810386096368, | |
"grad_norm": 3.2897160053253174, | |
"learning_rate": 2.9169218667902562e-06, | |
"logits/chosen": 2.207106113433838, | |
"logits/rejected": 2.454056978225708, | |
"logps/chosen": -3.760200023651123, | |
"logps/rejected": -7.504108428955078, | |
"loss": 0.2262, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -5.6402997970581055, | |
"rewards/margins": 5.615862846374512, | |
"rewards/rejected": -11.256162643432617, | |
"step": 670 | |
}, | |
{ | |
"epoch": 0.56613591424706, | |
"grad_norm": 3.6699540615081787, | |
"learning_rate": 2.8652075714060296e-06, | |
"logits/chosen": 2.5904622077941895, | |
"logits/rejected": 2.693467617034912, | |
"logps/chosen": -3.2713139057159424, | |
"logps/rejected": -7.3422722816467285, | |
"loss": 0.2721, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -4.906970500946045, | |
"rewards/margins": 6.106438636779785, | |
"rewards/rejected": -11.013408660888672, | |
"step": 680 | |
}, | |
{ | |
"epoch": 0.5744614423977521, | |
"grad_norm": 3.054532289505005, | |
"learning_rate": 2.813333083910761e-06, | |
"logits/chosen": 2.9145145416259766, | |
"logits/rejected": 2.7135214805603027, | |
"logps/chosen": -3.5082690715789795, | |
"logps/rejected": -7.293328762054443, | |
"loss": 0.271, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.26240348815918, | |
"rewards/margins": 5.677589416503906, | |
"rewards/rejected": -10.939992904663086, | |
"step": 690 | |
}, | |
{ | |
"epoch": 0.5827869705484442, | |
"grad_norm": 3.5161256790161133, | |
"learning_rate": 2.761321158169134e-06, | |
"logits/chosen": 2.915343761444092, | |
"logits/rejected": 2.731520891189575, | |
"logps/chosen": -3.4292550086975098, | |
"logps/rejected": -8.124921798706055, | |
"loss": 0.1985, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.143881797790527, | |
"rewards/margins": 7.043501377105713, | |
"rewards/rejected": -12.187383651733398, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.5827869705484442, | |
"eval_logits/chosen": 2.5902156829833984, | |
"eval_logits/rejected": 2.774846315383911, | |
"eval_logps/chosen": -3.5158140659332275, | |
"eval_logps/rejected": -7.544556140899658, | |
"eval_loss": 0.24698135256767273, | |
"eval_rewards/accuracies": 0.9285714030265808, | |
"eval_rewards/chosen": -5.273721694946289, | |
"eval_rewards/margins": 6.043112754821777, | |
"eval_rewards/rejected": -11.31683349609375, | |
"eval_runtime": 29.0187, | |
"eval_samples_per_second": 26.776, | |
"eval_steps_per_second": 3.377, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.5911124986991362, | |
"grad_norm": 3.2246947288513184, | |
"learning_rate": 2.70919460833079e-06, | |
"logits/chosen": 2.9566922187805176, | |
"logits/rejected": 2.874277353286743, | |
"logps/chosen": -3.772322177886963, | |
"logps/rejected": -7.461319923400879, | |
"loss": 0.2565, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.658483028411865, | |
"rewards/margins": 5.533496856689453, | |
"rewards/rejected": -11.191980361938477, | |
"step": 710 | |
}, | |
{ | |
"epoch": 0.5994380268498283, | |
"grad_norm": 4.457447052001953, | |
"learning_rate": 2.6569762988232838e-06, | |
"logits/chosen": 2.653148889541626, | |
"logits/rejected": 2.646437168121338, | |
"logps/chosen": -3.8250937461853027, | |
"logps/rejected": -7.855221748352051, | |
"loss": 0.2244, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -5.737640857696533, | |
"rewards/margins": 6.045191287994385, | |
"rewards/rejected": -11.782832145690918, | |
"step": 720 | |
}, | |
{ | |
"epoch": 0.6077635550005204, | |
"grad_norm": 3.477293014526367, | |
"learning_rate": 2.604689134322999e-06, | |
"logits/chosen": 2.2635607719421387, | |
"logits/rejected": 2.2247064113616943, | |
"logps/chosen": -3.974703550338745, | |
"logps/rejected": -8.289571762084961, | |
"loss": 0.2294, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.962055206298828, | |
"rewards/margins": 6.4723029136657715, | |
"rewards/rejected": -12.434357643127441, | |
"step": 730 | |
}, | |
{ | |
"epoch": 0.6160890831512124, | |
"grad_norm": 1.6821621656417847, | |
"learning_rate": 2.5523560497083927e-06, | |
"logits/chosen": 1.8432185649871826, | |
"logits/rejected": 1.9002739191055298, | |
"logps/chosen": -3.8650963306427, | |
"logps/rejected": -7.553779602050781, | |
"loss": 0.2221, | |
"rewards/accuracies": 0.862500011920929, | |
"rewards/chosen": -5.79764461517334, | |
"rewards/margins": 5.533024787902832, | |
"rewards/rejected": -11.330669403076172, | |
"step": 740 | |
}, | |
{ | |
"epoch": 0.6244146113019045, | |
"grad_norm": 24.729644775390625, | |
"learning_rate": 2.5e-06, | |
"logits/chosen": 2.5135562419891357, | |
"logits/rejected": 2.6035869121551514, | |
"logps/chosen": -3.6619372367858887, | |
"logps/rejected": -7.801999568939209, | |
"loss": 0.2724, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.492905616760254, | |
"rewards/margins": 6.2100934982299805, | |
"rewards/rejected": -11.702998161315918, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.6244146113019045, | |
"eval_logits/chosen": 2.876950979232788, | |
"eval_logits/rejected": 3.0243964195251465, | |
"eval_logps/chosen": -3.517216682434082, | |
"eval_logps/rejected": -7.607268810272217, | |
"eval_loss": 0.24484822154045105, | |
"eval_rewards/accuracies": 0.9387755393981934, | |
"eval_rewards/chosen": -5.275824546813965, | |
"eval_rewards/margins": 6.135078430175781, | |
"eval_rewards/rejected": -11.410903930664062, | |
"eval_runtime": 28.9129, | |
"eval_samples_per_second": 26.874, | |
"eval_steps_per_second": 3.389, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.6327401394525966, | |
"grad_norm": 9.702905654907227, | |
"learning_rate": 2.447643950291608e-06, | |
"logits/chosen": 2.693587064743042, | |
"logits/rejected": 2.6106948852539062, | |
"logps/chosen": -3.7441153526306152, | |
"logps/rejected": -7.564157009124756, | |
"loss": 0.2506, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.61617374420166, | |
"rewards/margins": 5.730062961578369, | |
"rewards/rejected": -11.346236228942871, | |
"step": 760 | |
}, | |
{ | |
"epoch": 0.6410656676032885, | |
"grad_norm": 8.551860809326172, | |
"learning_rate": 2.3953108656770018e-06, | |
"logits/chosen": 2.894711971282959, | |
"logits/rejected": 3.036170482635498, | |
"logps/chosen": -3.972269058227539, | |
"logps/rejected": -8.38014030456543, | |
"loss": 0.2107, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.958403587341309, | |
"rewards/margins": 6.6118059158325195, | |
"rewards/rejected": -12.570208549499512, | |
"step": 770 | |
}, | |
{ | |
"epoch": 0.6493911957539806, | |
"grad_norm": 2.4394350051879883, | |
"learning_rate": 2.3430237011767166e-06, | |
"logits/chosen": 3.1415820121765137, | |
"logits/rejected": 3.1218018531799316, | |
"logps/chosen": -4.007376194000244, | |
"logps/rejected": -8.103262901306152, | |
"loss": 0.1886, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -6.011064052581787, | |
"rewards/margins": 6.1438307762146, | |
"rewards/rejected": -12.154894828796387, | |
"step": 780 | |
}, | |
{ | |
"epoch": 0.6577167239046727, | |
"grad_norm": 3.69184947013855, | |
"learning_rate": 2.290805391669212e-06, | |
"logits/chosen": 3.3487350940704346, | |
"logits/rejected": 3.5375237464904785, | |
"logps/chosen": -3.7646141052246094, | |
"logps/rejected": -7.569940090179443, | |
"loss": 0.2106, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.646921634674072, | |
"rewards/margins": 5.707989692687988, | |
"rewards/rejected": -11.354910850524902, | |
"step": 790 | |
}, | |
{ | |
"epoch": 0.6660422520553647, | |
"grad_norm": 4.604506015777588, | |
"learning_rate": 2.238678841830867e-06, | |
"logits/chosen": 3.159898519515991, | |
"logits/rejected": 3.09334135055542, | |
"logps/chosen": -4.009636878967285, | |
"logps/rejected": -7.4454545974731445, | |
"loss": 0.2379, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -6.014455318450928, | |
"rewards/margins": 5.1537251472473145, | |
"rewards/rejected": -11.168180465698242, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.6660422520553647, | |
"eval_logits/chosen": 2.748328924179077, | |
"eval_logits/rejected": 2.9500906467437744, | |
"eval_logps/chosen": -3.652164936065674, | |
"eval_logps/rejected": -7.951470375061035, | |
"eval_loss": 0.23568958044052124, | |
"eval_rewards/accuracies": 0.9387755393981934, | |
"eval_rewards/chosen": -5.478247165679932, | |
"eval_rewards/margins": 6.448958396911621, | |
"eval_rewards/rejected": -11.927205085754395, | |
"eval_runtime": 29.021, | |
"eval_samples_per_second": 26.774, | |
"eval_steps_per_second": 3.377, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.6743677802060568, | |
"grad_norm": 3.968970537185669, | |
"learning_rate": 2.186666916089239e-06, | |
"logits/chosen": 2.384208917617798, | |
"logits/rejected": 2.3336739540100098, | |
"logps/chosen": -3.8832621574401855, | |
"logps/rejected": -7.72598123550415, | |
"loss": 0.2706, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.824892520904541, | |
"rewards/margins": 5.764077663421631, | |
"rewards/rejected": -11.588971138000488, | |
"step": 810 | |
}, | |
{ | |
"epoch": 0.6826933083567489, | |
"grad_norm": 3.6892929077148438, | |
"learning_rate": 2.134792428593971e-06, | |
"logits/chosen": 3.5869107246398926, | |
"logits/rejected": 3.517749786376953, | |
"logps/chosen": -3.306342363357544, | |
"logps/rejected": -7.020272254943848, | |
"loss": 0.2398, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -4.9595136642456055, | |
"rewards/margins": 5.570894718170166, | |
"rewards/rejected": -10.530407905578613, | |
"step": 820 | |
}, | |
{ | |
"epoch": 0.6910188365074409, | |
"grad_norm": 4.89448881149292, | |
"learning_rate": 2.0830781332097446e-06, | |
"logits/chosen": 2.5076346397399902, | |
"logits/rejected": 2.3836727142333984, | |
"logps/chosen": -3.843027114868164, | |
"logps/rejected": -7.852384090423584, | |
"loss": 0.2116, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.764540672302246, | |
"rewards/margins": 6.014035701751709, | |
"rewards/rejected": -11.77857494354248, | |
"step": 830 | |
}, | |
{ | |
"epoch": 0.699344364658133, | |
"grad_norm": 8.198432922363281, | |
"learning_rate": 2.031546713535688e-06, | |
"logits/chosen": 2.5533287525177, | |
"logits/rejected": 2.407637357711792, | |
"logps/chosen": -3.574105739593506, | |
"logps/rejected": -8.23727798461914, | |
"loss": 0.2415, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.361158847808838, | |
"rewards/margins": 6.994758605957031, | |
"rewards/rejected": -12.355916976928711, | |
"step": 840 | |
}, | |
{ | |
"epoch": 0.7076698928088251, | |
"grad_norm": 4.123171329498291, | |
"learning_rate": 1.9802207729556023e-06, | |
"logits/chosen": 2.4909422397613525, | |
"logits/rejected": 2.3119165897369385, | |
"logps/chosen": -3.927218198776245, | |
"logps/rejected": -7.961021423339844, | |
"loss": 0.2217, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.89082670211792, | |
"rewards/margins": 6.050704002380371, | |
"rewards/rejected": -11.94153118133545, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.7076698928088251, | |
"eval_logits/chosen": 2.858954668045044, | |
"eval_logits/rejected": 3.012629270553589, | |
"eval_logps/chosen": -3.577458381652832, | |
"eval_logps/rejected": -7.837220668792725, | |
"eval_loss": 0.23848077654838562, | |
"eval_rewards/accuracies": 0.9387755393981934, | |
"eval_rewards/chosen": -5.36618709564209, | |
"eval_rewards/margins": 6.389642715454102, | |
"eval_rewards/rejected": -11.755829811096191, | |
"eval_runtime": 29.02, | |
"eval_samples_per_second": 26.775, | |
"eval_steps_per_second": 3.377, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.7159954209595171, | |
"grad_norm": 3.4179177284240723, | |
"learning_rate": 1.9291228247233607e-06, | |
"logits/chosen": 2.535378932952881, | |
"logits/rejected": 2.5335640907287598, | |
"logps/chosen": -3.541815996170044, | |
"logps/rejected": -7.519083499908447, | |
"loss": 0.2167, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.312723159790039, | |
"rewards/margins": 5.965902328491211, | |
"rewards/rejected": -11.27862548828125, | |
"step": 860 | |
}, | |
{ | |
"epoch": 0.7243209491102092, | |
"grad_norm": 1.8562341928482056, | |
"learning_rate": 1.8782752820878636e-06, | |
"logits/chosen": 3.0650887489318848, | |
"logits/rejected": 2.7918925285339355, | |
"logps/chosen": -3.791342258453369, | |
"logps/rejected": -7.656645774841309, | |
"loss": 0.1925, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.687013149261475, | |
"rewards/margins": 5.7979559898376465, | |
"rewards/rejected": -11.484968185424805, | |
"step": 870 | |
}, | |
{ | |
"epoch": 0.7326464772609013, | |
"grad_norm": 9.719799995422363, | |
"learning_rate": 1.827700448461836e-06, | |
"logits/chosen": 2.4594621658325195, | |
"logits/rejected": 2.4324564933776855, | |
"logps/chosen": -3.6558470726013184, | |
"logps/rejected": -8.101290702819824, | |
"loss": 0.1975, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.483770847320557, | |
"rewards/margins": 6.6681647300720215, | |
"rewards/rejected": -12.151935577392578, | |
"step": 880 | |
}, | |
{ | |
"epoch": 0.7409720054115932, | |
"grad_norm": 3.240176200866699, | |
"learning_rate": 1.7774205076388207e-06, | |
"logits/chosen": 2.689762592315674, | |
"logits/rejected": 2.553614616394043, | |
"logps/chosen": -3.3837451934814453, | |
"logps/rejected": -7.7740020751953125, | |
"loss": 0.177, | |
"rewards/accuracies": 1.0, | |
"rewards/chosen": -5.075617790222168, | |
"rewards/margins": 6.585384368896484, | |
"rewards/rejected": -11.661002159118652, | |
"step": 890 | |
}, | |
{ | |
"epoch": 0.7492975335622853, | |
"grad_norm": 3.8752946853637695, | |
"learning_rate": 1.7274575140626318e-06, | |
"logits/chosen": 3.2561440467834473, | |
"logits/rejected": 3.13822603225708, | |
"logps/chosen": -3.69258451461792, | |
"logps/rejected": -7.472433567047119, | |
"loss": 0.213, | |
"rewards/accuracies": 0.8999999761581421, | |
"rewards/chosen": -5.538876533508301, | |
"rewards/margins": 5.669772624969482, | |
"rewards/rejected": -11.208650588989258, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.7492975335622853, | |
"eval_logits/chosen": 2.8268215656280518, | |
"eval_logits/rejected": 3.031662702560425, | |
"eval_logps/chosen": -3.6311440467834473, | |
"eval_logps/rejected": -8.067394256591797, | |
"eval_loss": 0.23127013444900513, | |
"eval_rewards/accuracies": 0.9285714030265808, | |
"eval_rewards/chosen": -5.44671630859375, | |
"eval_rewards/margins": 6.654376029968262, | |
"eval_rewards/rejected": -12.101091384887695, | |
"eval_runtime": 29.022, | |
"eval_samples_per_second": 26.773, | |
"eval_steps_per_second": 3.377, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.7576230617129774, | |
"grad_norm": 3.4024012088775635, | |
"learning_rate": 1.677833383153542e-06, | |
"logits/chosen": 2.0691773891448975, | |
"logits/rejected": 2.190563201904297, | |
"logps/chosen": -3.483668565750122, | |
"logps/rejected": -8.020956039428711, | |
"loss": 0.198, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -5.225502967834473, | |
"rewards/margins": 6.805932521820068, | |
"rewards/rejected": -12.0314359664917, | |
"step": 910 | |
}, | |
{ | |
"epoch": 0.7659485898636694, | |
"grad_norm": 4.999133586883545, | |
"learning_rate": 1.6285698816954626e-06, | |
"logits/chosen": 2.4453094005584717, | |
"logits/rejected": 2.440931558609009, | |
"logps/chosen": -4.1138916015625, | |
"logps/rejected": -8.617280960083008, | |
"loss": 0.253, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -6.170836925506592, | |
"rewards/margins": 6.7550835609436035, | |
"rewards/rejected": -12.925920486450195, | |
"step": 920 | |
}, | |
{ | |
"epoch": 0.7742741180143615, | |
"grad_norm": 3.1391687393188477, | |
"learning_rate": 1.5796886182883053e-06, | |
"logits/chosen": 2.892235517501831, | |
"logits/rejected": 2.8754334449768066, | |
"logps/chosen": -3.8762309551239014, | |
"logps/rejected": -7.991665840148926, | |
"loss": 0.2171, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.814346790313721, | |
"rewards/margins": 6.173151969909668, | |
"rewards/rejected": -11.98749828338623, | |
"step": 930 | |
}, | |
{ | |
"epoch": 0.7825996461650536, | |
"grad_norm": 6.850193023681641, | |
"learning_rate": 1.5312110338697427e-06, | |
"logits/chosen": 3.0068447589874268, | |
"logits/rejected": 3.0385780334472656, | |
"logps/chosen": -3.7039177417755127, | |
"logps/rejected": -8.53662109375, | |
"loss": 0.1907, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.555876731872559, | |
"rewards/margins": 7.2490553855896, | |
"rewards/rejected": -12.804931640625, | |
"step": 940 | |
}, | |
{ | |
"epoch": 0.7909251743157456, | |
"grad_norm": 16.202392578125, | |
"learning_rate": 1.4831583923105e-06, | |
"logits/chosen": 2.445254325866699, | |
"logits/rejected": 2.6017098426818848, | |
"logps/chosen": -4.0695037841796875, | |
"logps/rejected": -8.545947074890137, | |
"loss": 0.2033, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -6.104255676269531, | |
"rewards/margins": 6.714664459228516, | |
"rewards/rejected": -12.818921089172363, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.7909251743157456, | |
"eval_logits/chosen": 2.7845335006713867, | |
"eval_logits/rejected": 3.037020206451416, | |
"eval_logps/chosen": -3.982541799545288, | |
"eval_logps/rejected": -8.498592376708984, | |
"eval_loss": 0.22774070501327515, | |
"eval_rewards/accuracies": 0.9387755393981934, | |
"eval_rewards/chosen": -5.973812580108643, | |
"eval_rewards/margins": 6.77407693862915, | |
"eval_rewards/rejected": -12.747888565063477, | |
"eval_runtime": 29.0201, | |
"eval_samples_per_second": 26.775, | |
"eval_steps_per_second": 3.377, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.7992507024664377, | |
"grad_norm": 4.31044864654541, | |
"learning_rate": 1.4355517710873184e-06, | |
"logits/chosen": 2.1485049724578857, | |
"logits/rejected": 2.493374824523926, | |
"logps/chosen": -3.8115482330322266, | |
"logps/rejected": -8.553500175476074, | |
"loss": 0.2109, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.71732234954834, | |
"rewards/margins": 7.112928867340088, | |
"rewards/rejected": -12.83025074005127, | |
"step": 960 | |
}, | |
{ | |
"epoch": 0.8075762306171298, | |
"grad_norm": 4.177423000335693, | |
"learning_rate": 1.388412052037682e-06, | |
"logits/chosen": 2.9300179481506348, | |
"logits/rejected": 2.9548909664154053, | |
"logps/chosen": -3.9784176349639893, | |
"logps/rejected": -8.308394432067871, | |
"loss": 0.2012, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.967626094818115, | |
"rewards/margins": 6.49496603012085, | |
"rewards/rejected": -12.462592124938965, | |
"step": 970 | |
}, | |
{ | |
"epoch": 0.8159017587678218, | |
"grad_norm": 4.683027744293213, | |
"learning_rate": 1.3417599122003464e-06, | |
"logits/chosen": 2.5800061225891113, | |
"logits/rejected": 2.526090145111084, | |
"logps/chosen": -3.86810564994812, | |
"logps/rejected": -8.47614574432373, | |
"loss": 0.2141, | |
"rewards/accuracies": 0.925000011920929, | |
"rewards/chosen": -5.802158355712891, | |
"rewards/margins": 6.9120612144470215, | |
"rewards/rejected": -12.714218139648438, | |
"step": 980 | |
}, | |
{ | |
"epoch": 0.8242272869185139, | |
"grad_norm": 3.7419984340667725, | |
"learning_rate": 1.2956158147457116e-06, | |
"logits/chosen": 3.4706058502197266, | |
"logits/rejected": 3.4088757038116455, | |
"logps/chosen": -4.216760158538818, | |
"logps/rejected": -8.575207710266113, | |
"loss": 0.2422, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -6.325140953063965, | |
"rewards/margins": 6.537671089172363, | |
"rewards/rejected": -12.862811088562012, | |
"step": 990 | |
}, | |
{ | |
"epoch": 0.832552815069206, | |
"grad_norm": 8.953512191772461, | |
"learning_rate": 1.2500000000000007e-06, | |
"logits/chosen": 2.9276206493377686, | |
"logits/rejected": 2.946265459060669, | |
"logps/chosen": -3.9976966381073, | |
"logps/rejected": -8.48410701751709, | |
"loss": 0.2139, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.996545314788818, | |
"rewards/margins": 6.729616641998291, | |
"rewards/rejected": -12.726162910461426, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.832552815069206, | |
"eval_logits/chosen": 2.9153146743774414, | |
"eval_logits/rejected": 3.0989012718200684, | |
"eval_logps/chosen": -3.6678271293640137, | |
"eval_logps/rejected": -8.173608779907227, | |
"eval_loss": 0.22841480374336243, | |
"eval_rewards/accuracies": 0.9285714030265808, | |
"eval_rewards/chosen": -5.5017409324646, | |
"eval_rewards/margins": 6.758671760559082, | |
"eval_rewards/rejected": -12.26041316986084, | |
"eval_runtime": 29.0504, | |
"eval_samples_per_second": 26.747, | |
"eval_steps_per_second": 3.373, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.840878343219898, | |
"grad_norm": 4.267103672027588, | |
"learning_rate": 1.204932476567175e-06, | |
"logits/chosen": 2.3057751655578613, | |
"logits/rejected": 2.3750340938568115, | |
"logps/chosen": -3.576403856277466, | |
"logps/rejected": -8.301278114318848, | |
"loss": 0.2211, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.364605903625488, | |
"rewards/margins": 7.087311744689941, | |
"rewards/rejected": -12.45191764831543, | |
"step": 1010 | |
}, | |
{ | |
"epoch": 0.8492038713705901, | |
"grad_norm": 6.008708477020264, | |
"learning_rate": 1.160433012552508e-06, | |
"logits/chosen": 3.0167624950408936, | |
"logits/rejected": 2.817478895187378, | |
"logps/chosen": -4.053152084350586, | |
"logps/rejected": -8.841009140014648, | |
"loss": 0.195, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -6.079728126525879, | |
"rewards/margins": 7.181784152984619, | |
"rewards/rejected": -13.261512756347656, | |
"step": 1020 | |
}, | |
{ | |
"epoch": 0.8575293995212822, | |
"grad_norm": 3.7652032375335693, | |
"learning_rate": 1.11652112689164e-06, | |
"logits/chosen": 2.3387794494628906, | |
"logits/rejected": 2.420820474624634, | |
"logps/chosen": -4.114675045013428, | |
"logps/rejected": -8.801934242248535, | |
"loss": 0.2226, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -6.172013282775879, | |
"rewards/margins": 7.030886650085449, | |
"rewards/rejected": -13.202900886535645, | |
"step": 1030 | |
}, | |
{ | |
"epoch": 0.8658549276719741, | |
"grad_norm": 3.811018466949463, | |
"learning_rate": 1.073216080788921e-06, | |
"logits/chosen": 3.4545624256134033, | |
"logits/rejected": 2.934145212173462, | |
"logps/chosen": -3.841254472732544, | |
"logps/rejected": -8.547441482543945, | |
"loss": 0.2039, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.7618818283081055, | |
"rewards/margins": 7.059278964996338, | |
"rewards/rejected": -12.821161270141602, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 0.8741804558226662, | |
"grad_norm": 3.5039620399475098, | |
"learning_rate": 1.0305368692688175e-06, | |
"logits/chosen": 1.9293429851531982, | |
"logits/rejected": 2.530273914337158, | |
"logps/chosen": -3.6742749214172363, | |
"logps/rejected": -8.751821517944336, | |
"loss": 0.2168, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -5.511412143707275, | |
"rewards/margins": 7.616321563720703, | |
"rewards/rejected": -13.127734184265137, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 0.8741804558226662, | |
"eval_logits/chosen": 2.9263997077941895, | |
"eval_logits/rejected": 3.1277804374694824, | |
"eval_logps/chosen": -3.6399991512298584, | |
"eval_logps/rejected": -8.258426666259766, | |
"eval_loss": 0.2207891196012497, | |
"eval_rewards/accuracies": 0.9387755393981934, | |
"eval_rewards/chosen": -5.45999813079834, | |
"eval_rewards/margins": 6.92764139175415, | |
"eval_rewards/rejected": -12.387639045715332, | |
"eval_runtime": 29.0247, | |
"eval_samples_per_second": 26.77, | |
"eval_steps_per_second": 3.376, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 0.8825059839733583, | |
"grad_norm": 5.916813373565674, | |
"learning_rate": 9.88502212844063e-07, | |
"logits/chosen": 3.1234467029571533, | |
"logits/rejected": 3.058065891265869, | |
"logps/chosen": -3.8148319721221924, | |
"logps/rejected": -8.512906074523926, | |
"loss": 0.2123, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.722247123718262, | |
"rewards/margins": 7.047112464904785, | |
"rewards/rejected": -12.769360542297363, | |
"step": 1060 | |
}, | |
{ | |
"epoch": 0.8908315121240503, | |
"grad_norm": 1.9670017957687378, | |
"learning_rate": 9.471305493042243e-07, | |
"logits/chosen": 3.48276948928833, | |
"logits/rejected": 2.9211738109588623, | |
"logps/chosen": -4.147209167480469, | |
"logps/rejected": -8.622703552246094, | |
"loss": 0.1951, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -6.220814228057861, | |
"rewards/margins": 6.7132415771484375, | |
"rewards/rejected": -12.934056282043457, | |
"step": 1070 | |
}, | |
{ | |
"epoch": 0.8991570402747424, | |
"grad_norm": 4.044788837432861, | |
"learning_rate": 9.064400256282757e-07, | |
"logits/chosen": 1.3344472646713257, | |
"logits/rejected": 2.0601110458374023, | |
"logps/chosen": -4.18247127532959, | |
"logps/rejected": -9.543882369995117, | |
"loss": 0.2129, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -6.273707389831543, | |
"rewards/margins": 8.042116165161133, | |
"rewards/rejected": -14.315823554992676, | |
"step": 1080 | |
}, | |
{ | |
"epoch": 0.9074825684254345, | |
"grad_norm": 8.981308937072754, | |
"learning_rate": 8.664484900247363e-07, | |
"logits/chosen": 3.2444870471954346, | |
"logits/rejected": 3.3333630561828613, | |
"logps/chosen": -3.4744930267333984, | |
"logps/rejected": -8.478456497192383, | |
"loss": 0.1827, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.2117390632629395, | |
"rewards/margins": 7.50594425201416, | |
"rewards/rejected": -12.717683792114258, | |
"step": 1090 | |
}, | |
{ | |
"epoch": 0.9158080965761265, | |
"grad_norm": 5.613018035888672, | |
"learning_rate": 8.271734841028553e-07, | |
"logits/chosen": 3.114234209060669, | |
"logits/rejected": 2.8669419288635254, | |
"logps/chosen": -3.5276169776916504, | |
"logps/rejected": -7.26898193359375, | |
"loss": 0.1883, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.291424751281738, | |
"rewards/margins": 5.6120476722717285, | |
"rewards/rejected": -10.903471946716309, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 0.9158080965761265, | |
"eval_logits/chosen": 2.896069288253784, | |
"eval_logits/rejected": 3.120903730392456, | |
"eval_logps/chosen": -3.6780893802642822, | |
"eval_logps/rejected": -8.3290376663208, | |
"eval_loss": 0.21761386096477509, | |
"eval_rewards/accuracies": 0.9285714030265808, | |
"eval_rewards/chosen": -5.517134189605713, | |
"eval_rewards/margins": 6.9764227867126465, | |
"eval_rewards/rejected": -12.493557929992676, | |
"eval_runtime": 29.0228, | |
"eval_samples_per_second": 26.772, | |
"eval_steps_per_second": 3.377, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 0.9241336247268186, | |
"grad_norm": 6.375245571136475, | |
"learning_rate": 7.886322351782782e-07, | |
"logits/chosen": 2.5309860706329346, | |
"logits/rejected": 2.6255879402160645, | |
"logps/chosen": -4.059412002563477, | |
"logps/rejected": -9.413751602172852, | |
"loss": 0.2197, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -6.089118003845215, | |
"rewards/margins": 8.03150749206543, | |
"rewards/rejected": -14.120626449584961, | |
"step": 1110 | |
}, | |
{ | |
"epoch": 0.9324591528775107, | |
"grad_norm": 3.5590834617614746, | |
"learning_rate": 7.508416487165862e-07, | |
"logits/chosen": 3.3510899543762207, | |
"logits/rejected": 3.4622738361358643, | |
"logps/chosen": -4.005453586578369, | |
"logps/rejected": -9.220897674560547, | |
"loss": 0.2071, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -6.008180141448975, | |
"rewards/margins": 7.823166847229004, | |
"rewards/rejected": -13.83134651184082, | |
"step": 1120 | |
}, | |
{ | |
"epoch": 0.9407846810282027, | |
"grad_norm": 4.397263050079346, | |
"learning_rate": 7.138183009179922e-07, | |
"logits/chosen": 3.1275603771209717, | |
"logits/rejected": 2.9770944118499756, | |
"logps/chosen": -4.224826812744141, | |
"logps/rejected": -8.15820026397705, | |
"loss": 0.2331, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -6.337240219116211, | |
"rewards/margins": 5.900059700012207, | |
"rewards/rejected": -12.237300872802734, | |
"step": 1130 | |
}, | |
{ | |
"epoch": 0.9491102091788948, | |
"grad_norm": 4.102133750915527, | |
"learning_rate": 6.775784314464717e-07, | |
"logits/chosen": 2.8464298248291016, | |
"logits/rejected": 2.4384379386901855, | |
"logps/chosen": -3.9073352813720703, | |
"logps/rejected": -7.805499076843262, | |
"loss": 0.2223, | |
"rewards/accuracies": 0.887499988079071, | |
"rewards/chosen": -5.8610029220581055, | |
"rewards/margins": 5.847245216369629, | |
"rewards/rejected": -11.708248138427734, | |
"step": 1140 | |
}, | |
{ | |
"epoch": 0.9574357373295869, | |
"grad_norm": 4.830289363861084, | |
"learning_rate": 6.421379363065142e-07, | |
"logits/chosen": 2.3135313987731934, | |
"logits/rejected": 2.514207124710083, | |
"logps/chosen": -4.163815498352051, | |
"logps/rejected": -9.554147720336914, | |
"loss": 0.184, | |
"rewards/accuracies": 0.987500011920929, | |
"rewards/chosen": -6.245722770690918, | |
"rewards/margins": 8.085500717163086, | |
"rewards/rejected": -14.331222534179688, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 0.9574357373295869, | |
"eval_logits/chosen": 2.9589338302612305, | |
"eval_logits/rejected": 3.1358554363250732, | |
"eval_logps/chosen": -3.7160890102386475, | |
"eval_logps/rejected": -8.306242942810059, | |
"eval_loss": 0.22364133596420288, | |
"eval_rewards/accuracies": 0.9489796161651611, | |
"eval_rewards/chosen": -5.57413387298584, | |
"eval_rewards/margins": 6.8852314949035645, | |
"eval_rewards/rejected": -12.45936393737793, | |
"eval_runtime": 29.0274, | |
"eval_samples_per_second": 26.768, | |
"eval_steps_per_second": 3.376, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 0.9657612654802789, | |
"grad_norm": 3.550083637237549, | |
"learning_rate": 6.075123608706093e-07, | |
"logits/chosen": 3.0239412784576416, | |
"logits/rejected": 3.124316930770874, | |
"logps/chosen": -3.9214415550231934, | |
"logps/rejected": -8.38886547088623, | |
"loss": 0.2249, | |
"rewards/accuracies": 0.9750000238418579, | |
"rewards/chosen": -5.882162094116211, | |
"rewards/margins": 6.701135158538818, | |
"rewards/rejected": -12.583298683166504, | |
"step": 1160 | |
}, | |
{ | |
"epoch": 0.974086793630971, | |
"grad_norm": 6.0046515464782715, | |
"learning_rate": 5.737168930605272e-07, | |
"logits/chosen": 3.2713863849639893, | |
"logits/rejected": 3.0897414684295654, | |
"logps/chosen": -3.4735615253448486, | |
"logps/rejected": -8.098161697387695, | |
"loss": 0.1977, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.2103424072265625, | |
"rewards/margins": 6.936898708343506, | |
"rewards/rejected": -12.147241592407227, | |
"step": 1170 | |
}, | |
{ | |
"epoch": 0.982412321781663, | |
"grad_norm": 4.833160400390625, | |
"learning_rate": 5.407663566854008e-07, | |
"logits/chosen": 2.0586276054382324, | |
"logits/rejected": 2.014996290206909, | |
"logps/chosen": -4.311732292175293, | |
"logps/rejected": -9.875633239746094, | |
"loss": 0.2184, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -6.467599391937256, | |
"rewards/margins": 8.345849990844727, | |
"rewards/rejected": -14.813450813293457, | |
"step": 1180 | |
}, | |
{ | |
"epoch": 0.990737849932355, | |
"grad_norm": 3.9890189170837402, | |
"learning_rate": 5.086752049395094e-07, | |
"logits/chosen": 3.0970911979675293, | |
"logits/rejected": 2.8563153743743896, | |
"logps/chosen": -3.985518217086792, | |
"logps/rejected": -8.199251174926758, | |
"loss": 0.2178, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.978277683258057, | |
"rewards/margins": 6.320598125457764, | |
"rewards/rejected": -12.29887580871582, | |
"step": 1190 | |
}, | |
{ | |
"epoch": 0.9990633780830471, | |
"grad_norm": 25.71741485595703, | |
"learning_rate": 4.774575140626317e-07, | |
"logits/chosen": 2.927126884460449, | |
"logits/rejected": 2.802952527999878, | |
"logps/chosen": -3.5128173828125, | |
"logps/rejected": -8.298576354980469, | |
"loss": 0.1799, | |
"rewards/accuracies": 0.9375, | |
"rewards/chosen": -5.269227027893066, | |
"rewards/margins": 7.1786394119262695, | |
"rewards/rejected": -12.44786548614502, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 0.9990633780830471, | |
"eval_logits/chosen": 2.952441692352295, | |
"eval_logits/rejected": 3.141965389251709, | |
"eval_logps/chosen": -3.6855292320251465, | |
"eval_logps/rejected": -8.327260971069336, | |
"eval_loss": 0.21951240301132202, | |
"eval_rewards/accuracies": 0.9489796161651611, | |
"eval_rewards/chosen": -5.528294086456299, | |
"eval_rewards/margins": 6.962599277496338, | |
"eval_rewards/rejected": -12.490893363952637, | |
"eval_runtime": 29.0225, | |
"eval_samples_per_second": 26.772, | |
"eval_steps_per_second": 3.377, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 1.0066604225205535, | |
"grad_norm": 1.9987434148788452, | |
"learning_rate": 4.4712697716573994e-07, | |
"logits/chosen": 2.8181862831115723, | |
"logits/rejected": 2.7676520347595215, | |
"logps/chosen": -4.099400043487549, | |
"logps/rejected": -9.125377655029297, | |
"loss": 0.1878, | |
"rewards/accuracies": 0.9452054500579834, | |
"rewards/chosen": -6.149099826812744, | |
"rewards/margins": 7.538967132568359, | |
"rewards/rejected": -13.688066482543945, | |
"step": 1210 | |
}, | |
{ | |
"epoch": 1.0149859506712458, | |
"grad_norm": 3.452667474746704, | |
"learning_rate": 4.1769689822475147e-07, | |
"logits/chosen": 2.893108367919922, | |
"logits/rejected": 2.860814332962036, | |
"logps/chosen": -3.798161268234253, | |
"logps/rejected": -7.9546356201171875, | |
"loss": 0.1861, | |
"rewards/accuracies": 0.9125000238418579, | |
"rewards/chosen": -5.697242259979248, | |
"rewards/margins": 6.23471212387085, | |
"rewards/rejected": -11.931954383850098, | |
"step": 1220 | |
}, | |
{ | |
"epoch": 1.0233114788219377, | |
"grad_norm": 3.3297345638275146, | |
"learning_rate": 3.891801862449629e-07, | |
"logits/chosen": 2.500004529953003, | |
"logits/rejected": 2.4870145320892334, | |
"logps/chosen": -3.8301002979278564, | |
"logps/rejected": -8.578625679016113, | |
"loss": 0.2212, | |
"rewards/accuracies": 0.987500011920929, | |
"rewards/chosen": -5.745150566101074, | |
"rewards/margins": 7.1227874755859375, | |
"rewards/rejected": -12.867938041687012, | |
"step": 1230 | |
}, | |
{ | |
"epoch": 1.0316370069726297, | |
"grad_norm": 5.022886276245117, | |
"learning_rate": 3.615893495987335e-07, | |
"logits/chosen": 2.6115026473999023, | |
"logits/rejected": 2.8262619972229004, | |
"logps/chosen": -3.562458038330078, | |
"logps/rejected": -8.772577285766602, | |
"loss": 0.1869, | |
"rewards/accuracies": 0.9624999761581421, | |
"rewards/chosen": -5.343687057495117, | |
"rewards/margins": 7.815177917480469, | |
"rewards/rejected": -13.15886402130127, | |
"step": 1240 | |
}, | |
{ | |
"epoch": 1.039962535123322, | |
"grad_norm": 5.58774995803833, | |
"learning_rate": 3.3493649053890325e-07, | |
"logits/chosen": 2.9379220008850098, | |
"logits/rejected": 2.9155845642089844, | |
"logps/chosen": -3.8242735862731934, | |
"logps/rejected": -8.058219909667969, | |
"loss": 0.1817, | |
"rewards/accuracies": 0.949999988079071, | |
"rewards/chosen": -5.736410140991211, | |
"rewards/margins": 6.3509202003479, | |
"rewards/rejected": -12.08733081817627, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 1.039962535123322, | |
"eval_logits/chosen": 2.932387113571167, | |
"eval_logits/rejected": 3.14872407913208, | |
"eval_logps/chosen": -3.7640583515167236, | |
"eval_logps/rejected": -8.459839820861816, | |
"eval_loss": 0.21760709583759308, | |
"eval_rewards/accuracies": 0.9489796161651611, | |
"eval_rewards/chosen": -5.646087169647217, | |
"eval_rewards/margins": 7.043673038482666, | |
"eval_rewards/rejected": -12.689759254455566, | |
"eval_runtime": 29.0272, | |
"eval_samples_per_second": 26.768, | |
"eval_steps_per_second": 3.376, | |
"step": 1250 | |
} | |
], | |
"logging_steps": 10, | |
"max_steps": 1500, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 50, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 3.0429645337117327e+18, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |