|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.998919113673212, |
|
"eval_steps": 100, |
|
"global_step": 2774, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.798561151079137e-08, |
|
"logits/chosen": -2.5878467559814453, |
|
"logits/rejected": -2.596919059753418, |
|
"logps/chosen": -50.55097579956055, |
|
"logps/rejected": -53.270023345947266, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.7985611510791368e-07, |
|
"logits/chosen": -2.6599929332733154, |
|
"logits/rejected": -2.6492068767547607, |
|
"logps/chosen": -58.52377700805664, |
|
"logps/rejected": -61.61543273925781, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -4.6036697312956676e-05, |
|
"rewards/margins": 4.705908213509247e-05, |
|
"rewards/rejected": -9.309577581007034e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 3.5971223021582736e-07, |
|
"logits/chosen": -2.65588641166687, |
|
"logits/rejected": -2.661142110824585, |
|
"logps/chosen": -60.95711135864258, |
|
"logps/rejected": -63.73247146606445, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.00015254078607540578, |
|
"rewards/margins": -0.00013396346184890717, |
|
"rewards/rejected": -1.857726601883769e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 5.39568345323741e-07, |
|
"logits/chosen": -2.626067876815796, |
|
"logits/rejected": -2.6205759048461914, |
|
"logps/chosen": -65.40022277832031, |
|
"logps/rejected": -68.29045104980469, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -3.5934197512688115e-05, |
|
"rewards/margins": -0.00024712778395041823, |
|
"rewards/rejected": 0.0002111935755237937, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 7.194244604316547e-07, |
|
"logits/chosen": -2.6541905403137207, |
|
"logits/rejected": -2.6613316535949707, |
|
"logps/chosen": -58.868675231933594, |
|
"logps/rejected": -62.767356872558594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00014814280439168215, |
|
"rewards/margins": 0.00017737274174578488, |
|
"rewards/rejected": -2.922994281107094e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 8.992805755395684e-07, |
|
"logits/chosen": -2.614741802215576, |
|
"logits/rejected": -2.617932081222534, |
|
"logps/chosen": -59.7147216796875, |
|
"logps/rejected": -61.980995178222656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.00018150641699321568, |
|
"rewards/margins": 0.00013201191904954612, |
|
"rewards/rejected": 4.949455615133047e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 1.079136690647482e-06, |
|
"logits/chosen": -2.6651856899261475, |
|
"logits/rejected": -2.6654398441314697, |
|
"logps/chosen": -68.95173645019531, |
|
"logps/rejected": -71.27698516845703, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00047902195365168154, |
|
"rewards/margins": 0.0001825519575504586, |
|
"rewards/rejected": 0.0002964699815493077, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 1.2589928057553958e-06, |
|
"logits/chosen": -2.6852972507476807, |
|
"logits/rejected": -2.6725258827209473, |
|
"logps/chosen": -68.01790618896484, |
|
"logps/rejected": -72.10859680175781, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.00035084557021036744, |
|
"rewards/margins": 0.00020036422938574106, |
|
"rewards/rejected": 0.00015048135537654161, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 1.4388489208633094e-06, |
|
"logits/chosen": -2.6697287559509277, |
|
"logits/rejected": -2.668147563934326, |
|
"logps/chosen": -70.40176391601562, |
|
"logps/rejected": -73.07498931884766, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.00043141478090547025, |
|
"rewards/margins": 0.00031604542164132, |
|
"rewards/rejected": 0.00011536936654010788, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.618705035971223e-06, |
|
"logits/chosen": -2.670775890350342, |
|
"logits/rejected": -2.674410343170166, |
|
"logps/chosen": -66.90149688720703, |
|
"logps/rejected": -69.80754089355469, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0009530371753498912, |
|
"rewards/margins": 0.0005468233721330762, |
|
"rewards/rejected": 0.0004062137159053236, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.7985611510791368e-06, |
|
"logits/chosen": -2.657923460006714, |
|
"logits/rejected": -2.658536911010742, |
|
"logps/chosen": -62.22175979614258, |
|
"logps/rejected": -66.25755310058594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0009646881371736526, |
|
"rewards/margins": 0.00016965254326350987, |
|
"rewards/rejected": 0.0007950355065986514, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 1.9784172661870504e-06, |
|
"logits/chosen": -2.6612608432769775, |
|
"logits/rejected": -2.6600637435913086, |
|
"logps/chosen": -66.11808013916016, |
|
"logps/rejected": -69.09329986572266, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0011165592586621642, |
|
"rewards/margins": 0.00037375936517491937, |
|
"rewards/rejected": 0.0007428000681102276, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 2.158273381294964e-06, |
|
"logits/chosen": -2.6269524097442627, |
|
"logits/rejected": -2.627486228942871, |
|
"logps/chosen": -61.392478942871094, |
|
"logps/rejected": -64.30213165283203, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.001123746857047081, |
|
"rewards/margins": 0.0006171964341774583, |
|
"rewards/rejected": 0.0005065504228696227, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 2.3381294964028776e-06, |
|
"logits/chosen": -2.7004921436309814, |
|
"logits/rejected": -2.7044999599456787, |
|
"logps/chosen": -68.17378234863281, |
|
"logps/rejected": -70.49411010742188, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0015111321117728949, |
|
"rewards/margins": 0.0004890409181825817, |
|
"rewards/rejected": 0.0010220912517979741, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 2.5179856115107916e-06, |
|
"logits/chosen": -2.646902084350586, |
|
"logits/rejected": -2.6411759853363037, |
|
"logps/chosen": -63.913551330566406, |
|
"logps/rejected": -68.77268981933594, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.002318110316991806, |
|
"rewards/margins": 0.0013834238052368164, |
|
"rewards/rejected": 0.0009346865117549896, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 2.6978417266187052e-06, |
|
"logits/chosen": -2.6741390228271484, |
|
"logits/rejected": -2.6727261543273926, |
|
"logps/chosen": -65.45186614990234, |
|
"logps/rejected": -69.5618667602539, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0026420415379107, |
|
"rewards/margins": 0.001897258684039116, |
|
"rewards/rejected": 0.0007447830284945667, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.877697841726619e-06, |
|
"logits/chosen": -2.645352602005005, |
|
"logits/rejected": -2.6511170864105225, |
|
"logps/chosen": -57.53055953979492, |
|
"logps/rejected": -61.90361404418945, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.003107175463810563, |
|
"rewards/margins": 0.0023571993224322796, |
|
"rewards/rejected": 0.0007499762577936053, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 3.0575539568345324e-06, |
|
"logits/chosen": -2.650054454803467, |
|
"logits/rejected": -2.6488354206085205, |
|
"logps/chosen": -61.54291534423828, |
|
"logps/rejected": -64.30693054199219, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0038940298836678267, |
|
"rewards/margins": 0.0028370567597448826, |
|
"rewards/rejected": 0.001056973123922944, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 3.237410071942446e-06, |
|
"logits/chosen": -2.645240306854248, |
|
"logits/rejected": -2.642111301422119, |
|
"logps/chosen": -61.097076416015625, |
|
"logps/rejected": -64.71502685546875, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004625825677067041, |
|
"rewards/margins": 0.004492693580687046, |
|
"rewards/rejected": 0.00013313218369148672, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 3.4172661870503596e-06, |
|
"logits/chosen": -2.7127881050109863, |
|
"logits/rejected": -2.710603713989258, |
|
"logps/chosen": -57.48582077026367, |
|
"logps/rejected": -62.37571334838867, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.004764446523040533, |
|
"rewards/margins": 0.003814270021393895, |
|
"rewards/rejected": 0.0009501769091002643, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 3.5971223021582737e-06, |
|
"logits/chosen": -2.6896915435791016, |
|
"logits/rejected": -2.684767246246338, |
|
"logps/chosen": -59.758766174316406, |
|
"logps/rejected": -64.19367980957031, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0045052021741867065, |
|
"rewards/margins": 0.003016799222677946, |
|
"rewards/rejected": 0.0014884021366015077, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 3.7769784172661873e-06, |
|
"logits/chosen": -2.6605515480041504, |
|
"logits/rejected": -2.6634459495544434, |
|
"logps/chosen": -58.80467987060547, |
|
"logps/rejected": -60.49141311645508, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0028493874706327915, |
|
"rewards/margins": 0.004452340304851532, |
|
"rewards/rejected": -0.0016029527178034186, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 3.956834532374101e-06, |
|
"logits/chosen": -2.6214749813079834, |
|
"logits/rejected": -2.6205554008483887, |
|
"logps/chosen": -63.977142333984375, |
|
"logps/rejected": -71.72235870361328, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.0048708124086260796, |
|
"rewards/margins": 0.009143907576799393, |
|
"rewards/rejected": -0.0042730942368507385, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 4.1366906474820145e-06, |
|
"logits/chosen": -2.663078784942627, |
|
"logits/rejected": -2.667092800140381, |
|
"logps/chosen": -61.06050491333008, |
|
"logps/rejected": -66.15110778808594, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.005165424197912216, |
|
"rewards/margins": 0.007069968618452549, |
|
"rewards/rejected": -0.0019045437220484018, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 4.316546762589928e-06, |
|
"logits/chosen": -2.675718069076538, |
|
"logits/rejected": -2.6735589504241943, |
|
"logps/chosen": -65.82478332519531, |
|
"logps/rejected": -69.08268737792969, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.00702512264251709, |
|
"rewards/margins": 0.008214818313717842, |
|
"rewards/rejected": -0.0011896961368620396, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 4.496402877697842e-06, |
|
"logits/chosen": -2.6274218559265137, |
|
"logits/rejected": -2.6306469440460205, |
|
"logps/chosen": -67.89946746826172, |
|
"logps/rejected": -71.547119140625, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.003348552156239748, |
|
"rewards/margins": 0.011096605099737644, |
|
"rewards/rejected": -0.0077480534091591835, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 4.676258992805755e-06, |
|
"logits/chosen": -2.6246440410614014, |
|
"logits/rejected": -2.643188238143921, |
|
"logps/chosen": -67.15058135986328, |
|
"logps/rejected": -71.12448120117188, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0009993333369493484, |
|
"rewards/margins": 0.011576562188565731, |
|
"rewards/rejected": -0.012575894594192505, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 4.856115107913669e-06, |
|
"logits/chosen": -2.6977336406707764, |
|
"logits/rejected": -2.6968212127685547, |
|
"logps/chosen": -65.34959411621094, |
|
"logps/rejected": -68.08098602294922, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0017364125233143568, |
|
"rewards/margins": 0.010187914595007896, |
|
"rewards/rejected": -0.011924326419830322, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 4.999992078993707e-06, |
|
"logits/chosen": -2.6335489749908447, |
|
"logits/rejected": -2.640903949737549, |
|
"logps/chosen": -58.345176696777344, |
|
"logps/rejected": -61.308982849121094, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.000765187491197139, |
|
"rewards/margins": 0.012545737437903881, |
|
"rewards/rejected": -0.01178054977208376, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 4.999714849043746e-06, |
|
"logits/chosen": -2.662158489227295, |
|
"logits/rejected": -2.674367904663086, |
|
"logps/chosen": -62.21772003173828, |
|
"logps/rejected": -65.60545349121094, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.007329708430916071, |
|
"rewards/margins": 0.013346971943974495, |
|
"rewards/rejected": -0.020676681771874428, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 4.999041618971537e-06, |
|
"logits/chosen": -2.6512532234191895, |
|
"logits/rejected": -2.6503214836120605, |
|
"logps/chosen": -67.29080963134766, |
|
"logps/rejected": -72.53589630126953, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.010256023146212101, |
|
"rewards/margins": 0.014919854700565338, |
|
"rewards/rejected": -0.025175878778100014, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 4.997972495428924e-06, |
|
"logits/chosen": -2.615621328353882, |
|
"logits/rejected": -2.6233325004577637, |
|
"logps/chosen": -66.02967071533203, |
|
"logps/rejected": -70.49574279785156, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.007946187630295753, |
|
"rewards/margins": 0.016535501927137375, |
|
"rewards/rejected": -0.02448168769478798, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 4.996507647784446e-06, |
|
"logits/chosen": -2.638176441192627, |
|
"logits/rejected": -2.6347122192382812, |
|
"logps/chosen": -67.33381652832031, |
|
"logps/rejected": -73.75712585449219, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.01815110817551613, |
|
"rewards/margins": 0.016093209385871887, |
|
"rewards/rejected": -0.034244317561388016, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 4.994647308096509e-06, |
|
"logits/chosen": -2.629110813140869, |
|
"logits/rejected": -2.6443512439727783, |
|
"logps/chosen": -69.91134643554688, |
|
"logps/rejected": -69.85363006591797, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.014942710287868977, |
|
"rewards/margins": 0.012206320650875568, |
|
"rewards/rejected": -0.027149027213454247, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 4.9923917710766266e-06, |
|
"logits/chosen": -2.6785271167755127, |
|
"logits/rejected": -2.6757400035858154, |
|
"logps/chosen": -71.02973937988281, |
|
"logps/rejected": -75.72981262207031, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.014097088947892189, |
|
"rewards/margins": 0.026118427515029907, |
|
"rewards/rejected": -0.04021551460027695, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 4.989741394042728e-06, |
|
"logits/chosen": -2.598215103149414, |
|
"logits/rejected": -2.5950300693511963, |
|
"logps/chosen": -65.64091491699219, |
|
"logps/rejected": -70.74314880371094, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.028698483482003212, |
|
"rewards/margins": 0.019990913569927216, |
|
"rewards/rejected": -0.04868939518928528, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 4.986696596862556e-06, |
|
"logits/chosen": -2.625063180923462, |
|
"logits/rejected": -2.631725788116455, |
|
"logps/chosen": -78.42835998535156, |
|
"logps/rejected": -84.2737045288086, |
|
"loss": 0.6802, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03507710248231888, |
|
"rewards/margins": 0.027645844966173172, |
|
"rewards/rejected": -0.06272295117378235, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 4.983257861887148e-06, |
|
"logits/chosen": -2.6487419605255127, |
|
"logits/rejected": -2.6524715423583984, |
|
"logps/chosen": -71.53236389160156, |
|
"logps/rejected": -81.15141296386719, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.05367087572813034, |
|
"rewards/margins": 0.04164598509669304, |
|
"rewards/rejected": -0.09531687200069427, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 4.979425733874431e-06, |
|
"logits/chosen": -2.575629472732544, |
|
"logits/rejected": -2.5949313640594482, |
|
"logps/chosen": -71.41996765136719, |
|
"logps/rejected": -75.95075225830078, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.07148171961307526, |
|
"rewards/margins": 0.03022712469100952, |
|
"rewards/rejected": -0.10170884430408478, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 4.975200819902911e-06, |
|
"logits/chosen": -2.608182430267334, |
|
"logits/rejected": -2.613959550857544, |
|
"logps/chosen": -77.80644226074219, |
|
"logps/rejected": -86.1133804321289, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10805950313806534, |
|
"rewards/margins": 0.04201812297105789, |
|
"rewards/rejected": -0.15007762610912323, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 4.970583789275508e-06, |
|
"logits/chosen": -2.565563440322876, |
|
"logits/rejected": -2.575218677520752, |
|
"logps/chosen": -72.14826965332031, |
|
"logps/rejected": -76.73294830322266, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09949363768100739, |
|
"rewards/margins": 0.02443886548280716, |
|
"rewards/rejected": -0.12393250316381454, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.965575373413527e-06, |
|
"logits/chosen": -2.5901551246643066, |
|
"logits/rejected": -2.592224359512329, |
|
"logps/chosen": -78.75377655029297, |
|
"logps/rejected": -87.20631408691406, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1401161104440689, |
|
"rewards/margins": 0.05296441912651062, |
|
"rewards/rejected": -0.19308052957057953, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 4.960176365740783e-06, |
|
"logits/chosen": -2.568718671798706, |
|
"logits/rejected": -2.5703847408294678, |
|
"logps/chosen": -82.48625183105469, |
|
"logps/rejected": -91.03981018066406, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1931959092617035, |
|
"rewards/margins": 0.04230925068259239, |
|
"rewards/rejected": -0.2355051338672638, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 4.954387621557911e-06, |
|
"logits/chosen": -2.472228527069092, |
|
"logits/rejected": -2.4818115234375, |
|
"logps/chosen": -83.677978515625, |
|
"logps/rejected": -90.09959411621094, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.20174559950828552, |
|
"rewards/margins": 0.060646455734968185, |
|
"rewards/rejected": -0.2623920440673828, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.046875, |
|
"learning_rate": 4.948210057906871e-06, |
|
"logits/chosen": -2.424100637435913, |
|
"logits/rejected": -2.4418275356292725, |
|
"logps/chosen": -88.38543701171875, |
|
"logps/rejected": -100.091552734375, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.25682324171066284, |
|
"rewards/margins": 0.07104425877332687, |
|
"rewards/rejected": -0.3278675079345703, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 4.941644653425671e-06, |
|
"logits/chosen": -2.452075481414795, |
|
"logits/rejected": -2.4671432971954346, |
|
"logps/chosen": -100.57665252685547, |
|
"logps/rejected": -104.40872955322266, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2847747802734375, |
|
"rewards/margins": 0.05320798233151436, |
|
"rewards/rejected": -0.33798274397850037, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 4.9346924481933345e-06, |
|
"logits/chosen": -2.459083318710327, |
|
"logits/rejected": -2.4748520851135254, |
|
"logps/chosen": -96.22517395019531, |
|
"logps/rejected": -105.0258560180664, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.23906302452087402, |
|
"rewards/margins": 0.06904648244380951, |
|
"rewards/rejected": -0.3081095516681671, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 4.927354543565131e-06, |
|
"logits/chosen": -2.404327630996704, |
|
"logits/rejected": -2.4193339347839355, |
|
"logps/chosen": -101.07810974121094, |
|
"logps/rejected": -109.60540771484375, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.2962692379951477, |
|
"rewards/margins": 0.07886885851621628, |
|
"rewards/rejected": -0.3751381039619446, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 4.919632101998101e-06, |
|
"logits/chosen": -2.4055585861206055, |
|
"logits/rejected": -2.4047584533691406, |
|
"logps/chosen": -83.26808166503906, |
|
"logps/rejected": -96.07670593261719, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.25196436047554016, |
|
"rewards/margins": 0.09068725258111954, |
|
"rewards/rejected": -0.3426516056060791, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 4.911526346866907e-06, |
|
"logits/chosen": -2.3670365810394287, |
|
"logits/rejected": -2.380223512649536, |
|
"logps/chosen": -96.45356750488281, |
|
"logps/rejected": -111.1182861328125, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.32315492630004883, |
|
"rewards/margins": 0.11133052408695221, |
|
"rewards/rejected": -0.43448543548583984, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 4.9030385622700225e-06, |
|
"logits/chosen": -2.3522255420684814, |
|
"logits/rejected": -2.358100414276123, |
|
"logps/chosen": -96.55814361572266, |
|
"logps/rejected": -112.30567932128906, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.37474876642227173, |
|
"rewards/margins": 0.10861654579639435, |
|
"rewards/rejected": -0.4833652973175049, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.125, |
|
"learning_rate": 4.89417009282631e-06, |
|
"logits/chosen": -2.3778271675109863, |
|
"logits/rejected": -2.390409469604492, |
|
"logps/chosen": -98.19575500488281, |
|
"logps/rejected": -111.83695983886719, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3482569754123688, |
|
"rewards/margins": 0.11011794954538345, |
|
"rewards/rejected": -0.45837491750717163, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 4.88492234346201e-06, |
|
"logits/chosen": -2.3503499031066895, |
|
"logits/rejected": -2.3607373237609863, |
|
"logps/chosen": -109.85440826416016, |
|
"logps/rejected": -122.345703125, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.37264811992645264, |
|
"rewards/margins": 0.09832003712654114, |
|
"rewards/rejected": -0.47096818685531616, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 4.8752967791881735e-06, |
|
"logits/chosen": -2.356555461883545, |
|
"logits/rejected": -2.362435817718506, |
|
"logps/chosen": -101.36775970458984, |
|
"logps/rejected": -111.54450988769531, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.3653712868690491, |
|
"rewards/margins": 0.08574860543012619, |
|
"rewards/rejected": -0.45111989974975586, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 4.865294924868578e-06, |
|
"logits/chosen": -2.3726258277893066, |
|
"logits/rejected": -2.3774704933166504, |
|
"logps/chosen": -95.63130950927734, |
|
"logps/rejected": -108.5069580078125, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.3163696825504303, |
|
"rewards/margins": 0.10675134509801865, |
|
"rewards/rejected": -0.42312103509902954, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 4.854918364978163e-06, |
|
"logits/chosen": -2.318713665008545, |
|
"logits/rejected": -2.3387556076049805, |
|
"logps/chosen": -92.5940933227539, |
|
"logps/rejected": -102.9334945678711, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.27377182245254517, |
|
"rewards/margins": 0.09678633511066437, |
|
"rewards/rejected": -0.3705581724643707, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 4.844168743352019e-06, |
|
"logits/chosen": -2.3034849166870117, |
|
"logits/rejected": -2.322415828704834, |
|
"logps/chosen": -93.90568542480469, |
|
"logps/rejected": -103.6268081665039, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.28542959690093994, |
|
"rewards/margins": 0.07337291538715363, |
|
"rewards/rejected": -0.35880252718925476, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.833047762924975e-06, |
|
"logits/chosen": -2.3396031856536865, |
|
"logits/rejected": -2.3490686416625977, |
|
"logps/chosen": -106.96073913574219, |
|
"logps/rejected": -120.58781433105469, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.35568124055862427, |
|
"rewards/margins": 0.10962893068790436, |
|
"rewards/rejected": -0.4653101861476898, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 4.8215571854618216e-06, |
|
"logits/chosen": -2.2915313243865967, |
|
"logits/rejected": -2.3102214336395264, |
|
"logps/chosen": -95.60445404052734, |
|
"logps/rejected": -107.88221740722656, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3615376055240631, |
|
"rewards/margins": 0.10991451889276505, |
|
"rewards/rejected": -0.47145208716392517, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9765625, |
|
"learning_rate": 4.809698831278217e-06, |
|
"logits/chosen": -2.359297513961792, |
|
"logits/rejected": -2.364837884902954, |
|
"logps/chosen": -97.43984985351562, |
|
"logps/rejected": -118.7339096069336, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3123398721218109, |
|
"rewards/margins": 0.1503853052854538, |
|
"rewards/rejected": -0.4627251625061035, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 4.797474578952315e-06, |
|
"logits/chosen": -2.364551544189453, |
|
"logits/rejected": -2.368478536605835, |
|
"logps/chosen": -97.71867370605469, |
|
"logps/rejected": -116.11451721191406, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3463926315307617, |
|
"rewards/margins": 0.13709910213947296, |
|
"rewards/rejected": -0.4834917485713959, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.7848863650271645e-06, |
|
"logits/chosen": -2.346735954284668, |
|
"logits/rejected": -2.349565029144287, |
|
"logps/chosen": -99.36726379394531, |
|
"logps/rejected": -108.9990463256836, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.31063222885131836, |
|
"rewards/margins": 0.08253936469554901, |
|
"rewards/rejected": -0.39317160844802856, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 4.771936183703927e-06, |
|
"logits/chosen": -2.2801272869110107, |
|
"logits/rejected": -2.286823034286499, |
|
"logps/chosen": -90.63265228271484, |
|
"logps/rejected": -99.37889099121094, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.30601394176483154, |
|
"rewards/margins": 0.0690702348947525, |
|
"rewards/rejected": -0.37508416175842285, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 4.758626086525956e-06, |
|
"logits/chosen": -2.3465566635131836, |
|
"logits/rejected": -2.3557307720184326, |
|
"logps/chosen": -91.6519546508789, |
|
"logps/rejected": -107.96331787109375, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.32235291600227356, |
|
"rewards/margins": 0.11441938579082489, |
|
"rewards/rejected": -0.43677228689193726, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 4.7449581820538e-06, |
|
"logits/chosen": -2.3313632011413574, |
|
"logits/rejected": -2.3418033123016357, |
|
"logps/chosen": -95.18330383300781, |
|
"logps/rejected": -111.15785217285156, |
|
"loss": 0.6478, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.32686877250671387, |
|
"rewards/margins": 0.12550675868988037, |
|
"rewards/rejected": -0.45237550139427185, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 4.730934635531161e-06, |
|
"logits/chosen": -2.3043503761291504, |
|
"logits/rejected": -2.310375690460205, |
|
"logps/chosen": -97.12528228759766, |
|
"logps/rejected": -108.67928314208984, |
|
"loss": 0.6521, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.34178540110588074, |
|
"rewards/margins": 0.11280516535043716, |
|
"rewards/rejected": -0.4545906186103821, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.7578125, |
|
"learning_rate": 4.716557668541893e-06, |
|
"logits/chosen": -2.343346118927002, |
|
"logits/rejected": -2.3510937690734863, |
|
"logps/chosen": -97.1328125, |
|
"logps/rejected": -114.3864974975586, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.31538838148117065, |
|
"rewards/margins": 0.13619980216026306, |
|
"rewards/rejected": -0.4515882134437561, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 4.701829558658047e-06, |
|
"logits/chosen": -2.3206913471221924, |
|
"logits/rejected": -2.3359267711639404, |
|
"logps/chosen": -102.4328384399414, |
|
"logps/rejected": -114.6532211303711, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.343585729598999, |
|
"rewards/margins": 0.11280594021081924, |
|
"rewards/rejected": -0.4563916325569153, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.686752639079076e-06, |
|
"logits/chosen": -2.28320050239563, |
|
"logits/rejected": -2.2843213081359863, |
|
"logps/chosen": -101.29241180419922, |
|
"logps/rejected": -113.52595520019531, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.33359086513519287, |
|
"rewards/margins": 0.11714836210012436, |
|
"rewards/rejected": -0.45073920488357544, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7734375, |
|
"learning_rate": 4.671329298262208e-06, |
|
"logits/chosen": -2.351982593536377, |
|
"logits/rejected": -2.357144832611084, |
|
"logps/chosen": -102.9426498413086, |
|
"logps/rejected": -118.25224304199219, |
|
"loss": 0.6478, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.3722483217716217, |
|
"rewards/margins": 0.11313033103942871, |
|
"rewards/rejected": -0.4853786528110504, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 4.655561979544069e-06, |
|
"logits/chosen": -2.2974140644073486, |
|
"logits/rejected": -2.307819366455078, |
|
"logps/chosen": -101.06309509277344, |
|
"logps/rejected": -117.390625, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3607991635799408, |
|
"rewards/margins": 0.1265007108449936, |
|
"rewards/rejected": -0.4872998595237732, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 4.639453180753619e-06, |
|
"logits/chosen": -2.248704433441162, |
|
"logits/rejected": -2.257744789123535, |
|
"logps/chosen": -100.77429962158203, |
|
"logps/rejected": -117.0492935180664, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.43067851662635803, |
|
"rewards/margins": 0.1362551599740982, |
|
"rewards/rejected": -0.5669336318969727, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 4.623005453816447e-06, |
|
"logits/chosen": -2.3472437858581543, |
|
"logits/rejected": -2.352238655090332, |
|
"logps/chosen": -115.71247863769531, |
|
"logps/rejected": -131.8263702392578, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.47270336747169495, |
|
"rewards/margins": 0.12508396804332733, |
|
"rewards/rejected": -0.5977872610092163, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 4.606221404350504e-06, |
|
"logits/chosen": -2.28971529006958, |
|
"logits/rejected": -2.29419207572937, |
|
"logps/chosen": -109.21917724609375, |
|
"logps/rejected": -124.91645812988281, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.4223889708518982, |
|
"rewards/margins": 0.13507941365242004, |
|
"rewards/rejected": -0.5574684143066406, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 4.589103691253317e-06, |
|
"logits/chosen": -2.250274658203125, |
|
"logits/rejected": -2.2717387676239014, |
|
"logps/chosen": -112.26399230957031, |
|
"logps/rejected": -119.15122985839844, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.47238197922706604, |
|
"rewards/margins": 0.08317569643259048, |
|
"rewards/rejected": -0.5555577278137207, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.921875, |
|
"learning_rate": 4.571655026280785e-06, |
|
"logits/chosen": -2.2718663215637207, |
|
"logits/rejected": -2.284795045852661, |
|
"logps/chosen": -112.97920227050781, |
|
"logps/rejected": -127.8006591796875, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4664887487888336, |
|
"rewards/margins": 0.13087505102157593, |
|
"rewards/rejected": -0.5973638296127319, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.234375, |
|
"learning_rate": 4.553878173617576e-06, |
|
"logits/chosen": -2.28155517578125, |
|
"logits/rejected": -2.289883613586426, |
|
"logps/chosen": -99.67669677734375, |
|
"logps/rejected": -116.39395904541016, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3680870532989502, |
|
"rewards/margins": 0.13191227614879608, |
|
"rewards/rejected": -0.49999934434890747, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 4.5357759494392354e-06, |
|
"logits/chosen": -2.2865400314331055, |
|
"logits/rejected": -2.301579475402832, |
|
"logps/chosen": -103.4640884399414, |
|
"logps/rejected": -120.11143493652344, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.42991527915000916, |
|
"rewards/margins": 0.13045726716518402, |
|
"rewards/rejected": -0.560372531414032, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 4.5173512214660495e-06, |
|
"logits/chosen": -2.290435314178467, |
|
"logits/rejected": -2.3016152381896973, |
|
"logps/chosen": -104.1209716796875, |
|
"logps/rejected": -120.07255554199219, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3812081217765808, |
|
"rewards/margins": 0.13323888182640076, |
|
"rewards/rejected": -0.5144469738006592, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.5546875, |
|
"learning_rate": 4.498606908508754e-06, |
|
"logits/chosen": -2.281541109085083, |
|
"logits/rejected": -2.2845287322998047, |
|
"logps/chosen": -108.74382019042969, |
|
"logps/rejected": -127.21275329589844, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.4051045775413513, |
|
"rewards/margins": 0.14055274426937103, |
|
"rewards/rejected": -0.5456573367118835, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 4.47954598000613e-06, |
|
"logits/chosen": -2.3543689250946045, |
|
"logits/rejected": -2.363257646560669, |
|
"logps/chosen": -96.17681884765625, |
|
"logps/rejected": -110.3593521118164, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.319545716047287, |
|
"rewards/margins": 0.120999276638031, |
|
"rewards/rejected": -0.440544992685318, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.460171455554603e-06, |
|
"logits/chosen": -2.2809572219848633, |
|
"logits/rejected": -2.2786245346069336, |
|
"logps/chosen": -99.40967559814453, |
|
"logps/rejected": -117.13094329833984, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.37397512793540955, |
|
"rewards/margins": 0.14184913039207458, |
|
"rewards/rejected": -0.5158242583274841, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.890625, |
|
"learning_rate": 4.4404864044298755e-06, |
|
"logits/chosen": -2.23799467086792, |
|
"logits/rejected": -2.245177745819092, |
|
"logps/chosen": -108.29356384277344, |
|
"logps/rejected": -121.1603012084961, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.43523016571998596, |
|
"rewards/margins": 0.12470052391290665, |
|
"rewards/rejected": -0.559930682182312, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.420493945100702e-06, |
|
"logits/chosen": -2.266139507293701, |
|
"logits/rejected": -2.2764334678649902, |
|
"logps/chosen": -99.6155014038086, |
|
"logps/rejected": -117.4134521484375, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.40347641706466675, |
|
"rewards/margins": 0.1434660702943802, |
|
"rewards/rejected": -0.5469424724578857, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.3203125, |
|
"learning_rate": 4.400197244734866e-06, |
|
"logits/chosen": -2.3086845874786377, |
|
"logits/rejected": -2.3136982917785645, |
|
"logps/chosen": -105.7784652709961, |
|
"logps/rejected": -123.13346099853516, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.41168150305747986, |
|
"rewards/margins": 0.1643691062927246, |
|
"rewards/rejected": -0.5760505795478821, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 4.379599518697444e-06, |
|
"logits/chosen": -2.302346706390381, |
|
"logits/rejected": -2.305290699005127, |
|
"logps/chosen": -110.01615905761719, |
|
"logps/rejected": -132.0598602294922, |
|
"loss": 0.6283, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.45303821563720703, |
|
"rewards/margins": 0.17430761456489563, |
|
"rewards/rejected": -0.627345860004425, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 4.3587040300414325e-06, |
|
"logits/chosen": -2.249532461166382, |
|
"logits/rejected": -2.2589190006256104, |
|
"logps/chosen": -117.6961441040039, |
|
"logps/rejected": -128.64663696289062, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.40040212869644165, |
|
"rewards/margins": 0.11040042340755463, |
|
"rewards/rejected": -0.5108025670051575, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.296875, |
|
"learning_rate": 4.337514088990822e-06, |
|
"logits/chosen": -2.278533458709717, |
|
"logits/rejected": -2.281517267227173, |
|
"logps/chosen": -103.110595703125, |
|
"logps/rejected": -122.44902038574219, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3665826916694641, |
|
"rewards/margins": 0.16001132130622864, |
|
"rewards/rejected": -0.5265940427780151, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 4.316033052416196e-06, |
|
"logits/chosen": -2.2408275604248047, |
|
"logits/rejected": -2.2425954341888428, |
|
"logps/chosen": -104.7763442993164, |
|
"logps/rejected": -116.91007232666016, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.4133949875831604, |
|
"rewards/margins": 0.10547590255737305, |
|
"rewards/rejected": -0.5188708901405334, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 4.294264323302946e-06, |
|
"logits/chosen": -2.3082475662231445, |
|
"logits/rejected": -2.3192391395568848, |
|
"logps/chosen": -103.19981384277344, |
|
"logps/rejected": -117.45137023925781, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.413215696811676, |
|
"rewards/margins": 0.1274113655090332, |
|
"rewards/rejected": -0.5406270027160645, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 4.272211350212171e-06, |
|
"logits/chosen": -2.3206677436828613, |
|
"logits/rejected": -2.3214950561523438, |
|
"logps/chosen": -110.54658508300781, |
|
"logps/rejected": -124.14703369140625, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.38299205899238586, |
|
"rewards/margins": 0.10237312316894531, |
|
"rewards/rejected": -0.48536521196365356, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.7578125, |
|
"learning_rate": 4.249877626734366e-06, |
|
"logits/chosen": -2.2740793228149414, |
|
"logits/rejected": -2.2952816486358643, |
|
"logps/chosen": -108.4576644897461, |
|
"logps/rejected": -121.01176452636719, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.38486912846565247, |
|
"rewards/margins": 0.1163020133972168, |
|
"rewards/rejected": -0.5011711120605469, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 4.2272666909359784e-06, |
|
"logits/chosen": -2.2910335063934326, |
|
"logits/rejected": -2.295705795288086, |
|
"logps/chosen": -102.13375091552734, |
|
"logps/rejected": -124.9188003540039, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4369734823703766, |
|
"rewards/margins": 0.20262956619262695, |
|
"rewards/rejected": -0.6396030783653259, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 4.2043821247989036e-06, |
|
"logits/chosen": -2.278778553009033, |
|
"logits/rejected": -2.2924065589904785, |
|
"logps/chosen": -103.04777526855469, |
|
"logps/rejected": -120.569091796875, |
|
"loss": 0.6403, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.38494181632995605, |
|
"rewards/margins": 0.1459546983242035, |
|
"rewards/rejected": -0.5308965444564819, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 4.181227553653045e-06, |
|
"logits/chosen": -2.278262138366699, |
|
"logits/rejected": -2.3009345531463623, |
|
"logps/chosen": -121.49980163574219, |
|
"logps/rejected": -137.34304809570312, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4821853041648865, |
|
"rewards/margins": 0.14446020126342773, |
|
"rewards/rejected": -0.6266454458236694, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 4.1578066456019885e-06, |
|
"logits/chosen": -2.2163925170898438, |
|
"logits/rejected": -2.204552412033081, |
|
"logps/chosen": -114.92356872558594, |
|
"logps/rejected": -136.88050842285156, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5124049186706543, |
|
"rewards/margins": 0.1587189882993698, |
|
"rewards/rejected": -0.6711238622665405, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 4.1341231109419135e-06, |
|
"logits/chosen": -2.203275442123413, |
|
"logits/rejected": -2.2119054794311523, |
|
"logps/chosen": -123.16712951660156, |
|
"logps/rejected": -137.21139526367188, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.5569948554039001, |
|
"rewards/margins": 0.1151203066110611, |
|
"rewards/rejected": -0.6721151471138, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 4.110180701573809e-06, |
|
"logits/chosen": -2.200212001800537, |
|
"logits/rejected": -2.198477268218994, |
|
"logps/chosen": -109.5115966796875, |
|
"logps/rejected": -132.9260711669922, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4715866148471832, |
|
"rewards/margins": 0.19370624423027039, |
|
"rewards/rejected": -0.6652928590774536, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 4.085983210409114e-06, |
|
"logits/chosen": -2.227853775024414, |
|
"logits/rejected": -2.2186429500579834, |
|
"logps/chosen": -118.89933013916016, |
|
"logps/rejected": -136.7680206298828, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.49003705382347107, |
|
"rewards/margins": 0.12437830865383148, |
|
"rewards/rejected": -0.6144154071807861, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.1640625, |
|
"learning_rate": 4.061534470768841e-06, |
|
"logits/chosen": -2.2407491207122803, |
|
"logits/rejected": -2.2455482482910156, |
|
"logps/chosen": -111.09078216552734, |
|
"logps/rejected": -124.2870101928711, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.42453131079673767, |
|
"rewards/margins": 0.11639855057001114, |
|
"rewards/rejected": -0.540929913520813, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 4.036838355776313e-06, |
|
"logits/chosen": -2.1629438400268555, |
|
"logits/rejected": -2.169175386428833, |
|
"logps/chosen": -115.92777252197266, |
|
"logps/rejected": -131.81134033203125, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.5123138427734375, |
|
"rewards/margins": 0.13096138834953308, |
|
"rewards/rejected": -0.6432752013206482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 4.011898777743594e-06, |
|
"logits/chosen": -2.211540699005127, |
|
"logits/rejected": -2.2166659832000732, |
|
"logps/chosen": -101.62760162353516, |
|
"logps/rejected": -119.66593933105469, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4295649528503418, |
|
"rewards/margins": 0.15017978847026825, |
|
"rewards/rejected": -0.5797447562217712, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 3.9867196875517025e-06, |
|
"logits/chosen": -2.20629620552063, |
|
"logits/rejected": -2.2112419605255127, |
|
"logps/chosen": -107.81523132324219, |
|
"logps/rejected": -119.03303527832031, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.4413573145866394, |
|
"rewards/margins": 0.09499961137771606, |
|
"rewards/rejected": -0.5363569855690002, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 3.961305074024722e-06, |
|
"logits/chosen": -2.125932216644287, |
|
"logits/rejected": -2.130676746368408, |
|
"logps/chosen": -112.90400695800781, |
|
"logps/rejected": -138.2743377685547, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5330706834793091, |
|
"rewards/margins": 0.2042117565870285, |
|
"rewards/rejected": -0.737282395362854, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 3.935658963297902e-06, |
|
"logits/chosen": -2.212306261062622, |
|
"logits/rejected": -2.2203996181488037, |
|
"logps/chosen": -109.0052261352539, |
|
"logps/rejected": -125.78495788574219, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.44839105010032654, |
|
"rewards/margins": 0.14669093489646912, |
|
"rewards/rejected": -0.5950819849967957, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 3.90978541817984e-06, |
|
"logits/chosen": -2.1384072303771973, |
|
"logits/rejected": -2.143054485321045, |
|
"logps/chosen": -108.29044342041016, |
|
"logps/rejected": -127.79345703125, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5090142488479614, |
|
"rewards/margins": 0.16035351157188416, |
|
"rewards/rejected": -0.669367790222168, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 3.8836885375088635e-06, |
|
"logits/chosen": -2.131621837615967, |
|
"logits/rejected": -2.1531243324279785, |
|
"logps/chosen": -115.8088150024414, |
|
"logps/rejected": -133.919921875, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5001341104507446, |
|
"rewards/margins": 0.1682174801826477, |
|
"rewards/rejected": -0.6683515310287476, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 3.857372455503698e-06, |
|
"logits/chosen": -2.1725549697875977, |
|
"logits/rejected": -2.1732017993927, |
|
"logps/chosen": -117.09075927734375, |
|
"logps/rejected": -135.18533325195312, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.43959444761276245, |
|
"rewards/margins": 0.13271991908550262, |
|
"rewards/rejected": -0.5723143815994263, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 3.830841341108528e-06, |
|
"logits/chosen": -2.212951421737671, |
|
"logits/rejected": -2.2198729515075684, |
|
"logps/chosen": -111.72041320800781, |
|
"logps/rejected": -132.552001953125, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.45366325974464417, |
|
"rewards/margins": 0.17270301282405853, |
|
"rewards/rejected": -0.6263662576675415, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 3.804099397332572e-06, |
|
"logits/chosen": -2.215224027633667, |
|
"logits/rejected": -2.210907459259033, |
|
"logps/chosen": -112.65693664550781, |
|
"logps/rejected": -135.70443725585938, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.41242438554763794, |
|
"rewards/margins": 0.18879784643650055, |
|
"rewards/rejected": -0.6012222766876221, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.9375, |
|
"learning_rate": 3.7771508605842372e-06, |
|
"logits/chosen": -2.112990140914917, |
|
"logits/rejected": -2.1238150596618652, |
|
"logps/chosen": -116.11753845214844, |
|
"logps/rejected": -138.2332000732422, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5049333572387695, |
|
"rewards/margins": 0.20600661635398865, |
|
"rewards/rejected": -0.7109400033950806, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -2.1379756927490234, |
|
"logits/rejected": -2.13820219039917, |
|
"logps/chosen": -114.88435363769531, |
|
"logps/rejected": -133.52407836914062, |
|
"loss": 0.6373, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4984721541404724, |
|
"rewards/margins": 0.16139784455299377, |
|
"rewards/rejected": -0.6598700881004333, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 3.7226511167681014e-06, |
|
"logits/chosen": -2.135016918182373, |
|
"logits/rejected": -2.126314163208008, |
|
"logps/chosen": -111.63895416259766, |
|
"logps/rejected": -126.47679138183594, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.48495396971702576, |
|
"rewards/margins": 0.13388456404209137, |
|
"rewards/rejected": -0.6188385486602783, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.8203125, |
|
"learning_rate": 3.6951085434471544e-06, |
|
"logits/chosen": -2.1722989082336426, |
|
"logits/rejected": -2.166605234146118, |
|
"logps/chosen": -105.00669860839844, |
|
"logps/rejected": -117.9762191772461, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.44906917214393616, |
|
"rewards/margins": 0.1117476224899292, |
|
"rewards/rejected": -0.5608168840408325, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 3.6673766432797948e-06, |
|
"logits/chosen": -2.1750612258911133, |
|
"logits/rejected": -2.1879947185516357, |
|
"logps/chosen": -123.87275695800781, |
|
"logps/rejected": -144.77828979492188, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.584213137626648, |
|
"rewards/margins": 0.1833323985338211, |
|
"rewards/rejected": -0.7675455808639526, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.671875, |
|
"learning_rate": 3.6394598095014577e-06, |
|
"logits/chosen": -2.210446834564209, |
|
"logits/rejected": -2.213280200958252, |
|
"logps/chosen": -107.16650390625, |
|
"logps/rejected": -124.11837005615234, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.40074795484542847, |
|
"rewards/margins": 0.14601869881153107, |
|
"rewards/rejected": -0.5467666387557983, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.25, |
|
"learning_rate": 3.611362464644415e-06, |
|
"logits/chosen": -2.128871202468872, |
|
"logits/rejected": -2.1372876167297363, |
|
"logps/chosen": -116.8992919921875, |
|
"logps/rejected": -126.83099365234375, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.47680729627609253, |
|
"rewards/margins": 0.09380488097667694, |
|
"rewards/rejected": -0.5706123113632202, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 3.5830890598371636e-06, |
|
"logits/chosen": -2.23905611038208, |
|
"logits/rejected": -2.252377510070801, |
|
"logps/chosen": -107.7120361328125, |
|
"logps/rejected": -124.34709167480469, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4397021234035492, |
|
"rewards/margins": 0.1647356003522873, |
|
"rewards/rejected": -0.6044376492500305, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.859375, |
|
"learning_rate": 3.5546440740992856e-06, |
|
"logits/chosen": -2.1930408477783203, |
|
"logits/rejected": -2.2014918327331543, |
|
"logps/chosen": -117.5343017578125, |
|
"logps/rejected": -131.0255889892578, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5026694536209106, |
|
"rewards/margins": 0.11716220527887344, |
|
"rewards/rejected": -0.6198316812515259, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.765625, |
|
"learning_rate": 3.5260320136318927e-06, |
|
"logits/chosen": -2.1664159297943115, |
|
"logits/rejected": -2.176593542098999, |
|
"logps/chosen": -120.20884704589844, |
|
"logps/rejected": -136.73406982421875, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.522860586643219, |
|
"rewards/margins": 0.15705768764019012, |
|
"rewards/rejected": -0.6799181699752808, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 3.4972574111037587e-06, |
|
"logits/chosen": -2.1755106449127197, |
|
"logits/rejected": -2.1772923469543457, |
|
"logps/chosen": -115.28816223144531, |
|
"logps/rejected": -133.49465942382812, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44179558753967285, |
|
"rewards/margins": 0.16654905676841736, |
|
"rewards/rejected": -0.6083446741104126, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 3.468324824933267e-06, |
|
"logits/chosen": -2.151540756225586, |
|
"logits/rejected": -2.1717865467071533, |
|
"logps/chosen": -115.64791107177734, |
|
"logps/rejected": -132.0801239013672, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.548166036605835, |
|
"rewards/margins": 0.13710884749889374, |
|
"rewards/rejected": -0.6852747797966003, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.0390625, |
|
"learning_rate": 3.4392388385662713e-06, |
|
"logits/chosen": -2.1935017108917236, |
|
"logits/rejected": -2.195500612258911, |
|
"logps/chosen": -107.0605697631836, |
|
"logps/rejected": -129.38616943359375, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.43120819330215454, |
|
"rewards/margins": 0.18481549620628357, |
|
"rewards/rejected": -0.6160237193107605, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.671875, |
|
"learning_rate": 3.410004059749996e-06, |
|
"logits/chosen": -2.164797067642212, |
|
"logits/rejected": -2.172008514404297, |
|
"logps/chosen": -110.7061767578125, |
|
"logps/rejected": -132.82736206054688, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4369031488895416, |
|
"rewards/margins": 0.19907937943935394, |
|
"rewards/rejected": -0.6359825134277344, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.6796875, |
|
"learning_rate": 3.3806251198030843e-06, |
|
"logits/chosen": -2.1183745861053467, |
|
"logits/rejected": -2.13506817817688, |
|
"logps/chosen": -103.0443344116211, |
|
"logps/rejected": -128.4714813232422, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4332190155982971, |
|
"rewards/margins": 0.22365593910217285, |
|
"rewards/rejected": -0.65687495470047, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 3.351106672881915e-06, |
|
"logits/chosen": -2.1771786212921143, |
|
"logits/rejected": -2.1897802352905273, |
|
"logps/chosen": -114.33122253417969, |
|
"logps/rejected": -135.31690979003906, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.44074517488479614, |
|
"rewards/margins": 0.17149756848812103, |
|
"rewards/rejected": -0.6122426986694336, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 3.3214533952433017e-06, |
|
"logits/chosen": -2.203437328338623, |
|
"logits/rejected": -2.194852113723755, |
|
"logps/chosen": -114.83846282958984, |
|
"logps/rejected": -132.64036560058594, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5022262334823608, |
|
"rewards/margins": 0.1002851277589798, |
|
"rewards/rejected": -0.6025113463401794, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 3.291669984503682e-06, |
|
"logits/chosen": -2.09834361076355, |
|
"logits/rejected": -2.1034817695617676, |
|
"logps/chosen": -119.2296371459961, |
|
"logps/rejected": -144.80642700195312, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5479999780654907, |
|
"rewards/margins": 0.22145429253578186, |
|
"rewards/rejected": -0.7694542407989502, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 3.261761158894937e-06, |
|
"logits/chosen": -2.072908878326416, |
|
"logits/rejected": -2.075850009918213, |
|
"logps/chosen": -121.2236099243164, |
|
"logps/rejected": -149.28993225097656, |
|
"loss": 0.6121, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5698887705802917, |
|
"rewards/margins": 0.23544566333293915, |
|
"rewards/rejected": -0.8053344488143921, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 3.231731656516936e-06, |
|
"logits/chosen": -2.1070938110351562, |
|
"logits/rejected": -2.1028828620910645, |
|
"logps/chosen": -110.7509765625, |
|
"logps/rejected": -132.27105712890625, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.46972590684890747, |
|
"rewards/margins": 0.17397567629814148, |
|
"rewards/rejected": -0.6437015533447266, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 3.2015862345869335e-06, |
|
"logits/chosen": -2.1732888221740723, |
|
"logits/rejected": -2.181213855743408, |
|
"logps/chosen": -111.25699615478516, |
|
"logps/rejected": -123.9486083984375, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.44834476709365845, |
|
"rewards/margins": 0.11553524434566498, |
|
"rewards/rejected": -0.5638800263404846, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.7734375, |
|
"learning_rate": 3.171329668685942e-06, |
|
"logits/chosen": -2.0767674446105957, |
|
"logits/rejected": -2.070704936981201, |
|
"logps/chosen": -110.462890625, |
|
"logps/rejected": -133.4669647216797, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5272938013076782, |
|
"rewards/margins": 0.18891780078411102, |
|
"rewards/rejected": -0.7162116765975952, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 3.140966752002193e-06, |
|
"logits/chosen": -2.0980172157287598, |
|
"logits/rejected": -2.102271556854248, |
|
"logps/chosen": -103.88057708740234, |
|
"logps/rejected": -130.67318725585938, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.39303597807884216, |
|
"rewards/margins": 0.2400202453136444, |
|
"rewards/rejected": -0.6330562233924866, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 3.1105022945718076e-06, |
|
"logits/chosen": -2.0586235523223877, |
|
"logits/rejected": -2.080989360809326, |
|
"logps/chosen": -132.25013732910156, |
|
"logps/rejected": -150.72528076171875, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6205312013626099, |
|
"rewards/margins": 0.177308589220047, |
|
"rewards/rejected": -0.7978397607803345, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.78125, |
|
"learning_rate": 3.079941122516803e-06, |
|
"logits/chosen": -2.0391013622283936, |
|
"logits/rejected": -2.037480592727661, |
|
"logps/chosen": -114.75526428222656, |
|
"logps/rejected": -132.84378051757812, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5115147829055786, |
|
"rewards/margins": 0.1457727551460266, |
|
"rewards/rejected": -0.6572875380516052, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 3.0492880772805433e-06, |
|
"logits/chosen": -2.05072283744812, |
|
"logits/rejected": -2.057342767715454, |
|
"logps/chosen": -120.1377182006836, |
|
"logps/rejected": -134.4777374267578, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.4882054328918457, |
|
"rewards/margins": 0.1324242353439331, |
|
"rewards/rejected": -0.6206297278404236, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.5, |
|
"learning_rate": 3.018548014860769e-06, |
|
"logits/chosen": -2.007279872894287, |
|
"logits/rejected": -2.0130622386932373, |
|
"logps/chosen": -120.3442153930664, |
|
"logps/rejected": -143.51101684570312, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5948348045349121, |
|
"rewards/margins": 0.20342986285686493, |
|
"rewards/rejected": -0.7982646822929382, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 2.9877258050403214e-06, |
|
"logits/chosen": -2.031801223754883, |
|
"logits/rejected": -2.023563861846924, |
|
"logps/chosen": -121.27830505371094, |
|
"logps/rejected": -140.32757568359375, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6015397310256958, |
|
"rewards/margins": 0.1492132693529129, |
|
"rewards/rejected": -0.7507530450820923, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.7578125, |
|
"learning_rate": 2.9568263306156754e-06, |
|
"logits/chosen": -2.0874016284942627, |
|
"logits/rejected": -2.0975565910339355, |
|
"logps/chosen": -107.88935852050781, |
|
"logps/rejected": -121.3756103515625, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.4233587682247162, |
|
"rewards/margins": 0.1184331625699997, |
|
"rewards/rejected": -0.5417919754981995, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.0, |
|
"learning_rate": 2.9258544866234206e-06, |
|
"logits/chosen": -2.070168972015381, |
|
"logits/rejected": -2.0742554664611816, |
|
"logps/chosen": -112.909423828125, |
|
"logps/rejected": -130.5807342529297, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.49084582924842834, |
|
"rewards/margins": 0.15788979828357697, |
|
"rewards/rejected": -0.6487356424331665, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 2.8948151795647994e-06, |
|
"logits/chosen": -1.9922540187835693, |
|
"logits/rejected": -2.0036025047302246, |
|
"logps/chosen": -108.33551025390625, |
|
"logps/rejected": -134.94259643554688, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.44488996267318726, |
|
"rewards/margins": 0.2470279037952423, |
|
"rewards/rejected": -0.6919177770614624, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.34375, |
|
"learning_rate": 2.863713326628422e-06, |
|
"logits/chosen": -1.9860804080963135, |
|
"logits/rejected": -1.98525071144104, |
|
"logps/chosen": -111.2559585571289, |
|
"logps/rejected": -138.44357299804688, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4986530840396881, |
|
"rewards/margins": 0.2310309112071991, |
|
"rewards/rejected": -0.7296839952468872, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.828125, |
|
"learning_rate": 2.8325538549113006e-06, |
|
"logits/chosen": -2.030186891555786, |
|
"logits/rejected": -2.0408942699432373, |
|
"logps/chosen": -113.96142578125, |
|
"logps/rejected": -141.45428466796875, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.46484389901161194, |
|
"rewards/margins": 0.2421310842037201, |
|
"rewards/rejected": -0.706974983215332, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 2.8013417006383078e-06, |
|
"logits/chosen": -1.9514284133911133, |
|
"logits/rejected": -1.963894248008728, |
|
"logps/chosen": -110.47412109375, |
|
"logps/rejected": -128.65701293945312, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5639623999595642, |
|
"rewards/margins": 0.1714598834514618, |
|
"rewards/rejected": -0.7354224324226379, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 2.770081808380186e-06, |
|
"logits/chosen": -2.04837703704834, |
|
"logits/rejected": -2.05118989944458, |
|
"logps/chosen": -125.71846771240234, |
|
"logps/rejected": -144.2294158935547, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.48360365629196167, |
|
"rewards/margins": 0.19055330753326416, |
|
"rewards/rejected": -0.6741569638252258, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 2.328125, |
|
"learning_rate": 2.7387791302702398e-06, |
|
"logits/chosen": -1.980463981628418, |
|
"logits/rejected": -1.980164885520935, |
|
"logps/chosen": -123.45314025878906, |
|
"logps/rejected": -152.37655639648438, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6476872563362122, |
|
"rewards/margins": 0.214513897895813, |
|
"rewards/rejected": -0.8622010946273804, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 2.707438625219827e-06, |
|
"logits/chosen": -1.942488670349121, |
|
"logits/rejected": -1.9465347528457642, |
|
"logps/chosen": -128.45228576660156, |
|
"logps/rejected": -162.00949096679688, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6348342895507812, |
|
"rewards/margins": 0.292163610458374, |
|
"rewards/rejected": -0.9269979596138, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 2.67606525813278e-06, |
|
"logits/chosen": -1.9286657571792603, |
|
"logits/rejected": -1.9460529088974, |
|
"logps/chosen": -115.72102355957031, |
|
"logps/rejected": -142.26600646972656, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5551806092262268, |
|
"rewards/margins": 0.2322642058134079, |
|
"rewards/rejected": -0.7874448299407959, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.6796875, |
|
"learning_rate": 2.6446639991188716e-06, |
|
"logits/chosen": -1.973655343055725, |
|
"logits/rejected": -1.9923969507217407, |
|
"logps/chosen": -116.53816223144531, |
|
"logps/rejected": -137.4978790283203, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5160804986953735, |
|
"rewards/margins": 0.17561517655849457, |
|
"rewards/rejected": -0.6916956305503845, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 2.6132398227064615e-06, |
|
"logits/chosen": -2.0569424629211426, |
|
"logits/rejected": -2.061692237854004, |
|
"logps/chosen": -129.03684997558594, |
|
"logps/rejected": -151.86997985839844, |
|
"loss": 0.6229, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5522770285606384, |
|
"rewards/margins": 0.2137361317873001, |
|
"rewards/rejected": -0.7660132050514221, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 2.5817977070544408e-06, |
|
"logits/chosen": -1.9222244024276733, |
|
"logits/rejected": -1.928789496421814, |
|
"logps/chosen": -122.2890396118164, |
|
"logps/rejected": -146.0366668701172, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6112322807312012, |
|
"rewards/margins": 0.2106863260269165, |
|
"rewards/rejected": -0.8219184875488281, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 2.0, |
|
"learning_rate": 2.550342633163601e-06, |
|
"logits/chosen": -1.994757890701294, |
|
"logits/rejected": -1.998810052871704, |
|
"logps/chosen": -119.19169616699219, |
|
"logps/rejected": -146.54074096679688, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.596774697303772, |
|
"rewards/margins": 0.23899777233600616, |
|
"rewards/rejected": -0.8357726335525513, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.9609375, |
|
"learning_rate": 2.5188795840875546e-06, |
|
"logits/chosen": -1.98430597782135, |
|
"logits/rejected": -1.989297866821289, |
|
"logps/chosen": -124.1072769165039, |
|
"logps/rejected": -133.15005493164062, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5129493474960327, |
|
"rewards/margins": 0.10699422657489777, |
|
"rewards/rejected": -0.6199434995651245, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 2.487413544143325e-06, |
|
"logits/chosen": -2.003361701965332, |
|
"logits/rejected": -1.9991636276245117, |
|
"logps/chosen": -120.54267883300781, |
|
"logps/rejected": -145.92556762695312, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5307844281196594, |
|
"rewards/margins": 0.21387752890586853, |
|
"rewards/rejected": -0.7446619868278503, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 2.4559494981217464e-06, |
|
"logits/chosen": -2.009737968444824, |
|
"logits/rejected": -2.0052223205566406, |
|
"logps/chosen": -115.0971450805664, |
|
"logps/rejected": -140.93968200683594, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.513275146484375, |
|
"rewards/margins": 0.22737202048301697, |
|
"rewards/rejected": -0.7406471967697144, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 2.4244924304977785e-06, |
|
"logits/chosen": -1.9526363611221313, |
|
"logits/rejected": -1.9619600772857666, |
|
"logps/chosen": -117.15467834472656, |
|
"logps/rejected": -141.62472534179688, |
|
"loss": 0.6155, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.529160737991333, |
|
"rewards/margins": 0.21543464064598083, |
|
"rewards/rejected": -0.7445953488349915, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 2.3930473246408752e-06, |
|
"logits/chosen": -2.0411906242370605, |
|
"logits/rejected": -2.056326389312744, |
|
"logps/chosen": -129.92892456054688, |
|
"logps/rejected": -157.43417358398438, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.612835705280304, |
|
"rewards/margins": 0.2452736347913742, |
|
"rewards/rejected": -0.8581092953681946, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 2.3616191620255307e-06, |
|
"logits/chosen": -2.016146421432495, |
|
"logits/rejected": -2.031141996383667, |
|
"logps/chosen": -125.10477447509766, |
|
"logps/rejected": -144.62144470214844, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5862436890602112, |
|
"rewards/margins": 0.16933271288871765, |
|
"rewards/rejected": -0.7555764317512512, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.84375, |
|
"learning_rate": 2.3302129214421244e-06, |
|
"logits/chosen": -1.9942152500152588, |
|
"logits/rejected": -1.9925035238265991, |
|
"logps/chosen": -126.97408294677734, |
|
"logps/rejected": -157.83267211914062, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5849136114120483, |
|
"rewards/margins": 0.2714024782180786, |
|
"rewards/rejected": -0.8563162088394165, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 2.2988335782081854e-06, |
|
"logits/chosen": -1.9507849216461182, |
|
"logits/rejected": -1.9640982151031494, |
|
"logps/chosen": -114.99700927734375, |
|
"logps/rejected": -141.39306640625, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5272036194801331, |
|
"rewards/margins": 0.21672149002552032, |
|
"rewards/rejected": -0.7439250349998474, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.53125, |
|
"learning_rate": 2.2674861033802182e-06, |
|
"logits/chosen": -1.9975817203521729, |
|
"logits/rejected": -2.006187915802002, |
|
"logps/chosen": -121.58160400390625, |
|
"logps/rejected": -147.51416015625, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5389881134033203, |
|
"rewards/margins": 0.24041156470775604, |
|
"rewards/rejected": -0.7793997526168823, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 2.236175462966192e-06, |
|
"logits/chosen": -1.9745140075683594, |
|
"logits/rejected": -1.990915060043335, |
|
"logps/chosen": -119.48726654052734, |
|
"logps/rejected": -139.58609008789062, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5391074419021606, |
|
"rewards/margins": 0.17995604872703552, |
|
"rewards/rejected": -0.7190635204315186, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 2.204906617138839e-06, |
|
"logits/chosen": -2.052870750427246, |
|
"logits/rejected": -2.0588982105255127, |
|
"logps/chosen": -115.16014099121094, |
|
"logps/rejected": -138.7529754638672, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.46134477853775024, |
|
"rewards/margins": 0.18981757760047913, |
|
"rewards/rejected": -0.651162326335907, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 2.173684519449872e-06, |
|
"logits/chosen": -2.017367124557495, |
|
"logits/rejected": -2.0284934043884277, |
|
"logps/chosen": -118.7997055053711, |
|
"logps/rejected": -136.1349639892578, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.49513015151023865, |
|
"rewards/margins": 0.1775195300579071, |
|
"rewards/rejected": -0.672649621963501, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 2.1425141160452495e-06, |
|
"logits/chosen": -1.9408687353134155, |
|
"logits/rejected": -1.9594764709472656, |
|
"logps/chosen": -116.89726257324219, |
|
"logps/rejected": -135.9090118408203, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5176368951797485, |
|
"rewards/margins": 0.18570610880851746, |
|
"rewards/rejected": -0.7033429145812988, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 2.1114003448816205e-06, |
|
"logits/chosen": -1.9267289638519287, |
|
"logits/rejected": -1.930748701095581, |
|
"logps/chosen": -111.66670227050781, |
|
"logps/rejected": -129.04257202148438, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5672639608383179, |
|
"rewards/margins": 0.16399827599525452, |
|
"rewards/rejected": -0.7312622666358948, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 2.080348134944063e-06, |
|
"logits/chosen": -1.9702529907226562, |
|
"logits/rejected": -1.9817975759506226, |
|
"logps/chosen": -119.13056945800781, |
|
"logps/rejected": -137.2602081298828, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5436175465583801, |
|
"rewards/margins": 0.15731260180473328, |
|
"rewards/rejected": -0.7009302377700806, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.890625, |
|
"learning_rate": 2.049362405465236e-06, |
|
"logits/chosen": -2.0406806468963623, |
|
"logits/rejected": -2.043137550354004, |
|
"logps/chosen": -112.21296691894531, |
|
"logps/rejected": -136.54315185546875, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.4973304271697998, |
|
"rewards/margins": 0.20603354275226593, |
|
"rewards/rejected": -0.7033639550209045, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.8515625, |
|
"learning_rate": 2.0184480651460943e-06, |
|
"logits/chosen": -1.961282730102539, |
|
"logits/rejected": -1.9708878993988037, |
|
"logps/chosen": -121.48686218261719, |
|
"logps/rejected": -150.19656372070312, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5785536170005798, |
|
"rewards/margins": 0.25495901703834534, |
|
"rewards/rejected": -0.8335126638412476, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 1.9876100113782534e-06, |
|
"logits/chosen": -2.0227205753326416, |
|
"logits/rejected": -2.0364012718200684, |
|
"logps/chosen": -114.74979400634766, |
|
"logps/rejected": -138.68084716796875, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4629599153995514, |
|
"rewards/margins": 0.2168576419353485, |
|
"rewards/rejected": -0.6798175573348999, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.75, |
|
"learning_rate": 1.9568531294681585e-06, |
|
"logits/chosen": -1.9471362829208374, |
|
"logits/rejected": -1.9518378973007202, |
|
"logps/chosen": -122.4446792602539, |
|
"logps/rejected": -156.8361053466797, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5372573137283325, |
|
"rewards/margins": 0.3052898943424225, |
|
"rewards/rejected": -0.8425471186637878, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 1.926182291863162e-06, |
|
"logits/chosen": -1.8842859268188477, |
|
"logits/rejected": -1.8872559070587158, |
|
"logps/chosen": -115.28511810302734, |
|
"logps/rejected": -142.72998046875, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5601629018783569, |
|
"rewards/margins": 0.23351116478443146, |
|
"rewards/rejected": -0.7936740517616272, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 1.895602357379637e-06, |
|
"logits/chosen": -1.851300597190857, |
|
"logits/rejected": -1.8685184717178345, |
|
"logps/chosen": -120.60140228271484, |
|
"logps/rejected": -148.75662231445312, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5954613089561462, |
|
"rewards/margins": 0.26449450850486755, |
|
"rewards/rejected": -0.859955906867981, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 2.390625, |
|
"learning_rate": 1.8651181704332578e-06, |
|
"logits/chosen": -1.9334551095962524, |
|
"logits/rejected": -1.9329140186309814, |
|
"logps/chosen": -126.85723876953125, |
|
"logps/rejected": -153.77548217773438, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6201340556144714, |
|
"rewards/margins": 0.23935556411743164, |
|
"rewards/rejected": -0.8594895601272583, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.4296875, |
|
"learning_rate": 1.8347345602715543e-06, |
|
"logits/chosen": -1.9892994165420532, |
|
"logits/rejected": -2.0142014026641846, |
|
"logps/chosen": -119.41783142089844, |
|
"logps/rejected": -146.21707153320312, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5245205163955688, |
|
"rewards/margins": 0.25306034088134766, |
|
"rewards/rejected": -0.7775809168815613, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.8044563402088686e-06, |
|
"logits/chosen": -1.9546706676483154, |
|
"logits/rejected": -1.9724689722061157, |
|
"logps/chosen": -130.3236541748047, |
|
"logps/rejected": -160.91209411621094, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6136053800582886, |
|
"rewards/margins": 0.3152478337287903, |
|
"rewards/rejected": -0.9288532137870789, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 1.7742883068638447e-06, |
|
"logits/chosen": -2.0497043132781982, |
|
"logits/rejected": -2.048368453979492, |
|
"logps/chosen": -127.9777603149414, |
|
"logps/rejected": -154.79327392578125, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5853675603866577, |
|
"rewards/margins": 0.23666468262672424, |
|
"rewards/rejected": -0.8220322728157043, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 1.7442352393995516e-06, |
|
"logits/chosen": -1.9354140758514404, |
|
"logits/rejected": -1.9446359872817993, |
|
"logps/chosen": -124.5389175415039, |
|
"logps/rejected": -148.15814208984375, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6259867548942566, |
|
"rewards/margins": 0.19919133186340332, |
|
"rewards/rejected": -0.8251781463623047, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 1.7143018987663814e-06, |
|
"logits/chosen": -1.9998855590820312, |
|
"logits/rejected": -2.0096142292022705, |
|
"logps/chosen": -126.11322021484375, |
|
"logps/rejected": -145.43692016601562, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5246410369873047, |
|
"rewards/margins": 0.19330283999443054, |
|
"rewards/rejected": -0.7179439663887024, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 2.25, |
|
"learning_rate": 1.6844930269478274e-06, |
|
"logits/chosen": -1.9050662517547607, |
|
"logits/rejected": -1.9045469760894775, |
|
"logps/chosen": -123.74418640136719, |
|
"logps/rejected": -137.86448669433594, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5779796242713928, |
|
"rewards/margins": 0.1330142766237259, |
|
"rewards/rejected": -0.7109938859939575, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.828125, |
|
"learning_rate": 1.6548133462092647e-06, |
|
"logits/chosen": -1.9649972915649414, |
|
"logits/rejected": -1.9714637994766235, |
|
"logps/chosen": -129.48873901367188, |
|
"logps/rejected": -158.52069091796875, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6639618277549744, |
|
"rewards/margins": 0.22173753380775452, |
|
"rewards/rejected": -0.885699450969696, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.375, |
|
"learning_rate": 1.6252675583498644e-06, |
|
"logits/chosen": -1.9044713973999023, |
|
"logits/rejected": -1.9047183990478516, |
|
"logps/chosen": -114.80845642089844, |
|
"logps/rejected": -141.17929077148438, |
|
"loss": 0.606, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.506965696811676, |
|
"rewards/margins": 0.22917525470256805, |
|
"rewards/rejected": -0.7361409068107605, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 1.5958603439577381e-06, |
|
"logits/chosen": -1.883062720298767, |
|
"logits/rejected": -1.8782352209091187, |
|
"logps/chosen": -115.2265853881836, |
|
"logps/rejected": -145.7313690185547, |
|
"loss": 0.6037, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5807043313980103, |
|
"rewards/margins": 0.2627798616886139, |
|
"rewards/rejected": -0.843484103679657, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.9375, |
|
"learning_rate": 1.5665963616684477e-06, |
|
"logits/chosen": -1.8872991800308228, |
|
"logits/rejected": -1.9082088470458984, |
|
"logps/chosen": -118.02657318115234, |
|
"logps/rejected": -144.72972106933594, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5587154626846313, |
|
"rewards/margins": 0.24482004344463348, |
|
"rewards/rejected": -0.8035355806350708, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 1.5374802474269973e-06, |
|
"logits/chosen": -1.8889667987823486, |
|
"logits/rejected": -1.8945300579071045, |
|
"logps/chosen": -120.35169982910156, |
|
"logps/rejected": -144.44015502929688, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5581179857254028, |
|
"rewards/margins": 0.23101505637168884, |
|
"rewards/rejected": -0.7891330718994141, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.828125, |
|
"learning_rate": 1.5085166137534124e-06, |
|
"logits/chosen": -1.8958622217178345, |
|
"logits/rejected": -1.8905636072158813, |
|
"logps/chosen": -124.46125793457031, |
|
"logps/rejected": -151.10842895507812, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6295716762542725, |
|
"rewards/margins": 0.23376984894275665, |
|
"rewards/rejected": -0.8633416295051575, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 1.479710049012033e-06, |
|
"logits/chosen": -1.9351632595062256, |
|
"logits/rejected": -1.9478946924209595, |
|
"logps/chosen": -121.83003234863281, |
|
"logps/rejected": -154.7652587890625, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5323207974433899, |
|
"rewards/margins": 0.2836820185184479, |
|
"rewards/rejected": -0.8160028457641602, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 1.4510651166846369e-06, |
|
"logits/chosen": -1.8797328472137451, |
|
"logits/rejected": -1.9063024520874023, |
|
"logps/chosen": -112.57334899902344, |
|
"logps/rejected": -139.17276000976562, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5350430011749268, |
|
"rewards/margins": 0.23087510466575623, |
|
"rewards/rejected": -0.7659180760383606, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.8046875, |
|
"learning_rate": 1.4225863546474944e-06, |
|
"logits/chosen": -1.9153077602386475, |
|
"logits/rejected": -1.92630934715271, |
|
"logps/chosen": -117.9030532836914, |
|
"logps/rejected": -144.8286895751953, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5061390399932861, |
|
"rewards/margins": 0.2523689270019531, |
|
"rewards/rejected": -0.7585079669952393, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.625, |
|
"learning_rate": 1.3942782744524974e-06, |
|
"logits/chosen": -1.9394657611846924, |
|
"logits/rejected": -1.9521135091781616, |
|
"logps/chosen": -122.05293273925781, |
|
"logps/rejected": -145.07711791992188, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5658047199249268, |
|
"rewards/margins": 0.20031043887138367, |
|
"rewards/rejected": -0.7661150693893433, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 1.3661453606124353e-06, |
|
"logits/chosen": -1.8490660190582275, |
|
"logits/rejected": -1.849898099899292, |
|
"logps/chosen": -117.15348815917969, |
|
"logps/rejected": -144.00486755371094, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.5616058111190796, |
|
"rewards/margins": 0.2321668118238449, |
|
"rewards/rejected": -0.7937726378440857, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 1.3381920698905788e-06, |
|
"logits/chosen": -1.8940210342407227, |
|
"logits/rejected": -1.8968864679336548, |
|
"logps/chosen": -122.25953674316406, |
|
"logps/rejected": -151.90289306640625, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.587809145450592, |
|
"rewards/margins": 0.24534039199352264, |
|
"rewards/rejected": -0.8331495523452759, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 1.3104228305946385e-06, |
|
"logits/chosen": -1.8536640405654907, |
|
"logits/rejected": -1.8629567623138428, |
|
"logps/chosen": -108.5637435913086, |
|
"logps/rejected": -140.7650146484375, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5375715494155884, |
|
"rewards/margins": 0.26606285572052, |
|
"rewards/rejected": -0.8036344647407532, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.90625, |
|
"learning_rate": 1.2828420418752442e-06, |
|
"logits/chosen": -1.8929815292358398, |
|
"logits/rejected": -1.9167912006378174, |
|
"logps/chosen": -130.51766967773438, |
|
"logps/rejected": -146.90655517578125, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5917934775352478, |
|
"rewards/margins": 0.16113656759262085, |
|
"rewards/rejected": -0.7529300451278687, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.0, |
|
"learning_rate": 1.2554540730290437e-06, |
|
"logits/chosen": -1.8626874685287476, |
|
"logits/rejected": -1.8674083948135376, |
|
"logps/chosen": -122.74143981933594, |
|
"logps/rejected": -148.19137573242188, |
|
"loss": 0.6133, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6179603338241577, |
|
"rewards/margins": 0.22583599388599396, |
|
"rewards/rejected": -0.8437963724136353, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 1.2282632628065197e-06, |
|
"logits/chosen": -1.8630259037017822, |
|
"logits/rejected": -1.8686736822128296, |
|
"logps/chosen": -127.19625091552734, |
|
"logps/rejected": -152.7012939453125, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6396566033363342, |
|
"rewards/margins": 0.2267201840877533, |
|
"rewards/rejected": -0.8663768768310547, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 1.2012739187246575e-06, |
|
"logits/chosen": -1.9101310968399048, |
|
"logits/rejected": -1.9150078296661377, |
|
"logps/chosen": -125.06834411621094, |
|
"logps/rejected": -152.13970947265625, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6242455840110779, |
|
"rewards/margins": 0.23204731941223145, |
|
"rewards/rejected": -0.8562929034233093, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.7890625, |
|
"learning_rate": 1.1744903163845578e-06, |
|
"logits/chosen": -1.9141194820404053, |
|
"logits/rejected": -1.9090967178344727, |
|
"logps/chosen": -125.41385650634766, |
|
"logps/rejected": -153.21505737304688, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6701093912124634, |
|
"rewards/margins": 0.23631341755390167, |
|
"rewards/rejected": -0.9064227938652039, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 1.1479166987940981e-06, |
|
"logits/chosen": -1.9218595027923584, |
|
"logits/rejected": -1.9358152151107788, |
|
"logps/chosen": -121.7696533203125, |
|
"logps/rejected": -142.92947387695312, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6072500944137573, |
|
"rewards/margins": 0.1642817258834839, |
|
"rewards/rejected": -0.771531879901886, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.890625, |
|
"learning_rate": 1.121557275695771e-06, |
|
"logits/chosen": -1.8173805475234985, |
|
"logits/rejected": -1.8257992267608643, |
|
"logps/chosen": -123.26933288574219, |
|
"logps/rejected": -149.6290740966797, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6274382472038269, |
|
"rewards/margins": 0.2434215098619461, |
|
"rewards/rejected": -0.8708597421646118, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 1.0954162228997778e-06, |
|
"logits/chosen": -1.944850206375122, |
|
"logits/rejected": -1.9471490383148193, |
|
"logps/chosen": -121.35750579833984, |
|
"logps/rejected": -149.9652557373047, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6109983325004578, |
|
"rewards/margins": 0.23012125492095947, |
|
"rewards/rejected": -0.8411195874214172, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.8359375, |
|
"learning_rate": 1.0694976816225072e-06, |
|
"logits/chosen": -1.931652307510376, |
|
"logits/rejected": -1.9369986057281494, |
|
"logps/chosen": -121.70970153808594, |
|
"logps/rejected": -147.0286102294922, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6017236709594727, |
|
"rewards/margins": 0.22414302825927734, |
|
"rewards/rejected": -0.82586669921875, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 1.043805757830495e-06, |
|
"logits/chosen": -1.888380765914917, |
|
"logits/rejected": -1.898215889930725, |
|
"logps/chosen": -123.5376968383789, |
|
"logps/rejected": -143.40316772460938, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5882707834243774, |
|
"rewards/margins": 0.17841866612434387, |
|
"rewards/rejected": -0.7666894793510437, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.921875, |
|
"learning_rate": 1.0183445215899585e-06, |
|
"logits/chosen": -1.9046001434326172, |
|
"logits/rejected": -1.8922739028930664, |
|
"logps/chosen": -119.24836730957031, |
|
"logps/rejected": -143.8057861328125, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5545870065689087, |
|
"rewards/margins": 0.2050396203994751, |
|
"rewards/rejected": -0.7596266269683838, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 2.578125, |
|
"learning_rate": 9.931180064220276e-07, |
|
"logits/chosen": -1.92236328125, |
|
"logits/rejected": -1.924556016921997, |
|
"logps/chosen": -136.11215209960938, |
|
"logps/rejected": -159.76925659179688, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6600568890571594, |
|
"rewards/margins": 0.19632843136787415, |
|
"rewards/rejected": -0.8563854098320007, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.671875, |
|
"learning_rate": 9.681302086637634e-07, |
|
"logits/chosen": -1.8995593786239624, |
|
"logits/rejected": -1.922876000404358, |
|
"logps/chosen": -134.8125457763672, |
|
"logps/rejected": -150.06996154785156, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.647091805934906, |
|
"rewards/margins": 0.1497163623571396, |
|
"rewards/rejected": -0.796808123588562, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 9.433850868350619e-07, |
|
"logits/chosen": -1.8294461965560913, |
|
"logits/rejected": -1.845920205116272, |
|
"logps/chosen": -116.8541259765625, |
|
"logps/rejected": -143.2628936767578, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5514515042304993, |
|
"rewards/margins": 0.2609653174877167, |
|
"rewards/rejected": -0.8124168515205383, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.953125, |
|
"learning_rate": 9.188865610115572e-07, |
|
"logits/chosen": -1.921491265296936, |
|
"logits/rejected": -1.93572998046875, |
|
"logps/chosen": -126.38240814208984, |
|
"logps/rejected": -145.36019897460938, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5697722434997559, |
|
"rewards/margins": 0.17015670239925385, |
|
"rewards/rejected": -0.7399289608001709, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.7890625, |
|
"learning_rate": 8.946385122036066e-07, |
|
"logits/chosen": -1.8846461772918701, |
|
"logits/rejected": -1.8940550088882446, |
|
"logps/chosen": -121.21919250488281, |
|
"logps/rejected": -144.52047729492188, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5412122011184692, |
|
"rewards/margins": 0.19210803508758545, |
|
"rewards/rejected": -0.7333202958106995, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 8.706447817414696e-07, |
|
"logits/chosen": -1.9248275756835938, |
|
"logits/rejected": -1.922368049621582, |
|
"logps/chosen": -127.80845642089844, |
|
"logps/rejected": -151.72030639648438, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6831592917442322, |
|
"rewards/margins": 0.2034546136856079, |
|
"rewards/rejected": -0.8866138458251953, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 8.469091706667748e-07, |
|
"logits/chosen": -1.8915945291519165, |
|
"logits/rejected": -1.895379662513733, |
|
"logps/chosen": -122.15771484375, |
|
"logps/rejected": -147.4517364501953, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6304605007171631, |
|
"rewards/margins": 0.22080358862876892, |
|
"rewards/rejected": -0.8512641191482544, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.96875, |
|
"learning_rate": 8.234354391303606e-07, |
|
"logits/chosen": -1.8591235876083374, |
|
"logits/rejected": -1.8553224802017212, |
|
"logps/chosen": -124.28314208984375, |
|
"logps/rejected": -152.6465301513672, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6334540247917175, |
|
"rewards/margins": 0.24212419986724854, |
|
"rewards/rejected": -0.8755782246589661, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 8.002273057966012e-07, |
|
"logits/chosen": -1.8992531299591064, |
|
"logits/rejected": -1.9243097305297852, |
|
"logps/chosen": -128.35739135742188, |
|
"logps/rejected": -149.07540893554688, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5612360835075378, |
|
"rewards/margins": 0.19729313254356384, |
|
"rewards/rejected": -0.7585291862487793, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 7.772884472543066e-07, |
|
"logits/chosen": -1.9013763666152954, |
|
"logits/rejected": -1.9246801137924194, |
|
"logps/chosen": -124.08580017089844, |
|
"logps/rejected": -140.3448028564453, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.5578123927116394, |
|
"rewards/margins": 0.1421774923801422, |
|
"rewards/rejected": -0.699989914894104, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 7.546224974342775e-07, |
|
"logits/chosen": -1.9061437845230103, |
|
"logits/rejected": -1.899762749671936, |
|
"logps/chosen": -136.278076171875, |
|
"logps/rejected": -161.5443115234375, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6313563585281372, |
|
"rewards/margins": 0.23241499066352844, |
|
"rewards/rejected": -0.8637714385986328, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 7.322330470336314e-07, |
|
"logits/chosen": -1.919785499572754, |
|
"logits/rejected": -1.9172786474227905, |
|
"logps/chosen": -130.88584899902344, |
|
"logps/rejected": -155.8290252685547, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6158424019813538, |
|
"rewards/margins": 0.20419493317604065, |
|
"rewards/rejected": -0.8200373649597168, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 7.10123642946966e-07, |
|
"logits/chosen": -1.9181368350982666, |
|
"logits/rejected": -1.9378869533538818, |
|
"logps/chosen": -125.4980239868164, |
|
"logps/rejected": -148.65103149414062, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5430660247802734, |
|
"rewards/margins": 0.2324432134628296, |
|
"rewards/rejected": -0.775509238243103, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 6.882977877044691e-07, |
|
"logits/chosen": -1.9170925617218018, |
|
"logits/rejected": -1.9317693710327148, |
|
"logps/chosen": -118.6207504272461, |
|
"logps/rejected": -140.1538543701172, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.576205313205719, |
|
"rewards/margins": 0.18266887962818146, |
|
"rewards/rejected": -0.758874237537384, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 6.667589389170561e-07, |
|
"logits/chosen": -1.913522720336914, |
|
"logits/rejected": -1.9155197143554688, |
|
"logps/chosen": -127.30131530761719, |
|
"logps/rejected": -149.8275909423828, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5541995763778687, |
|
"rewards/margins": 0.18559743463993073, |
|
"rewards/rejected": -0.7397969365119934, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.9921875, |
|
"learning_rate": 6.455105087286173e-07, |
|
"logits/chosen": -1.9797407388687134, |
|
"logits/rejected": -1.9776780605316162, |
|
"logps/chosen": -130.6244659423828, |
|
"logps/rejected": -150.9293670654297, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6223429441452026, |
|
"rewards/margins": 0.1531866490840912, |
|
"rewards/rejected": -0.775529682636261, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 6.245558632754778e-07, |
|
"logits/chosen": -1.8683338165283203, |
|
"logits/rejected": -1.8908354043960571, |
|
"logps/chosen": -125.47029876708984, |
|
"logps/rejected": -154.7410125732422, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5848723649978638, |
|
"rewards/margins": 0.2577180862426758, |
|
"rewards/rejected": -0.8425905108451843, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.9375, |
|
"learning_rate": 6.038983221531353e-07, |
|
"logits/chosen": -1.9424070119857788, |
|
"logits/rejected": -1.9472051858901978, |
|
"logps/chosen": -120.87110900878906, |
|
"logps/rejected": -145.4131622314453, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5482410192489624, |
|
"rewards/margins": 0.23777303099632263, |
|
"rewards/rejected": -0.7860140204429626, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 5.83541157890379e-07, |
|
"logits/chosen": -1.9903781414031982, |
|
"logits/rejected": -2.0020580291748047, |
|
"logps/chosen": -123.49699401855469, |
|
"logps/rejected": -151.511474609375, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.501916766166687, |
|
"rewards/margins": 0.23306772112846375, |
|
"rewards/rejected": -0.7349845170974731, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 5.634875954308638e-07, |
|
"logits/chosen": -1.9178415536880493, |
|
"logits/rejected": -1.908630609512329, |
|
"logps/chosen": -129.9990692138672, |
|
"logps/rejected": -152.32931518554688, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6379269361495972, |
|
"rewards/margins": 0.18584506213665009, |
|
"rewards/rejected": -0.8237720727920532, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.6953125, |
|
"learning_rate": 5.437408116222148e-07, |
|
"logits/chosen": -1.8094866275787354, |
|
"logits/rejected": -1.8253847360610962, |
|
"logps/chosen": -115.37788391113281, |
|
"logps/rejected": -147.9441375732422, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5823795199394226, |
|
"rewards/margins": 0.27050092816352844, |
|
"rewards/rejected": -0.852880597114563, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 5.243039347127621e-07, |
|
"logits/chosen": -1.9520610570907593, |
|
"logits/rejected": -1.9586107730865479, |
|
"logps/chosen": -133.17977905273438, |
|
"logps/rejected": -154.48606872558594, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6545882225036621, |
|
"rewards/margins": 0.17907670140266418, |
|
"rewards/rejected": -0.8336648941040039, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 5.05180043855969e-07, |
|
"logits/chosen": -1.8850457668304443, |
|
"logits/rejected": -1.895381212234497, |
|
"logps/chosen": -113.73271179199219, |
|
"logps/rejected": -137.9407501220703, |
|
"loss": 0.6167, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5211442708969116, |
|
"rewards/margins": 0.21343907713890076, |
|
"rewards/rejected": -0.7345833778381348, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.8515625, |
|
"learning_rate": 4.86372168622635e-07, |
|
"logits/chosen": -1.8701765537261963, |
|
"logits/rejected": -1.897936463356018, |
|
"logps/chosen": -124.44834899902344, |
|
"logps/rejected": -146.36325073242188, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6087551116943359, |
|
"rewards/margins": 0.192201167345047, |
|
"rewards/rejected": -0.8009563684463501, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 2.359375, |
|
"learning_rate": 4.678832885209622e-07, |
|
"logits/chosen": -1.9065357446670532, |
|
"logits/rejected": -1.9053528308868408, |
|
"logps/chosen": -133.0020294189453, |
|
"logps/rejected": -151.4831085205078, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6242747902870178, |
|
"rewards/margins": 0.15634949505329132, |
|
"rewards/rejected": -0.7806242108345032, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 4.497163325245416e-07, |
|
"logits/chosen": -1.869490623474121, |
|
"logits/rejected": -1.8728406429290771, |
|
"logps/chosen": -129.19412231445312, |
|
"logps/rejected": -148.17112731933594, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.601394534111023, |
|
"rewards/margins": 0.20290544629096985, |
|
"rewards/rejected": -0.8042998313903809, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 2.34375, |
|
"learning_rate": 4.3187417860835386e-07, |
|
"logits/chosen": -1.8597100973129272, |
|
"logits/rejected": -1.8595672845840454, |
|
"logps/chosen": -123.48005676269531, |
|
"logps/rejected": -146.0297088623047, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6197245717048645, |
|
"rewards/margins": 0.22459180653095245, |
|
"rewards/rejected": -0.8443164825439453, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 1.7265625, |
|
"learning_rate": 4.143596532928468e-07, |
|
"logits/chosen": -1.8806402683258057, |
|
"logits/rejected": -1.901450753211975, |
|
"logps/chosen": -121.08549499511719, |
|
"logps/rejected": -143.3876190185547, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.48497408628463745, |
|
"rewards/margins": 0.20996761322021484, |
|
"rewards/rejected": -0.6949416995048523, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.9921875, |
|
"learning_rate": 3.971755311961606e-07, |
|
"logits/chosen": -1.9731667041778564, |
|
"logits/rejected": -1.9899402856826782, |
|
"logps/chosen": -119.02622985839844, |
|
"logps/rejected": -144.31222534179688, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5730189681053162, |
|
"rewards/margins": 0.22726324200630188, |
|
"rewards/rejected": -0.8002821207046509, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 2.609375, |
|
"learning_rate": 3.8032453459457884e-07, |
|
"logits/chosen": -1.8654229640960693, |
|
"logits/rejected": -1.8797037601470947, |
|
"logps/chosen": -130.2448272705078, |
|
"logps/rejected": -158.59487915039062, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7119914293289185, |
|
"rewards/margins": 0.24810293316841125, |
|
"rewards/rejected": -0.9600943326950073, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 3.6380933299127285e-07, |
|
"logits/chosen": -1.9288314580917358, |
|
"logits/rejected": -1.943996787071228, |
|
"logps/chosen": -119.17585754394531, |
|
"logps/rejected": -147.80038452148438, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5984092950820923, |
|
"rewards/margins": 0.2575072944164276, |
|
"rewards/rejected": -0.8559166193008423, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 3.4763254269339965e-07, |
|
"logits/chosen": -1.8540977239608765, |
|
"logits/rejected": -1.8645613193511963, |
|
"logps/chosen": -138.80377197265625, |
|
"logps/rejected": -159.77102661132812, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6650180816650391, |
|
"rewards/margins": 0.20767009258270264, |
|
"rewards/rejected": -0.8726881146430969, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 3.3179672639763737e-07, |
|
"logits/chosen": -1.9465539455413818, |
|
"logits/rejected": -1.9527965784072876, |
|
"logps/chosen": -113.87541198730469, |
|
"logps/rejected": -147.63290405273438, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.52118980884552, |
|
"rewards/margins": 0.291698157787323, |
|
"rewards/rejected": -0.812887966632843, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 3.163043927842019e-07, |
|
"logits/chosen": -1.9162803888320923, |
|
"logits/rejected": -1.934597373008728, |
|
"logps/chosen": -128.00485229492188, |
|
"logps/rejected": -146.68711853027344, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.596336841583252, |
|
"rewards/margins": 0.18232461810112, |
|
"rewards/rejected": -0.7786614298820496, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 3.011579961194286e-07, |
|
"logits/chosen": -1.956756591796875, |
|
"logits/rejected": -1.9522291421890259, |
|
"logps/chosen": -130.30319213867188, |
|
"logps/rejected": -157.8980712890625, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6217106580734253, |
|
"rewards/margins": 0.24674446880817413, |
|
"rewards/rejected": -0.8684550523757935, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 2.109375, |
|
"learning_rate": 2.8635993586697555e-07, |
|
"logits/chosen": -1.8693218231201172, |
|
"logits/rejected": -1.8768870830535889, |
|
"logps/chosen": -117.62882995605469, |
|
"logps/rejected": -141.52174377441406, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.554486870765686, |
|
"rewards/margins": 0.21078386902809143, |
|
"rewards/rejected": -0.7652707695960999, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 2.125, |
|
"learning_rate": 2.7191255630769855e-07, |
|
"logits/chosen": -1.9183435440063477, |
|
"logits/rejected": -1.9085958003997803, |
|
"logps/chosen": -131.42051696777344, |
|
"logps/rejected": -160.8799591064453, |
|
"loss": 0.5994, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6411622762680054, |
|
"rewards/margins": 0.2594824433326721, |
|
"rewards/rejected": -0.9006446599960327, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 2.5781814616827936e-07, |
|
"logits/chosen": -1.9339672327041626, |
|
"logits/rejected": -1.9259040355682373, |
|
"logps/chosen": -126.9478530883789, |
|
"logps/rejected": -150.4621124267578, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6331970691680908, |
|
"rewards/margins": 0.18595722317695618, |
|
"rewards/rejected": -0.8191541433334351, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 2.4407893825864893e-07, |
|
"logits/chosen": -1.8841025829315186, |
|
"logits/rejected": -1.9010101556777954, |
|
"logps/chosen": -123.89164733886719, |
|
"logps/rejected": -148.62289428710938, |
|
"loss": 0.6121, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6260435581207275, |
|
"rewards/margins": 0.23781859874725342, |
|
"rewards/rejected": -0.863862156867981, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 2.5, |
|
"learning_rate": 2.3069710911826858e-07, |
|
"logits/chosen": -1.873400092124939, |
|
"logits/rejected": -1.8696212768554688, |
|
"logps/chosen": -131.77992248535156, |
|
"logps/rejected": -158.84988403320312, |
|
"loss": 0.6255, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.671171247959137, |
|
"rewards/margins": 0.2179645597934723, |
|
"rewards/rejected": -0.8891357183456421, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.5703125, |
|
"learning_rate": 2.176747786713282e-07, |
|
"logits/chosen": -1.8760631084442139, |
|
"logits/rejected": -1.8778858184814453, |
|
"logps/chosen": -125.80195617675781, |
|
"logps/rejected": -149.81651306152344, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6132515072822571, |
|
"rewards/margins": 0.2072262316942215, |
|
"rewards/rejected": -0.8204777836799622, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 2.0501400989091036e-07, |
|
"logits/chosen": -1.9188148975372314, |
|
"logits/rejected": -1.9283740520477295, |
|
"logps/chosen": -126.12955474853516, |
|
"logps/rejected": -145.44830322265625, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5981575846672058, |
|
"rewards/margins": 0.18680432438850403, |
|
"rewards/rejected": -0.7849618792533875, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 1.927168084721795e-07, |
|
"logits/chosen": -1.9040225744247437, |
|
"logits/rejected": -1.918349027633667, |
|
"logps/chosen": -121.2413558959961, |
|
"logps/rejected": -146.60833740234375, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5998507738113403, |
|
"rewards/margins": 0.24554471671581268, |
|
"rewards/rejected": -0.8453954458236694, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.6796875, |
|
"learning_rate": 1.8078512251464285e-07, |
|
"logits/chosen": -1.9655431509017944, |
|
"logits/rejected": -1.9582946300506592, |
|
"logps/chosen": -131.67860412597656, |
|
"logps/rejected": -152.63572692871094, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5897047519683838, |
|
"rewards/margins": 0.19095389544963837, |
|
"rewards/rejected": -0.7806587219238281, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 2.421875, |
|
"learning_rate": 1.6922084221353607e-07, |
|
"logits/chosen": -1.915435791015625, |
|
"logits/rejected": -1.941033124923706, |
|
"logps/chosen": -124.4185562133789, |
|
"logps/rejected": -150.6072540283203, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5871225595474243, |
|
"rewards/margins": 0.2428620308637619, |
|
"rewards/rejected": -0.8299845457077026, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 1.5802579956038093e-07, |
|
"logits/chosen": -1.8886842727661133, |
|
"logits/rejected": -1.9026222229003906, |
|
"logps/chosen": -115.16085052490234, |
|
"logps/rejected": -143.67367553710938, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5255457758903503, |
|
"rewards/margins": 0.26099538803100586, |
|
"rewards/rejected": -0.7865411639213562, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 1.472017680527685e-07, |
|
"logits/chosen": -1.9219309091567993, |
|
"logits/rejected": -1.9138708114624023, |
|
"logps/chosen": -121.44775390625, |
|
"logps/rejected": -151.65969848632812, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6054600477218628, |
|
"rewards/margins": 0.24131233990192413, |
|
"rewards/rejected": -0.846772313117981, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 1.3675046241339918e-07, |
|
"logits/chosen": -1.8917341232299805, |
|
"logits/rejected": -1.9025049209594727, |
|
"logps/chosen": -127.67252349853516, |
|
"logps/rejected": -148.03077697753906, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5955443978309631, |
|
"rewards/margins": 0.18720856308937073, |
|
"rewards/rejected": -0.7827528715133667, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 1.2667353831844585e-07, |
|
"logits/chosen": -1.83237624168396, |
|
"logits/rejected": -1.8439161777496338, |
|
"logps/chosen": -128.77362060546875, |
|
"logps/rejected": -150.86912536621094, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6072179675102234, |
|
"rewards/margins": 0.20041854679584503, |
|
"rewards/rejected": -0.807636559009552, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.8359375, |
|
"learning_rate": 1.1697259213525936e-07, |
|
"logits/chosen": -1.8945062160491943, |
|
"logits/rejected": -1.8865067958831787, |
|
"logps/chosen": -113.77557373046875, |
|
"logps/rejected": -145.04769897460938, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5280709266662598, |
|
"rewards/margins": 0.2756304144859314, |
|
"rewards/rejected": -0.8037012815475464, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 1.0764916066947795e-07, |
|
"logits/chosen": -1.801983118057251, |
|
"logits/rejected": -1.791394829750061, |
|
"logps/chosen": -131.66268920898438, |
|
"logps/rejected": -166.57620239257812, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7564202547073364, |
|
"rewards/margins": 0.3014541268348694, |
|
"rewards/rejected": -1.057874321937561, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.640625, |
|
"learning_rate": 9.870472092156941e-08, |
|
"logits/chosen": -1.8863308429718018, |
|
"logits/rejected": -1.8997328281402588, |
|
"logps/chosen": -120.3888931274414, |
|
"logps/rejected": -147.81582641601562, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6340813040733337, |
|
"rewards/margins": 0.23104877769947052, |
|
"rewards/rejected": -0.8651300668716431, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 9.014068985284618e-08, |
|
"logits/chosen": -1.8636146783828735, |
|
"logits/rejected": -1.8543964624404907, |
|
"logps/chosen": -123.03385162353516, |
|
"logps/rejected": -138.28298950195312, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5915199518203735, |
|
"rewards/margins": 0.1568307727575302, |
|
"rewards/rejected": -0.7483507394790649, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 8.19584241609936e-08, |
|
"logits/chosen": -1.983232855796814, |
|
"logits/rejected": -1.9851760864257812, |
|
"logps/chosen": -132.586669921875, |
|
"logps/rejected": -164.07855224609375, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6490141153335571, |
|
"rewards/margins": 0.2791653275489807, |
|
"rewards/rejected": -0.9281795620918274, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 2.25, |
|
"learning_rate": 7.415922006514448e-08, |
|
"logits/chosen": -1.888055443763733, |
|
"logits/rejected": -1.9014968872070312, |
|
"logps/chosen": -122.8010482788086, |
|
"logps/rejected": -147.2124786376953, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.582960307598114, |
|
"rewards/margins": 0.22722116112709045, |
|
"rewards/rejected": -0.8101814985275269, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.9140625, |
|
"learning_rate": 6.674431310053519e-08, |
|
"logits/chosen": -1.9087717533111572, |
|
"logits/rejected": -1.9133113622665405, |
|
"logps/chosen": -117.87506103515625, |
|
"logps/rejected": -142.63198852539062, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5861650109291077, |
|
"rewards/margins": 0.21530351042747498, |
|
"rewards/rejected": -0.8014683723449707, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.625, |
|
"learning_rate": 5.971487792277297e-08, |
|
"logits/chosen": -1.927983045578003, |
|
"logits/rejected": -1.9456886053085327, |
|
"logps/chosen": -117.11873626708984, |
|
"logps/rejected": -138.4589385986328, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5543134212493896, |
|
"rewards/margins": 0.19218505918979645, |
|
"rewards/rejected": -0.7464984655380249, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.8984375, |
|
"learning_rate": 5.307202812175005e-08, |
|
"logits/chosen": -1.8637211322784424, |
|
"logits/rejected": -1.878003716468811, |
|
"logps/chosen": -124.0638656616211, |
|
"logps/rejected": -146.49771118164062, |
|
"loss": 0.6155, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6212812662124634, |
|
"rewards/margins": 0.21200039982795715, |
|
"rewards/rejected": -0.8332816362380981, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.7421875, |
|
"learning_rate": 4.681681604523064e-08, |
|
"logits/chosen": -1.877753496170044, |
|
"logits/rejected": -1.8904426097869873, |
|
"logps/chosen": -126.96830749511719, |
|
"logps/rejected": -155.18118286132812, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6443455815315247, |
|
"rewards/margins": 0.26543739438056946, |
|
"rewards/rejected": -0.9097830057144165, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.625, |
|
"learning_rate": 4.0950232632141205e-08, |
|
"logits/chosen": -1.9946720600128174, |
|
"logits/rejected": -2.0097415447235107, |
|
"logps/chosen": -134.808349609375, |
|
"logps/rejected": -155.46096801757812, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6130391359329224, |
|
"rewards/margins": 0.19625858962535858, |
|
"rewards/rejected": -0.8092976808547974, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 3.547320725558495e-08, |
|
"logits/chosen": -1.8689781427383423, |
|
"logits/rejected": -1.8868701457977295, |
|
"logps/chosen": -127.9247817993164, |
|
"logps/rejected": -149.6826629638672, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6171109676361084, |
|
"rewards/margins": 0.20821337401866913, |
|
"rewards/rejected": -0.8253243565559387, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.3828125, |
|
"learning_rate": 3.038660757561568e-08, |
|
"logits/chosen": -1.9256298542022705, |
|
"logits/rejected": -1.9275277853012085, |
|
"logps/chosen": -131.28964233398438, |
|
"logps/rejected": -156.5281524658203, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5464403629302979, |
|
"rewards/margins": 0.2162129133939743, |
|
"rewards/rejected": -0.7626532316207886, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 2.569123940178192e-08, |
|
"logits/chosen": -1.8706880807876587, |
|
"logits/rejected": -1.895275354385376, |
|
"logps/chosen": -126.8041000366211, |
|
"logps/rejected": -152.7987060546875, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6154114007949829, |
|
"rewards/margins": 0.23175501823425293, |
|
"rewards/rejected": -0.8471664190292358, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 2.1387846565474047e-08, |
|
"logits/chosen": -1.9511226415634155, |
|
"logits/rejected": -1.9575185775756836, |
|
"logps/chosen": -122.9830322265625, |
|
"logps/rejected": -150.94505310058594, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6026027798652649, |
|
"rewards/margins": 0.24615421891212463, |
|
"rewards/rejected": -0.8487569689750671, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.7477110802086583e-08, |
|
"logits/chosen": -1.8935045003890991, |
|
"logits/rejected": -1.9025996923446655, |
|
"logps/chosen": -128.3074188232422, |
|
"logps/rejected": -151.99522399902344, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6137545704841614, |
|
"rewards/margins": 0.19285576045513153, |
|
"rewards/rejected": -0.8066104054450989, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 2.0, |
|
"learning_rate": 1.3959651643019601e-08, |
|
"logits/chosen": -1.9040815830230713, |
|
"logits/rejected": -1.9198287725448608, |
|
"logps/chosen": -127.39599609375, |
|
"logps/rejected": -146.60008239746094, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5616117119789124, |
|
"rewards/margins": 0.1830439418554306, |
|
"rewards/rejected": -0.7446557283401489, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 2.390625, |
|
"learning_rate": 1.0836026317533887e-08, |
|
"logits/chosen": -1.925762414932251, |
|
"logits/rejected": -1.9249995946884155, |
|
"logps/chosen": -134.05189514160156, |
|
"logps/rejected": -150.71640014648438, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.6878820657730103, |
|
"rewards/margins": 0.15121865272521973, |
|
"rewards/rejected": -0.83910071849823, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 8.106729664475178e-09, |
|
"logits/chosen": -1.9120187759399414, |
|
"logits/rejected": -1.9409675598144531, |
|
"logps/chosen": -126.06358337402344, |
|
"logps/rejected": -143.04531860351562, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.599898636341095, |
|
"rewards/margins": 0.15934984385967255, |
|
"rewards/rejected": -0.7592484951019287, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.78125, |
|
"learning_rate": 5.772194053882962e-09, |
|
"logits/chosen": -1.8914750814437866, |
|
"logits/rejected": -1.8805005550384521, |
|
"logps/chosen": -124.221435546875, |
|
"logps/rejected": -154.97128295898438, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6593270301818848, |
|
"rewards/margins": 0.2628743350505829, |
|
"rewards/rejected": -0.9222013354301453, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 3.832789318495289e-09, |
|
"logits/chosen": -1.9097976684570312, |
|
"logits/rejected": -1.9211629629135132, |
|
"logps/chosen": -117.1760025024414, |
|
"logps/rejected": -139.37118530273438, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5425055027008057, |
|
"rewards/margins": 0.20210960507392883, |
|
"rewards/rejected": -0.7446150779724121, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 3.078125, |
|
"learning_rate": 2.288822695160897e-09, |
|
"logits/chosen": -1.8931448459625244, |
|
"logits/rejected": -1.8963727951049805, |
|
"logps/chosen": -138.54782104492188, |
|
"logps/rejected": -168.485595703125, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7985516786575317, |
|
"rewards/margins": 0.25337541103363037, |
|
"rewards/rejected": -1.051927089691162, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 1.1405387761664888e-09, |
|
"logits/chosen": -1.9339786767959595, |
|
"logits/rejected": -1.9223487377166748, |
|
"logps/chosen": -120.66938781738281, |
|
"logps/rejected": -145.23965454101562, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5394836664199829, |
|
"rewards/margins": 0.20085513591766357, |
|
"rewards/rejected": -0.7403386831283569, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 3.8811947048994494e-10, |
|
"logits/chosen": -1.9571815729141235, |
|
"logits/rejected": -1.9642584323883057, |
|
"logps/chosen": -129.06796264648438, |
|
"logps/rejected": -154.98562622070312, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6091276407241821, |
|
"rewards/margins": 0.23150965571403503, |
|
"rewards/rejected": -0.8406373262405396, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.453125, |
|
"learning_rate": 3.168397498115594e-11, |
|
"logits/chosen": -1.903794288635254, |
|
"logits/rejected": -1.9025567770004272, |
|
"logps/chosen": -130.3432159423828, |
|
"logps/rejected": -151.00790405273438, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6614962220191956, |
|
"rewards/margins": 0.1874585896730423, |
|
"rewards/rejected": -0.8489547967910767, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 2774, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6373478753496264, |
|
"train_runtime": 5106.1264, |
|
"train_samples_per_second": 8.696, |
|
"train_steps_per_second": 0.543 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2774, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|