|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9921259842519685, |
|
"eval_steps": 500, |
|
"global_step": 42, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14389.429036458334, |
|
"epoch": 0.023622047244094488, |
|
"grad_norm": 0.18101558089256287, |
|
"kl": 0.0, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.0, |
|
"reward": 0.09523809949556987, |
|
"reward_std": 0.2519763112068176, |
|
"rewards/accuracy_reward": 0.09523809949556987, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14440.095703125, |
|
"epoch": 0.047244094488188976, |
|
"grad_norm": 0.10683715343475342, |
|
"kl": 0.0, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": -0.0, |
|
"reward": 0.09523809949556987, |
|
"reward_std": 0.16265000899632773, |
|
"rewards/accuracy_reward": 0.09523809949556987, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14900.953125, |
|
"epoch": 0.07086614173228346, |
|
"grad_norm": 0.09402324259281158, |
|
"kl": 9.489059448242188e-05, |
|
"learning_rate": 1.8e-06, |
|
"loss": 0.0, |
|
"reward": 0.047619049747784935, |
|
"reward_std": 0.1259881556034088, |
|
"rewards/accuracy_reward": 0.047619049747784935, |
|
"step": 3 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12512.096028645834, |
|
"epoch": 0.09448818897637795, |
|
"grad_norm": 0.20009347796440125, |
|
"kl": 0.0001041094462076823, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.0, |
|
"reward": 0.1428571492433548, |
|
"reward_std": 0.2886381645997365, |
|
"rewards/accuracy_reward": 0.1428571492433548, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15601.238932291666, |
|
"epoch": 0.11811023622047244, |
|
"grad_norm": 0.0005439579836092889, |
|
"kl": 7.62939453125e-05, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14982.953125, |
|
"epoch": 0.14173228346456693, |
|
"grad_norm": 0.17226731777191162, |
|
"kl": 8.336702982584636e-05, |
|
"learning_rate": 2.99459623281379e-06, |
|
"loss": 0.0, |
|
"reward": 0.09523809949556987, |
|
"reward_std": 0.2519763112068176, |
|
"rewards/accuracy_reward": 0.09523809949556987, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14418.096028645834, |
|
"epoch": 0.16535433070866143, |
|
"grad_norm": 0.2076222449541092, |
|
"kl": 9.473164876302083e-05, |
|
"learning_rate": 2.978423865521563e-06, |
|
"loss": 0.0, |
|
"reward": 0.2857143034537633, |
|
"reward_std": 0.4668123225371043, |
|
"rewards/accuracy_reward": 0.2857143034537633, |
|
"step": 7 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 13729.191080729166, |
|
"epoch": 0.1889763779527559, |
|
"grad_norm": 0.13402122259140015, |
|
"kl": 0.00010665257771809895, |
|
"learning_rate": 2.9515994204002487e-06, |
|
"loss": 0.0, |
|
"reward": 0.14285715421040854, |
|
"reward_std": 0.17817415793736777, |
|
"rewards/accuracy_reward": 0.14285715421040854, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12956.857747395834, |
|
"epoch": 0.2125984251968504, |
|
"grad_norm": 0.11447420716285706, |
|
"kl": 0.00014670689900716147, |
|
"learning_rate": 2.9143161681916264e-06, |
|
"loss": 0.0, |
|
"reward": 0.047619049747784935, |
|
"reward_std": 0.1259881556034088, |
|
"rewards/accuracy_reward": 0.047619049747784935, |
|
"step": 9 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 11794.762532552084, |
|
"epoch": 0.23622047244094488, |
|
"grad_norm": 0.0022508089896291494, |
|
"kl": 0.00014130274454752603, |
|
"learning_rate": 2.866842735582204e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12350.71484375, |
|
"epoch": 0.25984251968503935, |
|
"grad_norm": 0.19783379137516022, |
|
"kl": 0.00015211105346679688, |
|
"learning_rate": 2.8095211697417823e-06, |
|
"loss": 0.0, |
|
"reward": 0.09523809949556987, |
|
"reward_std": 0.2519763112068176, |
|
"rewards/accuracy_reward": 0.09523809949556987, |
|
"step": 11 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15620.238606770834, |
|
"epoch": 0.28346456692913385, |
|
"grad_norm": 0.004334005527198315, |
|
"kl": 0.0004711151123046875, |
|
"learning_rate": 2.7427644738657634e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12504.857747395834, |
|
"epoch": 0.30708661417322836, |
|
"grad_norm": 0.23003408312797546, |
|
"kl": 0.00032520294189453125, |
|
"learning_rate": 2.6670536314776595e-06, |
|
"loss": 0.0, |
|
"reward": 0.2380952537059784, |
|
"reward_std": 0.43015046914418537, |
|
"rewards/accuracy_reward": 0.2380952537059784, |
|
"step": 13 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15596.096028645834, |
|
"epoch": 0.33070866141732286, |
|
"grad_norm": 0.0899478867650032, |
|
"kl": 0.0004437764485677083, |
|
"learning_rate": 2.5829341409317867e-06, |
|
"loss": 0.0, |
|
"reward": 0.047619049747784935, |
|
"reward_std": 0.1259881556034088, |
|
"rewards/accuracy_reward": 0.047619049747784935, |
|
"step": 14 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12472.667317708334, |
|
"epoch": 0.3543307086614173, |
|
"grad_norm": 0.1614641547203064, |
|
"kl": 0.00022761027018229166, |
|
"learning_rate": 2.4910120850851222e-06, |
|
"loss": 0.0, |
|
"reward": 0.047619049747784935, |
|
"reward_std": 0.1259881556034088, |
|
"rewards/accuracy_reward": 0.047619049747784935, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14462.953125, |
|
"epoch": 0.3779527559055118, |
|
"grad_norm": 0.1555420160293579, |
|
"kl": 0.0011622111002604167, |
|
"learning_rate": 2.39194976445643e-06, |
|
"loss": 0.0, |
|
"reward": 0.19047619899113974, |
|
"reward_std": 0.32530001799265545, |
|
"rewards/accuracy_reward": 0.19047619899113974, |
|
"step": 16 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14019.333984375, |
|
"epoch": 0.4015748031496063, |
|
"grad_norm": 0.006728035863488913, |
|
"kl": 0.0006306966145833334, |
|
"learning_rate": 2.286460925335848e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14344.667317708334, |
|
"epoch": 0.4251968503937008, |
|
"grad_norm": 0.10385935008525848, |
|
"kl": 0.0008605321248372396, |
|
"learning_rate": 2.17530561722651e-06, |
|
"loss": 0.0, |
|
"reward": 0.14285715421040854, |
|
"reward_std": 0.17817415793736777, |
|
"rewards/accuracy_reward": 0.14285715421040854, |
|
"step": 18 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15407.238932291666, |
|
"epoch": 0.44881889763779526, |
|
"grad_norm": 0.007518239319324493, |
|
"kl": 0.0008875528971354166, |
|
"learning_rate": 2.059284716670463e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 11890.9052734375, |
|
"epoch": 0.47244094488188976, |
|
"grad_norm": 0.15603958070278168, |
|
"kl": 0.0013834635416666667, |
|
"learning_rate": 1.9392341569148255e-06, |
|
"loss": 0.0001, |
|
"reward": 0.23809524377187094, |
|
"reward_std": 0.16265000899632773, |
|
"rewards/accuracy_reward": 0.23809524377187094, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 13337.333658854166, |
|
"epoch": 0.49606299212598426, |
|
"grad_norm": 0.007209372241050005, |
|
"kl": 0.0018170674641927083, |
|
"learning_rate": 1.8160189049935894e-06, |
|
"loss": 0.0001, |
|
"reward": 0.3333333333333333, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.3333333333333333, |
|
"step": 21 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14147.5244140625, |
|
"epoch": 0.5196850393700787, |
|
"grad_norm": 0.16794154047966003, |
|
"kl": 0.0015004475911458333, |
|
"learning_rate": 1.6905267296203183e-06, |
|
"loss": 0.0001, |
|
"reward": 0.3333333432674408, |
|
"reward_std": 0.32530001799265545, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"step": 22 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 10586.619303385416, |
|
"epoch": 0.5433070866141733, |
|
"grad_norm": 0.24202802777290344, |
|
"kl": 0.0016371409098307292, |
|
"learning_rate": 1.5636618047942224e-06, |
|
"loss": 0.0001, |
|
"reward": 0.571428601940473, |
|
"reward_std": 0.4146263202031453, |
|
"rewards/accuracy_reward": 0.571428601940473, |
|
"step": 23 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12914.190755208334, |
|
"epoch": 0.5669291338582677, |
|
"grad_norm": 0.006770447362214327, |
|
"kl": 0.0011011759440104167, |
|
"learning_rate": 1.4363381952057778e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15026.238606770834, |
|
"epoch": 0.5905511811023622, |
|
"grad_norm": 0.14896388351917267, |
|
"kl": 0.0005785624186197916, |
|
"learning_rate": 1.3094732703796818e-06, |
|
"loss": 0.0, |
|
"reward": 0.19047620395819345, |
|
"reward_std": 0.30416231354077655, |
|
"rewards/accuracy_reward": 0.19047620395819345, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14715.4765625, |
|
"epoch": 0.6141732283464567, |
|
"grad_norm": 0.00993309449404478, |
|
"kl": 0.001087188720703125, |
|
"learning_rate": 1.183981095006411e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12330.762532552084, |
|
"epoch": 0.6377952755905512, |
|
"grad_norm": 0.008712113834917545, |
|
"kl": 0.0025761922200520835, |
|
"learning_rate": 1.0607658430851746e-06, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 13653.619140625, |
|
"epoch": 0.6614173228346457, |
|
"grad_norm": 0.007787778973579407, |
|
"kl": 0.0022443135579427085, |
|
"learning_rate": 9.407152833295372e-07, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 13795.381184895834, |
|
"epoch": 0.6850393700787402, |
|
"grad_norm": 0.008136502467095852, |
|
"kl": 0.0014813741048177083, |
|
"learning_rate": 8.246943827734898e-07, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 29 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15744.857747395834, |
|
"epoch": 0.7086614173228346, |
|
"grad_norm": 0.005922618322074413, |
|
"kl": 0.0023244222005208335, |
|
"learning_rate": 7.135390746641527e-07, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12687.0009765625, |
|
"epoch": 0.7322834645669292, |
|
"grad_norm": 0.10701826214790344, |
|
"kl": 0.0008004506429036459, |
|
"learning_rate": 6.080502355435701e-07, |
|
"loss": 0.0, |
|
"reward": 0.09523809949556987, |
|
"reward_std": 0.16265000899632773, |
|
"rewards/accuracy_reward": 0.09523809949556987, |
|
"step": 31 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 13935.095703125, |
|
"epoch": 0.7559055118110236, |
|
"grad_norm": 0.10233797878026962, |
|
"kl": 0.002204259236653646, |
|
"learning_rate": 5.089879149148781e-07, |
|
"loss": 0.0001, |
|
"reward": 0.14285715421040854, |
|
"reward_std": 0.17817415793736777, |
|
"rewards/accuracy_reward": 0.14285715421040854, |
|
"step": 32 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15292.429361979166, |
|
"epoch": 0.7795275590551181, |
|
"grad_norm": 0.1071338877081871, |
|
"kl": 0.0007845560709635416, |
|
"learning_rate": 4.170658590682134e-07, |
|
"loss": 0.0, |
|
"reward": 0.14285715421040854, |
|
"reward_std": 0.17817415793736777, |
|
"rewards/accuracy_reward": 0.14285715421040854, |
|
"step": 33 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12384.1435546875, |
|
"epoch": 0.8031496062992126, |
|
"grad_norm": 0.19518683850765228, |
|
"kl": 0.0014890034993489583, |
|
"learning_rate": 3.3294636852234106e-07, |
|
"loss": 0.0001, |
|
"reward": 0.3809523979822795, |
|
"reward_std": 0.35634831587473553, |
|
"rewards/accuracy_reward": 0.3809523979822795, |
|
"step": 34 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 15084.381510416666, |
|
"epoch": 0.8267716535433071, |
|
"grad_norm": 0.12067427486181259, |
|
"kl": 0.0007756551106770834, |
|
"learning_rate": 2.572355261342369e-07, |
|
"loss": 0.0, |
|
"reward": 0.09523809949556987, |
|
"reward_std": 0.16265000899632773, |
|
"rewards/accuracy_reward": 0.09523809949556987, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 13533.048502604166, |
|
"epoch": 0.8503937007874016, |
|
"grad_norm": 0.1780463457107544, |
|
"kl": 0.0023701985677083335, |
|
"learning_rate": 1.9047883025821777e-07, |
|
"loss": 0.0001, |
|
"reward": 0.3333333432674408, |
|
"reward_std": 0.32530001799265545, |
|
"rewards/accuracy_reward": 0.3333333432674408, |
|
"step": 36 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12354.524088541666, |
|
"epoch": 0.8740157480314961, |
|
"grad_norm": 0.008607493713498116, |
|
"kl": 0.0020573933919270835, |
|
"learning_rate": 1.3315726441779629e-07, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14624.238606770834, |
|
"epoch": 0.8976377952755905, |
|
"grad_norm": 0.00765492208302021, |
|
"kl": 0.0015093485514322917, |
|
"learning_rate": 8.568383180837369e-08, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 14310.810221354166, |
|
"epoch": 0.9212598425196851, |
|
"grad_norm": 0.009644408710300922, |
|
"kl": 0.0020573933919270835, |
|
"learning_rate": 4.8400579599751696e-08, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 11984.619547526041, |
|
"epoch": 0.9448818897637795, |
|
"grad_norm": 0.008329860866069794, |
|
"kl": 0.0015360514322916667, |
|
"learning_rate": 2.1576134478437316e-08, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 13679.857747395834, |
|
"epoch": 0.968503937007874, |
|
"grad_norm": 0.16029423475265503, |
|
"kl": 0.001178741455078125, |
|
"learning_rate": 5.403767186210218e-09, |
|
"loss": 0.0, |
|
"reward": 0.047619049747784935, |
|
"reward_std": 0.1259881556034088, |
|
"rewards/accuracy_reward": 0.047619049747784935, |
|
"step": 41 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 12962.333333333334, |
|
"epoch": 0.9921259842519685, |
|
"grad_norm": 0.010274315252900124, |
|
"kl": 0.0017293294270833333, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9921259842519685, |
|
"step": 42, |
|
"total_flos": 0.0, |
|
"train_loss": 4.045824278607414e-05, |
|
"train_runtime": 33605.9584, |
|
"train_samples_per_second": 0.004, |
|
"train_steps_per_second": 0.001 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 42, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|