|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999559277214632, |
|
"eval_steps": 500, |
|
"global_step": 567, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 477.0, |
|
"completions/max_terminated_length": 477.0, |
|
"completions/mean_length": 175.50390625, |
|
"completions/mean_terminated_length": 175.50390625, |
|
"completions/min_length": 21.0, |
|
"completions/min_terminated_length": 21.0, |
|
"epoch": 0.0017628911414720142, |
|
"grad_norm": 1.0880173896572545, |
|
"kl": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": -0.327, |
|
"num_tokens": 129409.0, |
|
"reward": 0.814777672290802, |
|
"reward_std": 0.14736539125442505, |
|
"rewards/format_reward/mean": 0.68359375, |
|
"rewards/format_reward/std": 0.4659844934940338, |
|
"rewards/qatch_metrics/mean": 0.8332747220993042, |
|
"rewards/qatch_metrics/std": 0.3284282088279724, |
|
"rewards/tag_count_reward/mean": 0.7626953125, |
|
"rewards/tag_count_reward/std": 0.34948837757110596, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 421.0, |
|
"completions/max_terminated_length": 421.0, |
|
"completions/mean_length": 177.318359375, |
|
"completions/mean_terminated_length": 177.318359375, |
|
"completions/min_length": 21.5, |
|
"completions/min_terminated_length": 21.5, |
|
"epoch": 0.00881445570736007, |
|
"grad_norm": 0.9499188530188546, |
|
"kl": 0.00019824504852294922, |
|
"learning_rate": 7.017543859649122e-08, |
|
"loss": -0.2902, |
|
"num_tokens": 685703.0, |
|
"reward": 0.762174516916275, |
|
"reward_std": 0.15002675727009773, |
|
"rewards/format_reward/mean": 0.7265625, |
|
"rewards/format_reward/std": 0.4450720399618149, |
|
"rewards/qatch_metrics/mean": 0.7644235193729401, |
|
"rewards/qatch_metrics/std": 0.3610532283782959, |
|
"rewards/tag_count_reward/mean": 0.795166015625, |
|
"rewards/tag_count_reward/std": 0.33385463058948517, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 438.8, |
|
"completions/max_terminated_length": 438.8, |
|
"completions/mean_length": 173.41171875, |
|
"completions/mean_terminated_length": 173.41171875, |
|
"completions/min_length": 21.8, |
|
"completions/min_terminated_length": 21.8, |
|
"epoch": 0.01762891141472014, |
|
"grad_norm": 0.9346895582900878, |
|
"kl": 0.00028295516967773436, |
|
"learning_rate": 1.5789473684210525e-07, |
|
"loss": -0.2591, |
|
"num_tokens": 1398566.0, |
|
"reward": 0.7710299372673035, |
|
"reward_std": 0.1539353460073471, |
|
"rewards/format_reward/mean": 0.71796875, |
|
"rewards/format_reward/std": 0.4487275779247284, |
|
"rewards/qatch_metrics/mean": 0.7762346506118775, |
|
"rewards/qatch_metrics/std": 0.3281721532344818, |
|
"rewards/tag_count_reward/mean": 0.788671875, |
|
"rewards/tag_count_reward/std": 0.33627479076385497, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 438.2, |
|
"completions/max_terminated_length": 438.2, |
|
"completions/mean_length": 183.1796875, |
|
"completions/mean_terminated_length": 183.1796875, |
|
"completions/min_length": 20.0, |
|
"completions/min_terminated_length": 20.0, |
|
"epoch": 0.026443367122080213, |
|
"grad_norm": 0.7943318239924386, |
|
"kl": 0.00037631988525390627, |
|
"learning_rate": 2.456140350877193e-07, |
|
"loss": -0.2603, |
|
"num_tokens": 2071996.0, |
|
"reward": 0.7256837129592896, |
|
"reward_std": 0.12991088777780532, |
|
"rewards/format_reward/mean": 0.765625, |
|
"rewards/format_reward/std": 0.4240167737007141, |
|
"rewards/qatch_metrics/mean": 0.7151770830154419, |
|
"rewards/qatch_metrics/std": 0.37596395611763, |
|
"rewards/tag_count_reward/mean": 0.8244140625, |
|
"rewards/tag_count_reward/std": 0.31790287494659425, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 479.6, |
|
"completions/max_terminated_length": 479.6, |
|
"completions/mean_length": 201.30234375, |
|
"completions/mean_terminated_length": 201.30234375, |
|
"completions/min_length": 21.2, |
|
"completions/min_terminated_length": 21.2, |
|
"epoch": 0.03525782282944028, |
|
"grad_norm": 0.4721344642723057, |
|
"kl": 0.00091400146484375, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": -0.1315, |
|
"num_tokens": 2791247.0, |
|
"reward": 0.8173989057540894, |
|
"reward_std": 0.12794919013977052, |
|
"rewards/format_reward/mean": 0.89765625, |
|
"rewards/format_reward/std": 0.29814977645874025, |
|
"rewards/qatch_metrics/mean": 0.8017192721366883, |
|
"rewards/qatch_metrics/std": 0.331482595205307, |
|
"rewards/tag_count_reward/mean": 0.9234375, |
|
"rewards/tag_count_reward/std": 0.22307254374027252, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 432.2, |
|
"completions/max_terminated_length": 432.2, |
|
"completions/mean_length": 221.4625, |
|
"completions/mean_terminated_length": 221.4625, |
|
"completions/min_length": 51.4, |
|
"completions/min_terminated_length": 51.4, |
|
"epoch": 0.044072278536800354, |
|
"grad_norm": 0.29592079815815686, |
|
"kl": 0.0016038894653320312, |
|
"learning_rate": 4.2105263157894733e-07, |
|
"loss": -0.0424, |
|
"num_tokens": 3536975.0, |
|
"reward": 0.7564297676086426, |
|
"reward_std": 0.08200130835175515, |
|
"rewards/format_reward/mean": 0.96953125, |
|
"rewards/format_reward/std": 0.13422587364912034, |
|
"rewards/qatch_metrics/mean": 0.7183640837669373, |
|
"rewards/qatch_metrics/std": 0.3674669623374939, |
|
"rewards/tag_count_reward/mean": 0.97734375, |
|
"rewards/tag_count_reward/std": 0.09909781143069267, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 445.6, |
|
"completions/max_terminated_length": 445.6, |
|
"completions/mean_length": 216.53984375, |
|
"completions/mean_terminated_length": 216.53984375, |
|
"completions/min_length": 77.0, |
|
"completions/min_terminated_length": 77.0, |
|
"epoch": 0.052886734244160426, |
|
"grad_norm": 0.275794455786416, |
|
"kl": 0.0034694671630859375, |
|
"learning_rate": 5.087719298245614e-07, |
|
"loss": 0.002, |
|
"num_tokens": 4281330.0, |
|
"reward": 0.7764788866043091, |
|
"reward_std": 0.09769791960716248, |
|
"rewards/format_reward/mean": 0.9953125, |
|
"rewards/format_reward/std": 0.06028594672679901, |
|
"rewards/qatch_metrics/mean": 0.7377692699432373, |
|
"rewards/qatch_metrics/std": 0.3548368811607361, |
|
"rewards/tag_count_reward/mean": 0.996875, |
|
"rewards/tag_count_reward/std": 0.04124387204647064, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 445.8, |
|
"completions/max_terminated_length": 445.8, |
|
"completions/mean_length": 220.11796875, |
|
"completions/mean_terminated_length": 220.11796875, |
|
"completions/min_length": 59.8, |
|
"completions/min_terminated_length": 59.8, |
|
"epoch": 0.06170118995152049, |
|
"grad_norm": 0.2691159080285212, |
|
"kl": 0.005501174926757812, |
|
"learning_rate": 5.964912280701754e-07, |
|
"loss": -0.0083, |
|
"num_tokens": 5008025.0, |
|
"reward": 0.8268720507621765, |
|
"reward_std": 0.08243840038776398, |
|
"rewards/format_reward/mean": 0.99609375, |
|
"rewards/format_reward/std": 0.0625, |
|
"rewards/qatch_metrics/mean": 0.7969059944152832, |
|
"rewards/qatch_metrics/std": 0.30500164330005647, |
|
"rewards/tag_count_reward/mean": 0.9978515625, |
|
"rewards/tag_count_reward/std": 0.03437500074505806, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 487.2, |
|
"completions/max_terminated_length": 487.2, |
|
"completions/mean_length": 227.76015625, |
|
"completions/mean_terminated_length": 227.76015625, |
|
"completions/min_length": 83.4, |
|
"completions/min_terminated_length": 83.4, |
|
"epoch": 0.07051564565888056, |
|
"grad_norm": 0.33908836616855625, |
|
"kl": 0.002800750732421875, |
|
"learning_rate": 6.842105263157895e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 5774806.0, |
|
"reward": 0.7647829532623291, |
|
"reward_std": 0.09533883556723595, |
|
"rewards/format_reward/mean": 0.9984375, |
|
"rewards/format_reward/std": 0.025, |
|
"rewards/qatch_metrics/mean": 0.7235268354415894, |
|
"rewards/qatch_metrics/std": 0.35323665738105775, |
|
"rewards/tag_count_reward/mean": 0.998828125, |
|
"rewards/tag_count_reward/std": 0.01875, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 476.2, |
|
"completions/max_terminated_length": 476.2, |
|
"completions/mean_length": 221.7984375, |
|
"completions/mean_terminated_length": 221.7984375, |
|
"completions/min_length": 83.4, |
|
"completions/min_terminated_length": 83.4, |
|
"epoch": 0.07933010136624064, |
|
"grad_norm": 0.3262303740341099, |
|
"kl": 0.00310516357421875, |
|
"learning_rate": 7.719298245614034e-07, |
|
"loss": 0.0104, |
|
"num_tokens": 6557268.0, |
|
"reward": 0.7565465092658996, |
|
"reward_std": 0.09911727011203766, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7135841250419617, |
|
"rewards/qatch_metrics/std": 0.37862626910209657, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 512.8, |
|
"completions/max_terminated_length": 512.8, |
|
"completions/mean_length": 228.45546875, |
|
"completions/mean_terminated_length": 228.45546875, |
|
"completions/min_length": 76.0, |
|
"completions/min_terminated_length": 76.0, |
|
"epoch": 0.08814455707360071, |
|
"grad_norm": 0.23276410584015308, |
|
"kl": 0.00273895263671875, |
|
"learning_rate": 8.596491228070175e-07, |
|
"loss": -0.0018, |
|
"num_tokens": 7327499.0, |
|
"reward": 0.7988326072692871, |
|
"reward_std": 0.06667622029781342, |
|
"rewards/format_reward/mean": 0.9984375, |
|
"rewards/format_reward/std": 0.025, |
|
"rewards/qatch_metrics/mean": 0.7635622501373291, |
|
"rewards/qatch_metrics/std": 0.369570130109787, |
|
"rewards/tag_count_reward/mean": 0.99921875, |
|
"rewards/tag_count_reward/std": 0.0125, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 483.8, |
|
"completions/max_terminated_length": 483.8, |
|
"completions/mean_length": 220.52734375, |
|
"completions/mean_terminated_length": 220.52734375, |
|
"completions/min_length": 81.2, |
|
"completions/min_terminated_length": 81.2, |
|
"epoch": 0.09695901278096078, |
|
"grad_norm": 0.28218074028465906, |
|
"kl": 0.00196533203125, |
|
"learning_rate": 9.473684210526315e-07, |
|
"loss": -0.0021, |
|
"num_tokens": 8077390.0, |
|
"reward": 0.8159880757331848, |
|
"reward_std": 0.10231453701853752, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7835153818130494, |
|
"rewards/qatch_metrics/std": 0.33782891631126405, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 481.2, |
|
"completions/max_terminated_length": 481.2, |
|
"completions/mean_length": 223.60703125, |
|
"completions/mean_terminated_length": 223.60703125, |
|
"completions/min_length": 75.6, |
|
"completions/min_terminated_length": 75.6, |
|
"epoch": 0.10577346848832085, |
|
"grad_norm": 0.23258401790732933, |
|
"kl": 0.00223388671875, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0045, |
|
"num_tokens": 8800407.0, |
|
"reward": 0.74871985912323, |
|
"reward_std": 0.09312780797481537, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7043763160705566, |
|
"rewards/qatch_metrics/std": 0.39227073788642886, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 487.8, |
|
"completions/max_terminated_length": 487.8, |
|
"completions/mean_length": 222.81015625, |
|
"completions/mean_terminated_length": 222.81015625, |
|
"completions/min_length": 77.4, |
|
"completions/min_terminated_length": 77.4, |
|
"epoch": 0.11458792419568092, |
|
"grad_norm": 0.22445170455470606, |
|
"kl": 0.002956390380859375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0057, |
|
"num_tokens": 9557380.0, |
|
"reward": 0.8077908515930176, |
|
"reward_std": 0.09828853458166123, |
|
"rewards/format_reward/mean": 0.9984375, |
|
"rewards/format_reward/std": 0.025, |
|
"rewards/qatch_metrics/mean": 0.774078369140625, |
|
"rewards/qatch_metrics/std": 0.33206661343574523, |
|
"rewards/tag_count_reward/mean": 0.999609375, |
|
"rewards/tag_count_reward/std": 0.00625, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 492.6, |
|
"completions/max_terminated_length": 492.6, |
|
"completions/mean_length": 231.83984375, |
|
"completions/mean_terminated_length": 231.83984375, |
|
"completions/min_length": 94.6, |
|
"completions/min_terminated_length": 94.6, |
|
"epoch": 0.12340237990304098, |
|
"grad_norm": 0.22832903725685313, |
|
"kl": 0.00381317138671875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0025, |
|
"num_tokens": 10339127.0, |
|
"reward": 0.7895300030708313, |
|
"reward_std": 0.10415169298648834, |
|
"rewards/format_reward/mean": 0.9984375, |
|
"rewards/format_reward/std": 0.025, |
|
"rewards/qatch_metrics/mean": 0.7526065230369567, |
|
"rewards/qatch_metrics/std": 0.3542828977108002, |
|
"rewards/tag_count_reward/mean": 0.9994140625, |
|
"rewards/tag_count_reward/std": 0.009375, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 521.8, |
|
"completions/max_terminated_length": 521.8, |
|
"completions/mean_length": 236.3125, |
|
"completions/mean_terminated_length": 236.3125, |
|
"completions/min_length": 80.4, |
|
"completions/min_terminated_length": 80.4, |
|
"epoch": 0.13221683561040107, |
|
"grad_norm": 0.2597151805235052, |
|
"kl": 0.00432281494140625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0083, |
|
"num_tokens": 11147287.0, |
|
"reward": 0.7333161950111389, |
|
"reward_std": 0.08832715749740601, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.6862887978553772, |
|
"rewards/qatch_metrics/std": 0.36336439847946167, |
|
"rewards/tag_count_reward/mean": 0.9994140625, |
|
"rewards/tag_count_reward/std": 0.0069767430424690245, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 445.6, |
|
"completions/max_terminated_length": 445.6, |
|
"completions/mean_length": 216.43984375, |
|
"completions/mean_terminated_length": 216.43984375, |
|
"completions/min_length": 87.8, |
|
"completions/min_terminated_length": 87.8, |
|
"epoch": 0.14103129131776113, |
|
"grad_norm": 0.2463929158667687, |
|
"kl": 0.00528717041015625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0044, |
|
"num_tokens": 11891066.0, |
|
"reward": 0.8300724029541016, |
|
"reward_std": 0.09615504890680313, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8000851631164551, |
|
"rewards/qatch_metrics/std": 0.3208737909793854, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 491.6, |
|
"completions/max_terminated_length": 491.6, |
|
"completions/mean_length": 225.32890625, |
|
"completions/mean_terminated_length": 225.32890625, |
|
"completions/min_length": 86.2, |
|
"completions/min_terminated_length": 86.2, |
|
"epoch": 0.1498457470251212, |
|
"grad_norm": 0.22719354366888944, |
|
"kl": 0.005328369140625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0129, |
|
"num_tokens": 12668159.0, |
|
"reward": 0.816937243938446, |
|
"reward_std": 0.08283708170056343, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7846320390701294, |
|
"rewards/qatch_metrics/std": 0.32469419240951536, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 460.4, |
|
"completions/max_terminated_length": 460.4, |
|
"completions/mean_length": 217.92890625, |
|
"completions/mean_terminated_length": 217.92890625, |
|
"completions/min_length": 76.2, |
|
"completions/min_terminated_length": 76.2, |
|
"epoch": 0.15866020273248127, |
|
"grad_norm": 0.2721517170479785, |
|
"kl": 0.00579071044921875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0117, |
|
"num_tokens": 13413588.0, |
|
"reward": 0.7426301956176757, |
|
"reward_std": 0.0905102699995041, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.6972119808197021, |
|
"rewards/qatch_metrics/std": 0.37120566368103025, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 428.6, |
|
"completions/max_terminated_length": 428.6, |
|
"completions/mean_length": 204.6640625, |
|
"completions/mean_terminated_length": 204.6640625, |
|
"completions/min_length": 75.6, |
|
"completions/min_terminated_length": 75.6, |
|
"epoch": 0.16747465843984133, |
|
"grad_norm": 0.2525985499058037, |
|
"kl": 0.0056243896484375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0012, |
|
"num_tokens": 14111606.0, |
|
"reward": 0.7979554295539856, |
|
"reward_std": 0.06609301418066024, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7623119950294495, |
|
"rewards/qatch_metrics/std": 0.34469759464263916, |
|
"rewards/tag_count_reward/mean": 0.9998046875, |
|
"rewards/tag_count_reward/std": 0.003125, |
|
"step": 95 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 431.0, |
|
"completions/max_terminated_length": 431.0, |
|
"completions/mean_length": 212.34765625, |
|
"completions/mean_terminated_length": 212.34765625, |
|
"completions/min_length": 69.2, |
|
"completions/min_terminated_length": 69.2, |
|
"epoch": 0.17628911414720141, |
|
"grad_norm": 0.30357672091416305, |
|
"kl": 0.0057861328125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0083, |
|
"num_tokens": 14876659.0, |
|
"reward": 0.7724857568740845, |
|
"reward_std": 0.09265935122966766, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7323476672172546, |
|
"rewards/qatch_metrics/std": 0.33567925691604616, |
|
"rewards/tag_count_reward/mean": 0.9998046875, |
|
"rewards/tag_count_reward/std": 0.003125, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 463.4, |
|
"completions/max_terminated_length": 463.4, |
|
"completions/mean_length": 216.46875, |
|
"completions/mean_terminated_length": 216.46875, |
|
"completions/min_length": 80.4, |
|
"completions/min_terminated_length": 80.4, |
|
"epoch": 0.18510356985456147, |
|
"grad_norm": 0.23780324977532238, |
|
"kl": 0.0056549072265625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0087, |
|
"num_tokens": 15600331.0, |
|
"reward": 0.7508906722068787, |
|
"reward_std": 0.0951332688331604, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7069302201271057, |
|
"rewards/qatch_metrics/std": 0.38108278512954713, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 442.4, |
|
"completions/max_terminated_length": 442.4, |
|
"completions/mean_length": 216.578125, |
|
"completions/mean_terminated_length": 216.578125, |
|
"completions/min_length": 80.2, |
|
"completions/min_terminated_length": 80.2, |
|
"epoch": 0.19391802556192156, |
|
"grad_norm": 0.21716869090526136, |
|
"kl": 0.0054229736328125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0045, |
|
"num_tokens": 16326015.0, |
|
"reward": 0.8402611017227173, |
|
"reward_std": 0.05716411247849464, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8120718836784363, |
|
"rewards/qatch_metrics/std": 0.2929441839456558, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 428.2, |
|
"completions/max_terminated_length": 428.2, |
|
"completions/mean_length": 222.0265625, |
|
"completions/mean_terminated_length": 222.0265625, |
|
"completions/min_length": 78.0, |
|
"completions/min_terminated_length": 78.0, |
|
"epoch": 0.20273248126928162, |
|
"grad_norm": 0.22835452896575356, |
|
"kl": 0.0060882568359375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0017, |
|
"num_tokens": 17091921.0, |
|
"reward": 0.8265595078468323, |
|
"reward_std": 0.07398260906338691, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7959523558616638, |
|
"rewards/qatch_metrics/std": 0.3277123510837555, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 459.8, |
|
"completions/max_terminated_length": 459.8, |
|
"completions/mean_length": 220.7453125, |
|
"completions/mean_terminated_length": 220.7453125, |
|
"completions/min_length": 87.0, |
|
"completions/min_terminated_length": 87.0, |
|
"epoch": 0.2115469369766417, |
|
"grad_norm": 0.22726862373109216, |
|
"kl": 0.006689453125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0043, |
|
"num_tokens": 17877371.0, |
|
"reward": 0.8397867679595947, |
|
"reward_std": 0.09087342023849487, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8115137934684753, |
|
"rewards/qatch_metrics/std": 0.3017837733030319, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 491.4, |
|
"completions/max_terminated_length": 491.4, |
|
"completions/mean_length": 225.2140625, |
|
"completions/mean_terminated_length": 225.2140625, |
|
"completions/min_length": 75.4, |
|
"completions/min_terminated_length": 75.4, |
|
"epoch": 0.22036139268400176, |
|
"grad_norm": 0.2004953082769917, |
|
"kl": 0.00776519775390625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0056, |
|
"num_tokens": 18623005.0, |
|
"reward": 0.8202541828155517, |
|
"reward_std": 0.07537120208144188, |
|
"rewards/format_reward/mean": 0.99921875, |
|
"rewards/format_reward/std": 0.0125, |
|
"rewards/qatch_metrics/mean": 0.7886492252349854, |
|
"rewards/qatch_metrics/std": 0.32776339948177335, |
|
"rewards/tag_count_reward/mean": 0.999609375, |
|
"rewards/tag_count_reward/std": 0.00625, |
|
"step": 125 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 456.8, |
|
"completions/max_terminated_length": 456.8, |
|
"completions/mean_length": 223.48203125, |
|
"completions/mean_terminated_length": 223.48203125, |
|
"completions/min_length": 78.2, |
|
"completions/min_terminated_length": 78.2, |
|
"epoch": 0.22917584839136185, |
|
"grad_norm": 0.2341532579835068, |
|
"kl": 0.00804290771484375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0096, |
|
"num_tokens": 19349606.0, |
|
"reward": 0.8026262044906616, |
|
"reward_std": 0.06839245334267616, |
|
"rewards/format_reward/mean": 0.99921875, |
|
"rewards/format_reward/std": 0.0125, |
|
"rewards/qatch_metrics/mean": 0.7679218888282776, |
|
"rewards/qatch_metrics/std": 0.3324147403240204, |
|
"rewards/tag_count_reward/mean": 0.9994140625, |
|
"rewards/tag_count_reward/std": 0.009375, |
|
"step": 130 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 458.4, |
|
"completions/max_terminated_length": 458.4, |
|
"completions/mean_length": 216.72578125, |
|
"completions/mean_terminated_length": 216.72578125, |
|
"completions/min_length": 86.2, |
|
"completions/min_terminated_length": 86.2, |
|
"epoch": 0.2379903040987219, |
|
"grad_norm": 0.23655650548465582, |
|
"kl": 0.0078033447265625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0007, |
|
"num_tokens": 20092311.0, |
|
"reward": 0.8197526335716248, |
|
"reward_std": 0.0839143767952919, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7879442930221557, |
|
"rewards/qatch_metrics/std": 0.3431123554706573, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 454.6, |
|
"completions/max_terminated_length": 454.6, |
|
"completions/mean_length": 204.48984375, |
|
"completions/mean_terminated_length": 204.48984375, |
|
"completions/min_length": 79.0, |
|
"completions/min_terminated_length": 79.0, |
|
"epoch": 0.24680475980608196, |
|
"grad_norm": 0.2641797202959811, |
|
"kl": 0.00862884521484375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0051, |
|
"num_tokens": 20821962.0, |
|
"reward": 0.8242111682891846, |
|
"reward_std": 0.07407020255923272, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7931895971298217, |
|
"rewards/qatch_metrics/std": 0.3176054835319519, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 443.2, |
|
"completions/max_terminated_length": 443.2, |
|
"completions/mean_length": 203.590625, |
|
"completions/mean_terminated_length": 203.590625, |
|
"completions/min_length": 86.6, |
|
"completions/min_terminated_length": 86.6, |
|
"epoch": 0.255619215513442, |
|
"grad_norm": 0.263066002535131, |
|
"kl": 0.009637451171875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0065, |
|
"num_tokens": 21526046.0, |
|
"reward": 0.7875781059265137, |
|
"reward_std": 0.09901705384254456, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7501148462295533, |
|
"rewards/qatch_metrics/std": 0.3672972857952118, |
|
"rewards/tag_count_reward/mean": 0.999609375, |
|
"rewards/tag_count_reward/std": 0.00625, |
|
"step": 145 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 448.4, |
|
"completions/max_terminated_length": 448.4, |
|
"completions/mean_length": 208.90546875, |
|
"completions/mean_terminated_length": 208.90546875, |
|
"completions/min_length": 73.2, |
|
"completions/min_terminated_length": 73.2, |
|
"epoch": 0.26443367122080214, |
|
"grad_norm": 0.2798500312218402, |
|
"kl": 0.01026153564453125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0003, |
|
"num_tokens": 22271333.0, |
|
"reward": 0.818337082862854, |
|
"reward_std": 0.07784928977489472, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7862788915634156, |
|
"rewards/qatch_metrics/std": 0.3341992735862732, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 494.2, |
|
"completions/max_terminated_length": 494.2, |
|
"completions/mean_length": 209.651953125, |
|
"completions/mean_terminated_length": 209.651953125, |
|
"completions/min_length": 73.0, |
|
"completions/min_terminated_length": 73.0, |
|
"epoch": 0.5464962538563244, |
|
"grad_norm": 0.2122029879190087, |
|
"kl": 0.010993194580078126, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0126, |
|
"num_tokens": 23726666.0, |
|
"reward": 0.811666476726532, |
|
"reward_std": 0.0841904804110527, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7784311413764954, |
|
"rewards/qatch_metrics/std": 0.32770459055900575, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 452.6, |
|
"completions/max_terminated_length": 452.6, |
|
"completions/mean_length": 217.9859375, |
|
"completions/mean_terminated_length": 217.9859375, |
|
"completions/min_length": 75.8, |
|
"completions/min_terminated_length": 75.8, |
|
"epoch": 0.5641251652710445, |
|
"grad_norm": 0.15403477284537095, |
|
"kl": 0.00980377197265625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0008, |
|
"num_tokens": 25239750.0, |
|
"reward": 0.7868865132331848, |
|
"reward_std": 0.07244862839579583, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7492782354354859, |
|
"rewards/qatch_metrics/std": 0.3493395745754242, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 511.0, |
|
"completions/max_terminated_length": 511.0, |
|
"completions/mean_length": 208.38984375, |
|
"completions/mean_terminated_length": 208.38984375, |
|
"completions/min_length": 58.4, |
|
"completions/min_terminated_length": 58.4, |
|
"epoch": 0.5817540766857646, |
|
"grad_norm": 0.18706575889421317, |
|
"kl": 0.00914154052734375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0072, |
|
"num_tokens": 26687596.0, |
|
"reward": 0.828769075870514, |
|
"reward_std": 0.07729479111731052, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7985518336296081, |
|
"rewards/qatch_metrics/std": 0.29670341312885284, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 505.0, |
|
"completions/max_terminated_length": 505.0, |
|
"completions/mean_length": 206.281640625, |
|
"completions/mean_terminated_length": 206.281640625, |
|
"completions/min_length": 79.0, |
|
"completions/min_terminated_length": 79.0, |
|
"epoch": 0.5993829881004848, |
|
"grad_norm": 0.19776450858978561, |
|
"kl": 0.01090240478515625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0105, |
|
"num_tokens": 28175773.0, |
|
"reward": 0.8511051416397095, |
|
"reward_std": 0.07431531846523284, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8248295664787293, |
|
"rewards/qatch_metrics/std": 0.3192874014377594, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 462.2, |
|
"completions/max_terminated_length": 462.2, |
|
"completions/mean_length": 219.3890625, |
|
"completions/mean_terminated_length": 219.3890625, |
|
"completions/min_length": 70.8, |
|
"completions/min_terminated_length": 70.8, |
|
"epoch": 0.617011899515205, |
|
"grad_norm": 0.15290022120008429, |
|
"kl": 0.01065216064453125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0047, |
|
"num_tokens": 29739969.0, |
|
"reward": 0.8426113128662109, |
|
"reward_std": 0.09004694148898125, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.814836847782135, |
|
"rewards/qatch_metrics/std": 0.309688937664032, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 458.8, |
|
"completions/max_terminated_length": 458.8, |
|
"completions/mean_length": 211.655078125, |
|
"completions/mean_terminated_length": 211.655078125, |
|
"completions/min_length": 73.4, |
|
"completions/min_terminated_length": 73.4, |
|
"epoch": 0.6346408109299251, |
|
"grad_norm": 0.17923424569681315, |
|
"kl": 0.0114501953125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.01, |
|
"num_tokens": 31191502.0, |
|
"reward": 0.8262084484100342, |
|
"reward_std": 0.08637549504637718, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7955393195152283, |
|
"rewards/qatch_metrics/std": 0.3134327620267868, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 499.2, |
|
"completions/max_terminated_length": 499.2, |
|
"completions/mean_length": 215.95546875, |
|
"completions/mean_terminated_length": 215.95546875, |
|
"completions/min_length": 78.0, |
|
"completions/min_terminated_length": 78.0, |
|
"epoch": 0.6522697223446452, |
|
"grad_norm": 0.1321015357675111, |
|
"kl": 0.012237548828125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0021, |
|
"num_tokens": 32694108.0, |
|
"reward": 0.7994898676872253, |
|
"reward_std": 0.08254800513386726, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.764105749130249, |
|
"rewards/qatch_metrics/std": 0.3532308578491211, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 442.2, |
|
"completions/max_terminated_length": 442.2, |
|
"completions/mean_length": 209.90078125, |
|
"completions/mean_terminated_length": 209.90078125, |
|
"completions/min_length": 76.6, |
|
"completions/min_terminated_length": 76.6, |
|
"epoch": 0.6698986337593653, |
|
"grad_norm": 0.22256806005967145, |
|
"kl": 0.01057586669921875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0013, |
|
"num_tokens": 34144670.0, |
|
"reward": 0.7911163926124573, |
|
"reward_std": 0.06518566869199276, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7542545795440674, |
|
"rewards/qatch_metrics/std": 0.35398219227790834, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 476.2, |
|
"completions/max_terminated_length": 476.2, |
|
"completions/mean_length": 208.534765625, |
|
"completions/mean_terminated_length": 208.534765625, |
|
"completions/min_length": 77.8, |
|
"completions/min_terminated_length": 77.8, |
|
"epoch": 0.6875275451740855, |
|
"grad_norm": 0.17237028945675698, |
|
"kl": 0.0087860107421875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0069, |
|
"num_tokens": 35620023.0, |
|
"reward": 0.8418472170829773, |
|
"reward_std": 0.08243692219257355, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8139379024505615, |
|
"rewards/qatch_metrics/std": 0.336453515291214, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 514.6, |
|
"completions/max_terminated_length": 514.6, |
|
"completions/mean_length": 217.3328125, |
|
"completions/mean_terminated_length": 217.3328125, |
|
"completions/min_length": 90.4, |
|
"completions/min_terminated_length": 90.4, |
|
"epoch": 0.7051564565888057, |
|
"grad_norm": 0.19274445010407998, |
|
"kl": 0.009130859375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0053, |
|
"num_tokens": 37166635.0, |
|
"reward": 0.8295193314552307, |
|
"reward_std": 0.06927115023136139, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7994345307350159, |
|
"rewards/qatch_metrics/std": 0.3011426508426666, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 499.8, |
|
"completions/max_terminated_length": 499.8, |
|
"completions/mean_length": 212.651171875, |
|
"completions/mean_terminated_length": 212.651171875, |
|
"completions/min_length": 68.6, |
|
"completions/min_terminated_length": 68.6, |
|
"epoch": 0.7227853680035258, |
|
"grad_norm": 0.13990900967805797, |
|
"kl": 0.0087432861328125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0027, |
|
"num_tokens": 38617966.0, |
|
"reward": 0.8151894211769104, |
|
"reward_std": 0.07495353966951371, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7825757980346679, |
|
"rewards/qatch_metrics/std": 0.33874245882034304, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 205 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 560.4, |
|
"completions/max_terminated_length": 560.4, |
|
"completions/mean_length": 223.7015625, |
|
"completions/mean_terminated_length": 223.7015625, |
|
"completions/min_length": 74.6, |
|
"completions/min_terminated_length": 74.6, |
|
"epoch": 0.7404142794182459, |
|
"grad_norm": 0.20163985914598806, |
|
"kl": 0.00806884765625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0054, |
|
"num_tokens": 40092050.0, |
|
"reward": 0.8460610270500183, |
|
"reward_std": 0.05867695920169354, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8188953161239624, |
|
"rewards/qatch_metrics/std": 0.3239317536354065, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 486.6, |
|
"completions/max_terminated_length": 486.6, |
|
"completions/mean_length": 215.6828125, |
|
"completions/mean_terminated_length": 215.6828125, |
|
"completions/min_length": 82.2, |
|
"completions/min_terminated_length": 82.2, |
|
"epoch": 0.7580431908329661, |
|
"grad_norm": 0.17564998217230318, |
|
"kl": 0.009525299072265625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0034, |
|
"num_tokens": 41565542.0, |
|
"reward": 0.799136507511139, |
|
"reward_std": 0.06419738680124283, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7636899828910828, |
|
"rewards/qatch_metrics/std": 0.3342160403728485, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 215 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 503.4, |
|
"completions/max_terminated_length": 503.4, |
|
"completions/mean_length": 233.409765625, |
|
"completions/mean_terminated_length": 233.409765625, |
|
"completions/min_length": 91.0, |
|
"completions/min_terminated_length": 91.0, |
|
"epoch": 0.7756721022476862, |
|
"grad_norm": 0.19283324501226842, |
|
"kl": 0.009130096435546875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0109, |
|
"num_tokens": 43081919.0, |
|
"reward": 0.7851791024208069, |
|
"reward_std": 0.07570808604359627, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7472695469856262, |
|
"rewards/qatch_metrics/std": 0.36822828054428103, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 448.4, |
|
"completions/max_terminated_length": 448.4, |
|
"completions/mean_length": 224.728125, |
|
"completions/mean_terminated_length": 224.728125, |
|
"completions/min_length": 81.0, |
|
"completions/min_terminated_length": 81.0, |
|
"epoch": 0.7933010136624064, |
|
"grad_norm": 0.17754847688569442, |
|
"kl": 0.009470367431640625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.002, |
|
"num_tokens": 44606439.0, |
|
"reward": 0.8152384400367737, |
|
"reward_std": 0.09764492362737656, |
|
"rewards/format_reward/mean": 0.999609375, |
|
"rewards/format_reward/std": 0.00883883461356163, |
|
"rewards/qatch_metrics/mean": 0.7826851725578308, |
|
"rewards/qatch_metrics/std": 0.3263732075691223, |
|
"rewards/tag_count_reward/mean": 0.99990234375, |
|
"rewards/tag_count_reward/std": 0.0022097086533904076, |
|
"step": 225 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 503.2, |
|
"completions/max_terminated_length": 503.2, |
|
"completions/mean_length": 218.58203125, |
|
"completions/mean_terminated_length": 218.58203125, |
|
"completions/min_length": 69.6, |
|
"completions/min_terminated_length": 69.6, |
|
"epoch": 0.8109299250771265, |
|
"grad_norm": 0.19017267970498908, |
|
"kl": 0.009508514404296875, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0009, |
|
"num_tokens": 46095257.0, |
|
"reward": 0.8068280577659607, |
|
"reward_std": 0.0781441181898117, |
|
"rewards/format_reward/mean": 0.999609375, |
|
"rewards/format_reward/std": 0.00883883461356163, |
|
"rewards/qatch_metrics/mean": 0.7728020906448364, |
|
"rewards/qatch_metrics/std": 0.3386655867099762, |
|
"rewards/tag_count_reward/mean": 0.99970703125, |
|
"rewards/tag_count_reward/std": 0.006629125773906707, |
|
"step": 230 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 456.6, |
|
"completions/max_terminated_length": 456.6, |
|
"completions/mean_length": 204.84375, |
|
"completions/mean_terminated_length": 204.84375, |
|
"completions/min_length": 72.6, |
|
"completions/min_terminated_length": 72.6, |
|
"epoch": 0.8285588364918466, |
|
"grad_norm": 0.1678878918468119, |
|
"kl": 0.009729766845703125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0041, |
|
"num_tokens": 47519433.0, |
|
"reward": 0.8672606706619262, |
|
"reward_std": 0.0644603468477726, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8438360691070557, |
|
"rewards/qatch_metrics/std": 0.2717843741178513, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 235 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 526.2, |
|
"completions/max_terminated_length": 526.2, |
|
"completions/mean_length": 214.90625, |
|
"completions/mean_terminated_length": 214.90625, |
|
"completions/min_length": 66.8, |
|
"completions/min_terminated_length": 66.8, |
|
"epoch": 0.8461877479065668, |
|
"grad_norm": 0.18169011669761398, |
|
"kl": 0.01288604736328125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0035, |
|
"num_tokens": 48943993.0, |
|
"reward": 0.8558493018150329, |
|
"reward_std": 0.07027828097343444, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8304109454154969, |
|
"rewards/qatch_metrics/std": 0.301141357421875, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 472.6, |
|
"completions/max_terminated_length": 472.6, |
|
"completions/mean_length": 212.559765625, |
|
"completions/mean_terminated_length": 212.559765625, |
|
"completions/min_length": 76.8, |
|
"completions/min_terminated_length": 76.8, |
|
"epoch": 0.8638166593212869, |
|
"grad_norm": 0.2046340854229955, |
|
"kl": 0.01494140625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.006, |
|
"num_tokens": 50416114.0, |
|
"reward": 0.831060528755188, |
|
"reward_std": 0.07754805404692888, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8012476563453674, |
|
"rewards/qatch_metrics/std": 0.3293557226657867, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 245 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 503.2, |
|
"completions/max_terminated_length": 503.2, |
|
"completions/mean_length": 222.1375, |
|
"completions/mean_terminated_length": 222.1375, |
|
"completions/min_length": 83.2, |
|
"completions/min_terminated_length": 83.2, |
|
"epoch": 0.881445570736007, |
|
"grad_norm": 0.15161264539796646, |
|
"kl": 0.0138031005859375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0018, |
|
"num_tokens": 51932274.0, |
|
"reward": 0.8422249555587769, |
|
"reward_std": 0.06234893724322319, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8143823027610779, |
|
"rewards/qatch_metrics/std": 0.2993943512439728, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 463.6, |
|
"completions/max_terminated_length": 463.6, |
|
"completions/mean_length": 231.19609375, |
|
"completions/mean_terminated_length": 231.19609375, |
|
"completions/min_length": 77.4, |
|
"completions/min_terminated_length": 77.4, |
|
"epoch": 0.8990744821507272, |
|
"grad_norm": 0.20035266636054513, |
|
"kl": 0.011871337890625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0003, |
|
"num_tokens": 53450248.0, |
|
"reward": 0.8096501588821411, |
|
"reward_std": 0.06698438860476016, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7760589838027954, |
|
"rewards/qatch_metrics/std": 0.3199191153049469, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 471.2, |
|
"completions/max_terminated_length": 471.2, |
|
"completions/mean_length": 237.80234375, |
|
"completions/mean_terminated_length": 237.80234375, |
|
"completions/min_length": 82.2, |
|
"completions/min_terminated_length": 82.2, |
|
"epoch": 0.9167033935654474, |
|
"grad_norm": 0.0856229450795828, |
|
"kl": 0.011614227294921875, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0017, |
|
"num_tokens": 54970542.0, |
|
"reward": 0.8725608706474304, |
|
"reward_std": 0.051827043667435645, |
|
"rewards/format_reward/mean": 0.999609375, |
|
"rewards/format_reward/std": 0.00883883461356163, |
|
"rewards/qatch_metrics/mean": 0.8501232981681823, |
|
"rewards/qatch_metrics/std": 0.26386110931634904, |
|
"rewards/tag_count_reward/mean": 0.99990234375, |
|
"rewards/tag_count_reward/std": 0.0022097086533904076, |
|
"step": 260 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 476.6, |
|
"completions/max_terminated_length": 476.6, |
|
"completions/mean_length": 231.53671875, |
|
"completions/mean_terminated_length": 231.53671875, |
|
"completions/min_length": 79.0, |
|
"completions/min_terminated_length": 79.0, |
|
"epoch": 0.9343323049801675, |
|
"grad_norm": 0.17178453068271043, |
|
"kl": 0.010117340087890624, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0063, |
|
"num_tokens": 56485356.0, |
|
"reward": 0.8532873392105103, |
|
"reward_std": 0.07009301483631133, |
|
"rewards/format_reward/mean": 0.999609375, |
|
"rewards/format_reward/std": 0.00883883461356163, |
|
"rewards/qatch_metrics/mean": 0.8274485826492309, |
|
"rewards/qatch_metrics/std": 0.31240676045417787, |
|
"rewards/tag_count_reward/mean": 0.99990234375, |
|
"rewards/tag_count_reward/std": 0.0022097086533904076, |
|
"step": 265 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 459.6, |
|
"completions/max_terminated_length": 459.6, |
|
"completions/mean_length": 220.95234375, |
|
"completions/mean_terminated_length": 220.95234375, |
|
"completions/min_length": 68.6, |
|
"completions/min_terminated_length": 68.6, |
|
"epoch": 0.9519612163948876, |
|
"grad_norm": 0.15364550208264494, |
|
"kl": 0.00984039306640625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0031, |
|
"num_tokens": 57953010.0, |
|
"reward": 0.868242597579956, |
|
"reward_std": 0.06916632130742073, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8449912905693054, |
|
"rewards/qatch_metrics/std": 0.2899660974740982, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 423.2, |
|
"completions/max_terminated_length": 423.2, |
|
"completions/mean_length": 225.621875, |
|
"completions/mean_terminated_length": 225.621875, |
|
"completions/min_length": 88.8, |
|
"completions/min_terminated_length": 88.8, |
|
"epoch": 0.48479506390480387, |
|
"grad_norm": 0.17697767584196022, |
|
"kl": 0.00970916748046875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0059, |
|
"num_tokens": 58736110.0, |
|
"reward": 0.8460039258003235, |
|
"reward_std": 0.055821475386619565, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8188281297683716, |
|
"rewards/qatch_metrics/std": 0.30660555958747865, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 504.0, |
|
"completions/max_terminated_length": 504.0, |
|
"completions/mean_length": 223.92734375, |
|
"completions/mean_terminated_length": 223.92734375, |
|
"completions/min_length": 77.8, |
|
"completions/min_terminated_length": 77.8, |
|
"epoch": 0.4936095196121639, |
|
"grad_norm": 0.2692630701899735, |
|
"kl": 0.0131378173828125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0021, |
|
"num_tokens": 59498897.0, |
|
"reward": 0.7988754034042358, |
|
"reward_std": 0.08376505076885224, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7633828163146973, |
|
"rewards/qatch_metrics/std": 0.3335907101631165, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 434.8, |
|
"completions/max_terminated_length": 434.8, |
|
"completions/mean_length": 223.48125, |
|
"completions/mean_terminated_length": 223.48125, |
|
"completions/min_length": 83.2, |
|
"completions/min_terminated_length": 83.2, |
|
"epoch": 0.502423975319524, |
|
"grad_norm": 0.2666009697829767, |
|
"kl": 0.0107269287109375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0007, |
|
"num_tokens": 60277897.0, |
|
"reward": 0.7720089554786682, |
|
"reward_std": 0.0594131164252758, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7317867279052734, |
|
"rewards/qatch_metrics/std": 0.33845625519752504, |
|
"rewards/tag_count_reward/mean": 0.9998046875, |
|
"rewards/tag_count_reward/std": 0.003125, |
|
"step": 285 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 481.0, |
|
"completions/max_terminated_length": 481.0, |
|
"completions/mean_length": 219.62421875, |
|
"completions/mean_terminated_length": 219.62421875, |
|
"completions/min_length": 91.0, |
|
"completions/min_terminated_length": 91.0, |
|
"epoch": 0.511238431026884, |
|
"grad_norm": 0.16876063412105669, |
|
"kl": 0.01141357421875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0033, |
|
"num_tokens": 61033560.0, |
|
"reward": 0.7902166962623596, |
|
"reward_std": 0.0687429528683424, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7531961083412171, |
|
"rewards/qatch_metrics/std": 0.37054654359817507, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 471.2, |
|
"completions/max_terminated_length": 471.2, |
|
"completions/mean_length": 226.275, |
|
"completions/mean_terminated_length": 226.275, |
|
"completions/min_length": 80.6, |
|
"completions/min_terminated_length": 80.6, |
|
"epoch": 0.5200528867342442, |
|
"grad_norm": 0.26818466602074054, |
|
"kl": 0.0130706787109375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0004, |
|
"num_tokens": 61786008.0, |
|
"reward": 0.7699209451675415, |
|
"reward_std": 0.07550354823470115, |
|
"rewards/format_reward/mean": 0.99921875, |
|
"rewards/format_reward/std": 0.0125, |
|
"rewards/qatch_metrics/mean": 0.7294221520423889, |
|
"rewards/qatch_metrics/std": 0.3492735385894775, |
|
"rewards/tag_count_reward/mean": 0.9998046875, |
|
"rewards/tag_count_reward/std": 0.003125, |
|
"step": 295 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 484.6, |
|
"completions/max_terminated_length": 484.6, |
|
"completions/mean_length": 247.2328125, |
|
"completions/mean_terminated_length": 247.2328125, |
|
"completions/min_length": 95.8, |
|
"completions/min_terminated_length": 95.8, |
|
"epoch": 0.5288673424416043, |
|
"grad_norm": 0.16485515882678206, |
|
"kl": 0.0113006591796875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0007, |
|
"num_tokens": 62590434.0, |
|
"reward": 0.8454334974288941, |
|
"reward_std": 0.0570029616355896, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8181570172309875, |
|
"rewards/qatch_metrics/std": 0.2992805689573288, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 480.6, |
|
"completions/max_terminated_length": 480.6, |
|
"completions/mean_length": 235.16015625, |
|
"completions/mean_terminated_length": 235.16015625, |
|
"completions/min_length": 88.0, |
|
"completions/min_terminated_length": 88.0, |
|
"epoch": 0.5376817981489643, |
|
"grad_norm": 0.27561378534620606, |
|
"kl": 0.01141510009765625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0097, |
|
"num_tokens": 63366287.0, |
|
"reward": 0.8380108118057251, |
|
"reward_std": 0.07530387155711651, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8094244718551635, |
|
"rewards/qatch_metrics/std": 0.30977231860160825, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 305 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 458.6, |
|
"completions/max_terminated_length": 458.6, |
|
"completions/mean_length": 215.646875, |
|
"completions/mean_terminated_length": 215.646875, |
|
"completions/min_length": 79.4, |
|
"completions/min_terminated_length": 79.4, |
|
"epoch": 0.5464962538563244, |
|
"grad_norm": 0.2018916915779266, |
|
"kl": 0.013714599609375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0045, |
|
"num_tokens": 64097387.0, |
|
"reward": 0.8135073184967041, |
|
"reward_std": 0.05950811579823494, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7806198120117187, |
|
"rewards/qatch_metrics/std": 0.33523867428302767, |
|
"rewards/tag_count_reward/mean": 0.999609375, |
|
"rewards/tag_count_reward/std": 0.00625, |
|
"step": 310 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 447.4, |
|
"completions/max_terminated_length": 447.4, |
|
"completions/mean_length": 223.68515625, |
|
"completions/mean_terminated_length": 223.68515625, |
|
"completions/min_length": 92.4, |
|
"completions/min_terminated_length": 92.4, |
|
"epoch": 0.5553107095636844, |
|
"grad_norm": 0.1836962735356692, |
|
"kl": 0.0138214111328125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0024, |
|
"num_tokens": 64869416.0, |
|
"reward": 0.8333834052085877, |
|
"reward_std": 0.07006162852048874, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8039804816246032, |
|
"rewards/qatch_metrics/std": 0.3219245493412018, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 315 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 504.8, |
|
"completions/max_terminated_length": 504.8, |
|
"completions/mean_length": 221.45390625, |
|
"completions/mean_terminated_length": 221.45390625, |
|
"completions/min_length": 80.0, |
|
"completions/min_terminated_length": 80.0, |
|
"epoch": 0.5641251652710445, |
|
"grad_norm": 0.23250178423343035, |
|
"kl": 0.01497802734375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0014, |
|
"num_tokens": 65613165.0, |
|
"reward": 0.8320096850395202, |
|
"reward_std": 0.053499556705355646, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8023643255233764, |
|
"rewards/qatch_metrics/std": 0.3343039393424988, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 466.6, |
|
"completions/max_terminated_length": 466.6, |
|
"completions/mean_length": 220.3984375, |
|
"completions/mean_terminated_length": 220.3984375, |
|
"completions/min_length": 72.6, |
|
"completions/min_terminated_length": 72.6, |
|
"epoch": 0.5729396209784046, |
|
"grad_norm": 0.09740281424559781, |
|
"kl": 0.0155609130859375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0012, |
|
"num_tokens": 66336475.0, |
|
"reward": 0.8796087980270386, |
|
"reward_std": 0.05236431676894426, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8583632946014405, |
|
"rewards/qatch_metrics/std": 0.2817832052707672, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 459.4, |
|
"completions/max_terminated_length": 459.4, |
|
"completions/mean_length": 225.27890625, |
|
"completions/mean_terminated_length": 225.27890625, |
|
"completions/min_length": 78.6, |
|
"completions/min_terminated_length": 78.6, |
|
"epoch": 0.5817540766857646, |
|
"grad_norm": 0.08354955287926201, |
|
"kl": 0.01513671875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0095, |
|
"num_tokens": 67098736.0, |
|
"reward": 0.8658102512359619, |
|
"reward_std": 0.07466748803853988, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8421296954154969, |
|
"rewards/qatch_metrics/std": 0.2614422976970673, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 447.2, |
|
"completions/max_terminated_length": 447.2, |
|
"completions/mean_length": 220.01875, |
|
"completions/mean_terminated_length": 220.01875, |
|
"completions/min_length": 81.8, |
|
"completions/min_terminated_length": 81.8, |
|
"epoch": 0.5905685323931247, |
|
"grad_norm": 0.20574209747901576, |
|
"kl": 0.015081787109375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.011, |
|
"num_tokens": 67847928.0, |
|
"reward": 0.865822184085846, |
|
"reward_std": 0.046268445625901225, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8421437621116639, |
|
"rewards/qatch_metrics/std": 0.29589260220527647, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 335 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 432.0, |
|
"completions/max_terminated_length": 432.0, |
|
"completions/mean_length": 213.5953125, |
|
"completions/mean_terminated_length": 213.5953125, |
|
"completions/min_length": 76.0, |
|
"completions/min_terminated_length": 76.0, |
|
"epoch": 0.5993829881004848, |
|
"grad_norm": 0.2039975034177896, |
|
"kl": 0.0161651611328125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0066, |
|
"num_tokens": 68585234.0, |
|
"reward": 0.8343551635742188, |
|
"reward_std": 0.0688902921974659, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8051237106323242, |
|
"rewards/qatch_metrics/std": 0.30847290754318235, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 435.0, |
|
"completions/max_terminated_length": 435.0, |
|
"completions/mean_length": 203.69453125, |
|
"completions/mean_terminated_length": 203.69453125, |
|
"completions/min_length": 76.0, |
|
"completions/min_terminated_length": 76.0, |
|
"epoch": 0.6081974438078449, |
|
"grad_norm": 0.26848084439203446, |
|
"kl": 0.014788818359375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0023, |
|
"num_tokens": 69338379.0, |
|
"reward": 0.8848124146461487, |
|
"reward_std": 0.06373886093497276, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8644851684570313, |
|
"rewards/qatch_metrics/std": 0.26705425381660464, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 345 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 446.8, |
|
"completions/max_terminated_length": 446.8, |
|
"completions/mean_length": 221.1046875, |
|
"completions/mean_terminated_length": 221.1046875, |
|
"completions/min_length": 79.4, |
|
"completions/min_terminated_length": 79.4, |
|
"epoch": 0.617011899515205, |
|
"grad_norm": 0.2363792510293019, |
|
"kl": 0.019024658203125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.002, |
|
"num_tokens": 70095473.0, |
|
"reward": 0.8130708336830139, |
|
"reward_std": 0.08477363213896752, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7800833344459533, |
|
"rewards/qatch_metrics/std": 0.3211198329925537, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 458.4, |
|
"completions/max_terminated_length": 458.4, |
|
"completions/mean_length": 234.00078125, |
|
"completions/mean_terminated_length": 234.00078125, |
|
"completions/min_length": 91.4, |
|
"completions/min_terminated_length": 91.4, |
|
"epoch": 0.625826355222565, |
|
"grad_norm": 0.1856420640121193, |
|
"kl": 0.019122314453125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0036, |
|
"num_tokens": 70860290.0, |
|
"reward": 0.8471660256385803, |
|
"reward_std": 0.0506692998111248, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8201953172683716, |
|
"rewards/qatch_metrics/std": 0.30663308799266814, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 355 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 452.8, |
|
"completions/max_terminated_length": 452.8, |
|
"completions/mean_length": 241.72421875, |
|
"completions/mean_terminated_length": 241.72421875, |
|
"completions/min_length": 88.0, |
|
"completions/min_terminated_length": 88.0, |
|
"epoch": 0.6346408109299251, |
|
"grad_norm": 0.22939974521057024, |
|
"kl": 0.01826171875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0127, |
|
"num_tokens": 71648401.0, |
|
"reward": 0.8702264785766601, |
|
"reward_std": 0.0592925101518631, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8473252534866333, |
|
"rewards/qatch_metrics/std": 0.28537269234657286, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 434.6, |
|
"completions/max_terminated_length": 434.6, |
|
"completions/mean_length": 215.853125, |
|
"completions/mean_terminated_length": 215.853125, |
|
"completions/min_length": 75.0, |
|
"completions/min_terminated_length": 75.0, |
|
"epoch": 0.6434552666372851, |
|
"grad_norm": 0.19883621919511643, |
|
"kl": 0.0163330078125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0073, |
|
"num_tokens": 72382693.0, |
|
"reward": 0.8091506719589233, |
|
"reward_std": 0.0635421834886074, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7754713773727417, |
|
"rewards/qatch_metrics/std": 0.3179103255271912, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 365 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 496.8, |
|
"completions/max_terminated_length": 496.8, |
|
"completions/mean_length": 213.1640625, |
|
"completions/mean_terminated_length": 213.1640625, |
|
"completions/min_length": 76.2, |
|
"completions/min_terminated_length": 76.2, |
|
"epoch": 0.6522697223446452, |
|
"grad_norm": 0.1916457590662772, |
|
"kl": 0.0175506591796875, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0029, |
|
"num_tokens": 73161111.0, |
|
"reward": 0.8094798445701599, |
|
"reward_std": 0.04875086285173893, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7758586168289184, |
|
"rewards/qatch_metrics/std": 0.32606661319732666, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 480.4, |
|
"completions/max_terminated_length": 480.4, |
|
"completions/mean_length": 222.75, |
|
"completions/mean_terminated_length": 222.75, |
|
"completions/min_length": 72.6, |
|
"completions/min_terminated_length": 72.6, |
|
"epoch": 0.6610841780520053, |
|
"grad_norm": 0.15787517122504152, |
|
"kl": 0.0181884765625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0018, |
|
"num_tokens": 73905591.0, |
|
"reward": 0.89048171043396, |
|
"reward_std": 0.04932568361982703, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8711549639701843, |
|
"rewards/qatch_metrics/std": 0.2736783862113953, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 561.4, |
|
"completions/max_terminated_length": 561.4, |
|
"completions/mean_length": 234.4390625, |
|
"completions/mean_terminated_length": 234.4390625, |
|
"completions/min_length": 75.2, |
|
"completions/min_terminated_length": 75.2, |
|
"epoch": 0.6698986337593653, |
|
"grad_norm": 0.2653930596733297, |
|
"kl": 0.0174713134765625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.003, |
|
"num_tokens": 74679801.0, |
|
"reward": 0.8243065714836121, |
|
"reward_std": 0.06958894729614258, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7933018207550049, |
|
"rewards/qatch_metrics/std": 0.3086866676807404, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 511.8, |
|
"completions/max_terminated_length": 511.8, |
|
"completions/mean_length": 243.73359375, |
|
"completions/mean_terminated_length": 243.73359375, |
|
"completions/min_length": 81.6, |
|
"completions/min_terminated_length": 81.6, |
|
"epoch": 0.6787130894667255, |
|
"grad_norm": 0.20233916054675122, |
|
"kl": 0.014093017578125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0048, |
|
"num_tokens": 75445892.0, |
|
"reward": 0.8653998494148254, |
|
"reward_std": 0.07132081612944603, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8416468739509583, |
|
"rewards/qatch_metrics/std": 0.3147186517715454, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 385 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 447.4, |
|
"completions/max_terminated_length": 447.4, |
|
"completions/mean_length": 228.43671875, |
|
"completions/mean_terminated_length": 228.43671875, |
|
"completions/min_length": 79.8, |
|
"completions/min_terminated_length": 79.8, |
|
"epoch": 0.6875275451740855, |
|
"grad_norm": 0.29996778931865303, |
|
"kl": 0.0146087646484375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0016, |
|
"num_tokens": 76229251.0, |
|
"reward": 0.8502862334251404, |
|
"reward_std": 0.07314281612634659, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8238661646842956, |
|
"rewards/qatch_metrics/std": 0.3113024443387985, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 487.0, |
|
"completions/max_terminated_length": 487.0, |
|
"completions/mean_length": 249.8734375, |
|
"completions/mean_terminated_length": 249.8734375, |
|
"completions/min_length": 84.4, |
|
"completions/min_terminated_length": 84.4, |
|
"epoch": 0.6963420008814456, |
|
"grad_norm": 0.2150032953896288, |
|
"kl": 0.017156982421875, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0029, |
|
"num_tokens": 77021793.0, |
|
"reward": 0.8494030237197876, |
|
"reward_std": 0.05776047557592392, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8228270888328553, |
|
"rewards/qatch_metrics/std": 0.3020846724510193, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 395 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 471.4, |
|
"completions/max_terminated_length": 471.4, |
|
"completions/mean_length": 247.9828125, |
|
"completions/mean_terminated_length": 247.9828125, |
|
"completions/min_length": 84.4, |
|
"completions/min_terminated_length": 84.4, |
|
"epoch": 0.7051564565888057, |
|
"grad_norm": 0.2754041387856829, |
|
"kl": 0.0148590087890625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0065, |
|
"num_tokens": 77833451.0, |
|
"reward": 0.8363431453704834, |
|
"reward_std": 0.06054745838046074, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.807462501525879, |
|
"rewards/qatch_metrics/std": 0.29668720066547394, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 442.6, |
|
"completions/max_terminated_length": 442.6, |
|
"completions/mean_length": 225.4296875, |
|
"completions/mean_terminated_length": 225.4296875, |
|
"completions/min_length": 83.0, |
|
"completions/min_terminated_length": 83.0, |
|
"epoch": 0.7139709122961657, |
|
"grad_norm": 0.22420011771594078, |
|
"kl": 0.017706298828125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0019, |
|
"num_tokens": 78585793.0, |
|
"reward": 0.8382049560546875, |
|
"reward_std": 0.05150428526103497, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8096528768539428, |
|
"rewards/qatch_metrics/std": 0.2925006330013275, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 405 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 618.4, |
|
"completions/max_terminated_length": 618.4, |
|
"completions/mean_length": 219.3421875, |
|
"completions/mean_terminated_length": 219.3421875, |
|
"completions/min_length": 84.6, |
|
"completions/min_terminated_length": 84.6, |
|
"epoch": 0.7227853680035258, |
|
"grad_norm": 0.0986589707089894, |
|
"kl": 0.0170196533203125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0022, |
|
"num_tokens": 79352135.0, |
|
"reward": 0.8465274453163147, |
|
"reward_std": 0.05231629386544227, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8194440126419067, |
|
"rewards/qatch_metrics/std": 0.3004340440034866, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 470.6, |
|
"completions/max_terminated_length": 470.6, |
|
"completions/mean_length": 213.9921875, |
|
"completions/mean_terminated_length": 213.9921875, |
|
"completions/min_length": 83.6, |
|
"completions/min_terminated_length": 83.6, |
|
"epoch": 0.7315998237108858, |
|
"grad_norm": 0.17969166348358623, |
|
"kl": 0.01600341796875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0196, |
|
"num_tokens": 80093021.0, |
|
"reward": 0.7899853944778442, |
|
"reward_std": 0.06183199286460876, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7529239773750305, |
|
"rewards/qatch_metrics/std": 0.32831716537475586, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 415 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 458.4, |
|
"completions/max_terminated_length": 458.4, |
|
"completions/mean_length": 208.25390625, |
|
"completions/mean_terminated_length": 208.25390625, |
|
"completions/min_length": 72.6, |
|
"completions/min_terminated_length": 72.6, |
|
"epoch": 0.7404142794182459, |
|
"grad_norm": 0.12360613268228073, |
|
"kl": 0.0170166015625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0011, |
|
"num_tokens": 80810658.0, |
|
"reward": 0.8781363725662231, |
|
"reward_std": 0.04314489997923374, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8566309928894043, |
|
"rewards/qatch_metrics/std": 0.2832080274820328, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 447.2, |
|
"completions/max_terminated_length": 447.2, |
|
"completions/mean_length": 203.98046875, |
|
"completions/mean_terminated_length": 203.98046875, |
|
"completions/min_length": 79.2, |
|
"completions/min_terminated_length": 79.2, |
|
"epoch": 0.749228735125606, |
|
"grad_norm": 0.210810313322166, |
|
"kl": 0.0164581298828125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.003, |
|
"num_tokens": 81548361.0, |
|
"reward": 0.8270991563796997, |
|
"reward_std": 0.06941422820091248, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7965872406959533, |
|
"rewards/qatch_metrics/std": 0.33117216229438784, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 434.6, |
|
"completions/max_terminated_length": 434.6, |
|
"completions/mean_length": 220.75703125, |
|
"completions/mean_terminated_length": 220.75703125, |
|
"completions/min_length": 80.4, |
|
"completions/min_terminated_length": 80.4, |
|
"epoch": 0.7580431908329661, |
|
"grad_norm": 0.21910688267881026, |
|
"kl": 0.016754150390625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0012, |
|
"num_tokens": 82290706.0, |
|
"reward": 0.8464880228042603, |
|
"reward_std": 0.04884184449911118, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8193976640701294, |
|
"rewards/qatch_metrics/std": 0.28375020921230315, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 451.8, |
|
"completions/max_terminated_length": 451.8, |
|
"completions/mean_length": 223.1234375, |
|
"completions/mean_terminated_length": 223.1234375, |
|
"completions/min_length": 85.0, |
|
"completions/min_terminated_length": 85.0, |
|
"epoch": 0.7668576465403262, |
|
"grad_norm": 0.26253720274856984, |
|
"kl": 0.0178009033203125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0023, |
|
"num_tokens": 83056976.0, |
|
"reward": 0.8096219301223755, |
|
"reward_std": 0.07494284212589264, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7760257959365845, |
|
"rewards/qatch_metrics/std": 0.3492628037929535, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 435 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 501.2, |
|
"completions/max_terminated_length": 501.2, |
|
"completions/mean_length": 216.840625, |
|
"completions/mean_terminated_length": 216.840625, |
|
"completions/min_length": 88.8, |
|
"completions/min_terminated_length": 88.8, |
|
"epoch": 0.7756721022476862, |
|
"grad_norm": 0.27647079947407377, |
|
"kl": 0.0181732177734375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0012, |
|
"num_tokens": 83805044.0, |
|
"reward": 0.7776495218276978, |
|
"reward_std": 0.056884029135108, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7384112119674683, |
|
"rewards/qatch_metrics/std": 0.3683965981006622, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 455.0, |
|
"completions/max_terminated_length": 455.0, |
|
"completions/mean_length": 216.46015625, |
|
"completions/mean_terminated_length": 216.46015625, |
|
"completions/min_length": 78.2, |
|
"completions/min_terminated_length": 78.2, |
|
"epoch": 0.7844865579550463, |
|
"grad_norm": 0.20996305667402082, |
|
"kl": 0.0163116455078125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0063, |
|
"num_tokens": 84571313.0, |
|
"reward": 0.8477118849754334, |
|
"reward_std": 0.06959039457142353, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8208375215530396, |
|
"rewards/qatch_metrics/std": 0.30095059871673585, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 445 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 443.4, |
|
"completions/max_terminated_length": 443.4, |
|
"completions/mean_length": 211.00234375, |
|
"completions/mean_terminated_length": 211.00234375, |
|
"completions/min_length": 86.2, |
|
"completions/min_terminated_length": 86.2, |
|
"epoch": 0.7933010136624064, |
|
"grad_norm": 0.15662206787116065, |
|
"kl": 0.0160797119140625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0004, |
|
"num_tokens": 85319188.0, |
|
"reward": 0.8328658938407898, |
|
"reward_std": 0.05801869332790375, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8033716320991516, |
|
"rewards/qatch_metrics/std": 0.3037038058042526, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 475.2, |
|
"completions/max_terminated_length": 475.2, |
|
"completions/mean_length": 222.7078125, |
|
"completions/mean_terminated_length": 222.7078125, |
|
"completions/min_length": 80.8, |
|
"completions/min_terminated_length": 80.8, |
|
"epoch": 0.8021154693697664, |
|
"grad_norm": 0.19919629119501958, |
|
"kl": 0.01639404296875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.001, |
|
"num_tokens": 86091774.0, |
|
"reward": 0.8358211517333984, |
|
"reward_std": 0.0607087716460228, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8068713665008544, |
|
"rewards/qatch_metrics/std": 0.30334635376930236, |
|
"rewards/tag_count_reward/mean": 0.999609375, |
|
"rewards/tag_count_reward/std": 0.00625, |
|
"step": 455 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 418.4, |
|
"completions/max_terminated_length": 418.4, |
|
"completions/mean_length": 221.0015625, |
|
"completions/mean_terminated_length": 221.0015625, |
|
"completions/min_length": 80.4, |
|
"completions/min_terminated_length": 80.4, |
|
"epoch": 0.8109299250771265, |
|
"grad_norm": 0.1419366062228353, |
|
"kl": 0.01617431640625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0031, |
|
"num_tokens": 86848528.0, |
|
"reward": 0.8028954148292542, |
|
"reward_std": 0.06934207193553447, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7681122660636902, |
|
"rewards/qatch_metrics/std": 0.3390295565128326, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 426.6, |
|
"completions/max_terminated_length": 426.6, |
|
"completions/mean_length": 210.6140625, |
|
"completions/mean_terminated_length": 210.6140625, |
|
"completions/min_length": 85.0, |
|
"completions/min_terminated_length": 85.0, |
|
"epoch": 0.8197443807844865, |
|
"grad_norm": 0.16116384181364513, |
|
"kl": 0.0162078857421875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.013, |
|
"num_tokens": 87564482.0, |
|
"reward": 0.8424649000167846, |
|
"reward_std": 0.040234316140413284, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8146645903587342, |
|
"rewards/qatch_metrics/std": 0.2840981811285019, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 465 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 447.6, |
|
"completions/max_terminated_length": 447.6, |
|
"completions/mean_length": 195.89140625, |
|
"completions/mean_terminated_length": 195.89140625, |
|
"completions/min_length": 80.8, |
|
"completions/min_terminated_length": 80.8, |
|
"epoch": 0.8285588364918466, |
|
"grad_norm": 0.21075371504226795, |
|
"kl": 0.0193115234375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.001, |
|
"num_tokens": 88255159.0, |
|
"reward": 0.8565711379051208, |
|
"reward_std": 0.06344871073961258, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8312601566314697, |
|
"rewards/qatch_metrics/std": 0.3075568675994873, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 445.4, |
|
"completions/max_terminated_length": 445.4, |
|
"completions/mean_length": 201.01484375, |
|
"completions/mean_terminated_length": 201.01484375, |
|
"completions/min_length": 85.0, |
|
"completions/min_terminated_length": 85.0, |
|
"epoch": 0.8373732921992068, |
|
"grad_norm": 0.27204162033836665, |
|
"kl": 0.019976806640625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0009, |
|
"num_tokens": 88945690.0, |
|
"reward": 0.8785177230834961, |
|
"reward_std": 0.06470721438527108, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8570796966552734, |
|
"rewards/qatch_metrics/std": 0.2825317859649658, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 497.4, |
|
"completions/max_terminated_length": 497.4, |
|
"completions/mean_length": 221.90859375, |
|
"completions/mean_terminated_length": 221.90859375, |
|
"completions/min_length": 81.2, |
|
"completions/min_terminated_length": 81.2, |
|
"epoch": 0.8461877479065668, |
|
"grad_norm": 0.19323853705899263, |
|
"kl": 0.0183746337890625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0041, |
|
"num_tokens": 89712373.0, |
|
"reward": 0.8555493712425232, |
|
"reward_std": 0.06230065375566483, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8300580739974975, |
|
"rewards/qatch_metrics/std": 0.28706649839878084, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 498.8, |
|
"completions/max_terminated_length": 498.8, |
|
"completions/mean_length": 228.7890625, |
|
"completions/mean_terminated_length": 228.7890625, |
|
"completions/min_length": 88.4, |
|
"completions/min_terminated_length": 88.4, |
|
"epoch": 0.8550022036139269, |
|
"grad_norm": 0.24770714763886528, |
|
"kl": 0.0176513671875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.004, |
|
"num_tokens": 90520071.0, |
|
"reward": 0.8527018785476684, |
|
"reward_std": 0.062195781618356705, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8267080783843994, |
|
"rewards/qatch_metrics/std": 0.2996180385351181, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 485 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 480.6, |
|
"completions/max_terminated_length": 480.6, |
|
"completions/mean_length": 227.66875, |
|
"completions/mean_terminated_length": 227.66875, |
|
"completions/min_length": 83.2, |
|
"completions/min_terminated_length": 83.2, |
|
"epoch": 0.8638166593212869, |
|
"grad_norm": 0.16162980170931898, |
|
"kl": 0.0188812255859375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0006, |
|
"num_tokens": 91278479.0, |
|
"reward": 0.8309607028961181, |
|
"reward_std": 0.0656251635402441, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8011302351951599, |
|
"rewards/qatch_metrics/std": 0.31802850365638735, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 502.4, |
|
"completions/max_terminated_length": 502.4, |
|
"completions/mean_length": 225.3171875, |
|
"completions/mean_terminated_length": 225.3171875, |
|
"completions/min_length": 80.4, |
|
"completions/min_terminated_length": 80.4, |
|
"epoch": 0.872631115028647, |
|
"grad_norm": 0.1886973597841831, |
|
"kl": 0.01859130859375, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0052, |
|
"num_tokens": 92033173.0, |
|
"reward": 0.8441248655319213, |
|
"reward_std": 0.043570340052247046, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8166174769401551, |
|
"rewards/qatch_metrics/std": 0.30278873145580293, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 495 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 443.6, |
|
"completions/max_terminated_length": 443.6, |
|
"completions/mean_length": 234.475, |
|
"completions/mean_terminated_length": 234.475, |
|
"completions/min_length": 99.8, |
|
"completions/min_terminated_length": 99.8, |
|
"epoch": 0.881445570736007, |
|
"grad_norm": 0.24444756963754977, |
|
"kl": 0.01798095703125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.007, |
|
"num_tokens": 92808293.0, |
|
"reward": 0.8517020106315613, |
|
"reward_std": 0.06295906975865365, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.825531804561615, |
|
"rewards/qatch_metrics/std": 0.3100520223379135, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 440.0, |
|
"completions/max_terminated_length": 440.0, |
|
"completions/mean_length": 215.0328125, |
|
"completions/mean_terminated_length": 215.0328125, |
|
"completions/min_length": 84.2, |
|
"completions/min_terminated_length": 84.2, |
|
"epoch": 0.8902600264433671, |
|
"grad_norm": 0.21103775626066984, |
|
"kl": 0.0171600341796875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0051, |
|
"num_tokens": 93563327.0, |
|
"reward": 0.8682243466377259, |
|
"reward_std": 0.04365142099559307, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8449697852134704, |
|
"rewards/qatch_metrics/std": 0.2696381151676178, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 505 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 463.4, |
|
"completions/max_terminated_length": 463.4, |
|
"completions/mean_length": 218.59453125, |
|
"completions/mean_terminated_length": 218.59453125, |
|
"completions/min_length": 77.0, |
|
"completions/min_terminated_length": 77.0, |
|
"epoch": 0.8990744821507272, |
|
"grad_norm": 0.20107359914643413, |
|
"kl": 0.016455078125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0086, |
|
"num_tokens": 94333288.0, |
|
"reward": 0.8064153909683227, |
|
"reward_std": 0.06192653328180313, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.772253406047821, |
|
"rewards/qatch_metrics/std": 0.3227865040302277, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 443.0, |
|
"completions/max_terminated_length": 443.0, |
|
"completions/mean_length": 206.5703125, |
|
"completions/mean_terminated_length": 206.5703125, |
|
"completions/min_length": 73.0, |
|
"completions/min_terminated_length": 73.0, |
|
"epoch": 0.9078889378580872, |
|
"grad_norm": 0.10741725097461949, |
|
"kl": 0.0163330078125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0056, |
|
"num_tokens": 95051890.0, |
|
"reward": 0.8839513182640075, |
|
"reward_std": 0.04564618114382028, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8634721517562867, |
|
"rewards/qatch_metrics/std": 0.24794530421495437, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 515 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 420.2, |
|
"completions/max_terminated_length": 420.2, |
|
"completions/mean_length": 193.584375, |
|
"completions/mean_terminated_length": 193.584375, |
|
"completions/min_length": 74.8, |
|
"completions/min_terminated_length": 74.8, |
|
"epoch": 0.9167033935654474, |
|
"grad_norm": 0.3417922303720187, |
|
"kl": 0.0196563720703125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0033, |
|
"num_tokens": 95755150.0, |
|
"reward": 0.8428452134132385, |
|
"reward_std": 0.05727057494223118, |
|
"rewards/format_reward/mean": 0.99921875, |
|
"rewards/format_reward/std": 0.0125, |
|
"rewards/qatch_metrics/mean": 0.8152039051055908, |
|
"rewards/qatch_metrics/std": 0.31376497745513915, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 404.8, |
|
"completions/max_terminated_length": 404.8, |
|
"completions/mean_length": 208.72890625, |
|
"completions/mean_terminated_length": 208.72890625, |
|
"completions/min_length": 72.2, |
|
"completions/min_terminated_length": 72.2, |
|
"epoch": 0.9255178492728074, |
|
"grad_norm": 0.17161657062686406, |
|
"kl": 0.0185943603515625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0023, |
|
"num_tokens": 96514835.0, |
|
"reward": 0.8597602009773254, |
|
"reward_std": 0.044371549785137174, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8350119948387146, |
|
"rewards/qatch_metrics/std": 0.295586758852005, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 426.8, |
|
"completions/max_terminated_length": 426.8, |
|
"completions/mean_length": 212.95859375, |
|
"completions/mean_terminated_length": 212.95859375, |
|
"completions/min_length": 77.0, |
|
"completions/min_terminated_length": 77.0, |
|
"epoch": 0.9343323049801675, |
|
"grad_norm": 0.22162383692372334, |
|
"kl": 0.0186981201171875, |
|
"learning_rate": 1e-06, |
|
"loss": -0.002, |
|
"num_tokens": 97270782.0, |
|
"reward": 0.8363440155982971, |
|
"reward_std": 0.06691965609788894, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8074635624885559, |
|
"rewards/qatch_metrics/std": 0.3064163327217102, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 454.6, |
|
"completions/max_terminated_length": 454.6, |
|
"completions/mean_length": 233.40234375, |
|
"completions/mean_terminated_length": 233.40234375, |
|
"completions/min_length": 76.4, |
|
"completions/min_terminated_length": 76.4, |
|
"epoch": 0.9431467606875276, |
|
"grad_norm": 0.1434511776519399, |
|
"kl": 0.019879150390625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0023, |
|
"num_tokens": 98016705.0, |
|
"reward": 0.8363542199134827, |
|
"reward_std": 0.05200971700251102, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8074755430221557, |
|
"rewards/qatch_metrics/std": 0.2885085940361023, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 535 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 444.6, |
|
"completions/max_terminated_length": 444.6, |
|
"completions/mean_length": 235.70625, |
|
"completions/mean_terminated_length": 235.70625, |
|
"completions/min_length": 80.6, |
|
"completions/min_terminated_length": 80.6, |
|
"epoch": 0.9519612163948876, |
|
"grad_norm": 0.09221258199209693, |
|
"kl": 0.018701171875, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0038, |
|
"num_tokens": 98787193.0, |
|
"reward": 0.8677037119865417, |
|
"reward_std": 0.057669999450445174, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8443572998046875, |
|
"rewards/qatch_metrics/std": 0.288933590054512, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 455.2, |
|
"completions/max_terminated_length": 455.2, |
|
"completions/mean_length": 222.11875, |
|
"completions/mean_terminated_length": 222.11875, |
|
"completions/min_length": 74.6, |
|
"completions/min_terminated_length": 74.6, |
|
"epoch": 0.9607756721022477, |
|
"grad_norm": 0.1352237905149159, |
|
"kl": 0.018145751953125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0031, |
|
"num_tokens": 99532081.0, |
|
"reward": 0.8805891752243042, |
|
"reward_std": 0.05483146589249373, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8595166802406311, |
|
"rewards/qatch_metrics/std": 0.25585181415081026, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 545 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 472.6, |
|
"completions/max_terminated_length": 472.6, |
|
"completions/mean_length": 218.659375, |
|
"completions/mean_terminated_length": 218.659375, |
|
"completions/min_length": 86.2, |
|
"completions/min_terminated_length": 86.2, |
|
"epoch": 0.9695901278096077, |
|
"grad_norm": 0.16904630982662794, |
|
"kl": 0.01783447265625, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0016, |
|
"num_tokens": 100246573.0, |
|
"reward": 0.8569401383399964, |
|
"reward_std": 0.07272802218794823, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8316942691802979, |
|
"rewards/qatch_metrics/std": 0.3041912466287613, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 459.8, |
|
"completions/max_terminated_length": 459.8, |
|
"completions/mean_length": 221.78984375, |
|
"completions/mean_terminated_length": 221.78984375, |
|
"completions/min_length": 77.8, |
|
"completions/min_terminated_length": 77.8, |
|
"epoch": 0.9784045835169678, |
|
"grad_norm": 0.31854687165087076, |
|
"kl": 0.0183258056640625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0058, |
|
"num_tokens": 100996640.0, |
|
"reward": 0.8102917551994324, |
|
"reward_std": 0.07570969834923744, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.7768138289451599, |
|
"rewards/qatch_metrics/std": 0.34436498284339906, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 555 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 490.6, |
|
"completions/max_terminated_length": 490.6, |
|
"completions/mean_length": 230.28828125, |
|
"completions/mean_terminated_length": 230.28828125, |
|
"completions/min_length": 85.0, |
|
"completions/min_terminated_length": 85.0, |
|
"epoch": 0.9872190392243279, |
|
"grad_norm": 0.16545798735816303, |
|
"kl": 0.01719970703125, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0054, |
|
"num_tokens": 101777473.0, |
|
"reward": 0.854366683959961, |
|
"reward_std": 0.050544672086834906, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.8286666750907898, |
|
"rewards/qatch_metrics/std": 0.3027670204639435, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 460.0, |
|
"completions/max_terminated_length": 460.0, |
|
"completions/mean_length": 232.278125, |
|
"completions/mean_terminated_length": 232.278125, |
|
"completions/min_length": 79.2, |
|
"completions/min_terminated_length": 79.2, |
|
"epoch": 0.996033494931688, |
|
"grad_norm": 0.2064718967348405, |
|
"kl": 0.020306396484375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0052, |
|
"num_tokens": 102547669.0, |
|
"reward": 0.7918175339698792, |
|
"reward_std": 0.05684706475585699, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.755079448223114, |
|
"rewards/qatch_metrics/std": 0.3250477254390717, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 565 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 468.5, |
|
"completions/max_terminated_length": 468.5, |
|
"completions/mean_length": 214.265625, |
|
"completions/mean_terminated_length": 214.265625, |
|
"completions/min_length": 66.0, |
|
"completions/min_terminated_length": 66.0, |
|
"epoch": 0.999559277214632, |
|
"kl": 0.01806640625, |
|
"num_tokens": 102823629.0, |
|
"reward": 0.8797399699687958, |
|
"reward_std": 0.056224397383630276, |
|
"rewards/format_reward/mean": 1.0, |
|
"rewards/format_reward/std": 0.0, |
|
"rewards/qatch_metrics/mean": 0.858517587184906, |
|
"rewards/qatch_metrics/std": 0.26497258245944977, |
|
"rewards/tag_count_reward/mean": 1.0, |
|
"rewards/tag_count_reward/std": 0.0, |
|
"step": 567, |
|
"total_flos": 0.0, |
|
"train_loss": -1.6490349831877564e-05, |
|
"train_runtime": 5804.9117, |
|
"train_samples_per_second": 1.564, |
|
"train_steps_per_second": 0.098 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 567, |
|
"num_input_tokens_seen": 102823629, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|