|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 67, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 599.9989124298096, |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 0.4140438437461853, |
|
"kl": 0.00025391876697540284, |
|
"learning_rate": 2.981532510892707e-06, |
|
"loss": 0.0, |
|
"reward": 0.36986608924344183, |
|
"reward_std": 0.3811873120255768, |
|
"rewards/accuracy_reward": 0.36986608924344183, |
|
"rewards/format_reward": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 606.8819444656372, |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 0.06738726049661636, |
|
"kl": 0.0030163049697875975, |
|
"learning_rate": 2.6657189421854562e-06, |
|
"loss": 0.0001, |
|
"reward": 0.5493303807452321, |
|
"reward_std": 0.338475276529789, |
|
"rewards/accuracy_reward": 0.5493303807452321, |
|
"rewards/format_reward": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 594.3245794296265, |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 0.050410859286785126, |
|
"kl": 0.004825687408447266, |
|
"learning_rate": 2.03755192431795e-06, |
|
"loss": 0.0002, |
|
"reward": 0.6329241359606386, |
|
"reward_std": 0.25738380146212875, |
|
"rewards/accuracy_reward": 0.6329241359606386, |
|
"rewards/format_reward": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 602.447682762146, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.04725565016269684, |
|
"kl": 0.004597854614257812, |
|
"learning_rate": 1.2653483024396534e-06, |
|
"loss": 0.0002, |
|
"reward": 0.6353794910013676, |
|
"reward_std": 0.24207868478260935, |
|
"rewards/accuracy_reward": 0.6353794910013676, |
|
"rewards/format_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 604.7845136642457, |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 0.050348229706287384, |
|
"kl": 0.004749441146850586, |
|
"learning_rate": 5.560194134252441e-07, |
|
"loss": 0.0002, |
|
"reward": 0.6332589566707612, |
|
"reward_std": 0.24845922645181417, |
|
"rewards/accuracy_reward": 0.6332589566707612, |
|
"rewards/format_reward": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 604.7117460250854, |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 0.05302207171916962, |
|
"kl": 0.004680252075195313, |
|
"learning_rate": 9.962936025419756e-08, |
|
"loss": 0.0002, |
|
"reward": 0.6436384223401547, |
|
"reward_std": 0.23067113785073162, |
|
"rewards/accuracy_reward": 0.6436384223401547, |
|
"rewards/format_reward": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 598.5010340554373, |
|
"epoch": 1.0, |
|
"kl": 0.0044345855712890625, |
|
"reward": 0.6657632046512195, |
|
"reward_std": 0.23220190632023982, |
|
"rewards/accuracy_reward": 0.6657632046512195, |
|
"rewards/format_reward": 0.0, |
|
"step": 67, |
|
"total_flos": 0.0, |
|
"train_loss": 0.00015022740371389758, |
|
"train_runtime": 9418.0568, |
|
"train_samples_per_second": 0.796, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 67, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|