|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 30,
  "global_step": 225,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 0.3667745590209961,
      "learning_rate": 9.951340343707852e-05,
      "loss": 0.307,
      "step": 10
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 0.016515299677848816,
      "learning_rate": 9.806308479691595e-05,
      "loss": 0.0062,
      "step": 20
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 0.09039086103439331,
      "learning_rate": 9.567727288213005e-05,
      "loss": 0.0042,
      "step": 30
    },
    {
      "epoch": 0.13333333333333333,
      "eval_loss": 0.004083628766238689,
      "eval_runtime": 51.9939,
      "eval_samples_per_second": 9.617,
      "eval_steps_per_second": 0.481,
      "step": 30
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 0.0036161942407488823,
      "learning_rate": 9.24024048078213e-05,
      "loss": 0.0041,
      "step": 40
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.03548239916563034,
      "learning_rate": 8.83022221559489e-05,
      "loss": 0.0039,
      "step": 50
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.03349478542804718,
      "learning_rate": 8.345653031794292e-05,
      "loss": 0.004,
      "step": 60
    },
    {
      "epoch": 0.26666666666666666,
      "eval_loss": 0.003905750811100006,
      "eval_runtime": 52.0248,
      "eval_samples_per_second": 9.611,
      "eval_steps_per_second": 0.481,
      "step": 60
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 0.003334084525704384,
      "learning_rate": 7.795964517353735e-05,
      "loss": 0.004,
      "step": 70
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.0032222650479525328,
      "learning_rate": 7.191855733945387e-05,
      "loss": 0.004,
      "step": 80
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.009061365388333797,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.0039,
      "step": 90
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.0039283959195017815,
      "eval_runtime": 52.0001,
      "eval_samples_per_second": 9.615,
      "eval_steps_per_second": 0.481,
      "step": 90
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.001912094303406775,
      "learning_rate": 5.868240888334653e-05,
      "loss": 0.004,
      "step": 100
    },
    {
      "epoch": 0.4888888888888889,
      "grad_norm": 0.007684116251766682,
      "learning_rate": 5.174497483512506e-05,
      "loss": 0.0039,
      "step": 110
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.0023366182576864958,
      "learning_rate": 4.477357683661734e-05,
      "loss": 0.0039,
      "step": 120
    },
    {
      "epoch": 0.5333333333333333,
      "eval_loss": 0.003954995423555374,
      "eval_runtime": 52.0391,
      "eval_samples_per_second": 9.608,
      "eval_steps_per_second": 0.48,
      "step": 120
    },
    {
      "epoch": 0.5777777777777777,
      "grad_norm": 0.003150626551359892,
      "learning_rate": 3.790390522001662e-05,
      "loss": 0.0039,
      "step": 130
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 0.001225905492901802,
      "learning_rate": 3.12696703292044e-05,
      "loss": 0.0039,
      "step": 140
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.004419529344886541,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.0039,
      "step": 150
    },
    {
      "epoch": 0.6666666666666666,
      "eval_loss": 0.003903371747583151,
      "eval_runtime": 52.037,
      "eval_samples_per_second": 9.609,
      "eval_steps_per_second": 0.48,
      "step": 150
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.0037097111344337463,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 0.0039,
      "step": 160
    },
    {
      "epoch": 0.7555555555555555,
      "grad_norm": 0.011564449407160282,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 0.0039,
      "step": 170
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.019085539504885674,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.0039,
      "step": 180
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.003919120877981186,
      "eval_runtime": 52.0957,
      "eval_samples_per_second": 9.598,
      "eval_steps_per_second": 0.48,
      "step": 180
    },
    {
      "epoch": 0.8444444444444444,
      "grad_norm": 0.0039107827469706535,
      "learning_rate": 5.852620357053651e-06,
      "loss": 0.0039,
      "step": 190
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.011299680918455124,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 0.0039,
      "step": 200
    },
    {
      "epoch": 0.9333333333333333,
      "grad_norm": 0.0018409871263429523,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 0.0039,
      "step": 210
    },
    {
      "epoch": 0.9333333333333333,
      "eval_loss": 0.003911687061190605,
      "eval_runtime": 52.1073,
      "eval_samples_per_second": 9.596,
      "eval_steps_per_second": 0.48,
      "step": 210
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 0.0032750105019658804,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 0.0038,
      "step": 220
    },
    {
      "epoch": 1.0,
      "step": 225,
      "total_flos": 3.68076153035948e+17,
      "train_loss": 0.017504285780919924,
      "train_runtime": 1899.4331,
      "train_samples_per_second": 2.369,
      "train_steps_per_second": 0.118
    }
  ],
  "logging_steps": 10,
  "max_steps": 225,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.68076153035948e+17,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}
|
|