|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 572, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 7.7545, |
|
"eval_gen_len": 14.8741, |
|
"eval_jit_compilation_time": 11.4032, |
|
"eval_loss": 2.9266164302825928, |
|
"eval_runtime": 12.5758, |
|
"eval_samples_per_second": 11.371, |
|
"eval_steps_per_second": 2.863, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 10.4293, |
|
"eval_gen_len": 14.0629, |
|
"eval_jit_compilation_time": 12.1037, |
|
"eval_loss": 2.850106716156006, |
|
"eval_runtime": 11.8772, |
|
"eval_samples_per_second": 12.04, |
|
"eval_steps_per_second": 3.031, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 11.0503, |
|
"eval_gen_len": 14.5664, |
|
"eval_jit_compilation_time": 12.7826, |
|
"eval_loss": 2.875509262084961, |
|
"eval_runtime": 12.4472, |
|
"eval_samples_per_second": 11.489, |
|
"eval_steps_per_second": 2.892, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.5874125874125877e-05, |
|
"loss": 2.0491, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 12.4752, |
|
"eval_gen_len": 14.6014, |
|
"eval_jit_compilation_time": 12.9294, |
|
"eval_loss": 2.9099602699279785, |
|
"eval_runtime": 12.6411, |
|
"eval_samples_per_second": 11.312, |
|
"eval_steps_per_second": 2.848, |
|
"step": 572 |
|
} |
|
], |
|
"max_steps": 572, |
|
"num_train_epochs": 4, |
|
"total_flos": 11456058359808.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|