|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 366, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.165, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 68.60884809734969, |
|
"eval_f1": 71.57514961558589, |
|
"eval_runtime": 12.6849, |
|
"eval_samples_per_second": 764.454, |
|
"eval_steps_per_second": 2.444, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5445, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 69.99071877900381, |
|
"eval_f1": 73.06771046827892, |
|
"eval_runtime": 8.5294, |
|
"eval_samples_per_second": 1136.896, |
|
"eval_steps_per_second": 3.635, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4736, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 71.20758997628133, |
|
"eval_f1": 73.91150191975164, |
|
"eval_runtime": 8.5195, |
|
"eval_samples_per_second": 1138.209, |
|
"eval_steps_per_second": 3.639, |
|
"step": 366 |
|
} |
|
], |
|
"max_steps": 1220, |
|
"num_train_epochs": 10, |
|
"total_flos": 115390042603520.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|