|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9923224568138194, |
|
"eval_steps": 500, |
|
"global_step": 1560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.9106, |
|
"eval_gen_len": 38.35490909090909, |
|
"eval_loss": 1.621419906616211, |
|
"eval_precision": 0.9049, |
|
"eval_recall": 0.9166, |
|
"eval_rouge1": 0.4804, |
|
"eval_rouge2": 0.2218, |
|
"eval_rougeL": 0.3873, |
|
"eval_rougeLsum": 0.3873, |
|
"eval_runtime": 654.035, |
|
"eval_samples_per_second": 4.205, |
|
"eval_steps_per_second": 0.263, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.3589743589743592e-05, |
|
"loss": 1.5842, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9113, |
|
"eval_gen_len": 37.86036363636364, |
|
"eval_loss": 1.5547642707824707, |
|
"eval_precision": 0.9059, |
|
"eval_recall": 0.9171, |
|
"eval_rouge1": 0.4874, |
|
"eval_rouge2": 0.2283, |
|
"eval_rougeL": 0.3945, |
|
"eval_rougeLsum": 0.3945, |
|
"eval_runtime": 634.7852, |
|
"eval_samples_per_second": 4.332, |
|
"eval_steps_per_second": 0.271, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.17948717948718e-06, |
|
"loss": 1.3014, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.912, |
|
"eval_gen_len": 37.75636363636364, |
|
"eval_loss": 1.5460655689239502, |
|
"eval_precision": 0.9064, |
|
"eval_recall": 0.918, |
|
"eval_rouge1": 0.49, |
|
"eval_rouge2": 0.2294, |
|
"eval_rougeL": 0.3975, |
|
"eval_rougeLsum": 0.3974, |
|
"eval_runtime": 629.8341, |
|
"eval_samples_per_second": 4.366, |
|
"eval_steps_per_second": 0.273, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 1.18, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_f1": 0.9123, |
|
"eval_gen_len": 38.22872727272727, |
|
"eval_loss": 1.559003472328186, |
|
"eval_precision": 0.9063, |
|
"eval_recall": 0.9187, |
|
"eval_rouge1": 0.4909, |
|
"eval_rouge2": 0.2303, |
|
"eval_rougeL": 0.3967, |
|
"eval_rougeLsum": 0.3965, |
|
"eval_runtime": 638.1793, |
|
"eval_samples_per_second": 4.309, |
|
"eval_steps_per_second": 0.27, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"step": 1560, |
|
"total_flos": 4.304969545380004e+17, |
|
"train_loss": 1.3475837952051408, |
|
"train_runtime": 10536.0779, |
|
"train_samples_per_second": 18.982, |
|
"train_steps_per_second": 0.148 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 4.304969545380004e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|