|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 19200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.535132032339264e-05, |
|
"loss": 1.5294, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6997061800943186, |
|
"eval_loss": 1.505336046218872, |
|
"eval_runtime": 59.2609, |
|
"eval_samples_per_second": 128.314, |
|
"eval_steps_per_second": 0.81, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6900880215595094e-05, |
|
"loss": 1.3178, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7119164174032121, |
|
"eval_loss": 1.4108233451843262, |
|
"eval_runtime": 57.8828, |
|
"eval_samples_per_second": 131.369, |
|
"eval_steps_per_second": 0.829, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7807314645155048e-05, |
|
"loss": 1.2464, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.716069757803329, |
|
"eval_loss": 1.3826147317886353, |
|
"eval_runtime": 57.5, |
|
"eval_samples_per_second": 132.244, |
|
"eval_steps_per_second": 0.835, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.8450440107797548e-05, |
|
"loss": 1.2004, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7212539141360841, |
|
"eval_loss": 1.3423837423324585, |
|
"eval_runtime": 57.7225, |
|
"eval_samples_per_second": 131.734, |
|
"eval_steps_per_second": 0.832, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.894928697180815e-05, |
|
"loss": 1.1671, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.723119868068725, |
|
"eval_loss": 1.3292474746704102, |
|
"eval_runtime": 57.4277, |
|
"eval_samples_per_second": 132.41, |
|
"eval_steps_per_second": 0.836, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.93568745373575e-05, |
|
"loss": 1.1405, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7279360245602308, |
|
"eval_loss": 1.3002619743347168, |
|
"eval_runtime": 57.5343, |
|
"eval_samples_per_second": 132.165, |
|
"eval_steps_per_second": 0.834, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9701484913790247e-05, |
|
"loss": 1.1195, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7286793766001343, |
|
"eval_loss": 1.2911103963851929, |
|
"eval_runtime": 57.3971, |
|
"eval_samples_per_second": 132.481, |
|
"eval_steps_per_second": 0.836, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0984, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.731524743026423, |
|
"eval_loss": 1.2744665145874023, |
|
"eval_runtime": 58.0408, |
|
"eval_samples_per_second": 131.011, |
|
"eval_steps_per_second": 0.827, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0802, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7321408265476137, |
|
"eval_loss": 1.2718857526779175, |
|
"eval_runtime": 57.237, |
|
"eval_samples_per_second": 132.851, |
|
"eval_steps_per_second": 0.839, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0678, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7324240391543068, |
|
"eval_loss": 1.261126160621643, |
|
"eval_runtime": 57.4079, |
|
"eval_samples_per_second": 132.456, |
|
"eval_steps_per_second": 0.836, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.055, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7351396479197737, |
|
"eval_loss": 1.2426605224609375, |
|
"eval_runtime": 57.4037, |
|
"eval_samples_per_second": 132.465, |
|
"eval_steps_per_second": 0.836, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0426, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.738889192995377, |
|
"eval_loss": 1.226466417312622, |
|
"eval_runtime": 57.6438, |
|
"eval_samples_per_second": 131.914, |
|
"eval_steps_per_second": 0.833, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0329, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7372212557134691, |
|
"eval_loss": 1.234350323677063, |
|
"eval_runtime": 57.4326, |
|
"eval_samples_per_second": 132.399, |
|
"eval_steps_per_second": 0.836, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0211, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7388404872505433, |
|
"eval_loss": 1.2182461023330688, |
|
"eval_runtime": 57.2924, |
|
"eval_samples_per_second": 132.723, |
|
"eval_steps_per_second": 0.838, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0118, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7408094590258935, |
|
"eval_loss": 1.214383602142334, |
|
"eval_runtime": 62.5016, |
|
"eval_samples_per_second": 121.661, |
|
"eval_steps_per_second": 0.768, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0038, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.740329327948473, |
|
"eval_loss": 1.2094308137893677, |
|
"eval_runtime": 57.7614, |
|
"eval_samples_per_second": 131.645, |
|
"eval_steps_per_second": 0.831, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9952, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7427147172532104, |
|
"eval_loss": 1.1963080167770386, |
|
"eval_runtime": 57.4115, |
|
"eval_samples_per_second": 132.447, |
|
"eval_steps_per_second": 0.836, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.988, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7429841884959819, |
|
"eval_loss": 1.1982266902923584, |
|
"eval_runtime": 57.4423, |
|
"eval_samples_per_second": 132.376, |
|
"eval_steps_per_second": 0.836, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9808, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7437883910645381, |
|
"eval_loss": 1.1920512914657593, |
|
"eval_runtime": 57.5877, |
|
"eval_samples_per_second": 132.042, |
|
"eval_steps_per_second": 0.834, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9747, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7435962484051245, |
|
"eval_loss": 1.1889218091964722, |
|
"eval_runtime": 57.672, |
|
"eval_samples_per_second": 131.849, |
|
"eval_steps_per_second": 0.832, |
|
"step": 19200 |
|
} |
|
], |
|
"max_steps": 38400, |
|
"num_train_epochs": 40, |
|
"total_flos": 3062931751174144.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|