|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 35.0,
  "global_step": 33600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.535132032339264e-05,
      "loss": 2.3734,
      "step": 960
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6711076941043658,
      "eval_loss": 1.6856393814086914,
      "eval_runtime": 42.9945,
      "eval_samples_per_second": 176.86,
      "eval_steps_per_second": 1.116,
      "step": 960
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.6900880215595094e-05,
      "loss": 1.5002,
      "step": 1920
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6930295365255809,
      "eval_loss": 1.5316802263259888,
      "eval_runtime": 40.2605,
      "eval_samples_per_second": 188.87,
      "eval_steps_per_second": 1.192,
      "step": 1920
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.7807314645155048e-05,
      "loss": 1.3682,
      "step": 2880
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7001060409279067,
      "eval_loss": 1.4794470071792603,
      "eval_runtime": 40.3426,
      "eval_samples_per_second": 188.486,
      "eval_steps_per_second": 1.19,
      "step": 2880
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.8450440107797548e-05,
      "loss": 1.3057,
      "step": 3840
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7058089394925496,
      "eval_loss": 1.445176362991333,
      "eval_runtime": 40.2825,
      "eval_samples_per_second": 188.767,
      "eval_steps_per_second": 1.192,
      "step": 3840
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.894928697180815e-05,
      "loss": 1.2652,
      "step": 4800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7078205742901283,
      "eval_loss": 1.4240751266479492,
      "eval_runtime": 44.133,
      "eval_samples_per_second": 172.297,
      "eval_steps_per_second": 1.088,
      "step": 4800
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.93568745373575e-05,
      "loss": 1.2347,
      "step": 5760
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.712788831055115,
      "eval_loss": 1.393662452697754,
      "eval_runtime": 40.438,
      "eval_samples_per_second": 188.041,
      "eval_steps_per_second": 1.187,
      "step": 5760
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9701484913790247e-05,
      "loss": 1.2117,
      "step": 6720
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7158340831339961,
      "eval_loss": 1.3783458471298218,
      "eval_runtime": 47.0316,
      "eval_samples_per_second": 161.679,
      "eval_steps_per_second": 1.021,
      "step": 6720
    },
    {
      "epoch": 8.0,
      "learning_rate": 2e-05,
      "loss": 1.1863,
      "step": 7680
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7177737277852768,
      "eval_loss": 1.356780767440796,
      "eval_runtime": 40.2772,
      "eval_samples_per_second": 188.791,
      "eval_steps_per_second": 1.192,
      "step": 7680
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 1.167,
      "step": 8640
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7172315754804711,
      "eval_loss": 1.362362265586853,
      "eval_runtime": 40.3662,
      "eval_samples_per_second": 188.375,
      "eval_steps_per_second": 1.189,
      "step": 8640
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 1.1528,
      "step": 9600
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7208890541082557,
      "eval_loss": 1.3375591039657593,
      "eval_runtime": 40.4301,
      "eval_samples_per_second": 188.077,
      "eval_steps_per_second": 1.187,
      "step": 9600
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 1.1403,
      "step": 10560
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.722637380785269,
      "eval_loss": 1.3316693305969238,
      "eval_runtime": 40.2778,
      "eval_samples_per_second": 188.789,
      "eval_steps_per_second": 1.192,
      "step": 10560
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 1.1276,
      "step": 11520
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7243298395325108,
      "eval_loss": 1.3127739429473877,
      "eval_runtime": 40.1949,
      "eval_samples_per_second": 189.178,
      "eval_steps_per_second": 1.194,
      "step": 11520
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 1.1176,
      "step": 12480
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7242034818873964,
      "eval_loss": 1.3149378299713135,
      "eval_runtime": 40.5071,
      "eval_samples_per_second": 187.72,
      "eval_steps_per_second": 1.185,
      "step": 12480
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 1.1061,
      "step": 13440
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7251281499452183,
      "eval_loss": 1.3011534214019775,
      "eval_runtime": 41.5767,
      "eval_samples_per_second": 182.891,
      "eval_steps_per_second": 1.154,
      "step": 13440
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 1.0953,
      "step": 14400
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7274233697774307,
      "eval_loss": 1.2953981161117554,
      "eval_runtime": 40.2998,
      "eval_samples_per_second": 188.686,
      "eval_steps_per_second": 1.191,
      "step": 14400
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.0872,
      "step": 15360
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.729807233922918,
      "eval_loss": 1.2836934328079224,
      "eval_runtime": 40.3358,
      "eval_samples_per_second": 188.517,
      "eval_steps_per_second": 1.19,
      "step": 15360
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.0778,
      "step": 16320
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.728892083446406,
      "eval_loss": 1.2819887399673462,
      "eval_runtime": 40.2817,
      "eval_samples_per_second": 188.77,
      "eval_steps_per_second": 1.192,
      "step": 16320
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.0709,
      "step": 17280
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7314453291503982,
      "eval_loss": 1.2700670957565308,
      "eval_runtime": 40.2932,
      "eval_samples_per_second": 188.717,
      "eval_steps_per_second": 1.191,
      "step": 17280
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.0629,
      "step": 18240
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7313673197000715,
      "eval_loss": 1.2694642543792725,
      "eval_runtime": 40.2951,
      "eval_samples_per_second": 188.708,
      "eval_steps_per_second": 1.191,
      "step": 18240
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.0575,
      "step": 19200
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7321188555482103,
      "eval_loss": 1.269392490386963,
      "eval_runtime": 40.9309,
      "eval_samples_per_second": 185.776,
      "eval_steps_per_second": 1.173,
      "step": 19200
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 1.0494,
      "step": 20160
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7336233174474587,
      "eval_loss": 1.2491707801818848,
      "eval_runtime": 41.3244,
      "eval_samples_per_second": 184.007,
      "eval_steps_per_second": 1.162,
      "step": 20160
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 1.0443,
      "step": 21120
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7333931208601605,
      "eval_loss": 1.2573738098144531,
      "eval_runtime": 40.4041,
      "eval_samples_per_second": 188.199,
      "eval_steps_per_second": 1.188,
      "step": 21120
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 1.0375,
      "step": 22080
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7354173520930503,
      "eval_loss": 1.2430847883224487,
      "eval_runtime": 40.4071,
      "eval_samples_per_second": 188.185,
      "eval_steps_per_second": 1.188,
      "step": 22080
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 1.0332,
      "step": 23040
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7351443165552266,
      "eval_loss": 1.240692377090454,
      "eval_runtime": 40.2279,
      "eval_samples_per_second": 189.023,
      "eval_steps_per_second": 1.193,
      "step": 23040
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 1.0279,
      "step": 24000
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7350845404729578,
      "eval_loss": 1.2445788383483887,
      "eval_runtime": 40.3432,
      "eval_samples_per_second": 188.483,
      "eval_steps_per_second": 1.19,
      "step": 24000
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 1.0233,
      "step": 24960
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7361292168770468,
      "eval_loss": 1.2367281913757324,
      "eval_runtime": 40.3061,
      "eval_samples_per_second": 188.657,
      "eval_steps_per_second": 1.191,
      "step": 24960
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 1.018,
      "step": 25920
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7351869592986123,
      "eval_loss": 1.2435057163238525,
      "eval_runtime": 41.4116,
      "eval_samples_per_second": 183.62,
      "eval_steps_per_second": 1.159,
      "step": 25920
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 1.0128,
      "step": 26880
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7379444038039502,
      "eval_loss": 1.2293747663497925,
      "eval_runtime": 42.7481,
      "eval_samples_per_second": 177.879,
      "eval_steps_per_second": 1.123,
      "step": 26880
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 1.008,
      "step": 27840
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7381460506414618,
      "eval_loss": 1.224423885345459,
      "eval_runtime": 40.3086,
      "eval_samples_per_second": 188.645,
      "eval_steps_per_second": 1.191,
      "step": 27840
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 1.0036,
      "step": 28800
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7393030501323384,
      "eval_loss": 1.2178620100021362,
      "eval_runtime": 40.2497,
      "eval_samples_per_second": 188.921,
      "eval_steps_per_second": 1.193,
      "step": 28800
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 0.9997,
      "step": 29760
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7388722666381714,
      "eval_loss": 1.2249476909637451,
      "eval_runtime": 40.2623,
      "eval_samples_per_second": 188.862,
      "eval_steps_per_second": 1.192,
      "step": 29760
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 0.9969,
      "step": 30720
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7389634850823794,
      "eval_loss": 1.2235573530197144,
      "eval_runtime": 40.3447,
      "eval_samples_per_second": 188.476,
      "eval_steps_per_second": 1.19,
      "step": 30720
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 0.992,
      "step": 31680
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7388708814628272,
      "eval_loss": 1.217455506324768,
      "eval_runtime": 42.7026,
      "eval_samples_per_second": 178.069,
      "eval_steps_per_second": 1.124,
      "step": 31680
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 0.988,
      "step": 32640
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7401730933519727,
      "eval_loss": 1.2093894481658936,
      "eval_runtime": 40.7008,
      "eval_samples_per_second": 186.827,
      "eval_steps_per_second": 1.179,
      "step": 32640
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 0.9836,
      "step": 33600
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.7400202510644008,
      "eval_loss": 1.208998203277588,
      "eval_runtime": 40.6904,
      "eval_samples_per_second": 186.874,
      "eval_steps_per_second": 1.18,
      "step": 33600
    }
  ],
  "max_steps": 38400,
  "num_train_epochs": 40,
  "total_flos": 2041828249436160.0,
  "trial_name": null,
  "trial_params": null
}