|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.006492347145801886,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.492347145801887e-05,
      "eval_loss": 1.2574450969696045,
      "eval_runtime": 2164.1177,
      "eval_samples_per_second": 11.987,
      "eval_steps_per_second": 1.499,
      "step": 1
    },
    {
      "epoch": 0.00019477041437405659,
      "grad_norm": 0.21717190742492676,
      "learning_rate": 3e-05,
      "loss": 1.1457,
      "step": 3
    },
    {
      "epoch": 0.00038954082874811317,
      "grad_norm": 0.1491047739982605,
      "learning_rate": 6e-05,
      "loss": 1.2315,
      "step": 6
    },
    {
      "epoch": 0.0005843112431221698,
      "grad_norm": 0.18038997054100037,
      "learning_rate": 9e-05,
      "loss": 1.1694,
      "step": 9
    },
    {
      "epoch": 0.0005843112431221698,
      "eval_loss": 1.246630072593689,
      "eval_runtime": 2177.4551,
      "eval_samples_per_second": 11.914,
      "eval_steps_per_second": 1.489,
      "step": 9
    },
    {
      "epoch": 0.0007790816574962263,
      "grad_norm": 0.2281239777803421,
      "learning_rate": 9.987820251299122e-05,
      "loss": 1.3245,
      "step": 12
    },
    {
      "epoch": 0.0009738520718702829,
      "grad_norm": 0.24129489064216614,
      "learning_rate": 9.924038765061042e-05,
      "loss": 1.2447,
      "step": 15
    },
    {
      "epoch": 0.0011686224862443395,
      "grad_norm": 0.20848846435546875,
      "learning_rate": 9.806308479691595e-05,
      "loss": 1.3351,
      "step": 18
    },
    {
      "epoch": 0.0011686224862443395,
      "eval_loss": 1.1892844438552856,
      "eval_runtime": 2174.5469,
      "eval_samples_per_second": 11.93,
      "eval_steps_per_second": 1.491,
      "step": 18
    },
    {
      "epoch": 0.001363392900618396,
      "grad_norm": 0.22571416199207306,
      "learning_rate": 9.635919272833938e-05,
      "loss": 1.1832,
      "step": 21
    },
    {
      "epoch": 0.0015581633149924527,
      "grad_norm": 0.24647393822669983,
      "learning_rate": 9.414737964294636e-05,
      "loss": 1.3137,
      "step": 24
    },
    {
      "epoch": 0.0017529337293665092,
      "grad_norm": 0.2367832213640213,
      "learning_rate": 9.145187862775209e-05,
      "loss": 1.24,
      "step": 27
    },
    {
      "epoch": 0.0017529337293665092,
      "eval_loss": 1.1527782678604126,
      "eval_runtime": 2174.8129,
      "eval_samples_per_second": 11.928,
      "eval_steps_per_second": 1.491,
      "step": 27
    },
    {
      "epoch": 0.0019477041437405659,
      "grad_norm": 0.2434782236814499,
      "learning_rate": 8.83022221559489e-05,
      "loss": 1.0845,
      "step": 30
    },
    {
      "epoch": 0.0021424745581146223,
      "grad_norm": 0.1957402527332306,
      "learning_rate": 8.473291852294987e-05,
      "loss": 1.0744,
      "step": 33
    },
    {
      "epoch": 0.002337244972488679,
      "grad_norm": 0.20614418387413025,
      "learning_rate": 8.07830737662829e-05,
      "loss": 1.0835,
      "step": 36
    },
    {
      "epoch": 0.002337244972488679,
      "eval_loss": 1.136653184890747,
      "eval_runtime": 2176.2744,
      "eval_samples_per_second": 11.92,
      "eval_steps_per_second": 1.49,
      "step": 36
    },
    {
      "epoch": 0.0025320153868627357,
      "grad_norm": 0.19403059780597687,
      "learning_rate": 7.649596321166024e-05,
      "loss": 1.2458,
      "step": 39
    },
    {
      "epoch": 0.002726785801236792,
      "grad_norm": 0.2524452805519104,
      "learning_rate": 7.191855733945387e-05,
      "loss": 1.0825,
      "step": 42
    },
    {
      "epoch": 0.0029215562156108487,
      "grad_norm": 0.22213758528232574,
      "learning_rate": 6.710100716628344e-05,
      "loss": 1.1252,
      "step": 45
    },
    {
      "epoch": 0.0029215562156108487,
      "eval_loss": 1.1249250173568726,
      "eval_runtime": 2174.4018,
      "eval_samples_per_second": 11.931,
      "eval_steps_per_second": 1.491,
      "step": 45
    },
    {
      "epoch": 0.0031163266299849054,
      "grad_norm": 0.3361480236053467,
      "learning_rate": 6.209609477998338e-05,
      "loss": 1.1268,
      "step": 48
    },
    {
      "epoch": 0.003311097044358962,
      "grad_norm": 0.26797690987586975,
      "learning_rate": 5.695865504800327e-05,
      "loss": 1.0806,
      "step": 51
    },
    {
      "epoch": 0.0035058674587330183,
      "grad_norm": 0.24251677095890045,
      "learning_rate": 5.174497483512506e-05,
      "loss": 1.1435,
      "step": 54
    },
    {
      "epoch": 0.0035058674587330183,
      "eval_loss": 1.1167911291122437,
      "eval_runtime": 2174.2458,
      "eval_samples_per_second": 11.931,
      "eval_steps_per_second": 1.492,
      "step": 54
    },
    {
      "epoch": 0.003700637873107075,
      "grad_norm": 0.25566375255584717,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 1.1187,
      "step": 57
    },
    {
      "epoch": 0.0038954082874811317,
      "grad_norm": 0.22987186908721924,
      "learning_rate": 4.131759111665349e-05,
      "loss": 1.1929,
      "step": 60
    },
    {
      "epoch": 0.004090178701855188,
      "grad_norm": 0.2270929515361786,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 1.061,
      "step": 63
    },
    {
      "epoch": 0.004090178701855188,
      "eval_loss": 1.1118545532226562,
      "eval_runtime": 2176.6142,
      "eval_samples_per_second": 11.919,
      "eval_steps_per_second": 1.49,
      "step": 63
    },
    {
      "epoch": 0.004284949116229245,
      "grad_norm": 0.24837514758110046,
      "learning_rate": 3.12696703292044e-05,
      "loss": 1.0437,
      "step": 66
    },
    {
      "epoch": 0.004479719530603301,
      "grad_norm": 0.2640354037284851,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 0.9759,
      "step": 69
    },
    {
      "epoch": 0.004674489944977358,
      "grad_norm": 0.2336353361606598,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 1.009,
      "step": 72
    },
    {
      "epoch": 0.004674489944977358,
      "eval_loss": 1.107908844947815,
      "eval_runtime": 2174.4566,
      "eval_samples_per_second": 11.93,
      "eval_steps_per_second": 1.491,
      "step": 72
    },
    {
      "epoch": 0.004869260359351414,
      "grad_norm": 0.2683839201927185,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 1.0181,
      "step": 75
    },
    {
      "epoch": 0.0050640307737254715,
      "grad_norm": 0.2877131998538971,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 1.0598,
      "step": 78
    },
    {
      "epoch": 0.005258801188099528,
      "grad_norm": 0.278920441865921,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 1.1018,
      "step": 81
    },
    {
      "epoch": 0.005258801188099528,
      "eval_loss": 1.1061383485794067,
      "eval_runtime": 2176.8672,
      "eval_samples_per_second": 11.917,
      "eval_steps_per_second": 1.49,
      "step": 81
    },
    {
      "epoch": 0.005453571602473584,
      "grad_norm": 0.2761317491531372,
      "learning_rate": 7.597595192178702e-06,
      "loss": 1.024,
      "step": 84
    },
    {
      "epoch": 0.005648342016847641,
      "grad_norm": 0.29402637481689453,
      "learning_rate": 5.060297685041659e-06,
      "loss": 1.2208,
      "step": 87
    },
    {
      "epoch": 0.005843112431221697,
      "grad_norm": 0.1755310297012329,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 0.951,
      "step": 90
    },
    {
      "epoch": 0.005843112431221697,
      "eval_loss": 1.1051793098449707,
      "eval_runtime": 2177.0043,
      "eval_samples_per_second": 11.916,
      "eval_steps_per_second": 1.49,
      "step": 90
    },
    {
      "epoch": 0.006037882845595754,
      "grad_norm": 0.2584977447986603,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 1.0409,
      "step": 93
    },
    {
      "epoch": 0.006232653259969811,
      "grad_norm": 0.21517957746982574,
      "learning_rate": 4.865965629214819e-07,
      "loss": 1.102,
      "step": 96
    },
    {
      "epoch": 0.006427423674343867,
      "grad_norm": 0.2704929709434509,
      "learning_rate": 3.04586490452119e-08,
      "loss": 1.256,
      "step": 99
    },
    {
      "epoch": 0.006427423674343867,
      "eval_loss": 1.1049902439117432,
      "eval_runtime": 2176.592,
      "eval_samples_per_second": 11.919,
      "eval_steps_per_second": 1.49,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.6401962573286605e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}