{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3714020427112349, "eval_steps": 9, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003714020427112349, "eval_loss": 10.378816604614258, "eval_runtime": 1.3292, "eval_samples_per_second": 341.551, "eval_steps_per_second": 42.882, "step": 1 }, { "epoch": 0.011142061281337047, "grad_norm": 0.014514707960188389, "learning_rate": 3e-05, "loss": 10.3795, "step": 3 }, { "epoch": 0.022284122562674095, "grad_norm": 0.01718750037252903, "learning_rate": 6e-05, "loss": 10.3788, "step": 6 }, { "epoch": 0.033426183844011144, "grad_norm": 0.018008925020694733, "learning_rate": 9e-05, "loss": 10.3786, "step": 9 }, { "epoch": 0.033426183844011144, "eval_loss": 10.378602981567383, "eval_runtime": 1.3052, "eval_samples_per_second": 347.852, "eval_steps_per_second": 43.673, "step": 9 }, { "epoch": 0.04456824512534819, "grad_norm": 0.016212427988648415, "learning_rate": 9.987820251299122e-05, "loss": 10.3789, "step": 12 }, { "epoch": 0.055710306406685235, "grad_norm": 0.016671117395162582, "learning_rate": 9.924038765061042e-05, "loss": 10.3792, "step": 15 }, { "epoch": 0.06685236768802229, "grad_norm": 0.016305958852171898, "learning_rate": 9.806308479691595e-05, "loss": 10.3783, "step": 18 }, { "epoch": 0.06685236768802229, "eval_loss": 10.378040313720703, "eval_runtime": 1.3173, "eval_samples_per_second": 344.654, "eval_steps_per_second": 43.272, "step": 18 }, { "epoch": 0.07799442896935933, "grad_norm": 0.018844736739993095, "learning_rate": 9.635919272833938e-05, "loss": 10.378, "step": 21 }, { "epoch": 0.08913649025069638, "grad_norm": 0.020653052255511284, "learning_rate": 9.414737964294636e-05, "loss": 10.3781, "step": 24 }, { "epoch": 0.10027855153203342, "grad_norm": 0.018101783469319344, "learning_rate": 9.145187862775209e-05, "loss": 10.3773, "step": 27 }, { "epoch": 0.10027855153203342, "eval_loss": 10.377413749694824, "eval_runtime": 1.3583, "eval_samples_per_second": 334.247, "eval_steps_per_second": 41.965, "step": 27 }, { "epoch": 0.11142061281337047, "grad_norm": 0.021037165075540543, "learning_rate": 8.83022221559489e-05, "loss": 10.3777, "step": 30 }, { "epoch": 0.12256267409470752, "grad_norm": 0.02159174345433712, "learning_rate": 8.473291852294987e-05, "loss": 10.3772, "step": 33 }, { "epoch": 0.13370473537604458, "grad_norm": 0.023182040080428123, "learning_rate": 8.07830737662829e-05, "loss": 10.3766, "step": 36 }, { "epoch": 0.13370473537604458, "eval_loss": 10.376729011535645, "eval_runtime": 1.3571, "eval_samples_per_second": 334.536, "eval_steps_per_second": 42.001, "step": 36 }, { "epoch": 0.14484679665738162, "grad_norm": 0.023219875991344452, "learning_rate": 7.649596321166024e-05, "loss": 10.3776, "step": 39 }, { "epoch": 0.15598885793871867, "grad_norm": 0.026010680943727493, "learning_rate": 7.191855733945387e-05, "loss": 10.3773, "step": 42 }, { "epoch": 0.1671309192200557, "grad_norm": 0.029791677370667458, "learning_rate": 6.710100716628344e-05, "loss": 10.3759, "step": 45 }, { "epoch": 0.1671309192200557, "eval_loss": 10.375983238220215, "eval_runtime": 1.3417, "eval_samples_per_second": 338.387, "eval_steps_per_second": 42.485, "step": 45 }, { "epoch": 0.17827298050139276, "grad_norm": 0.026904676109552383, "learning_rate": 6.209609477998338e-05, "loss": 10.3762, "step": 48 }, { "epoch": 0.1894150417827298, "grad_norm": 0.034599192440509796, "learning_rate": 5.695865504800327e-05, "loss": 10.375, "step": 51 }, { "epoch": 0.20055710306406685, "grad_norm": 0.034609757363796234, "learning_rate": 5.174497483512506e-05, "loss": 10.3753, "step": 54 }, { "epoch": 0.20055710306406685, "eval_loss": 10.375200271606445, "eval_runtime": 1.3283, "eval_samples_per_second": 341.798, "eval_steps_per_second": 42.913, "step": 54 }, { "epoch": 0.2116991643454039, "grad_norm": 0.03360167518258095, "learning_rate": 4.6512176312793736e-05, "loss": 10.3749, "step": 57 }, { "epoch": 0.22284122562674094, "grad_norm": 0.03287893533706665, "learning_rate": 4.131759111665349e-05, "loss": 10.3746, "step": 60 }, { "epoch": 0.233983286908078, "grad_norm": 0.035624388605356216, "learning_rate": 3.6218132209150045e-05, "loss": 10.3739, "step": 63 }, { "epoch": 0.233983286908078, "eval_loss": 10.374495506286621, "eval_runtime": 1.3201, "eval_samples_per_second": 343.917, "eval_steps_per_second": 43.179, "step": 63 }, { "epoch": 0.24512534818941503, "grad_norm": 0.03744365647435188, "learning_rate": 3.12696703292044e-05, "loss": 10.3745, "step": 66 }, { "epoch": 0.2562674094707521, "grad_norm": 0.03684169799089432, "learning_rate": 2.6526421860705473e-05, "loss": 10.3749, "step": 69 }, { "epoch": 0.26740947075208915, "grad_norm": 0.041187137365341187, "learning_rate": 2.2040354826462668e-05, "loss": 10.3751, "step": 72 }, { "epoch": 0.26740947075208915, "eval_loss": 10.373970985412598, "eval_runtime": 1.3239, "eval_samples_per_second": 342.93, "eval_steps_per_second": 43.055, "step": 72 }, { "epoch": 0.2785515320334262, "grad_norm": 0.04117584973573685, "learning_rate": 1.7860619515673033e-05, "loss": 10.374, "step": 75 }, { "epoch": 0.28969359331476324, "grad_norm": 0.041653797030448914, "learning_rate": 1.4033009983067452e-05, "loss": 10.3748, "step": 78 }, { "epoch": 0.3008356545961003, "grad_norm": 0.05219626799225807, "learning_rate": 1.0599462319663905e-05, "loss": 10.3743, "step": 81 }, { "epoch": 0.3008356545961003, "eval_loss": 10.373651504516602, "eval_runtime": 1.3214, "eval_samples_per_second": 343.581, "eval_steps_per_second": 43.137, "step": 81 }, { "epoch": 0.31197771587743733, "grad_norm": 0.04116308316588402, "learning_rate": 7.597595192178702e-06, "loss": 10.374, "step": 84 }, { "epoch": 0.3231197771587744, "grad_norm": 0.04389878734946251, "learning_rate": 5.060297685041659e-06, "loss": 10.3742, "step": 87 }, { "epoch": 0.3342618384401114, "grad_norm": 0.045802537351846695, "learning_rate": 3.0153689607045845e-06, "loss": 10.3736, "step": 90 }, { "epoch": 0.3342618384401114, "eval_loss": 10.373515129089355, "eval_runtime": 1.3528, "eval_samples_per_second": 335.605, "eval_steps_per_second": 42.135, "step": 90 }, { "epoch": 0.34540389972144847, "grad_norm": 0.048498645424842834, "learning_rate": 1.4852136862001764e-06, "loss": 10.3742, "step": 93 }, { "epoch": 0.3565459610027855, "grad_norm": 0.03822680562734604, "learning_rate": 4.865965629214819e-07, "loss": 10.3734, "step": 96 }, { "epoch": 0.36768802228412256, "grad_norm": 0.05491569638252258, "learning_rate": 3.04586490452119e-08, "loss": 10.3726, "step": 99 }, { "epoch": 0.36768802228412256, "eval_loss": 10.373485565185547, "eval_runtime": 1.3066, "eval_samples_per_second": 347.471, "eval_steps_per_second": 43.625, "step": 99 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 18384309977088.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }