Training in progress, step 100, checkpoint (commit 8160b81, verified)
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.011010184420589045,
"eval_steps": 9,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00011010184420589045,
"eval_loss": 1.441235899925232,
"eval_runtime": 1176.9455,
"eval_samples_per_second": 12.997,
"eval_steps_per_second": 1.625,
"step": 1
},
{
"epoch": 0.00033030553261767135,
"grad_norm": 47.746360778808594,
"learning_rate": 1.5e-05,
"loss": 5.1907,
"step": 3
},
{
"epoch": 0.0006606110652353427,
"grad_norm": 45.7549934387207,
"learning_rate": 3e-05,
"loss": 4.5615,
"step": 6
},
{
"epoch": 0.000990916597853014,
"grad_norm": 54.48334503173828,
"learning_rate": 4.5e-05,
"loss": 2.5264,
"step": 9
},
{
"epoch": 0.000990916597853014,
"eval_loss": 0.35898175835609436,
"eval_runtime": 1183.3259,
"eval_samples_per_second": 12.927,
"eval_steps_per_second": 1.617,
"step": 9
},
{
"epoch": 0.0013212221304706854,
"grad_norm": 23.894742965698242,
"learning_rate": 4.993910125649561e-05,
"loss": 1.0379,
"step": 12
},
{
"epoch": 0.0016515276630883566,
"grad_norm": 19.3171443939209,
"learning_rate": 4.962019382530521e-05,
"loss": 0.5869,
"step": 15
},
{
"epoch": 0.001981833195706028,
"grad_norm": 13.86229419708252,
"learning_rate": 4.9031542398457974e-05,
"loss": 0.4021,
"step": 18
},
{
"epoch": 0.001981833195706028,
"eval_loss": 0.061614975333213806,
"eval_runtime": 1183.6671,
"eval_samples_per_second": 12.923,
"eval_steps_per_second": 1.616,
"step": 18
},
{
"epoch": 0.0023121387283236996,
"grad_norm": 8.881821632385254,
"learning_rate": 4.817959636416969e-05,
"loss": 0.1436,
"step": 21
},
{
"epoch": 0.002642444260941371,
"grad_norm": 5.539639472961426,
"learning_rate": 4.707368982147318e-05,
"loss": 0.0862,
"step": 24
},
{
"epoch": 0.002972749793559042,
"grad_norm": 12.02042293548584,
"learning_rate": 4.572593931387604e-05,
"loss": 0.0929,
"step": 27
},
{
"epoch": 0.002972749793559042,
"eval_loss": 0.014203645288944244,
"eval_runtime": 1185.2725,
"eval_samples_per_second": 12.906,
"eval_steps_per_second": 1.614,
"step": 27
},
{
"epoch": 0.0033030553261767133,
"grad_norm": 1.4277693033218384,
"learning_rate": 4.415111107797445e-05,
"loss": 0.0259,
"step": 30
},
{
"epoch": 0.003633360858794385,
"grad_norm": 11.9326753616333,
"learning_rate": 4.2366459261474933e-05,
"loss": 0.1501,
"step": 33
},
{
"epoch": 0.003963666391412056,
"grad_norm": 13.255081176757812,
"learning_rate": 4.039153688314145e-05,
"loss": 0.0348,
"step": 36
},
{
"epoch": 0.003963666391412056,
"eval_loss": 0.010344818234443665,
"eval_runtime": 1185.2904,
"eval_samples_per_second": 12.906,
"eval_steps_per_second": 1.614,
"step": 36
},
{
"epoch": 0.0042939719240297275,
"grad_norm": 0.062791608273983,
"learning_rate": 3.824798160583012e-05,
"loss": 0.0094,
"step": 39
},
{
"epoch": 0.004624277456647399,
"grad_norm": 1.287218689918518,
"learning_rate": 3.5959278669726935e-05,
"loss": 0.0155,
"step": 42
},
{
"epoch": 0.00495458298926507,
"grad_norm": 0.8373615741729736,
"learning_rate": 3.355050358314172e-05,
"loss": 0.0018,
"step": 45
},
{
"epoch": 0.00495458298926507,
"eval_loss": 0.009160873480141163,
"eval_runtime": 1185.2739,
"eval_samples_per_second": 12.906,
"eval_steps_per_second": 1.614,
"step": 45
},
{
"epoch": 0.005284888521882742,
"grad_norm": 0.1402716338634491,
"learning_rate": 3.104804738999169e-05,
"loss": 0.0432,
"step": 48
},
{
"epoch": 0.005615194054500413,
"grad_norm": 0.2631000578403473,
"learning_rate": 2.8479327524001636e-05,
"loss": 0.0085,
"step": 51
},
{
"epoch": 0.005945499587118084,
"grad_norm": 3.119053840637207,
"learning_rate": 2.587248741756253e-05,
"loss": 0.055,
"step": 54
},
{
"epoch": 0.005945499587118084,
"eval_loss": 0.007310016546398401,
"eval_runtime": 1184.9738,
"eval_samples_per_second": 12.909,
"eval_steps_per_second": 1.614,
"step": 54
},
{
"epoch": 0.006275805119735756,
"grad_norm": 1.840917944908142,
"learning_rate": 2.3256088156396868e-05,
"loss": 0.0032,
"step": 57
},
{
"epoch": 0.006606110652353427,
"grad_norm": 4.891698360443115,
"learning_rate": 2.0658795558326743e-05,
"loss": 0.0493,
"step": 60
},
{
"epoch": 0.006936416184971098,
"grad_norm": 7.879451751708984,
"learning_rate": 1.8109066104575023e-05,
"loss": 0.0499,
"step": 63
},
{
"epoch": 0.006936416184971098,
"eval_loss": 0.0055978428572416306,
"eval_runtime": 1185.2837,
"eval_samples_per_second": 12.906,
"eval_steps_per_second": 1.614,
"step": 63
},
{
"epoch": 0.00726672171758877,
"grad_norm": 1.2567558288574219,
"learning_rate": 1.56348351646022e-05,
"loss": 0.0473,
"step": 66
},
{
"epoch": 0.007597027250206441,
"grad_norm": 0.13767965137958527,
"learning_rate": 1.3263210930352737e-05,
"loss": 0.0037,
"step": 69
},
{
"epoch": 0.007927332782824112,
"grad_norm": 0.10938674211502075,
"learning_rate": 1.1020177413231334e-05,
"loss": 0.0007,
"step": 72
},
{
"epoch": 0.007927332782824112,
"eval_loss": 0.004890562035143375,
"eval_runtime": 1184.4337,
"eval_samples_per_second": 12.915,
"eval_steps_per_second": 1.615,
"step": 72
},
{
"epoch": 0.008257638315441783,
"grad_norm": 0.06881111860275269,
"learning_rate": 8.930309757836517e-06,
"loss": 0.005,
"step": 75
},
{
"epoch": 0.008587943848059455,
"grad_norm": 1.8729628324508667,
"learning_rate": 7.016504991533726e-06,
"loss": 0.0321,
"step": 78
},
{
"epoch": 0.008918249380677127,
"grad_norm": 0.050177041441202164,
"learning_rate": 5.299731159831953e-06,
"loss": 0.0043,
"step": 81
},
{
"epoch": 0.008918249380677127,
"eval_loss": 0.004884020891040564,
"eval_runtime": 1184.5953,
"eval_samples_per_second": 12.913,
"eval_steps_per_second": 1.615,
"step": 81
},
{
"epoch": 0.009248554913294798,
"grad_norm": 0.036499105393886566,
"learning_rate": 3.798797596089351e-06,
"loss": 0.002,
"step": 84
},
{
"epoch": 0.009578860445912468,
"grad_norm": 0.19988898932933807,
"learning_rate": 2.5301488425208296e-06,
"loss": 0.0006,
"step": 87
},
{
"epoch": 0.00990916597853014,
"grad_norm": 0.11661279201507568,
"learning_rate": 1.5076844803522922e-06,
"loss": 0.0005,
"step": 90
},
{
"epoch": 0.00990916597853014,
"eval_loss": 0.004694274626672268,
"eval_runtime": 1184.8106,
"eval_samples_per_second": 12.911,
"eval_steps_per_second": 1.615,
"step": 90
},
{
"epoch": 0.010239471511147812,
"grad_norm": 0.025197148323059082,
"learning_rate": 7.426068431000882e-07,
"loss": 0.0007,
"step": 93
},
{
"epoch": 0.010569777043765483,
"grad_norm": 10.560782432556152,
"learning_rate": 2.4329828146074095e-07,
"loss": 0.0412,
"step": 96
},
{
"epoch": 0.010900082576383155,
"grad_norm": 0.04284638911485672,
"learning_rate": 1.522932452260595e-08,
"loss": 0.0004,
"step": 99
},
{
"epoch": 0.010900082576383155,
"eval_loss": 0.004690830130130053,
"eval_runtime": 1184.8269,
"eval_samples_per_second": 12.911,
"eval_steps_per_second": 1.615,
"step": 99
}
],
"logging_steps": 3,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 9,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5082145020536422e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
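
This file has the shape of a trainer_state.json written by the Hugging Face Trainer for the checkpoint at step 100. As a minimal sketch for inspecting it, the snippet below (assuming a hypothetical path of checkpoint-100/trainer_state.json) loads the file with the standard json module and separates the training-loss entries from the eval-loss entries in log_history; the field names (global_step, epoch, loss, eval_loss, step) are taken directly from the JSON above.

import json

# Hypothetical location of the checkpoint's trainer state; adjust to your layout.
with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

# log_history mixes training-log records (with "loss") and evaluation records
# (with "eval_loss"); split them by which key each entry carries.
train_points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_points = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print(f"global_step={state['global_step']}, epoch={state['epoch']:.6f}")
for step, loss in eval_points:
    print(f"step {step:>3}: eval_loss={loss:.6f}")

Run against the state above, this would report the eval_loss falling from roughly 1.44 at step 1 to about 0.0047 at step 99.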