{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.011010184420589045,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00011010184420589045,
      "eval_loss": 1.441235899925232,
      "eval_runtime": 1176.9455,
      "eval_samples_per_second": 12.997,
      "eval_steps_per_second": 1.625,
      "step": 1
    },
    {
      "epoch": 0.00033030553261767135,
      "grad_norm": 47.746360778808594,
      "learning_rate": 1.5e-05,
      "loss": 5.1907,
      "step": 3
    },
    {
      "epoch": 0.0006606110652353427,
      "grad_norm": 45.7549934387207,
      "learning_rate": 3e-05,
      "loss": 4.5615,
      "step": 6
    },
    {
      "epoch": 0.000990916597853014,
      "grad_norm": 54.48334503173828,
      "learning_rate": 4.5e-05,
      "loss": 2.5264,
      "step": 9
    },
    {
      "epoch": 0.000990916597853014,
      "eval_loss": 0.35898175835609436,
      "eval_runtime": 1183.3259,
      "eval_samples_per_second": 12.927,
      "eval_steps_per_second": 1.617,
      "step": 9
    },
    {
      "epoch": 0.0013212221304706854,
      "grad_norm": 23.894742965698242,
      "learning_rate": 4.993910125649561e-05,
      "loss": 1.0379,
      "step": 12
    },
    {
      "epoch": 0.0016515276630883566,
      "grad_norm": 19.3171443939209,
      "learning_rate": 4.962019382530521e-05,
      "loss": 0.5869,
      "step": 15
    },
    {
      "epoch": 0.001981833195706028,
      "grad_norm": 13.86229419708252,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 0.4021,
      "step": 18
    },
    {
      "epoch": 0.001981833195706028,
      "eval_loss": 0.061614975333213806,
      "eval_runtime": 1183.6671,
      "eval_samples_per_second": 12.923,
      "eval_steps_per_second": 1.616,
      "step": 18
    },
    {
      "epoch": 0.0023121387283236996,
      "grad_norm": 8.881821632385254,
      "learning_rate": 4.817959636416969e-05,
      "loss": 0.1436,
      "step": 21
    },
    {
      "epoch": 0.002642444260941371,
      "grad_norm": 5.539639472961426,
      "learning_rate": 4.707368982147318e-05,
      "loss": 0.0862,
      "step": 24
    },
    {
      "epoch": 0.002972749793559042,
      "grad_norm": 12.02042293548584,
      "learning_rate": 4.572593931387604e-05,
      "loss": 0.0929,
      "step": 27
    },
    {
      "epoch": 0.002972749793559042,
      "eval_loss": 0.014203645288944244,
      "eval_runtime": 1185.2725,
      "eval_samples_per_second": 12.906,
      "eval_steps_per_second": 1.614,
      "step": 27
    },
    {
      "epoch": 0.0033030553261767133,
      "grad_norm": 1.4277693033218384,
      "learning_rate": 4.415111107797445e-05,
      "loss": 0.0259,
      "step": 30
    },
    {
      "epoch": 0.003633360858794385,
      "grad_norm": 11.9326753616333,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 0.1501,
      "step": 33
    },
    {
      "epoch": 0.003963666391412056,
      "grad_norm": 13.255081176757812,
      "learning_rate": 4.039153688314145e-05,
      "loss": 0.0348,
      "step": 36
    },
    {
      "epoch": 0.003963666391412056,
      "eval_loss": 0.010344818234443665,
      "eval_runtime": 1185.2904,
      "eval_samples_per_second": 12.906,
      "eval_steps_per_second": 1.614,
      "step": 36
    },
    {
      "epoch": 0.0042939719240297275,
      "grad_norm": 0.062791608273983,
      "learning_rate": 3.824798160583012e-05,
      "loss": 0.0094,
      "step": 39
    },
    {
      "epoch": 0.004624277456647399,
      "grad_norm": 1.287218689918518,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 0.0155,
      "step": 42
    },
    {
      "epoch": 0.00495458298926507,
      "grad_norm": 0.8373615741729736,
      "learning_rate": 3.355050358314172e-05,
      "loss": 0.0018,
      "step": 45
    },
    {
      "epoch": 0.00495458298926507,
      "eval_loss": 0.009160873480141163,
      "eval_runtime": 1185.2739,
      "eval_samples_per_second": 12.906,
      "eval_steps_per_second": 1.614,
      "step": 45
    },
    {
      "epoch": 0.005284888521882742,
      "grad_norm": 0.1402716338634491,
      "learning_rate": 3.104804738999169e-05,
      "loss": 0.0432,
      "step": 48
    },
    {
      "epoch": 0.005615194054500413,
      "grad_norm": 0.2631000578403473,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 0.0085,
      "step": 51
    },
    {
      "epoch": 0.005945499587118084,
      "grad_norm": 3.119053840637207,
      "learning_rate": 2.587248741756253e-05,
      "loss": 0.055,
      "step": 54
    },
    {
      "epoch": 0.005945499587118084,
      "eval_loss": 0.007310016546398401,
      "eval_runtime": 1184.9738,
      "eval_samples_per_second": 12.909,
      "eval_steps_per_second": 1.614,
      "step": 54
    },
    {
      "epoch": 0.006275805119735756,
      "grad_norm": 1.840917944908142,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 0.0032,
      "step": 57
    },
    {
      "epoch": 0.006606110652353427,
      "grad_norm": 4.891698360443115,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 0.0493,
      "step": 60
    },
    {
      "epoch": 0.006936416184971098,
      "grad_norm": 7.879451751708984,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 0.0499,
      "step": 63
    },
    {
      "epoch": 0.006936416184971098,
      "eval_loss": 0.0055978428572416306,
      "eval_runtime": 1185.2837,
      "eval_samples_per_second": 12.906,
      "eval_steps_per_second": 1.614,
      "step": 63
    },
    {
      "epoch": 0.00726672171758877,
      "grad_norm": 1.2567558288574219,
      "learning_rate": 1.56348351646022e-05,
      "loss": 0.0473,
      "step": 66
    },
    {
      "epoch": 0.007597027250206441,
      "grad_norm": 0.13767965137958527,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 0.0037,
      "step": 69
    },
    {
      "epoch": 0.007927332782824112,
      "grad_norm": 0.10938674211502075,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 0.0007,
      "step": 72
    },
    {
      "epoch": 0.007927332782824112,
      "eval_loss": 0.004890562035143375,
      "eval_runtime": 1184.4337,
      "eval_samples_per_second": 12.915,
      "eval_steps_per_second": 1.615,
      "step": 72
    },
    {
      "epoch": 0.008257638315441783,
      "grad_norm": 0.06881111860275269,
      "learning_rate": 8.930309757836517e-06,
      "loss": 0.005,
      "step": 75
    },
    {
      "epoch": 0.008587943848059455,
      "grad_norm": 1.8729628324508667,
      "learning_rate": 7.016504991533726e-06,
      "loss": 0.0321,
      "step": 78
    },
    {
      "epoch": 0.008918249380677127,
      "grad_norm": 0.050177041441202164,
      "learning_rate": 5.299731159831953e-06,
      "loss": 0.0043,
      "step": 81
    },
    {
      "epoch": 0.008918249380677127,
      "eval_loss": 0.004884020891040564,
      "eval_runtime": 1184.5953,
      "eval_samples_per_second": 12.913,
      "eval_steps_per_second": 1.615,
      "step": 81
    },
    {
      "epoch": 0.009248554913294798,
      "grad_norm": 0.036499105393886566,
      "learning_rate": 3.798797596089351e-06,
      "loss": 0.002,
      "step": 84
    },
    {
      "epoch": 0.009578860445912468,
      "grad_norm": 0.19988898932933807,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 0.0006,
      "step": 87
    },
    {
      "epoch": 0.00990916597853014,
      "grad_norm": 0.11661279201507568,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 0.0005,
      "step": 90
    },
    {
      "epoch": 0.00990916597853014,
      "eval_loss": 0.004694274626672268,
      "eval_runtime": 1184.8106,
      "eval_samples_per_second": 12.911,
      "eval_steps_per_second": 1.615,
      "step": 90
    },
    {
      "epoch": 0.010239471511147812,
      "grad_norm": 0.025197148323059082,
      "learning_rate": 7.426068431000882e-07,
      "loss": 0.0007,
      "step": 93
    },
    {
      "epoch": 0.010569777043765483,
      "grad_norm": 10.560782432556152,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 0.0412,
      "step": 96
    },
    {
      "epoch": 0.010900082576383155,
      "grad_norm": 0.04284638911485672,
      "learning_rate": 1.522932452260595e-08,
      "loss": 0.0004,
      "step": 99
    },
    {
      "epoch": 0.010900082576383155,
      "eval_loss": 0.004690830130130053,
      "eval_runtime": 1184.8269,
      "eval_samples_per_second": 12.911,
      "eval_steps_per_second": 1.615,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5082145020536422e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}