{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0036797775778886254, "eval_steps": 8, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001226592525962875, "eval_loss": 4.26250696182251, "eval_runtime": 334.4519, "eval_samples_per_second": 7.699, "eval_steps_per_second": 3.851, "step": 1 }, { "epoch": 0.00036797775778886255, "grad_norm": 1.429119348526001, "learning_rate": 8.999999999999999e-05, "loss": 4.1435, "step": 3 }, { "epoch": 0.0007359555155777251, "grad_norm": 2.790388584136963, "learning_rate": 0.00017999999999999998, "loss": 3.7593, "step": 6 }, { "epoch": 0.0009812740207703, "eval_loss": 3.0642571449279785, "eval_runtime": 335.062, "eval_samples_per_second": 7.685, "eval_steps_per_second": 3.844, "step": 8 }, { "epoch": 0.0011039332733665877, "grad_norm": 2.1888792514801025, "learning_rate": 0.00027, "loss": 3.1196, "step": 9 }, { "epoch": 0.0014719110311554502, "grad_norm": 1.176177978515625, "learning_rate": 0.00029265847744427303, "loss": 2.4117, "step": 12 }, { "epoch": 0.0018398887889443127, "grad_norm": 2.197014808654785, "learning_rate": 0.00025606601717798207, "loss": 2.6601, "step": 15 }, { "epoch": 0.0019625480415406, "eval_loss": 2.3805487155914307, "eval_runtime": 334.9417, "eval_samples_per_second": 7.688, "eval_steps_per_second": 3.845, "step": 16 }, { "epoch": 0.0022078665467331754, "grad_norm": 1.6648246049880981, "learning_rate": 0.0001963525491562421, "loss": 2.3264, "step": 18 }, { "epoch": 0.0025758443045220377, "grad_norm": 1.6911269426345825, "learning_rate": 0.00012653483024396533, "loss": 2.2865, "step": 21 }, { "epoch": 0.0029438220623109004, "grad_norm": 0.7457146048545837, "learning_rate": 6.183221215612904e-05, "loss": 2.0923, "step": 24 }, { "epoch": 0.0029438220623109004, "eval_loss": 2.2382869720458984, "eval_runtime": 334.9773, "eval_samples_per_second": 7.687, "eval_steps_per_second": 3.845, "step": 24 }, { "epoch": 0.0033117998200997627, "grad_norm": 0.9372111558914185, "learning_rate": 1.634902137174483e-05, "loss": 2.0104, "step": 27 }, { "epoch": 0.0036797775778886254, "grad_norm": 0.9846737384796143, "learning_rate": 0.0, "loss": 1.9179, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4282526818304e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }