{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.002324842210416383, "eval_steps": 500, "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.686842543401596e-05, "grad_norm": 0.4089602828025818, "learning_rate": 1.9230769230769234e-06, "loss": 2.0398, "step": 1 }, { "epoch": 0.00019373685086803192, "grad_norm": 0.4970704913139343, "learning_rate": 3.846153846153847e-06, "loss": 2.0532, "step": 2 }, { "epoch": 0.00029060527630204785, "grad_norm": 0.4048117995262146, "learning_rate": 5.76923076923077e-06, "loss": 2.1092, "step": 3 }, { "epoch": 0.00038747370173606384, "grad_norm": 0.41647177934646606, "learning_rate": 7.692307692307694e-06, "loss": 2.1701, "step": 4 }, { "epoch": 0.00048434212717007977, "grad_norm": 0.33691734075546265, "learning_rate": 9.615384615384616e-06, "loss": 2.0658, "step": 5 }, { "epoch": 0.0005812105526040957, "grad_norm": 0.40251973271369934, "learning_rate": 1.153846153846154e-05, "loss": 2.0623, "step": 6 }, { "epoch": 0.0006780789780381117, "grad_norm": 0.32849112153053284, "learning_rate": 1.3461538461538462e-05, "loss": 2.0855, "step": 7 }, { "epoch": 0.0007749474034721277, "grad_norm": 0.29179856181144714, "learning_rate": 1.5384615384615387e-05, "loss": 1.9376, "step": 8 }, { "epoch": 0.0008718158289061436, "grad_norm": 0.34249696135520935, "learning_rate": 1.730769230769231e-05, "loss": 2.0817, "step": 9 }, { "epoch": 0.0009686842543401595, "grad_norm": 0.25032880902290344, "learning_rate": 1.923076923076923e-05, "loss": 2.0218, "step": 10 }, { "epoch": 0.0010655526797741755, "grad_norm": 0.2710218131542206, "learning_rate": 2.1153846153846154e-05, "loss": 2.0107, "step": 11 }, { "epoch": 0.0011624211052081914, "grad_norm": 0.2562181055545807, "learning_rate": 2.307692307692308e-05, "loss": 1.8556, "step": 12 }, { "epoch": 0.0012592895306422073, "grad_norm": 0.15152186155319214, "learning_rate": 2.5e-05, "loss": 1.938, "step": 13 }, { "epoch": 0.0013561579560762234, "grad_norm": 0.19589252769947052, "learning_rate": 2.6923076923076923e-05, "loss": 1.9859, "step": 14 }, { "epoch": 0.0014530263815102393, "grad_norm": 0.17176692187786102, "learning_rate": 2.8846153846153845e-05, "loss": 1.8812, "step": 15 }, { "epoch": 0.0015498948069442554, "grad_norm": 0.18412314355373383, "learning_rate": 3.0769230769230774e-05, "loss": 2.0042, "step": 16 }, { "epoch": 0.0016467632323782712, "grad_norm": 0.19615541398525238, "learning_rate": 3.269230769230769e-05, "loss": 1.9725, "step": 17 }, { "epoch": 0.001743631657812287, "grad_norm": 0.18842636048793793, "learning_rate": 3.461538461538462e-05, "loss": 1.9555, "step": 18 }, { "epoch": 0.0018405000832463032, "grad_norm": 0.23347383737564087, "learning_rate": 3.653846153846154e-05, "loss": 1.9779, "step": 19 }, { "epoch": 0.001937368508680319, "grad_norm": 0.19372476637363434, "learning_rate": 3.846153846153846e-05, "loss": 1.9203, "step": 20 }, { "epoch": 0.002034236934114335, "grad_norm": 0.15928150713443756, "learning_rate": 4.038461538461539e-05, "loss": 1.8459, "step": 21 }, { "epoch": 0.002131105359548351, "grad_norm": 0.18540354073047638, "learning_rate": 4.230769230769231e-05, "loss": 1.7937, "step": 22 }, { "epoch": 0.0022279737849823667, "grad_norm": 0.1321619600057602, "learning_rate": 4.423076923076923e-05, "loss": 1.8011, "step": 23 }, { "epoch": 0.002324842210416383, "grad_norm": 0.1503838449716568, "learning_rate": 4.615384615384616e-05, "loss": 1.8784, "step": 24 } ], "logging_steps": 1.0, "max_steps": 10323, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8301560403240960.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }