{ "best_metric": 2.128573179244995, "best_model_checkpoint": "output/nirvana/checkpoint-72", "epoch": 2.0, "global_step": 72, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.00013040646433810595, "loss": 2.8877, "step": 5 }, { "epoch": 0.29, "learning_rate": 0.00011137140040750913, "loss": 2.7784, "step": 10 }, { "epoch": 0.43, "learning_rate": 8.386493606940316e-05, "loss": 2.2959, "step": 15 }, { "epoch": 0.57, "learning_rate": 5.3335063930596836e-05, "loss": 2.5076, "step": 20 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.4917, "step": 25 }, { "epoch": 0.86, "learning_rate": 6.793535661894054e-06, "loss": 2.4772, "step": 30 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 2.6382, "step": 35 }, { "epoch": 1.0, "eval_loss": 2.5190556049346924, "eval_runtime": 2.4884, "eval_samples_per_second": 22.504, "eval_steps_per_second": 2.813, "step": 35 }, { "epoch": 1.0, "eval_loss": 2.3264973163604736, "eval_runtime": 1.1647, "eval_samples_per_second": 45.506, "eval_steps_per_second": 6.01, "step": 36 }, { "epoch": 1.11, "learning_rate": 4.137086214086682e-06, "loss": 2.6774, "step": 40 }, { "epoch": 1.25, "learning_rate": 2.009247481060283e-05, "loss": 2.4577, "step": 45 }, { "epoch": 1.39, "learning_rate": 4.513741816785908e-05, "loss": 2.2823, "step": 50 }, { "epoch": 1.53, "learning_rate": 7.457888395248933e-05, "loss": 2.298, "step": 55 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.4088, "step": 60 }, { "epoch": 1.81, "learning_rate": 0.00012479383023822482, "loss": 2.2944, "step": 65 }, { "epoch": 1.94, "learning_rate": 0.00013615781185663748, "loss": 2.0032, "step": 70 }, { "epoch": 2.0, "eval_loss": 2.128573179244995, "eval_runtime": 1.1938, "eval_samples_per_second": 44.396, "eval_steps_per_second": 5.864, "step": 72 } ], "max_steps": 72, "num_train_epochs": 2, "total_flos": 73161768960000.0, "trial_name": null, "trial_params": null }