{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.92,
  "eval_steps": 500,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "grad_norm": 1.2959253787994385,
      "learning_rate": 4.8400000000000004e-05,
      "loss": 3.4782,
      "step": 500
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3196003437042236,
      "learning_rate": 4.6800000000000006e-05,
      "loss": 2.0957,
      "step": 1000
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5477054119110107,
      "learning_rate": 4.52e-05,
      "loss": 2.0086,
      "step": 1500
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4371566772460938,
      "learning_rate": 4.36e-05,
      "loss": 1.9488,
      "step": 2000
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6890789270401,
      "learning_rate": 4.2e-05,
      "loss": 1.8929,
      "step": 2500
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6345953941345215,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 1.8377,
      "step": 3000
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.744602918624878,
      "eval_runtime": 0.3579,
      "eval_samples_per_second": 558.824,
      "eval_steps_per_second": 69.853,
      "step": 3125
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8465389013290405,
      "learning_rate": 3.88e-05,
      "loss": 1.7824,
      "step": 3500
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.703276515007019,
      "learning_rate": 3.72e-05,
      "loss": 1.7505,
      "step": 4000
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.056072235107422,
      "learning_rate": 3.56e-05,
      "loss": 1.726,
      "step": 4500
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.597702980041504,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 1.7057,
      "step": 5000
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1297709941864014,
      "learning_rate": 3.24e-05,
      "loss": 1.6863,
      "step": 5500
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2768709659576416,
      "learning_rate": 3.08e-05,
      "loss": 1.6646,
      "step": 6000
    }
  ],
  "logging_steps": 500,
  "max_steps": 15625,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1582743214080000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}