{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.96,
  "eval_steps": 10,
  "global_step": 27,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21333333333333335,
      "grad_norm": NaN,
      "learning_rate": 0.0,
      "loss": 63.3011,
      "step": 2
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": Infinity,
      "learning_rate": 0.0,
      "loss": 61.3878,
      "step": 4
    },
    {
      "epoch": 0.64,
      "grad_norm": 611.62060546875,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 64.6879,
      "step": 6
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 343.85888671875,
      "learning_rate": 6e-06,
      "loss": 59.1893,
      "step": 8
    },
    {
      "epoch": 1.1066666666666667,
      "grad_norm": 286.243408203125,
      "learning_rate": 1e-05,
      "loss": 61.2499,
      "step": 10
    },
    {
      "epoch": 1.1066666666666667,
      "eval_loss": 3.3731160163879395,
      "eval_runtime": 3.4128,
      "eval_samples_per_second": 5.86,
      "eval_steps_per_second": 2.051,
      "step": 10
    },
    {
      "epoch": 1.32,
      "grad_norm": 289.30523681640625,
      "learning_rate": 1.4e-05,
      "loss": 55.2484,
      "step": 12
    },
    {
      "epoch": 1.5333333333333332,
      "grad_norm": 334.54632568359375,
      "learning_rate": 1.8e-05,
      "loss": 56.5117,
      "step": 14
    },
    {
      "epoch": 1.7466666666666666,
      "grad_norm": 328.0875549316406,
      "learning_rate": 1.982973099683902e-05,
      "loss": 54.6102,
      "step": 16
    },
    {
      "epoch": 1.96,
      "grad_norm": 249.81207275390625,
      "learning_rate": 1.8502171357296144e-05,
      "loss": 55.2078,
      "step": 18
    },
    {
      "epoch": 2.2133333333333334,
      "grad_norm": 240.95912170410156,
      "learning_rate": 1.6026346363792565e-05,
      "loss": 56.2904,
      "step": 20
    },
    {
      "epoch": 2.2133333333333334,
      "eval_loss": 3.224900007247925,
      "eval_runtime": 3.438,
      "eval_samples_per_second": 5.817,
      "eval_steps_per_second": 2.036,
      "step": 20
    },
    {
      "epoch": 2.4266666666666667,
      "grad_norm": 274.64892578125,
      "learning_rate": 1.2736629900720832e-05,
      "loss": 52.4623,
      "step": 22
    },
    {
      "epoch": 2.64,
      "grad_norm": 275.81597900390625,
      "learning_rate": 9.07731640536698e-06,
      "loss": 53.445,
      "step": 24
    },
    {
      "epoch": 2.8533333333333335,
      "grad_norm": 252.26492309570312,
      "learning_rate": 5.542616442234618e-06,
      "loss": 53.8146,
      "step": 26
    }
  ],
  "logging_steps": 2,
  "max_steps": 27,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.910272351731712e+16,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}