{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.17066666666666666,
  "eval_steps": 500,
  "global_step": 20,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008533333333333334,
      "grad_norm": 160.11701043689894,
      "learning_rate": 0.0,
      "loss": 32.4968,
      "step": 1
    },
    {
      "epoch": 0.017066666666666667,
      "grad_norm": 157.24779534424323,
      "learning_rate": 1.5051499783199057e-06,
      "loss": 31.6979,
      "step": 2
    },
    {
      "epoch": 0.0256,
      "grad_norm": 157.9465272449825,
      "learning_rate": 2.385606273598312e-06,
      "loss": 31.8828,
      "step": 3
    },
    {
      "epoch": 0.034133333333333335,
      "grad_norm": 160.2154859965946,
      "learning_rate": 3.0102999566398115e-06,
      "loss": 31.9681,
      "step": 4
    },
    {
      "epoch": 0.042666666666666665,
      "grad_norm": 158.5305446712084,
      "learning_rate": 3.4948500216800934e-06,
      "loss": 31.3717,
      "step": 5
    },
    {
      "epoch": 0.0512,
      "grad_norm": 155.50243039700376,
      "learning_rate": 3.890756251918218e-06,
      "loss": 30.5348,
      "step": 6
    },
    {
      "epoch": 0.05973333333333333,
      "grad_norm": 168.6887446693614,
      "learning_rate": 4.225490200071284e-06,
      "loss": 31.3845,
      "step": 7
    },
    {
      "epoch": 0.06826666666666667,
      "grad_norm": 164.2631689450651,
      "learning_rate": 4.515449934959717e-06,
      "loss": 30.5243,
      "step": 8
    },
    {
      "epoch": 0.0768,
      "grad_norm": 174.1878139573776,
      "learning_rate": 4.771212547196624e-06,
      "loss": 30.0138,
      "step": 9
    },
    {
      "epoch": 0.08533333333333333,
      "grad_norm": 177.9519334680014,
      "learning_rate": 4.9999999999999996e-06,
      "loss": 29.6143,
      "step": 10
    },
    {
      "epoch": 0.09386666666666667,
      "grad_norm": 183.57104380865735,
      "learning_rate": 5.206963425791125e-06,
      "loss": 28.8718,
      "step": 11
    },
    {
      "epoch": 0.1024,
      "grad_norm": 186.4090344511231,
      "learning_rate": 5.395906230238124e-06,
      "loss": 26.1695,
      "step": 12
    },
    {
      "epoch": 0.11093333333333333,
      "grad_norm": 198.17161320746723,
      "learning_rate": 5.5697167615341825e-06,
      "loss": 26.1266,
      "step": 13
    },
    {
      "epoch": 0.11946666666666667,
      "grad_norm": 182.4443087115901,
      "learning_rate": 5.730640178391189e-06,
      "loss": 24.2121,
      "step": 14
    },
    {
      "epoch": 0.128,
      "grad_norm": 159.38105380659272,
      "learning_rate": 5.880456295278406e-06,
      "loss": 22.5796,
      "step": 15
    },
    {
      "epoch": 0.13653333333333334,
      "grad_norm": 142.82387126501297,
      "learning_rate": 6.020599913279623e-06,
      "loss": 21.1346,
      "step": 16
    },
    {
      "epoch": 0.14506666666666668,
      "grad_norm": 123.86394296641578,
      "learning_rate": 6.15224460689137e-06,
      "loss": 19.8457,
      "step": 17
    },
    {
      "epoch": 0.1536,
      "grad_norm": 112.3988260336824,
      "learning_rate": 6.276362525516529e-06,
      "loss": 18.7824,
      "step": 18
    },
    {
      "epoch": 0.16213333333333332,
      "grad_norm": 120.96712330991012,
      "learning_rate": 6.393768004764144e-06,
      "loss": 18.0207,
      "step": 19
    },
    {
      "epoch": 0.17066666666666666,
      "grad_norm": 129.42692949353702,
      "learning_rate": 6.505149978319905e-06,
      "loss": 16.8355,
      "step": 20
    }
  ],
  "logging_steps": 1,
  "max_steps": 301,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 20,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.924957640079442e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}