{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.00650110518788194,
  "eval_steps": 9,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002600442075152776,
      "grad_norm": 0.02279164083302021,
      "learning_rate": 1e-05,
      "loss": 10.3782,
      "step": 1
    },
    {
      "epoch": 0.0002600442075152776,
      "eval_loss": 10.382096290588379,
      "eval_runtime": 24.8644,
      "eval_samples_per_second": 130.267,
      "eval_steps_per_second": 16.288,
      "step": 1
    },
    {
      "epoch": 0.0005200884150305552,
      "grad_norm": 0.027484985068440437,
      "learning_rate": 2e-05,
      "loss": 10.3802,
      "step": 2
    },
    {
      "epoch": 0.0007801326225458328,
      "grad_norm": 0.024028444662690163,
      "learning_rate": 3e-05,
      "loss": 10.3797,
      "step": 3
    },
    {
      "epoch": 0.0010401768300611104,
      "grad_norm": 0.029752936214208603,
      "learning_rate": 4e-05,
      "loss": 10.3782,
      "step": 4
    },
    {
      "epoch": 0.001300221037576388,
      "grad_norm": 0.026785146445035934,
      "learning_rate": 5e-05,
      "loss": 10.3819,
      "step": 5
    },
    {
      "epoch": 0.0015602652450916656,
      "grad_norm": 0.024060087278485298,
      "learning_rate": 6e-05,
      "loss": 10.3817,
      "step": 6
    },
    {
      "epoch": 0.0018203094526069432,
      "grad_norm": 0.023959027603268623,
      "learning_rate": 7e-05,
      "loss": 10.3831,
      "step": 7
    },
    {
      "epoch": 0.002080353660122221,
      "grad_norm": 0.025802776217460632,
      "learning_rate": 8e-05,
      "loss": 10.3809,
      "step": 8
    },
    {
      "epoch": 0.0023403978676374984,
      "grad_norm": 0.02310967445373535,
      "learning_rate": 9e-05,
      "loss": 10.3836,
      "step": 9
    },
    {
      "epoch": 0.0023403978676374984,
      "eval_loss": 10.381851196289062,
      "eval_runtime": 25.7478,
      "eval_samples_per_second": 125.797,
      "eval_steps_per_second": 15.73,
      "step": 9
    },
    {
      "epoch": 0.002600442075152776,
      "grad_norm": 0.026796067133545876,
      "learning_rate": 0.0001,
      "loss": 10.3811,
      "step": 10
    },
    {
      "epoch": 0.0028604862826680537,
      "grad_norm": 0.04965611919760704,
      "learning_rate": 9.99695413509548e-05,
      "loss": 10.3798,
      "step": 11
    },
    {
      "epoch": 0.0031205304901833313,
      "grad_norm": 0.024298246949911118,
      "learning_rate": 9.987820251299122e-05,
      "loss": 10.3828,
      "step": 12
    },
    {
      "epoch": 0.003380574697698609,
      "grad_norm": 0.024884523823857307,
      "learning_rate": 9.972609476841367e-05,
      "loss": 10.3814,
      "step": 13
    },
    {
      "epoch": 0.0036406189052138865,
      "grad_norm": 0.023707769811153412,
      "learning_rate": 9.951340343707852e-05,
      "loss": 10.3819,
      "step": 14
    },
    {
      "epoch": 0.003900663112729164,
      "grad_norm": 0.027032267302274704,
      "learning_rate": 9.924038765061042e-05,
      "loss": 10.3842,
      "step": 15
    },
    {
      "epoch": 0.004160707320244442,
      "grad_norm": 0.020228637382388115,
      "learning_rate": 9.890738003669029e-05,
      "loss": 10.3777,
      "step": 16
    },
    {
      "epoch": 0.004420751527759719,
      "grad_norm": 0.021995818242430687,
      "learning_rate": 9.851478631379982e-05,
      "loss": 10.3832,
      "step": 17
    },
    {
      "epoch": 0.004680795735274997,
      "grad_norm": 0.02389182150363922,
      "learning_rate": 9.806308479691595e-05,
      "loss": 10.3844,
      "step": 18
    },
    {
      "epoch": 0.004680795735274997,
      "eval_loss": 10.38124942779541,
      "eval_runtime": 25.5914,
      "eval_samples_per_second": 126.566,
      "eval_steps_per_second": 15.826,
      "step": 18
    },
    {
      "epoch": 0.0049408399427902745,
      "grad_norm": 0.021635526791214943,
      "learning_rate": 9.755282581475769e-05,
      "loss": 10.3828,
      "step": 19
    },
    {
      "epoch": 0.005200884150305552,
      "grad_norm": 0.023616788908839226,
      "learning_rate": 9.698463103929542e-05,
      "loss": 10.3831,
      "step": 20
    },
    {
      "epoch": 0.00546092835782083,
      "grad_norm": 0.02443654276430607,
      "learning_rate": 9.635919272833938e-05,
      "loss": 10.3821,
      "step": 21
    },
    {
      "epoch": 0.005720972565336107,
      "grad_norm": 0.02596656046807766,
      "learning_rate": 9.567727288213005e-05,
      "loss": 10.3781,
      "step": 22
    },
    {
      "epoch": 0.005981016772851385,
      "grad_norm": 0.02593740075826645,
      "learning_rate": 9.493970231495835e-05,
      "loss": 10.379,
      "step": 23
    },
    {
      "epoch": 0.0062410609803666625,
      "grad_norm": 0.026120077818632126,
      "learning_rate": 9.414737964294636e-05,
      "loss": 10.382,
      "step": 24
    },
    {
      "epoch": 0.00650110518788194,
      "grad_norm": 0.02591986022889614,
      "learning_rate": 9.330127018922194e-05,
      "loss": 10.3816,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1438317281280.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}