{
  "best_metric": 1.139582872390747,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.00865263977618505,
  "eval_steps": 50,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.768426517456701e-05,
      "eval_loss": 2.3238277435302734,
      "eval_runtime": 495.8901,
      "eval_samples_per_second": 58.878,
      "eval_steps_per_second": 14.721,
      "step": 1
    },
    {
      "epoch": 0.00017305279552370101,
      "grad_norm": 0.45083099603652954,
      "learning_rate": 3e-05,
      "loss": 1.4648,
      "step": 3
    },
    {
      "epoch": 0.00034610559104740203,
      "grad_norm": 0.5587688684463501,
      "learning_rate": 6e-05,
      "loss": 1.8717,
      "step": 6
    },
    {
      "epoch": 0.0005191583865711031,
      "grad_norm": 0.5804035067558289,
      "learning_rate": 9e-05,
      "loss": 1.8528,
      "step": 9
    },
    {
      "epoch": 0.0006922111820948041,
      "grad_norm": 0.5394102931022644,
      "learning_rate": 9.997266286704631e-05,
      "loss": 1.8216,
      "step": 12
    },
    {
      "epoch": 0.0008652639776185052,
      "grad_norm": 0.5287687182426453,
      "learning_rate": 9.98292246503335e-05,
      "loss": 1.4867,
      "step": 15
    },
    {
      "epoch": 0.0010383167731422061,
      "grad_norm": 0.5580219626426697,
      "learning_rate": 9.956320346634876e-05,
      "loss": 1.553,
      "step": 18
    },
    {
      "epoch": 0.0012113695686659072,
      "grad_norm": 0.5071175694465637,
      "learning_rate": 9.917525374361912e-05,
      "loss": 1.4905,
      "step": 21
    },
    {
      "epoch": 0.0013844223641896081,
      "grad_norm": 0.618292510509491,
      "learning_rate": 9.86663298624003e-05,
      "loss": 1.3951,
      "step": 24
    },
    {
      "epoch": 0.0015574751597133092,
      "grad_norm": 0.46973147988319397,
      "learning_rate": 9.803768380684242e-05,
      "loss": 1.5449,
      "step": 27
    },
    {
      "epoch": 0.0017305279552370103,
      "grad_norm": 0.49171945452690125,
      "learning_rate": 9.729086208503174e-05,
      "loss": 1.3479,
      "step": 30
    },
    {
      "epoch": 0.0019035807507607112,
      "grad_norm": 0.48666226863861084,
      "learning_rate": 9.642770192448536e-05,
      "loss": 1.4163,
      "step": 33
    },
    {
      "epoch": 0.0020766335462844123,
      "grad_norm": 0.5254663825035095,
      "learning_rate": 9.545032675245813e-05,
      "loss": 1.3606,
      "step": 36
    },
    {
      "epoch": 0.0022496863418081134,
      "grad_norm": 0.4926294684410095,
      "learning_rate": 9.43611409721806e-05,
      "loss": 1.3109,
      "step": 39
    },
    {
      "epoch": 0.0024227391373318145,
      "grad_norm": 0.5055580735206604,
      "learning_rate": 9.316282404787871e-05,
      "loss": 1.3493,
      "step": 42
    },
    {
      "epoch": 0.002595791932855515,
      "grad_norm": 0.5364347100257874,
      "learning_rate": 9.185832391312644e-05,
      "loss": 1.4115,
      "step": 45
    },
    {
      "epoch": 0.0027688447283792162,
      "grad_norm": 0.7558141350746155,
      "learning_rate": 9.045084971874738e-05,
      "loss": 1.4071,
      "step": 48
    },
    {
      "epoch": 0.0028842132587283504,
      "eval_loss": 1.2356951236724854,
      "eval_runtime": 500.8722,
      "eval_samples_per_second": 58.292,
      "eval_steps_per_second": 14.575,
      "step": 50
    },
    {
      "epoch": 0.0029418975239029173,
      "grad_norm": 0.4775656461715698,
      "learning_rate": 8.894386393810563e-05,
      "loss": 1.2319,
      "step": 51
    },
    {
      "epoch": 0.0031149503194266184,
      "grad_norm": 0.5036152601242065,
      "learning_rate": 8.73410738492077e-05,
      "loss": 0.9718,
      "step": 54
    },
    {
      "epoch": 0.0032880031149503195,
      "grad_norm": 0.39522024989128113,
      "learning_rate": 8.564642241456986e-05,
      "loss": 1.1598,
      "step": 57
    },
    {
      "epoch": 0.0034610559104740206,
      "grad_norm": 0.35405585169792175,
      "learning_rate": 8.386407858128706e-05,
      "loss": 1.0674,
      "step": 60
    },
    {
      "epoch": 0.0036341087059977213,
      "grad_norm": 0.4121133089065552,
      "learning_rate": 8.199842702516583e-05,
      "loss": 1.0174,
      "step": 63
    },
    {
      "epoch": 0.0038071615015214224,
      "grad_norm": 0.32193905115127563,
      "learning_rate": 8.005405736415126e-05,
      "loss": 1.0159,
      "step": 66
    },
    {
      "epoch": 0.003980214297045124,
      "grad_norm": 0.3453183174133301,
      "learning_rate": 7.803575286758364e-05,
      "loss": 1.0557,
      "step": 69
    },
    {
      "epoch": 0.0041532670925688246,
      "grad_norm": 0.3785501718521118,
      "learning_rate": 7.594847868906076e-05,
      "loss": 1.2081,
      "step": 72
    },
    {
      "epoch": 0.004326319888092525,
      "grad_norm": 0.3871416747570038,
      "learning_rate": 7.379736965185368e-05,
      "loss": 1.1649,
      "step": 75
    },
    {
      "epoch": 0.004499372683616227,
      "grad_norm": 0.4219188988208771,
      "learning_rate": 7.158771761692464e-05,
      "loss": 1.1471,
      "step": 78
    },
    {
      "epoch": 0.004672425479139927,
      "grad_norm": 0.40537410974502563,
      "learning_rate": 6.932495846462261e-05,
      "loss": 1.3124,
      "step": 81
    },
    {
      "epoch": 0.004845478274663629,
      "grad_norm": 0.36390063166618347,
      "learning_rate": 6.701465872208216e-05,
      "loss": 1.2589,
      "step": 84
    },
    {
      "epoch": 0.00501853107018733,
      "grad_norm": 0.4167007803916931,
      "learning_rate": 6.466250186922325e-05,
      "loss": 1.0981,
      "step": 87
    },
    {
      "epoch": 0.00519158386571103,
      "grad_norm": 0.4835963845252991,
      "learning_rate": 6.227427435703997e-05,
      "loss": 1.3458,
      "step": 90
    },
    {
      "epoch": 0.005364636661234732,
      "grad_norm": 0.46142578125,
      "learning_rate": 5.985585137257401e-05,
      "loss": 1.1397,
      "step": 93
    },
    {
      "epoch": 0.0055376894567584324,
      "grad_norm": 0.5481660962104797,
      "learning_rate": 5.74131823855921e-05,
      "loss": 1.1947,
      "step": 96
    },
    {
      "epoch": 0.005710742252282134,
      "grad_norm": 0.6424285173416138,
      "learning_rate": 5.495227651252315e-05,
      "loss": 1.4071,
      "step": 99
    },
    {
      "epoch": 0.005768426517456701,
      "eval_loss": 1.1644848585128784,
      "eval_runtime": 495.3076,
      "eval_samples_per_second": 58.947,
      "eval_steps_per_second": 14.738,
      "step": 100
    },
    {
      "epoch": 0.005883795047805835,
      "grad_norm": 0.46381473541259766,
      "learning_rate": 5.247918773366112e-05,
      "loss": 1.1083,
      "step": 102
    },
    {
      "epoch": 0.006056847843329536,
      "grad_norm": 0.5000247955322266,
      "learning_rate": 5e-05,
      "loss": 1.0165,
      "step": 105
    },
    {
      "epoch": 0.006229900638853237,
      "grad_norm": 0.38582298159599304,
      "learning_rate": 4.7520812266338885e-05,
      "loss": 1.059,
      "step": 108
    },
    {
      "epoch": 0.0064029534343769375,
      "grad_norm": 0.40634816884994507,
      "learning_rate": 4.504772348747687e-05,
      "loss": 1.021,
      "step": 111
    },
    {
      "epoch": 0.006576006229900639,
      "grad_norm": 0.32036200165748596,
      "learning_rate": 4.2586817614407895e-05,
      "loss": 0.9665,
      "step": 114
    },
    {
      "epoch": 0.00674905902542434,
      "grad_norm": 0.3690759241580963,
      "learning_rate": 4.0144148627425993e-05,
      "loss": 1.1389,
      "step": 117
    },
    {
      "epoch": 0.006922111820948041,
      "grad_norm": 0.3883983790874481,
      "learning_rate": 3.772572564296005e-05,
      "loss": 1.1566,
      "step": 120
    },
    {
      "epoch": 0.007095164616471742,
      "grad_norm": 0.4271601438522339,
      "learning_rate": 3.533749813077677e-05,
      "loss": 1.1872,
      "step": 123
    },
    {
      "epoch": 0.0072682174119954425,
      "grad_norm": 0.358237087726593,
      "learning_rate": 3.298534127791785e-05,
      "loss": 1.1692,
      "step": 126
    },
    {
      "epoch": 0.007441270207519144,
      "grad_norm": 0.365348219871521,
      "learning_rate": 3.0675041535377405e-05,
      "loss": 1.1708,
      "step": 129
    },
    {
      "epoch": 0.007614323003042845,
      "grad_norm": 0.41103696823120117,
      "learning_rate": 2.8412282383075363e-05,
      "loss": 1.2572,
      "step": 132
    },
    {
      "epoch": 0.007787375798566546,
      "grad_norm": 0.41762009263038635,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 1.1534,
      "step": 135
    },
    {
      "epoch": 0.007960428594090248,
      "grad_norm": 0.39923688769340515,
      "learning_rate": 2.405152131093926e-05,
      "loss": 1.3375,
      "step": 138
    },
    {
      "epoch": 0.008133481389613948,
      "grad_norm": 0.47532254457473755,
      "learning_rate": 2.196424713241637e-05,
      "loss": 1.3354,
      "step": 141
    },
    {
      "epoch": 0.008306534185137649,
      "grad_norm": 0.3896162211894989,
      "learning_rate": 1.9945942635848748e-05,
      "loss": 1.3604,
      "step": 144
    },
    {
      "epoch": 0.00847958698066135,
      "grad_norm": 0.4989997446537018,
      "learning_rate": 1.800157297483417e-05,
      "loss": 1.3699,
      "step": 147
    },
    {
      "epoch": 0.00865263977618505,
      "grad_norm": 0.7773590087890625,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 1.333,
      "step": 150
    },
    {
      "epoch": 0.00865263977618505,
      "eval_loss": 1.139582872390747,
      "eval_runtime": 496.6517,
      "eval_samples_per_second": 58.788,
      "eval_steps_per_second": 14.698,
      "step": 150
    }
  ],
  "logging_steps": 3,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0303319310336e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}