{ "best_metric": 1.1938797235488892, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.014147773494146358, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.073886747073179e-05, "eval_loss": 1.5841912031173706, "eval_runtime": 659.2651, "eval_samples_per_second": 9.03, "eval_steps_per_second": 2.259, "step": 1 }, { "epoch": 0.0007073886747073179, "grad_norm": 4.094351291656494, "learning_rate": 4.12e-05, "loss": 2.9349, "step": 10 }, { "epoch": 0.0014147773494146358, "grad_norm": 3.3907132148742676, "learning_rate": 8.24e-05, "loss": 2.6164, "step": 20 }, { "epoch": 0.002122166024121954, "grad_norm": 3.113591194152832, "learning_rate": 0.0001236, "loss": 2.5312, "step": 30 }, { "epoch": 0.0028295546988292715, "grad_norm": 3.8528008460998535, "learning_rate": 0.0001648, "loss": 2.3855, "step": 40 }, { "epoch": 0.0035369433735365895, "grad_norm": 5.419228553771973, "learning_rate": 0.000206, "loss": 2.224, "step": 50 }, { "epoch": 0.0035369433735365895, "eval_loss": 1.1938797235488892, "eval_runtime": 660.9789, "eval_samples_per_second": 9.006, "eval_steps_per_second": 2.253, "step": 50 }, { "epoch": 0.004244332048243908, "grad_norm": 3.8174710273742676, "learning_rate": 0.0002057490971767619, "loss": 2.4986, "step": 60 }, { "epoch": 0.0049517207229512255, "grad_norm": 3.127476930618286, "learning_rate": 0.00020499761108038175, "loss": 2.529, "step": 70 }, { "epoch": 0.005659109397658543, "grad_norm": 3.0806052684783936, "learning_rate": 0.00020374920287558198, "loss": 2.4661, "step": 80 }, { "epoch": 0.0063664980723658615, "grad_norm": 5.142642021179199, "learning_rate": 0.00020200995468164684, "loss": 2.3941, "step": 90 }, { "epoch": 0.007073886747073179, "grad_norm": 48.584197998046875, "learning_rate": 0.00019978833994094855, "loss": 1.9974, "step": 100 }, { "epoch": 0.007073886747073179, "eval_loss": 1.2410837411880493, "eval_runtime": 661.3129, "eval_samples_per_second": 9.002, "eval_steps_per_second": 2.252, "step": 100 }, { "epoch": 0.0077812754217804975, "grad_norm": 3.2873892784118652, "learning_rate": 0.00019709518213718787, "loss": 2.5612, "step": 110 }, { "epoch": 0.008488664096487816, "grad_norm": 3.0679943561553955, "learning_rate": 0.00019394360206446948, "loss": 2.4878, "step": 120 }, { "epoch": 0.009196052771195133, "grad_norm": 2.7864444255828857, "learning_rate": 0.00019034895390411186, "loss": 2.3364, "step": 130 }, { "epoch": 0.009903441445902451, "grad_norm": 4.135828495025635, "learning_rate": 0.0001863287504206196, "loss": 2.4225, "step": 140 }, { "epoch": 0.010610830120609769, "grad_norm": 5.823198318481445, "learning_rate": 0.00018190257764125471, "loss": 2.3496, "step": 150 }, { "epoch": 0.010610830120609769, "eval_loss": 1.2004902362823486, "eval_runtime": 660.3639, "eval_samples_per_second": 9.015, "eval_steps_per_second": 2.255, "step": 150 }, { "epoch": 0.011318218795317086, "grad_norm": 3.3262979984283447, "learning_rate": 0.00017709199943488106, "loss": 2.3911, "step": 160 }, { "epoch": 0.012025607470024405, "grad_norm": 2.9702227115631104, "learning_rate": 0.00017192045245496238, "loss": 2.4674, "step": 170 }, { "epoch": 0.012732996144731723, "grad_norm": 2.9937350749969482, "learning_rate": 0.00016641313195854277, "loss": 2.4761, "step": 180 }, { "epoch": 0.01344038481943904, "grad_norm": 3.6195473670959473, "learning_rate": 0.0001605968690574869, "loss": 2.4561, "step": 190 }, { "epoch": 0.014147773494146358, "grad_norm": 12.492778778076172, "learning_rate": 0.0001545, "loss": 2.039, "step": 200 }, { "epoch": 0.014147773494146358, "eval_loss": 1.1949822902679443, "eval_runtime": 660.2682, "eval_samples_per_second": 9.016, "eval_steps_per_second": 2.255, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.15499004493824e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }