{ "best_metric": 0.7650135159492493, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 3.058275058275058, "eval_steps": 25, "global_step": 41, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07459207459207459, "grad_norm": 49.79338073730469, "learning_rate": 5e-05, "loss": 31.079, "step": 1 }, { "epoch": 0.07459207459207459, "eval_loss": 1.1030958890914917, "eval_runtime": 4.3497, "eval_samples_per_second": 11.495, "eval_steps_per_second": 2.989, "step": 1 }, { "epoch": 0.14918414918414918, "grad_norm": 61.619346618652344, "learning_rate": 0.0001, "loss": 35.8237, "step": 2 }, { "epoch": 0.22377622377622378, "grad_norm": 44.663516998291016, "learning_rate": 9.985407886603945e-05, "loss": 35.1631, "step": 3 }, { "epoch": 0.29836829836829837, "grad_norm": 28.899295806884766, "learning_rate": 9.941726181870608e-05, "loss": 29.8061, "step": 4 }, { "epoch": 0.372960372960373, "grad_norm": 25.077674865722656, "learning_rate": 9.869238178417235e-05, "loss": 28.4287, "step": 5 }, { "epoch": 0.44755244755244755, "grad_norm": 21.780200958251953, "learning_rate": 9.768413988762156e-05, "loss": 29.4979, "step": 6 }, { "epoch": 0.5221445221445221, "grad_norm": 16.97848129272461, "learning_rate": 9.639907496464709e-05, "loss": 27.768, "step": 7 }, { "epoch": 0.5967365967365967, "grad_norm": 14.971733093261719, "learning_rate": 9.484552115439445e-05, "loss": 27.3351, "step": 8 }, { "epoch": 0.6713286713286714, "grad_norm": 14.10373592376709, "learning_rate": 9.303355384947076e-05, "loss": 27.1077, "step": 9 }, { "epoch": 0.745920745920746, "grad_norm": 15.110809326171875, "learning_rate": 9.097492435315756e-05, "loss": 25.9653, "step": 10 }, { "epoch": 0.8205128205128205, "grad_norm": 12.38207721710205, "learning_rate": 8.868298366769954e-05, "loss": 26.2124, "step": 11 }, { "epoch": 0.8951048951048951, "grad_norm": 12.963375091552734, "learning_rate": 8.617259590793198e-05, "loss": 26.3925, "step": 12 }, { "epoch": 0.9696969696969697, "grad_norm": 17.126346588134766, "learning_rate": 8.346004190179198e-05, "loss": 27.4152, "step": 13 }, { "epoch": 1.0442890442890442, "grad_norm": 11.388605117797852, "learning_rate": 8.056291360290201e-05, "loss": 24.0213, "step": 14 }, { "epoch": 1.118881118881119, "grad_norm": 13.905165672302246, "learning_rate": 7.75e-05, "loss": 24.7093, "step": 15 }, { "epoch": 1.1934731934731935, "grad_norm": 14.038837432861328, "learning_rate": 7.429116526313744e-05, "loss": 23.8335, "step": 16 }, { "epoch": 1.2680652680652682, "grad_norm": 14.542243957519531, "learning_rate": 7.095721991691411e-05, "loss": 23.6019, "step": 17 }, { "epoch": 1.3426573426573427, "grad_norm": 11.928253173828125, "learning_rate": 6.751978587624037e-05, "loss": 23.773, "step": 18 }, { "epoch": 1.4172494172494172, "grad_norm": 13.130093574523926, "learning_rate": 6.400115621992201e-05, "loss": 23.8141, "step": 19 }, { "epoch": 1.491841491841492, "grad_norm": 15.3530855178833, "learning_rate": 6.042415061148954e-05, "loss": 23.2327, "step": 20 }, { "epoch": 1.5664335664335665, "grad_norm": 10.706517219543457, "learning_rate": 5.681196730492368e-05, "loss": 23.0154, "step": 21 }, { "epoch": 1.641025641025641, "grad_norm": 12.62425708770752, "learning_rate": 5.318803269507634e-05, "loss": 22.7153, "step": 22 }, { "epoch": 1.7156177156177157, "grad_norm": 15.199002265930176, "learning_rate": 4.9575849388510473e-05, "loss": 23.8232, "step": 23 }, { "epoch": 1.7902097902097902, "grad_norm": 10.122224807739258, "learning_rate": 4.599884378007802e-05, "loss": 21.8521, "step": 24 }, { "epoch": 1.8648018648018647, "grad_norm": 12.264386177062988, "learning_rate": 4.248021412375963e-05, "loss": 23.36, "step": 25 }, { "epoch": 1.8648018648018647, "eval_loss": 0.7650135159492493, "eval_runtime": 4.3787, "eval_samples_per_second": 11.419, "eval_steps_per_second": 2.969, "step": 25 }, { "epoch": 1.9393939393939394, "grad_norm": 13.958155632019043, "learning_rate": 3.904278008308589e-05, "loss": 23.0799, "step": 26 }, { "epoch": 2.013986013986014, "grad_norm": 13.080740928649902, "learning_rate": 3.570883473686256e-05, "loss": 22.7616, "step": 27 }, { "epoch": 2.0885780885780885, "grad_norm": 9.918713569641113, "learning_rate": 3.250000000000001e-05, "loss": 22.2095, "step": 28 }, { "epoch": 2.163170163170163, "grad_norm": 12.069844245910645, "learning_rate": 2.9437086397097995e-05, "loss": 21.0234, "step": 29 }, { "epoch": 2.237762237762238, "grad_norm": 14.520157814025879, "learning_rate": 2.6539958098208027e-05, "loss": 21.4936, "step": 30 }, { "epoch": 2.312354312354312, "grad_norm": 9.91746711730957, "learning_rate": 2.3827404092068032e-05, "loss": 20.1627, "step": 31 }, { "epoch": 2.386946386946387, "grad_norm": 11.826277732849121, "learning_rate": 2.1317016332300447e-05, "loss": 21.355, "step": 32 }, { "epoch": 2.4615384615384617, "grad_norm": 13.523852348327637, "learning_rate": 1.902507564684246e-05, "loss": 20.8342, "step": 33 }, { "epoch": 2.5361305361305364, "grad_norm": 11.003284454345703, "learning_rate": 1.6966446150529244e-05, "loss": 20.2875, "step": 34 }, { "epoch": 2.6107226107226107, "grad_norm": 11.648452758789062, "learning_rate": 1.515447884560556e-05, "loss": 21.3661, "step": 35 }, { "epoch": 2.6853146853146854, "grad_norm": 13.226274490356445, "learning_rate": 1.3600925035352913e-05, "loss": 20.4063, "step": 36 }, { "epoch": 2.7599067599067597, "grad_norm": 12.421295166015625, "learning_rate": 1.2315860112378455e-05, "loss": 20.5653, "step": 37 }, { "epoch": 2.8344988344988344, "grad_norm": 11.139906883239746, "learning_rate": 1.130761821582766e-05, "loss": 21.522, "step": 38 }, { "epoch": 2.909090909090909, "grad_norm": 12.317776679992676, "learning_rate": 1.0582738181293923e-05, "loss": 21.0642, "step": 39 }, { "epoch": 2.983682983682984, "grad_norm": 13.74180793762207, "learning_rate": 1.0145921133960554e-05, "loss": 20.5685, "step": 40 }, { "epoch": 3.058275058275058, "grad_norm": 10.381887435913086, "learning_rate": 1e-05, "loss": 21.0001, "step": 41 } ], "logging_steps": 1, "max_steps": 41, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.63958278020268e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }