{ "best_metric": 0.8158317804336548, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.39940089865202194, "eval_steps": 25, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01597603594608088, "grad_norm": 3.754645824432373, "learning_rate": 5e-05, "loss": 3.8213, "step": 1 }, { "epoch": 0.01597603594608088, "eval_loss": 4.774369239807129, "eval_runtime": 0.8733, "eval_samples_per_second": 57.257, "eval_steps_per_second": 14.887, "step": 1 }, { "epoch": 0.03195207189216176, "grad_norm": 4.066346645355225, "learning_rate": 0.0001, "loss": 4.008, "step": 2 }, { "epoch": 0.04792810783824263, "grad_norm": 3.6833364963531494, "learning_rate": 9.958086757163489e-05, "loss": 3.9832, "step": 3 }, { "epoch": 0.06390414378432352, "grad_norm": 7.125794887542725, "learning_rate": 9.833127793065098e-05, "loss": 3.95, "step": 4 }, { "epoch": 0.0798801797304044, "grad_norm": 7.377561569213867, "learning_rate": 9.627450856774539e-05, "loss": 3.2631, "step": 5 }, { "epoch": 0.09585621567648527, "grad_norm": 7.788794040679932, "learning_rate": 9.3448873204592e-05, "loss": 2.3484, "step": 6 }, { "epoch": 0.11183225162256615, "grad_norm": 5.947895050048828, "learning_rate": 8.990700808169889e-05, "loss": 2.084, "step": 7 }, { "epoch": 0.12780828756864704, "grad_norm": 6.4907050132751465, "learning_rate": 8.571489144483944e-05, "loss": 2.8501, "step": 8 }, { "epoch": 0.14378432351472792, "grad_norm": 8.412772178649902, "learning_rate": 8.095061449516903e-05, "loss": 3.5651, "step": 9 }, { "epoch": 0.1597603594608088, "grad_norm": 10.891843795776367, "learning_rate": 7.570292669790186e-05, "loss": 3.5416, "step": 10 }, { "epoch": 0.17573639540688968, "grad_norm": 11.607494354248047, "learning_rate": 7.006958254769438e-05, "loss": 2.8788, "step": 11 }, { "epoch": 0.19171243135297053, "grad_norm": 9.564980506896973, "learning_rate": 6.415552058736854e-05, "loss": 1.7524, "step": 12 }, { "epoch": 0.2076884672990514, "grad_norm": 6.569855690002441, "learning_rate": 5.80709086014102e-05, "loss": 1.7455, "step": 13 }, { "epoch": 0.2236645032451323, "grad_norm": 2.3155879974365234, "learning_rate": 5.192909139858981e-05, "loss": 1.5254, "step": 14 }, { "epoch": 0.23964053919121317, "grad_norm": 2.168813943862915, "learning_rate": 4.584447941263149e-05, "loss": 1.5916, "step": 15 }, { "epoch": 0.2556165751372941, "grad_norm": 2.051868438720703, "learning_rate": 3.9930417452305626e-05, "loss": 1.2706, "step": 16 }, { "epoch": 0.27159261108337496, "grad_norm": 3.016422986984253, "learning_rate": 3.4297073302098156e-05, "loss": 0.7138, "step": 17 }, { "epoch": 0.28756864702945584, "grad_norm": 3.377518653869629, "learning_rate": 2.9049385504830985e-05, "loss": 0.5013, "step": 18 }, { "epoch": 0.3035446829755367, "grad_norm": 5.198056221008301, "learning_rate": 2.4285108555160577e-05, "loss": 0.3902, "step": 19 }, { "epoch": 0.3195207189216176, "grad_norm": 5.946393013000488, "learning_rate": 2.0092991918301108e-05, "loss": 0.6247, "step": 20 }, { "epoch": 0.3354967548676985, "grad_norm": 4.9002532958984375, "learning_rate": 1.6551126795408016e-05, "loss": 1.6558, "step": 21 }, { "epoch": 0.35147279081377936, "grad_norm": 7.489260673522949, "learning_rate": 1.3725491432254624e-05, "loss": 2.4167, "step": 22 }, { "epoch": 0.36744882675986024, "grad_norm": 8.54692554473877, "learning_rate": 1.1668722069349041e-05, "loss": 1.8753, "step": 23 }, { "epoch": 0.38342486270594106, "grad_norm": 7.267559051513672, "learning_rate": 1.0419132428365116e-05, "loss": 1.7638, "step": 24 }, { "epoch": 0.39940089865202194, "grad_norm": 7.053504467010498, "learning_rate": 1e-05, "loss": 0.5409, "step": 25 }, { "epoch": 0.39940089865202194, "eval_loss": 0.8158317804336548, "eval_runtime": 0.8858, "eval_samples_per_second": 56.445, "eval_steps_per_second": 14.676, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.172290816212992e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }