{ "best_metric": 1.8255712985992432, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 1.2345679012345678, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024691358024691357, "grad_norm": 0.25252068042755127, "learning_rate": 1e-05, "loss": 2.0604, "step": 1 }, { "epoch": 0.024691358024691357, "eval_loss": 2.237061023712158, "eval_runtime": 1.21, "eval_samples_per_second": 57.023, "eval_steps_per_second": 14.876, "step": 1 }, { "epoch": 0.04938271604938271, "grad_norm": 0.31075847148895264, "learning_rate": 2e-05, "loss": 2.2158, "step": 2 }, { "epoch": 0.07407407407407407, "grad_norm": 0.38259977102279663, "learning_rate": 3e-05, "loss": 2.1454, "step": 3 }, { "epoch": 0.09876543209876543, "grad_norm": 0.35990017652511597, "learning_rate": 4e-05, "loss": 2.0522, "step": 4 }, { "epoch": 0.12345679012345678, "grad_norm": 0.4168837070465088, "learning_rate": 5e-05, "loss": 2.3263, "step": 5 }, { "epoch": 0.14814814814814814, "grad_norm": 0.469605028629303, "learning_rate": 6e-05, "loss": 2.2257, "step": 6 }, { "epoch": 0.1728395061728395, "grad_norm": 0.5073450207710266, "learning_rate": 7e-05, "loss": 2.3573, "step": 7 }, { "epoch": 0.19753086419753085, "grad_norm": 0.7278738021850586, "learning_rate": 8e-05, "loss": 2.4981, "step": 8 }, { "epoch": 0.2222222222222222, "grad_norm": 0.8480843305587769, "learning_rate": 9e-05, "loss": 2.4136, "step": 9 }, { "epoch": 0.24691358024691357, "grad_norm": 1.1377860307693481, "learning_rate": 0.0001, "loss": 2.6231, "step": 10 }, { "epoch": 0.2716049382716049, "grad_norm": 0.1992480605840683, "learning_rate": 9.998033131915266e-05, "loss": 2.125, "step": 11 }, { "epoch": 0.2962962962962963, "grad_norm": 0.21675321459770203, "learning_rate": 9.992134075089084e-05, "loss": 2.1632, "step": 12 }, { "epoch": 0.32098765432098764, "grad_norm": 0.2902778387069702, "learning_rate": 9.982307470588098e-05, "loss": 2.079, "step": 13 }, { "epoch": 0.345679012345679, "grad_norm": 0.2948603630065918, "learning_rate": 9.968561049466214e-05, "loss": 2.031, "step": 14 }, { "epoch": 0.37037037037037035, "grad_norm": 0.347444623708725, "learning_rate": 9.950905626682228e-05, "loss": 2.0397, "step": 15 }, { "epoch": 0.3950617283950617, "grad_norm": 0.4035986363887787, "learning_rate": 9.92935509259118e-05, "loss": 2.1259, "step": 16 }, { "epoch": 0.41975308641975306, "grad_norm": 0.4339040517807007, "learning_rate": 9.903926402016153e-05, "loss": 2.1056, "step": 17 }, { "epoch": 0.4444444444444444, "grad_norm": 0.5206056833267212, "learning_rate": 9.874639560909117e-05, "loss": 2.0121, "step": 18 }, { "epoch": 0.4691358024691358, "grad_norm": 0.719115674495697, "learning_rate": 9.841517610611309e-05, "loss": 2.1641, "step": 19 }, { "epoch": 0.49382716049382713, "grad_norm": 0.9554781317710876, "learning_rate": 9.804586609725499e-05, "loss": 2.112, "step": 20 }, { "epoch": 0.5185185185185185, "grad_norm": 0.2317459136247635, "learning_rate": 9.763875613614482e-05, "loss": 2.0239, "step": 21 }, { "epoch": 0.5432098765432098, "grad_norm": 0.3069552481174469, "learning_rate": 9.719416651541839e-05, "loss": 2.0365, "step": 22 }, { "epoch": 0.5679012345679012, "grad_norm": 0.34765249490737915, "learning_rate": 9.671244701472999e-05, "loss": 2.1446, "step": 23 }, { "epoch": 0.5925925925925926, "grad_norm": 0.37877699732780457, "learning_rate": 9.619397662556435e-05, "loss": 2.0267, "step": 24 }, { "epoch": 0.6172839506172839, "grad_norm": 
0.4079001843929291, "learning_rate": 9.563916325306594e-05, "loss": 2.0075, "step": 25 }, { "epoch": 0.6419753086419753, "grad_norm": 0.3358095586299896, "learning_rate": 9.504844339512095e-05, "loss": 1.8691, "step": 26 }, { "epoch": 0.6666666666666666, "grad_norm": 0.4075435400009155, "learning_rate": 9.442228179894362e-05, "loss": 2.0415, "step": 27 }, { "epoch": 0.691358024691358, "grad_norm": 0.4543934762477875, "learning_rate": 9.376117109543769e-05, "loss": 1.9039, "step": 28 }, { "epoch": 0.7160493827160493, "grad_norm": 0.5283561944961548, "learning_rate": 9.306563141162046e-05, "loss": 1.8946, "step": 29 }, { "epoch": 0.7407407407407407, "grad_norm": 0.7076103687286377, "learning_rate": 9.233620996141421e-05, "loss": 1.5743, "step": 30 }, { "epoch": 0.7654320987654321, "grad_norm": 0.19869261980056763, "learning_rate": 9.157348061512727e-05, "loss": 1.9074, "step": 31 }, { "epoch": 0.7901234567901234, "grad_norm": 0.2551741302013397, "learning_rate": 9.077804344796302e-05, "loss": 1.9752, "step": 32 }, { "epoch": 0.8148148148148148, "grad_norm": 0.29231002926826477, "learning_rate": 8.995052426791247e-05, "loss": 2.0127, "step": 33 }, { "epoch": 0.8395061728395061, "grad_norm": 0.2705162465572357, "learning_rate": 8.90915741234015e-05, "loss": 1.8734, "step": 34 }, { "epoch": 0.8641975308641975, "grad_norm": 0.30582037568092346, "learning_rate": 8.820186879108038e-05, "loss": 1.7893, "step": 35 }, { "epoch": 0.8888888888888888, "grad_norm": 0.2692418694496155, "learning_rate": 8.728210824415827e-05, "loss": 1.897, "step": 36 }, { "epoch": 0.9135802469135802, "grad_norm": 0.3430037498474121, "learning_rate": 8.633301610170135e-05, "loss": 1.8172, "step": 37 }, { "epoch": 0.9382716049382716, "grad_norm": 0.3440125286579132, "learning_rate": 8.535533905932738e-05, "loss": 1.9525, "step": 38 }, { "epoch": 0.9629629629629629, "grad_norm": 0.46276724338531494, "learning_rate": 8.434984630174509e-05, "loss": 1.8196, "step": 39 }, { "epoch": 0.9876543209876543, "grad_norm": 0.7176722288131714, "learning_rate": 8.33173288976002e-05, "loss": 1.837, "step": 40 }, { "epoch": 1.0123456790123457, "grad_norm": 0.38161012530326843, "learning_rate": 8.225859917710439e-05, "loss": 3.0373, "step": 41 }, { "epoch": 1.037037037037037, "grad_norm": 0.16387499868869781, "learning_rate": 8.117449009293668e-05, "loss": 1.6453, "step": 42 }, { "epoch": 1.0617283950617284, "grad_norm": 0.24855704605579376, "learning_rate": 8.006585456492029e-05, "loss": 2.0729, "step": 43 }, { "epoch": 1.0864197530864197, "grad_norm": 0.2932082712650299, "learning_rate": 7.89335648089903e-05, "loss": 1.991, "step": 44 }, { "epoch": 1.1111111111111112, "grad_norm": 0.2368183583021164, "learning_rate": 7.777851165098012e-05, "loss": 1.6878, "step": 45 }, { "epoch": 1.1358024691358024, "grad_norm": 0.3216198682785034, "learning_rate": 7.660160382576683e-05, "loss": 2.1539, "step": 46 }, { "epoch": 1.1604938271604939, "grad_norm": 0.31032422184944153, "learning_rate": 7.540376726232648e-05, "loss": 1.784, "step": 47 }, { "epoch": 1.1851851851851851, "grad_norm": 0.3157343566417694, "learning_rate": 7.4185944355262e-05, "loss": 1.6648, "step": 48 }, { "epoch": 1.2098765432098766, "grad_norm": 0.4062681794166565, "learning_rate": 7.294909322337689e-05, "loss": 1.8114, "step": 49 }, { "epoch": 1.2345679012345678, "grad_norm": 0.5582627654075623, "learning_rate": 7.169418695587791e-05, "loss": 1.6857, "step": 50 }, { "epoch": 1.2345679012345678, "eval_loss": 1.8255712985992432, "eval_runtime": 1.2107, "eval_samples_per_second": 
56.991, "eval_steps_per_second": 14.867, "step": 50 } ], "logging_steps": 1, "max_steps": 122, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3432293245255680.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }