{ "best_metric": 0.005342242307960987, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.08090614886731391, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004045307443365696, "eval_loss": 0.49801015853881836, "eval_runtime": 43.3011, "eval_samples_per_second": 18.037, "eval_steps_per_second": 4.526, "step": 1 }, { "epoch": 0.0040453074433656954, "grad_norm": 1.0355167388916016, "learning_rate": 5.05e-06, "loss": 0.2311, "step": 10 }, { "epoch": 0.008090614886731391, "grad_norm": 1.5257176160812378, "learning_rate": 1.01e-05, "loss": 0.199, "step": 20 }, { "epoch": 0.012135922330097087, "grad_norm": 0.5611529350280762, "learning_rate": 9.538888888888889e-06, "loss": 0.0588, "step": 30 }, { "epoch": 0.016181229773462782, "grad_norm": 0.027493484318256378, "learning_rate": 8.977777777777778e-06, "loss": 0.0085, "step": 40 }, { "epoch": 0.020226537216828478, "grad_norm": 8.812362670898438, "learning_rate": 8.416666666666667e-06, "loss": 0.0486, "step": 50 }, { "epoch": 0.020226537216828478, "eval_loss": 0.008491799235343933, "eval_runtime": 43.3298, "eval_samples_per_second": 18.025, "eval_steps_per_second": 4.523, "step": 50 }, { "epoch": 0.024271844660194174, "grad_norm": 0.9959584474563599, "learning_rate": 7.855555555555556e-06, "loss": 0.0005, "step": 60 }, { "epoch": 0.02831715210355987, "grad_norm": 0.14248639345169067, "learning_rate": 7.294444444444444e-06, "loss": 0.0035, "step": 70 }, { "epoch": 0.032362459546925564, "grad_norm": 0.0067140571773052216, "learning_rate": 6.733333333333333e-06, "loss": 0.001, "step": 80 }, { "epoch": 0.03640776699029126, "grad_norm": 0.009364698082208633, "learning_rate": 6.172222222222223e-06, "loss": 0.0003, "step": 90 }, { "epoch": 0.040453074433656956, "grad_norm": 0.013642555102705956, "learning_rate": 5.611111111111111e-06, "loss": 0.0023, "step": 100 }, { "epoch": 0.040453074433656956, "eval_loss": 0.005722765810787678, "eval_runtime": 43.3568, "eval_samples_per_second": 18.013, "eval_steps_per_second": 4.521, "step": 100 }, { "epoch": 0.04449838187702265, "grad_norm": 0.04571974277496338, "learning_rate": 5.05e-06, "loss": 0.0002, "step": 110 }, { "epoch": 0.04854368932038835, "grad_norm": 0.01307100709527731, "learning_rate": 4.488888888888889e-06, "loss": 0.0007, "step": 120 }, { "epoch": 0.052588996763754045, "grad_norm": 0.43846479058265686, "learning_rate": 3.927777777777778e-06, "loss": 0.0027, "step": 130 }, { "epoch": 0.05663430420711974, "grad_norm": 0.002617649268358946, "learning_rate": 3.3666666666666665e-06, "loss": 0.0006, "step": 140 }, { "epoch": 0.06067961165048544, "grad_norm": 1.0591951608657837, "learning_rate": 2.8055555555555555e-06, "loss": 0.0014, "step": 150 }, { "epoch": 0.06067961165048544, "eval_loss": 0.005342242307960987, "eval_runtime": 43.2869, "eval_samples_per_second": 18.042, "eval_steps_per_second": 4.528, "step": 150 }, { "epoch": 0.06472491909385113, "grad_norm": 0.0019265917362645268, "learning_rate": 2.2444444444444445e-06, "loss": 0.0011, "step": 160 }, { "epoch": 0.06877022653721683, "grad_norm": 0.004902382381260395, "learning_rate": 1.6833333333333332e-06, "loss": 0.0, "step": 170 }, { "epoch": 0.07281553398058252, "grad_norm": 0.012828897684812546, "learning_rate": 1.1222222222222222e-06, "loss": 0.0001, "step": 180 }, { "epoch": 0.07686084142394822, "grad_norm": 0.03425660356879234, "learning_rate": 5.611111111111111e-07, "loss": 0.0035, "step": 190 }, { "epoch": 0.08090614886731391, "grad_norm": 0.12202399224042892, "learning_rate": 0.0, "loss": 0.0148, "step": 200 }, { "epoch": 0.08090614886731391, "eval_loss": 0.005512339062988758, "eval_runtime": 43.2377, "eval_samples_per_second": 18.063, "eval_steps_per_second": 4.533, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.685210549898445e+16, "train_batch_size": 6, "trial_name": null, "trial_params": null }