{ "best_metric": 0.6260796785354614, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.14925373134328357, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004975124378109453, "eval_loss": 0.8322070240974426, "eval_runtime": 6.0506, "eval_samples_per_second": 28.096, "eval_steps_per_second": 7.107, "step": 1 }, { "epoch": 0.014925373134328358, "grad_norm": 9.024953842163086, "learning_rate": 0.00012, "loss": 0.7831, "step": 3 }, { "epoch": 0.024875621890547265, "eval_loss": 0.7414592504501343, "eval_runtime": 6.0376, "eval_samples_per_second": 28.157, "eval_steps_per_second": 7.122, "step": 5 }, { "epoch": 0.029850746268656716, "grad_norm": 0.4759524464607239, "learning_rate": 0.0001992114701314478, "loss": 0.7473, "step": 6 }, { "epoch": 0.04477611940298507, "grad_norm": 0.4512733221054077, "learning_rate": 0.00018763066800438636, "loss": 0.6839, "step": 9 }, { "epoch": 0.04975124378109453, "eval_loss": 0.6773250102996826, "eval_runtime": 6.04, "eval_samples_per_second": 28.146, "eval_steps_per_second": 7.119, "step": 10 }, { "epoch": 0.05970149253731343, "grad_norm": 0.5345317721366882, "learning_rate": 0.000163742398974869, "loss": 0.6818, "step": 12 }, { "epoch": 0.07462686567164178, "grad_norm": 0.44765985012054443, "learning_rate": 0.00013090169943749476, "loss": 0.6346, "step": 15 }, { "epoch": 0.07462686567164178, "eval_loss": 0.6496699452400208, "eval_runtime": 6.0382, "eval_samples_per_second": 28.154, "eval_steps_per_second": 7.121, "step": 15 }, { "epoch": 0.08955223880597014, "grad_norm": 0.4275529086589813, "learning_rate": 9.372094804706867e-05, "loss": 0.6198, "step": 18 }, { "epoch": 0.09950248756218906, "eval_loss": 0.6332871913909912, "eval_runtime": 6.0316, "eval_samples_per_second": 28.185, "eval_steps_per_second": 7.129, "step": 20 }, { "epoch": 0.1044776119402985, "grad_norm": 0.4247596859931946, "learning_rate": 5.7422070843492734e-05, "loss": 0.6587, "step": 21 }, { "epoch": 0.11940298507462686, "grad_norm": 0.4106825590133667, "learning_rate": 2.7103137257858868e-05, "loss": 0.6146, "step": 24 }, { "epoch": 0.12437810945273632, "eval_loss": 0.6272332668304443, "eval_runtime": 6.0465, "eval_samples_per_second": 28.116, "eval_steps_per_second": 7.112, "step": 25 }, { "epoch": 0.13432835820895522, "grad_norm": 0.4225139617919922, "learning_rate": 7.022351411174866e-06, "loss": 0.6011, "step": 27 }, { "epoch": 0.14925373134328357, "grad_norm": 0.43210238218307495, "learning_rate": 0.0, "loss": 0.6208, "step": 30 }, { "epoch": 0.14925373134328357, "eval_loss": 0.6260796785354614, "eval_runtime": 6.0498, "eval_samples_per_second": 28.1, "eval_steps_per_second": 7.108, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6082054657671168.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }