{ "best_metric": 1.6733328104019165, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.43383947939262474, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004338394793926247, "eval_loss": 2.287693500518799, "eval_runtime": 2.3715, "eval_samples_per_second": 41.323, "eval_steps_per_second": 10.542, "step": 1 }, { "epoch": 0.04338394793926247, "grad_norm": 0.5313376784324646, "learning_rate": 4.12e-05, "loss": 1.8587, "step": 10 }, { "epoch": 0.08676789587852494, "grad_norm": 0.5186929702758789, "learning_rate": 8.24e-05, "loss": 2.0639, "step": 20 }, { "epoch": 0.1301518438177874, "grad_norm": 0.7352866530418396, "learning_rate": 0.0001236, "loss": 2.0921, "step": 30 }, { "epoch": 0.1735357917570499, "grad_norm": 0.8231748342514038, "learning_rate": 0.0001648, "loss": 1.8906, "step": 40 }, { "epoch": 0.21691973969631237, "grad_norm": 1.9571149349212646, "learning_rate": 0.000206, "loss": 1.9955, "step": 50 }, { "epoch": 0.21691973969631237, "eval_loss": 1.9748687744140625, "eval_runtime": 2.3417, "eval_samples_per_second": 41.85, "eval_steps_per_second": 10.676, "step": 50 }, { "epoch": 0.2603036876355748, "grad_norm": 0.42307645082473755, "learning_rate": 0.00020445239810895846, "loss": 1.7835, "step": 60 }, { "epoch": 0.3036876355748373, "grad_norm": 0.465277761220932, "learning_rate": 0.00019985609868074957, "loss": 1.7309, "step": 70 }, { "epoch": 0.3470715835140998, "grad_norm": 0.6410661935806274, "learning_rate": 0.0001923492229131769, "loss": 1.6651, "step": 80 }, { "epoch": 0.39045553145336226, "grad_norm": 0.6676890850067139, "learning_rate": 0.0001821573563428174, "loss": 1.7176, "step": 90 }, { "epoch": 0.43383947939262474, "grad_norm": 2.6857898235321045, "learning_rate": 0.00016958676988185618, "loss": 1.8401, "step": 100 }, { "epoch": 0.43383947939262474, "eval_loss": 1.6733328104019165, "eval_runtime": 2.3671, "eval_samples_per_second": 41.402, "eval_steps_per_second": 10.562, "step": 100 } ], "logging_steps": 10, "max_steps": 231, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 927298737930240.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }