{ "best_metric": 1.5005075931549072, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 1.002169197396963, "eval_steps": 50, "global_step": 231, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004338394793926247, "eval_loss": 2.287693500518799, "eval_runtime": 2.3715, "eval_samples_per_second": 41.323, "eval_steps_per_second": 10.542, "step": 1 }, { "epoch": 0.04338394793926247, "grad_norm": 0.5313376784324646, "learning_rate": 4.12e-05, "loss": 1.8587, "step": 10 }, { "epoch": 0.08676789587852494, "grad_norm": 0.5186929702758789, "learning_rate": 8.24e-05, "loss": 2.0639, "step": 20 }, { "epoch": 0.1301518438177874, "grad_norm": 0.7352866530418396, "learning_rate": 0.0001236, "loss": 2.0921, "step": 30 }, { "epoch": 0.1735357917570499, "grad_norm": 0.8231748342514038, "learning_rate": 0.0001648, "loss": 1.8906, "step": 40 }, { "epoch": 0.21691973969631237, "grad_norm": 1.9571149349212646, "learning_rate": 0.000206, "loss": 1.9955, "step": 50 }, { "epoch": 0.21691973969631237, "eval_loss": 1.9748687744140625, "eval_runtime": 2.3417, "eval_samples_per_second": 41.85, "eval_steps_per_second": 10.676, "step": 50 }, { "epoch": 0.2603036876355748, "grad_norm": 0.42307645082473755, "learning_rate": 0.00020445239810895846, "loss": 1.7835, "step": 60 }, { "epoch": 0.3036876355748373, "grad_norm": 0.465277761220932, "learning_rate": 0.00019985609868074957, "loss": 1.7309, "step": 70 }, { "epoch": 0.3470715835140998, "grad_norm": 0.6410661935806274, "learning_rate": 0.0001923492229131769, "loss": 1.6651, "step": 80 }, { "epoch": 0.39045553145336226, "grad_norm": 0.6676890850067139, "learning_rate": 0.0001821573563428174, "loss": 1.7176, "step": 90 }, { "epoch": 0.43383947939262474, "grad_norm": 2.6857898235321045, "learning_rate": 0.00016958676988185618, "loss": 1.8401, "step": 100 }, { "epoch": 0.43383947939262474, "eval_loss": 1.6733328104019165, "eval_runtime": 2.3671, "eval_samples_per_second": 41.402, "eval_steps_per_second": 10.562, "step": 100 }, { "epoch": 0.4772234273318872, "grad_norm": 0.4030924141407013, "learning_rate": 0.00015501521621725395, "loss": 1.5976, "step": 110 }, { "epoch": 0.5206073752711496, "grad_norm": 0.4995245337486267, "learning_rate": 0.00013888057814525847, "loss": 1.5292, "step": 120 }, { "epoch": 0.5639913232104121, "grad_norm": 0.6390402913093567, "learning_rate": 0.00012166770996473358, "loss": 1.5418, "step": 130 }, { "epoch": 0.6073752711496746, "grad_norm": 0.6645438075065613, "learning_rate": 0.0001038938673523089, "loss": 1.6084, "step": 140 }, { "epoch": 0.6507592190889371, "grad_norm": 2.0902185440063477, "learning_rate": 8.609316355920905e-05, "loss": 1.7155, "step": 150 }, { "epoch": 0.6507592190889371, "eval_loss": 1.5498733520507812, "eval_runtime": 2.3415, "eval_samples_per_second": 41.853, "eval_steps_per_second": 10.677, "step": 150 }, { "epoch": 0.6941431670281996, "grad_norm": 0.42309829592704773, "learning_rate": 6.880051902916351e-05, "loss": 1.5078, "step": 160 }, { "epoch": 0.737527114967462, "grad_norm": 0.4995267391204834, "learning_rate": 5.25355867597608e-05, "loss": 1.4928, "step": 170 }, { "epoch": 0.7809110629067245, "grad_norm": 0.5781512260437012, "learning_rate": 3.778713645853078e-05, "loss": 1.4352, "step": 180 }, { "epoch": 0.824295010845987, "grad_norm": 0.728786289691925, "learning_rate": 2.4998366758019425e-05, "loss": 1.4567, "step": 190 }, { "epoch": 0.8676789587852495, "grad_norm": 2.4118869304656982, "learning_rate": 1.4553586865461783e-05, "loss": 1.8346, "step": 200 }, { "epoch": 0.8676789587852495, "eval_loss": 1.5005075931549072, "eval_runtime": 2.4042, "eval_samples_per_second": 40.762, "eval_steps_per_second": 10.398, "step": 200 }, { "epoch": 0.911062906724512, "grad_norm": 0.4956224858760834, "learning_rate": 6.7666678704352e-06, "loss": 1.4672, "step": 210 }, { "epoch": 0.9544468546637744, "grad_norm": 0.6833217144012451, "learning_rate": 1.8716107547278716e-06, "loss": 1.419, "step": 220 }, { "epoch": 0.9978308026030369, "grad_norm": 1.7939646244049072, "learning_rate": 1.5514540650944263e-08, "loss": 1.592, "step": 230 } ], "logging_steps": 10, "max_steps": 231, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2171209892167680.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }