{ "best_metric": 0.43841221928596497, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 1.0023310023310024, "eval_steps": 50, "global_step": 215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004662004662004662, "eval_loss": 0.7121074199676514, "eval_runtime": 7.0933, "eval_samples_per_second": 12.829, "eval_steps_per_second": 3.242, "step": 1 }, { "epoch": 0.046620046620046623, "grad_norm": 0.5039583444595337, "learning_rate": 4.22e-05, "loss": 0.6299, "step": 10 }, { "epoch": 0.09324009324009325, "grad_norm": 0.983407199382782, "learning_rate": 8.44e-05, "loss": 0.6324, "step": 20 }, { "epoch": 0.13986013986013987, "grad_norm": 0.9202139377593994, "learning_rate": 0.0001266, "loss": 0.5562, "step": 30 }, { "epoch": 0.1864801864801865, "grad_norm": 1.4665826559066772, "learning_rate": 0.0001688, "loss": 0.5768, "step": 40 }, { "epoch": 0.2331002331002331, "grad_norm": 2.6910219192504883, "learning_rate": 0.000211, "loss": 0.5501, "step": 50 }, { "epoch": 0.2331002331002331, "eval_loss": 0.5634785890579224, "eval_runtime": 7.092, "eval_samples_per_second": 12.831, "eval_steps_per_second": 3.243, "step": 50 }, { "epoch": 0.27972027972027974, "grad_norm": 0.5147734880447388, "learning_rate": 0.00020909347756121555, "loss": 0.6062, "step": 60 }, { "epoch": 0.32634032634032634, "grad_norm": 0.5392813682556152, "learning_rate": 0.00020344281693319567, "loss": 0.5125, "step": 70 }, { "epoch": 0.372960372960373, "grad_norm": 0.8237327337265015, "learning_rate": 0.00019425224771368963, "loss": 0.4912, "step": 80 }, { "epoch": 0.4195804195804196, "grad_norm": 0.8676568269729614, "learning_rate": 0.0001818539410200849, "loss": 0.4504, "step": 90 }, { "epoch": 0.4662004662004662, "grad_norm": 2.122711181640625, "learning_rate": 0.00016669600395976142, "loss": 0.4019, "step": 100 }, { "epoch": 0.4662004662004662, "eval_loss": 0.5337139368057251, "eval_runtime": 7.0219, "eval_samples_per_second": 12.959, "eval_steps_per_second": 3.275, "step": 100 }, { "epoch": 0.5128205128205128, "grad_norm": 0.6717955470085144, "learning_rate": 0.00014932628387169904, "loss": 0.473, "step": 110 }, { "epoch": 0.5594405594405595, "grad_norm": 0.6870163083076477, "learning_rate": 0.0001303725676962446, "loss": 0.482, "step": 120 }, { "epoch": 0.6060606060606061, "grad_norm": 0.6760181784629822, "learning_rate": 0.00011051989211940484, "loss": 0.4559, "step": 130 }, { "epoch": 0.6526806526806527, "grad_norm": 0.966548502445221, "learning_rate": 9.048578456216843e-05, "loss": 0.4025, "step": 140 }, { "epoch": 0.6993006993006993, "grad_norm": 3.640005588531494, "learning_rate": 7.099432987001202e-05, "loss": 0.4958, "step": 150 }, { "epoch": 0.6993006993006993, "eval_loss": 0.4841608703136444, "eval_runtime": 7.1179, "eval_samples_per_second": 12.785, "eval_steps_per_second": 3.231, "step": 150 }, { "epoch": 0.745920745920746, "grad_norm": 0.4770185649394989, "learning_rate": 5.275000000000002e-05, "loss": 0.4045, "step": 160 }, { "epoch": 0.7925407925407926, "grad_norm": 0.7207168340682983, "learning_rate": 3.6412192568772434e-05, "loss": 0.4739, "step": 170 }, { "epoch": 0.8391608391608392, "grad_norm": 0.7188816666603088, "learning_rate": 2.257139850463594e-05, "loss": 0.4556, "step": 180 }, { "epoch": 0.8857808857808858, "grad_norm": 0.9799768328666687, "learning_rate": 1.1727860166905585e-05, "loss": 0.5001, "step": 190 }, { "epoch": 0.9324009324009324, "grad_norm": 2.884735584259033, "learning_rate": 4.27349128367054e-06, "loss": 0.5498, "step": 200 }, { "epoch": 0.9324009324009324, "eval_loss": 0.43841221928596497, "eval_runtime": 6.931, "eval_samples_per_second": 13.129, "eval_steps_per_second": 3.318, "step": 200 }, { "epoch": 0.9790209790209791, "grad_norm": 0.7910031676292419, "learning_rate": 4.777121685395759e-07, "loss": 0.4308, "step": 210 } ], "logging_steps": 10, "max_steps": 215, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.789784787340493e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }