|
{ |
|
"best_metric": 1.5005075931549072, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 1.002169197396963, |
|
"eval_steps": 50, |
|
"global_step": 231, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004338394793926247, |
|
"eval_loss": 2.287693500518799, |
|
"eval_runtime": 2.3715, |
|
"eval_samples_per_second": 41.323, |
|
"eval_steps_per_second": 10.542, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04338394793926247, |
|
"grad_norm": 0.5313376784324646, |
|
"learning_rate": 4.12e-05, |
|
"loss": 1.8587, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08676789587852494, |
|
"grad_norm": 0.5186929702758789, |
|
"learning_rate": 8.24e-05, |
|
"loss": 2.0639, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1301518438177874, |
|
"grad_norm": 0.7352866530418396, |
|
"learning_rate": 0.0001236, |
|
"loss": 2.0921, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1735357917570499, |
|
"grad_norm": 0.8231748342514038, |
|
"learning_rate": 0.0001648, |
|
"loss": 1.8906, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21691973969631237, |
|
"grad_norm": 1.9571149349212646, |
|
"learning_rate": 0.000206, |
|
"loss": 1.9955, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21691973969631237, |
|
"eval_loss": 1.9748687744140625, |
|
"eval_runtime": 2.3417, |
|
"eval_samples_per_second": 41.85, |
|
"eval_steps_per_second": 10.676, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2603036876355748, |
|
"grad_norm": 0.42307645082473755, |
|
"learning_rate": 0.00020445239810895846, |
|
"loss": 1.7835, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3036876355748373, |
|
"grad_norm": 0.465277761220932, |
|
"learning_rate": 0.00019985609868074957, |
|
"loss": 1.7309, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3470715835140998, |
|
"grad_norm": 0.6410661935806274, |
|
"learning_rate": 0.0001923492229131769, |
|
"loss": 1.6651, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.39045553145336226, |
|
"grad_norm": 0.6676890850067139, |
|
"learning_rate": 0.0001821573563428174, |
|
"loss": 1.7176, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.43383947939262474, |
|
"grad_norm": 2.6857898235321045, |
|
"learning_rate": 0.00016958676988185618, |
|
"loss": 1.8401, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.43383947939262474, |
|
"eval_loss": 1.6733328104019165, |
|
"eval_runtime": 2.3671, |
|
"eval_samples_per_second": 41.402, |
|
"eval_steps_per_second": 10.562, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4772234273318872, |
|
"grad_norm": 0.4030924141407013, |
|
"learning_rate": 0.00015501521621725395, |
|
"loss": 1.5976, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5206073752711496, |
|
"grad_norm": 0.4995245337486267, |
|
"learning_rate": 0.00013888057814525847, |
|
"loss": 1.5292, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5639913232104121, |
|
"grad_norm": 0.6390402913093567, |
|
"learning_rate": 0.00012166770996473358, |
|
"loss": 1.5418, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6073752711496746, |
|
"grad_norm": 0.6645438075065613, |
|
"learning_rate": 0.0001038938673523089, |
|
"loss": 1.6084, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6507592190889371, |
|
"grad_norm": 2.0902185440063477, |
|
"learning_rate": 8.609316355920905e-05, |
|
"loss": 1.7155, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6507592190889371, |
|
"eval_loss": 1.5498733520507812, |
|
"eval_runtime": 2.3415, |
|
"eval_samples_per_second": 41.853, |
|
"eval_steps_per_second": 10.677, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6941431670281996, |
|
"grad_norm": 0.42309829592704773, |
|
"learning_rate": 6.880051902916351e-05, |
|
"loss": 1.5078, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.737527114967462, |
|
"grad_norm": 0.4995267391204834, |
|
"learning_rate": 5.25355867597608e-05, |
|
"loss": 1.4928, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7809110629067245, |
|
"grad_norm": 0.5781512260437012, |
|
"learning_rate": 3.778713645853078e-05, |
|
"loss": 1.4352, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.824295010845987, |
|
"grad_norm": 0.728786289691925, |
|
"learning_rate": 2.4998366758019425e-05, |
|
"loss": 1.4567, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8676789587852495, |
|
"grad_norm": 2.4118869304656982, |
|
"learning_rate": 1.4553586865461783e-05, |
|
"loss": 1.8346, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8676789587852495, |
|
"eval_loss": 1.5005075931549072, |
|
"eval_runtime": 2.4042, |
|
"eval_samples_per_second": 40.762, |
|
"eval_steps_per_second": 10.398, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.911062906724512, |
|
"grad_norm": 0.4956224858760834, |
|
"learning_rate": 6.7666678704352e-06, |
|
"loss": 1.4672, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9544468546637744, |
|
"grad_norm": 0.6833217144012451, |
|
"learning_rate": 1.8716107547278716e-06, |
|
"loss": 1.419, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9978308026030369, |
|
"grad_norm": 1.7939646244049072, |
|
"learning_rate": 1.5514540650944263e-08, |
|
"loss": 1.592, |
|
"step": 230 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 231, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2171209892167680.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|