|
{ |
|
"best_metric": 1.0434695482254028, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.0021246308453906136, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.498523381562454e-05, |
|
"grad_norm": 0.2922225594520569, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2053, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 8.498523381562454e-05, |
|
"eval_loss": 1.3844845294952393, |
|
"eval_runtime": 3381.9851, |
|
"eval_samples_per_second": 1.465, |
|
"eval_steps_per_second": 0.733, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00016997046763124907, |
|
"grad_norm": 0.3761480450630188, |
|
"learning_rate": 4e-05, |
|
"loss": 1.3331, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0002549557014468736, |
|
"grad_norm": 0.39635875821113586, |
|
"learning_rate": 6e-05, |
|
"loss": 1.7427, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00033994093526249815, |
|
"grad_norm": 0.3360555171966553, |
|
"learning_rate": 8e-05, |
|
"loss": 1.22, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00042492616907812265, |
|
"grad_norm": 0.3878980875015259, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1324, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00042492616907812265, |
|
"eval_loss": 1.3745969533920288, |
|
"eval_runtime": 1321.6033, |
|
"eval_samples_per_second": 3.749, |
|
"eval_steps_per_second": 1.875, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0005099114028937472, |
|
"grad_norm": 0.40587636828422546, |
|
"learning_rate": 0.00012, |
|
"loss": 1.2451, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005948966367093718, |
|
"grad_norm": 0.4392871558666229, |
|
"learning_rate": 0.00014, |
|
"loss": 1.2641, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0006798818705249963, |
|
"grad_norm": 0.5529392957687378, |
|
"learning_rate": 0.00016, |
|
"loss": 1.04, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0007648671043406208, |
|
"grad_norm": 0.4955708086490631, |
|
"learning_rate": 0.00018, |
|
"loss": 1.1158, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008498523381562453, |
|
"grad_norm": 0.47972533106803894, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2657, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0008498523381562453, |
|
"eval_loss": 1.182800531387329, |
|
"eval_runtime": 1329.7539, |
|
"eval_samples_per_second": 3.726, |
|
"eval_steps_per_second": 1.864, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0009348375719718699, |
|
"grad_norm": 0.5133495330810547, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 1.4981, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0010198228057874943, |
|
"grad_norm": 0.4741702377796173, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 1.0614, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.001104808039603119, |
|
"grad_norm": 0.48592695593833923, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 1.3289, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0011897932734187436, |
|
"grad_norm": 0.5056785941123962, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 1.0073, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.001274778507234368, |
|
"grad_norm": 0.3255745768547058, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.3241, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.001274778507234368, |
|
"eval_loss": 1.0932385921478271, |
|
"eval_runtime": 1319.1764, |
|
"eval_samples_per_second": 3.756, |
|
"eval_steps_per_second": 1.878, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0013597637410499926, |
|
"grad_norm": 0.3560282289981842, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.8673, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.001444748974865617, |
|
"grad_norm": 0.3492855131626129, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 0.9656, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0015297342086812416, |
|
"grad_norm": 0.33957651257514954, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 1.0601, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0016147194424968661, |
|
"grad_norm": 0.3338528871536255, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 1.025, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0016997046763124906, |
|
"grad_norm": 0.3303922116756439, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 1.2887, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0016997046763124906, |
|
"eval_loss": 1.0485574007034302, |
|
"eval_runtime": 2250.8, |
|
"eval_samples_per_second": 2.201, |
|
"eval_steps_per_second": 1.101, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0017846899101281153, |
|
"grad_norm": 0.2990271747112274, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 0.8543, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0018696751439437399, |
|
"grad_norm": 0.27824667096138, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 0.9073, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.001954660377759364, |
|
"grad_norm": 0.39718323945999146, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 1.2813, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0020396456115749887, |
|
"grad_norm": 0.3636144995689392, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 0.831, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0021246308453906136, |
|
"grad_norm": 0.3469758629798889, |
|
"learning_rate": 0.0, |
|
"loss": 0.9989, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0021246308453906136, |
|
"eval_loss": 1.0434695482254028, |
|
"eval_runtime": 2449.2051, |
|
"eval_samples_per_second": 2.023, |
|
"eval_steps_per_second": 1.012, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.511863093297152e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|