|
{
  "best_metric": 1.7171608209609985,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.05684240443370755,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002842120221685377,
      "eval_loss": 2.4652628898620605,
      "eval_runtime": 82.1891,
      "eval_samples_per_second": 9.016,
      "eval_steps_per_second": 2.263,
      "step": 1
    },
    {
      "epoch": 0.002842120221685377,
      "grad_norm": 2.0982413291931152,
      "learning_rate": 4.12e-05,
      "loss": 1.8261,
      "step": 10
    },
    {
      "epoch": 0.005684240443370754,
      "grad_norm": 4.10166597366333,
      "learning_rate": 8.24e-05,
      "loss": 1.4635,
      "step": 20
    },
    {
      "epoch": 0.008526360665056132,
      "grad_norm": 1.6615333557128906,
      "learning_rate": 0.0001236,
      "loss": 1.667,
      "step": 30
    },
    {
      "epoch": 0.011368480886741509,
      "grad_norm": 1.931045651435852,
      "learning_rate": 0.0001648,
      "loss": 1.5695,
      "step": 40
    },
    {
      "epoch": 0.014210601108426887,
      "grad_norm": 1.6880329847335815,
      "learning_rate": 0.000206,
      "loss": 1.8497,
      "step": 50
    },
    {
      "epoch": 0.014210601108426887,
      "eval_loss": 1.8087369203567505,
      "eval_runtime": 81.6706,
      "eval_samples_per_second": 9.073,
      "eval_steps_per_second": 2.277,
      "step": 50
    },
    {
      "epoch": 0.017052721330112264,
      "grad_norm": 3.674773931503296,
      "learning_rate": 0.0002057490971767619,
      "loss": 1.7821,
      "step": 60
    },
    {
      "epoch": 0.019894841551797642,
      "grad_norm": 3.0321569442749023,
      "learning_rate": 0.00020499761108038175,
      "loss": 1.6055,
      "step": 70
    },
    {
      "epoch": 0.022736961773483017,
      "grad_norm": 2.507596254348755,
      "learning_rate": 0.00020374920287558198,
      "loss": 1.3591,
      "step": 80
    },
    {
      "epoch": 0.025579081995168396,
      "grad_norm": 8.490031242370605,
      "learning_rate": 0.00020200995468164684,
      "loss": 1.2845,
      "step": 90
    },
    {
      "epoch": 0.028421202216853774,
      "grad_norm": 42.760581970214844,
      "learning_rate": 0.00019978833994094855,
      "loss": 1.8626,
      "step": 100
    },
    {
      "epoch": 0.028421202216853774,
      "eval_loss": 2.087491750717163,
      "eval_runtime": 81.5271,
      "eval_samples_per_second": 9.089,
      "eval_steps_per_second": 2.281,
      "step": 100
    },
    {
      "epoch": 0.03126332243853915,
      "grad_norm": 2.0331203937530518,
      "learning_rate": 0.00019709518213718787,
      "loss": 1.708,
      "step": 110
    },
    {
      "epoch": 0.03410544266022453,
      "grad_norm": 4.635890007019043,
      "learning_rate": 0.00019394360206446948,
      "loss": 1.6543,
      "step": 120
    },
    {
      "epoch": 0.0369475628819099,
      "grad_norm": 2.990798234939575,
      "learning_rate": 0.00019034895390411186,
      "loss": 1.8625,
      "step": 130
    },
    {
      "epoch": 0.039789683103595284,
      "grad_norm": 3.2386255264282227,
      "learning_rate": 0.0001863287504206196,
      "loss": 1.7414,
      "step": 140
    },
    {
      "epoch": 0.04263180332528066,
      "grad_norm": 1.6220500469207764,
      "learning_rate": 0.00018190257764125471,
      "loss": 1.7585,
      "step": 150
    },
    {
      "epoch": 0.04263180332528066,
      "eval_loss": 1.7171608209609985,
      "eval_runtime": 82.0601,
      "eval_samples_per_second": 9.03,
      "eval_steps_per_second": 2.267,
      "step": 150
    },
    {
      "epoch": 0.045473923546966034,
      "grad_norm": 1.6705387830734253,
      "learning_rate": 0.00017709199943488106,
      "loss": 1.806,
      "step": 160
    },
    {
      "epoch": 0.048316043768651416,
      "grad_norm": 2.3406643867492676,
      "learning_rate": 0.00017192045245496238,
      "loss": 1.5212,
      "step": 170
    },
    {
      "epoch": 0.05115816399033679,
      "grad_norm": 3.6276838779449463,
      "learning_rate": 0.00016641313195854277,
      "loss": 1.5071,
      "step": 180
    },
    {
      "epoch": 0.054000284212022166,
      "grad_norm": 7.468491077423096,
      "learning_rate": 0.0001605968690574869,
      "loss": 1.0868,
      "step": 190
    },
    {
      "epoch": 0.05684240443370755,
      "grad_norm": 19.19157600402832,
      "learning_rate": 0.0001545,
      "loss": 1.503,
      "step": 200
    },
    {
      "epoch": 0.05684240443370755,
      "eval_loss": 2.506401300430298,
      "eval_runtime": 81.7213,
      "eval_samples_per_second": 9.067,
      "eval_steps_per_second": 2.276,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.482539204490035e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|