|
{ |
|
"best_metric": 2.626811981201172, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.09017132551848513, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0036068530207394047, |
|
"grad_norm": 0.9312214851379395, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 11.1103, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0036068530207394047, |
|
"eval_loss": 2.680246353149414, |
|
"eval_runtime": 12.0869, |
|
"eval_samples_per_second": 9.68, |
|
"eval_steps_per_second": 4.881, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007213706041478809, |
|
"grad_norm": 0.920833170413971, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 10.8111, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.010820559062218215, |
|
"grad_norm": 0.9480271339416504, |
|
"learning_rate": 6e-06, |
|
"loss": 10.6508, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.014427412082957619, |
|
"grad_norm": 0.9571245312690735, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 10.7966, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.018034265103697024, |
|
"grad_norm": 0.9208737015724182, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9468, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.018034265103697024, |
|
"eval_loss": 2.6797571182250977, |
|
"eval_runtime": 12.0851, |
|
"eval_samples_per_second": 9.681, |
|
"eval_steps_per_second": 4.882, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02164111812443643, |
|
"grad_norm": 0.9306275844573975, |
|
"learning_rate": 1.2e-05, |
|
"loss": 10.6401, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.025247971145175834, |
|
"grad_norm": 0.9370696544647217, |
|
"learning_rate": 1.4e-05, |
|
"loss": 10.6417, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.028854824165915238, |
|
"grad_norm": 1.055396318435669, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 10.7145, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.032461677186654644, |
|
"grad_norm": 1.0336787700653076, |
|
"learning_rate": 1.8e-05, |
|
"loss": 10.7719, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03606853020739405, |
|
"grad_norm": 1.0623098611831665, |
|
"learning_rate": 2e-05, |
|
"loss": 10.8959, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03606853020739405, |
|
"eval_loss": 2.6718361377716064, |
|
"eval_runtime": 12.0963, |
|
"eval_samples_per_second": 9.672, |
|
"eval_steps_per_second": 4.878, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03967538322813345, |
|
"grad_norm": 1.1591328382492065, |
|
"learning_rate": 1.9781476007338058e-05, |
|
"loss": 10.7372, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.04328223624887286, |
|
"grad_norm": 1.1925017833709717, |
|
"learning_rate": 1.913545457642601e-05, |
|
"loss": 10.6267, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.046889089269612265, |
|
"grad_norm": 1.2597171068191528, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 10.8772, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.05049594229035167, |
|
"grad_norm": 1.3097079992294312, |
|
"learning_rate": 1.6691306063588583e-05, |
|
"loss": 10.6183, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.05410279531109107, |
|
"grad_norm": 1.390768051147461, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 10.5317, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05410279531109107, |
|
"eval_loss": 2.6502935886383057, |
|
"eval_runtime": 12.1513, |
|
"eval_samples_per_second": 9.629, |
|
"eval_steps_per_second": 4.855, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.057709648331830475, |
|
"grad_norm": 1.3049812316894531, |
|
"learning_rate": 1.3090169943749475e-05, |
|
"loss": 10.6775, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.061316501352569885, |
|
"grad_norm": 1.3082205057144165, |
|
"learning_rate": 1.1045284632676535e-05, |
|
"loss": 10.4822, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06492335437330929, |
|
"grad_norm": 1.460840106010437, |
|
"learning_rate": 8.954715367323468e-06, |
|
"loss": 10.7828, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06853020739404869, |
|
"grad_norm": 1.2913298606872559, |
|
"learning_rate": 6.909830056250527e-06, |
|
"loss": 10.4023, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0721370604147881, |
|
"grad_norm": 1.4784389734268188, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 10.6093, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0721370604147881, |
|
"eval_loss": 2.63152813911438, |
|
"eval_runtime": 12.0811, |
|
"eval_samples_per_second": 9.685, |
|
"eval_steps_per_second": 4.884, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0757439134355275, |
|
"grad_norm": 1.4882969856262207, |
|
"learning_rate": 3.308693936411421e-06, |
|
"loss": 10.718, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0793507664562669, |
|
"grad_norm": 1.4357144832611084, |
|
"learning_rate": 1.9098300562505266e-06, |
|
"loss": 10.5072, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0829576194770063, |
|
"grad_norm": 1.4908266067504883, |
|
"learning_rate": 8.645454235739903e-07, |
|
"loss": 10.5149, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.08656447249774572, |
|
"grad_norm": 1.411119818687439, |
|
"learning_rate": 2.1852399266194312e-07, |
|
"loss": 10.5278, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.09017132551848513, |
|
"grad_norm": 1.4688230752944946, |
|
"learning_rate": 0.0, |
|
"loss": 10.4999, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09017132551848513, |
|
"eval_loss": 2.626811981201172, |
|
"eval_runtime": 12.1576, |
|
"eval_samples_per_second": 9.624, |
|
"eval_steps_per_second": 4.853, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7994716371025920.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|