{
  "best_metric": 1.639454960823059,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.09715812484819043,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00048579062424095217,
      "eval_loss": 2.5925121307373047,
      "eval_runtime": 51.7124,
      "eval_samples_per_second": 16.766,
      "eval_steps_per_second": 4.196,
      "step": 1
    },
    {
      "epoch": 0.004857906242409522,
      "grad_norm": 14.00100326538086,
      "learning_rate": 4.2600000000000005e-05,
      "loss": 3.0736,
      "step": 10
    },
    {
      "epoch": 0.009715812484819043,
      "grad_norm": 12.023590087890625,
      "learning_rate": 8.520000000000001e-05,
      "loss": 2.3164,
      "step": 20
    },
    {
      "epoch": 0.014573718727228565,
      "grad_norm": 25.465974807739258,
      "learning_rate": 0.0001278,
      "loss": 2.5703,
      "step": 30
    },
    {
      "epoch": 0.019431624969638087,
      "grad_norm": 14.983630180358887,
      "learning_rate": 0.00017040000000000002,
      "loss": 2.9312,
      "step": 40
    },
    {
      "epoch": 0.024289531212047608,
      "grad_norm": 18.844684600830078,
      "learning_rate": 0.000213,
      "loss": 3.0475,
      "step": 50
    },
    {
      "epoch": 0.024289531212047608,
      "eval_loss": 1.639454960823059,
      "eval_runtime": 51.7386,
      "eval_samples_per_second": 16.757,
      "eval_steps_per_second": 4.194,
      "step": 50
    },
    {
      "epoch": 0.02914743745445713,
      "grad_norm": 9.967033386230469,
      "learning_rate": 0.00021274057135267128,
      "loss": 2.881,
      "step": 60
    },
    {
      "epoch": 0.03400534369686665,
      "grad_norm": 13.58944320678711,
      "learning_rate": 0.00021196354932097723,
      "loss": 2.4333,
      "step": 70
    },
    {
      "epoch": 0.03886324993927617,
      "grad_norm": 24.828699111938477,
      "learning_rate": 0.0002106727194781503,
      "loss": 3.1402,
      "step": 80
    },
    {
      "epoch": 0.04372115618168569,
      "grad_norm": 51.00019454956055,
      "learning_rate": 0.00020887437061743096,
      "loss": 3.3349,
      "step": 90
    },
    {
      "epoch": 0.048579062424095217,
      "grad_norm": 26.764297485351562,
      "learning_rate": 0.00020657726411369925,
      "loss": 3.5652,
      "step": 100
    },
    {
      "epoch": 0.048579062424095217,
      "eval_loss": 2.1433684825897217,
      "eval_runtime": 51.7778,
      "eval_samples_per_second": 16.745,
      "eval_steps_per_second": 4.191,
      "step": 100
    },
    {
      "epoch": 0.053436968666504735,
      "grad_norm": 20.774890899658203,
      "learning_rate": 0.000203792591238937,
      "loss": 3.2651,
      "step": 110
    },
    {
      "epoch": 0.05829487490891426,
      "grad_norm": 13.955368041992188,
      "learning_rate": 0.0002005339186394757,
      "loss": 2.3564,
      "step": 120
    },
    {
      "epoch": 0.06315278115132378,
      "grad_norm": 20.711078643798828,
      "learning_rate": 0.00019681712224065936,
      "loss": 3.2759,
      "step": 130
    },
    {
      "epoch": 0.0680106873937333,
      "grad_norm": 20.040428161621094,
      "learning_rate": 0.0001926603099009319,
      "loss": 3.2425,
      "step": 140
    },
    {
      "epoch": 0.07286859363614283,
      "grad_norm": 50.84387969970703,
      "learning_rate": 0.00018808373319217114,
      "loss": 3.6393,
      "step": 150
    },
    {
      "epoch": 0.07286859363614283,
      "eval_loss": 1.8135055303573608,
      "eval_runtime": 51.7534,
      "eval_samples_per_second": 16.753,
      "eval_steps_per_second": 4.193,
      "step": 150
    },
    {
      "epoch": 0.07772649987855235,
      "grad_norm": 15.678354263305664,
      "learning_rate": 0.00018310968873606635,
      "loss": 3.1187,
      "step": 160
    },
    {
      "epoch": 0.08258440612096186,
      "grad_norm": 27.940187454223633,
      "learning_rate": 0.0001777624095772184,
      "loss": 2.4375,
      "step": 170
    },
    {
      "epoch": 0.08744231236337138,
      "grad_norm": 22.329130172729492,
      "learning_rate": 0.0001720679471221826,
      "loss": 3.5057,
      "step": 180
    },
    {
      "epoch": 0.09230021860578091,
      "grad_norm": 21.67806053161621,
      "learning_rate": 0.00016605404421963453,
      "loss": 3.6538,
      "step": 190
    },
    {
      "epoch": 0.09715812484819043,
      "grad_norm": 19.599658966064453,
      "learning_rate": 0.00015975,
      "loss": 3.1059,
      "step": 200
    },
    {
      "epoch": 0.09715812484819043,
      "eval_loss": 1.7558817863464355,
      "eval_runtime": 51.7429,
      "eval_samples_per_second": 16.756,
      "eval_steps_per_second": 4.194,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.57749502246912e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}