{
  "best_metric": 10.373170852661133,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.08421052631578947,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016842105263157896,
      "grad_norm": 0.0806819349527359,
      "learning_rate": 0.0001,
      "loss": 10.3805,
      "step": 1
    },
    {
      "epoch": 0.0016842105263157896,
      "eval_loss": 10.37641429901123,
      "eval_runtime": 2.5054,
      "eval_samples_per_second": 99.784,
      "eval_steps_per_second": 49.892,
      "step": 1
    },
    {
      "epoch": 0.003368421052631579,
      "grad_norm": 0.024693956598639488,
      "learning_rate": 0.0002,
      "loss": 10.3779,
      "step": 2
    },
    {
      "epoch": 0.0050526315789473685,
      "grad_norm": 0.03821239992976189,
      "learning_rate": 0.00019978589232386035,
      "loss": 10.3785,
      "step": 3
    },
    {
      "epoch": 0.006736842105263158,
      "grad_norm": 0.027714772149920464,
      "learning_rate": 0.00019914448613738106,
      "loss": 10.3807,
      "step": 4
    },
    {
      "epoch": 0.008421052631578947,
      "grad_norm": 0.028413690626621246,
      "learning_rate": 0.00019807852804032305,
      "loss": 10.3768,
      "step": 5
    },
    {
      "epoch": 0.010105263157894737,
      "grad_norm": 0.03714875131845474,
      "learning_rate": 0.00019659258262890683,
      "loss": 10.3808,
      "step": 6
    },
    {
      "epoch": 0.011789473684210527,
      "grad_norm": 0.029419012367725372,
      "learning_rate": 0.0001946930129495106,
      "loss": 10.3766,
      "step": 7
    },
    {
      "epoch": 0.013473684210526317,
      "grad_norm": 0.03351010009646416,
      "learning_rate": 0.0001923879532511287,
      "loss": 10.3803,
      "step": 8
    },
    {
      "epoch": 0.015157894736842105,
      "grad_norm": 0.02861347794532776,
      "learning_rate": 0.00018968727415326884,
      "loss": 10.3819,
      "step": 9
    },
    {
      "epoch": 0.016842105263157894,
      "grad_norm": 0.033412087708711624,
      "learning_rate": 0.00018660254037844388,
      "loss": 10.3826,
      "step": 10
    },
    {
      "epoch": 0.018526315789473686,
      "grad_norm": 0.036408938467502594,
      "learning_rate": 0.00018314696123025454,
      "loss": 10.3754,
      "step": 11
    },
    {
      "epoch": 0.020210526315789474,
      "grad_norm": 0.037720438092947006,
      "learning_rate": 0.00017933533402912354,
      "loss": 10.3789,
      "step": 12
    },
    {
      "epoch": 0.021894736842105262,
      "grad_norm": 0.04012691602110863,
      "learning_rate": 0.00017518398074789775,
      "loss": 10.373,
      "step": 13
    },
    {
      "epoch": 0.023578947368421053,
      "grad_norm": 0.04150703176856041,
      "learning_rate": 0.00017071067811865476,
      "loss": 10.3795,
      "step": 14
    },
    {
      "epoch": 0.02526315789473684,
      "grad_norm": 0.04563893377780914,
      "learning_rate": 0.00016593458151000688,
      "loss": 10.3767,
      "step": 15
    },
    {
      "epoch": 0.026947368421052633,
      "grad_norm": 0.03193050995469093,
      "learning_rate": 0.00016087614290087208,
      "loss": 10.3773,
      "step": 16
    },
    {
      "epoch": 0.02863157894736842,
      "grad_norm": 0.03927315026521683,
      "learning_rate": 0.00015555702330196023,
      "loss": 10.3744,
      "step": 17
    },
    {
      "epoch": 0.03031578947368421,
      "grad_norm": 0.04184223338961601,
      "learning_rate": 0.00015000000000000001,
      "loss": 10.3749,
      "step": 18
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.049206532537937164,
      "learning_rate": 0.00014422886902190014,
      "loss": 10.3728,
      "step": 19
    },
    {
      "epoch": 0.03368421052631579,
      "grad_norm": 0.04716723784804344,
      "learning_rate": 0.000138268343236509,
      "loss": 10.3735,
      "step": 20
    },
    {
      "epoch": 0.03536842105263158,
      "grad_norm": 0.04559972882270813,
      "learning_rate": 0.00013214394653031616,
      "loss": 10.3782,
      "step": 21
    },
    {
      "epoch": 0.03705263157894737,
      "grad_norm": 0.05300372093915939,
      "learning_rate": 0.00012588190451025207,
      "loss": 10.372,
      "step": 22
    },
    {
      "epoch": 0.03873684210526316,
      "grad_norm": 0.046657513827085495,
      "learning_rate": 0.00011950903220161285,
      "loss": 10.3777,
      "step": 23
    },
    {
      "epoch": 0.04042105263157895,
      "grad_norm": 0.049605146050453186,
      "learning_rate": 0.00011305261922200519,
      "loss": 10.3798,
      "step": 24
    },
    {
      "epoch": 0.042105263157894736,
      "grad_norm": 0.042676687240600586,
      "learning_rate": 0.00010654031292301432,
      "loss": 10.3753,
      "step": 25
    },
    {
      "epoch": 0.042105263157894736,
      "eval_loss": 10.374467849731445,
      "eval_runtime": 2.5035,
      "eval_samples_per_second": 99.861,
      "eval_steps_per_second": 49.93,
      "step": 25
    },
    {
      "epoch": 0.043789473684210524,
      "grad_norm": 0.06261739879846573,
      "learning_rate": 0.0001,
      "loss": 10.3714,
      "step": 26
    },
    {
      "epoch": 0.04547368421052632,
      "grad_norm": 0.05161561071872711,
      "learning_rate": 9.345968707698569e-05,
      "loss": 10.3704,
      "step": 27
    },
    {
      "epoch": 0.04715789473684211,
      "grad_norm": 0.0623190775513649,
      "learning_rate": 8.694738077799488e-05,
      "loss": 10.3713,
      "step": 28
    },
    {
      "epoch": 0.048842105263157895,
      "grad_norm": 0.04920860007405281,
      "learning_rate": 8.049096779838719e-05,
      "loss": 10.3743,
      "step": 29
    },
    {
      "epoch": 0.05052631578947368,
      "grad_norm": 0.0820833221077919,
      "learning_rate": 7.411809548974792e-05,
      "loss": 10.3766,
      "step": 30
    },
    {
      "epoch": 0.05221052631578947,
      "grad_norm": 0.0759882777929306,
      "learning_rate": 6.785605346968386e-05,
      "loss": 10.3706,
      "step": 31
    },
    {
      "epoch": 0.053894736842105266,
      "grad_norm": 0.09138514846563339,
      "learning_rate": 6.173165676349103e-05,
      "loss": 10.3766,
      "step": 32
    },
    {
      "epoch": 0.055578947368421054,
      "grad_norm": 0.06546122580766678,
      "learning_rate": 5.577113097809989e-05,
      "loss": 10.3789,
      "step": 33
    },
    {
      "epoch": 0.05726315789473684,
      "grad_norm": 0.07500308752059937,
      "learning_rate": 5.000000000000002e-05,
      "loss": 10.3635,
      "step": 34
    },
    {
      "epoch": 0.05894736842105263,
      "grad_norm": 0.07415971904993057,
      "learning_rate": 4.444297669803981e-05,
      "loss": 10.3744,
      "step": 35
    },
    {
      "epoch": 0.06063157894736842,
      "grad_norm": 0.0628037378191948,
      "learning_rate": 3.9123857099127936e-05,
      "loss": 10.3741,
      "step": 36
    },
    {
      "epoch": 0.06231578947368421,
      "grad_norm": 0.07070739567279816,
      "learning_rate": 3.406541848999312e-05,
      "loss": 10.3683,
      "step": 37
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.07649371773004532,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 10.3733,
      "step": 38
    },
    {
      "epoch": 0.06568421052631579,
      "grad_norm": 0.09366203844547272,
      "learning_rate": 2.4816019252102273e-05,
      "loss": 10.3807,
      "step": 39
    },
    {
      "epoch": 0.06736842105263158,
      "grad_norm": 0.08414056897163391,
      "learning_rate": 2.0664665970876496e-05,
      "loss": 10.3586,
      "step": 40
    },
    {
      "epoch": 0.06905263157894737,
      "grad_norm": 0.09400244802236557,
      "learning_rate": 1.6853038769745467e-05,
      "loss": 10.3526,
      "step": 41
    },
    {
      "epoch": 0.07073684210526315,
      "grad_norm": 0.07930176705121994,
      "learning_rate": 1.339745962155613e-05,
      "loss": 10.3771,
      "step": 42
    },
    {
      "epoch": 0.07242105263157894,
      "grad_norm": 0.08477415144443512,
      "learning_rate": 1.0312725846731175e-05,
      "loss": 10.3674,
      "step": 43
    },
    {
      "epoch": 0.07410526315789474,
      "grad_norm": 0.09532937407493591,
      "learning_rate": 7.612046748871327e-06,
      "loss": 10.364,
      "step": 44
    },
    {
      "epoch": 0.07578947368421053,
      "grad_norm": 0.12698949873447418,
      "learning_rate": 5.306987050489442e-06,
      "loss": 10.3493,
      "step": 45
    },
    {
      "epoch": 0.07747368421052632,
      "grad_norm": 0.09168936312198639,
      "learning_rate": 3.40741737109318e-06,
      "loss": 10.3725,
      "step": 46
    },
    {
      "epoch": 0.07915789473684211,
      "grad_norm": 0.10333892703056335,
      "learning_rate": 1.921471959676957e-06,
      "loss": 10.3572,
      "step": 47
    },
    {
      "epoch": 0.0808421052631579,
      "grad_norm": 0.15725451707839966,
      "learning_rate": 8.555138626189618e-07,
      "loss": 10.3644,
      "step": 48
    },
    {
      "epoch": 0.08252631578947368,
      "grad_norm": 0.158616304397583,
      "learning_rate": 2.141076761396521e-07,
      "loss": 10.3458,
      "step": 49
    },
    {
      "epoch": 0.08421052631578947,
      "grad_norm": 0.16719745099544525,
      "learning_rate": 0.0,
      "loss": 10.3634,
      "step": 50
    },
    {
      "epoch": 0.08421052631578947,
      "eval_loss": 10.373170852661133,
      "eval_runtime": 2.7738,
      "eval_samples_per_second": 90.128,
      "eval_steps_per_second": 45.064,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5230244659200.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}