|
{
  "best_metric": 0.7650135159492493,
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
  "epoch": 3.058275058275058,
  "eval_steps": 25,
  "global_step": 41,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07459207459207459,
      "grad_norm": 49.79338073730469,
      "learning_rate": 5e-05,
      "loss": 31.079,
      "step": 1
    },
    {
      "epoch": 0.07459207459207459,
      "eval_loss": 1.1030958890914917,
      "eval_runtime": 4.3497,
      "eval_samples_per_second": 11.495,
      "eval_steps_per_second": 2.989,
      "step": 1
    },
    {
      "epoch": 0.14918414918414918,
      "grad_norm": 61.619346618652344,
      "learning_rate": 0.0001,
      "loss": 35.8237,
      "step": 2
    },
    {
      "epoch": 0.22377622377622378,
      "grad_norm": 44.663516998291016,
      "learning_rate": 9.985407886603945e-05,
      "loss": 35.1631,
      "step": 3
    },
    {
      "epoch": 0.29836829836829837,
      "grad_norm": 28.899295806884766,
      "learning_rate": 9.941726181870608e-05,
      "loss": 29.8061,
      "step": 4
    },
    {
      "epoch": 0.372960372960373,
      "grad_norm": 25.077674865722656,
      "learning_rate": 9.869238178417235e-05,
      "loss": 28.4287,
      "step": 5
    },
    {
      "epoch": 0.44755244755244755,
      "grad_norm": 21.780200958251953,
      "learning_rate": 9.768413988762156e-05,
      "loss": 29.4979,
      "step": 6
    },
    {
      "epoch": 0.5221445221445221,
      "grad_norm": 16.97848129272461,
      "learning_rate": 9.639907496464709e-05,
      "loss": 27.768,
      "step": 7
    },
    {
      "epoch": 0.5967365967365967,
      "grad_norm": 14.971733093261719,
      "learning_rate": 9.484552115439445e-05,
      "loss": 27.3351,
      "step": 8
    },
    {
      "epoch": 0.6713286713286714,
      "grad_norm": 14.10373592376709,
      "learning_rate": 9.303355384947076e-05,
      "loss": 27.1077,
      "step": 9
    },
    {
      "epoch": 0.745920745920746,
      "grad_norm": 15.110809326171875,
      "learning_rate": 9.097492435315756e-05,
      "loss": 25.9653,
      "step": 10
    },
    {
      "epoch": 0.8205128205128205,
      "grad_norm": 12.38207721710205,
      "learning_rate": 8.868298366769954e-05,
      "loss": 26.2124,
      "step": 11
    },
    {
      "epoch": 0.8951048951048951,
      "grad_norm": 12.963375091552734,
      "learning_rate": 8.617259590793198e-05,
      "loss": 26.3925,
      "step": 12
    },
    {
      "epoch": 0.9696969696969697,
      "grad_norm": 17.126346588134766,
      "learning_rate": 8.346004190179198e-05,
      "loss": 27.4152,
      "step": 13
    },
    {
      "epoch": 1.0442890442890442,
      "grad_norm": 11.388605117797852,
      "learning_rate": 8.056291360290201e-05,
      "loss": 24.0213,
      "step": 14
    },
    {
      "epoch": 1.118881118881119,
      "grad_norm": 13.905165672302246,
      "learning_rate": 7.75e-05,
      "loss": 24.7093,
      "step": 15
    },
    {
      "epoch": 1.1934731934731935,
      "grad_norm": 14.038837432861328,
      "learning_rate": 7.429116526313744e-05,
      "loss": 23.8335,
      "step": 16
    },
    {
      "epoch": 1.2680652680652682,
      "grad_norm": 14.542243957519531,
      "learning_rate": 7.095721991691411e-05,
      "loss": 23.6019,
      "step": 17
    },
    {
      "epoch": 1.3426573426573427,
      "grad_norm": 11.928253173828125,
      "learning_rate": 6.751978587624037e-05,
      "loss": 23.773,
      "step": 18
    },
    {
      "epoch": 1.4172494172494172,
      "grad_norm": 13.130093574523926,
      "learning_rate": 6.400115621992201e-05,
      "loss": 23.8141,
      "step": 19
    },
    {
      "epoch": 1.491841491841492,
      "grad_norm": 15.3530855178833,
      "learning_rate": 6.042415061148954e-05,
      "loss": 23.2327,
      "step": 20
    },
    {
      "epoch": 1.5664335664335665,
      "grad_norm": 10.706517219543457,
      "learning_rate": 5.681196730492368e-05,
      "loss": 23.0154,
      "step": 21
    },
    {
      "epoch": 1.641025641025641,
      "grad_norm": 12.62425708770752,
      "learning_rate": 5.318803269507634e-05,
      "loss": 22.7153,
      "step": 22
    },
    {
      "epoch": 1.7156177156177157,
      "grad_norm": 15.199002265930176,
      "learning_rate": 4.9575849388510473e-05,
      "loss": 23.8232,
      "step": 23
    },
    {
      "epoch": 1.7902097902097902,
      "grad_norm": 10.122224807739258,
      "learning_rate": 4.599884378007802e-05,
      "loss": 21.8521,
      "step": 24
    },
    {
      "epoch": 1.8648018648018647,
      "grad_norm": 12.264386177062988,
      "learning_rate": 4.248021412375963e-05,
      "loss": 23.36,
      "step": 25
    },
    {
      "epoch": 1.8648018648018647,
      "eval_loss": 0.7650135159492493,
      "eval_runtime": 4.3787,
      "eval_samples_per_second": 11.419,
      "eval_steps_per_second": 2.969,
      "step": 25
    },
    {
      "epoch": 1.9393939393939394,
      "grad_norm": 13.958155632019043,
      "learning_rate": 3.904278008308589e-05,
      "loss": 23.0799,
      "step": 26
    },
    {
      "epoch": 2.013986013986014,
      "grad_norm": 13.080740928649902,
      "learning_rate": 3.570883473686256e-05,
      "loss": 22.7616,
      "step": 27
    },
    {
      "epoch": 2.0885780885780885,
      "grad_norm": 9.918713569641113,
      "learning_rate": 3.250000000000001e-05,
      "loss": 22.2095,
      "step": 28
    },
    {
      "epoch": 2.163170163170163,
      "grad_norm": 12.069844245910645,
      "learning_rate": 2.9437086397097995e-05,
      "loss": 21.0234,
      "step": 29
    },
    {
      "epoch": 2.237762237762238,
      "grad_norm": 14.520157814025879,
      "learning_rate": 2.6539958098208027e-05,
      "loss": 21.4936,
      "step": 30
    },
    {
      "epoch": 2.312354312354312,
      "grad_norm": 9.91746711730957,
      "learning_rate": 2.3827404092068032e-05,
      "loss": 20.1627,
      "step": 31
    },
    {
      "epoch": 2.386946386946387,
      "grad_norm": 11.826277732849121,
      "learning_rate": 2.1317016332300447e-05,
      "loss": 21.355,
      "step": 32
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 13.523852348327637,
      "learning_rate": 1.902507564684246e-05,
      "loss": 20.8342,
      "step": 33
    },
    {
      "epoch": 2.5361305361305364,
      "grad_norm": 11.003284454345703,
      "learning_rate": 1.6966446150529244e-05,
      "loss": 20.2875,
      "step": 34
    },
    {
      "epoch": 2.6107226107226107,
      "grad_norm": 11.648452758789062,
      "learning_rate": 1.515447884560556e-05,
      "loss": 21.3661,
      "step": 35
    },
    {
      "epoch": 2.6853146853146854,
      "grad_norm": 13.226274490356445,
      "learning_rate": 1.3600925035352913e-05,
      "loss": 20.4063,
      "step": 36
    },
    {
      "epoch": 2.7599067599067597,
      "grad_norm": 12.421295166015625,
      "learning_rate": 1.2315860112378455e-05,
      "loss": 20.5653,
      "step": 37
    },
    {
      "epoch": 2.8344988344988344,
      "grad_norm": 11.139906883239746,
      "learning_rate": 1.130761821582766e-05,
      "loss": 21.522,
      "step": 38
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 12.317776679992676,
      "learning_rate": 1.0582738181293923e-05,
      "loss": 21.0642,
      "step": 39
    },
    {
      "epoch": 2.983682983682984,
      "grad_norm": 13.74180793762207,
      "learning_rate": 1.0145921133960554e-05,
      "loss": 20.5685,
      "step": 40
    },
    {
      "epoch": 3.058275058275058,
      "grad_norm": 10.381887435913086,
      "learning_rate": 1e-05,
      "loss": 21.0001,
      "step": 41
    }
  ],
  "logging_steps": 1,
  "max_steps": 41,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.63958278020268e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|