{
  "best_metric": 0.6652013063430786,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.05042864346949067,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010085728693898135,
      "grad_norm": 1.3238667249679565,
      "learning_rate": 1.013e-05,
      "loss": 0.928,
      "step": 1
    },
    {
      "epoch": 0.0010085728693898135,
      "eval_loss": 1.1040126085281372,
      "eval_runtime": 101.7212,
      "eval_samples_per_second": 4.109,
      "eval_steps_per_second": 1.032,
      "step": 1
    },
    {
      "epoch": 0.002017145738779627,
      "grad_norm": 1.4647407531738281,
      "learning_rate": 2.026e-05,
      "loss": 1.1291,
      "step": 2
    },
    {
      "epoch": 0.0030257186081694403,
      "grad_norm": 1.7250173091888428,
      "learning_rate": 3.039e-05,
      "loss": 1.0937,
      "step": 3
    },
    {
      "epoch": 0.004034291477559254,
      "grad_norm": 1.4756126403808594,
      "learning_rate": 4.052e-05,
      "loss": 1.0637,
      "step": 4
    },
    {
      "epoch": 0.005042864346949067,
      "grad_norm": 1.4971727132797241,
      "learning_rate": 5.065e-05,
      "loss": 1.1541,
      "step": 5
    },
    {
      "epoch": 0.006051437216338881,
      "grad_norm": 1.1345500946044922,
      "learning_rate": 6.078e-05,
      "loss": 0.8789,
      "step": 6
    },
    {
      "epoch": 0.0070600100857286935,
      "grad_norm": 1.1694258451461792,
      "learning_rate": 7.091e-05,
      "loss": 0.7942,
      "step": 7
    },
    {
      "epoch": 0.008068582955118508,
      "grad_norm": 0.9738374948501587,
      "learning_rate": 8.104e-05,
      "loss": 0.7897,
      "step": 8
    },
    {
      "epoch": 0.009077155824508321,
      "grad_norm": 1.2070674896240234,
      "learning_rate": 9.117e-05,
      "loss": 0.8907,
      "step": 9
    },
    {
      "epoch": 0.010085728693898134,
      "grad_norm": 0.9936267733573914,
      "learning_rate": 0.0001013,
      "loss": 0.7356,
      "step": 10
    },
    {
      "epoch": 0.011094301563287948,
      "grad_norm": 0.9822749495506287,
      "learning_rate": 0.00010076684210526316,
      "loss": 0.787,
      "step": 11
    },
    {
      "epoch": 0.012102874432677761,
      "grad_norm": 0.9206593632698059,
      "learning_rate": 0.0001002336842105263,
      "loss": 0.7268,
      "step": 12
    },
    {
      "epoch": 0.013111447302067574,
      "grad_norm": 0.8007137179374695,
      "learning_rate": 9.970052631578946e-05,
      "loss": 0.6785,
      "step": 13
    },
    {
      "epoch": 0.014120020171457387,
      "grad_norm": 0.7873572707176208,
      "learning_rate": 9.916736842105263e-05,
      "loss": 0.65,
      "step": 14
    },
    {
      "epoch": 0.015128593040847202,
      "grad_norm": 0.8010468482971191,
      "learning_rate": 9.863421052631579e-05,
      "loss": 0.6508,
      "step": 15
    },
    {
      "epoch": 0.016137165910237016,
      "grad_norm": 0.7741969227790833,
      "learning_rate": 9.810105263157895e-05,
      "loss": 0.5805,
      "step": 16
    },
    {
      "epoch": 0.01714573877962683,
      "grad_norm": 0.7493349313735962,
      "learning_rate": 9.756789473684211e-05,
      "loss": 0.564,
      "step": 17
    },
    {
      "epoch": 0.018154311649016642,
      "grad_norm": 0.9663587212562561,
      "learning_rate": 9.703473684210525e-05,
      "loss": 0.808,
      "step": 18
    },
    {
      "epoch": 0.019162884518406455,
      "grad_norm": 0.7793949246406555,
      "learning_rate": 9.650157894736842e-05,
      "loss": 0.5491,
      "step": 19
    },
    {
      "epoch": 0.020171457387796268,
      "grad_norm": 0.7455626726150513,
      "learning_rate": 9.596842105263158e-05,
      "loss": 0.6404,
      "step": 20
    },
    {
      "epoch": 0.02118003025718608,
      "grad_norm": 0.7438361048698425,
      "learning_rate": 9.543526315789474e-05,
      "loss": 0.6354,
      "step": 21
    },
    {
      "epoch": 0.022188603126575897,
      "grad_norm": 0.740470826625824,
      "learning_rate": 9.49021052631579e-05,
      "loss": 0.5225,
      "step": 22
    },
    {
      "epoch": 0.02319717599596571,
      "grad_norm": 0.8656465411186218,
      "learning_rate": 9.436894736842105e-05,
      "loss": 0.7408,
      "step": 23
    },
    {
      "epoch": 0.024205748865355523,
      "grad_norm": 0.9166726469993591,
      "learning_rate": 9.38357894736842e-05,
      "loss": 0.6548,
      "step": 24
    },
    {
      "epoch": 0.025214321734745335,
      "grad_norm": 0.8170962333679199,
      "learning_rate": 9.330263157894737e-05,
      "loss": 0.654,
      "step": 25
    },
    {
      "epoch": 0.026222894604135148,
      "grad_norm": 0.7673491835594177,
      "learning_rate": 9.276947368421051e-05,
      "loss": 0.6428,
      "step": 26
    },
    {
      "epoch": 0.02723146747352496,
      "grad_norm": 0.8028613924980164,
      "learning_rate": 9.223631578947369e-05,
      "loss": 0.5204,
      "step": 27
    },
    {
      "epoch": 0.028240040342914774,
      "grad_norm": 0.8541864156723022,
      "learning_rate": 9.170315789473684e-05,
      "loss": 0.6858,
      "step": 28
    },
    {
      "epoch": 0.02924861321230459,
      "grad_norm": 0.8092413544654846,
      "learning_rate": 9.117e-05,
      "loss": 0.6384,
      "step": 29
    },
    {
      "epoch": 0.030257186081694403,
      "grad_norm": 0.9186341762542725,
      "learning_rate": 9.063684210526316e-05,
      "loss": 0.7882,
      "step": 30
    },
    {
      "epoch": 0.031265758951084216,
      "grad_norm": 0.9697185158729553,
      "learning_rate": 9.010368421052632e-05,
      "loss": 0.799,
      "step": 31
    },
    {
      "epoch": 0.03227433182047403,
      "grad_norm": 0.7349818348884583,
      "learning_rate": 8.957052631578946e-05,
      "loss": 0.6457,
      "step": 32
    },
    {
      "epoch": 0.03328290468986384,
      "grad_norm": 0.843620240688324,
      "learning_rate": 8.903736842105263e-05,
      "loss": 0.7368,
      "step": 33
    },
    {
      "epoch": 0.03429147755925366,
      "grad_norm": 0.8225664496421814,
      "learning_rate": 8.850421052631579e-05,
      "loss": 0.7159,
      "step": 34
    },
    {
      "epoch": 0.03530005042864347,
      "grad_norm": 0.9680421352386475,
      "learning_rate": 8.797105263157895e-05,
      "loss": 0.8112,
      "step": 35
    },
    {
      "epoch": 0.036308623298033284,
      "grad_norm": 0.9144193530082703,
      "learning_rate": 8.743789473684211e-05,
      "loss": 0.7829,
      "step": 36
    },
    {
      "epoch": 0.03731719616742309,
      "grad_norm": 0.8283833265304565,
      "learning_rate": 8.690473684210526e-05,
      "loss": 0.7398,
      "step": 37
    },
    {
      "epoch": 0.03832576903681291,
      "grad_norm": 0.7781999707221985,
      "learning_rate": 8.637157894736842e-05,
      "loss": 0.7255,
      "step": 38
    },
    {
      "epoch": 0.039334341906202726,
      "grad_norm": 0.7448036074638367,
      "learning_rate": 8.583842105263158e-05,
      "loss": 0.6482,
      "step": 39
    },
    {
      "epoch": 0.040342914775592535,
      "grad_norm": 0.7694168090820312,
      "learning_rate": 8.530526315789472e-05,
      "loss": 0.6282,
      "step": 40
    },
    {
      "epoch": 0.04135148764498235,
      "grad_norm": 0.9006367325782776,
      "learning_rate": 8.47721052631579e-05,
      "loss": 0.7293,
      "step": 41
    },
    {
      "epoch": 0.04236006051437216,
      "grad_norm": 0.9051785469055176,
      "learning_rate": 8.423894736842105e-05,
      "loss": 0.8256,
      "step": 42
    },
    {
      "epoch": 0.04336863338376198,
      "grad_norm": 0.9707128405570984,
      "learning_rate": 8.37057894736842e-05,
      "loss": 0.7974,
      "step": 43
    },
    {
      "epoch": 0.044377206253151794,
      "grad_norm": 0.885073721408844,
      "learning_rate": 8.317263157894737e-05,
      "loss": 0.7655,
      "step": 44
    },
    {
      "epoch": 0.0453857791225416,
      "grad_norm": 0.9013693928718567,
      "learning_rate": 8.263947368421053e-05,
      "loss": 0.7205,
      "step": 45
    },
    {
      "epoch": 0.04639435199193142,
      "grad_norm": 1.1316449642181396,
      "learning_rate": 8.210631578947368e-05,
      "loss": 0.7543,
      "step": 46
    },
    {
      "epoch": 0.04740292486132123,
      "grad_norm": 0.8760470151901245,
      "learning_rate": 8.157315789473684e-05,
      "loss": 0.7771,
      "step": 47
    },
    {
      "epoch": 0.048411497730711045,
      "grad_norm": 1.0497716665267944,
      "learning_rate": 8.104e-05,
      "loss": 0.8032,
      "step": 48
    },
    {
      "epoch": 0.049420070600100854,
      "grad_norm": 0.9932529926300049,
      "learning_rate": 8.050684210526316e-05,
      "loss": 0.7941,
      "step": 49
    },
    {
      "epoch": 0.05042864346949067,
      "grad_norm": 1.1281291246414185,
      "learning_rate": 7.997368421052632e-05,
      "loss": 0.845,
      "step": 50
    },
    {
      "epoch": 0.05042864346949067,
      "eval_loss": 0.6652013063430786,
      "eval_runtime": 101.679,
      "eval_samples_per_second": 4.111,
      "eval_steps_per_second": 1.033,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.357300236897485e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}