|
{ |
|
"best_metric": 2.246959686279297, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.0013698817792024548, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.7397635584049098e-05, |
|
"grad_norm": 17.083011627197266, |
|
"learning_rate": 1.0110000000000001e-05, |
|
"loss": 3.6833, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.7397635584049098e-05, |
|
"eval_loss": 5.066332817077637, |
|
"eval_runtime": 907.7509, |
|
"eval_samples_per_second": 16.931, |
|
"eval_steps_per_second": 4.234, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 5.4795271168098196e-05, |
|
"grad_norm": 30.948528289794922, |
|
"learning_rate": 2.0220000000000003e-05, |
|
"loss": 5.5955, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 8.219290675214729e-05, |
|
"grad_norm": 24.4798641204834, |
|
"learning_rate": 3.033e-05, |
|
"loss": 5.2317, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00010959054233619639, |
|
"grad_norm": 26.41942596435547, |
|
"learning_rate": 4.0440000000000006e-05, |
|
"loss": 6.3092, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00013698817792024548, |
|
"grad_norm": 31.87883949279785, |
|
"learning_rate": 5.055e-05, |
|
"loss": 5.9147, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00016438581350429457, |
|
"grad_norm": 33.11574172973633, |
|
"learning_rate": 6.066e-05, |
|
"loss": 6.6052, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00019178344908834366, |
|
"grad_norm": 21.89510726928711, |
|
"learning_rate": 7.077e-05, |
|
"loss": 4.7486, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00021918108467239278, |
|
"grad_norm": 20.02174949645996, |
|
"learning_rate": 8.088000000000001e-05, |
|
"loss": 3.4227, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00024657872025644185, |
|
"grad_norm": 19.635520935058594, |
|
"learning_rate": 9.099000000000001e-05, |
|
"loss": 3.8081, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00027397635584049096, |
|
"grad_norm": 23.233644485473633, |
|
"learning_rate": 0.0001011, |
|
"loss": 3.0193, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0003013739914245401, |
|
"grad_norm": 22.40667724609375, |
|
"learning_rate": 0.00010056789473684211, |
|
"loss": 2.9783, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00032877162700858915, |
|
"grad_norm": 15.5116548538208, |
|
"learning_rate": 0.00010003578947368421, |
|
"loss": 2.7802, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00035616926259263826, |
|
"grad_norm": 12.772533416748047, |
|
"learning_rate": 9.950368421052632e-05, |
|
"loss": 2.3746, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.00038356689817668733, |
|
"grad_norm": 12.892407417297363, |
|
"learning_rate": 9.897157894736842e-05, |
|
"loss": 1.5691, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00041096453376073645, |
|
"grad_norm": 12.087071418762207, |
|
"learning_rate": 9.843947368421053e-05, |
|
"loss": 1.8777, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00043836216934478556, |
|
"grad_norm": 19.26116943359375, |
|
"learning_rate": 9.790736842105264e-05, |
|
"loss": 2.8353, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00046575980492883463, |
|
"grad_norm": 11.111328125, |
|
"learning_rate": 9.737526315789474e-05, |
|
"loss": 1.8071, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0004931574405128837, |
|
"grad_norm": 12.628350257873535, |
|
"learning_rate": 9.684315789473684e-05, |
|
"loss": 2.1004, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0005205550760969328, |
|
"grad_norm": 14.211240768432617, |
|
"learning_rate": 9.631105263157895e-05, |
|
"loss": 2.2592, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0005479527116809819, |
|
"grad_norm": 15.376936912536621, |
|
"learning_rate": 9.577894736842105e-05, |
|
"loss": 1.8852, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.000575350347265031, |
|
"grad_norm": 12.521246910095215, |
|
"learning_rate": 9.524684210526317e-05, |
|
"loss": 2.0885, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0006027479828490802, |
|
"grad_norm": 13.282447814941406, |
|
"learning_rate": 9.471473684210526e-05, |
|
"loss": 2.1103, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0006301456184331292, |
|
"grad_norm": 11.5529203414917, |
|
"learning_rate": 9.418263157894737e-05, |
|
"loss": 1.9263, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0006575432540171783, |
|
"grad_norm": 11.215520858764648, |
|
"learning_rate": 9.365052631578948e-05, |
|
"loss": 1.9848, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0006849408896012274, |
|
"grad_norm": 14.77686882019043, |
|
"learning_rate": 9.311842105263157e-05, |
|
"loss": 2.4333, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0007123385251852765, |
|
"grad_norm": 12.779730796813965, |
|
"learning_rate": 9.258631578947368e-05, |
|
"loss": 2.5709, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0007397361607693256, |
|
"grad_norm": 11.351499557495117, |
|
"learning_rate": 9.20542105263158e-05, |
|
"loss": 2.2625, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0007671337963533747, |
|
"grad_norm": 13.397592544555664, |
|
"learning_rate": 9.15221052631579e-05, |
|
"loss": 2.7227, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0007945314319374238, |
|
"grad_norm": 10.616507530212402, |
|
"learning_rate": 9.099000000000001e-05, |
|
"loss": 1.8181, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0008219290675214729, |
|
"grad_norm": 11.262557983398438, |
|
"learning_rate": 9.045789473684212e-05, |
|
"loss": 1.7244, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.000849326703105522, |
|
"grad_norm": 11.611719131469727, |
|
"learning_rate": 8.992578947368421e-05, |
|
"loss": 1.7876, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0008767243386895711, |
|
"grad_norm": 12.462687492370605, |
|
"learning_rate": 8.939368421052632e-05, |
|
"loss": 2.3846, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0009041219742736201, |
|
"grad_norm": 11.689743995666504, |
|
"learning_rate": 8.886157894736841e-05, |
|
"loss": 1.893, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0009315196098576693, |
|
"grad_norm": 15.821490287780762, |
|
"learning_rate": 8.832947368421054e-05, |
|
"loss": 2.277, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0009589172454417184, |
|
"grad_norm": 11.826910018920898, |
|
"learning_rate": 8.779736842105264e-05, |
|
"loss": 1.8275, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0009863148810257674, |
|
"grad_norm": 10.841914176940918, |
|
"learning_rate": 8.726526315789474e-05, |
|
"loss": 1.553, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0010137125166098165, |
|
"grad_norm": 11.523122787475586, |
|
"learning_rate": 8.673315789473685e-05, |
|
"loss": 1.9103, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0010411101521938656, |
|
"grad_norm": 11.838396072387695, |
|
"learning_rate": 8.620105263157896e-05, |
|
"loss": 1.5697, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0010685077877779147, |
|
"grad_norm": 11.571324348449707, |
|
"learning_rate": 8.566894736842105e-05, |
|
"loss": 2.0003, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0010959054233619639, |
|
"grad_norm": 10.516695022583008, |
|
"learning_rate": 8.513684210526316e-05, |
|
"loss": 1.5583, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.001123303058946013, |
|
"grad_norm": 9.6254301071167, |
|
"learning_rate": 8.460473684210527e-05, |
|
"loss": 1.3097, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.001150700694530062, |
|
"grad_norm": 9.205260276794434, |
|
"learning_rate": 8.407263157894738e-05, |
|
"loss": 1.32, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0011780983301141112, |
|
"grad_norm": 12.572107315063477, |
|
"learning_rate": 8.354052631578948e-05, |
|
"loss": 2.1812, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0012054959656981603, |
|
"grad_norm": 12.90071964263916, |
|
"learning_rate": 8.300842105263158e-05, |
|
"loss": 1.3577, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0012328936012822094, |
|
"grad_norm": 13.483455657958984, |
|
"learning_rate": 8.247631578947369e-05, |
|
"loss": 2.3667, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0012602912368662584, |
|
"grad_norm": 15.868042945861816, |
|
"learning_rate": 8.19442105263158e-05, |
|
"loss": 2.6061, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0012876888724503075, |
|
"grad_norm": 13.639692306518555, |
|
"learning_rate": 8.141210526315789e-05, |
|
"loss": 2.9595, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0013150865080343566, |
|
"grad_norm": 11.425675392150879, |
|
"learning_rate": 8.088000000000001e-05, |
|
"loss": 1.7875, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0013424841436184057, |
|
"grad_norm": 11.319607734680176, |
|
"learning_rate": 8.03478947368421e-05, |
|
"loss": 2.12, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0013698817792024548, |
|
"grad_norm": 13.456693649291992, |
|
"learning_rate": 7.981578947368421e-05, |
|
"loss": 1.9008, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0013698817792024548, |
|
"eval_loss": 2.246959686279297, |
|
"eval_runtime": 901.5117, |
|
"eval_samples_per_second": 17.048, |
|
"eval_steps_per_second": 4.263, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5721720518344704.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|