|
{ |
|
"best_metric": 1.9446079730987549, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.005479527116809819, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.7397635584049098e-05, |
|
"grad_norm": 17.083011627197266, |
|
"learning_rate": 1.0110000000000001e-05, |
|
"loss": 3.6833, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.7397635584049098e-05, |
|
"eval_loss": 5.066332817077637, |
|
"eval_runtime": 907.7509, |
|
"eval_samples_per_second": 16.931, |
|
"eval_steps_per_second": 4.234, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 5.4795271168098196e-05, |
|
"grad_norm": 30.948528289794922, |
|
"learning_rate": 2.0220000000000003e-05, |
|
"loss": 5.5955, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 8.219290675214729e-05, |
|
"grad_norm": 24.4798641204834, |
|
"learning_rate": 3.033e-05, |
|
"loss": 5.2317, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00010959054233619639, |
|
"grad_norm": 26.41942596435547, |
|
"learning_rate": 4.0440000000000006e-05, |
|
"loss": 6.3092, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00013698817792024548, |
|
"grad_norm": 31.87883949279785, |
|
"learning_rate": 5.055e-05, |
|
"loss": 5.9147, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00016438581350429457, |
|
"grad_norm": 33.11574172973633, |
|
"learning_rate": 6.066e-05, |
|
"loss": 6.6052, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00019178344908834366, |
|
"grad_norm": 21.89510726928711, |
|
"learning_rate": 7.077e-05, |
|
"loss": 4.7486, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00021918108467239278, |
|
"grad_norm": 20.02174949645996, |
|
"learning_rate": 8.088000000000001e-05, |
|
"loss": 3.4227, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00024657872025644185, |
|
"grad_norm": 19.635520935058594, |
|
"learning_rate": 9.099000000000001e-05, |
|
"loss": 3.8081, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00027397635584049096, |
|
"grad_norm": 23.233644485473633, |
|
"learning_rate": 0.0001011, |
|
"loss": 3.0193, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0003013739914245401, |
|
"grad_norm": 22.40667724609375, |
|
"learning_rate": 0.00010056789473684211, |
|
"loss": 2.9783, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00032877162700858915, |
|
"grad_norm": 15.5116548538208, |
|
"learning_rate": 0.00010003578947368421, |
|
"loss": 2.7802, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00035616926259263826, |
|
"grad_norm": 12.772533416748047, |
|
"learning_rate": 9.950368421052632e-05, |
|
"loss": 2.3746, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.00038356689817668733, |
|
"grad_norm": 12.892407417297363, |
|
"learning_rate": 9.897157894736842e-05, |
|
"loss": 1.5691, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00041096453376073645, |
|
"grad_norm": 12.087071418762207, |
|
"learning_rate": 9.843947368421053e-05, |
|
"loss": 1.8777, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00043836216934478556, |
|
"grad_norm": 19.26116943359375, |
|
"learning_rate": 9.790736842105264e-05, |
|
"loss": 2.8353, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00046575980492883463, |
|
"grad_norm": 11.111328125, |
|
"learning_rate": 9.737526315789474e-05, |
|
"loss": 1.8071, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0004931574405128837, |
|
"grad_norm": 12.628350257873535, |
|
"learning_rate": 9.684315789473684e-05, |
|
"loss": 2.1004, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0005205550760969328, |
|
"grad_norm": 14.211240768432617, |
|
"learning_rate": 9.631105263157895e-05, |
|
"loss": 2.2592, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0005479527116809819, |
|
"grad_norm": 15.376936912536621, |
|
"learning_rate": 9.577894736842105e-05, |
|
"loss": 1.8852, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.000575350347265031, |
|
"grad_norm": 12.521246910095215, |
|
"learning_rate": 9.524684210526317e-05, |
|
"loss": 2.0885, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0006027479828490802, |
|
"grad_norm": 13.282447814941406, |
|
"learning_rate": 9.471473684210526e-05, |
|
"loss": 2.1103, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0006301456184331292, |
|
"grad_norm": 11.5529203414917, |
|
"learning_rate": 9.418263157894737e-05, |
|
"loss": 1.9263, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0006575432540171783, |
|
"grad_norm": 11.215520858764648, |
|
"learning_rate": 9.365052631578948e-05, |
|
"loss": 1.9848, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0006849408896012274, |
|
"grad_norm": 14.77686882019043, |
|
"learning_rate": 9.311842105263157e-05, |
|
"loss": 2.4333, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0007123385251852765, |
|
"grad_norm": 12.779730796813965, |
|
"learning_rate": 9.258631578947368e-05, |
|
"loss": 2.5709, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0007397361607693256, |
|
"grad_norm": 11.351499557495117, |
|
"learning_rate": 9.20542105263158e-05, |
|
"loss": 2.2625, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0007671337963533747, |
|
"grad_norm": 13.397592544555664, |
|
"learning_rate": 9.15221052631579e-05, |
|
"loss": 2.7227, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0007945314319374238, |
|
"grad_norm": 10.616507530212402, |
|
"learning_rate": 9.099000000000001e-05, |
|
"loss": 1.8181, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0008219290675214729, |
|
"grad_norm": 11.262557983398438, |
|
"learning_rate": 9.045789473684212e-05, |
|
"loss": 1.7244, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.000849326703105522, |
|
"grad_norm": 11.611719131469727, |
|
"learning_rate": 8.992578947368421e-05, |
|
"loss": 1.7876, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0008767243386895711, |
|
"grad_norm": 12.462687492370605, |
|
"learning_rate": 8.939368421052632e-05, |
|
"loss": 2.3846, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0009041219742736201, |
|
"grad_norm": 11.689743995666504, |
|
"learning_rate": 8.886157894736841e-05, |
|
"loss": 1.893, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0009315196098576693, |
|
"grad_norm": 15.821490287780762, |
|
"learning_rate": 8.832947368421054e-05, |
|
"loss": 2.277, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0009589172454417184, |
|
"grad_norm": 11.826910018920898, |
|
"learning_rate": 8.779736842105264e-05, |
|
"loss": 1.8275, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0009863148810257674, |
|
"grad_norm": 10.841914176940918, |
|
"learning_rate": 8.726526315789474e-05, |
|
"loss": 1.553, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0010137125166098165, |
|
"grad_norm": 11.523122787475586, |
|
"learning_rate": 8.673315789473685e-05, |
|
"loss": 1.9103, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0010411101521938656, |
|
"grad_norm": 11.838396072387695, |
|
"learning_rate": 8.620105263157896e-05, |
|
"loss": 1.5697, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0010685077877779147, |
|
"grad_norm": 11.571324348449707, |
|
"learning_rate": 8.566894736842105e-05, |
|
"loss": 2.0003, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0010959054233619639, |
|
"grad_norm": 10.516695022583008, |
|
"learning_rate": 8.513684210526316e-05, |
|
"loss": 1.5583, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.001123303058946013, |
|
"grad_norm": 9.6254301071167, |
|
"learning_rate": 8.460473684210527e-05, |
|
"loss": 1.3097, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.001150700694530062, |
|
"grad_norm": 9.205260276794434, |
|
"learning_rate": 8.407263157894738e-05, |
|
"loss": 1.32, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0011780983301141112, |
|
"grad_norm": 12.572107315063477, |
|
"learning_rate": 8.354052631578948e-05, |
|
"loss": 2.1812, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0012054959656981603, |
|
"grad_norm": 12.90071964263916, |
|
"learning_rate": 8.300842105263158e-05, |
|
"loss": 1.3577, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0012328936012822094, |
|
"grad_norm": 13.483455657958984, |
|
"learning_rate": 8.247631578947369e-05, |
|
"loss": 2.3667, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0012602912368662584, |
|
"grad_norm": 15.868042945861816, |
|
"learning_rate": 8.19442105263158e-05, |
|
"loss": 2.6061, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0012876888724503075, |
|
"grad_norm": 13.639692306518555, |
|
"learning_rate": 8.141210526315789e-05, |
|
"loss": 2.9595, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0013150865080343566, |
|
"grad_norm": 11.425675392150879, |
|
"learning_rate": 8.088000000000001e-05, |
|
"loss": 1.7875, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0013424841436184057, |
|
"grad_norm": 11.319607734680176, |
|
"learning_rate": 8.03478947368421e-05, |
|
"loss": 2.12, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0013698817792024548, |
|
"grad_norm": 13.456693649291992, |
|
"learning_rate": 7.981578947368421e-05, |
|
"loss": 1.9008, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0013698817792024548, |
|
"eval_loss": 2.246959686279297, |
|
"eval_runtime": 901.5117, |
|
"eval_samples_per_second": 17.048, |
|
"eval_steps_per_second": 4.263, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.001397279414786504, |
|
"grad_norm": 9.39171314239502, |
|
"learning_rate": 7.928368421052632e-05, |
|
"loss": 2.5679, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.001424677050370553, |
|
"grad_norm": 10.350808143615723, |
|
"learning_rate": 7.875157894736842e-05, |
|
"loss": 2.6965, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0014520746859546022, |
|
"grad_norm": 11.915090560913086, |
|
"learning_rate": 7.821947368421053e-05, |
|
"loss": 2.7901, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0014794723215386513, |
|
"grad_norm": 14.726212501525879, |
|
"learning_rate": 7.768736842105263e-05, |
|
"loss": 3.894, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0015068699571227004, |
|
"grad_norm": 14.440069198608398, |
|
"learning_rate": 7.715526315789474e-05, |
|
"loss": 3.8703, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0015342675927067493, |
|
"grad_norm": 16.271181106567383, |
|
"learning_rate": 7.662315789473685e-05, |
|
"loss": 3.6617, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0015616652282907984, |
|
"grad_norm": 13.526236534118652, |
|
"learning_rate": 7.609105263157895e-05, |
|
"loss": 2.7699, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0015890628638748476, |
|
"grad_norm": 9.286958694458008, |
|
"learning_rate": 7.555894736842105e-05, |
|
"loss": 2.8028, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0016164604994588967, |
|
"grad_norm": 14.175232887268066, |
|
"learning_rate": 7.502684210526316e-05, |
|
"loss": 3.3656, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0016438581350429458, |
|
"grad_norm": 9.617558479309082, |
|
"learning_rate": 7.449473684210526e-05, |
|
"loss": 1.9919, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.001671255770626995, |
|
"grad_norm": 9.61012077331543, |
|
"learning_rate": 7.396263157894738e-05, |
|
"loss": 1.9658, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.001698653406211044, |
|
"grad_norm": 10.791799545288086, |
|
"learning_rate": 7.343052631578949e-05, |
|
"loss": 2.7421, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0017260510417950931, |
|
"grad_norm": 11.175983428955078, |
|
"learning_rate": 7.289842105263158e-05, |
|
"loss": 1.7803, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0017534486773791423, |
|
"grad_norm": 21.124692916870117, |
|
"learning_rate": 7.236631578947369e-05, |
|
"loss": 1.9238, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0017808463129631914, |
|
"grad_norm": 11.414015769958496, |
|
"learning_rate": 7.183421052631579e-05, |
|
"loss": 1.9238, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0018082439485472403, |
|
"grad_norm": 9.583854675292969, |
|
"learning_rate": 7.13021052631579e-05, |
|
"loss": 2.2229, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0018356415841312894, |
|
"grad_norm": 11.926726341247559, |
|
"learning_rate": 7.077e-05, |
|
"loss": 1.9964, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0018630392197153385, |
|
"grad_norm": 13.71705436706543, |
|
"learning_rate": 7.023789473684211e-05, |
|
"loss": 2.9112, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0018904368552993876, |
|
"grad_norm": 9.148221969604492, |
|
"learning_rate": 6.970578947368422e-05, |
|
"loss": 2.0433, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0019178344908834368, |
|
"grad_norm": 11.769927024841309, |
|
"learning_rate": 6.917368421052633e-05, |
|
"loss": 1.9223, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0019452321264674859, |
|
"grad_norm": 12.037910461425781, |
|
"learning_rate": 6.864157894736842e-05, |
|
"loss": 1.8342, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0019726297620515348, |
|
"grad_norm": 9.010817527770996, |
|
"learning_rate": 6.810947368421053e-05, |
|
"loss": 1.9317, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.002000027397635584, |
|
"grad_norm": 12.990135192871094, |
|
"learning_rate": 6.757736842105264e-05, |
|
"loss": 2.7586, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.002027425033219633, |
|
"grad_norm": 9.70896053314209, |
|
"learning_rate": 6.704526315789473e-05, |
|
"loss": 1.9642, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.002054822668803682, |
|
"grad_norm": 10.410635948181152, |
|
"learning_rate": 6.651315789473685e-05, |
|
"loss": 1.9407, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0020822203043877312, |
|
"grad_norm": 11.185718536376953, |
|
"learning_rate": 6.598105263157895e-05, |
|
"loss": 1.7685, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0021096179399717804, |
|
"grad_norm": 12.08330249786377, |
|
"learning_rate": 6.544894736842106e-05, |
|
"loss": 2.5273, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0021370155755558295, |
|
"grad_norm": 10.257572174072266, |
|
"learning_rate": 6.491684210526317e-05, |
|
"loss": 1.6406, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0021644132111398786, |
|
"grad_norm": 8.938873291015625, |
|
"learning_rate": 6.438473684210526e-05, |
|
"loss": 1.4209, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0021918108467239277, |
|
"grad_norm": 12.932609558105469, |
|
"learning_rate": 6.385263157894737e-05, |
|
"loss": 2.4091, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.002219208482307977, |
|
"grad_norm": 13.582636833190918, |
|
"learning_rate": 6.332052631578948e-05, |
|
"loss": 2.4494, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.002246606117892026, |
|
"grad_norm": 10.341235160827637, |
|
"learning_rate": 6.278842105263159e-05, |
|
"loss": 1.9182, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.002274003753476075, |
|
"grad_norm": 9.691158294677734, |
|
"learning_rate": 6.22563157894737e-05, |
|
"loss": 1.6645, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.002301401389060124, |
|
"grad_norm": 9.134404182434082, |
|
"learning_rate": 6.172421052631579e-05, |
|
"loss": 1.5419, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0023287990246441733, |
|
"grad_norm": 13.0061674118042, |
|
"learning_rate": 6.11921052631579e-05, |
|
"loss": 2.0695, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0023561966602282224, |
|
"grad_norm": 11.673482894897461, |
|
"learning_rate": 6.066e-05, |
|
"loss": 1.6456, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0023835942958122715, |
|
"grad_norm": 12.235639572143555, |
|
"learning_rate": 6.012789473684211e-05, |
|
"loss": 2.0552, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0024109919313963207, |
|
"grad_norm": 12.531293869018555, |
|
"learning_rate": 5.959578947368421e-05, |
|
"loss": 1.7928, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0024383895669803698, |
|
"grad_norm": 11.760167121887207, |
|
"learning_rate": 5.9063684210526324e-05, |
|
"loss": 1.9041, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.002465787202564419, |
|
"grad_norm": 11.60911750793457, |
|
"learning_rate": 5.8531578947368425e-05, |
|
"loss": 1.7488, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0024931848381484676, |
|
"grad_norm": 12.436223983764648, |
|
"learning_rate": 5.7999473684210527e-05, |
|
"loss": 1.8447, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0025205824737325167, |
|
"grad_norm": 10.578962326049805, |
|
"learning_rate": 5.7467368421052635e-05, |
|
"loss": 1.7382, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.002547980109316566, |
|
"grad_norm": 11.361544609069824, |
|
"learning_rate": 5.6935263157894736e-05, |
|
"loss": 1.8159, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.002575377744900615, |
|
"grad_norm": 12.437856674194336, |
|
"learning_rate": 5.640315789473684e-05, |
|
"loss": 1.9681, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.002602775380484664, |
|
"grad_norm": 8.324384689331055, |
|
"learning_rate": 5.587105263157895e-05, |
|
"loss": 1.3098, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.002630173016068713, |
|
"grad_norm": 10.963112831115723, |
|
"learning_rate": 5.533894736842106e-05, |
|
"loss": 1.3977, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0026575706516527623, |
|
"grad_norm": 11.730253219604492, |
|
"learning_rate": 5.480684210526316e-05, |
|
"loss": 2.0322, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0026849682872368114, |
|
"grad_norm": 10.242620468139648, |
|
"learning_rate": 5.4274736842105264e-05, |
|
"loss": 1.5608, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0027123659228208605, |
|
"grad_norm": 8.740520477294922, |
|
"learning_rate": 5.374263157894737e-05, |
|
"loss": 1.5199, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0027397635584049096, |
|
"grad_norm": 11.723816871643066, |
|
"learning_rate": 5.3210526315789474e-05, |
|
"loss": 2.1569, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0027397635584049096, |
|
"eval_loss": 2.1092565059661865, |
|
"eval_runtime": 908.8001, |
|
"eval_samples_per_second": 16.911, |
|
"eval_steps_per_second": 4.229, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0027671611939889588, |
|
"grad_norm": 8.458086967468262, |
|
"learning_rate": 5.2678421052631576e-05, |
|
"loss": 2.8784, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.002794558829573008, |
|
"grad_norm": 12.095956802368164, |
|
"learning_rate": 5.214631578947369e-05, |
|
"loss": 3.8121, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.002821956465157057, |
|
"grad_norm": 12.647064208984375, |
|
"learning_rate": 5.161421052631579e-05, |
|
"loss": 3.4101, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.002849354100741106, |
|
"grad_norm": 13.06585693359375, |
|
"learning_rate": 5.10821052631579e-05, |
|
"loss": 2.9773, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.0028767517363251552, |
|
"grad_norm": 14.944098472595215, |
|
"learning_rate": 5.055e-05, |
|
"loss": 3.6446, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0029041493719092044, |
|
"grad_norm": 14.845952033996582, |
|
"learning_rate": 5.0017894736842104e-05, |
|
"loss": 4.0916, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0029315470074932535, |
|
"grad_norm": 15.211709976196289, |
|
"learning_rate": 4.948578947368421e-05, |
|
"loss": 4.2204, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0029589446430773026, |
|
"grad_norm": 8.81589412689209, |
|
"learning_rate": 4.895368421052632e-05, |
|
"loss": 1.4566, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0029863422786613517, |
|
"grad_norm": 12.095672607421875, |
|
"learning_rate": 4.842157894736842e-05, |
|
"loss": 2.6067, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.003013739914245401, |
|
"grad_norm": 11.675946235656738, |
|
"learning_rate": 4.7889473684210523e-05, |
|
"loss": 2.4116, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0030411375498294495, |
|
"grad_norm": 11.829989433288574, |
|
"learning_rate": 4.735736842105263e-05, |
|
"loss": 2.6115, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0030685351854134986, |
|
"grad_norm": 9.828085899353027, |
|
"learning_rate": 4.682526315789474e-05, |
|
"loss": 2.2881, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.0030959328209975477, |
|
"grad_norm": 9.46169376373291, |
|
"learning_rate": 4.629315789473684e-05, |
|
"loss": 1.9979, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.003123330456581597, |
|
"grad_norm": 9.22454833984375, |
|
"learning_rate": 4.576105263157895e-05, |
|
"loss": 1.9874, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.003150728092165646, |
|
"grad_norm": 10.429281234741211, |
|
"learning_rate": 4.522894736842106e-05, |
|
"loss": 2.2162, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.003178125727749695, |
|
"grad_norm": 11.570110321044922, |
|
"learning_rate": 4.469684210526316e-05, |
|
"loss": 2.4278, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0032055233633337442, |
|
"grad_norm": 9.045998573303223, |
|
"learning_rate": 4.416473684210527e-05, |
|
"loss": 1.808, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.0032329209989177933, |
|
"grad_norm": 10.189197540283203, |
|
"learning_rate": 4.363263157894737e-05, |
|
"loss": 1.539, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0032603186345018425, |
|
"grad_norm": 8.157354354858398, |
|
"learning_rate": 4.310052631578948e-05, |
|
"loss": 1.7214, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.0032877162700858916, |
|
"grad_norm": 8.792228698730469, |
|
"learning_rate": 4.256842105263158e-05, |
|
"loss": 1.3858, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0033151139056699407, |
|
"grad_norm": 10.97755241394043, |
|
"learning_rate": 4.203631578947369e-05, |
|
"loss": 1.77, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.00334251154125399, |
|
"grad_norm": 10.724143981933594, |
|
"learning_rate": 4.150421052631579e-05, |
|
"loss": 1.8881, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.003369909176838039, |
|
"grad_norm": 12.109813690185547, |
|
"learning_rate": 4.09721052631579e-05, |
|
"loss": 2.1987, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.003397306812422088, |
|
"grad_norm": 11.524436950683594, |
|
"learning_rate": 4.0440000000000006e-05, |
|
"loss": 1.9404, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.003424704448006137, |
|
"grad_norm": 9.770040512084961, |
|
"learning_rate": 3.990789473684211e-05, |
|
"loss": 1.5231, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0034521020835901863, |
|
"grad_norm": 14.594894409179688, |
|
"learning_rate": 3.937578947368421e-05, |
|
"loss": 2.1621, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.0034794997191742354, |
|
"grad_norm": 9.148670196533203, |
|
"learning_rate": 3.884368421052632e-05, |
|
"loss": 1.9675, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0035068973547582845, |
|
"grad_norm": 9.530057907104492, |
|
"learning_rate": 3.8311578947368426e-05, |
|
"loss": 1.8147, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.0035342949903423336, |
|
"grad_norm": 10.43026065826416, |
|
"learning_rate": 3.777947368421053e-05, |
|
"loss": 2.0498, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0035616926259263828, |
|
"grad_norm": 15.587285041809082, |
|
"learning_rate": 3.724736842105263e-05, |
|
"loss": 2.4641, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0035890902615104314, |
|
"grad_norm": 12.40803337097168, |
|
"learning_rate": 3.6715263157894744e-05, |
|
"loss": 2.7419, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.0036164878970944806, |
|
"grad_norm": 10.415118217468262, |
|
"learning_rate": 3.6183157894736845e-05, |
|
"loss": 2.1787, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0036438855326785297, |
|
"grad_norm": 9.937010765075684, |
|
"learning_rate": 3.565105263157895e-05, |
|
"loss": 1.8409, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.003671283168262579, |
|
"grad_norm": 10.181679725646973, |
|
"learning_rate": 3.5118947368421055e-05, |
|
"loss": 1.873, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.003698680803846628, |
|
"grad_norm": 8.590680122375488, |
|
"learning_rate": 3.458684210526316e-05, |
|
"loss": 1.5075, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.003726078439430677, |
|
"grad_norm": 10.049001693725586, |
|
"learning_rate": 3.4054736842105265e-05, |
|
"loss": 1.6106, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.003753476075014726, |
|
"grad_norm": 8.262350082397461, |
|
"learning_rate": 3.3522631578947366e-05, |
|
"loss": 1.2253, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0037808737105987753, |
|
"grad_norm": 10.25830078125, |
|
"learning_rate": 3.2990526315789475e-05, |
|
"loss": 1.6037, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.0038082713461828244, |
|
"grad_norm": 10.215044975280762, |
|
"learning_rate": 3.245842105263158e-05, |
|
"loss": 1.8698, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.0038356689817668735, |
|
"grad_norm": 9.670408248901367, |
|
"learning_rate": 3.1926315789473685e-05, |
|
"loss": 1.6544, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0038630666173509226, |
|
"grad_norm": 13.060770988464355, |
|
"learning_rate": 3.139421052631579e-05, |
|
"loss": 2.1578, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.0038904642529349717, |
|
"grad_norm": 10.066153526306152, |
|
"learning_rate": 3.0862105263157894e-05, |
|
"loss": 1.7932, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.00391786188851902, |
|
"grad_norm": 9.591915130615234, |
|
"learning_rate": 3.033e-05, |
|
"loss": 1.8039, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.0039452595241030695, |
|
"grad_norm": 9.10180950164795, |
|
"learning_rate": 2.9797894736842104e-05, |
|
"loss": 1.2231, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.003972657159687119, |
|
"grad_norm": 8.49804401397705, |
|
"learning_rate": 2.9265789473684213e-05, |
|
"loss": 1.2112, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.004000054795271168, |
|
"grad_norm": 10.35539436340332, |
|
"learning_rate": 2.8733684210526317e-05, |
|
"loss": 1.397, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.004027452430855217, |
|
"grad_norm": 12.644351959228516, |
|
"learning_rate": 2.820157894736842e-05, |
|
"loss": 1.6601, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.004054850066439266, |
|
"grad_norm": 10.167571067810059, |
|
"learning_rate": 2.766947368421053e-05, |
|
"loss": 1.6444, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.004082247702023315, |
|
"grad_norm": 11.871386528015137, |
|
"learning_rate": 2.7137368421052632e-05, |
|
"loss": 1.9473, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.004109645337607364, |
|
"grad_norm": 9.358609199523926, |
|
"learning_rate": 2.6605263157894737e-05, |
|
"loss": 1.6992, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.004109645337607364, |
|
"eval_loss": 2.0133330821990967, |
|
"eval_runtime": 908.0735, |
|
"eval_samples_per_second": 16.925, |
|
"eval_steps_per_second": 4.232, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.004137042973191413, |
|
"grad_norm": 10.242443084716797, |
|
"learning_rate": 2.6073157894736845e-05, |
|
"loss": 2.8813, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.0041644406087754625, |
|
"grad_norm": 11.027935981750488, |
|
"learning_rate": 2.554105263157895e-05, |
|
"loss": 3.2701, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.004191838244359512, |
|
"grad_norm": 9.913683891296387, |
|
"learning_rate": 2.5008947368421052e-05, |
|
"loss": 2.8378, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.004219235879943561, |
|
"grad_norm": 12.305196762084961, |
|
"learning_rate": 2.447684210526316e-05, |
|
"loss": 3.3966, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.00424663351552761, |
|
"grad_norm": 14.736776351928711, |
|
"learning_rate": 2.3944736842105262e-05, |
|
"loss": 2.936, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.004274031151111659, |
|
"grad_norm": 15.716378211975098, |
|
"learning_rate": 2.341263157894737e-05, |
|
"loss": 3.3793, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.004301428786695708, |
|
"grad_norm": 11.29692554473877, |
|
"learning_rate": 2.2880526315789475e-05, |
|
"loss": 2.5899, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.004328826422279757, |
|
"grad_norm": 8.92076587677002, |
|
"learning_rate": 2.234842105263158e-05, |
|
"loss": 1.5897, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.004356224057863806, |
|
"grad_norm": 10.286359786987305, |
|
"learning_rate": 2.1816315789473685e-05, |
|
"loss": 2.0679, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.004383621693447855, |
|
"grad_norm": 10.094871520996094, |
|
"learning_rate": 2.128421052631579e-05, |
|
"loss": 2.2023, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0044110193290319046, |
|
"grad_norm": 8.398151397705078, |
|
"learning_rate": 2.0752105263157895e-05, |
|
"loss": 1.7198, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.004438416964615954, |
|
"grad_norm": 9.89030647277832, |
|
"learning_rate": 2.0220000000000003e-05, |
|
"loss": 1.8957, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.004465814600200003, |
|
"grad_norm": 10.257231712341309, |
|
"learning_rate": 1.9687894736842104e-05, |
|
"loss": 1.8288, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.004493212235784052, |
|
"grad_norm": 10.972249984741211, |
|
"learning_rate": 1.9155789473684213e-05, |
|
"loss": 2.3046, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.004520609871368101, |
|
"grad_norm": 9.473671913146973, |
|
"learning_rate": 1.8623684210526314e-05, |
|
"loss": 1.5979, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.00454800750695215, |
|
"grad_norm": 8.295541763305664, |
|
"learning_rate": 1.8091578947368423e-05, |
|
"loss": 1.6066, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.004575405142536199, |
|
"grad_norm": 9.380114555358887, |
|
"learning_rate": 1.7559473684210528e-05, |
|
"loss": 1.382, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.004602802778120248, |
|
"grad_norm": 10.417853355407715, |
|
"learning_rate": 1.7027368421052632e-05, |
|
"loss": 2.454, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.0046302004137042975, |
|
"grad_norm": 10.452378273010254, |
|
"learning_rate": 1.6495263157894737e-05, |
|
"loss": 1.9223, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.004657598049288347, |
|
"grad_norm": 9.896018981933594, |
|
"learning_rate": 1.5963157894736842e-05, |
|
"loss": 1.8119, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.004684995684872396, |
|
"grad_norm": 9.311156272888184, |
|
"learning_rate": 1.5431052631578947e-05, |
|
"loss": 1.5882, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.004712393320456445, |
|
"grad_norm": 10.566994667053223, |
|
"learning_rate": 1.4898947368421052e-05, |
|
"loss": 1.7865, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.004739790956040494, |
|
"grad_norm": 13.512009620666504, |
|
"learning_rate": 1.4366842105263159e-05, |
|
"loss": 3.2459, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.004767188591624543, |
|
"grad_norm": 9.658906936645508, |
|
"learning_rate": 1.3834736842105265e-05, |
|
"loss": 1.6884, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.004794586227208592, |
|
"grad_norm": 9.381290435791016, |
|
"learning_rate": 1.3302631578947369e-05, |
|
"loss": 1.6764, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.004821983862792641, |
|
"grad_norm": 11.110421180725098, |
|
"learning_rate": 1.2770526315789475e-05, |
|
"loss": 2.4533, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.0048493814983766904, |
|
"grad_norm": 9.552355766296387, |
|
"learning_rate": 1.223842105263158e-05, |
|
"loss": 1.587, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.0048767791339607396, |
|
"grad_norm": 10.369064331054688, |
|
"learning_rate": 1.1706315789473685e-05, |
|
"loss": 1.5635, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.004904176769544789, |
|
"grad_norm": 10.4714994430542, |
|
"learning_rate": 1.117421052631579e-05, |
|
"loss": 1.3165, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.004931574405128838, |
|
"grad_norm": 10.99134635925293, |
|
"learning_rate": 1.0642105263157895e-05, |
|
"loss": 1.9887, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.004958972040712886, |
|
"grad_norm": 11.243077278137207, |
|
"learning_rate": 1.0110000000000001e-05, |
|
"loss": 2.0445, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.004986369676296935, |
|
"grad_norm": 9.404143333435059, |
|
"learning_rate": 9.577894736842106e-06, |
|
"loss": 1.9587, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.005013767311880984, |
|
"grad_norm": 8.410197257995605, |
|
"learning_rate": 9.045789473684211e-06, |
|
"loss": 1.8646, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.005041164947465033, |
|
"grad_norm": 7.286149501800537, |
|
"learning_rate": 8.513684210526316e-06, |
|
"loss": 1.2129, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.0050685625830490825, |
|
"grad_norm": 10.829649925231934, |
|
"learning_rate": 7.981578947368421e-06, |
|
"loss": 2.1754, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.005095960218633132, |
|
"grad_norm": 9.724632263183594, |
|
"learning_rate": 7.449473684210526e-06, |
|
"loss": 1.3908, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.005123357854217181, |
|
"grad_norm": 9.229701042175293, |
|
"learning_rate": 6.917368421052633e-06, |
|
"loss": 1.6759, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.00515075548980123, |
|
"grad_norm": 9.659087181091309, |
|
"learning_rate": 6.385263157894738e-06, |
|
"loss": 1.6113, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.005178153125385279, |
|
"grad_norm": 10.842117309570312, |
|
"learning_rate": 5.8531578947368425e-06, |
|
"loss": 2.3648, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.005205550760969328, |
|
"grad_norm": 10.12624740600586, |
|
"learning_rate": 5.321052631578947e-06, |
|
"loss": 1.6541, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.005232948396553377, |
|
"grad_norm": 9.375452041625977, |
|
"learning_rate": 4.788947368421053e-06, |
|
"loss": 1.9107, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.005260346032137426, |
|
"grad_norm": 10.81548023223877, |
|
"learning_rate": 4.256842105263158e-06, |
|
"loss": 1.9993, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.0052877436677214755, |
|
"grad_norm": 8.572677612304688, |
|
"learning_rate": 3.724736842105263e-06, |
|
"loss": 1.7091, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.005315141303305525, |
|
"grad_norm": 11.520868301391602, |
|
"learning_rate": 3.192631578947369e-06, |
|
"loss": 2.1969, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.005342538938889574, |
|
"grad_norm": 10.084508895874023, |
|
"learning_rate": 2.6605263157894737e-06, |
|
"loss": 1.4943, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.005369936574473623, |
|
"grad_norm": 10.452617645263672, |
|
"learning_rate": 2.128421052631579e-06, |
|
"loss": 1.5151, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.005397334210057672, |
|
"grad_norm": 8.461989402770996, |
|
"learning_rate": 1.5963157894736844e-06, |
|
"loss": 1.3936, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.005424731845641721, |
|
"grad_norm": 8.975988388061523, |
|
"learning_rate": 1.0642105263157895e-06, |
|
"loss": 1.3449, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.00545212948122577, |
|
"grad_norm": 9.221831321716309, |
|
"learning_rate": 5.321052631578948e-07, |
|
"loss": 1.0431, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.005479527116809819, |
|
"grad_norm": 9.018491744995117, |
|
"learning_rate": 0.0, |
|
"loss": 1.1321, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.005479527116809819, |
|
"eval_loss": 1.9446079730987549, |
|
"eval_runtime": 906.8244, |
|
"eval_samples_per_second": 16.948, |
|
"eval_steps_per_second": 4.238, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.303878615793664e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|