|
{ |
|
"best_metric": 0.7234218120574951, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.009289363678588018, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.644681839294008e-05, |
|
"grad_norm": 3.092576503753662, |
|
"learning_rate": 1.018e-05, |
|
"loss": 0.997, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 4.644681839294008e-05, |
|
"eval_loss": 1.5409296751022339, |
|
"eval_runtime": 162.8209, |
|
"eval_samples_per_second": 55.681, |
|
"eval_steps_per_second": 13.923, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 9.289363678588016e-05, |
|
"grad_norm": 21.47955322265625, |
|
"learning_rate": 2.036e-05, |
|
"loss": 1.9273, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00013934045517882026, |
|
"grad_norm": 9.852048873901367, |
|
"learning_rate": 3.0539999999999996e-05, |
|
"loss": 1.6379, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00018578727357176033, |
|
"grad_norm": 17.509506225585938, |
|
"learning_rate": 4.072e-05, |
|
"loss": 1.4906, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00023223409196470042, |
|
"grad_norm": 14.546015739440918, |
|
"learning_rate": 5.09e-05, |
|
"loss": 1.4437, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0002786809103576405, |
|
"grad_norm": 5.5904154777526855, |
|
"learning_rate": 6.107999999999999e-05, |
|
"loss": 1.6272, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0003251277287505806, |
|
"grad_norm": 4.112157821655273, |
|
"learning_rate": 7.125999999999999e-05, |
|
"loss": 1.4437, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00037157454714352065, |
|
"grad_norm": 4.384551048278809, |
|
"learning_rate": 8.144e-05, |
|
"loss": 1.3646, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0004180213655364608, |
|
"grad_norm": 8.227641105651855, |
|
"learning_rate": 9.162e-05, |
|
"loss": 1.1622, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00046446818392940084, |
|
"grad_norm": 5.1519856452941895, |
|
"learning_rate": 0.0001018, |
|
"loss": 1.4487, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0005109150023223409, |
|
"grad_norm": 9.391843795776367, |
|
"learning_rate": 0.00010126421052631578, |
|
"loss": 1.2691, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.000557361820715281, |
|
"grad_norm": 5.215498924255371, |
|
"learning_rate": 0.00010072842105263156, |
|
"loss": 0.9428, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.000603808639108221, |
|
"grad_norm": 2.952871084213257, |
|
"learning_rate": 0.00010019263157894736, |
|
"loss": 1.0689, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0006502554575011612, |
|
"grad_norm": 2.8391802310943604, |
|
"learning_rate": 9.965684210526316e-05, |
|
"loss": 0.9291, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0006967022758941013, |
|
"grad_norm": 2.461745023727417, |
|
"learning_rate": 9.912105263157895e-05, |
|
"loss": 0.8318, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0007431490942870413, |
|
"grad_norm": 2.3736178874969482, |
|
"learning_rate": 9.858526315789473e-05, |
|
"loss": 0.5747, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0007895959126799814, |
|
"grad_norm": 3.788600444793701, |
|
"learning_rate": 9.804947368421052e-05, |
|
"loss": 0.9676, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0008360427310729215, |
|
"grad_norm": 3.9328246116638184, |
|
"learning_rate": 9.75136842105263e-05, |
|
"loss": 1.0193, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0008824895494658616, |
|
"grad_norm": 2.693528652191162, |
|
"learning_rate": 9.69778947368421e-05, |
|
"loss": 0.7466, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0009289363678588017, |
|
"grad_norm": 3.4937636852264404, |
|
"learning_rate": 9.644210526315789e-05, |
|
"loss": 0.8632, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0009753831862517418, |
|
"grad_norm": 2.1122593879699707, |
|
"learning_rate": 9.590631578947369e-05, |
|
"loss": 0.7247, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0010218300046446818, |
|
"grad_norm": 3.186523675918579, |
|
"learning_rate": 9.537052631578947e-05, |
|
"loss": 0.6886, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.001068276823037622, |
|
"grad_norm": 2.758934497833252, |
|
"learning_rate": 9.483473684210526e-05, |
|
"loss": 0.6994, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.001114723641430562, |
|
"grad_norm": 2.430767774581909, |
|
"learning_rate": 9.429894736842104e-05, |
|
"loss": 0.6421, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0011611704598235022, |
|
"grad_norm": 3.4728782176971436, |
|
"learning_rate": 9.376315789473684e-05, |
|
"loss": 0.8879, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.001207617278216442, |
|
"grad_norm": 3.511162519454956, |
|
"learning_rate": 9.322736842105262e-05, |
|
"loss": 0.9696, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0012540640966093822, |
|
"grad_norm": 3.0230774879455566, |
|
"learning_rate": 9.269157894736842e-05, |
|
"loss": 0.9589, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0013005109150023223, |
|
"grad_norm": 2.1474082469940186, |
|
"learning_rate": 9.215578947368421e-05, |
|
"loss": 0.7707, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0013469577333952625, |
|
"grad_norm": 2.3117761611938477, |
|
"learning_rate": 9.162e-05, |
|
"loss": 0.7724, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0013934045517882026, |
|
"grad_norm": 1.9512385129928589, |
|
"learning_rate": 9.108421052631578e-05, |
|
"loss": 0.603, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0014398513701811425, |
|
"grad_norm": 6.22908878326416, |
|
"learning_rate": 9.054842105263158e-05, |
|
"loss": 1.0569, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0014862981885740826, |
|
"grad_norm": 2.2944276332855225, |
|
"learning_rate": 9.001263157894736e-05, |
|
"loss": 0.6613, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0015327450069670227, |
|
"grad_norm": 2.312437057495117, |
|
"learning_rate": 8.947684210526315e-05, |
|
"loss": 0.8758, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0015791918253599629, |
|
"grad_norm": 2.5488150119781494, |
|
"learning_rate": 8.894105263157895e-05, |
|
"loss": 0.635, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.001625638643752903, |
|
"grad_norm": 3.658079147338867, |
|
"learning_rate": 8.840526315789473e-05, |
|
"loss": 1.0831, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.001672085462145843, |
|
"grad_norm": 2.474161386489868, |
|
"learning_rate": 8.786947368421052e-05, |
|
"loss": 0.7918, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.001718532280538783, |
|
"grad_norm": 2.5074217319488525, |
|
"learning_rate": 8.733368421052632e-05, |
|
"loss": 0.8514, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0017649790989317231, |
|
"grad_norm": 2.0377755165100098, |
|
"learning_rate": 8.67978947368421e-05, |
|
"loss": 0.6768, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0018114259173246632, |
|
"grad_norm": 2.2819857597351074, |
|
"learning_rate": 8.626210526315789e-05, |
|
"loss": 0.6528, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0018578727357176034, |
|
"grad_norm": 2.3973352909088135, |
|
"learning_rate": 8.572631578947367e-05, |
|
"loss": 0.8542, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0019043195541105435, |
|
"grad_norm": 2.628427267074585, |
|
"learning_rate": 8.519052631578947e-05, |
|
"loss": 0.8507, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0019507663725034836, |
|
"grad_norm": 3.116105556488037, |
|
"learning_rate": 8.465473684210527e-05, |
|
"loss": 0.9283, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0019972131908964235, |
|
"grad_norm": 3.5683062076568604, |
|
"learning_rate": 8.411894736842105e-05, |
|
"loss": 1.1937, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0020436600092893636, |
|
"grad_norm": 2.569610118865967, |
|
"learning_rate": 8.358315789473684e-05, |
|
"loss": 0.9237, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0020901068276823038, |
|
"grad_norm": 3.7425827980041504, |
|
"learning_rate": 8.304736842105262e-05, |
|
"loss": 1.3234, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.002136553646075244, |
|
"grad_norm": 3.7030258178710938, |
|
"learning_rate": 8.251157894736841e-05, |
|
"loss": 0.7639, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.002183000464468184, |
|
"grad_norm": 3.188816785812378, |
|
"learning_rate": 8.197578947368421e-05, |
|
"loss": 1.1931, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.002229447282861124, |
|
"grad_norm": 5.445688724517822, |
|
"learning_rate": 8.144e-05, |
|
"loss": 0.8938, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0022758941012540643, |
|
"grad_norm": 2.3576200008392334, |
|
"learning_rate": 8.090421052631579e-05, |
|
"loss": 0.836, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0023223409196470044, |
|
"grad_norm": 22.260164260864258, |
|
"learning_rate": 8.036842105263158e-05, |
|
"loss": 1.0266, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0023223409196470044, |
|
"eval_loss": 0.8269708752632141, |
|
"eval_runtime": 162.7994, |
|
"eval_samples_per_second": 55.688, |
|
"eval_steps_per_second": 13.925, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.002368787738039944, |
|
"grad_norm": 2.7304725646972656, |
|
"learning_rate": 7.983263157894736e-05, |
|
"loss": 0.7953, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.002415234556432884, |
|
"grad_norm": 2.282052516937256, |
|
"learning_rate": 7.929684210526315e-05, |
|
"loss": 0.8493, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0024616813748258243, |
|
"grad_norm": 2.524898052215576, |
|
"learning_rate": 7.876105263157895e-05, |
|
"loss": 1.0108, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0025081281932187644, |
|
"grad_norm": 2.6745500564575195, |
|
"learning_rate": 7.822526315789473e-05, |
|
"loss": 1.0531, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0025545750116117046, |
|
"grad_norm": 3.1213998794555664, |
|
"learning_rate": 7.768947368421053e-05, |
|
"loss": 1.1068, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0026010218300046447, |
|
"grad_norm": 3.499636173248291, |
|
"learning_rate": 7.715368421052631e-05, |
|
"loss": 1.0808, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.002647468648397585, |
|
"grad_norm": 3.125180721282959, |
|
"learning_rate": 7.66178947368421e-05, |
|
"loss": 0.8485, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.002693915466790525, |
|
"grad_norm": 2.547490119934082, |
|
"learning_rate": 7.608210526315788e-05, |
|
"loss": 0.7604, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.002740362285183465, |
|
"grad_norm": 2.058576822280884, |
|
"learning_rate": 7.554631578947368e-05, |
|
"loss": 0.665, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.002786809103576405, |
|
"grad_norm": 2.624077320098877, |
|
"learning_rate": 7.501052631578947e-05, |
|
"loss": 0.6822, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0028332559219693453, |
|
"grad_norm": 1.4456878900527954, |
|
"learning_rate": 7.447473684210527e-05, |
|
"loss": 0.554, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.002879702740362285, |
|
"grad_norm": 1.9477028846740723, |
|
"learning_rate": 7.393894736842105e-05, |
|
"loss": 0.712, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.002926149558755225, |
|
"grad_norm": 2.0057406425476074, |
|
"learning_rate": 7.340315789473684e-05, |
|
"loss": 0.7368, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0029725963771481652, |
|
"grad_norm": 2.041309356689453, |
|
"learning_rate": 7.286736842105262e-05, |
|
"loss": 0.9478, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0030190431955411053, |
|
"grad_norm": 1.8585742712020874, |
|
"learning_rate": 7.233157894736842e-05, |
|
"loss": 0.5883, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0030654900139340455, |
|
"grad_norm": 2.3726940155029297, |
|
"learning_rate": 7.179578947368421e-05, |
|
"loss": 0.8448, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0031119368323269856, |
|
"grad_norm": 4.274697303771973, |
|
"learning_rate": 7.125999999999999e-05, |
|
"loss": 0.693, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0031583836507199257, |
|
"grad_norm": 1.7456036806106567, |
|
"learning_rate": 7.072421052631579e-05, |
|
"loss": 0.6139, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.003204830469112866, |
|
"grad_norm": 2.077462673187256, |
|
"learning_rate": 7.018842105263158e-05, |
|
"loss": 0.6687, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.003251277287505806, |
|
"grad_norm": 1.6591328382492065, |
|
"learning_rate": 6.965263157894736e-05, |
|
"loss": 0.6602, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.003297724105898746, |
|
"grad_norm": 2.040104866027832, |
|
"learning_rate": 6.911684210526316e-05, |
|
"loss": 0.6718, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.003344170924291686, |
|
"grad_norm": 2.031517505645752, |
|
"learning_rate": 6.858105263157894e-05, |
|
"loss": 0.6965, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.003390617742684626, |
|
"grad_norm": 1.677396297454834, |
|
"learning_rate": 6.804526315789473e-05, |
|
"loss": 0.6022, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.003437064561077566, |
|
"grad_norm": 2.6971142292022705, |
|
"learning_rate": 6.750947368421052e-05, |
|
"loss": 0.7803, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.003483511379470506, |
|
"grad_norm": 1.2442923784255981, |
|
"learning_rate": 6.697368421052631e-05, |
|
"loss": 0.4904, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0035299581978634463, |
|
"grad_norm": 1.520882487297058, |
|
"learning_rate": 6.64378947368421e-05, |
|
"loss": 0.5449, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0035764050162563864, |
|
"grad_norm": 2.945136070251465, |
|
"learning_rate": 6.59021052631579e-05, |
|
"loss": 0.5945, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0036228518346493265, |
|
"grad_norm": 2.225796937942505, |
|
"learning_rate": 6.536631578947368e-05, |
|
"loss": 0.8414, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0036692986530422666, |
|
"grad_norm": 3.5419042110443115, |
|
"learning_rate": 6.483052631578947e-05, |
|
"loss": 0.951, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0037157454714352067, |
|
"grad_norm": 2.4470789432525635, |
|
"learning_rate": 6.429473684210525e-05, |
|
"loss": 0.6766, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.003762192289828147, |
|
"grad_norm": 1.8150739669799805, |
|
"learning_rate": 6.375894736842104e-05, |
|
"loss": 0.6212, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.003808639108221087, |
|
"grad_norm": 2.2378828525543213, |
|
"learning_rate": 6.322315789473684e-05, |
|
"loss": 0.912, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.003855085926614027, |
|
"grad_norm": 2.66448974609375, |
|
"learning_rate": 6.268736842105264e-05, |
|
"loss": 0.7284, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0039015327450069672, |
|
"grad_norm": 2.0171289443969727, |
|
"learning_rate": 6.215157894736842e-05, |
|
"loss": 0.5339, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.003947979563399907, |
|
"grad_norm": 1.829827070236206, |
|
"learning_rate": 6.16157894736842e-05, |
|
"loss": 0.6982, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.003994426381792847, |
|
"grad_norm": 1.3786966800689697, |
|
"learning_rate": 6.107999999999999e-05, |
|
"loss": 0.4433, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.004040873200185788, |
|
"grad_norm": 2.0562403202056885, |
|
"learning_rate": 6.054421052631578e-05, |
|
"loss": 0.791, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.004087320018578727, |
|
"grad_norm": 1.8710417747497559, |
|
"learning_rate": 6.000842105263157e-05, |
|
"loss": 0.5487, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.004133766836971668, |
|
"grad_norm": 2.46244215965271, |
|
"learning_rate": 5.947263157894737e-05, |
|
"loss": 0.9045, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0041802136553646075, |
|
"grad_norm": 1.9283982515335083, |
|
"learning_rate": 5.893684210526316e-05, |
|
"loss": 0.6472, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.004226660473757547, |
|
"grad_norm": 1.794073462486267, |
|
"learning_rate": 5.8401052631578944e-05, |
|
"loss": 0.7332, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.004273107292150488, |
|
"grad_norm": 2.4211764335632324, |
|
"learning_rate": 5.7865263157894736e-05, |
|
"loss": 0.9175, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0043195541105434275, |
|
"grad_norm": 2.131087064743042, |
|
"learning_rate": 5.732947368421052e-05, |
|
"loss": 0.846, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.004366000928936368, |
|
"grad_norm": 3.606595993041992, |
|
"learning_rate": 5.6793684210526306e-05, |
|
"loss": 1.023, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.004412447747329308, |
|
"grad_norm": 2.0817458629608154, |
|
"learning_rate": 5.6257894736842105e-05, |
|
"loss": 0.7585, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.004458894565722248, |
|
"grad_norm": 2.736661672592163, |
|
"learning_rate": 5.57221052631579e-05, |
|
"loss": 0.9443, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.004505341384115188, |
|
"grad_norm": 1.7814656496047974, |
|
"learning_rate": 5.518631578947368e-05, |
|
"loss": 0.9056, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0045517882025081285, |
|
"grad_norm": 2.098845958709717, |
|
"learning_rate": 5.4650526315789474e-05, |
|
"loss": 0.7014, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.004598235020901068, |
|
"grad_norm": 2.316159963607788, |
|
"learning_rate": 5.411473684210526e-05, |
|
"loss": 0.7147, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.004644681839294009, |
|
"grad_norm": 2.182925224304199, |
|
"learning_rate": 5.3578947368421044e-05, |
|
"loss": 0.7637, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004644681839294009, |
|
"eval_loss": 0.7575440406799316, |
|
"eval_runtime": 162.5731, |
|
"eval_samples_per_second": 55.766, |
|
"eval_steps_per_second": 13.944, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0046911286576869484, |
|
"grad_norm": 1.547819972038269, |
|
"learning_rate": 5.3043157894736836e-05, |
|
"loss": 0.7768, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.004737575476079888, |
|
"grad_norm": 2.0550365447998047, |
|
"learning_rate": 5.2507368421052635e-05, |
|
"loss": 0.8542, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.004784022294472829, |
|
"grad_norm": 1.7644928693771362, |
|
"learning_rate": 5.197157894736842e-05, |
|
"loss": 0.8406, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.004830469112865768, |
|
"grad_norm": 2.784821033477783, |
|
"learning_rate": 5.143578947368421e-05, |
|
"loss": 1.0809, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.004876915931258709, |
|
"grad_norm": 2.643968105316162, |
|
"learning_rate": 5.09e-05, |
|
"loss": 1.1358, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.004923362749651649, |
|
"grad_norm": 2.6479332447052, |
|
"learning_rate": 5.036421052631578e-05, |
|
"loss": 0.9534, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.004969809568044589, |
|
"grad_norm": 1.5139284133911133, |
|
"learning_rate": 4.982842105263158e-05, |
|
"loss": 0.6037, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.005016256386437529, |
|
"grad_norm": 2.2001686096191406, |
|
"learning_rate": 4.9292631578947366e-05, |
|
"loss": 1.0875, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.005062703204830469, |
|
"grad_norm": 1.906663417816162, |
|
"learning_rate": 4.875684210526315e-05, |
|
"loss": 0.8251, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.005109150023223409, |
|
"grad_norm": 1.6133707761764526, |
|
"learning_rate": 4.822105263157894e-05, |
|
"loss": 0.7804, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.00515559684161635, |
|
"grad_norm": 1.6872289180755615, |
|
"learning_rate": 4.7685263157894735e-05, |
|
"loss": 0.5731, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.005202043660009289, |
|
"grad_norm": 1.2829549312591553, |
|
"learning_rate": 4.714947368421052e-05, |
|
"loss": 0.4865, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.005248490478402229, |
|
"grad_norm": 1.8299009799957275, |
|
"learning_rate": 4.661368421052631e-05, |
|
"loss": 0.7806, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.00529493729679517, |
|
"grad_norm": 1.3792545795440674, |
|
"learning_rate": 4.6077894736842104e-05, |
|
"loss": 0.5824, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.005341384115188109, |
|
"grad_norm": 1.554002046585083, |
|
"learning_rate": 4.554210526315789e-05, |
|
"loss": 0.7399, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.00538783093358105, |
|
"grad_norm": 1.8911974430084229, |
|
"learning_rate": 4.500631578947368e-05, |
|
"loss": 0.8756, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0054342777519739895, |
|
"grad_norm": 2.071706771850586, |
|
"learning_rate": 4.447052631578947e-05, |
|
"loss": 0.8007, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.00548072457036693, |
|
"grad_norm": 2.202437162399292, |
|
"learning_rate": 4.393473684210526e-05, |
|
"loss": 0.8207, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.00552717138875987, |
|
"grad_norm": 1.33773672580719, |
|
"learning_rate": 4.339894736842105e-05, |
|
"loss": 0.5947, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.00557361820715281, |
|
"grad_norm": 1.8306225538253784, |
|
"learning_rate": 4.2863157894736835e-05, |
|
"loss": 0.7513, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.00562006502554575, |
|
"grad_norm": 1.6813061237335205, |
|
"learning_rate": 4.2327368421052634e-05, |
|
"loss": 0.6929, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.005666511843938691, |
|
"grad_norm": 1.5658451318740845, |
|
"learning_rate": 4.179157894736842e-05, |
|
"loss": 0.5594, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.00571295866233163, |
|
"grad_norm": 1.4536268711090088, |
|
"learning_rate": 4.1255789473684204e-05, |
|
"loss": 0.6208, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.00575940548072457, |
|
"grad_norm": 1.9043149948120117, |
|
"learning_rate": 4.072e-05, |
|
"loss": 0.6332, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.0058058522991175105, |
|
"grad_norm": 2.0733814239501953, |
|
"learning_rate": 4.018421052631579e-05, |
|
"loss": 0.6764, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.00585229911751045, |
|
"grad_norm": 1.7627897262573242, |
|
"learning_rate": 3.9648421052631573e-05, |
|
"loss": 0.7384, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.005898745935903391, |
|
"grad_norm": 1.6006054878234863, |
|
"learning_rate": 3.9112631578947365e-05, |
|
"loss": 0.6752, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0059451927542963304, |
|
"grad_norm": 1.4541168212890625, |
|
"learning_rate": 3.857684210526316e-05, |
|
"loss": 0.6692, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.005991639572689271, |
|
"grad_norm": 1.3292078971862793, |
|
"learning_rate": 3.804105263157894e-05, |
|
"loss": 0.5353, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.006038086391082211, |
|
"grad_norm": 1.6884562969207764, |
|
"learning_rate": 3.7505263157894734e-05, |
|
"loss": 0.7562, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.006084533209475151, |
|
"grad_norm": 1.0477324724197388, |
|
"learning_rate": 3.6969473684210526e-05, |
|
"loss": 0.3243, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.006130980027868091, |
|
"grad_norm": 1.4753937721252441, |
|
"learning_rate": 3.643368421052631e-05, |
|
"loss": 0.5291, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0061774268462610315, |
|
"grad_norm": 1.7509891986846924, |
|
"learning_rate": 3.5897894736842103e-05, |
|
"loss": 0.6364, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.006223873664653971, |
|
"grad_norm": 2.055713653564453, |
|
"learning_rate": 3.5362105263157895e-05, |
|
"loss": 0.782, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.006270320483046911, |
|
"grad_norm": 2.0711967945098877, |
|
"learning_rate": 3.482631578947368e-05, |
|
"loss": 0.7677, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.006316767301439851, |
|
"grad_norm": 1.3271763324737549, |
|
"learning_rate": 3.429052631578947e-05, |
|
"loss": 0.5314, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.006363214119832791, |
|
"grad_norm": 1.7668476104736328, |
|
"learning_rate": 3.375473684210526e-05, |
|
"loss": 0.8441, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.006409660938225732, |
|
"grad_norm": 1.773807168006897, |
|
"learning_rate": 3.321894736842105e-05, |
|
"loss": 0.7551, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.006456107756618671, |
|
"grad_norm": 1.6312812566757202, |
|
"learning_rate": 3.268315789473684e-05, |
|
"loss": 0.8111, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.006502554575011612, |
|
"grad_norm": 1.6187984943389893, |
|
"learning_rate": 3.2147368421052627e-05, |
|
"loss": 0.6781, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.006549001393404552, |
|
"grad_norm": 1.6448986530303955, |
|
"learning_rate": 3.161157894736842e-05, |
|
"loss": 0.5815, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.006595448211797492, |
|
"grad_norm": 1.9651342630386353, |
|
"learning_rate": 3.107578947368421e-05, |
|
"loss": 0.7199, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.006641895030190432, |
|
"grad_norm": 2.4397366046905518, |
|
"learning_rate": 3.0539999999999996e-05, |
|
"loss": 0.7959, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.006688341848583372, |
|
"grad_norm": 1.7463246583938599, |
|
"learning_rate": 3.0004210526315784e-05, |
|
"loss": 0.7066, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.006734788666976312, |
|
"grad_norm": 1.6383179426193237, |
|
"learning_rate": 2.946842105263158e-05, |
|
"loss": 0.5844, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.006781235485369252, |
|
"grad_norm": 2.03802752494812, |
|
"learning_rate": 2.8932631578947368e-05, |
|
"loss": 0.7851, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.006827682303762192, |
|
"grad_norm": 1.5965886116027832, |
|
"learning_rate": 2.8396842105263153e-05, |
|
"loss": 0.7421, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.006874129122155132, |
|
"grad_norm": 1.6589584350585938, |
|
"learning_rate": 2.786105263157895e-05, |
|
"loss": 0.7362, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.006920575940548073, |
|
"grad_norm": 1.904215693473816, |
|
"learning_rate": 2.7325263157894737e-05, |
|
"loss": 0.842, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.006967022758941012, |
|
"grad_norm": 1.954518437385559, |
|
"learning_rate": 2.6789473684210522e-05, |
|
"loss": 0.8687, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.006967022758941012, |
|
"eval_loss": 0.7381066679954529, |
|
"eval_runtime": 163.587, |
|
"eval_samples_per_second": 55.42, |
|
"eval_steps_per_second": 13.858, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.007013469577333953, |
|
"grad_norm": 1.321113109588623, |
|
"learning_rate": 2.6253684210526317e-05, |
|
"loss": 0.6275, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.0070599163957268925, |
|
"grad_norm": 1.9313557147979736, |
|
"learning_rate": 2.5717894736842106e-05, |
|
"loss": 0.8083, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.007106363214119833, |
|
"grad_norm": 2.39707350730896, |
|
"learning_rate": 2.518210526315789e-05, |
|
"loss": 1.1125, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.007152810032512773, |
|
"grad_norm": 2.2258388996124268, |
|
"learning_rate": 2.4646315789473683e-05, |
|
"loss": 0.9885, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.007199256850905713, |
|
"grad_norm": 2.207796096801758, |
|
"learning_rate": 2.411052631578947e-05, |
|
"loss": 0.8165, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.007245703669298653, |
|
"grad_norm": 2.068021774291992, |
|
"learning_rate": 2.357473684210526e-05, |
|
"loss": 0.9621, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.0072921504876915936, |
|
"grad_norm": 3.123298168182373, |
|
"learning_rate": 2.3038947368421052e-05, |
|
"loss": 0.9623, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.007338597306084533, |
|
"grad_norm": 1.7516857385635376, |
|
"learning_rate": 2.250315789473684e-05, |
|
"loss": 0.7126, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.007385044124477473, |
|
"grad_norm": 1.756352424621582, |
|
"learning_rate": 2.196736842105263e-05, |
|
"loss": 0.6112, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.0074314909428704135, |
|
"grad_norm": 1.324313998222351, |
|
"learning_rate": 2.1431578947368418e-05, |
|
"loss": 0.4837, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.007477937761263353, |
|
"grad_norm": 1.6090558767318726, |
|
"learning_rate": 2.089578947368421e-05, |
|
"loss": 0.5255, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.007524384579656294, |
|
"grad_norm": 1.3804148435592651, |
|
"learning_rate": 2.036e-05, |
|
"loss": 0.4674, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.007570831398049233, |
|
"grad_norm": 1.3651041984558105, |
|
"learning_rate": 1.9824210526315787e-05, |
|
"loss": 0.7306, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.007617278216442174, |
|
"grad_norm": 1.8530007600784302, |
|
"learning_rate": 1.928842105263158e-05, |
|
"loss": 0.7491, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.007663725034835114, |
|
"grad_norm": 1.493820309638977, |
|
"learning_rate": 1.8752631578947367e-05, |
|
"loss": 0.6576, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.007710171853228054, |
|
"grad_norm": 1.199458360671997, |
|
"learning_rate": 1.8216842105263156e-05, |
|
"loss": 0.3963, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.007756618671620994, |
|
"grad_norm": 1.6788829565048218, |
|
"learning_rate": 1.7681052631578948e-05, |
|
"loss": 0.7574, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.0078030654900139345, |
|
"grad_norm": 1.2864102125167847, |
|
"learning_rate": 1.7145263157894736e-05, |
|
"loss": 0.5522, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.007849512308406874, |
|
"grad_norm": 1.8316515684127808, |
|
"learning_rate": 1.6609473684210525e-05, |
|
"loss": 0.5285, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.007895959126799815, |
|
"grad_norm": 1.116195559501648, |
|
"learning_rate": 1.6073684210526313e-05, |
|
"loss": 0.5598, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.007942405945192754, |
|
"grad_norm": 1.7328448295593262, |
|
"learning_rate": 1.5537894736842105e-05, |
|
"loss": 0.5905, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.007988852763585694, |
|
"grad_norm": 1.5131913423538208, |
|
"learning_rate": 1.5002105263157892e-05, |
|
"loss": 0.5292, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.008035299581978635, |
|
"grad_norm": 1.8316717147827148, |
|
"learning_rate": 1.4466315789473684e-05, |
|
"loss": 0.547, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.008081746400371575, |
|
"grad_norm": 1.4963319301605225, |
|
"learning_rate": 1.3930526315789474e-05, |
|
"loss": 0.6936, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.008128193218764514, |
|
"grad_norm": 1.6800758838653564, |
|
"learning_rate": 1.3394736842105261e-05, |
|
"loss": 0.6586, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.008174640037157455, |
|
"grad_norm": 1.0843589305877686, |
|
"learning_rate": 1.2858947368421053e-05, |
|
"loss": 0.4544, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.008221086855550395, |
|
"grad_norm": 1.6353403329849243, |
|
"learning_rate": 1.2323157894736842e-05, |
|
"loss": 0.6454, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.008267533673943336, |
|
"grad_norm": 1.6226987838745117, |
|
"learning_rate": 1.178736842105263e-05, |
|
"loss": 0.7083, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.008313980492336275, |
|
"grad_norm": 1.2014755010604858, |
|
"learning_rate": 1.125157894736842e-05, |
|
"loss": 0.518, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.008360427310729215, |
|
"grad_norm": 0.9961537718772888, |
|
"learning_rate": 1.0715789473684209e-05, |
|
"loss": 0.5034, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.008406874129122156, |
|
"grad_norm": 1.2505362033843994, |
|
"learning_rate": 1.018e-05, |
|
"loss": 0.3744, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.008453320947515094, |
|
"grad_norm": 1.209218144416809, |
|
"learning_rate": 9.64421052631579e-06, |
|
"loss": 0.5659, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.008499767765908035, |
|
"grad_norm": 1.5287011861801147, |
|
"learning_rate": 9.108421052631578e-06, |
|
"loss": 0.6864, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.008546214584300976, |
|
"grad_norm": 1.5412635803222656, |
|
"learning_rate": 8.572631578947368e-06, |
|
"loss": 0.784, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.008592661402693916, |
|
"grad_norm": 1.2702871561050415, |
|
"learning_rate": 8.036842105263157e-06, |
|
"loss": 0.6373, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.008639108221086855, |
|
"grad_norm": 1.2671583890914917, |
|
"learning_rate": 7.501052631578946e-06, |
|
"loss": 0.4503, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.008685555039479795, |
|
"grad_norm": 1.6440976858139038, |
|
"learning_rate": 6.965263157894737e-06, |
|
"loss": 0.6182, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.008732001857872736, |
|
"grad_norm": 1.6860370635986328, |
|
"learning_rate": 6.4294736842105265e-06, |
|
"loss": 0.6629, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.008778448676265677, |
|
"grad_norm": 1.778744101524353, |
|
"learning_rate": 5.893684210526315e-06, |
|
"loss": 0.707, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.008824895494658615, |
|
"grad_norm": 2.259239673614502, |
|
"learning_rate": 5.3578947368421044e-06, |
|
"loss": 0.8088, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.008871342313051556, |
|
"grad_norm": 1.5541491508483887, |
|
"learning_rate": 4.822105263157895e-06, |
|
"loss": 0.6756, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.008917789131444497, |
|
"grad_norm": 1.634876012802124, |
|
"learning_rate": 4.286315789473684e-06, |
|
"loss": 0.8733, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.008964235949837435, |
|
"grad_norm": 1.7315068244934082, |
|
"learning_rate": 3.750526315789473e-06, |
|
"loss": 0.6555, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.009010682768230376, |
|
"grad_norm": 1.7454522848129272, |
|
"learning_rate": 3.2147368421052633e-06, |
|
"loss": 0.754, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.009057129586623316, |
|
"grad_norm": 1.7474086284637451, |
|
"learning_rate": 2.6789473684210522e-06, |
|
"loss": 0.8278, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.009103576405016257, |
|
"grad_norm": 1.9185843467712402, |
|
"learning_rate": 2.143157894736842e-06, |
|
"loss": 0.7852, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.009150023223409196, |
|
"grad_norm": 1.923701286315918, |
|
"learning_rate": 1.6073684210526316e-06, |
|
"loss": 0.6713, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.009196470041802136, |
|
"grad_norm": 1.524601697921753, |
|
"learning_rate": 1.071578947368421e-06, |
|
"loss": 0.7535, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.009242916860195077, |
|
"grad_norm": 2.139697313308716, |
|
"learning_rate": 5.357894736842105e-07, |
|
"loss": 0.9147, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.009289363678588018, |
|
"grad_norm": 1.9575085639953613, |
|
"learning_rate": 0.0, |
|
"loss": 0.8268, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.009289363678588018, |
|
"eval_loss": 0.7234218120574951, |
|
"eval_runtime": 163.2305, |
|
"eval_samples_per_second": 55.541, |
|
"eval_steps_per_second": 13.888, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2158942759092224e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|