{
  "best_metric": 0.6350612640380859,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.10085728693898134,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010085728693898135,
      "grad_norm": 1.3238667249679565,
      "learning_rate": 1.013e-05,
      "loss": 0.928,
      "step": 1
    },
    {
      "epoch": 0.0010085728693898135,
      "eval_loss": 1.1040126085281372,
      "eval_runtime": 101.7212,
      "eval_samples_per_second": 4.109,
      "eval_steps_per_second": 1.032,
      "step": 1
    },
    {
      "epoch": 0.002017145738779627,
      "grad_norm": 1.4647407531738281,
      "learning_rate": 2.026e-05,
      "loss": 1.1291,
      "step": 2
    },
    {
      "epoch": 0.0030257186081694403,
      "grad_norm": 1.7250173091888428,
      "learning_rate": 3.039e-05,
      "loss": 1.0937,
      "step": 3
    },
    {
      "epoch": 0.004034291477559254,
      "grad_norm": 1.4756126403808594,
      "learning_rate": 4.052e-05,
      "loss": 1.0637,
      "step": 4
    },
    {
      "epoch": 0.005042864346949067,
      "grad_norm": 1.4971727132797241,
      "learning_rate": 5.065e-05,
      "loss": 1.1541,
      "step": 5
    },
    {
      "epoch": 0.006051437216338881,
      "grad_norm": 1.1345500946044922,
      "learning_rate": 6.078e-05,
      "loss": 0.8789,
      "step": 6
    },
    {
      "epoch": 0.0070600100857286935,
      "grad_norm": 1.1694258451461792,
      "learning_rate": 7.091e-05,
      "loss": 0.7942,
      "step": 7
    },
    {
      "epoch": 0.008068582955118508,
      "grad_norm": 0.9738374948501587,
      "learning_rate": 8.104e-05,
      "loss": 0.7897,
      "step": 8
    },
    {
      "epoch": 0.009077155824508321,
      "grad_norm": 1.2070674896240234,
      "learning_rate": 9.117e-05,
      "loss": 0.8907,
      "step": 9
    },
    {
      "epoch": 0.010085728693898134,
      "grad_norm": 0.9936267733573914,
      "learning_rate": 0.0001013,
      "loss": 0.7356,
      "step": 10
    },
    {
      "epoch": 0.011094301563287948,
      "grad_norm": 0.9822749495506287,
      "learning_rate": 0.00010076684210526316,
      "loss": 0.787,
      "step": 11
    },
    {
      "epoch": 0.012102874432677761,
      "grad_norm": 0.9206593632698059,
      "learning_rate": 0.0001002336842105263,
      "loss": 0.7268,
      "step": 12
    },
    {
      "epoch": 0.013111447302067574,
      "grad_norm": 0.8007137179374695,
      "learning_rate": 9.970052631578946e-05,
      "loss": 0.6785,
      "step": 13
    },
    {
      "epoch": 0.014120020171457387,
      "grad_norm": 0.7873572707176208,
      "learning_rate": 9.916736842105263e-05,
      "loss": 0.65,
      "step": 14
    },
    {
      "epoch": 0.015128593040847202,
      "grad_norm": 0.8010468482971191,
      "learning_rate": 9.863421052631579e-05,
      "loss": 0.6508,
      "step": 15
    },
    {
      "epoch": 0.016137165910237016,
      "grad_norm": 0.7741969227790833,
      "learning_rate": 9.810105263157895e-05,
      "loss": 0.5805,
      "step": 16
    },
    {
      "epoch": 0.01714573877962683,
      "grad_norm": 0.7493349313735962,
      "learning_rate": 9.756789473684211e-05,
      "loss": 0.564,
      "step": 17
    },
    {
      "epoch": 0.018154311649016642,
      "grad_norm": 0.9663587212562561,
      "learning_rate": 9.703473684210525e-05,
      "loss": 0.808,
      "step": 18
    },
    {
      "epoch": 0.019162884518406455,
      "grad_norm": 0.7793949246406555,
      "learning_rate": 9.650157894736842e-05,
      "loss": 0.5491,
      "step": 19
    },
    {
      "epoch": 0.020171457387796268,
      "grad_norm": 0.7455626726150513,
      "learning_rate": 9.596842105263158e-05,
      "loss": 0.6404,
      "step": 20
    },
    {
      "epoch": 0.02118003025718608,
      "grad_norm": 0.7438361048698425,
      "learning_rate": 9.543526315789474e-05,
      "loss": 0.6354,
      "step": 21
    },
    {
      "epoch": 0.022188603126575897,
      "grad_norm": 0.740470826625824,
      "learning_rate": 9.49021052631579e-05,
      "loss": 0.5225,
      "step": 22
    },
    {
      "epoch": 0.02319717599596571,
      "grad_norm": 0.8656465411186218,
      "learning_rate": 9.436894736842105e-05,
      "loss": 0.7408,
      "step": 23
    },
    {
      "epoch": 0.024205748865355523,
      "grad_norm": 0.9166726469993591,
      "learning_rate": 9.38357894736842e-05,
      "loss": 0.6548,
      "step": 24
    },
    {
      "epoch": 0.025214321734745335,
      "grad_norm": 0.8170962333679199,
      "learning_rate": 9.330263157894737e-05,
      "loss": 0.654,
      "step": 25
    },
    {
      "epoch": 0.026222894604135148,
      "grad_norm": 0.7673491835594177,
      "learning_rate": 9.276947368421051e-05,
      "loss": 0.6428,
      "step": 26
    },
    {
      "epoch": 0.02723146747352496,
      "grad_norm": 0.8028613924980164,
      "learning_rate": 9.223631578947369e-05,
      "loss": 0.5204,
      "step": 27
    },
    {
      "epoch": 0.028240040342914774,
      "grad_norm": 0.8541864156723022,
      "learning_rate": 9.170315789473684e-05,
      "loss": 0.6858,
      "step": 28
    },
    {
      "epoch": 0.02924861321230459,
      "grad_norm": 0.8092413544654846,
      "learning_rate": 9.117e-05,
      "loss": 0.6384,
      "step": 29
    },
    {
      "epoch": 0.030257186081694403,
      "grad_norm": 0.9186341762542725,
      "learning_rate": 9.063684210526316e-05,
      "loss": 0.7882,
      "step": 30
    },
    {
      "epoch": 0.031265758951084216,
      "grad_norm": 0.9697185158729553,
      "learning_rate": 9.010368421052632e-05,
      "loss": 0.799,
      "step": 31
    },
    {
      "epoch": 0.03227433182047403,
      "grad_norm": 0.7349818348884583,
      "learning_rate": 8.957052631578946e-05,
      "loss": 0.6457,
      "step": 32
    },
    {
      "epoch": 0.03328290468986384,
      "grad_norm": 0.843620240688324,
      "learning_rate": 8.903736842105263e-05,
      "loss": 0.7368,
      "step": 33
    },
    {
      "epoch": 0.03429147755925366,
      "grad_norm": 0.8225664496421814,
      "learning_rate": 8.850421052631579e-05,
      "loss": 0.7159,
      "step": 34
    },
    {
      "epoch": 0.03530005042864347,
      "grad_norm": 0.9680421352386475,
      "learning_rate": 8.797105263157895e-05,
      "loss": 0.8112,
      "step": 35
    },
    {
      "epoch": 0.036308623298033284,
      "grad_norm": 0.9144193530082703,
      "learning_rate": 8.743789473684211e-05,
      "loss": 0.7829,
      "step": 36
    },
    {
      "epoch": 0.03731719616742309,
      "grad_norm": 0.8283833265304565,
      "learning_rate": 8.690473684210526e-05,
      "loss": 0.7398,
      "step": 37
    },
    {
      "epoch": 0.03832576903681291,
      "grad_norm": 0.7781999707221985,
      "learning_rate": 8.637157894736842e-05,
      "loss": 0.7255,
      "step": 38
    },
    {
      "epoch": 0.039334341906202726,
      "grad_norm": 0.7448036074638367,
      "learning_rate": 8.583842105263158e-05,
      "loss": 0.6482,
      "step": 39
    },
    {
      "epoch": 0.040342914775592535,
      "grad_norm": 0.7694168090820312,
      "learning_rate": 8.530526315789472e-05,
      "loss": 0.6282,
      "step": 40
    },
    {
      "epoch": 0.04135148764498235,
      "grad_norm": 0.9006367325782776,
      "learning_rate": 8.47721052631579e-05,
      "loss": 0.7293,
      "step": 41
    },
    {
      "epoch": 0.04236006051437216,
      "grad_norm": 0.9051785469055176,
      "learning_rate": 8.423894736842105e-05,
      "loss": 0.8256,
      "step": 42
    },
    {
      "epoch": 0.04336863338376198,
      "grad_norm": 0.9707128405570984,
      "learning_rate": 8.37057894736842e-05,
      "loss": 0.7974,
      "step": 43
    },
    {
      "epoch": 0.044377206253151794,
      "grad_norm": 0.885073721408844,
      "learning_rate": 8.317263157894737e-05,
      "loss": 0.7655,
      "step": 44
    },
    {
      "epoch": 0.0453857791225416,
      "grad_norm": 0.9013693928718567,
      "learning_rate": 8.263947368421053e-05,
      "loss": 0.7205,
      "step": 45
    },
    {
      "epoch": 0.04639435199193142,
      "grad_norm": 1.1316449642181396,
      "learning_rate": 8.210631578947368e-05,
      "loss": 0.7543,
      "step": 46
    },
    {
      "epoch": 0.04740292486132123,
      "grad_norm": 0.8760470151901245,
      "learning_rate": 8.157315789473684e-05,
      "loss": 0.7771,
      "step": 47
    },
    {
      "epoch": 0.048411497730711045,
      "grad_norm": 1.0497716665267944,
      "learning_rate": 8.104e-05,
      "loss": 0.8032,
      "step": 48
    },
    {
      "epoch": 0.049420070600100854,
      "grad_norm": 0.9932529926300049,
      "learning_rate": 8.050684210526316e-05,
      "loss": 0.7941,
      "step": 49
    },
    {
      "epoch": 0.05042864346949067,
      "grad_norm": 1.1281291246414185,
      "learning_rate": 7.997368421052632e-05,
      "loss": 0.845,
      "step": 50
    },
    {
      "epoch": 0.05042864346949067,
      "eval_loss": 0.6652013063430786,
      "eval_runtime": 101.679,
      "eval_samples_per_second": 4.111,
      "eval_steps_per_second": 1.033,
      "step": 50
    },
    {
      "epoch": 0.05143721633888049,
      "grad_norm": 0.7430902123451233,
      "learning_rate": 7.944052631578947e-05,
      "loss": 0.4593,
      "step": 51
    },
    {
      "epoch": 0.052445789208270296,
      "grad_norm": 0.6915680170059204,
      "learning_rate": 7.890736842105263e-05,
      "loss": 0.6674,
      "step": 52
    },
    {
      "epoch": 0.05345436207766011,
      "grad_norm": 0.6954211592674255,
      "learning_rate": 7.837421052631579e-05,
      "loss": 0.7528,
      "step": 53
    },
    {
      "epoch": 0.05446293494704992,
      "grad_norm": 0.6784757971763611,
      "learning_rate": 7.784105263157893e-05,
      "loss": 0.617,
      "step": 54
    },
    {
      "epoch": 0.05547150781643974,
      "grad_norm": 0.750968337059021,
      "learning_rate": 7.730789473684211e-05,
      "loss": 0.6296,
      "step": 55
    },
    {
      "epoch": 0.05648008068582955,
      "grad_norm": 0.6823384761810303,
      "learning_rate": 7.677473684210526e-05,
      "loss": 0.6229,
      "step": 56
    },
    {
      "epoch": 0.057488653555219364,
      "grad_norm": 0.9757019877433777,
      "learning_rate": 7.624157894736842e-05,
      "loss": 0.6533,
      "step": 57
    },
    {
      "epoch": 0.05849722642460918,
      "grad_norm": 0.9269747734069824,
      "learning_rate": 7.570842105263158e-05,
      "loss": 0.5794,
      "step": 58
    },
    {
      "epoch": 0.05950579929399899,
      "grad_norm": 0.6206308007240295,
      "learning_rate": 7.517526315789474e-05,
      "loss": 0.5007,
      "step": 59
    },
    {
      "epoch": 0.060514372163388806,
      "grad_norm": 0.6674503684043884,
      "learning_rate": 7.464210526315789e-05,
      "loss": 0.4885,
      "step": 60
    },
    {
      "epoch": 0.061522945032778616,
      "grad_norm": 0.7698457837104797,
      "learning_rate": 7.410894736842106e-05,
      "loss": 0.773,
      "step": 61
    },
    {
      "epoch": 0.06253151790216843,
      "grad_norm": 0.7389516234397888,
      "learning_rate": 7.35757894736842e-05,
      "loss": 0.5738,
      "step": 62
    },
    {
      "epoch": 0.06354009077155824,
      "grad_norm": 0.770261287689209,
      "learning_rate": 7.304263157894737e-05,
      "loss": 0.6907,
      "step": 63
    },
    {
      "epoch": 0.06454866364094806,
      "grad_norm": 0.7029135227203369,
      "learning_rate": 7.250947368421053e-05,
      "loss": 0.6071,
      "step": 64
    },
    {
      "epoch": 0.06555723651033787,
      "grad_norm": 0.7364175319671631,
      "learning_rate": 7.197631578947368e-05,
      "loss": 0.6433,
      "step": 65
    },
    {
      "epoch": 0.06656580937972768,
      "grad_norm": 0.6604887247085571,
      "learning_rate": 7.144315789473684e-05,
      "loss": 0.5419,
      "step": 66
    },
    {
      "epoch": 0.06757438224911749,
      "grad_norm": 0.6892684102058411,
      "learning_rate": 7.091e-05,
      "loss": 0.6809,
      "step": 67
    },
    {
      "epoch": 0.06858295511850732,
      "grad_norm": 0.6906418204307556,
      "learning_rate": 7.037684210526316e-05,
      "loss": 0.5438,
      "step": 68
    },
    {
      "epoch": 0.06959152798789713,
      "grad_norm": 0.7817425727844238,
      "learning_rate": 6.984368421052632e-05,
      "loss": 0.7386,
      "step": 69
    },
    {
      "epoch": 0.07060010085728693,
      "grad_norm": 0.5860413312911987,
      "learning_rate": 6.931052631578947e-05,
      "loss": 0.4939,
      "step": 70
    },
    {
      "epoch": 0.07160867372667676,
      "grad_norm": 0.6654998660087585,
      "learning_rate": 6.877736842105263e-05,
      "loss": 0.5478,
      "step": 71
    },
    {
      "epoch": 0.07261724659606657,
      "grad_norm": 0.7202038168907166,
      "learning_rate": 6.824421052631579e-05,
      "loss": 0.5561,
      "step": 72
    },
    {
      "epoch": 0.07362581946545638,
      "grad_norm": 0.7074757218360901,
      "learning_rate": 6.771105263157895e-05,
      "loss": 0.5294,
      "step": 73
    },
    {
      "epoch": 0.07463439233484619,
      "grad_norm": 0.7609388828277588,
      "learning_rate": 6.71778947368421e-05,
      "loss": 0.6816,
      "step": 74
    },
    {
      "epoch": 0.07564296520423601,
      "grad_norm": 0.7042875289916992,
      "learning_rate": 6.664473684210527e-05,
      "loss": 0.6686,
      "step": 75
    },
    {
      "epoch": 0.07665153807362582,
      "grad_norm": 0.697859525680542,
      "learning_rate": 6.611157894736842e-05,
      "loss": 0.5751,
      "step": 76
    },
    {
      "epoch": 0.07766011094301563,
      "grad_norm": 0.7064348459243774,
      "learning_rate": 6.557842105263158e-05,
      "loss": 0.5744,
      "step": 77
    },
    {
      "epoch": 0.07866868381240545,
      "grad_norm": 0.5865401029586792,
      "learning_rate": 6.504526315789474e-05,
      "loss": 0.5026,
      "step": 78
    },
    {
      "epoch": 0.07967725668179526,
      "grad_norm": 0.7729213237762451,
      "learning_rate": 6.451210526315789e-05,
      "loss": 0.5335,
      "step": 79
    },
    {
      "epoch": 0.08068582955118507,
      "grad_norm": 0.791968584060669,
      "learning_rate": 6.397894736842105e-05,
      "loss": 0.6654,
      "step": 80
    },
    {
      "epoch": 0.08169440242057488,
      "grad_norm": 0.8126956820487976,
      "learning_rate": 6.344578947368421e-05,
      "loss": 0.6091,
      "step": 81
    },
    {
      "epoch": 0.0827029752899647,
      "grad_norm": 0.7012320160865784,
      "learning_rate": 6.291263157894737e-05,
      "loss": 0.587,
      "step": 82
    },
    {
      "epoch": 0.08371154815935451,
      "grad_norm": 0.7842673659324646,
      "learning_rate": 6.237947368421053e-05,
      "loss": 0.6438,
      "step": 83
    },
    {
      "epoch": 0.08472012102874432,
      "grad_norm": 0.7788791656494141,
      "learning_rate": 6.184631578947368e-05,
      "loss": 0.6173,
      "step": 84
    },
    {
      "epoch": 0.08572869389813415,
      "grad_norm": 0.7247044444084167,
      "learning_rate": 6.131315789473684e-05,
      "loss": 0.6625,
      "step": 85
    },
    {
      "epoch": 0.08673726676752395,
      "grad_norm": 0.7117682099342346,
      "learning_rate": 6.078e-05,
      "loss": 0.6324,
      "step": 86
    },
    {
      "epoch": 0.08774583963691376,
      "grad_norm": 0.7648577094078064,
      "learning_rate": 6.024684210526315e-05,
      "loss": 0.6915,
      "step": 87
    },
    {
      "epoch": 0.08875441250630359,
      "grad_norm": 0.8128008842468262,
      "learning_rate": 5.9713684210526305e-05,
      "loss": 0.6515,
      "step": 88
    },
    {
      "epoch": 0.0897629853756934,
      "grad_norm": 0.8586528301239014,
      "learning_rate": 5.918052631578947e-05,
      "loss": 0.7476,
      "step": 89
    },
    {
      "epoch": 0.0907715582450832,
      "grad_norm": 0.9298937916755676,
      "learning_rate": 5.8647368421052634e-05,
      "loss": 0.792,
      "step": 90
    },
    {
      "epoch": 0.09178013111447302,
      "grad_norm": 0.8197916746139526,
      "learning_rate": 5.811421052631579e-05,
      "loss": 0.6175,
      "step": 91
    },
    {
      "epoch": 0.09278870398386284,
      "grad_norm": 0.7706024050712585,
      "learning_rate": 5.758105263157894e-05,
      "loss": 0.719,
      "step": 92
    },
    {
      "epoch": 0.09379727685325265,
      "grad_norm": 0.853099524974823,
      "learning_rate": 5.70478947368421e-05,
      "loss": 0.6546,
      "step": 93
    },
    {
      "epoch": 0.09480584972264246,
      "grad_norm": 0.796420156955719,
      "learning_rate": 5.6514736842105256e-05,
      "loss": 0.6689,
      "step": 94
    },
    {
      "epoch": 0.09581442259203228,
      "grad_norm": 0.7574595212936401,
      "learning_rate": 5.5981578947368424e-05,
      "loss": 0.5644,
      "step": 95
    },
    {
      "epoch": 0.09682299546142209,
      "grad_norm": 0.9858140349388123,
      "learning_rate": 5.544842105263158e-05,
      "loss": 0.83,
      "step": 96
    },
    {
      "epoch": 0.0978315683308119,
      "grad_norm": 0.9185313582420349,
      "learning_rate": 5.491526315789474e-05,
      "loss": 0.7675,
      "step": 97
    },
    {
      "epoch": 0.09884014120020171,
      "grad_norm": 0.9572092890739441,
      "learning_rate": 5.438210526315789e-05,
      "loss": 0.7159,
      "step": 98
    },
    {
      "epoch": 0.09984871406959153,
      "grad_norm": 1.0749398469924927,
      "learning_rate": 5.384894736842105e-05,
      "loss": 0.8607,
      "step": 99
    },
    {
      "epoch": 0.10085728693898134,
      "grad_norm": 1.266366958618164,
      "learning_rate": 5.331578947368421e-05,
      "loss": 0.8676,
      "step": 100
    },
    {
      "epoch": 0.10085728693898134,
      "eval_loss": 0.6350612640380859,
      "eval_runtime": 101.2691,
      "eval_samples_per_second": 4.128,
      "eval_steps_per_second": 1.037,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.648580773235917e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}