|
{ |
|
"best_metric": 0.23224526643753052, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 107, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009345794392523364, |
|
"grad_norm": 1.2118754386901855, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8063, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009345794392523364, |
|
"eval_loss": 0.7730486392974854, |
|
"eval_runtime": 14.8906, |
|
"eval_samples_per_second": 12.155, |
|
"eval_steps_per_second": 3.089, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.018691588785046728, |
|
"grad_norm": 1.7004547119140625, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8924, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.028037383177570093, |
|
"grad_norm": 1.4907296895980835, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6562, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.037383177570093455, |
|
"grad_norm": 1.358850359916687, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4478, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04672897196261682, |
|
"grad_norm": 1.1591500043869019, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4457, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.056074766355140186, |
|
"grad_norm": 1.2124463319778442, |
|
"learning_rate": 6e-05, |
|
"loss": 0.3413, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.06542056074766354, |
|
"grad_norm": 1.477152705192566, |
|
"learning_rate": 7e-05, |
|
"loss": 0.3224, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.07476635514018691, |
|
"grad_norm": 1.3230681419372559, |
|
"learning_rate": 8e-05, |
|
"loss": 0.4452, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.08411214953271028, |
|
"grad_norm": 1.0649213790893555, |
|
"learning_rate": 9e-05, |
|
"loss": 0.3927, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.09345794392523364, |
|
"grad_norm": 1.3603013753890991, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3909, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.102803738317757, |
|
"grad_norm": 1.195493459701538, |
|
"learning_rate": 9.997377845227576e-05, |
|
"loss": 0.3308, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.11214953271028037, |
|
"grad_norm": 0.8207054138183594, |
|
"learning_rate": 9.989514131188559e-05, |
|
"loss": 0.3578, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.12149532710280374, |
|
"grad_norm": 0.8612375855445862, |
|
"learning_rate": 9.97641710583307e-05, |
|
"loss": 0.3874, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1308411214953271, |
|
"grad_norm": 0.9588674306869507, |
|
"learning_rate": 9.958100506132127e-05, |
|
"loss": 0.449, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.14018691588785046, |
|
"grad_norm": 0.8636590242385864, |
|
"learning_rate": 9.934583543669453e-05, |
|
"loss": 0.3778, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.14953271028037382, |
|
"grad_norm": 0.9204433560371399, |
|
"learning_rate": 9.905890884491195e-05, |
|
"loss": 0.4883, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.1588785046728972, |
|
"grad_norm": 0.8820167779922485, |
|
"learning_rate": 9.872052623234632e-05, |
|
"loss": 0.443, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.16822429906542055, |
|
"grad_norm": 0.7248702645301819, |
|
"learning_rate": 9.833104251563056e-05, |
|
"loss": 0.3601, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.17757009345794392, |
|
"grad_norm": 0.7967797517776489, |
|
"learning_rate": 9.789086620939936e-05, |
|
"loss": 0.4402, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.18691588785046728, |
|
"grad_norm": 0.9615940451622009, |
|
"learning_rate": 9.740045899781352e-05, |
|
"loss": 0.4484, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19626168224299065, |
|
"grad_norm": 1.0163406133651733, |
|
"learning_rate": 9.686033525031719e-05, |
|
"loss": 0.4522, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.205607476635514, |
|
"grad_norm": 0.9328191876411438, |
|
"learning_rate": 9.627106148213522e-05, |
|
"loss": 0.3848, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.21495327102803738, |
|
"grad_norm": 1.0401077270507812, |
|
"learning_rate": 9.563325576007701e-05, |
|
"loss": 0.4394, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.22429906542056074, |
|
"grad_norm": 1.1316356658935547, |
|
"learning_rate": 9.494758705426978e-05, |
|
"loss": 0.5448, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.2336448598130841, |
|
"grad_norm": 1.076812505722046, |
|
"learning_rate": 9.421477453650118e-05, |
|
"loss": 0.5361, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.24299065420560748, |
|
"grad_norm": 1.1580618619918823, |
|
"learning_rate": 9.343558682590756e-05, |
|
"loss": 0.4144, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.2523364485981308, |
|
"grad_norm": 0.7226683497428894, |
|
"learning_rate": 9.261084118279847e-05, |
|
"loss": 0.5771, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.2616822429906542, |
|
"grad_norm": 0.8558270931243896, |
|
"learning_rate": 9.174140265146356e-05, |
|
"loss": 0.4875, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.27102803738317754, |
|
"grad_norm": 0.6313632726669312, |
|
"learning_rate": 9.082818315286055e-05, |
|
"loss": 0.2833, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.2803738317757009, |
|
"grad_norm": 0.54814213514328, |
|
"learning_rate": 8.987214052813604e-05, |
|
"loss": 0.1784, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2897196261682243, |
|
"grad_norm": 0.5720391273498535, |
|
"learning_rate": 8.887427753398248e-05, |
|
"loss": 0.1903, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.29906542056074764, |
|
"grad_norm": 0.5032756924629211, |
|
"learning_rate": 8.783564079088477e-05, |
|
"loss": 0.2295, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.308411214953271, |
|
"grad_norm": 0.7864720821380615, |
|
"learning_rate": 8.675731968536002e-05, |
|
"loss": 0.3057, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.3177570093457944, |
|
"grad_norm": 0.6048300266265869, |
|
"learning_rate": 8.564044522734147e-05, |
|
"loss": 0.2863, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.32710280373831774, |
|
"grad_norm": 0.7044374346733093, |
|
"learning_rate": 8.448618886390522e-05, |
|
"loss": 0.2952, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3364485981308411, |
|
"grad_norm": 0.5851072072982788, |
|
"learning_rate": 8.329576125058406e-05, |
|
"loss": 0.2035, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.34579439252336447, |
|
"grad_norm": 0.7135735154151917, |
|
"learning_rate": 8.2070410981557e-05, |
|
"loss": 0.312, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.35514018691588783, |
|
"grad_norm": 0.6680054068565369, |
|
"learning_rate": 8.081142328004637e-05, |
|
"loss": 0.3637, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.3644859813084112, |
|
"grad_norm": 0.5793293714523315, |
|
"learning_rate": 7.952011865029614e-05, |
|
"loss": 0.2209, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.37383177570093457, |
|
"grad_norm": 0.7005558013916016, |
|
"learning_rate": 7.819785149254532e-05, |
|
"loss": 0.3438, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38317757009345793, |
|
"grad_norm": 0.7467755079269409, |
|
"learning_rate": 7.68460086824492e-05, |
|
"loss": 0.3601, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.3925233644859813, |
|
"grad_norm": 0.7336003184318542, |
|
"learning_rate": 7.546600811643816e-05, |
|
"loss": 0.3653, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.40186915887850466, |
|
"grad_norm": 0.8589034080505371, |
|
"learning_rate": 7.405929722454026e-05, |
|
"loss": 0.356, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.411214953271028, |
|
"grad_norm": 0.8823071122169495, |
|
"learning_rate": 7.262735145222696e-05, |
|
"loss": 0.3742, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.4205607476635514, |
|
"grad_norm": 0.9084258675575256, |
|
"learning_rate": 7.117167271287453e-05, |
|
"loss": 0.3287, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.42990654205607476, |
|
"grad_norm": 0.8384641408920288, |
|
"learning_rate": 6.969378781246436e-05, |
|
"loss": 0.3742, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.4392523364485981, |
|
"grad_norm": 0.7774955034255981, |
|
"learning_rate": 6.819524684817438e-05, |
|
"loss": 0.3295, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.4485981308411215, |
|
"grad_norm": 0.7413498163223267, |
|
"learning_rate": 6.667762158254104e-05, |
|
"loss": 0.3136, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.45794392523364486, |
|
"grad_norm": 0.783761203289032, |
|
"learning_rate": 6.514250379489753e-05, |
|
"loss": 0.287, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4672897196261682, |
|
"grad_norm": 0.9675016403198242, |
|
"learning_rate": 6.359150361181715e-05, |
|
"loss": 0.353, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4672897196261682, |
|
"eval_loss": 0.283975750207901, |
|
"eval_runtime": 15.2093, |
|
"eval_samples_per_second": 11.901, |
|
"eval_steps_per_second": 3.024, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4766355140186916, |
|
"grad_norm": 1.1342438459396362, |
|
"learning_rate": 6.202624781831268e-05, |
|
"loss": 0.3326, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.48598130841121495, |
|
"grad_norm": 0.9279870986938477, |
|
"learning_rate": 6.044837815156377e-05, |
|
"loss": 0.3252, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.4953271028037383, |
|
"grad_norm": 0.5372065901756287, |
|
"learning_rate": 5.885954957896115e-05, |
|
"loss": 0.3693, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.5046728971962616, |
|
"grad_norm": 1.0543334484100342, |
|
"learning_rate": 5.726142856227452e-05, |
|
"loss": 0.6897, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.514018691588785, |
|
"grad_norm": 0.5793179869651794, |
|
"learning_rate": 5.565569130976422e-05, |
|
"loss": 0.2111, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5233644859813084, |
|
"grad_norm": 0.5614607930183411, |
|
"learning_rate": 5.4044022018070214e-05, |
|
"loss": 0.1905, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.5327102803738317, |
|
"grad_norm": 0.6062226891517639, |
|
"learning_rate": 5.242811110572242e-05, |
|
"loss": 0.2007, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5420560747663551, |
|
"grad_norm": 0.5884981155395508, |
|
"learning_rate": 5.080965344012508e-05, |
|
"loss": 0.1989, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5514018691588785, |
|
"grad_norm": 0.5349563360214233, |
|
"learning_rate": 4.919034655987493e-05, |
|
"loss": 0.2204, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5607476635514018, |
|
"grad_norm": 0.5922881960868835, |
|
"learning_rate": 4.7571888894277604e-05, |
|
"loss": 0.256, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5700934579439252, |
|
"grad_norm": 0.5709701776504517, |
|
"learning_rate": 4.59559779819298e-05, |
|
"loss": 0.2065, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5794392523364486, |
|
"grad_norm": 0.5307181477546692, |
|
"learning_rate": 4.434430869023579e-05, |
|
"loss": 0.1677, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.5887850467289719, |
|
"grad_norm": 0.5722399950027466, |
|
"learning_rate": 4.27385714377255e-05, |
|
"loss": 0.2411, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5981308411214953, |
|
"grad_norm": 0.489662766456604, |
|
"learning_rate": 4.114045042103887e-05, |
|
"loss": 0.1892, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.6074766355140186, |
|
"grad_norm": 0.8208240270614624, |
|
"learning_rate": 3.955162184843625e-05, |
|
"loss": 0.3509, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.616822429906542, |
|
"grad_norm": 0.7339255213737488, |
|
"learning_rate": 3.7973752181687335e-05, |
|
"loss": 0.2853, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.6261682242990654, |
|
"grad_norm": 0.7878158092498779, |
|
"learning_rate": 3.640849638818286e-05, |
|
"loss": 0.3666, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.6355140186915887, |
|
"grad_norm": 0.600767195224762, |
|
"learning_rate": 3.4857496205102474e-05, |
|
"loss": 0.279, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.6448598130841121, |
|
"grad_norm": 0.7032753825187683, |
|
"learning_rate": 3.332237841745898e-05, |
|
"loss": 0.3348, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.6542056074766355, |
|
"grad_norm": 0.7657633423805237, |
|
"learning_rate": 3.180475315182563e-05, |
|
"loss": 0.3218, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6635514018691588, |
|
"grad_norm": 0.6951287388801575, |
|
"learning_rate": 3.0306212187535653e-05, |
|
"loss": 0.3108, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6728971962616822, |
|
"grad_norm": 0.7856379151344299, |
|
"learning_rate": 2.882832728712551e-05, |
|
"loss": 0.3201, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6822429906542056, |
|
"grad_norm": 0.7768362760543823, |
|
"learning_rate": 2.737264854777306e-05, |
|
"loss": 0.2302, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.6915887850467289, |
|
"grad_norm": 0.7495273947715759, |
|
"learning_rate": 2.5940702775459747e-05, |
|
"loss": 0.2837, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.7009345794392523, |
|
"grad_norm": 0.6833620667457581, |
|
"learning_rate": 2.4533991883561868e-05, |
|
"loss": 0.288, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7102803738317757, |
|
"grad_norm": 0.9326289296150208, |
|
"learning_rate": 2.315399131755081e-05, |
|
"loss": 0.2953, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.719626168224299, |
|
"grad_norm": 0.7127006649971008, |
|
"learning_rate": 2.180214850745467e-05, |
|
"loss": 0.2908, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.7289719626168224, |
|
"grad_norm": 1.0180916786193848, |
|
"learning_rate": 2.0479881349703883e-05, |
|
"loss": 0.3716, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.7383177570093458, |
|
"grad_norm": 0.583429753780365, |
|
"learning_rate": 1.9188576719953633e-05, |
|
"loss": 0.5111, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.7476635514018691, |
|
"grad_norm": 0.9300647377967834, |
|
"learning_rate": 1.7929589018443016e-05, |
|
"loss": 0.4941, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7570093457943925, |
|
"grad_norm": 0.5266556143760681, |
|
"learning_rate": 1.6704238749415957e-05, |
|
"loss": 0.2014, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7663551401869159, |
|
"grad_norm": 0.3937390446662903, |
|
"learning_rate": 1.5513811136094787e-05, |
|
"loss": 0.1134, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7757009345794392, |
|
"grad_norm": 0.5176160931587219, |
|
"learning_rate": 1.4359554772658552e-05, |
|
"loss": 0.2048, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.7850467289719626, |
|
"grad_norm": 0.49737098813056946, |
|
"learning_rate": 1.3242680314639993e-05, |
|
"loss": 0.1656, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.794392523364486, |
|
"grad_norm": 0.7320502996444702, |
|
"learning_rate": 1.2164359209115234e-05, |
|
"loss": 0.3223, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8037383177570093, |
|
"grad_norm": 0.5976892709732056, |
|
"learning_rate": 1.1125722466017547e-05, |
|
"loss": 0.2132, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.8130841121495327, |
|
"grad_norm": 0.6687475442886353, |
|
"learning_rate": 1.012785947186397e-05, |
|
"loss": 0.2055, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.822429906542056, |
|
"grad_norm": 0.603065013885498, |
|
"learning_rate": 9.171816847139448e-06, |
|
"loss": 0.186, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.8317757009345794, |
|
"grad_norm": 0.5435901880264282, |
|
"learning_rate": 8.25859734853645e-06, |
|
"loss": 0.2109, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.8411214953271028, |
|
"grad_norm": 0.6504529714584351, |
|
"learning_rate": 7.389158817201542e-06, |
|
"loss": 0.2328, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8504672897196262, |
|
"grad_norm": 0.7861918210983276, |
|
"learning_rate": 6.564413174092443e-06, |
|
"loss": 0.2692, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.8598130841121495, |
|
"grad_norm": 0.5888051390647888, |
|
"learning_rate": 5.785225463498828e-06, |
|
"loss": 0.2117, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.8691588785046729, |
|
"grad_norm": 0.5872686505317688, |
|
"learning_rate": 5.05241294573024e-06, |
|
"loss": 0.2273, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.8785046728971962, |
|
"grad_norm": 0.7006099224090576, |
|
"learning_rate": 4.366744239922998e-06, |
|
"loss": 0.2502, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.8878504672897196, |
|
"grad_norm": 0.6123574376106262, |
|
"learning_rate": 3.728938517864794e-06, |
|
"loss": 0.2465, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.897196261682243, |
|
"grad_norm": 0.5895901918411255, |
|
"learning_rate": 3.1396647496828247e-06, |
|
"loss": 0.2473, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.9065420560747663, |
|
"grad_norm": 0.513577938079834, |
|
"learning_rate": 2.5995410021864787e-06, |
|
"loss": 0.1811, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.9158878504672897, |
|
"grad_norm": 0.5878252983093262, |
|
"learning_rate": 2.1091337906006482e-06, |
|
"loss": 0.2349, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.9252336448598131, |
|
"grad_norm": 0.5837914943695068, |
|
"learning_rate": 1.6689574843694433e-06, |
|
"loss": 0.2317, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.9345794392523364, |
|
"grad_norm": 0.5625414848327637, |
|
"learning_rate": 1.2794737676536994e-06, |
|
"loss": 0.1619, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9345794392523364, |
|
"eval_loss": 0.23224526643753052, |
|
"eval_runtime": 14.9082, |
|
"eval_samples_per_second": 12.141, |
|
"eval_steps_per_second": 3.086, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9439252336448598, |
|
"grad_norm": 0.7002161741256714, |
|
"learning_rate": 9.410911550880475e-07, |
|
"loss": 0.2533, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.9532710280373832, |
|
"grad_norm": 0.6093440651893616, |
|
"learning_rate": 6.54164563305465e-07, |
|
"loss": 0.1811, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.9626168224299065, |
|
"grad_norm": 0.7323504686355591, |
|
"learning_rate": 4.189949386787462e-07, |
|
"loss": 0.2834, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.9719626168224299, |
|
"grad_norm": 0.701038122177124, |
|
"learning_rate": 2.3582894166930268e-07, |
|
"loss": 0.2602, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.9813084112149533, |
|
"grad_norm": 0.44046589732170105, |
|
"learning_rate": 1.0485868811441757e-07, |
|
"loss": 0.2167, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9906542056074766, |
|
"grad_norm": 0.6788187026977539, |
|
"learning_rate": 2.6221547724253337e-08, |
|
"loss": 0.3156, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.561313807964325, |
|
"learning_rate": 0.0, |
|
"loss": 0.2182, |
|
"step": 107 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 107, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7536470352920576e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|