{
  "best_metric": 1.1512540578842163,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 2.989247311827957,
  "eval_steps": 50,
  "global_step": 139,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.021505376344086023,
      "grad_norm": 0.3019464313983917,
      "learning_rate": 1.16e-05,
      "loss": 1.4259,
      "step": 1
    },
    {
      "epoch": 0.021505376344086023,
      "eval_loss": 1.4605212211608887,
      "eval_runtime": 3.307,
      "eval_samples_per_second": 188.69,
      "eval_steps_per_second": 6.048,
      "step": 1
    },
    {
      "epoch": 0.043010752688172046,
      "grad_norm": 0.3766098916530609,
      "learning_rate": 2.32e-05,
      "loss": 1.3061,
      "step": 2
    },
    {
      "epoch": 0.06451612903225806,
      "grad_norm": 0.4208398163318634,
      "learning_rate": 3.48e-05,
      "loss": 1.2716,
      "step": 3
    },
    {
      "epoch": 0.08602150537634409,
      "grad_norm": 0.4907033145427704,
      "learning_rate": 4.64e-05,
      "loss": 1.3698,
      "step": 4
    },
    {
      "epoch": 0.10752688172043011,
      "grad_norm": 0.5819088816642761,
      "learning_rate": 5.8e-05,
      "loss": 1.5208,
      "step": 5
    },
    {
      "epoch": 0.12903225806451613,
      "grad_norm": 0.8909784555435181,
      "learning_rate": 6.96e-05,
      "loss": 1.6946,
      "step": 6
    },
    {
      "epoch": 0.15053763440860216,
      "grad_norm": 0.1904004067182541,
      "learning_rate": 8.12e-05,
      "loss": 1.3806,
      "step": 7
    },
    {
      "epoch": 0.17204301075268819,
      "grad_norm": 0.2766229510307312,
      "learning_rate": 9.28e-05,
      "loss": 1.2944,
      "step": 8
    },
    {
      "epoch": 0.1935483870967742,
      "grad_norm": 0.3707273602485657,
      "learning_rate": 0.0001044,
      "loss": 1.1893,
      "step": 9
    },
    {
      "epoch": 0.21505376344086022,
      "grad_norm": 0.4749780595302582,
      "learning_rate": 0.000116,
      "loss": 1.2153,
      "step": 10
    },
    {
      "epoch": 0.23655913978494625,
      "grad_norm": 0.5096077919006348,
      "learning_rate": 0.00011598280125101809,
      "loss": 1.3548,
      "step": 11
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 0.46667513251304626,
      "learning_rate": 0.00011593121520396772,
      "loss": 1.4838,
      "step": 12
    },
    {
      "epoch": 0.27956989247311825,
      "grad_norm": 0.2550894618034363,
      "learning_rate": 0.000115845272452486,
      "loss": 1.3795,
      "step": 13
    },
    {
      "epoch": 0.3010752688172043,
      "grad_norm": 0.2862931787967682,
      "learning_rate": 0.00011572502396580767,
      "loss": 1.2467,
      "step": 14
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 0.2682102620601654,
      "learning_rate": 0.00011557054105853753,
      "loss": 1.1907,
      "step": 15
    },
    {
      "epoch": 0.34408602150537637,
      "grad_norm": 0.27467212080955505,
      "learning_rate": 0.0001153819153483564,
      "loss": 1.1003,
      "step": 16
    },
    {
      "epoch": 0.3655913978494624,
      "grad_norm": 0.3005955219268799,
      "learning_rate": 0.00011515925870168636,
      "loss": 1.2234,
      "step": 17
    },
    {
      "epoch": 0.3870967741935484,
      "grad_norm": 0.3794748783111572,
      "learning_rate": 0.00011490270316734726,
      "loss": 1.4082,
      "step": 18
    },
    {
      "epoch": 0.40860215053763443,
      "grad_norm": 0.21310873329639435,
      "learning_rate": 0.00011461240089824378,
      "loss": 1.328,
      "step": 19
    },
    {
      "epoch": 0.43010752688172044,
      "grad_norm": 0.20409537851810455,
      "learning_rate": 0.0001142885240611295,
      "loss": 1.3031,
      "step": 20
    },
    {
      "epoch": 0.45161290322580644,
      "grad_norm": 0.22202569246292114,
      "learning_rate": 0.0001139312647345018,
      "loss": 1.1874,
      "step": 21
    },
    {
      "epoch": 0.4731182795698925,
      "grad_norm": 0.2386716902256012,
      "learning_rate": 0.00011354083479468755,
      "loss": 1.1251,
      "step": 22
    },
    {
      "epoch": 0.4946236559139785,
      "grad_norm": 0.26145026087760925,
      "learning_rate": 0.00011311746579018779,
      "loss": 1.1777,
      "step": 23
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 0.30247944593429565,
      "learning_rate": 0.00011266140880435544,
      "loss": 1.3137,
      "step": 24
    },
    {
      "epoch": 0.5376344086021505,
      "grad_norm": 0.587131917476654,
      "learning_rate": 0.00011217293430648779,
      "loss": 1.4008,
      "step": 25
    },
    {
      "epoch": 0.5591397849462365,
      "grad_norm": 0.14670686423778534,
      "learning_rate": 0.00011165233199142182,
      "loss": 1.2933,
      "step": 26
    },
    {
      "epoch": 0.5806451612903226,
      "grad_norm": 0.16923627257347107,
      "learning_rate": 0.00011109991060772776,
      "loss": 1.1914,
      "step": 27
    },
    {
      "epoch": 0.6021505376344086,
      "grad_norm": 0.19446203112602234,
      "learning_rate": 0.0001105159977746025,
      "loss": 1.1251,
      "step": 28
    },
    {
      "epoch": 0.6236559139784946,
      "grad_norm": 0.22473880648612976,
      "learning_rate": 0.00010990093978757173,
      "loss": 1.1065,
      "step": 29
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 0.2743987441062927,
      "learning_rate": 0.00010925510141311572,
      "loss": 1.2497,
      "step": 30
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.3620847761631012,
      "learning_rate": 0.00010857886567234085,
      "loss": 1.3353,
      "step": 31
    },
    {
      "epoch": 0.6881720430107527,
      "grad_norm": 0.14775493741035461,
      "learning_rate": 0.00010787263361382498,
      "loss": 1.2885,
      "step": 32
    },
    {
      "epoch": 0.7096774193548387,
      "grad_norm": 0.1633865386247635,
      "learning_rate": 0.00010713682407577149,
      "loss": 1.2385,
      "step": 33
    },
    {
      "epoch": 0.7311827956989247,
      "grad_norm": 0.1605488508939743,
      "learning_rate": 0.00010637187343761291,
      "loss": 1.0806,
      "step": 34
    },
    {
      "epoch": 0.7526881720430108,
      "grad_norm": 0.19970649480819702,
      "learning_rate": 0.00010557823536121162,
      "loss": 1.1132,
      "step": 35
    },
    {
      "epoch": 0.7741935483870968,
      "grad_norm": 0.24861781299114227,
      "learning_rate": 0.00010475638052181104,
      "loss": 1.1757,
      "step": 36
    },
    {
      "epoch": 0.7956989247311828,
      "grad_norm": 0.37152165174484253,
      "learning_rate": 0.00010390679632889674,
      "loss": 1.3386,
      "step": 37
    },
    {
      "epoch": 0.8172043010752689,
      "grad_norm": 0.16140355169773102,
      "learning_rate": 0.00010302998663713333,
      "loss": 1.3232,
      "step": 38
    },
    {
      "epoch": 0.8387096774193549,
      "grad_norm": 0.15119719505310059,
      "learning_rate": 0.00010212647144754812,
      "loss": 1.2435,
      "step": 39
    },
    {
      "epoch": 0.8602150537634409,
      "grad_norm": 0.15187421441078186,
      "learning_rate": 0.00010119678659913935,
      "loss": 1.0749,
      "step": 40
    },
    {
      "epoch": 0.8817204301075269,
      "grad_norm": 0.1807960420846939,
      "learning_rate": 0.00010024148345109112,
      "loss": 1.0696,
      "step": 41
    },
    {
      "epoch": 0.9032258064516129,
      "grad_norm": 0.2298922836780548,
      "learning_rate": 9.926112855578431e-05,
      "loss": 1.1653,
      "step": 42
    },
    {
      "epoch": 0.9247311827956989,
      "grad_norm": 0.29605555534362793,
      "learning_rate": 9.825630332279677e-05,
      "loss": 1.235,
      "step": 43
    },
    {
      "epoch": 0.946236559139785,
      "grad_norm": 0.2054792195558548,
      "learning_rate": 9.722760367409236e-05,
      "loss": 1.2058,
      "step": 44
    },
    {
      "epoch": 0.967741935483871,
      "grad_norm": 0.1517435610294342,
      "learning_rate": 9.617563969060338e-05,
      "loss": 1.1643,
      "step": 45
    },
    {
      "epoch": 0.989247311827957,
      "grad_norm": 0.2478746473789215,
      "learning_rate": 9.51010352504157e-05,
      "loss": 1.1552,
      "step": 46
    },
    {
      "epoch": 1.010752688172043,
      "grad_norm": 0.2573375105857849,
      "learning_rate": 9.400442765877141e-05,
      "loss": 2.141,
      "step": 47
    },
    {
      "epoch": 1.032258064516129,
      "grad_norm": 0.12741310894489288,
      "learning_rate": 9.288646727010848e-05,
      "loss": 1.0358,
      "step": 48
    },
    {
      "epoch": 1.053763440860215,
      "grad_norm": 0.1643674373626709,
      "learning_rate": 9.174781710236128e-05,
      "loss": 1.1719,
      "step": 49
    },
    {
      "epoch": 1.075268817204301,
      "grad_norm": 0.19035093486309052,
      "learning_rate": 9.058915244375091e-05,
      "loss": 1.0132,
      "step": 50
    },
    {
      "epoch": 1.075268817204301,
      "eval_loss": 1.1688854694366455,
      "eval_runtime": 3.3032,
      "eval_samples_per_second": 188.91,
      "eval_steps_per_second": 6.055,
      "step": 50
    },
    {
      "epoch": 1.096774193548387,
      "grad_norm": 0.24456371366977692,
      "learning_rate": 8.94111604522987e-05,
      "loss": 1.1068,
      "step": 51
    },
    {
      "epoch": 1.118279569892473,
      "grad_norm": 0.34046271443367004,
      "learning_rate": 8.821453974829996e-05,
      "loss": 1.2428,
      "step": 52
    },
    {
      "epoch": 1.139784946236559,
      "grad_norm": 0.247173473238945,
      "learning_rate": 8.7e-05,
      "loss": 0.9179,
      "step": 53
    },
    {
      "epoch": 1.1612903225806452,
      "grad_norm": 0.17861323058605194,
      "learning_rate": 8.576826150271813e-05,
      "loss": 1.3754,
      "step": 54
    },
    {
      "epoch": 1.1827956989247312,
      "grad_norm": 0.20689214766025543,
      "learning_rate": 8.452005475166903e-05,
      "loss": 1.2233,
      "step": 55
    },
    {
      "epoch": 1.2043010752688172,
      "grad_norm": 0.2131056934595108,
      "learning_rate": 8.325612000873509e-05,
      "loss": 1.0103,
      "step": 56
    },
    {
      "epoch": 1.2258064516129032,
      "grad_norm": 0.26632529497146606,
      "learning_rate": 8.197720686344642e-05,
      "loss": 1.0388,
      "step": 57
    },
    {
      "epoch": 1.2473118279569892,
      "grad_norm": 0.29926690459251404,
      "learning_rate": 8.068407378842904e-05,
      "loss": 1.1619,
      "step": 58
    },
    {
      "epoch": 1.2688172043010753,
      "grad_norm": 0.21377074718475342,
      "learning_rate": 7.937748768958499e-05,
      "loss": 0.548,
      "step": 59
    },
    {
      "epoch": 1.2903225806451613,
      "grad_norm": 0.215475931763649,
      "learning_rate": 7.805822345127066e-05,
      "loss": 1.9897,
      "step": 60
    },
    {
      "epoch": 1.3118279569892473,
      "grad_norm": 0.1549675166606903,
      "learning_rate": 7.672706347674388e-05,
      "loss": 0.9913,
      "step": 61
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 0.21127307415008545,
      "learning_rate": 7.53847972241514e-05,
      "loss": 1.0833,
      "step": 62
    },
    {
      "epoch": 1.3548387096774195,
      "grad_norm": 0.24489474296569824,
      "learning_rate": 7.403222073833276e-05,
      "loss": 1.0426,
      "step": 63
    },
    {
      "epoch": 1.3763440860215055,
      "grad_norm": 0.3033023178577423,
      "learning_rate": 7.267013617871748e-05,
      "loss": 1.1059,
      "step": 64
    },
    {
      "epoch": 1.3978494623655915,
      "grad_norm": 0.22320985794067383,
      "learning_rate": 7.129935134359642e-05,
      "loss": 0.6576,
      "step": 65
    },
    {
      "epoch": 1.4193548387096775,
      "grad_norm": 0.23948603868484497,
      "learning_rate": 6.992067919104844e-05,
      "loss": 1.8893,
      "step": 66
    },
    {
      "epoch": 1.4408602150537635,
      "grad_norm": 0.1648971438407898,
      "learning_rate": 6.85349373568073e-05,
      "loss": 0.9948,
      "step": 67
    },
    {
      "epoch": 1.4623655913978495,
      "grad_norm": 0.20279648900032043,
      "learning_rate": 6.714294766935446e-05,
      "loss": 1.0688,
      "step": 68
    },
    {
      "epoch": 1.4838709677419355,
      "grad_norm": 0.25218087434768677,
      "learning_rate": 6.574553566252508e-05,
      "loss": 1.0871,
      "step": 69
    },
    {
      "epoch": 1.5053763440860215,
      "grad_norm": 0.2963137626647949,
      "learning_rate": 6.434353008591673e-05,
      "loss": 1.0764,
      "step": 70
    },
    {
      "epoch": 1.5268817204301075,
      "grad_norm": 0.41593724489212036,
      "learning_rate": 6.293776241339087e-05,
      "loss": 1.2876,
      "step": 71
    },
    {
      "epoch": 1.5483870967741935,
      "grad_norm": 0.3219810426235199,
      "learning_rate": 6.152906634995881e-05,
      "loss": 1.1358,
      "step": 72
    },
    {
      "epoch": 1.5698924731182795,
      "grad_norm": 0.14590708911418915,
      "learning_rate": 6.011827733734423e-05,
      "loss": 1.0,
      "step": 73
    },
    {
      "epoch": 1.5913978494623655,
      "grad_norm": 0.18871888518333435,
      "learning_rate": 5.870623205851586e-05,
      "loss": 1.2011,
      "step": 74
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 0.20429526269435883,
      "learning_rate": 5.729376794148415e-05,
      "loss": 1.0139,
      "step": 75
    },
    {
      "epoch": 1.6344086021505375,
      "grad_norm": 0.2530740201473236,
      "learning_rate": 5.588172266265578e-05,
      "loss": 1.053,
      "step": 76
    },
    {
      "epoch": 1.6559139784946235,
      "grad_norm": 0.3459080755710602,
      "learning_rate": 5.4470933650041196e-05,
      "loss": 1.1392,
      "step": 77
    },
    {
      "epoch": 1.6774193548387095,
      "grad_norm": 0.2594045102596283,
      "learning_rate": 5.3062237586609127e-05,
      "loss": 0.8978,
      "step": 78
    },
    {
      "epoch": 1.6989247311827957,
      "grad_norm": 0.1727474331855774,
      "learning_rate": 5.16564699140833e-05,
      "loss": 1.4071,
      "step": 79
    },
    {
      "epoch": 1.7204301075268817,
      "grad_norm": 0.17758332192897797,
      "learning_rate": 5.025446433747493e-05,
      "loss": 1.1052,
      "step": 80
    },
    {
      "epoch": 1.7419354838709677,
      "grad_norm": 0.19625383615493774,
      "learning_rate": 4.885705233064554e-05,
      "loss": 1.012,
      "step": 81
    },
    {
      "epoch": 1.7634408602150538,
      "grad_norm": 0.25267520546913147,
      "learning_rate": 4.746506264319269e-05,
      "loss": 1.0759,
      "step": 82
    },
    {
      "epoch": 1.7849462365591398,
      "grad_norm": 0.31696927547454834,
      "learning_rate": 4.6079320808951565e-05,
      "loss": 1.1305,
      "step": 83
    },
    {
      "epoch": 1.8064516129032258,
      "grad_norm": 0.22307011485099792,
      "learning_rate": 4.470064865640358e-05,
      "loss": 0.5376,
      "step": 84
    },
    {
      "epoch": 1.827956989247312,
      "grad_norm": 0.2187909334897995,
      "learning_rate": 4.3329863821282514e-05,
      "loss": 1.8596,
      "step": 85
    },
    {
      "epoch": 1.849462365591398,
      "grad_norm": 0.16456525027751923,
      "learning_rate": 4.1967779261667245e-05,
      "loss": 1.0951,
      "step": 86
    },
    {
      "epoch": 1.870967741935484,
      "grad_norm": 0.19617106020450592,
      "learning_rate": 4.06152027758486e-05,
      "loss": 1.0345,
      "step": 87
    },
    {
      "epoch": 1.89247311827957,
      "grad_norm": 0.23399649560451508,
      "learning_rate": 3.9272936523256134e-05,
      "loss": 1.0037,
      "step": 88
    },
    {
      "epoch": 1.913978494623656,
      "grad_norm": 0.3092529773712158,
      "learning_rate": 3.794177654872934e-05,
      "loss": 1.1699,
      "step": 89
    },
    {
      "epoch": 1.935483870967742,
      "grad_norm": 0.23798301815986633,
      "learning_rate": 3.662251231041502e-05,
      "loss": 0.5823,
      "step": 90
    },
    {
      "epoch": 1.956989247311828,
      "grad_norm": 0.250249445438385,
      "learning_rate": 3.531592621157096e-05,
      "loss": 1.6714,
      "step": 91
    },
    {
      "epoch": 1.978494623655914,
      "grad_norm": 0.2460961937904358,
      "learning_rate": 3.402279313655359e-05,
      "loss": 1.2035,
      "step": 92
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.3106490969657898,
      "learning_rate": 3.274387999126492e-05,
      "loss": 1.5907,
      "step": 93
    },
    {
      "epoch": 2.021505376344086,
      "grad_norm": 0.14660653471946716,
      "learning_rate": 3.1479945248330964e-05,
      "loss": 1.2345,
      "step": 94
    },
    {
      "epoch": 2.043010752688172,
      "grad_norm": 0.17234809696674347,
      "learning_rate": 3.023173849728189e-05,
      "loss": 1.1059,
      "step": 95
    },
    {
      "epoch": 2.064516129032258,
      "grad_norm": 0.18329447507858276,
      "learning_rate": 2.9000000000000014e-05,
      "loss": 0.9879,
      "step": 96
    },
    {
      "epoch": 2.086021505376344,
      "grad_norm": 0.222031369805336,
      "learning_rate": 2.7785460251700053e-05,
      "loss": 0.9824,
      "step": 97
    },
    {
      "epoch": 2.10752688172043,
      "grad_norm": 0.284976065158844,
      "learning_rate": 2.6588839547701294e-05,
      "loss": 1.0231,
      "step": 98
    },
    {
      "epoch": 2.129032258064516,
      "grad_norm": 0.3977857828140259,
      "learning_rate": 2.541084755624909e-05,
      "loss": 1.0588,
      "step": 99
    },
    {
      "epoch": 2.150537634408602,
      "grad_norm": 0.1437109261751175,
      "learning_rate": 2.4252182897638746e-05,
      "loss": 1.1997,
      "step": 100
    },
    {
      "epoch": 2.150537634408602,
      "eval_loss": 1.1512540578842163,
      "eval_runtime": 3.6809,
      "eval_samples_per_second": 169.524,
      "eval_steps_per_second": 5.433,
      "step": 100
    },
    {
      "epoch": 2.172043010752688,
      "grad_norm": 0.17709468305110931,
      "learning_rate": 2.3113532729891522e-05,
      "loss": 1.1533,
      "step": 101
    },
    {
      "epoch": 2.193548387096774,
      "grad_norm": 0.1920265406370163,
      "learning_rate": 2.1995572341228588e-05,
      "loss": 1.0311,
      "step": 102
    },
    {
      "epoch": 2.21505376344086,
      "grad_norm": 0.2193988710641861,
      "learning_rate": 2.089896474958432e-05,
      "loss": 0.9886,
      "step": 103
    },
    {
      "epoch": 2.236559139784946,
      "grad_norm": 0.2702076733112335,
      "learning_rate": 1.9824360309396626e-05,
      "loss": 1.0325,
      "step": 104
    },
    {
      "epoch": 2.258064516129032,
      "grad_norm": 0.36857450008392334,
      "learning_rate": 1.877239632590764e-05,
      "loss": 1.0513,
      "step": 105
    },
    {
      "epoch": 2.279569892473118,
      "grad_norm": 0.1756919026374817,
      "learning_rate": 1.774369667720323e-05,
      "loss": 1.2273,
      "step": 106
    },
    {
      "epoch": 2.3010752688172045,
      "grad_norm": 0.17812420427799225,
      "learning_rate": 1.67388714442157e-05,
      "loss": 1.1593,
      "step": 107
    },
    {
      "epoch": 2.3225806451612905,
      "grad_norm": 0.18077994883060455,
      "learning_rate": 1.575851654890888e-05,
      "loss": 1.0257,
      "step": 108
    },
    {
      "epoch": 2.3440860215053765,
      "grad_norm": 0.21289852261543274,
      "learning_rate": 1.4803213400860651e-05,
      "loss": 0.9742,
      "step": 109
    },
    {
      "epoch": 2.3655913978494625,
      "grad_norm": 0.2521963119506836,
      "learning_rate": 1.3873528552451873e-05,
      "loss": 0.9653,
      "step": 110
    },
    {
      "epoch": 2.3870967741935485,
      "grad_norm": 0.32557451725006104,
      "learning_rate": 1.2970013362866697e-05,
      "loss": 1.013,
      "step": 111
    },
    {
      "epoch": 2.4086021505376345,
      "grad_norm": 0.2451455295085907,
      "learning_rate": 1.2093203671103267e-05,
      "loss": 1.2257,
      "step": 112
    },
    {
      "epoch": 2.4301075268817205,
      "grad_norm": 0.1599583923816681,
      "learning_rate": 1.1243619478188961e-05,
      "loss": 1.1622,
      "step": 113
    },
    {
      "epoch": 2.4516129032258065,
      "grad_norm": 0.17558430135250092,
      "learning_rate": 1.0421764638788365e-05,
      "loss": 1.0423,
      "step": 114
    },
    {
      "epoch": 2.4731182795698925,
      "grad_norm": 0.20616887509822845,
      "learning_rate": 9.628126562387086e-06,
      "loss": 0.9993,
      "step": 115
    },
    {
      "epoch": 2.4946236559139785,
      "grad_norm": 0.24216623604297638,
      "learning_rate": 8.863175924228501e-06,
      "loss": 0.9776,
      "step": 116
    },
    {
      "epoch": 2.5161290322580645,
      "grad_norm": 0.3300863802433014,
      "learning_rate": 8.127366386175014e-06,
      "loss": 1.054,
      "step": 117
    },
    {
      "epoch": 2.5376344086021505,
      "grad_norm": 0.5971299409866333,
      "learning_rate": 7.421134327659152e-06,
      "loss": 1.0757,
      "step": 118
    },
    {
      "epoch": 2.5591397849462365,
      "grad_norm": 0.14677409827709198,
      "learning_rate": 6.744898586884296e-06,
      "loss": 1.2074,
      "step": 119
    },
    {
      "epoch": 2.5806451612903225,
      "grad_norm": 0.17917855083942413,
      "learning_rate": 6.099060212428274e-06,
      "loss": 1.0735,
      "step": 120
    },
    {
      "epoch": 2.6021505376344085,
      "grad_norm": 0.2020336389541626,
      "learning_rate": 5.484002225397496e-06,
      "loss": 0.9547,
      "step": 121
    },
    {
      "epoch": 2.6236559139784945,
      "grad_norm": 0.23973648250102997,
      "learning_rate": 4.900089392272253e-06,
      "loss": 0.9674,
      "step": 122
    },
    {
      "epoch": 2.6451612903225805,
      "grad_norm": 0.2944413423538208,
      "learning_rate": 4.347668008578187e-06,
      "loss": 1.0609,
      "step": 123
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.40398478507995605,
      "learning_rate": 3.8270656935122204e-06,
      "loss": 1.0409,
      "step": 124
    },
    {
      "epoch": 2.688172043010753,
      "grad_norm": 0.15678246319293976,
      "learning_rate": 3.3385911956445625e-06,
      "loss": 1.2516,
      "step": 125
    },
    {
      "epoch": 2.709677419354839,
      "grad_norm": 0.18234221637248993,
      "learning_rate": 2.8825342098122193e-06,
      "loss": 1.1226,
      "step": 126
    },
    {
      "epoch": 2.731182795698925,
      "grad_norm": 0.19632849097251892,
      "learning_rate": 2.4591652053124607e-06,
      "loss": 1.0385,
      "step": 127
    },
    {
      "epoch": 2.752688172043011,
      "grad_norm": 0.22743487358093262,
      "learning_rate": 2.068735265498204e-06,
      "loss": 0.9646,
      "step": 128
    },
    {
      "epoch": 2.774193548387097,
      "grad_norm": 0.28680744767189026,
      "learning_rate": 1.711475938870494e-06,
      "loss": 0.9698,
      "step": 129
    },
    {
      "epoch": 2.795698924731183,
      "grad_norm": 0.37023359537124634,
      "learning_rate": 1.3875991017562305e-06,
      "loss": 1.0446,
      "step": 130
    },
    {
      "epoch": 2.817204301075269,
      "grad_norm": 0.17839553952217102,
      "learning_rate": 1.0972968326527323e-06,
      "loss": 1.2334,
      "step": 131
    },
    {
      "epoch": 2.838709677419355,
      "grad_norm": 0.17378120124340057,
      "learning_rate": 8.407412983136427e-07,
      "loss": 1.1619,
      "step": 132
    },
    {
      "epoch": 2.860215053763441,
      "grad_norm": 0.18641377985477448,
      "learning_rate": 6.180846516436054e-07,
      "loss": 1.0182,
      "step": 133
    },
    {
      "epoch": 2.881720430107527,
      "grad_norm": 0.2144346982240677,
      "learning_rate": 4.294589414624692e-07,
      "loss": 0.9406,
      "step": 134
    },
    {
      "epoch": 2.903225806451613,
      "grad_norm": 0.27374863624572754,
      "learning_rate": 2.7497603419232487e-07,
      "loss": 1.031,
      "step": 135
    },
    {
      "epoch": 2.924731182795699,
      "grad_norm": 0.3471708297729492,
      "learning_rate": 1.5472754751400464e-07,
      "loss": 1.0266,
      "step": 136
    },
    {
      "epoch": 2.946236559139785,
      "grad_norm": 0.24548716843128204,
      "learning_rate": 6.878479603226562e-08,
      "loss": 1.1736,
      "step": 137
    },
    {
      "epoch": 2.967741935483871,
      "grad_norm": 0.18722181022167206,
      "learning_rate": 1.71987489819172e-08,
      "loss": 1.0664,
      "step": 138
    },
    {
      "epoch": 2.989247311827957,
      "grad_norm": 0.27170056104660034,
      "learning_rate": 0.0,
      "loss": 0.9598,
      "step": 139
    }
  ],
  "logging_steps": 1,
  "max_steps": 139,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.224234711308042e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}