{
"best_metric": 1.1512540578842163,
"best_model_checkpoint": "miner_id_24/checkpoint-100",
"epoch": 2.989247311827957,
"eval_steps": 50,
"global_step": 139,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.021505376344086023,
"grad_norm": 0.3019464313983917,
"learning_rate": 1.16e-05,
"loss": 1.4259,
"step": 1
},
{
"epoch": 0.021505376344086023,
"eval_loss": 1.4605212211608887,
"eval_runtime": 3.307,
"eval_samples_per_second": 188.69,
"eval_steps_per_second": 6.048,
"step": 1
},
{
"epoch": 0.043010752688172046,
"grad_norm": 0.3766098916530609,
"learning_rate": 2.32e-05,
"loss": 1.3061,
"step": 2
},
{
"epoch": 0.06451612903225806,
"grad_norm": 0.4208398163318634,
"learning_rate": 3.48e-05,
"loss": 1.2716,
"step": 3
},
{
"epoch": 0.08602150537634409,
"grad_norm": 0.4907033145427704,
"learning_rate": 4.64e-05,
"loss": 1.3698,
"step": 4
},
{
"epoch": 0.10752688172043011,
"grad_norm": 0.5819088816642761,
"learning_rate": 5.8e-05,
"loss": 1.5208,
"step": 5
},
{
"epoch": 0.12903225806451613,
"grad_norm": 0.8909784555435181,
"learning_rate": 6.96e-05,
"loss": 1.6946,
"step": 6
},
{
"epoch": 0.15053763440860216,
"grad_norm": 0.1904004067182541,
"learning_rate": 8.12e-05,
"loss": 1.3806,
"step": 7
},
{
"epoch": 0.17204301075268819,
"grad_norm": 0.2766229510307312,
"learning_rate": 9.28e-05,
"loss": 1.2944,
"step": 8
},
{
"epoch": 0.1935483870967742,
"grad_norm": 0.3707273602485657,
"learning_rate": 0.0001044,
"loss": 1.1893,
"step": 9
},
{
"epoch": 0.21505376344086022,
"grad_norm": 0.4749780595302582,
"learning_rate": 0.000116,
"loss": 1.2153,
"step": 10
},
{
"epoch": 0.23655913978494625,
"grad_norm": 0.5096077919006348,
"learning_rate": 0.00011598280125101809,
"loss": 1.3548,
"step": 11
},
{
"epoch": 0.25806451612903225,
"grad_norm": 0.46667513251304626,
"learning_rate": 0.00011593121520396772,
"loss": 1.4838,
"step": 12
},
{
"epoch": 0.27956989247311825,
"grad_norm": 0.2550894618034363,
"learning_rate": 0.000115845272452486,
"loss": 1.3795,
"step": 13
},
{
"epoch": 0.3010752688172043,
"grad_norm": 0.2862931787967682,
"learning_rate": 0.00011572502396580767,
"loss": 1.2467,
"step": 14
},
{
"epoch": 0.3225806451612903,
"grad_norm": 0.2682102620601654,
"learning_rate": 0.00011557054105853753,
"loss": 1.1907,
"step": 15
},
{
"epoch": 0.34408602150537637,
"grad_norm": 0.27467212080955505,
"learning_rate": 0.0001153819153483564,
"loss": 1.1003,
"step": 16
},
{
"epoch": 0.3655913978494624,
"grad_norm": 0.3005955219268799,
"learning_rate": 0.00011515925870168636,
"loss": 1.2234,
"step": 17
},
{
"epoch": 0.3870967741935484,
"grad_norm": 0.3794748783111572,
"learning_rate": 0.00011490270316734726,
"loss": 1.4082,
"step": 18
},
{
"epoch": 0.40860215053763443,
"grad_norm": 0.21310873329639435,
"learning_rate": 0.00011461240089824378,
"loss": 1.328,
"step": 19
},
{
"epoch": 0.43010752688172044,
"grad_norm": 0.20409537851810455,
"learning_rate": 0.0001142885240611295,
"loss": 1.3031,
"step": 20
},
{
"epoch": 0.45161290322580644,
"grad_norm": 0.22202569246292114,
"learning_rate": 0.0001139312647345018,
"loss": 1.1874,
"step": 21
},
{
"epoch": 0.4731182795698925,
"grad_norm": 0.2386716902256012,
"learning_rate": 0.00011354083479468755,
"loss": 1.1251,
"step": 22
},
{
"epoch": 0.4946236559139785,
"grad_norm": 0.26145026087760925,
"learning_rate": 0.00011311746579018779,
"loss": 1.1777,
"step": 23
},
{
"epoch": 0.5161290322580645,
"grad_norm": 0.30247944593429565,
"learning_rate": 0.00011266140880435544,
"loss": 1.3137,
"step": 24
},
{
"epoch": 0.5376344086021505,
"grad_norm": 0.587131917476654,
"learning_rate": 0.00011217293430648779,
"loss": 1.4008,
"step": 25
},
{
"epoch": 0.5591397849462365,
"grad_norm": 0.14670686423778534,
"learning_rate": 0.00011165233199142182,
"loss": 1.2933,
"step": 26
},
{
"epoch": 0.5806451612903226,
"grad_norm": 0.16923627257347107,
"learning_rate": 0.00011109991060772776,
"loss": 1.1914,
"step": 27
},
{
"epoch": 0.6021505376344086,
"grad_norm": 0.19446203112602234,
"learning_rate": 0.0001105159977746025,
"loss": 1.1251,
"step": 28
},
{
"epoch": 0.6236559139784946,
"grad_norm": 0.22473880648612976,
"learning_rate": 0.00010990093978757173,
"loss": 1.1065,
"step": 29
},
{
"epoch": 0.6451612903225806,
"grad_norm": 0.2743987441062927,
"learning_rate": 0.00010925510141311572,
"loss": 1.2497,
"step": 30
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.3620847761631012,
"learning_rate": 0.00010857886567234085,
"loss": 1.3353,
"step": 31
},
{
"epoch": 0.6881720430107527,
"grad_norm": 0.14775493741035461,
"learning_rate": 0.00010787263361382498,
"loss": 1.2885,
"step": 32
},
{
"epoch": 0.7096774193548387,
"grad_norm": 0.1633865386247635,
"learning_rate": 0.00010713682407577149,
"loss": 1.2385,
"step": 33
},
{
"epoch": 0.7311827956989247,
"grad_norm": 0.1605488508939743,
"learning_rate": 0.00010637187343761291,
"loss": 1.0806,
"step": 34
},
{
"epoch": 0.7526881720430108,
"grad_norm": 0.19970649480819702,
"learning_rate": 0.00010557823536121162,
"loss": 1.1132,
"step": 35
},
{
"epoch": 0.7741935483870968,
"grad_norm": 0.24861781299114227,
"learning_rate": 0.00010475638052181104,
"loss": 1.1757,
"step": 36
},
{
"epoch": 0.7956989247311828,
"grad_norm": 0.37152165174484253,
"learning_rate": 0.00010390679632889674,
"loss": 1.3386,
"step": 37
},
{
"epoch": 0.8172043010752689,
"grad_norm": 0.16140355169773102,
"learning_rate": 0.00010302998663713333,
"loss": 1.3232,
"step": 38
},
{
"epoch": 0.8387096774193549,
"grad_norm": 0.15119719505310059,
"learning_rate": 0.00010212647144754812,
"loss": 1.2435,
"step": 39
},
{
"epoch": 0.8602150537634409,
"grad_norm": 0.15187421441078186,
"learning_rate": 0.00010119678659913935,
"loss": 1.0749,
"step": 40
},
{
"epoch": 0.8817204301075269,
"grad_norm": 0.1807960420846939,
"learning_rate": 0.00010024148345109112,
"loss": 1.0696,
"step": 41
},
{
"epoch": 0.9032258064516129,
"grad_norm": 0.2298922836780548,
"learning_rate": 9.926112855578431e-05,
"loss": 1.1653,
"step": 42
},
{
"epoch": 0.9247311827956989,
"grad_norm": 0.29605555534362793,
"learning_rate": 9.825630332279677e-05,
"loss": 1.235,
"step": 43
},
{
"epoch": 0.946236559139785,
"grad_norm": 0.2054792195558548,
"learning_rate": 9.722760367409236e-05,
"loss": 1.2058,
"step": 44
},
{
"epoch": 0.967741935483871,
"grad_norm": 0.1517435610294342,
"learning_rate": 9.617563969060338e-05,
"loss": 1.1643,
"step": 45
},
{
"epoch": 0.989247311827957,
"grad_norm": 0.2478746473789215,
"learning_rate": 9.51010352504157e-05,
"loss": 1.1552,
"step": 46
},
{
"epoch": 1.010752688172043,
"grad_norm": 0.2573375105857849,
"learning_rate": 9.400442765877141e-05,
"loss": 2.141,
"step": 47
},
{
"epoch": 1.032258064516129,
"grad_norm": 0.12741310894489288,
"learning_rate": 9.288646727010848e-05,
"loss": 1.0358,
"step": 48
},
{
"epoch": 1.053763440860215,
"grad_norm": 0.1643674373626709,
"learning_rate": 9.174781710236128e-05,
"loss": 1.1719,
"step": 49
},
{
"epoch": 1.075268817204301,
"grad_norm": 0.19035093486309052,
"learning_rate": 9.058915244375091e-05,
"loss": 1.0132,
"step": 50
},
{
"epoch": 1.075268817204301,
"eval_loss": 1.1688854694366455,
"eval_runtime": 3.3032,
"eval_samples_per_second": 188.91,
"eval_steps_per_second": 6.055,
"step": 50
},
{
"epoch": 1.096774193548387,
"grad_norm": 0.24456371366977692,
"learning_rate": 8.94111604522987e-05,
"loss": 1.1068,
"step": 51
},
{
"epoch": 1.118279569892473,
"grad_norm": 0.34046271443367004,
"learning_rate": 8.821453974829996e-05,
"loss": 1.2428,
"step": 52
},
{
"epoch": 1.139784946236559,
"grad_norm": 0.247173473238945,
"learning_rate": 8.7e-05,
"loss": 0.9179,
"step": 53
},
{
"epoch": 1.1612903225806452,
"grad_norm": 0.17861323058605194,
"learning_rate": 8.576826150271813e-05,
"loss": 1.3754,
"step": 54
},
{
"epoch": 1.1827956989247312,
"grad_norm": 0.20689214766025543,
"learning_rate": 8.452005475166903e-05,
"loss": 1.2233,
"step": 55
},
{
"epoch": 1.2043010752688172,
"grad_norm": 0.2131056934595108,
"learning_rate": 8.325612000873509e-05,
"loss": 1.0103,
"step": 56
},
{
"epoch": 1.2258064516129032,
"grad_norm": 0.26632529497146606,
"learning_rate": 8.197720686344642e-05,
"loss": 1.0388,
"step": 57
},
{
"epoch": 1.2473118279569892,
"grad_norm": 0.29926690459251404,
"learning_rate": 8.068407378842904e-05,
"loss": 1.1619,
"step": 58
},
{
"epoch": 1.2688172043010753,
"grad_norm": 0.21377074718475342,
"learning_rate": 7.937748768958499e-05,
"loss": 0.548,
"step": 59
},
{
"epoch": 1.2903225806451613,
"grad_norm": 0.215475931763649,
"learning_rate": 7.805822345127066e-05,
"loss": 1.9897,
"step": 60
},
{
"epoch": 1.3118279569892473,
"grad_norm": 0.1549675166606903,
"learning_rate": 7.672706347674388e-05,
"loss": 0.9913,
"step": 61
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.21127307415008545,
"learning_rate": 7.53847972241514e-05,
"loss": 1.0833,
"step": 62
},
{
"epoch": 1.3548387096774195,
"grad_norm": 0.24489474296569824,
"learning_rate": 7.403222073833276e-05,
"loss": 1.0426,
"step": 63
},
{
"epoch": 1.3763440860215055,
"grad_norm": 0.3033023178577423,
"learning_rate": 7.267013617871748e-05,
"loss": 1.1059,
"step": 64
},
{
"epoch": 1.3978494623655915,
"grad_norm": 0.22320985794067383,
"learning_rate": 7.129935134359642e-05,
"loss": 0.6576,
"step": 65
},
{
"epoch": 1.4193548387096775,
"grad_norm": 0.23948603868484497,
"learning_rate": 6.992067919104844e-05,
"loss": 1.8893,
"step": 66
},
{
"epoch": 1.4408602150537635,
"grad_norm": 0.1648971438407898,
"learning_rate": 6.85349373568073e-05,
"loss": 0.9948,
"step": 67
},
{
"epoch": 1.4623655913978495,
"grad_norm": 0.20279648900032043,
"learning_rate": 6.714294766935446e-05,
"loss": 1.0688,
"step": 68
},
{
"epoch": 1.4838709677419355,
"grad_norm": 0.25218087434768677,
"learning_rate": 6.574553566252508e-05,
"loss": 1.0871,
"step": 69
},
{
"epoch": 1.5053763440860215,
"grad_norm": 0.2963137626647949,
"learning_rate": 6.434353008591673e-05,
"loss": 1.0764,
"step": 70
},
{
"epoch": 1.5268817204301075,
"grad_norm": 0.41593724489212036,
"learning_rate": 6.293776241339087e-05,
"loss": 1.2876,
"step": 71
},
{
"epoch": 1.5483870967741935,
"grad_norm": 0.3219810426235199,
"learning_rate": 6.152906634995881e-05,
"loss": 1.1358,
"step": 72
},
{
"epoch": 1.5698924731182795,
"grad_norm": 0.14590708911418915,
"learning_rate": 6.011827733734423e-05,
"loss": 1.0,
"step": 73
},
{
"epoch": 1.5913978494623655,
"grad_norm": 0.18871888518333435,
"learning_rate": 5.870623205851586e-05,
"loss": 1.2011,
"step": 74
},
{
"epoch": 1.6129032258064515,
"grad_norm": 0.20429526269435883,
"learning_rate": 5.729376794148415e-05,
"loss": 1.0139,
"step": 75
},
{
"epoch": 1.6344086021505375,
"grad_norm": 0.2530740201473236,
"learning_rate": 5.588172266265578e-05,
"loss": 1.053,
"step": 76
},
{
"epoch": 1.6559139784946235,
"grad_norm": 0.3459080755710602,
"learning_rate": 5.4470933650041196e-05,
"loss": 1.1392,
"step": 77
},
{
"epoch": 1.6774193548387095,
"grad_norm": 0.2594045102596283,
"learning_rate": 5.3062237586609127e-05,
"loss": 0.8978,
"step": 78
},
{
"epoch": 1.6989247311827957,
"grad_norm": 0.1727474331855774,
"learning_rate": 5.16564699140833e-05,
"loss": 1.4071,
"step": 79
},
{
"epoch": 1.7204301075268817,
"grad_norm": 0.17758332192897797,
"learning_rate": 5.025446433747493e-05,
"loss": 1.1052,
"step": 80
},
{
"epoch": 1.7419354838709677,
"grad_norm": 0.19625383615493774,
"learning_rate": 4.885705233064554e-05,
"loss": 1.012,
"step": 81
},
{
"epoch": 1.7634408602150538,
"grad_norm": 0.25267520546913147,
"learning_rate": 4.746506264319269e-05,
"loss": 1.0759,
"step": 82
},
{
"epoch": 1.7849462365591398,
"grad_norm": 0.31696927547454834,
"learning_rate": 4.6079320808951565e-05,
"loss": 1.1305,
"step": 83
},
{
"epoch": 1.8064516129032258,
"grad_norm": 0.22307011485099792,
"learning_rate": 4.470064865640358e-05,
"loss": 0.5376,
"step": 84
},
{
"epoch": 1.827956989247312,
"grad_norm": 0.2187909334897995,
"learning_rate": 4.3329863821282514e-05,
"loss": 1.8596,
"step": 85
},
{
"epoch": 1.849462365591398,
"grad_norm": 0.16456525027751923,
"learning_rate": 4.1967779261667245e-05,
"loss": 1.0951,
"step": 86
},
{
"epoch": 1.870967741935484,
"grad_norm": 0.19617106020450592,
"learning_rate": 4.06152027758486e-05,
"loss": 1.0345,
"step": 87
},
{
"epoch": 1.89247311827957,
"grad_norm": 0.23399649560451508,
"learning_rate": 3.9272936523256134e-05,
"loss": 1.0037,
"step": 88
},
{
"epoch": 1.913978494623656,
"grad_norm": 0.3092529773712158,
"learning_rate": 3.794177654872934e-05,
"loss": 1.1699,
"step": 89
},
{
"epoch": 1.935483870967742,
"grad_norm": 0.23798301815986633,
"learning_rate": 3.662251231041502e-05,
"loss": 0.5823,
"step": 90
},
{
"epoch": 1.956989247311828,
"grad_norm": 0.250249445438385,
"learning_rate": 3.531592621157096e-05,
"loss": 1.6714,
"step": 91
},
{
"epoch": 1.978494623655914,
"grad_norm": 0.2460961937904358,
"learning_rate": 3.402279313655359e-05,
"loss": 1.2035,
"step": 92
},
{
"epoch": 2.0,
"grad_norm": 0.3106490969657898,
"learning_rate": 3.274387999126492e-05,
"loss": 1.5907,
"step": 93
},
{
"epoch": 2.021505376344086,
"grad_norm": 0.14660653471946716,
"learning_rate": 3.1479945248330964e-05,
"loss": 1.2345,
"step": 94
},
{
"epoch": 2.043010752688172,
"grad_norm": 0.17234809696674347,
"learning_rate": 3.023173849728189e-05,
"loss": 1.1059,
"step": 95
},
{
"epoch": 2.064516129032258,
"grad_norm": 0.18329447507858276,
"learning_rate": 2.9000000000000014e-05,
"loss": 0.9879,
"step": 96
},
{
"epoch": 2.086021505376344,
"grad_norm": 0.222031369805336,
"learning_rate": 2.7785460251700053e-05,
"loss": 0.9824,
"step": 97
},
{
"epoch": 2.10752688172043,
"grad_norm": 0.284976065158844,
"learning_rate": 2.6588839547701294e-05,
"loss": 1.0231,
"step": 98
},
{
"epoch": 2.129032258064516,
"grad_norm": 0.3977857828140259,
"learning_rate": 2.541084755624909e-05,
"loss": 1.0588,
"step": 99
},
{
"epoch": 2.150537634408602,
"grad_norm": 0.1437109261751175,
"learning_rate": 2.4252182897638746e-05,
"loss": 1.1997,
"step": 100
},
{
"epoch": 2.150537634408602,
"eval_loss": 1.1512540578842163,
"eval_runtime": 3.6809,
"eval_samples_per_second": 169.524,
"eval_steps_per_second": 5.433,
"step": 100
},
{
"epoch": 2.172043010752688,
"grad_norm": 0.17709468305110931,
"learning_rate": 2.3113532729891522e-05,
"loss": 1.1533,
"step": 101
},
{
"epoch": 2.193548387096774,
"grad_norm": 0.1920265406370163,
"learning_rate": 2.1995572341228588e-05,
"loss": 1.0311,
"step": 102
},
{
"epoch": 2.21505376344086,
"grad_norm": 0.2193988710641861,
"learning_rate": 2.089896474958432e-05,
"loss": 0.9886,
"step": 103
},
{
"epoch": 2.236559139784946,
"grad_norm": 0.2702076733112335,
"learning_rate": 1.9824360309396626e-05,
"loss": 1.0325,
"step": 104
},
{
"epoch": 2.258064516129032,
"grad_norm": 0.36857450008392334,
"learning_rate": 1.877239632590764e-05,
"loss": 1.0513,
"step": 105
},
{
"epoch": 2.279569892473118,
"grad_norm": 0.1756919026374817,
"learning_rate": 1.774369667720323e-05,
"loss": 1.2273,
"step": 106
},
{
"epoch": 2.3010752688172045,
"grad_norm": 0.17812420427799225,
"learning_rate": 1.67388714442157e-05,
"loss": 1.1593,
"step": 107
},
{
"epoch": 2.3225806451612905,
"grad_norm": 0.18077994883060455,
"learning_rate": 1.575851654890888e-05,
"loss": 1.0257,
"step": 108
},
{
"epoch": 2.3440860215053765,
"grad_norm": 0.21289852261543274,
"learning_rate": 1.4803213400860651e-05,
"loss": 0.9742,
"step": 109
},
{
"epoch": 2.3655913978494625,
"grad_norm": 0.2521963119506836,
"learning_rate": 1.3873528552451873e-05,
"loss": 0.9653,
"step": 110
},
{
"epoch": 2.3870967741935485,
"grad_norm": 0.32557451725006104,
"learning_rate": 1.2970013362866697e-05,
"loss": 1.013,
"step": 111
},
{
"epoch": 2.4086021505376345,
"grad_norm": 0.2451455295085907,
"learning_rate": 1.2093203671103267e-05,
"loss": 1.2257,
"step": 112
},
{
"epoch": 2.4301075268817205,
"grad_norm": 0.1599583923816681,
"learning_rate": 1.1243619478188961e-05,
"loss": 1.1622,
"step": 113
},
{
"epoch": 2.4516129032258065,
"grad_norm": 0.17558430135250092,
"learning_rate": 1.0421764638788365e-05,
"loss": 1.0423,
"step": 114
},
{
"epoch": 2.4731182795698925,
"grad_norm": 0.20616887509822845,
"learning_rate": 9.628126562387086e-06,
"loss": 0.9993,
"step": 115
},
{
"epoch": 2.4946236559139785,
"grad_norm": 0.24216623604297638,
"learning_rate": 8.863175924228501e-06,
"loss": 0.9776,
"step": 116
},
{
"epoch": 2.5161290322580645,
"grad_norm": 0.3300863802433014,
"learning_rate": 8.127366386175014e-06,
"loss": 1.054,
"step": 117
},
{
"epoch": 2.5376344086021505,
"grad_norm": 0.5971299409866333,
"learning_rate": 7.421134327659152e-06,
"loss": 1.0757,
"step": 118
},
{
"epoch": 2.5591397849462365,
"grad_norm": 0.14677409827709198,
"learning_rate": 6.744898586884296e-06,
"loss": 1.2074,
"step": 119
},
{
"epoch": 2.5806451612903225,
"grad_norm": 0.17917855083942413,
"learning_rate": 6.099060212428274e-06,
"loss": 1.0735,
"step": 120
},
{
"epoch": 2.6021505376344085,
"grad_norm": 0.2020336389541626,
"learning_rate": 5.484002225397496e-06,
"loss": 0.9547,
"step": 121
},
{
"epoch": 2.6236559139784945,
"grad_norm": 0.23973648250102997,
"learning_rate": 4.900089392272253e-06,
"loss": 0.9674,
"step": 122
},
{
"epoch": 2.6451612903225805,
"grad_norm": 0.2944413423538208,
"learning_rate": 4.347668008578187e-06,
"loss": 1.0609,
"step": 123
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.40398478507995605,
"learning_rate": 3.8270656935122204e-06,
"loss": 1.0409,
"step": 124
},
{
"epoch": 2.688172043010753,
"grad_norm": 0.15678246319293976,
"learning_rate": 3.3385911956445625e-06,
"loss": 1.2516,
"step": 125
},
{
"epoch": 2.709677419354839,
"grad_norm": 0.18234221637248993,
"learning_rate": 2.8825342098122193e-06,
"loss": 1.1226,
"step": 126
},
{
"epoch": 2.731182795698925,
"grad_norm": 0.19632849097251892,
"learning_rate": 2.4591652053124607e-06,
"loss": 1.0385,
"step": 127
},
{
"epoch": 2.752688172043011,
"grad_norm": 0.22743487358093262,
"learning_rate": 2.068735265498204e-06,
"loss": 0.9646,
"step": 128
},
{
"epoch": 2.774193548387097,
"grad_norm": 0.28680744767189026,
"learning_rate": 1.711475938870494e-06,
"loss": 0.9698,
"step": 129
},
{
"epoch": 2.795698924731183,
"grad_norm": 0.37023359537124634,
"learning_rate": 1.3875991017562305e-06,
"loss": 1.0446,
"step": 130
},
{
"epoch": 2.817204301075269,
"grad_norm": 0.17839553952217102,
"learning_rate": 1.0972968326527323e-06,
"loss": 1.2334,
"step": 131
},
{
"epoch": 2.838709677419355,
"grad_norm": 0.17378120124340057,
"learning_rate": 8.407412983136427e-07,
"loss": 1.1619,
"step": 132
},
{
"epoch": 2.860215053763441,
"grad_norm": 0.18641377985477448,
"learning_rate": 6.180846516436054e-07,
"loss": 1.0182,
"step": 133
},
{
"epoch": 2.881720430107527,
"grad_norm": 0.2144346982240677,
"learning_rate": 4.294589414624692e-07,
"loss": 0.9406,
"step": 134
},
{
"epoch": 2.903225806451613,
"grad_norm": 0.27374863624572754,
"learning_rate": 2.7497603419232487e-07,
"loss": 1.031,
"step": 135
},
{
"epoch": 2.924731182795699,
"grad_norm": 0.3471708297729492,
"learning_rate": 1.5472754751400464e-07,
"loss": 1.0266,
"step": 136
},
{
"epoch": 2.946236559139785,
"grad_norm": 0.24548716843128204,
"learning_rate": 6.878479603226562e-08,
"loss": 1.1736,
"step": 137
},
{
"epoch": 2.967741935483871,
"grad_norm": 0.18722181022167206,
"learning_rate": 1.71987489819172e-08,
"loss": 1.0664,
"step": 138
},
{
"epoch": 2.989247311827957,
"grad_norm": 0.27170056104660034,
"learning_rate": 0.0,
"loss": 0.9598,
"step": 139
}
],
"logging_steps": 1,
"max_steps": 139,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.224234711308042e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}