{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.030557677616501147,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00030557677616501144,
      "grad_norm": 2.4270410537719727,
      "learning_rate": 2e-05,
      "loss": 4.3691,
      "step": 1
    },
    {
      "epoch": 0.00030557677616501144,
      "eval_loss": 3.825514316558838,
      "eval_runtime": 44.62,
      "eval_samples_per_second": 30.883,
      "eval_steps_per_second": 15.442,
      "step": 1
    },
    {
      "epoch": 0.0006111535523300229,
      "grad_norm": 1.158055305480957,
      "learning_rate": 4e-05,
      "loss": 1.9265,
      "step": 2
    },
    {
      "epoch": 0.0009167303284950344,
      "grad_norm": 1.5009372234344482,
      "learning_rate": 6e-05,
      "loss": 4.1948,
      "step": 3
    },
    {
      "epoch": 0.0012223071046600458,
      "grad_norm": 1.4214242696762085,
      "learning_rate": 8e-05,
      "loss": 3.1941,
      "step": 4
    },
    {
      "epoch": 0.0015278838808250573,
      "grad_norm": 1.7152305841445923,
      "learning_rate": 0.0001,
      "loss": 3.3993,
      "step": 5
    },
    {
      "epoch": 0.0018334606569900688,
      "grad_norm": 1.6367850303649902,
      "learning_rate": 0.00012,
      "loss": 4.2454,
      "step": 6
    },
    {
      "epoch": 0.0021390374331550803,
      "grad_norm": 1.2402284145355225,
      "learning_rate": 0.00014,
      "loss": 2.3504,
      "step": 7
    },
    {
      "epoch": 0.0024446142093200915,
      "grad_norm": 1.5380349159240723,
      "learning_rate": 0.00016,
      "loss": 3.6695,
      "step": 8
    },
    {
      "epoch": 0.0027501909854851033,
      "grad_norm": 2.2115941047668457,
      "learning_rate": 0.00018,
      "loss": 3.8667,
      "step": 9
    },
    {
      "epoch": 0.0030557677616501145,
      "grad_norm": 2.173429489135742,
      "learning_rate": 0.0002,
      "loss": 3.0004,
      "step": 10
    },
    {
      "epoch": 0.0033613445378151263,
      "grad_norm": 2.2001826763153076,
      "learning_rate": 0.0001999863304992469,
      "loss": 2.6077,
      "step": 11
    },
    {
      "epoch": 0.0036669213139801375,
      "grad_norm": 2.234281539916992,
      "learning_rate": 0.00019994532573409262,
      "loss": 3.6697,
      "step": 12
    },
    {
      "epoch": 0.003972498090145149,
      "grad_norm": 2.308614492416382,
      "learning_rate": 0.00019987699691483048,
      "loss": 2.7118,
      "step": 13
    },
    {
      "epoch": 0.0042780748663101605,
      "grad_norm": 2.648648500442505,
      "learning_rate": 0.00019978136272187747,
      "loss": 3.2435,
      "step": 14
    },
    {
      "epoch": 0.004583651642475172,
      "grad_norm": 1.7936925888061523,
      "learning_rate": 0.000199658449300667,
      "loss": 2.1392,
      "step": 15
    },
    {
      "epoch": 0.004889228418640183,
      "grad_norm": 2.6367146968841553,
      "learning_rate": 0.00019950829025450114,
      "loss": 3.592,
      "step": 16
    },
    {
      "epoch": 0.005194805194805195,
      "grad_norm": 2.5174176692962646,
      "learning_rate": 0.00019933092663536382,
      "loss": 2.977,
      "step": 17
    },
    {
      "epoch": 0.0055003819709702065,
      "grad_norm": 2.296748399734497,
      "learning_rate": 0.00019912640693269752,
      "loss": 3.093,
      "step": 18
    },
    {
      "epoch": 0.005805958747135217,
      "grad_norm": 3.448976993560791,
      "learning_rate": 0.00019889478706014687,
      "loss": 3.0724,
      "step": 19
    },
    {
      "epoch": 0.006111535523300229,
      "grad_norm": 2.230823278427124,
      "learning_rate": 0.00019863613034027224,
      "loss": 2.7141,
      "step": 20
    },
    {
      "epoch": 0.006417112299465241,
      "grad_norm": 2.3661162853240967,
      "learning_rate": 0.00019835050748723824,
      "loss": 3.6851,
      "step": 21
    },
    {
      "epoch": 0.0067226890756302525,
      "grad_norm": 1.993086338043213,
      "learning_rate": 0.00019803799658748094,
      "loss": 3.8929,
      "step": 22
    },
    {
      "epoch": 0.007028265851795263,
      "grad_norm": 1.727256417274475,
      "learning_rate": 0.00019769868307835994,
      "loss": 2.7433,
      "step": 23
    },
    {
      "epoch": 0.007333842627960275,
      "grad_norm": 2.6137921810150146,
      "learning_rate": 0.0001973326597248006,
      "loss": 3.9081,
      "step": 24
    },
    {
      "epoch": 0.007639419404125287,
      "grad_norm": 1.9214521646499634,
      "learning_rate": 0.00019694002659393305,
      "loss": 3.451,
      "step": 25
    },
    {
      "epoch": 0.007944996180290298,
      "grad_norm": 2.5544991493225098,
      "learning_rate": 0.00019652089102773488,
      "loss": 2.4955,
      "step": 26
    },
    {
      "epoch": 0.00825057295645531,
      "grad_norm": 3.0405311584472656,
      "learning_rate": 0.00019607536761368484,
      "loss": 3.6993,
      "step": 27
    },
    {
      "epoch": 0.008556149732620321,
      "grad_norm": 1.412721037864685,
      "learning_rate": 0.00019560357815343577,
      "loss": 3.4813,
      "step": 28
    },
    {
      "epoch": 0.008861726508785332,
      "grad_norm": 1.844821572303772,
      "learning_rate": 0.00019510565162951537,
      "loss": 2.4911,
      "step": 29
    },
    {
      "epoch": 0.009167303284950344,
      "grad_norm": 2.185194730758667,
      "learning_rate": 0.00019458172417006347,
      "loss": 3.0409,
      "step": 30
    },
    {
      "epoch": 0.009472880061115355,
      "grad_norm": 2.528438091278076,
      "learning_rate": 0.00019403193901161613,
      "loss": 4.4194,
      "step": 31
    },
    {
      "epoch": 0.009778456837280366,
      "grad_norm": 2.3545053005218506,
      "learning_rate": 0.0001934564464599461,
      "loss": 3.9219,
      "step": 32
    },
    {
      "epoch": 0.010084033613445379,
      "grad_norm": 2.4586379528045654,
      "learning_rate": 0.00019285540384897073,
      "loss": 3.8094,
      "step": 33
    },
    {
      "epoch": 0.01038961038961039,
      "grad_norm": 2.5422775745391846,
      "learning_rate": 0.00019222897549773848,
      "loss": 1.9784,
      "step": 34
    },
    {
      "epoch": 0.0106951871657754,
      "grad_norm": 1.6043715476989746,
      "learning_rate": 0.00019157733266550575,
      "loss": 2.5754,
      "step": 35
    },
    {
      "epoch": 0.011000763941940413,
      "grad_norm": 2.428220510482788,
      "learning_rate": 0.00019090065350491626,
      "loss": 3.7738,
      "step": 36
    },
    {
      "epoch": 0.011306340718105424,
      "grad_norm": 2.145481586456299,
      "learning_rate": 0.00019019912301329592,
      "loss": 3.2982,
      "step": 37
    },
    {
      "epoch": 0.011611917494270435,
      "grad_norm": 1.5569844245910645,
      "learning_rate": 0.00018947293298207635,
      "loss": 1.7218,
      "step": 38
    },
    {
      "epoch": 0.011917494270435447,
      "grad_norm": 2.1777217388153076,
      "learning_rate": 0.0001887222819443612,
      "loss": 2.581,
      "step": 39
    },
    {
      "epoch": 0.012223071046600458,
      "grad_norm": 2.7405147552490234,
      "learning_rate": 0.0001879473751206489,
      "loss": 3.8181,
      "step": 40
    },
    {
      "epoch": 0.012528647822765469,
      "grad_norm": 1.6006349325180054,
      "learning_rate": 0.00018714842436272773,
      "loss": 2.0009,
      "step": 41
    },
    {
      "epoch": 0.012834224598930482,
      "grad_norm": 1.8577955961227417,
      "learning_rate": 0.00018632564809575742,
      "loss": 3.3277,
      "step": 42
    },
    {
      "epoch": 0.013139801375095492,
      "grad_norm": 1.7019318342208862,
      "learning_rate": 0.0001854792712585539,
      "loss": 3.6416,
      "step": 43
    },
    {
      "epoch": 0.013445378151260505,
      "grad_norm": 2.172180414199829,
      "learning_rate": 0.00018460952524209355,
      "loss": 3.5653,
      "step": 44
    },
    {
      "epoch": 0.013750954927425516,
      "grad_norm": 1.173954963684082,
      "learning_rate": 0.00018371664782625287,
      "loss": 2.4792,
      "step": 45
    },
    {
      "epoch": 0.014056531703590527,
      "grad_norm": 2.2119481563568115,
      "learning_rate": 0.00018280088311480201,
      "loss": 3.7178,
      "step": 46
    },
    {
      "epoch": 0.01436210847975554,
      "grad_norm": 1.043997883796692,
      "learning_rate": 0.00018186248146866927,
      "loss": 2.3083,
      "step": 47
    },
    {
      "epoch": 0.01466768525592055,
      "grad_norm": 1.6460810899734497,
      "learning_rate": 0.00018090169943749476,
      "loss": 2.4319,
      "step": 48
    },
    {
      "epoch": 0.014973262032085561,
      "grad_norm": 1.5691938400268555,
      "learning_rate": 0.0001799187996894925,
      "loss": 2.5601,
      "step": 49
    },
    {
      "epoch": 0.015278838808250574,
      "grad_norm": 2.165229320526123,
      "learning_rate": 0.00017891405093963938,
      "loss": 3.1488,
      "step": 50
    },
    {
      "epoch": 0.015278838808250574,
      "eval_loss": 3.244551658630371,
      "eval_runtime": 43.5009,
      "eval_samples_per_second": 31.678,
      "eval_steps_per_second": 15.839,
      "step": 50
    },
    {
      "epoch": 0.015584415584415584,
      "grad_norm": 2.4259822368621826,
      "learning_rate": 0.00017788772787621126,
      "loss": 4.0701,
      "step": 51
    },
    {
      "epoch": 0.015889992360580595,
      "grad_norm": 1.828935146331787,
      "learning_rate": 0.00017684011108568592,
      "loss": 3.8272,
      "step": 52
    },
    {
      "epoch": 0.016195569136745608,
      "grad_norm": 1.6015702486038208,
      "learning_rate": 0.0001757714869760335,
      "loss": 4.0941,
      "step": 53
    },
    {
      "epoch": 0.01650114591291062,
      "grad_norm": 1.4622135162353516,
      "learning_rate": 0.0001746821476984154,
      "loss": 2.8427,
      "step": 54
    },
    {
      "epoch": 0.01680672268907563,
      "grad_norm": 2.106966018676758,
      "learning_rate": 0.00017357239106731317,
      "loss": 2.3866,
      "step": 55
    },
    {
      "epoch": 0.017112299465240642,
      "grad_norm": 2.9907472133636475,
      "learning_rate": 0.00017244252047910892,
      "loss": 3.2555,
      "step": 56
    },
    {
      "epoch": 0.017417876241405655,
      "grad_norm": 1.2781822681427002,
      "learning_rate": 0.00017129284482913972,
      "loss": 2.8657,
      "step": 57
    },
    {
      "epoch": 0.017723453017570664,
      "grad_norm": 1.5655877590179443,
      "learning_rate": 0.00017012367842724887,
      "loss": 2.6976,
      "step": 58
    },
    {
      "epoch": 0.018029029793735676,
      "grad_norm": 1.7124484777450562,
      "learning_rate": 0.0001689353409118566,
      "loss": 2.5161,
      "step": 59
    },
    {
      "epoch": 0.01833460656990069,
      "grad_norm": 2.1622180938720703,
      "learning_rate": 0.00016772815716257412,
      "loss": 2.9102,
      "step": 60
    },
    {
      "epoch": 0.018640183346065698,
      "grad_norm": 1.4135297536849976,
      "learning_rate": 0.0001665024572113848,
      "loss": 1.7485,
      "step": 61
    },
    {
      "epoch": 0.01894576012223071,
      "grad_norm": 1.8781421184539795,
      "learning_rate": 0.00016525857615241687,
      "loss": 3.8095,
      "step": 62
    },
    {
      "epoch": 0.019251336898395723,
      "grad_norm": 1.5626355409622192,
      "learning_rate": 0.00016399685405033167,
      "loss": 1.9366,
      "step": 63
    },
    {
      "epoch": 0.019556913674560732,
      "grad_norm": 1.412752389907837,
      "learning_rate": 0.0001627176358473537,
      "loss": 1.9336,
      "step": 64
    },
    {
      "epoch": 0.019862490450725745,
      "grad_norm": 5.998400688171387,
      "learning_rate": 0.0001614212712689668,
      "loss": 3.1919,
      "step": 65
    },
    {
      "epoch": 0.020168067226890758,
      "grad_norm": 1.5334243774414062,
      "learning_rate": 0.00016010811472830252,
      "loss": 2.7113,
      "step": 66
    },
    {
      "epoch": 0.020473644003055767,
      "grad_norm": 2.1354057788848877,
      "learning_rate": 0.00015877852522924732,
      "loss": 2.988,
      "step": 67
    },
    {
      "epoch": 0.02077922077922078,
      "grad_norm": 1.9327161312103271,
      "learning_rate": 0.00015743286626829437,
      "loss": 2.9572,
      "step": 68
    },
    {
      "epoch": 0.021084797555385792,
      "grad_norm": 2.1718881130218506,
      "learning_rate": 0.0001560715057351673,
      "loss": 3.5648,
      "step": 69
    },
    {
      "epoch": 0.0213903743315508,
      "grad_norm": 1.8955811262130737,
      "learning_rate": 0.00015469481581224272,
      "loss": 3.1244,
      "step": 70
    },
    {
      "epoch": 0.021695951107715813,
      "grad_norm": 1.5830830335617065,
      "learning_rate": 0.0001533031728727994,
      "loss": 2.7921,
      "step": 71
    },
    {
      "epoch": 0.022001527883880826,
      "grad_norm": 2.2739856243133545,
      "learning_rate": 0.00015189695737812152,
      "loss": 3.5455,
      "step": 72
    },
    {
      "epoch": 0.022307104660045835,
      "grad_norm": 0.9828822612762451,
      "learning_rate": 0.0001504765537734844,
      "loss": 1.6882,
      "step": 73
    },
    {
      "epoch": 0.022612681436210848,
      "grad_norm": 1.9325013160705566,
      "learning_rate": 0.00014904235038305083,
      "loss": 2.1146,
      "step": 74
    },
    {
      "epoch": 0.02291825821237586,
      "grad_norm": 1.3537631034851074,
      "learning_rate": 0.00014759473930370736,
      "loss": 2.2816,
      "step": 75
    },
    {
      "epoch": 0.02322383498854087,
      "grad_norm": 1.826690673828125,
      "learning_rate": 0.0001461341162978688,
      "loss": 1.5235,
      "step": 76
    },
    {
      "epoch": 0.023529411764705882,
      "grad_norm": 1.8681014776229858,
      "learning_rate": 0.00014466088068528068,
      "loss": 4.0703,
      "step": 77
    },
    {
      "epoch": 0.023834988540870895,
      "grad_norm": 4.881453514099121,
      "learning_rate": 0.00014317543523384928,
      "loss": 3.2547,
      "step": 78
    },
    {
      "epoch": 0.024140565317035904,
      "grad_norm": 2.301090955734253,
      "learning_rate": 0.00014167818604952906,
      "loss": 2.3378,
      "step": 79
    },
    {
      "epoch": 0.024446142093200916,
      "grad_norm": 1.1395305395126343,
      "learning_rate": 0.00014016954246529696,
      "loss": 1.5157,
      "step": 80
    },
    {
      "epoch": 0.02475171886936593,
      "grad_norm": 1.7658803462982178,
      "learning_rate": 0.00013864991692924523,
      "loss": 4.2361,
      "step": 81
    },
    {
      "epoch": 0.025057295645530938,
      "grad_norm": 1.827609896659851,
      "learning_rate": 0.00013711972489182208,
      "loss": 3.0601,
      "step": 82
    },
    {
      "epoch": 0.02536287242169595,
      "grad_norm": 1.9651907682418823,
      "learning_rate": 0.00013557938469225167,
      "loss": 3.6306,
      "step": 83
    },
    {
      "epoch": 0.025668449197860963,
      "grad_norm": 2.074267625808716,
      "learning_rate": 0.00013402931744416433,
      "loss": 2.9667,
      "step": 84
    },
    {
      "epoch": 0.025974025974025976,
      "grad_norm": 1.3315553665161133,
      "learning_rate": 0.00013246994692046836,
      "loss": 1.88,
      "step": 85
    },
    {
      "epoch": 0.026279602750190985,
      "grad_norm": 1.5968420505523682,
      "learning_rate": 0.00013090169943749476,
      "loss": 3.7276,
      "step": 86
    },
    {
      "epoch": 0.026585179526355997,
      "grad_norm": 1.9459774494171143,
      "learning_rate": 0.0001293250037384465,
      "loss": 2.5366,
      "step": 87
    },
    {
      "epoch": 0.02689075630252101,
      "grad_norm": 1.9473600387573242,
      "learning_rate": 0.00012774029087618446,
      "loss": 3.8513,
      "step": 88
    },
    {
      "epoch": 0.02719633307868602,
      "grad_norm": 1.4431513547897339,
      "learning_rate": 0.00012614799409538198,
      "loss": 2.5023,
      "step": 89
    },
    {
      "epoch": 0.02750190985485103,
      "grad_norm": 3.223552703857422,
      "learning_rate": 0.00012454854871407994,
      "loss": 2.0943,
      "step": 90
    },
    {
      "epoch": 0.027807486631016044,
      "grad_norm": 2.679762363433838,
      "learning_rate": 0.00012294239200467516,
      "loss": 3.3053,
      "step": 91
    },
    {
      "epoch": 0.028113063407181053,
      "grad_norm": 2.0697975158691406,
      "learning_rate": 0.0001213299630743747,
      "loss": 3.5176,
      "step": 92
    },
    {
      "epoch": 0.028418640183346066,
      "grad_norm": 2.661999464035034,
      "learning_rate": 0.00011971170274514802,
      "loss": 3.7355,
      "step": 93
    },
    {
      "epoch": 0.02872421695951108,
      "grad_norm": 1.8615680932998657,
      "learning_rate": 0.000118088053433211,
      "loss": 2.0932,
      "step": 94
    },
    {
      "epoch": 0.029029793735676088,
      "grad_norm": 2.583749532699585,
      "learning_rate": 0.00011645945902807341,
      "loss": 4.0713,
      "step": 95
    },
    {
      "epoch": 0.0293353705118411,
      "grad_norm": 1.8530974388122559,
      "learning_rate": 0.0001148263647711842,
      "loss": 1.5702,
      "step": 96
    },
    {
      "epoch": 0.029640947288006113,
      "grad_norm": 1.7810598611831665,
      "learning_rate": 0.00011318921713420691,
      "loss": 3.71,
      "step": 97
    },
    {
      "epoch": 0.029946524064171122,
      "grad_norm": 2.1363232135772705,
      "learning_rate": 0.00011154846369695863,
      "loss": 3.1915,
      "step": 98
    },
    {
      "epoch": 0.030252100840336135,
      "grad_norm": 1.8341383934020996,
      "learning_rate": 0.0001099045530250463,
      "loss": 2.4543,
      "step": 99
    },
    {
      "epoch": 0.030557677616501147,
      "grad_norm": 1.8934003114700317,
      "learning_rate": 0.00010825793454723325,
      "loss": 2.6079,
      "step": 100
    },
    {
      "epoch": 0.030557677616501147,
      "eval_loss": 3.186122417449951,
      "eval_runtime": 43.6356,
      "eval_samples_per_second": 31.58,
      "eval_steps_per_second": 15.79,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.895803354349568e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}