{ "best_metric": 10.353182792663574, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.34315760343157603, "eval_steps": 100, "global_step": 155, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0022139200221392, "grad_norm": 0.015347130596637726, "learning_rate": 2e-05, "loss": 10.3793, "step": 1 }, { "epoch": 0.0022139200221392, "eval_loss": 10.376798629760742, "eval_runtime": 4.6374, "eval_samples_per_second": 328.199, "eval_steps_per_second": 82.157, "step": 1 }, { "epoch": 0.0044278400442784, "grad_norm": 0.018048662692308426, "learning_rate": 4e-05, "loss": 10.3775, "step": 2 }, { "epoch": 0.006641760066417601, "grad_norm": 0.022233936935663223, "learning_rate": 6e-05, "loss": 10.3768, "step": 3 }, { "epoch": 0.0088556800885568, "grad_norm": 0.019449712708592415, "learning_rate": 8e-05, "loss": 10.3786, "step": 4 }, { "epoch": 0.011069600110696002, "grad_norm": 0.020489409565925598, "learning_rate": 0.0001, "loss": 10.3764, "step": 5 }, { "epoch": 0.013283520132835201, "grad_norm": 0.021540051326155663, "learning_rate": 0.00012, "loss": 10.3786, "step": 6 }, { "epoch": 0.0154974401549744, "grad_norm": 0.018246637657284737, "learning_rate": 0.00014, "loss": 10.3767, "step": 7 }, { "epoch": 0.0177113601771136, "grad_norm": 0.01636139489710331, "learning_rate": 0.00016, "loss": 10.3764, "step": 8 }, { "epoch": 0.019925280199252802, "grad_norm": 0.019374892115592957, "learning_rate": 0.00018, "loss": 10.3768, "step": 9 }, { "epoch": 0.022139200221392003, "grad_norm": 0.02034880220890045, "learning_rate": 0.0002, "loss": 10.3765, "step": 10 }, { "epoch": 0.0243531202435312, "grad_norm": 0.01910431496798992, "learning_rate": 0.00019997652980184843, "loss": 10.3751, "step": 11 }, { "epoch": 0.026567040265670402, "grad_norm": 0.020900217816233635, "learning_rate": 0.0001999061302243977, "loss": 10.3766, "step": 12 }, { "epoch": 0.028780960287809604, "grad_norm": 0.01849156990647316, "learning_rate": 0.00019978883431348845, "loss": 10.378, "step": 13 }, { "epoch": 0.0309948803099488, "grad_norm": 0.018774129450321198, "learning_rate": 0.00019962469712828614, "loss": 10.3779, "step": 14 }, { "epoch": 0.033208800332088007, "grad_norm": 0.021555211395025253, "learning_rate": 0.00019941379571543596, "loss": 10.3781, "step": 15 }, { "epoch": 0.0354227203542272, "grad_norm": 0.021058112382888794, "learning_rate": 0.00019915622907289694, "loss": 10.3764, "step": 16 }, { "epoch": 0.0376366403763664, "grad_norm": 0.025353940203785896, "learning_rate": 0.00019885211810347184, "loss": 10.3754, "step": 17 }, { "epoch": 0.039850560398505604, "grad_norm": 0.023140626028180122, "learning_rate": 0.00019850160555805486, "loss": 10.376, "step": 18 }, { "epoch": 0.042064480420644805, "grad_norm": 0.022708751261234283, "learning_rate": 0.00019810485596862392, "loss": 10.3754, "step": 19 }, { "epoch": 0.044278400442784006, "grad_norm": 0.02649092860519886, "learning_rate": 0.00019766205557100868, "loss": 10.3751, "step": 20 }, { "epoch": 0.04649232046492321, "grad_norm": 0.026685183867812157, "learning_rate": 0.00019717341221747056, "loss": 10.3743, "step": 21 }, { "epoch": 0.0487062404870624, "grad_norm": 0.030229078605771065, "learning_rate": 0.00019663915527913625, "loss": 10.3754, "step": 22 }, { "epoch": 0.050920160509201604, "grad_norm": 0.025011839345097542, "learning_rate": 0.00019605953553832988, "loss": 10.3762, "step": 23 }, { "epoch": 0.053134080531340805, "grad_norm": 0.03366699442267418, "learning_rate": 0.00019543482507085482, "loss": 10.3732, "step": 24 }, { "epoch": 0.055348000553480006, "grad_norm": 0.031245626509189606, "learning_rate": 0.00019476531711828027, "loss": 10.3732, "step": 25 }, { "epoch": 0.05756192057561921, "grad_norm": 0.033843208104372025, "learning_rate": 0.0001940513259502924, "loss": 10.3746, "step": 26 }, { "epoch": 0.05977584059775841, "grad_norm": 0.031005628407001495, "learning_rate": 0.0001932931867171751, "loss": 10.3757, "step": 27 }, { "epoch": 0.0619897606198976, "grad_norm": 0.04169201850891113, "learning_rate": 0.0001924912552924889, "loss": 10.3723, "step": 28 }, { "epoch": 0.06420368064203681, "grad_norm": 0.04145209118723869, "learning_rate": 0.00019164590810602262, "loss": 10.3726, "step": 29 }, { "epoch": 0.06641760066417601, "grad_norm": 0.04390417039394379, "learning_rate": 0.00019075754196709572, "loss": 10.3735, "step": 30 }, { "epoch": 0.0686315206863152, "grad_norm": 0.04278510808944702, "learning_rate": 0.00018982657387829445, "loss": 10.3715, "step": 31 }, { "epoch": 0.0708454407084544, "grad_norm": 0.042029816657304764, "learning_rate": 0.00018885344083972914, "loss": 10.3736, "step": 32 }, { "epoch": 0.0730593607305936, "grad_norm": 0.04833783209323883, "learning_rate": 0.00018783859964390464, "loss": 10.3727, "step": 33 }, { "epoch": 0.0752732807527328, "grad_norm": 0.04858450964093208, "learning_rate": 0.00018678252666130013, "loss": 10.3711, "step": 34 }, { "epoch": 0.077487200774872, "grad_norm": 0.05063305422663689, "learning_rate": 0.00018568571761675893, "loss": 10.3696, "step": 35 }, { "epoch": 0.07970112079701121, "grad_norm": 0.05562438815832138, "learning_rate": 0.0001845486873567932, "loss": 10.3704, "step": 36 }, { "epoch": 0.08191504081915041, "grad_norm": 0.06275998800992966, "learning_rate": 0.00018337196960791302, "loss": 10.3701, "step": 37 }, { "epoch": 0.08412896084128961, "grad_norm": 0.06581102311611176, "learning_rate": 0.00018215611672609317, "loss": 10.3701, "step": 38 }, { "epoch": 0.08634288086342881, "grad_norm": 0.06170434504747391, "learning_rate": 0.00018090169943749476, "loss": 10.3693, "step": 39 }, { "epoch": 0.08855680088556801, "grad_norm": 0.06494265049695969, "learning_rate": 0.00017960930657056438, "loss": 10.3718, "step": 40 }, { "epoch": 0.09077072090770721, "grad_norm": 0.06664866209030151, "learning_rate": 0.00017827954477963557, "loss": 10.3714, "step": 41 }, { "epoch": 0.09298464092984642, "grad_norm": 0.05908958986401558, "learning_rate": 0.0001769130382601629, "loss": 10.3683, "step": 42 }, { "epoch": 0.0951985609519856, "grad_norm": 0.07850487530231476, "learning_rate": 0.00017551042845572208, "loss": 10.366, "step": 43 }, { "epoch": 0.0974124809741248, "grad_norm": 0.07899674773216248, "learning_rate": 0.00017407237375691392, "loss": 10.3679, "step": 44 }, { "epoch": 0.099626400996264, "grad_norm": 0.08278781175613403, "learning_rate": 0.0001725995491923131, "loss": 10.3674, "step": 45 }, { "epoch": 0.10184032101840321, "grad_norm": 0.07795372605323792, "learning_rate": 0.00017109264611160708, "loss": 10.3658, "step": 46 }, { "epoch": 0.10405424104054241, "grad_norm": 0.06829465925693512, "learning_rate": 0.00016955237186107387, "loss": 10.3658, "step": 47 }, { "epoch": 0.10626816106268161, "grad_norm": 0.08101464062929153, "learning_rate": 0.0001679794494515508, "loss": 10.3642, "step": 48 }, { "epoch": 0.10848208108482081, "grad_norm": 0.08278671652078629, "learning_rate": 0.00016637461721905045, "loss": 10.3651, "step": 49 }, { "epoch": 0.11069600110696001, "grad_norm": 0.07952283322811127, "learning_rate": 0.00016473862847818277, "loss": 10.3637, "step": 50 }, { "epoch": 0.11290992112909921, "grad_norm": 0.08242182433605194, "learning_rate": 0.00016307225116854622, "loss": 10.3645, "step": 51 }, { "epoch": 0.11512384115123842, "grad_norm": 0.08912307769060135, "learning_rate": 0.00016137626749425377, "loss": 10.3624, "step": 52 }, { "epoch": 0.11733776117337762, "grad_norm": 0.07046646624803543, "learning_rate": 0.00015965147355676343, "loss": 10.3622, "step": 53 }, { "epoch": 0.11955168119551682, "grad_norm": 0.08123268932104111, "learning_rate": 0.0001578986789811849, "loss": 10.3636, "step": 54 }, { "epoch": 0.121765601217656, "grad_norm": 0.07835382968187332, "learning_rate": 0.00015611870653623825, "loss": 10.3637, "step": 55 }, { "epoch": 0.1239795212397952, "grad_norm": 0.0735471174120903, "learning_rate": 0.00015431239174804328, "loss": 10.3604, "step": 56 }, { "epoch": 0.1261934412619344, "grad_norm": 0.0726391151547432, "learning_rate": 0.00015248058250792008, "loss": 10.3613, "step": 57 }, { "epoch": 0.12840736128407362, "grad_norm": 0.07755741477012634, "learning_rate": 0.0001506241386743854, "loss": 10.3579, "step": 58 }, { "epoch": 0.1306212813062128, "grad_norm": 0.06758265197277069, "learning_rate": 0.00014874393166953192, "loss": 10.3583, "step": 59 }, { "epoch": 0.13283520132835203, "grad_norm": 0.06026022881269455, "learning_rate": 0.00014684084406997903, "loss": 10.3587, "step": 60 }, { "epoch": 0.1350491213504912, "grad_norm": 0.06415611505508423, "learning_rate": 0.00014491576919258792, "loss": 10.3574, "step": 61 }, { "epoch": 0.1372630413726304, "grad_norm": 0.054800305515527725, "learning_rate": 0.0001429696106751352, "loss": 10.3607, "step": 62 }, { "epoch": 0.13947696139476962, "grad_norm": 0.056575533002614975, "learning_rate": 0.0001410032820521416, "loss": 10.3583, "step": 63 }, { "epoch": 0.1416908814169088, "grad_norm": 0.05453406646847725, "learning_rate": 0.00013901770632605547, "loss": 10.3581, "step": 64 }, { "epoch": 0.14390480143904802, "grad_norm": 0.052836980670690536, "learning_rate": 0.00013701381553399145, "loss": 10.3553, "step": 65 }, { "epoch": 0.1461187214611872, "grad_norm": 0.06288017332553864, "learning_rate": 0.00013499255031022885, "loss": 10.3575, "step": 66 }, { "epoch": 0.14833264148332642, "grad_norm": 0.05181482061743736, "learning_rate": 0.00013295485944467405, "loss": 10.3561, "step": 67 }, { "epoch": 0.1505465615054656, "grad_norm": 0.04769117385149002, "learning_rate": 0.00013090169943749476, "loss": 10.3588, "step": 68 }, { "epoch": 0.15276048152760482, "grad_norm": 0.05283131077885628, "learning_rate": 0.0001288340340501351, "loss": 10.3577, "step": 69 }, { "epoch": 0.154974401549744, "grad_norm": 0.053591929376125336, "learning_rate": 0.00012675283385292212, "loss": 10.3542, "step": 70 }, { "epoch": 0.15718832157188323, "grad_norm": 0.053562965244054794, "learning_rate": 0.00012465907576947622, "loss": 10.3554, "step": 71 }, { "epoch": 0.15940224159402241, "grad_norm": 0.04689027741551399, "learning_rate": 0.00012255374261813944, "loss": 10.3555, "step": 72 }, { "epoch": 0.16161616161616163, "grad_norm": 0.05137080326676369, "learning_rate": 0.0001204378226506365, "loss": 10.3579, "step": 73 }, { "epoch": 0.16383008163830082, "grad_norm": 0.04621529206633568, "learning_rate": 0.00011831230908818563, "loss": 10.3556, "step": 74 }, { "epoch": 0.16604400166044, "grad_norm": 0.04236424341797829, "learning_rate": 0.0001161781996552765, "loss": 10.3562, "step": 75 }, { "epoch": 0.16825792168257922, "grad_norm": 0.046798307448625565, "learning_rate": 0.00011403649611133444, "loss": 10.3537, "step": 76 }, { "epoch": 0.1704718417047184, "grad_norm": 0.040729619562625885, "learning_rate": 0.00011188820378049065, "loss": 10.3543, "step": 77 }, { "epoch": 0.17268576172685762, "grad_norm": 0.03752255439758301, "learning_rate": 0.00010973433107967902, "loss": 10.3547, "step": 78 }, { "epoch": 0.1748996817489968, "grad_norm": 0.04051700234413147, "learning_rate": 0.00010757588904528106, "loss": 10.355, "step": 79 }, { "epoch": 0.17711360177113603, "grad_norm": 0.04239342361688614, "learning_rate": 0.00010541389085854176, "loss": 10.3547, "step": 80 }, { "epoch": 0.1793275217932752, "grad_norm": 0.04204672947525978, "learning_rate": 0.00010324935136997806, "loss": 10.3569, "step": 81 }, { "epoch": 0.18154144181541443, "grad_norm": 0.031643904745578766, "learning_rate": 0.000101083286623004, "loss": 10.3562, "step": 82 }, { "epoch": 0.18375536183755362, "grad_norm": 0.038713809102773666, "learning_rate": 9.891671337699602e-05, "loss": 10.3554, "step": 83 }, { "epoch": 0.18596928185969283, "grad_norm": 0.046824660152196884, "learning_rate": 9.675064863002196e-05, "loss": 10.3561, "step": 84 }, { "epoch": 0.18818320188183202, "grad_norm": 0.039098598062992096, "learning_rate": 9.458610914145826e-05, "loss": 10.3546, "step": 85 }, { "epoch": 0.1903971219039712, "grad_norm": 0.04264703765511513, "learning_rate": 9.242411095471897e-05, "loss": 10.3542, "step": 86 }, { "epoch": 0.19261104192611042, "grad_norm": 0.04310859739780426, "learning_rate": 9.026566892032105e-05, "loss": 10.3552, "step": 87 }, { "epoch": 0.1948249619482496, "grad_norm": 0.039878156036138535, "learning_rate": 8.811179621950936e-05, "loss": 10.3576, "step": 88 }, { "epoch": 0.19703888197038882, "grad_norm": 0.038004275411367416, "learning_rate": 8.596350388866558e-05, "loss": 10.3547, "step": 89 }, { "epoch": 0.199252801992528, "grad_norm": 0.04718531668186188, "learning_rate": 8.382180034472353e-05, "loss": 10.3557, "step": 90 }, { "epoch": 0.20146672201466723, "grad_norm": 0.04298553988337517, "learning_rate": 8.168769091181438e-05, "loss": 10.3555, "step": 91 }, { "epoch": 0.20368064203680641, "grad_norm": 0.029132699593901634, "learning_rate": 7.956217734936353e-05, "loss": 10.3531, "step": 92 }, { "epoch": 0.20589456205894563, "grad_norm": 0.03928364813327789, "learning_rate": 7.744625738186059e-05, "loss": 10.3556, "step": 93 }, { "epoch": 0.20810848208108482, "grad_norm": 0.046986378729343414, "learning_rate": 7.534092423052381e-05, "loss": 10.3545, "step": 94 }, { "epoch": 0.21032240210322403, "grad_norm": 0.03824040666222572, "learning_rate": 7.324716614707793e-05, "loss": 10.3551, "step": 95 }, { "epoch": 0.21253632212536322, "grad_norm": 0.02702680043876171, "learning_rate": 7.116596594986494e-05, "loss": 10.3572, "step": 96 }, { "epoch": 0.21475024214750243, "grad_norm": 0.026534808799624443, "learning_rate": 6.909830056250527e-05, "loss": 10.3535, "step": 97 }, { "epoch": 0.21696416216964162, "grad_norm": 0.03434290364384651, "learning_rate": 6.704514055532597e-05, "loss": 10.3522, "step": 98 }, { "epoch": 0.2191780821917808, "grad_norm": 0.037764888256788254, "learning_rate": 6.500744968977116e-05, "loss": 10.3555, "step": 99 }, { "epoch": 0.22139200221392002, "grad_norm": 0.03366897255182266, "learning_rate": 6.298618446600856e-05, "loss": 10.3527, "step": 100 }, { "epoch": 0.22139200221392002, "eval_loss": 10.353182792663574, "eval_runtime": 4.6374, "eval_samples_per_second": 328.204, "eval_steps_per_second": 82.159, "step": 100 }, { "epoch": 0.2236059222360592, "grad_norm": 0.03327897563576698, "learning_rate": 6.0982293673944544e-05, "loss": 10.3556, "step": 101 }, { "epoch": 0.22581984225819843, "grad_norm": 0.03691767901182175, "learning_rate": 5.899671794785839e-05, "loss": 10.3553, "step": 102 }, { "epoch": 0.22803376228033762, "grad_norm": 0.030172044411301613, "learning_rate": 5.703038932486484e-05, "loss": 10.3542, "step": 103 }, { "epoch": 0.23024768230247683, "grad_norm": 0.021300997585058212, "learning_rate": 5.5084230807412126e-05, "loss": 10.3543, "step": 104 }, { "epoch": 0.23246160232461602, "grad_norm": 0.03279775753617287, "learning_rate": 5.3159155930021e-05, "loss": 10.3539, "step": 105 }, { "epoch": 0.23467552234675523, "grad_norm": 0.02924364060163498, "learning_rate": 5.12560683304681e-05, "loss": 10.3543, "step": 106 }, { "epoch": 0.23688944236889442, "grad_norm": 0.03262259438633919, "learning_rate": 4.9375861325614606e-05, "loss": 10.355, "step": 107 }, { "epoch": 0.23910336239103364, "grad_norm": 0.034073278307914734, "learning_rate": 4.751941749207995e-05, "loss": 10.357, "step": 108 }, { "epoch": 0.24131728241317282, "grad_norm": 0.03489963710308075, "learning_rate": 4.5687608251956714e-05, "loss": 10.355, "step": 109 }, { "epoch": 0.243531202435312, "grad_norm": 0.02581014297902584, "learning_rate": 4.388129346376178e-05, "loss": 10.3537, "step": 110 }, { "epoch": 0.24574512245745123, "grad_norm": 0.023335812613368034, "learning_rate": 4.210132101881516e-05, "loss": 10.3553, "step": 111 }, { "epoch": 0.2479590424795904, "grad_norm": 0.03612133115530014, "learning_rate": 4.034852644323661e-05, "loss": 10.3534, "step": 112 }, { "epoch": 0.2501729625017296, "grad_norm": 0.02821163646876812, "learning_rate": 3.862373250574626e-05, "loss": 10.3556, "step": 113 }, { "epoch": 0.2523868825238688, "grad_norm": 0.0300295390188694, "learning_rate": 3.6927748831453836e-05, "loss": 10.3551, "step": 114 }, { "epoch": 0.25460080254600803, "grad_norm": 0.025033898651599884, "learning_rate": 3.5261371521817244e-05, "loss": 10.3525, "step": 115 }, { "epoch": 0.25681472256814725, "grad_norm": 0.02417595498263836, "learning_rate": 3.3625382780949574e-05, "loss": 10.3542, "step": 116 }, { "epoch": 0.2590286425902864, "grad_norm": 0.02323267050087452, "learning_rate": 3.202055054844921e-05, "loss": 10.3521, "step": 117 }, { "epoch": 0.2612425626124256, "grad_norm": 0.026705941185355186, "learning_rate": 3.0447628138926156e-05, "loss": 10.3536, "step": 118 }, { "epoch": 0.26345648263456484, "grad_norm": 0.033880215138196945, "learning_rate": 2.890735388839295e-05, "loss": 10.3554, "step": 119 }, { "epoch": 0.26567040265670405, "grad_norm": 0.029678767547011375, "learning_rate": 2.7400450807686938e-05, "loss": 10.353, "step": 120 }, { "epoch": 0.2678843226788432, "grad_norm": 0.02781762182712555, "learning_rate": 2.59276262430861e-05, "loss": 10.3553, "step": 121 }, { "epoch": 0.2700982427009824, "grad_norm": 0.027942579239606857, "learning_rate": 2.4489571544277945e-05, "loss": 10.3556, "step": 122 }, { "epoch": 0.27231216272312164, "grad_norm": 0.02813226915895939, "learning_rate": 2.308696173983711e-05, "loss": 10.3528, "step": 123 }, { "epoch": 0.2745260827452608, "grad_norm": 0.030037103220820427, "learning_rate": 2.1720455220364444e-05, "loss": 10.353, "step": 124 }, { "epoch": 0.2767400027674, "grad_norm": 0.030153660103678703, "learning_rate": 2.0390693429435627e-05, "loss": 10.3552, "step": 125 }, { "epoch": 0.27895392278953923, "grad_norm": 0.037454936653375626, "learning_rate": 1.9098300562505266e-05, "loss": 10.3531, "step": 126 }, { "epoch": 0.28116784281167845, "grad_norm": 0.029572051018476486, "learning_rate": 1.784388327390687e-05, "loss": 10.3504, "step": 127 }, { "epoch": 0.2833817628338176, "grad_norm": 0.027182403951883316, "learning_rate": 1.6628030392087e-05, "loss": 10.3504, "step": 128 }, { "epoch": 0.2855956828559568, "grad_norm": 0.03518354520201683, "learning_rate": 1.5451312643206827e-05, "loss": 10.3536, "step": 129 }, { "epoch": 0.28780960287809604, "grad_norm": 0.024024929851293564, "learning_rate": 1.4314282383241096e-05, "loss": 10.3533, "step": 130 }, { "epoch": 0.29002352290023525, "grad_norm": 0.020221339538693428, "learning_rate": 1.3217473338699859e-05, "loss": 10.3521, "step": 131 }, { "epoch": 0.2922374429223744, "grad_norm": 0.02575266920030117, "learning_rate": 1.2161400356095375e-05, "loss": 10.3528, "step": 132 }, { "epoch": 0.29445136294451363, "grad_norm": 0.019320376217365265, "learning_rate": 1.1146559160270875e-05, "loss": 10.3529, "step": 133 }, { "epoch": 0.29666528296665284, "grad_norm": 0.022721335291862488, "learning_rate": 1.0173426121705576e-05, "loss": 10.3534, "step": 134 }, { "epoch": 0.298879202988792, "grad_norm": 0.030306054279208183, "learning_rate": 9.242458032904311e-06, "loss": 10.3529, "step": 135 }, { "epoch": 0.3010931230109312, "grad_norm": 0.03088550828397274, "learning_rate": 8.354091893977401e-06, "loss": 10.3542, "step": 136 }, { "epoch": 0.30330704303307043, "grad_norm": 0.02875097282230854, "learning_rate": 7.508744707511117e-06, "loss": 10.3556, "step": 137 }, { "epoch": 0.30552096305520965, "grad_norm": 0.022224275395274162, "learning_rate": 6.70681328282492e-06, "loss": 10.3536, "step": 138 }, { "epoch": 0.3077348830773488, "grad_norm": 0.028994860127568245, "learning_rate": 5.948674049707603e-06, "loss": 10.3531, "step": 139 }, { "epoch": 0.309948803099488, "grad_norm": 0.03301481530070305, "learning_rate": 5.2346828817197655e-06, "loss": 10.3539, "step": 140 }, { "epoch": 0.31216272312162724, "grad_norm": 0.031127754598855972, "learning_rate": 4.565174929145188e-06, "loss": 10.3541, "step": 141 }, { "epoch": 0.31437664314376645, "grad_norm": 0.03324393928050995, "learning_rate": 3.940464461670135e-06, "loss": 10.3558, "step": 142 }, { "epoch": 0.3165905631659056, "grad_norm": 0.025115065276622772, "learning_rate": 3.360844720863765e-06, "loss": 10.3528, "step": 143 }, { "epoch": 0.31880448318804483, "grad_norm": 0.02497878670692444, "learning_rate": 2.826587782529444e-06, "loss": 10.353, "step": 144 }, { "epoch": 0.32101840321018404, "grad_norm": 0.026006096974015236, "learning_rate": 2.3379444289913342e-06, "loss": 10.3548, "step": 145 }, { "epoch": 0.32323232323232326, "grad_norm": 0.028647800907492638, "learning_rate": 1.8951440313760837e-06, "loss": 10.355, "step": 146 }, { "epoch": 0.3254462432544624, "grad_norm": 0.03259943798184395, "learning_rate": 1.4983944419451613e-06, "loss": 10.3541, "step": 147 }, { "epoch": 0.32766016327660163, "grad_norm": 0.033153582364320755, "learning_rate": 1.1478818965281911e-06, "loss": 10.3529, "step": 148 }, { "epoch": 0.32987408329874085, "grad_norm": 0.026517199352383614, "learning_rate": 8.437709271030603e-07, "loss": 10.3537, "step": 149 }, { "epoch": 0.33208800332088, "grad_norm": 0.02658323012292385, "learning_rate": 5.862042845640403e-07, "loss": 10.3527, "step": 150 }, { "epoch": 0.3343019233430192, "grad_norm": 0.024668825790286064, "learning_rate": 3.7530287171387843e-07, "loss": 10.3539, "step": 151 }, { "epoch": 0.33651584336515844, "grad_norm": 0.030534988269209862, "learning_rate": 2.1116568651156076e-07, "loss": 10.3545, "step": 152 }, { "epoch": 0.33872976338729766, "grad_norm": 0.024195190519094467, "learning_rate": 9.386977560232879e-08, "loss": 10.3539, "step": 153 }, { "epoch": 0.3409436834094368, "grad_norm": 0.027664266526699066, "learning_rate": 2.347019815158724e-08, "loss": 10.3528, "step": 154 }, { "epoch": 0.34315760343157603, "grad_norm": 0.03868336230516434, "learning_rate": 0.0, "loss": 10.3539, "step": 155 } ], "logging_steps": 1, "max_steps": 155, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 64855033774080.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }