diff --git "a/run-0/checkpoint-620/trainer_state.json" "b/run-0/checkpoint-620/trainer_state.json" new file mode 100644--- /dev/null +++ "b/run-0/checkpoint-620/trainer_state.json" @@ -0,0 +1,4440 @@ +{ + "best_metric": 1.6532543369745685, + "best_model_checkpoint": "./modernBERT-content-regression/run-0/checkpoint-620", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 620, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008064516129032258, + "grad_norm": 312.8164978027344, + "learning_rate": 1.9405916792639294e-05, + "loss": 21.3087, + "step": 1 + }, + { + "epoch": 0.016129032258064516, + "grad_norm": 21.297748565673828, + "learning_rate": 1.9374566361633415e-05, + "loss": 0.3159, + "step": 2 + }, + { + "epoch": 0.024193548387096774, + "grad_norm": 378.6422424316406, + "learning_rate": 1.9343215930627533e-05, + "loss": 138.0821, + "step": 3 + }, + { + "epoch": 0.03225806451612903, + "grad_norm": 98.11734008789062, + "learning_rate": 1.9311865499621654e-05, + "loss": 13.477, + "step": 4 + }, + { + "epoch": 0.04032258064516129, + "grad_norm": 55.27867126464844, + "learning_rate": 1.9280515068615775e-05, + "loss": 1.8628, + "step": 5 + }, + { + "epoch": 0.04838709677419355, + "grad_norm": 116.12382507324219, + "learning_rate": 1.92491646376099e-05, + "loss": 2.5156, + "step": 6 + }, + { + "epoch": 0.056451612903225805, + "grad_norm": 44.64065170288086, + "learning_rate": 1.921781420660402e-05, + "loss": 4.839, + "step": 7 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 444.887451171875, + "learning_rate": 1.9186463775598138e-05, + "loss": 10.6158, + "step": 8 + }, + { + "epoch": 0.07258064516129033, + "grad_norm": 29.839136123657227, + "learning_rate": 1.915511334459226e-05, + "loss": 0.2628, + "step": 9 + }, + { + "epoch": 0.08064516129032258, + "grad_norm": 233.91651916503906, + "learning_rate": 1.912376291358638e-05, + "loss": 2.1308, + "step": 10 + }, + { + "epoch": 0.08870967741935484, + "grad_norm": 169.41172790527344, + "learning_rate": 1.90924124825805e-05, + "loss": 10.3841, + "step": 11 + }, + { + "epoch": 0.0967741935483871, + "grad_norm": 25.190792083740234, + "learning_rate": 1.906106205157462e-05, + "loss": 0.3598, + "step": 12 + }, + { + "epoch": 0.10483870967741936, + "grad_norm": 112.92284393310547, + "learning_rate": 1.902971162056874e-05, + "loss": 1.3362, + "step": 13 + }, + { + "epoch": 0.11290322580645161, + "grad_norm": 29.17025375366211, + "learning_rate": 1.899836118956286e-05, + "loss": 1.1826, + "step": 14 + }, + { + "epoch": 0.12096774193548387, + "grad_norm": 53.96384048461914, + "learning_rate": 1.8967010758556982e-05, + "loss": 0.3421, + "step": 15 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 201.16738891601562, + "learning_rate": 1.8935660327551106e-05, + "loss": 9.4231, + "step": 16 + }, + { + "epoch": 0.13709677419354838, + "grad_norm": 52.72154235839844, + "learning_rate": 1.8904309896545224e-05, + "loss": 1.9206, + "step": 17 + }, + { + "epoch": 0.14516129032258066, + "grad_norm": 127.67330932617188, + "learning_rate": 1.8872959465539345e-05, + "loss": 2.6729, + "step": 18 + }, + { + "epoch": 0.1532258064516129, + "grad_norm": 36.01234436035156, + "learning_rate": 1.8841609034533466e-05, + "loss": 1.044, + "step": 19 + }, + { + "epoch": 0.16129032258064516, + "grad_norm": 291.8822021484375, + "learning_rate": 1.8810258603527587e-05, + "loss": 9.1818, + "step": 20 + }, + { + "epoch": 0.1693548387096774, + "grad_norm": 270.27142333984375, + "learning_rate": 1.8778908172521708e-05, + "loss": 3.1568, + "step": 21 + }, + { + "epoch": 0.1774193548387097, + "grad_norm": 527.8693237304688, + "learning_rate": 1.8747557741515826e-05, + "loss": 7.3642, + "step": 22 + }, + { + "epoch": 0.18548387096774194, + "grad_norm": 135.9112548828125, + "learning_rate": 1.8716207310509947e-05, + "loss": 15.6669, + "step": 23 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 490.7304382324219, + "learning_rate": 1.8684856879504068e-05, + "loss": 6.4283, + "step": 24 + }, + { + "epoch": 0.20161290322580644, + "grad_norm": 159.22994995117188, + "learning_rate": 1.865350644849819e-05, + "loss": 1.1921, + "step": 25 + }, + { + "epoch": 0.20967741935483872, + "grad_norm": 183.89662170410156, + "learning_rate": 1.862215601749231e-05, + "loss": 1.6719, + "step": 26 + }, + { + "epoch": 0.21774193548387097, + "grad_norm": 89.63036346435547, + "learning_rate": 1.859080558648643e-05, + "loss": 2.104, + "step": 27 + }, + { + "epoch": 0.22580645161290322, + "grad_norm": 108.4155044555664, + "learning_rate": 1.8559455155480552e-05, + "loss": 1.345, + "step": 28 + }, + { + "epoch": 0.23387096774193547, + "grad_norm": 25.60169792175293, + "learning_rate": 1.8528104724474673e-05, + "loss": 0.6057, + "step": 29 + }, + { + "epoch": 0.24193548387096775, + "grad_norm": 64.74250793457031, + "learning_rate": 1.8496754293468794e-05, + "loss": 0.5283, + "step": 30 + }, + { + "epoch": 0.25, + "grad_norm": 48.2547721862793, + "learning_rate": 1.8465403862462912e-05, + "loss": 1.0487, + "step": 31 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 85.43180847167969, + "learning_rate": 1.8434053431457033e-05, + "loss": 1.1554, + "step": 32 + }, + { + "epoch": 0.2661290322580645, + "grad_norm": 23.75019645690918, + "learning_rate": 1.8402703000451154e-05, + "loss": 0.6364, + "step": 33 + }, + { + "epoch": 0.27419354838709675, + "grad_norm": 60.279544830322266, + "learning_rate": 1.8371352569445275e-05, + "loss": 1.4024, + "step": 34 + }, + { + "epoch": 0.28225806451612906, + "grad_norm": 256.9117126464844, + "learning_rate": 1.8340002138439396e-05, + "loss": 21.5309, + "step": 35 + }, + { + "epoch": 0.2903225806451613, + "grad_norm": 83.0962142944336, + "learning_rate": 1.8308651707433517e-05, + "loss": 1.4266, + "step": 36 + }, + { + "epoch": 0.29838709677419356, + "grad_norm": 31.391490936279297, + "learning_rate": 1.827730127642764e-05, + "loss": 0.1593, + "step": 37 + }, + { + "epoch": 0.3064516129032258, + "grad_norm": 196.5593719482422, + "learning_rate": 1.824595084542176e-05, + "loss": 18.6657, + "step": 38 + }, + { + "epoch": 0.31451612903225806, + "grad_norm": 526.9293823242188, + "learning_rate": 1.821460041441588e-05, + "loss": 2.932, + "step": 39 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 398.2054138183594, + "learning_rate": 1.8183249983409998e-05, + "loss": 17.9616, + "step": 40 + }, + { + "epoch": 0.33064516129032256, + "grad_norm": 720.282958984375, + "learning_rate": 1.815189955240412e-05, + "loss": 13.228, + "step": 41 + }, + { + "epoch": 0.3387096774193548, + "grad_norm": 163.12245178222656, + "learning_rate": 1.812054912139824e-05, + "loss": 19.4242, + "step": 42 + }, + { + "epoch": 0.3467741935483871, + "grad_norm": 355.48846435546875, + "learning_rate": 1.808919869039236e-05, + "loss": 26.5729, + "step": 43 + }, + { + "epoch": 0.3548387096774194, + "grad_norm": 241.5015411376953, + "learning_rate": 1.8057848259386482e-05, + "loss": 3.7127, + "step": 44 + }, + { + "epoch": 0.3629032258064516, + "grad_norm": 185.13877868652344, + "learning_rate": 1.8026497828380604e-05, + "loss": 3.4602, + "step": 45 + }, + { + "epoch": 0.3709677419354839, + "grad_norm": 63.099327087402344, + "learning_rate": 1.7995147397374725e-05, + "loss": 2.2782, + "step": 46 + }, + { + "epoch": 0.3790322580645161, + "grad_norm": 52.98133087158203, + "learning_rate": 1.7963796966368846e-05, + "loss": 2.8164, + "step": 47 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 72.33665466308594, + "learning_rate": 1.7932446535362967e-05, + "loss": 1.1977, + "step": 48 + }, + { + "epoch": 0.3951612903225806, + "grad_norm": 79.54340362548828, + "learning_rate": 1.7901096104357088e-05, + "loss": 2.3691, + "step": 49 + }, + { + "epoch": 0.4032258064516129, + "grad_norm": 112.45056915283203, + "learning_rate": 1.7869745673351205e-05, + "loss": 10.6579, + "step": 50 + }, + { + "epoch": 0.4112903225806452, + "grad_norm": 9.006159782409668, + "learning_rate": 1.7838395242345327e-05, + "loss": 0.1436, + "step": 51 + }, + { + "epoch": 0.41935483870967744, + "grad_norm": 62.99940490722656, + "learning_rate": 1.7807044811339448e-05, + "loss": 1.9241, + "step": 52 + }, + { + "epoch": 0.4274193548387097, + "grad_norm": 205.84600830078125, + "learning_rate": 1.777569438033357e-05, + "loss": 21.9755, + "step": 53 + }, + { + "epoch": 0.43548387096774194, + "grad_norm": 18.60399055480957, + "learning_rate": 1.774434394932769e-05, + "loss": 0.6385, + "step": 54 + }, + { + "epoch": 0.4435483870967742, + "grad_norm": 60.74576187133789, + "learning_rate": 1.771299351832181e-05, + "loss": 6.0776, + "step": 55 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 55.38773727416992, + "learning_rate": 1.7681643087315932e-05, + "loss": 1.713, + "step": 56 + }, + { + "epoch": 0.4596774193548387, + "grad_norm": 22.658430099487305, + "learning_rate": 1.7650292656310053e-05, + "loss": 0.2931, + "step": 57 + }, + { + "epoch": 0.46774193548387094, + "grad_norm": 103.3204574584961, + "learning_rate": 1.7618942225304174e-05, + "loss": 9.7618, + "step": 58 + }, + { + "epoch": 0.47580645161290325, + "grad_norm": 22.80617332458496, + "learning_rate": 1.758759179429829e-05, + "loss": 0.3063, + "step": 59 + }, + { + "epoch": 0.4838709677419355, + "grad_norm": 126.92974090576172, + "learning_rate": 1.7556241363292413e-05, + "loss": 14.4092, + "step": 60 + }, + { + "epoch": 0.49193548387096775, + "grad_norm": 43.025787353515625, + "learning_rate": 1.7524890932286534e-05, + "loss": 0.7731, + "step": 61 + }, + { + "epoch": 0.5, + "grad_norm": 80.46817779541016, + "learning_rate": 1.7493540501280655e-05, + "loss": 21.7639, + "step": 62 + }, + { + "epoch": 0.5080645161290323, + "grad_norm": 59.49717712402344, + "learning_rate": 1.7462190070274776e-05, + "loss": 1.618, + "step": 63 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 15.44653606414795, + "learning_rate": 1.7430839639268897e-05, + "loss": 2.4747, + "step": 64 + }, + { + "epoch": 0.5241935483870968, + "grad_norm": 89.72061920166016, + "learning_rate": 1.7399489208263018e-05, + "loss": 13.3349, + "step": 65 + }, + { + "epoch": 0.532258064516129, + "grad_norm": 16.540616989135742, + "learning_rate": 1.736813877725714e-05, + "loss": 1.4963, + "step": 66 + }, + { + "epoch": 0.5403225806451613, + "grad_norm": 50.684844970703125, + "learning_rate": 1.733678834625126e-05, + "loss": 2.0989, + "step": 67 + }, + { + "epoch": 0.5483870967741935, + "grad_norm": 56.257877349853516, + "learning_rate": 1.730543791524538e-05, + "loss": 1.6343, + "step": 68 + }, + { + "epoch": 0.5564516129032258, + "grad_norm": 87.75580596923828, + "learning_rate": 1.72740874842395e-05, + "loss": 3.4088, + "step": 69 + }, + { + "epoch": 0.5645161290322581, + "grad_norm": 14.779145240783691, + "learning_rate": 1.724273705323362e-05, + "loss": 0.4494, + "step": 70 + }, + { + "epoch": 0.5725806451612904, + "grad_norm": 20.206077575683594, + "learning_rate": 1.721138662222774e-05, + "loss": 2.2875, + "step": 71 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 81.59163665771484, + "learning_rate": 1.7180036191221862e-05, + "loss": 16.2722, + "step": 72 + }, + { + "epoch": 0.5887096774193549, + "grad_norm": 20.435260772705078, + "learning_rate": 1.7148685760215983e-05, + "loss": 0.2519, + "step": 73 + }, + { + "epoch": 0.5967741935483871, + "grad_norm": 77.98759460449219, + "learning_rate": 1.7117335329210104e-05, + "loss": 2.4472, + "step": 74 + }, + { + "epoch": 0.6048387096774194, + "grad_norm": 75.45653533935547, + "learning_rate": 1.7085984898204225e-05, + "loss": 9.1756, + "step": 75 + }, + { + "epoch": 0.6129032258064516, + "grad_norm": 16.75188636779785, + "learning_rate": 1.7054634467198346e-05, + "loss": 0.25, + "step": 76 + }, + { + "epoch": 0.6209677419354839, + "grad_norm": 21.303180694580078, + "learning_rate": 1.7023284036192467e-05, + "loss": 0.3106, + "step": 77 + }, + { + "epoch": 0.6290322580645161, + "grad_norm": 56.08082962036133, + "learning_rate": 1.6991933605186585e-05, + "loss": 13.2157, + "step": 78 + }, + { + "epoch": 0.6370967741935484, + "grad_norm": 115.4497299194336, + "learning_rate": 1.6960583174180706e-05, + "loss": 24.3069, + "step": 79 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 18.160301208496094, + "learning_rate": 1.6929232743174827e-05, + "loss": 1.2587, + "step": 80 + }, + { + "epoch": 0.6532258064516129, + "grad_norm": 15.181955337524414, + "learning_rate": 1.6897882312168948e-05, + "loss": 0.317, + "step": 81 + }, + { + "epoch": 0.6612903225806451, + "grad_norm": 50.316402435302734, + "learning_rate": 1.686653188116307e-05, + "loss": 2.1468, + "step": 82 + }, + { + "epoch": 0.6693548387096774, + "grad_norm": 24.807689666748047, + "learning_rate": 1.683518145015719e-05, + "loss": 0.4887, + "step": 83 + }, + { + "epoch": 0.6774193548387096, + "grad_norm": 7.860809803009033, + "learning_rate": 1.680383101915131e-05, + "loss": 0.7155, + "step": 84 + }, + { + "epoch": 0.6854838709677419, + "grad_norm": 152.7879180908203, + "learning_rate": 1.6772480588145432e-05, + "loss": 18.5123, + "step": 85 + }, + { + "epoch": 0.6935483870967742, + "grad_norm": 46.42924499511719, + "learning_rate": 1.6741130157139553e-05, + "loss": 14.8221, + "step": 86 + }, + { + "epoch": 0.7016129032258065, + "grad_norm": 55.30247497558594, + "learning_rate": 1.670977972613367e-05, + "loss": 1.3708, + "step": 87 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 64.38847351074219, + "learning_rate": 1.6678429295127792e-05, + "loss": 1.5522, + "step": 88 + }, + { + "epoch": 0.717741935483871, + "grad_norm": 71.2874755859375, + "learning_rate": 1.6647078864121913e-05, + "loss": 2.576, + "step": 89 + }, + { + "epoch": 0.7258064516129032, + "grad_norm": 81.83346557617188, + "learning_rate": 1.6615728433116034e-05, + "loss": 2.3827, + "step": 90 + }, + { + "epoch": 0.7338709677419355, + "grad_norm": 133.49618530273438, + "learning_rate": 1.6584378002110155e-05, + "loss": 18.9283, + "step": 91 + }, + { + "epoch": 0.7419354838709677, + "grad_norm": 58.27260971069336, + "learning_rate": 1.6553027571104276e-05, + "loss": 5.9736, + "step": 92 + }, + { + "epoch": 0.75, + "grad_norm": 5.685044288635254, + "learning_rate": 1.6521677140098398e-05, + "loss": 0.1484, + "step": 93 + }, + { + "epoch": 0.7580645161290323, + "grad_norm": 100.5199966430664, + "learning_rate": 1.649032670909252e-05, + "loss": 4.3167, + "step": 94 + }, + { + "epoch": 0.7661290322580645, + "grad_norm": 78.9014663696289, + "learning_rate": 1.645897627808664e-05, + "loss": 9.7868, + "step": 95 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 90.67410278320312, + "learning_rate": 1.642762584708076e-05, + "loss": 3.1579, + "step": 96 + }, + { + "epoch": 0.782258064516129, + "grad_norm": 64.82438659667969, + "learning_rate": 1.639627541607488e-05, + "loss": 1.3801, + "step": 97 + }, + { + "epoch": 0.7903225806451613, + "grad_norm": 54.49703598022461, + "learning_rate": 1.6364924985069e-05, + "loss": 1.1181, + "step": 98 + }, + { + "epoch": 0.7983870967741935, + "grad_norm": 48.997737884521484, + "learning_rate": 1.633357455406312e-05, + "loss": 0.8717, + "step": 99 + }, + { + "epoch": 0.8064516129032258, + "grad_norm": 163.54615783691406, + "learning_rate": 1.630222412305724e-05, + "loss": 20.7014, + "step": 100 + }, + { + "epoch": 0.8145161290322581, + "grad_norm": 40.45456314086914, + "learning_rate": 1.6270873692051363e-05, + "loss": 1.3114, + "step": 101 + }, + { + "epoch": 0.8225806451612904, + "grad_norm": 10.254913330078125, + "learning_rate": 1.6239523261045484e-05, + "loss": 0.2994, + "step": 102 + }, + { + "epoch": 0.8306451612903226, + "grad_norm": 97.43340301513672, + "learning_rate": 1.6208172830039605e-05, + "loss": 13.0826, + "step": 103 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 26.264678955078125, + "learning_rate": 1.6176822399033726e-05, + "loss": 0.8864, + "step": 104 + }, + { + "epoch": 0.8467741935483871, + "grad_norm": 52.89865493774414, + "learning_rate": 1.6145471968027847e-05, + "loss": 0.9729, + "step": 105 + }, + { + "epoch": 0.8548387096774194, + "grad_norm": 57.24679183959961, + "learning_rate": 1.6114121537021965e-05, + "loss": 2.4299, + "step": 106 + }, + { + "epoch": 0.8629032258064516, + "grad_norm": 50.82107925415039, + "learning_rate": 1.6082771106016086e-05, + "loss": 1.7577, + "step": 107 + }, + { + "epoch": 0.8709677419354839, + "grad_norm": 71.07525634765625, + "learning_rate": 1.6051420675010207e-05, + "loss": 1.3853, + "step": 108 + }, + { + "epoch": 0.8790322580645161, + "grad_norm": 102.36954498291016, + "learning_rate": 1.6020070244004328e-05, + "loss": 3.1053, + "step": 109 + }, + { + "epoch": 0.8870967741935484, + "grad_norm": 36.77400207519531, + "learning_rate": 1.598871981299845e-05, + "loss": 1.1706, + "step": 110 + }, + { + "epoch": 0.8951612903225806, + "grad_norm": 75.524658203125, + "learning_rate": 1.595736938199257e-05, + "loss": 2.8564, + "step": 111 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 22.996549606323242, + "learning_rate": 1.592601895098669e-05, + "loss": 1.1411, + "step": 112 + }, + { + "epoch": 0.9112903225806451, + "grad_norm": 56.16508102416992, + "learning_rate": 1.5894668519980812e-05, + "loss": 1.1069, + "step": 113 + }, + { + "epoch": 0.9193548387096774, + "grad_norm": 45.19282913208008, + "learning_rate": 1.5863318088974933e-05, + "loss": 1.7093, + "step": 114 + }, + { + "epoch": 0.9274193548387096, + "grad_norm": 66.1149673461914, + "learning_rate": 1.5831967657969054e-05, + "loss": 4.0799, + "step": 115 + }, + { + "epoch": 0.9354838709677419, + "grad_norm": 48.84867477416992, + "learning_rate": 1.5800617226963172e-05, + "loss": 2.2224, + "step": 116 + }, + { + "epoch": 0.9435483870967742, + "grad_norm": 59.682125091552734, + "learning_rate": 1.5769266795957293e-05, + "loss": 0.9111, + "step": 117 + }, + { + "epoch": 0.9516129032258065, + "grad_norm": 76.44542694091797, + "learning_rate": 1.5737916364951414e-05, + "loss": 14.3846, + "step": 118 + }, + { + "epoch": 0.9596774193548387, + "grad_norm": 28.405059814453125, + "learning_rate": 1.5706565933945535e-05, + "loss": 1.9018, + "step": 119 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 10.535357475280762, + "learning_rate": 1.5675215502939656e-05, + "loss": 0.9783, + "step": 120 + }, + { + "epoch": 0.9758064516129032, + "grad_norm": 21.8922176361084, + "learning_rate": 1.5643865071933777e-05, + "loss": 0.3224, + "step": 121 + }, + { + "epoch": 0.9838709677419355, + "grad_norm": 15.08030891418457, + "learning_rate": 1.5612514640927898e-05, + "loss": 0.4911, + "step": 122 + }, + { + "epoch": 0.9919354838709677, + "grad_norm": 3.00480580329895, + "learning_rate": 1.558116420992202e-05, + "loss": 0.1242, + "step": 123 + }, + { + "epoch": 1.0, + "grad_norm": 26.523765563964844, + "learning_rate": 1.554981377891614e-05, + "loss": 0.238, + "step": 124 + }, + { + "epoch": 1.0, + "eval_loss": 4.573007583618164, + "eval_mae": 1.3245397806167603, + "eval_mse": 4.573008060455322, + "eval_r2": -0.2520101070404053, + "eval_rmse": 2.138459272573439, + "eval_runtime": 1.736, + "eval_samples_per_second": 31.682, + "eval_smape": 54.24200892448425, + "eval_steps_per_second": 8.064, + "step": 124 + }, + { + "epoch": 1.0080645161290323, + "grad_norm": 23.402734756469727, + "learning_rate": 1.5518463347910258e-05, + "loss": 0.8834, + "step": 125 + }, + { + "epoch": 1.0161290322580645, + "grad_norm": 40.177024841308594, + "learning_rate": 1.548711291690438e-05, + "loss": 1.2453, + "step": 126 + }, + { + "epoch": 1.0241935483870968, + "grad_norm": 86.14007568359375, + "learning_rate": 1.54557624858985e-05, + "loss": 5.4819, + "step": 127 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 10.090241432189941, + "learning_rate": 1.542441205489262e-05, + "loss": 0.357, + "step": 128 + }, + { + "epoch": 1.0403225806451613, + "grad_norm": 29.933624267578125, + "learning_rate": 1.5393061623886742e-05, + "loss": 1.0975, + "step": 129 + }, + { + "epoch": 1.0483870967741935, + "grad_norm": 20.847654342651367, + "learning_rate": 1.5361711192880863e-05, + "loss": 2.23, + "step": 130 + }, + { + "epoch": 1.0564516129032258, + "grad_norm": 41.63291549682617, + "learning_rate": 1.5330360761874984e-05, + "loss": 0.6496, + "step": 131 + }, + { + "epoch": 1.064516129032258, + "grad_norm": 49.841773986816406, + "learning_rate": 1.5299010330869105e-05, + "loss": 3.59, + "step": 132 + }, + { + "epoch": 1.0725806451612903, + "grad_norm": 55.52513122558594, + "learning_rate": 1.5267659899863226e-05, + "loss": 1.1932, + "step": 133 + }, + { + "epoch": 1.0806451612903225, + "grad_norm": 46.58870315551758, + "learning_rate": 1.5236309468857344e-05, + "loss": 0.8727, + "step": 134 + }, + { + "epoch": 1.0887096774193548, + "grad_norm": 52.691673278808594, + "learning_rate": 1.5204959037851465e-05, + "loss": 5.618, + "step": 135 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 94.9619140625, + "learning_rate": 1.5173608606845586e-05, + "loss": 13.7161, + "step": 136 + }, + { + "epoch": 1.1048387096774193, + "grad_norm": 105.40327453613281, + "learning_rate": 1.5142258175839707e-05, + "loss": 7.7381, + "step": 137 + }, + { + "epoch": 1.1129032258064515, + "grad_norm": 90.10973358154297, + "learning_rate": 1.5110907744833828e-05, + "loss": 1.8823, + "step": 138 + }, + { + "epoch": 1.120967741935484, + "grad_norm": 220.77796936035156, + "learning_rate": 1.5079557313827948e-05, + "loss": 35.8495, + "step": 139 + }, + { + "epoch": 1.129032258064516, + "grad_norm": 122.45404815673828, + "learning_rate": 1.5048206882822069e-05, + "loss": 2.8625, + "step": 140 + }, + { + "epoch": 1.1370967741935485, + "grad_norm": 87.10842895507812, + "learning_rate": 1.501685645181619e-05, + "loss": 2.4731, + "step": 141 + }, + { + "epoch": 1.1451612903225807, + "grad_norm": 35.40389633178711, + "learning_rate": 1.498550602081031e-05, + "loss": 0.7125, + "step": 142 + }, + { + "epoch": 1.153225806451613, + "grad_norm": 45.49386215209961, + "learning_rate": 1.4954155589804432e-05, + "loss": 0.7262, + "step": 143 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 52.729427337646484, + "learning_rate": 1.4922805158798551e-05, + "loss": 0.9633, + "step": 144 + }, + { + "epoch": 1.1693548387096775, + "grad_norm": 40.62730407714844, + "learning_rate": 1.4891454727792672e-05, + "loss": 0.619, + "step": 145 + }, + { + "epoch": 1.1774193548387097, + "grad_norm": 63.05773162841797, + "learning_rate": 1.4860104296786793e-05, + "loss": 1.575, + "step": 146 + }, + { + "epoch": 1.185483870967742, + "grad_norm": 22.308488845825195, + "learning_rate": 1.4828753865780914e-05, + "loss": 0.4164, + "step": 147 + }, + { + "epoch": 1.1935483870967742, + "grad_norm": 20.031757354736328, + "learning_rate": 1.4797403434775034e-05, + "loss": 0.6238, + "step": 148 + }, + { + "epoch": 1.2016129032258065, + "grad_norm": 165.66102600097656, + "learning_rate": 1.4766053003769155e-05, + "loss": 11.1981, + "step": 149 + }, + { + "epoch": 1.2096774193548387, + "grad_norm": 184.57411193847656, + "learning_rate": 1.4734702572763276e-05, + "loss": 35.5336, + "step": 150 + }, + { + "epoch": 1.217741935483871, + "grad_norm": 97.44104766845703, + "learning_rate": 1.4703352141757397e-05, + "loss": 16.5436, + "step": 151 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 55.624298095703125, + "learning_rate": 1.4672001710751518e-05, + "loss": 1.5125, + "step": 152 + }, + { + "epoch": 1.2338709677419355, + "grad_norm": 69.92327117919922, + "learning_rate": 1.4640651279745637e-05, + "loss": 9.3334, + "step": 153 + }, + { + "epoch": 1.2419354838709677, + "grad_norm": 85.40164184570312, + "learning_rate": 1.4609300848739758e-05, + "loss": 11.2524, + "step": 154 + }, + { + "epoch": 1.25, + "grad_norm": 39.81537628173828, + "learning_rate": 1.457795041773388e-05, + "loss": 0.6493, + "step": 155 + }, + { + "epoch": 1.2580645161290323, + "grad_norm": 47.04484939575195, + "learning_rate": 1.4546599986728e-05, + "loss": 1.4688, + "step": 156 + }, + { + "epoch": 1.2661290322580645, + "grad_norm": 30.717687606811523, + "learning_rate": 1.4515249555722122e-05, + "loss": 2.073, + "step": 157 + }, + { + "epoch": 1.2741935483870968, + "grad_norm": 84.80261993408203, + "learning_rate": 1.4483899124716241e-05, + "loss": 11.3152, + "step": 158 + }, + { + "epoch": 1.282258064516129, + "grad_norm": 339.03363037109375, + "learning_rate": 1.4452548693710362e-05, + "loss": 19.9553, + "step": 159 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 48.307281494140625, + "learning_rate": 1.4421198262704483e-05, + "loss": 8.5167, + "step": 160 + }, + { + "epoch": 1.2983870967741935, + "grad_norm": 253.1395263671875, + "learning_rate": 1.4389847831698604e-05, + "loss": 22.7371, + "step": 161 + }, + { + "epoch": 1.3064516129032258, + "grad_norm": 120.206787109375, + "learning_rate": 1.4358497400692725e-05, + "loss": 3.3325, + "step": 162 + }, + { + "epoch": 1.314516129032258, + "grad_norm": 134.07962036132812, + "learning_rate": 1.4327146969686845e-05, + "loss": 2.6457, + "step": 163 + }, + { + "epoch": 1.3225806451612903, + "grad_norm": 85.90950775146484, + "learning_rate": 1.4295796538680966e-05, + "loss": 4.7217, + "step": 164 + }, + { + "epoch": 1.3306451612903225, + "grad_norm": 349.7077331542969, + "learning_rate": 1.4264446107675087e-05, + "loss": 9.5306, + "step": 165 + }, + { + "epoch": 1.3387096774193548, + "grad_norm": 175.317138671875, + "learning_rate": 1.4233095676669208e-05, + "loss": 4.5035, + "step": 166 + }, + { + "epoch": 1.346774193548387, + "grad_norm": 59.4417839050293, + "learning_rate": 1.4201745245663327e-05, + "loss": 0.9392, + "step": 167 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 51.75499725341797, + "learning_rate": 1.4170394814657448e-05, + "loss": 0.7847, + "step": 168 + }, + { + "epoch": 1.3629032258064515, + "grad_norm": 98.38888549804688, + "learning_rate": 1.413904438365157e-05, + "loss": 2.4999, + "step": 169 + }, + { + "epoch": 1.370967741935484, + "grad_norm": 430.9798278808594, + "learning_rate": 1.410769395264569e-05, + "loss": 124.0953, + "step": 170 + }, + { + "epoch": 1.379032258064516, + "grad_norm": 24.690670013427734, + "learning_rate": 1.4076343521639811e-05, + "loss": 0.8053, + "step": 171 + }, + { + "epoch": 1.3870967741935485, + "grad_norm": 198.97019958496094, + "learning_rate": 1.404499309063393e-05, + "loss": 12.995, + "step": 172 + }, + { + "epoch": 1.3951612903225805, + "grad_norm": 13.825026512145996, + "learning_rate": 1.4013642659628052e-05, + "loss": 0.4482, + "step": 173 + }, + { + "epoch": 1.403225806451613, + "grad_norm": 94.05176544189453, + "learning_rate": 1.3982292228622173e-05, + "loss": 8.6515, + "step": 174 + }, + { + "epoch": 1.4112903225806452, + "grad_norm": 64.00646209716797, + "learning_rate": 1.3950941797616294e-05, + "loss": 1.366, + "step": 175 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 259.6784973144531, + "learning_rate": 1.3919591366610415e-05, + "loss": 20.5985, + "step": 176 + }, + { + "epoch": 1.4274193548387097, + "grad_norm": 87.15518951416016, + "learning_rate": 1.3888240935604534e-05, + "loss": 2.9658, + "step": 177 + }, + { + "epoch": 1.435483870967742, + "grad_norm": 29.206682205200195, + "learning_rate": 1.3856890504598655e-05, + "loss": 0.4756, + "step": 178 + }, + { + "epoch": 1.4435483870967742, + "grad_norm": 53.03056716918945, + "learning_rate": 1.3825540073592777e-05, + "loss": 1.38, + "step": 179 + }, + { + "epoch": 1.4516129032258065, + "grad_norm": 23.307886123657227, + "learning_rate": 1.3794189642586898e-05, + "loss": 0.3993, + "step": 180 + }, + { + "epoch": 1.4596774193548387, + "grad_norm": 29.451797485351562, + "learning_rate": 1.3762839211581017e-05, + "loss": 0.3372, + "step": 181 + }, + { + "epoch": 1.467741935483871, + "grad_norm": 13.200406074523926, + "learning_rate": 1.3731488780575138e-05, + "loss": 0.4087, + "step": 182 + }, + { + "epoch": 1.4758064516129032, + "grad_norm": 51.59567642211914, + "learning_rate": 1.3700138349569259e-05, + "loss": 2.83, + "step": 183 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 49.19482421875, + "learning_rate": 1.366878791856338e-05, + "loss": 4.3183, + "step": 184 + }, + { + "epoch": 1.4919354838709677, + "grad_norm": 62.753883361816406, + "learning_rate": 1.3637437487557501e-05, + "loss": 2.1211, + "step": 185 + }, + { + "epoch": 1.5, + "grad_norm": 13.07942008972168, + "learning_rate": 1.360608705655162e-05, + "loss": 0.2284, + "step": 186 + }, + { + "epoch": 1.5080645161290323, + "grad_norm": 57.539710998535156, + "learning_rate": 1.3574736625545742e-05, + "loss": 4.917, + "step": 187 + }, + { + "epoch": 1.5161290322580645, + "grad_norm": 21.182952880859375, + "learning_rate": 1.3543386194539863e-05, + "loss": 1.2659, + "step": 188 + }, + { + "epoch": 1.5241935483870968, + "grad_norm": 63.780704498291016, + "learning_rate": 1.3512035763533984e-05, + "loss": 1.9342, + "step": 189 + }, + { + "epoch": 1.532258064516129, + "grad_norm": 39.30858612060547, + "learning_rate": 1.3480685332528105e-05, + "loss": 0.6736, + "step": 190 + }, + { + "epoch": 1.5403225806451613, + "grad_norm": 10.713706970214844, + "learning_rate": 1.3449334901522224e-05, + "loss": 0.2435, + "step": 191 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 26.50084686279297, + "learning_rate": 1.3417984470516345e-05, + "loss": 0.4394, + "step": 192 + }, + { + "epoch": 1.5564516129032258, + "grad_norm": 61.80959701538086, + "learning_rate": 1.3386634039510466e-05, + "loss": 1.6371, + "step": 193 + }, + { + "epoch": 1.564516129032258, + "grad_norm": 44.66097640991211, + "learning_rate": 1.3355283608504587e-05, + "loss": 0.9796, + "step": 194 + }, + { + "epoch": 1.5725806451612905, + "grad_norm": 37.348472595214844, + "learning_rate": 1.3323933177498707e-05, + "loss": 0.7044, + "step": 195 + }, + { + "epoch": 1.5806451612903225, + "grad_norm": 120.70457458496094, + "learning_rate": 1.3292582746492828e-05, + "loss": 4.7388, + "step": 196 + }, + { + "epoch": 1.588709677419355, + "grad_norm": 10.883711814880371, + "learning_rate": 1.3261232315486949e-05, + "loss": 0.6842, + "step": 197 + }, + { + "epoch": 1.596774193548387, + "grad_norm": 9.519349098205566, + "learning_rate": 1.322988188448107e-05, + "loss": 0.4115, + "step": 198 + }, + { + "epoch": 1.6048387096774195, + "grad_norm": 21.985918045043945, + "learning_rate": 1.3198531453475191e-05, + "loss": 0.1746, + "step": 199 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 51.3434944152832, + "learning_rate": 1.316718102246931e-05, + "loss": 0.8559, + "step": 200 + }, + { + "epoch": 1.620967741935484, + "grad_norm": 38.343318939208984, + "learning_rate": 1.3135830591463431e-05, + "loss": 0.5729, + "step": 201 + }, + { + "epoch": 1.629032258064516, + "grad_norm": 31.334217071533203, + "learning_rate": 1.3104480160457552e-05, + "loss": 0.8461, + "step": 202 + }, + { + "epoch": 1.6370967741935485, + "grad_norm": 28.43387222290039, + "learning_rate": 1.3073129729451674e-05, + "loss": 0.5074, + "step": 203 + }, + { + "epoch": 1.6451612903225805, + "grad_norm": 12.179628372192383, + "learning_rate": 1.3041779298445795e-05, + "loss": 0.5357, + "step": 204 + }, + { + "epoch": 1.653225806451613, + "grad_norm": 160.48968505859375, + "learning_rate": 1.3010428867439914e-05, + "loss": 15.9211, + "step": 205 + }, + { + "epoch": 1.661290322580645, + "grad_norm": 8.148846626281738, + "learning_rate": 1.2979078436434035e-05, + "loss": 0.074, + "step": 206 + }, + { + "epoch": 1.6693548387096775, + "grad_norm": 8.479569435119629, + "learning_rate": 1.2947728005428156e-05, + "loss": 0.1534, + "step": 207 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 19.190534591674805, + "learning_rate": 1.2916377574422277e-05, + "loss": 0.3617, + "step": 208 + }, + { + "epoch": 1.685483870967742, + "grad_norm": 7.419437408447266, + "learning_rate": 1.2885027143416398e-05, + "loss": 0.372, + "step": 209 + }, + { + "epoch": 1.6935483870967742, + "grad_norm": 36.760528564453125, + "learning_rate": 1.2853676712410518e-05, + "loss": 0.6221, + "step": 210 + }, + { + "epoch": 1.7016129032258065, + "grad_norm": 46.47172546386719, + "learning_rate": 1.2822326281404639e-05, + "loss": 1.0012, + "step": 211 + }, + { + "epoch": 1.7096774193548387, + "grad_norm": 21.93129539489746, + "learning_rate": 1.279097585039876e-05, + "loss": 0.9615, + "step": 212 + }, + { + "epoch": 1.717741935483871, + "grad_norm": 4.738753795623779, + "learning_rate": 1.275962541939288e-05, + "loss": 0.0968, + "step": 213 + }, + { + "epoch": 1.7258064516129032, + "grad_norm": 144.14422607421875, + "learning_rate": 1.2728274988386998e-05, + "loss": 24.8741, + "step": 214 + }, + { + "epoch": 1.7338709677419355, + "grad_norm": 98.76880645751953, + "learning_rate": 1.2696924557381121e-05, + "loss": 15.2355, + "step": 215 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 10.59051513671875, + "learning_rate": 1.2665574126375242e-05, + "loss": 0.3515, + "step": 216 + }, + { + "epoch": 1.75, + "grad_norm": 131.96859741210938, + "learning_rate": 1.2634223695369363e-05, + "loss": 7.7135, + "step": 217 + }, + { + "epoch": 1.7580645161290323, + "grad_norm": 25.29785919189453, + "learning_rate": 1.2602873264363484e-05, + "loss": 0.2482, + "step": 218 + }, + { + "epoch": 1.7661290322580645, + "grad_norm": 177.49879455566406, + "learning_rate": 1.2571522833357602e-05, + "loss": 15.0218, + "step": 219 + }, + { + "epoch": 1.7741935483870968, + "grad_norm": 30.580156326293945, + "learning_rate": 1.2540172402351725e-05, + "loss": 0.4993, + "step": 220 + }, + { + "epoch": 1.782258064516129, + "grad_norm": 82.16755676269531, + "learning_rate": 1.2508821971345846e-05, + "loss": 0.8773, + "step": 221 + }, + { + "epoch": 1.7903225806451613, + "grad_norm": 13.509096145629883, + "learning_rate": 1.2477471540339967e-05, + "loss": 0.598, + "step": 222 + }, + { + "epoch": 1.7983870967741935, + "grad_norm": 31.23018455505371, + "learning_rate": 1.2446121109334088e-05, + "loss": 0.2144, + "step": 223 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 146.499267578125, + "learning_rate": 1.2414770678328206e-05, + "loss": 14.024, + "step": 224 + }, + { + "epoch": 1.814516129032258, + "grad_norm": 411.3276672363281, + "learning_rate": 1.2383420247322328e-05, + "loss": 6.1271, + "step": 225 + }, + { + "epoch": 1.8225806451612905, + "grad_norm": 36.80350112915039, + "learning_rate": 1.235206981631645e-05, + "loss": 1.4717, + "step": 226 + }, + { + "epoch": 1.8306451612903225, + "grad_norm": 81.75621795654297, + "learning_rate": 1.232071938531057e-05, + "loss": 1.1353, + "step": 227 + }, + { + "epoch": 1.838709677419355, + "grad_norm": 64.03866577148438, + "learning_rate": 1.2289368954304688e-05, + "loss": 1.1949, + "step": 228 + }, + { + "epoch": 1.846774193548387, + "grad_norm": 38.97941207885742, + "learning_rate": 1.225801852329881e-05, + "loss": 0.7342, + "step": 229 + }, + { + "epoch": 1.8548387096774195, + "grad_norm": 247.11788940429688, + "learning_rate": 1.2226668092292932e-05, + "loss": 7.2037, + "step": 230 + }, + { + "epoch": 1.8629032258064515, + "grad_norm": 62.207733154296875, + "learning_rate": 1.2195317661287053e-05, + "loss": 1.5858, + "step": 231 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 15.489973068237305, + "learning_rate": 1.2163967230281174e-05, + "loss": 0.3868, + "step": 232 + }, + { + "epoch": 1.879032258064516, + "grad_norm": 29.43061637878418, + "learning_rate": 1.2132616799275292e-05, + "loss": 1.1257, + "step": 233 + }, + { + "epoch": 1.8870967741935485, + "grad_norm": 10.211771011352539, + "learning_rate": 1.2101266368269413e-05, + "loss": 0.1734, + "step": 234 + }, + { + "epoch": 1.8951612903225805, + "grad_norm": 11.279986381530762, + "learning_rate": 1.2069915937263534e-05, + "loss": 0.4312, + "step": 235 + }, + { + "epoch": 1.903225806451613, + "grad_norm": 119.58721160888672, + "learning_rate": 1.2038565506257657e-05, + "loss": 9.7858, + "step": 236 + }, + { + "epoch": 1.911290322580645, + "grad_norm": 201.3207244873047, + "learning_rate": 1.2007215075251778e-05, + "loss": 4.2142, + "step": 237 + }, + { + "epoch": 1.9193548387096775, + "grad_norm": 37.971343994140625, + "learning_rate": 1.1975864644245895e-05, + "loss": 0.9522, + "step": 238 + }, + { + "epoch": 1.9274193548387095, + "grad_norm": 34.468807220458984, + "learning_rate": 1.1944514213240016e-05, + "loss": 0.7723, + "step": 239 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 39.51136016845703, + "learning_rate": 1.1913163782234138e-05, + "loss": 2.6126, + "step": 240 + }, + { + "epoch": 1.9435483870967742, + "grad_norm": 27.13111686706543, + "learning_rate": 1.188181335122826e-05, + "loss": 0.6028, + "step": 241 + }, + { + "epoch": 1.9516129032258065, + "grad_norm": 17.496204376220703, + "learning_rate": 1.1850462920222378e-05, + "loss": 0.733, + "step": 242 + }, + { + "epoch": 1.9596774193548387, + "grad_norm": 43.25605773925781, + "learning_rate": 1.1819112489216499e-05, + "loss": 1.8264, + "step": 243 + }, + { + "epoch": 1.967741935483871, + "grad_norm": 144.58236694335938, + "learning_rate": 1.178776205821062e-05, + "loss": 4.7865, + "step": 244 + }, + { + "epoch": 1.9758064516129032, + "grad_norm": 71.39822387695312, + "learning_rate": 1.1756411627204741e-05, + "loss": 1.7471, + "step": 245 + }, + { + "epoch": 1.9838709677419355, + "grad_norm": 418.1564025878906, + "learning_rate": 1.1725061196198864e-05, + "loss": 15.5524, + "step": 246 + }, + { + "epoch": 1.9919354838709677, + "grad_norm": 103.1528091430664, + "learning_rate": 1.1693710765192982e-05, + "loss": 2.4682, + "step": 247 + }, + { + "epoch": 2.0, + "grad_norm": 129.07899475097656, + "learning_rate": 1.1662360334187103e-05, + "loss": 3.7685, + "step": 248 + }, + { + "epoch": 2.0, + "eval_loss": 4.093452453613281, + "eval_mae": 1.4580568075180054, + "eval_mse": 4.093452453613281, + "eval_r2": -0.12071609497070312, + "eval_rmse": 2.02322822578504, + "eval_runtime": 1.3208, + "eval_samples_per_second": 41.641, + "eval_smape": 53.77084016799927, + "eval_steps_per_second": 10.599, + "step": 248 + }, + { + "epoch": 2.0080645161290325, + "grad_norm": 176.1205291748047, + "learning_rate": 1.1631009903181224e-05, + "loss": 3.3068, + "step": 249 + }, + { + "epoch": 2.0161290322580645, + "grad_norm": 84.84161376953125, + "learning_rate": 1.1599659472175345e-05, + "loss": 2.8326, + "step": 250 + }, + { + "epoch": 2.024193548387097, + "grad_norm": 128.6629638671875, + "learning_rate": 1.1568309041169468e-05, + "loss": 4.0163, + "step": 251 + }, + { + "epoch": 2.032258064516129, + "grad_norm": 70.34894561767578, + "learning_rate": 1.1536958610163585e-05, + "loss": 3.7917, + "step": 252 + }, + { + "epoch": 2.0403225806451615, + "grad_norm": 18.99244499206543, + "learning_rate": 1.1505608179157706e-05, + "loss": 0.2877, + "step": 253 + }, + { + "epoch": 2.0483870967741935, + "grad_norm": 189.7742156982422, + "learning_rate": 1.1474257748151827e-05, + "loss": 6.5608, + "step": 254 + }, + { + "epoch": 2.056451612903226, + "grad_norm": 19.993907928466797, + "learning_rate": 1.1442907317145948e-05, + "loss": 0.2394, + "step": 255 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 174.26377868652344, + "learning_rate": 1.1411556886140068e-05, + "loss": 19.2623, + "step": 256 + }, + { + "epoch": 2.0725806451612905, + "grad_norm": 20.718603134155273, + "learning_rate": 1.1380206455134189e-05, + "loss": 0.2838, + "step": 257 + }, + { + "epoch": 2.0806451612903225, + "grad_norm": 55.44133377075195, + "learning_rate": 1.134885602412831e-05, + "loss": 0.9125, + "step": 258 + }, + { + "epoch": 2.088709677419355, + "grad_norm": 113.99673461914062, + "learning_rate": 1.1317505593122431e-05, + "loss": 23.6723, + "step": 259 + }, + { + "epoch": 2.096774193548387, + "grad_norm": 23.634668350219727, + "learning_rate": 1.1286155162116552e-05, + "loss": 0.3163, + "step": 260 + }, + { + "epoch": 2.1048387096774195, + "grad_norm": 142.3331756591797, + "learning_rate": 1.1254804731110671e-05, + "loss": 24.4121, + "step": 261 + }, + { + "epoch": 2.1129032258064515, + "grad_norm": 97.85147857666016, + "learning_rate": 1.1223454300104792e-05, + "loss": 0.8764, + "step": 262 + }, + { + "epoch": 2.120967741935484, + "grad_norm": 33.50703430175781, + "learning_rate": 1.1192103869098913e-05, + "loss": 0.2653, + "step": 263 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 25.053003311157227, + "learning_rate": 1.1160753438093035e-05, + "loss": 0.4152, + "step": 264 + }, + { + "epoch": 2.1370967741935485, + "grad_norm": 55.80361557006836, + "learning_rate": 1.1129403007087156e-05, + "loss": 0.8291, + "step": 265 + }, + { + "epoch": 2.1451612903225805, + "grad_norm": 161.3894805908203, + "learning_rate": 1.1098052576081275e-05, + "loss": 4.3344, + "step": 266 + }, + { + "epoch": 2.153225806451613, + "grad_norm": 12.537243843078613, + "learning_rate": 1.1066702145075396e-05, + "loss": 0.2252, + "step": 267 + }, + { + "epoch": 2.161290322580645, + "grad_norm": 60.92879867553711, + "learning_rate": 1.1035351714069517e-05, + "loss": 1.7349, + "step": 268 + }, + { + "epoch": 2.1693548387096775, + "grad_norm": 52.16379165649414, + "learning_rate": 1.1004001283063638e-05, + "loss": 2.7655, + "step": 269 + }, + { + "epoch": 2.1774193548387095, + "grad_norm": 13.385435104370117, + "learning_rate": 1.097265085205776e-05, + "loss": 0.7339, + "step": 270 + }, + { + "epoch": 2.185483870967742, + "grad_norm": 82.908935546875, + "learning_rate": 1.0941300421051879e-05, + "loss": 10.5988, + "step": 271 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 114.64527130126953, + "learning_rate": 1.0909949990046e-05, + "loss": 14.9863, + "step": 272 + }, + { + "epoch": 2.2016129032258065, + "grad_norm": 90.68907928466797, + "learning_rate": 1.087859955904012e-05, + "loss": 12.8366, + "step": 273 + }, + { + "epoch": 2.2096774193548385, + "grad_norm": 24.137863159179688, + "learning_rate": 1.0847249128034242e-05, + "loss": 0.6209, + "step": 274 + }, + { + "epoch": 2.217741935483871, + "grad_norm": 86.06786346435547, + "learning_rate": 1.0815898697028361e-05, + "loss": 0.6642, + "step": 275 + }, + { + "epoch": 2.225806451612903, + "grad_norm": 34.535762786865234, + "learning_rate": 1.0784548266022482e-05, + "loss": 0.3586, + "step": 276 + }, + { + "epoch": 2.2338709677419355, + "grad_norm": 55.70576095581055, + "learning_rate": 1.0753197835016603e-05, + "loss": 0.7619, + "step": 277 + }, + { + "epoch": 2.241935483870968, + "grad_norm": 76.06886291503906, + "learning_rate": 1.0721847404010724e-05, + "loss": 2.1191, + "step": 278 + }, + { + "epoch": 2.25, + "grad_norm": 28.001771926879883, + "learning_rate": 1.0690496973004845e-05, + "loss": 0.8164, + "step": 279 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 103.4825439453125, + "learning_rate": 1.0659146541998965e-05, + "loss": 10.475, + "step": 280 + }, + { + "epoch": 2.2661290322580645, + "grad_norm": 33.585960388183594, + "learning_rate": 1.0627796110993086e-05, + "loss": 1.6235, + "step": 281 + }, + { + "epoch": 2.274193548387097, + "grad_norm": 232.9297332763672, + "learning_rate": 1.0596445679987207e-05, + "loss": 13.3261, + "step": 282 + }, + { + "epoch": 2.282258064516129, + "grad_norm": 96.77767944335938, + "learning_rate": 1.0565095248981328e-05, + "loss": 3.1002, + "step": 283 + }, + { + "epoch": 2.2903225806451615, + "grad_norm": 116.28626251220703, + "learning_rate": 1.0533744817975449e-05, + "loss": 5.5134, + "step": 284 + }, + { + "epoch": 2.2983870967741935, + "grad_norm": 69.49893951416016, + "learning_rate": 1.0502394386969568e-05, + "loss": 1.413, + "step": 285 + }, + { + "epoch": 2.306451612903226, + "grad_norm": 36.76485824584961, + "learning_rate": 1.047104395596369e-05, + "loss": 0.3031, + "step": 286 + }, + { + "epoch": 2.314516129032258, + "grad_norm": 62.41328048706055, + "learning_rate": 1.043969352495781e-05, + "loss": 1.5162, + "step": 287 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 30.905513763427734, + "learning_rate": 1.0408343093951932e-05, + "loss": 0.6394, + "step": 288 + }, + { + "epoch": 2.3306451612903225, + "grad_norm": 48.83860778808594, + "learning_rate": 1.0376992662946051e-05, + "loss": 1.7204, + "step": 289 + }, + { + "epoch": 2.338709677419355, + "grad_norm": 95.79818725585938, + "learning_rate": 1.0345642231940172e-05, + "loss": 9.2122, + "step": 290 + }, + { + "epoch": 2.346774193548387, + "grad_norm": 45.55128860473633, + "learning_rate": 1.0314291800934293e-05, + "loss": 0.6816, + "step": 291 + }, + { + "epoch": 2.3548387096774195, + "grad_norm": 170.74098205566406, + "learning_rate": 1.0282941369928414e-05, + "loss": 5.1995, + "step": 292 + }, + { + "epoch": 2.3629032258064515, + "grad_norm": 27.760730743408203, + "learning_rate": 1.0251590938922535e-05, + "loss": 0.2949, + "step": 293 + }, + { + "epoch": 2.370967741935484, + "grad_norm": 61.26620864868164, + "learning_rate": 1.0220240507916654e-05, + "loss": 12.498, + "step": 294 + }, + { + "epoch": 2.379032258064516, + "grad_norm": 88.63400268554688, + "learning_rate": 1.0188890076910776e-05, + "loss": 14.3648, + "step": 295 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 15.192766189575195, + "learning_rate": 1.0157539645904897e-05, + "loss": 0.7797, + "step": 296 + }, + { + "epoch": 2.3951612903225805, + "grad_norm": 14.215178489685059, + "learning_rate": 1.0126189214899018e-05, + "loss": 0.3113, + "step": 297 + }, + { + "epoch": 2.403225806451613, + "grad_norm": 83.61798858642578, + "learning_rate": 1.0094838783893139e-05, + "loss": 2.0366, + "step": 298 + }, + { + "epoch": 2.411290322580645, + "grad_norm": 16.302412033081055, + "learning_rate": 1.0063488352887258e-05, + "loss": 0.7133, + "step": 299 + }, + { + "epoch": 2.4193548387096775, + "grad_norm": 128.92327880859375, + "learning_rate": 1.0032137921881379e-05, + "loss": 3.2273, + "step": 300 + }, + { + "epoch": 2.4274193548387095, + "grad_norm": 42.748626708984375, + "learning_rate": 1.00007874908755e-05, + "loss": 0.66, + "step": 301 + }, + { + "epoch": 2.435483870967742, + "grad_norm": 30.09368133544922, + "learning_rate": 9.969437059869621e-06, + "loss": 0.2957, + "step": 302 + }, + { + "epoch": 2.443548387096774, + "grad_norm": 98.78866577148438, + "learning_rate": 9.93808662886374e-06, + "loss": 6.8945, + "step": 303 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 47.38784408569336, + "learning_rate": 9.906736197857862e-06, + "loss": 0.5275, + "step": 304 + }, + { + "epoch": 2.4596774193548385, + "grad_norm": 91.3726806640625, + "learning_rate": 9.875385766851983e-06, + "loss": 12.6285, + "step": 305 + }, + { + "epoch": 2.467741935483871, + "grad_norm": 14.849919319152832, + "learning_rate": 9.844035335846104e-06, + "loss": 0.1922, + "step": 306 + }, + { + "epoch": 2.475806451612903, + "grad_norm": 27.1463565826416, + "learning_rate": 9.812684904840225e-06, + "loss": 0.4246, + "step": 307 + }, + { + "epoch": 2.4838709677419355, + "grad_norm": 305.164306640625, + "learning_rate": 9.781334473834344e-06, + "loss": 11.3922, + "step": 308 + }, + { + "epoch": 2.491935483870968, + "grad_norm": 20.79343605041504, + "learning_rate": 9.749984042828465e-06, + "loss": 0.448, + "step": 309 + }, + { + "epoch": 2.5, + "grad_norm": 23.24410629272461, + "learning_rate": 9.718633611822586e-06, + "loss": 0.4327, + "step": 310 + }, + { + "epoch": 2.508064516129032, + "grad_norm": 9.029376029968262, + "learning_rate": 9.687283180816707e-06, + "loss": 0.3693, + "step": 311 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 9.926502227783203, + "learning_rate": 9.655932749810827e-06, + "loss": 0.1628, + "step": 312 + }, + { + "epoch": 2.524193548387097, + "grad_norm": 5.0813703536987305, + "learning_rate": 9.62458231880495e-06, + "loss": 0.0807, + "step": 313 + }, + { + "epoch": 2.532258064516129, + "grad_norm": 14.10543441772461, + "learning_rate": 9.593231887799069e-06, + "loss": 0.4584, + "step": 314 + }, + { + "epoch": 2.540322580645161, + "grad_norm": 112.93350219726562, + "learning_rate": 9.56188145679319e-06, + "loss": 10.0475, + "step": 315 + }, + { + "epoch": 2.5483870967741935, + "grad_norm": 8.689790725708008, + "learning_rate": 9.53053102578731e-06, + "loss": 0.2067, + "step": 316 + }, + { + "epoch": 2.556451612903226, + "grad_norm": 28.200075149536133, + "learning_rate": 9.49918059478143e-06, + "loss": 1.2775, + "step": 317 + }, + { + "epoch": 2.564516129032258, + "grad_norm": 15.749256134033203, + "learning_rate": 9.467830163775553e-06, + "loss": 0.4091, + "step": 318 + }, + { + "epoch": 2.5725806451612905, + "grad_norm": 8.130678176879883, + "learning_rate": 9.436479732769673e-06, + "loss": 0.7939, + "step": 319 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 13.053906440734863, + "learning_rate": 9.405129301763794e-06, + "loss": 0.8335, + "step": 320 + }, + { + "epoch": 2.588709677419355, + "grad_norm": 166.3572540283203, + "learning_rate": 9.373778870757913e-06, + "loss": 6.0399, + "step": 321 + }, + { + "epoch": 2.596774193548387, + "grad_norm": 9.214460372924805, + "learning_rate": 9.342428439752034e-06, + "loss": 0.3924, + "step": 322 + }, + { + "epoch": 2.6048387096774195, + "grad_norm": 143.34503173828125, + "learning_rate": 9.311078008746155e-06, + "loss": 13.3153, + "step": 323 + }, + { + "epoch": 2.6129032258064515, + "grad_norm": 23.62826156616211, + "learning_rate": 9.279727577740276e-06, + "loss": 0.3648, + "step": 324 + }, + { + "epoch": 2.620967741935484, + "grad_norm": 12.584870338439941, + "learning_rate": 9.248377146734397e-06, + "loss": 0.1898, + "step": 325 + }, + { + "epoch": 2.629032258064516, + "grad_norm": 20.08246612548828, + "learning_rate": 9.217026715728517e-06, + "loss": 0.6812, + "step": 326 + }, + { + "epoch": 2.6370967741935485, + "grad_norm": 15.498703956604004, + "learning_rate": 9.185676284722638e-06, + "loss": 0.8188, + "step": 327 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 26.546886444091797, + "learning_rate": 9.154325853716759e-06, + "loss": 0.2763, + "step": 328 + }, + { + "epoch": 2.653225806451613, + "grad_norm": 381.572021484375, + "learning_rate": 9.12297542271088e-06, + "loss": 126.325, + "step": 329 + }, + { + "epoch": 2.661290322580645, + "grad_norm": 92.21846771240234, + "learning_rate": 9.091624991704999e-06, + "loss": 6.1702, + "step": 330 + }, + { + "epoch": 2.6693548387096775, + "grad_norm": 16.447362899780273, + "learning_rate": 9.06027456069912e-06, + "loss": 0.8041, + "step": 331 + }, + { + "epoch": 2.6774193548387095, + "grad_norm": 19.612592697143555, + "learning_rate": 9.028924129693241e-06, + "loss": 1.1898, + "step": 332 + }, + { + "epoch": 2.685483870967742, + "grad_norm": 40.966026306152344, + "learning_rate": 8.997573698687362e-06, + "loss": 0.2561, + "step": 333 + }, + { + "epoch": 2.693548387096774, + "grad_norm": 164.29673767089844, + "learning_rate": 8.966223267681483e-06, + "loss": 10.4936, + "step": 334 + }, + { + "epoch": 2.7016129032258065, + "grad_norm": 20.719127655029297, + "learning_rate": 8.934872836675603e-06, + "loss": 1.0469, + "step": 335 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 46.28770446777344, + "learning_rate": 8.903522405669724e-06, + "loss": 1.2291, + "step": 336 + }, + { + "epoch": 2.717741935483871, + "grad_norm": 53.001338958740234, + "learning_rate": 8.872171974663845e-06, + "loss": 1.0995, + "step": 337 + }, + { + "epoch": 2.725806451612903, + "grad_norm": 80.97772979736328, + "learning_rate": 8.840821543657966e-06, + "loss": 1.4318, + "step": 338 + }, + { + "epoch": 2.7338709677419355, + "grad_norm": 81.25788116455078, + "learning_rate": 8.809471112652087e-06, + "loss": 10.1732, + "step": 339 + }, + { + "epoch": 2.741935483870968, + "grad_norm": 70.82359313964844, + "learning_rate": 8.778120681646206e-06, + "loss": 1.1948, + "step": 340 + }, + { + "epoch": 2.75, + "grad_norm": 28.553592681884766, + "learning_rate": 8.746770250640327e-06, + "loss": 0.3979, + "step": 341 + }, + { + "epoch": 2.758064516129032, + "grad_norm": 58.9395751953125, + "learning_rate": 8.715419819634448e-06, + "loss": 8.2167, + "step": 342 + }, + { + "epoch": 2.7661290322580645, + "grad_norm": 36.5241584777832, + "learning_rate": 8.68406938862857e-06, + "loss": 0.5727, + "step": 343 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 12.758500099182129, + "learning_rate": 8.65271895762269e-06, + "loss": 0.5117, + "step": 344 + }, + { + "epoch": 2.782258064516129, + "grad_norm": 83.17584228515625, + "learning_rate": 8.62136852661681e-06, + "loss": 2.2623, + "step": 345 + }, + { + "epoch": 2.790322580645161, + "grad_norm": 26.435558319091797, + "learning_rate": 8.590018095610931e-06, + "loss": 0.2321, + "step": 346 + }, + { + "epoch": 2.7983870967741935, + "grad_norm": 50.01573181152344, + "learning_rate": 8.558667664605052e-06, + "loss": 0.9035, + "step": 347 + }, + { + "epoch": 2.806451612903226, + "grad_norm": 13.123492240905762, + "learning_rate": 8.527317233599173e-06, + "loss": 0.274, + "step": 348 + }, + { + "epoch": 2.814516129032258, + "grad_norm": 62.45608901977539, + "learning_rate": 8.495966802593292e-06, + "loss": 6.3547, + "step": 349 + }, + { + "epoch": 2.8225806451612905, + "grad_norm": 78.55066680908203, + "learning_rate": 8.464616371587414e-06, + "loss": 2.2642, + "step": 350 + }, + { + "epoch": 2.8306451612903225, + "grad_norm": 56.115962982177734, + "learning_rate": 8.433265940581535e-06, + "loss": 0.8852, + "step": 351 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 36.07647705078125, + "learning_rate": 8.401915509575656e-06, + "loss": 2.3096, + "step": 352 + }, + { + "epoch": 2.846774193548387, + "grad_norm": 37.17245864868164, + "learning_rate": 8.370565078569777e-06, + "loss": 0.8445, + "step": 353 + }, + { + "epoch": 2.8548387096774195, + "grad_norm": 146.50592041015625, + "learning_rate": 8.339214647563896e-06, + "loss": 7.5497, + "step": 354 + }, + { + "epoch": 2.8629032258064515, + "grad_norm": 122.4879150390625, + "learning_rate": 8.307864216558017e-06, + "loss": 6.0334, + "step": 355 + }, + { + "epoch": 2.870967741935484, + "grad_norm": 32.15070724487305, + "learning_rate": 8.276513785552138e-06, + "loss": 0.3793, + "step": 356 + }, + { + "epoch": 2.879032258064516, + "grad_norm": 143.58071899414062, + "learning_rate": 8.24516335454626e-06, + "loss": 16.1946, + "step": 357 + }, + { + "epoch": 2.8870967741935485, + "grad_norm": 24.55172348022461, + "learning_rate": 8.21381292354038e-06, + "loss": 0.178, + "step": 358 + }, + { + "epoch": 2.8951612903225805, + "grad_norm": 24.15800666809082, + "learning_rate": 8.1824624925345e-06, + "loss": 0.5304, + "step": 359 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 16.40801239013672, + "learning_rate": 8.15111206152862e-06, + "loss": 0.7086, + "step": 360 + }, + { + "epoch": 2.911290322580645, + "grad_norm": 144.3972930908203, + "learning_rate": 8.119761630522742e-06, + "loss": 15.8248, + "step": 361 + }, + { + "epoch": 2.9193548387096775, + "grad_norm": 56.63901901245117, + "learning_rate": 8.088411199516863e-06, + "loss": 0.8383, + "step": 362 + }, + { + "epoch": 2.9274193548387095, + "grad_norm": 36.86946487426758, + "learning_rate": 8.057060768510982e-06, + "loss": 0.4044, + "step": 363 + }, + { + "epoch": 2.935483870967742, + "grad_norm": 54.56624984741211, + "learning_rate": 8.025710337505103e-06, + "loss": 0.8564, + "step": 364 + }, + { + "epoch": 2.943548387096774, + "grad_norm": 45.62172317504883, + "learning_rate": 7.994359906499224e-06, + "loss": 0.8052, + "step": 365 + }, + { + "epoch": 2.9516129032258065, + "grad_norm": 28.312416076660156, + "learning_rate": 7.963009475493345e-06, + "loss": 0.6061, + "step": 366 + }, + { + "epoch": 2.959677419354839, + "grad_norm": 47.32063293457031, + "learning_rate": 7.931659044487467e-06, + "loss": 0.7651, + "step": 367 + }, + { + "epoch": 2.967741935483871, + "grad_norm": 93.73538208007812, + "learning_rate": 7.900308613481586e-06, + "loss": 1.4781, + "step": 368 + }, + { + "epoch": 2.975806451612903, + "grad_norm": 30.8656063079834, + "learning_rate": 7.868958182475707e-06, + "loss": 0.3955, + "step": 369 + }, + { + "epoch": 2.9838709677419355, + "grad_norm": 11.793440818786621, + "learning_rate": 7.837607751469828e-06, + "loss": 0.3443, + "step": 370 + }, + { + "epoch": 2.991935483870968, + "grad_norm": 47.234580993652344, + "learning_rate": 7.806257320463949e-06, + "loss": 2.2459, + "step": 371 + }, + { + "epoch": 3.0, + "grad_norm": 330.5805969238281, + "learning_rate": 7.77490688945807e-06, + "loss": 27.661, + "step": 372 + }, + { + "epoch": 3.0, + "eval_loss": 3.361875057220459, + "eval_mae": 1.126670479774475, + "eval_mse": 3.3618738651275635, + "eval_r2": 0.07957738637924194, + "eval_rmse": 1.8335413453553655, + "eval_runtime": 1.3777, + "eval_samples_per_second": 39.921, + "eval_smape": 52.641284465789795, + "eval_steps_per_second": 10.162, + "step": 372 + }, + { + "epoch": 3.0080645161290325, + "grad_norm": 75.24702453613281, + "learning_rate": 7.74355645845219e-06, + "loss": 1.7123, + "step": 373 + }, + { + "epoch": 3.0161290322580645, + "grad_norm": 127.73233795166016, + "learning_rate": 7.71220602744631e-06, + "loss": 7.3851, + "step": 374 + }, + { + "epoch": 3.024193548387097, + "grad_norm": 13.305014610290527, + "learning_rate": 7.680855596440432e-06, + "loss": 0.2886, + "step": 375 + }, + { + "epoch": 3.032258064516129, + "grad_norm": 41.47939682006836, + "learning_rate": 7.649505165434553e-06, + "loss": 1.5766, + "step": 376 + }, + { + "epoch": 3.0403225806451615, + "grad_norm": 5.5650177001953125, + "learning_rate": 7.618154734428672e-06, + "loss": 0.1177, + "step": 377 + }, + { + "epoch": 3.0483870967741935, + "grad_norm": 60.55559539794922, + "learning_rate": 7.586804303422793e-06, + "loss": 1.4604, + "step": 378 + }, + { + "epoch": 3.056451612903226, + "grad_norm": 45.52365493774414, + "learning_rate": 7.555453872416914e-06, + "loss": 0.946, + "step": 379 + }, + { + "epoch": 3.064516129032258, + "grad_norm": 20.433025360107422, + "learning_rate": 7.524103441411034e-06, + "loss": 0.6702, + "step": 380 + }, + { + "epoch": 3.0725806451612905, + "grad_norm": 218.40994262695312, + "learning_rate": 7.492753010405155e-06, + "loss": 12.6873, + "step": 381 + }, + { + "epoch": 3.0806451612903225, + "grad_norm": 50.64395523071289, + "learning_rate": 7.461402579399276e-06, + "loss": 1.67, + "step": 382 + }, + { + "epoch": 3.088709677419355, + "grad_norm": 29.460969924926758, + "learning_rate": 7.430052148393397e-06, + "loss": 0.5095, + "step": 383 + }, + { + "epoch": 3.096774193548387, + "grad_norm": 31.18248176574707, + "learning_rate": 7.398701717387517e-06, + "loss": 1.3069, + "step": 384 + }, + { + "epoch": 3.1048387096774195, + "grad_norm": 71.00679779052734, + "learning_rate": 7.367351286381638e-06, + "loss": 2.2479, + "step": 385 + }, + { + "epoch": 3.1129032258064515, + "grad_norm": 23.04047393798828, + "learning_rate": 7.336000855375759e-06, + "loss": 0.2883, + "step": 386 + }, + { + "epoch": 3.120967741935484, + "grad_norm": 30.270566940307617, + "learning_rate": 7.304650424369879e-06, + "loss": 0.4375, + "step": 387 + }, + { + "epoch": 3.129032258064516, + "grad_norm": 19.1580867767334, + "learning_rate": 7.273299993364e-06, + "loss": 0.5482, + "step": 388 + }, + { + "epoch": 3.1370967741935485, + "grad_norm": 26.573993682861328, + "learning_rate": 7.2419495623581205e-06, + "loss": 0.3888, + "step": 389 + }, + { + "epoch": 3.1451612903225805, + "grad_norm": 43.726409912109375, + "learning_rate": 7.210599131352242e-06, + "loss": 0.6747, + "step": 390 + }, + { + "epoch": 3.153225806451613, + "grad_norm": 100.00196075439453, + "learning_rate": 7.179248700346363e-06, + "loss": 10.1746, + "step": 391 + }, + { + "epoch": 3.161290322580645, + "grad_norm": 17.870851516723633, + "learning_rate": 7.147898269340483e-06, + "loss": 0.8346, + "step": 392 + }, + { + "epoch": 3.1693548387096775, + "grad_norm": 11.3032865524292, + "learning_rate": 7.116547838334604e-06, + "loss": 0.0478, + "step": 393 + }, + { + "epoch": 3.1774193548387095, + "grad_norm": 8.371842384338379, + "learning_rate": 7.085197407328724e-06, + "loss": 0.1455, + "step": 394 + }, + { + "epoch": 3.185483870967742, + "grad_norm": 29.164669036865234, + "learning_rate": 7.053846976322845e-06, + "loss": 0.5291, + "step": 395 + }, + { + "epoch": 3.193548387096774, + "grad_norm": 16.50055694580078, + "learning_rate": 7.022496545316965e-06, + "loss": 0.0949, + "step": 396 + }, + { + "epoch": 3.2016129032258065, + "grad_norm": 11.366058349609375, + "learning_rate": 6.9911461143110865e-06, + "loss": 0.4745, + "step": 397 + }, + { + "epoch": 3.2096774193548385, + "grad_norm": 50.085636138916016, + "learning_rate": 6.9597956833052075e-06, + "loss": 1.0656, + "step": 398 + }, + { + "epoch": 3.217741935483871, + "grad_norm": 175.33628845214844, + "learning_rate": 6.928445252299328e-06, + "loss": 17.953, + "step": 399 + }, + { + "epoch": 3.225806451612903, + "grad_norm": 166.4563446044922, + "learning_rate": 6.897094821293449e-06, + "loss": 10.9431, + "step": 400 + }, + { + "epoch": 3.2338709677419355, + "grad_norm": 49.0633430480957, + "learning_rate": 6.865744390287569e-06, + "loss": 0.9611, + "step": 401 + }, + { + "epoch": 3.241935483870968, + "grad_norm": 185.53343200683594, + "learning_rate": 6.83439395928169e-06, + "loss": 5.934, + "step": 402 + }, + { + "epoch": 3.25, + "grad_norm": 146.576171875, + "learning_rate": 6.80304352827581e-06, + "loss": 3.7093, + "step": 403 + }, + { + "epoch": 3.258064516129032, + "grad_norm": 9.762494087219238, + "learning_rate": 6.771693097269931e-06, + "loss": 0.125, + "step": 404 + }, + { + "epoch": 3.2661290322580645, + "grad_norm": 37.09442901611328, + "learning_rate": 6.740342666264052e-06, + "loss": 0.3642, + "step": 405 + }, + { + "epoch": 3.274193548387097, + "grad_norm": 196.52310180664062, + "learning_rate": 6.708992235258173e-06, + "loss": 14.157, + "step": 406 + }, + { + "epoch": 3.282258064516129, + "grad_norm": 576.5252075195312, + "learning_rate": 6.677641804252294e-06, + "loss": 3.4606, + "step": 407 + }, + { + "epoch": 3.2903225806451615, + "grad_norm": 70.45011901855469, + "learning_rate": 6.646291373246414e-06, + "loss": 4.9324, + "step": 408 + }, + { + "epoch": 3.2983870967741935, + "grad_norm": 37.71321487426758, + "learning_rate": 6.614940942240535e-06, + "loss": 1.5522, + "step": 409 + }, + { + "epoch": 3.306451612903226, + "grad_norm": 19.980318069458008, + "learning_rate": 6.583590511234655e-06, + "loss": 0.7383, + "step": 410 + }, + { + "epoch": 3.314516129032258, + "grad_norm": 29.11577606201172, + "learning_rate": 6.552240080228776e-06, + "loss": 0.587, + "step": 411 + }, + { + "epoch": 3.3225806451612905, + "grad_norm": 63.494163513183594, + "learning_rate": 6.520889649222897e-06, + "loss": 1.7971, + "step": 412 + }, + { + "epoch": 3.3306451612903225, + "grad_norm": 11.22016429901123, + "learning_rate": 6.4895392182170175e-06, + "loss": 0.0712, + "step": 413 + }, + { + "epoch": 3.338709677419355, + "grad_norm": 83.33367919921875, + "learning_rate": 6.4581887872111386e-06, + "loss": 1.6527, + "step": 414 + }, + { + "epoch": 3.346774193548387, + "grad_norm": 14.314624786376953, + "learning_rate": 6.426838356205259e-06, + "loss": 0.2666, + "step": 415 + }, + { + "epoch": 3.3548387096774195, + "grad_norm": 49.528526306152344, + "learning_rate": 6.39548792519938e-06, + "loss": 0.4909, + "step": 416 + }, + { + "epoch": 3.3629032258064515, + "grad_norm": 101.2572021484375, + "learning_rate": 6.364137494193499e-06, + "loss": 0.998, + "step": 417 + }, + { + "epoch": 3.370967741935484, + "grad_norm": 42.62125778198242, + "learning_rate": 6.332787063187621e-06, + "loss": 0.5947, + "step": 418 + }, + { + "epoch": 3.379032258064516, + "grad_norm": 38.499935150146484, + "learning_rate": 6.301436632181742e-06, + "loss": 0.3158, + "step": 419 + }, + { + "epoch": 3.3870967741935485, + "grad_norm": 163.30111694335938, + "learning_rate": 6.270086201175862e-06, + "loss": 6.147, + "step": 420 + }, + { + "epoch": 3.3951612903225805, + "grad_norm": 34.82032012939453, + "learning_rate": 6.2387357701699835e-06, + "loss": 0.7495, + "step": 421 + }, + { + "epoch": 3.403225806451613, + "grad_norm": 24.731409072875977, + "learning_rate": 6.207385339164103e-06, + "loss": 0.876, + "step": 422 + }, + { + "epoch": 3.411290322580645, + "grad_norm": 218.20574951171875, + "learning_rate": 6.176034908158225e-06, + "loss": 10.9933, + "step": 423 + }, + { + "epoch": 3.4193548387096775, + "grad_norm": 288.42236328125, + "learning_rate": 6.144684477152344e-06, + "loss": 16.9427, + "step": 424 + }, + { + "epoch": 3.4274193548387095, + "grad_norm": 12.138355255126953, + "learning_rate": 6.113334046146466e-06, + "loss": 0.2813, + "step": 425 + }, + { + "epoch": 3.435483870967742, + "grad_norm": 66.90897369384766, + "learning_rate": 6.081983615140587e-06, + "loss": 1.1357, + "step": 426 + }, + { + "epoch": 3.443548387096774, + "grad_norm": 32.00901412963867, + "learning_rate": 6.0506331841347064e-06, + "loss": 0.3149, + "step": 427 + }, + { + "epoch": 3.4516129032258065, + "grad_norm": 28.868900299072266, + "learning_rate": 6.019282753128828e-06, + "loss": 0.3611, + "step": 428 + }, + { + "epoch": 3.4596774193548385, + "grad_norm": 15.651339530944824, + "learning_rate": 5.987932322122948e-06, + "loss": 0.119, + "step": 429 + }, + { + "epoch": 3.467741935483871, + "grad_norm": 11.72746753692627, + "learning_rate": 5.956581891117069e-06, + "loss": 0.1911, + "step": 430 + }, + { + "epoch": 3.475806451612903, + "grad_norm": 218.60215759277344, + "learning_rate": 5.925231460111189e-06, + "loss": 9.1296, + "step": 431 + }, + { + "epoch": 3.4838709677419355, + "grad_norm": 124.23365783691406, + "learning_rate": 5.89388102910531e-06, + "loss": 3.0086, + "step": 432 + }, + { + "epoch": 3.491935483870968, + "grad_norm": 15.99737548828125, + "learning_rate": 5.862530598099432e-06, + "loss": 0.4271, + "step": 433 + }, + { + "epoch": 3.5, + "grad_norm": 100.8034439086914, + "learning_rate": 5.831180167093551e-06, + "loss": 1.3045, + "step": 434 + }, + { + "epoch": 3.508064516129032, + "grad_norm": 16.803627014160156, + "learning_rate": 5.799829736087672e-06, + "loss": 0.3571, + "step": 435 + }, + { + "epoch": 3.5161290322580645, + "grad_norm": 9.734840393066406, + "learning_rate": 5.768479305081793e-06, + "loss": 0.0472, + "step": 436 + }, + { + "epoch": 3.524193548387097, + "grad_norm": 19.890609741210938, + "learning_rate": 5.737128874075914e-06, + "loss": 0.2308, + "step": 437 + }, + { + "epoch": 3.532258064516129, + "grad_norm": 59.4325065612793, + "learning_rate": 5.705778443070034e-06, + "loss": 0.8228, + "step": 438 + }, + { + "epoch": 3.540322580645161, + "grad_norm": 29.798025131225586, + "learning_rate": 5.674428012064155e-06, + "loss": 0.3622, + "step": 439 + }, + { + "epoch": 3.5483870967741935, + "grad_norm": 77.82572937011719, + "learning_rate": 5.643077581058276e-06, + "loss": 4.0208, + "step": 440 + }, + { + "epoch": 3.556451612903226, + "grad_norm": 54.52943420410156, + "learning_rate": 5.611727150052396e-06, + "loss": 0.9624, + "step": 441 + }, + { + "epoch": 3.564516129032258, + "grad_norm": 45.591339111328125, + "learning_rate": 5.580376719046517e-06, + "loss": 0.5806, + "step": 442 + }, + { + "epoch": 3.5725806451612905, + "grad_norm": 135.01614379882812, + "learning_rate": 5.5490262880406375e-06, + "loss": 1.8653, + "step": 443 + }, + { + "epoch": 3.5806451612903225, + "grad_norm": 17.62562370300293, + "learning_rate": 5.5176758570347585e-06, + "loss": 0.565, + "step": 444 + }, + { + "epoch": 3.588709677419355, + "grad_norm": 15.431583404541016, + "learning_rate": 5.48632542602888e-06, + "loss": 0.8572, + "step": 445 + }, + { + "epoch": 3.596774193548387, + "grad_norm": 228.9459991455078, + "learning_rate": 5.454974995023e-06, + "loss": 12.251, + "step": 446 + }, + { + "epoch": 3.6048387096774195, + "grad_norm": 92.89791107177734, + "learning_rate": 5.423624564017121e-06, + "loss": 6.3119, + "step": 447 + }, + { + "epoch": 3.6129032258064515, + "grad_norm": 117.79043579101562, + "learning_rate": 5.392274133011241e-06, + "loss": 1.9855, + "step": 448 + }, + { + "epoch": 3.620967741935484, + "grad_norm": 59.39741516113281, + "learning_rate": 5.360923702005362e-06, + "loss": 0.574, + "step": 449 + }, + { + "epoch": 3.629032258064516, + "grad_norm": 30.700626373291016, + "learning_rate": 5.329573270999482e-06, + "loss": 0.7384, + "step": 450 + }, + { + "epoch": 3.6370967741935485, + "grad_norm": 6.502930641174316, + "learning_rate": 5.298222839993603e-06, + "loss": 0.0293, + "step": 451 + }, + { + "epoch": 3.6451612903225805, + "grad_norm": 59.104400634765625, + "learning_rate": 5.2668724089877245e-06, + "loss": 0.5975, + "step": 452 + }, + { + "epoch": 3.653225806451613, + "grad_norm": 29.07902717590332, + "learning_rate": 5.235521977981845e-06, + "loss": 0.3861, + "step": 453 + }, + { + "epoch": 3.661290322580645, + "grad_norm": 154.84112548828125, + "learning_rate": 5.204171546975966e-06, + "loss": 10.357, + "step": 454 + }, + { + "epoch": 3.6693548387096775, + "grad_norm": 252.0116424560547, + "learning_rate": 5.172821115970086e-06, + "loss": 14.5254, + "step": 455 + }, + { + "epoch": 3.6774193548387095, + "grad_norm": 153.28329467773438, + "learning_rate": 5.141470684964207e-06, + "loss": 2.4753, + "step": 456 + }, + { + "epoch": 3.685483870967742, + "grad_norm": 22.697925567626953, + "learning_rate": 5.110120253958327e-06, + "loss": 0.1632, + "step": 457 + }, + { + "epoch": 3.693548387096774, + "grad_norm": 153.0172882080078, + "learning_rate": 5.078769822952448e-06, + "loss": 7.6211, + "step": 458 + }, + { + "epoch": 3.7016129032258065, + "grad_norm": 10.542808532714844, + "learning_rate": 5.047419391946569e-06, + "loss": 0.1378, + "step": 459 + }, + { + "epoch": 3.709677419354839, + "grad_norm": 50.96455001831055, + "learning_rate": 5.0160689609406896e-06, + "loss": 0.4241, + "step": 460 + }, + { + "epoch": 3.717741935483871, + "grad_norm": 41.19651412963867, + "learning_rate": 4.984718529934811e-06, + "loss": 0.3353, + "step": 461 + }, + { + "epoch": 3.725806451612903, + "grad_norm": 62.61945343017578, + "learning_rate": 4.953368098928931e-06, + "loss": 1.7132, + "step": 462 + }, + { + "epoch": 3.7338709677419355, + "grad_norm": 50.97965621948242, + "learning_rate": 4.922017667923052e-06, + "loss": 0.9171, + "step": 463 + }, + { + "epoch": 3.741935483870968, + "grad_norm": 171.7479248046875, + "learning_rate": 4.890667236917172e-06, + "loss": 11.7453, + "step": 464 + }, + { + "epoch": 3.75, + "grad_norm": 17.77037239074707, + "learning_rate": 4.859316805911293e-06, + "loss": 0.2725, + "step": 465 + }, + { + "epoch": 3.758064516129032, + "grad_norm": 1127.8482666015625, + "learning_rate": 4.827966374905413e-06, + "loss": 116.4141, + "step": 466 + }, + { + "epoch": 3.7661290322580645, + "grad_norm": 67.08070373535156, + "learning_rate": 4.7966159438995345e-06, + "loss": 1.17, + "step": 467 + }, + { + "epoch": 3.774193548387097, + "grad_norm": 182.09410095214844, + "learning_rate": 4.765265512893655e-06, + "loss": 7.6538, + "step": 468 + }, + { + "epoch": 3.782258064516129, + "grad_norm": 53.98783493041992, + "learning_rate": 4.733915081887777e-06, + "loss": 0.8628, + "step": 469 + }, + { + "epoch": 3.790322580645161, + "grad_norm": 28.54851722717285, + "learning_rate": 4.702564650881897e-06, + "loss": 0.1568, + "step": 470 + }, + { + "epoch": 3.7983870967741935, + "grad_norm": 73.85025024414062, + "learning_rate": 4.671214219876017e-06, + "loss": 3.8978, + "step": 471 + }, + { + "epoch": 3.806451612903226, + "grad_norm": 20.776636123657227, + "learning_rate": 4.639863788870138e-06, + "loss": 0.2737, + "step": 472 + }, + { + "epoch": 3.814516129032258, + "grad_norm": 109.15306854248047, + "learning_rate": 4.608513357864258e-06, + "loss": 0.9342, + "step": 473 + }, + { + "epoch": 3.8225806451612905, + "grad_norm": 67.26899719238281, + "learning_rate": 4.577162926858379e-06, + "loss": 1.0592, + "step": 474 + }, + { + "epoch": 3.8306451612903225, + "grad_norm": 61.645511627197266, + "learning_rate": 4.5458124958524996e-06, + "loss": 1.381, + "step": 475 + }, + { + "epoch": 3.838709677419355, + "grad_norm": 357.5131530761719, + "learning_rate": 4.514462064846621e-06, + "loss": 6.9558, + "step": 476 + }, + { + "epoch": 3.846774193548387, + "grad_norm": 52.92768478393555, + "learning_rate": 4.483111633840742e-06, + "loss": 1.4706, + "step": 477 + }, + { + "epoch": 3.8548387096774195, + "grad_norm": 145.08580017089844, + "learning_rate": 4.451761202834862e-06, + "loss": 4.1149, + "step": 478 + }, + { + "epoch": 3.8629032258064515, + "grad_norm": 12.895330429077148, + "learning_rate": 4.420410771828983e-06, + "loss": 0.2032, + "step": 479 + }, + { + "epoch": 3.870967741935484, + "grad_norm": 99.95780181884766, + "learning_rate": 4.389060340823103e-06, + "loss": 3.0421, + "step": 480 + }, + { + "epoch": 3.879032258064516, + "grad_norm": 13.724507331848145, + "learning_rate": 4.357709909817224e-06, + "loss": 0.1361, + "step": 481 + }, + { + "epoch": 3.8870967741935485, + "grad_norm": 179.6150665283203, + "learning_rate": 4.326359478811345e-06, + "loss": 2.9513, + "step": 482 + }, + { + "epoch": 3.8951612903225805, + "grad_norm": 37.32778549194336, + "learning_rate": 4.2950090478054655e-06, + "loss": 1.0036, + "step": 483 + }, + { + "epoch": 3.903225806451613, + "grad_norm": 121.20023345947266, + "learning_rate": 4.2636586167995866e-06, + "loss": 12.377, + "step": 484 + }, + { + "epoch": 3.911290322580645, + "grad_norm": 77.21897888183594, + "learning_rate": 4.232308185793707e-06, + "loss": 0.9028, + "step": 485 + }, + { + "epoch": 3.9193548387096775, + "grad_norm": 28.58183479309082, + "learning_rate": 4.200957754787828e-06, + "loss": 0.2499, + "step": 486 + }, + { + "epoch": 3.9274193548387095, + "grad_norm": 40.57847595214844, + "learning_rate": 4.169607323781948e-06, + "loss": 0.5026, + "step": 487 + }, + { + "epoch": 3.935483870967742, + "grad_norm": 29.6917781829834, + "learning_rate": 4.138256892776069e-06, + "loss": 0.4407, + "step": 488 + }, + { + "epoch": 3.943548387096774, + "grad_norm": 207.91818237304688, + "learning_rate": 4.10690646177019e-06, + "loss": 8.9606, + "step": 489 + }, + { + "epoch": 3.9516129032258065, + "grad_norm": 57.388397216796875, + "learning_rate": 4.07555603076431e-06, + "loss": 1.1955, + "step": 490 + }, + { + "epoch": 3.959677419354839, + "grad_norm": 39.93169403076172, + "learning_rate": 4.0442055997584314e-06, + "loss": 0.6821, + "step": 491 + }, + { + "epoch": 3.967741935483871, + "grad_norm": 115.96873474121094, + "learning_rate": 4.012855168752552e-06, + "loss": 1.2737, + "step": 492 + }, + { + "epoch": 3.975806451612903, + "grad_norm": 4.700145721435547, + "learning_rate": 3.981504737746673e-06, + "loss": 0.1307, + "step": 493 + }, + { + "epoch": 3.9838709677419355, + "grad_norm": 226.87930297851562, + "learning_rate": 3.950154306740793e-06, + "loss": 16.7857, + "step": 494 + }, + { + "epoch": 3.991935483870968, + "grad_norm": 22.20536994934082, + "learning_rate": 3.918803875734914e-06, + "loss": 0.4253, + "step": 495 + }, + { + "epoch": 4.0, + "grad_norm": 22.875497817993164, + "learning_rate": 3.887453444729035e-06, + "loss": 0.0923, + "step": 496 + }, + { + "epoch": 4.0, + "eval_loss": 2.7594592571258545, + "eval_mae": 1.0400739908218384, + "eval_mse": 2.7594590187072754, + "eval_r2": 0.24450808763504028, + "eval_rmse": 1.6611619483684532, + "eval_runtime": 1.3594, + "eval_samples_per_second": 40.46, + "eval_smape": 53.00933122634888, + "eval_steps_per_second": 10.299, + "step": 496 + }, + { + "epoch": 4.008064516129032, + "grad_norm": 12.457742691040039, + "learning_rate": 3.856103013723155e-06, + "loss": 0.1641, + "step": 497 + }, + { + "epoch": 4.016129032258065, + "grad_norm": 14.920323371887207, + "learning_rate": 3.824752582717276e-06, + "loss": 0.2707, + "step": 498 + }, + { + "epoch": 4.024193548387097, + "grad_norm": 29.544240951538086, + "learning_rate": 3.7934021517113965e-06, + "loss": 0.3924, + "step": 499 + }, + { + "epoch": 4.032258064516129, + "grad_norm": 38.91734313964844, + "learning_rate": 3.762051720705517e-06, + "loss": 0.9084, + "step": 500 + }, + { + "epoch": 4.040322580645161, + "grad_norm": 31.240724563598633, + "learning_rate": 3.730701289699638e-06, + "loss": 0.5555, + "step": 501 + }, + { + "epoch": 4.048387096774194, + "grad_norm": 94.12042236328125, + "learning_rate": 3.6993508586937585e-06, + "loss": 1.486, + "step": 502 + }, + { + "epoch": 4.056451612903226, + "grad_norm": 53.03417205810547, + "learning_rate": 3.6680004276878795e-06, + "loss": 0.4751, + "step": 503 + }, + { + "epoch": 4.064516129032258, + "grad_norm": 14.563098907470703, + "learning_rate": 3.636649996682e-06, + "loss": 0.1335, + "step": 504 + }, + { + "epoch": 4.07258064516129, + "grad_norm": 12.984607696533203, + "learning_rate": 3.605299565676121e-06, + "loss": 0.2579, + "step": 505 + }, + { + "epoch": 4.080645161290323, + "grad_norm": 13.699532508850098, + "learning_rate": 3.5739491346702414e-06, + "loss": 0.2477, + "step": 506 + }, + { + "epoch": 4.088709677419355, + "grad_norm": 94.13684844970703, + "learning_rate": 3.542598703664362e-06, + "loss": 3.815, + "step": 507 + }, + { + "epoch": 4.096774193548387, + "grad_norm": 19.701648712158203, + "learning_rate": 3.5112482726584827e-06, + "loss": 0.106, + "step": 508 + }, + { + "epoch": 4.104838709677419, + "grad_norm": 29.861709594726562, + "learning_rate": 3.4798978416526038e-06, + "loss": 0.3138, + "step": 509 + }, + { + "epoch": 4.112903225806452, + "grad_norm": 55.61738967895508, + "learning_rate": 3.4485474106467244e-06, + "loss": 0.6359, + "step": 510 + }, + { + "epoch": 4.120967741935484, + "grad_norm": 93.4219970703125, + "learning_rate": 3.417196979640845e-06, + "loss": 1.7865, + "step": 511 + }, + { + "epoch": 4.129032258064516, + "grad_norm": 85.9775619506836, + "learning_rate": 3.3858465486349657e-06, + "loss": 0.8047, + "step": 512 + }, + { + "epoch": 4.137096774193548, + "grad_norm": 183.13255310058594, + "learning_rate": 3.3544961176290863e-06, + "loss": 7.5678, + "step": 513 + }, + { + "epoch": 4.145161290322581, + "grad_norm": 248.67733764648438, + "learning_rate": 3.323145686623207e-06, + "loss": 7.1086, + "step": 514 + }, + { + "epoch": 4.153225806451613, + "grad_norm": 20.28827667236328, + "learning_rate": 3.2917952556173276e-06, + "loss": 0.1826, + "step": 515 + }, + { + "epoch": 4.161290322580645, + "grad_norm": 43.15050506591797, + "learning_rate": 3.2604448246114486e-06, + "loss": 0.4589, + "step": 516 + }, + { + "epoch": 4.169354838709677, + "grad_norm": 57.90827941894531, + "learning_rate": 3.2290943936055693e-06, + "loss": 0.7951, + "step": 517 + }, + { + "epoch": 4.17741935483871, + "grad_norm": 122.35932922363281, + "learning_rate": 3.19774396259969e-06, + "loss": 3.4445, + "step": 518 + }, + { + "epoch": 4.185483870967742, + "grad_norm": 34.796329498291016, + "learning_rate": 3.1663935315938106e-06, + "loss": 1.4262, + "step": 519 + }, + { + "epoch": 4.193548387096774, + "grad_norm": 165.61900329589844, + "learning_rate": 3.135043100587931e-06, + "loss": 11.7323, + "step": 520 + }, + { + "epoch": 4.201612903225806, + "grad_norm": 22.81295394897461, + "learning_rate": 3.1036926695820514e-06, + "loss": 0.2421, + "step": 521 + }, + { + "epoch": 4.209677419354839, + "grad_norm": 29.325931549072266, + "learning_rate": 3.072342238576172e-06, + "loss": 1.1239, + "step": 522 + }, + { + "epoch": 4.217741935483871, + "grad_norm": 51.42033386230469, + "learning_rate": 3.0409918075702935e-06, + "loss": 0.7285, + "step": 523 + }, + { + "epoch": 4.225806451612903, + "grad_norm": 29.262659072875977, + "learning_rate": 3.009641376564414e-06, + "loss": 0.7601, + "step": 524 + }, + { + "epoch": 4.233870967741935, + "grad_norm": 94.11527252197266, + "learning_rate": 2.9782909455585344e-06, + "loss": 4.2293, + "step": 525 + }, + { + "epoch": 4.241935483870968, + "grad_norm": 42.665855407714844, + "learning_rate": 2.946940514552655e-06, + "loss": 0.4546, + "step": 526 + }, + { + "epoch": 4.25, + "grad_norm": 40.39009475708008, + "learning_rate": 2.9155900835467757e-06, + "loss": 0.3513, + "step": 527 + }, + { + "epoch": 4.258064516129032, + "grad_norm": 11.640003204345703, + "learning_rate": 2.8842396525408963e-06, + "loss": 0.4406, + "step": 528 + }, + { + "epoch": 4.266129032258064, + "grad_norm": 102.39125061035156, + "learning_rate": 2.852889221535017e-06, + "loss": 2.7261, + "step": 529 + }, + { + "epoch": 4.274193548387097, + "grad_norm": 26.152294158935547, + "learning_rate": 2.821538790529138e-06, + "loss": 0.1906, + "step": 530 + }, + { + "epoch": 4.282258064516129, + "grad_norm": 20.90414047241211, + "learning_rate": 2.7901883595232586e-06, + "loss": 0.2463, + "step": 531 + }, + { + "epoch": 4.290322580645161, + "grad_norm": 62.55961990356445, + "learning_rate": 2.7588379285173793e-06, + "loss": 0.4813, + "step": 532 + }, + { + "epoch": 4.298387096774194, + "grad_norm": 53.30845260620117, + "learning_rate": 2.7274874975115e-06, + "loss": 1.1314, + "step": 533 + }, + { + "epoch": 4.306451612903226, + "grad_norm": 14.514229774475098, + "learning_rate": 2.6961370665056205e-06, + "loss": 0.5306, + "step": 534 + }, + { + "epoch": 4.314516129032258, + "grad_norm": 42.68838119506836, + "learning_rate": 2.664786635499741e-06, + "loss": 0.6835, + "step": 535 + }, + { + "epoch": 4.32258064516129, + "grad_norm": 52.57284164428711, + "learning_rate": 2.6334362044938622e-06, + "loss": 0.8247, + "step": 536 + }, + { + "epoch": 4.330645161290323, + "grad_norm": 65.6913070678711, + "learning_rate": 2.602085773487983e-06, + "loss": 1.043, + "step": 537 + }, + { + "epoch": 4.338709677419355, + "grad_norm": 26.719701766967773, + "learning_rate": 2.5707353424821035e-06, + "loss": 0.2364, + "step": 538 + }, + { + "epoch": 4.346774193548387, + "grad_norm": 64.74887084960938, + "learning_rate": 2.539384911476224e-06, + "loss": 0.5677, + "step": 539 + }, + { + "epoch": 4.354838709677419, + "grad_norm": 34.147438049316406, + "learning_rate": 2.5080344804703448e-06, + "loss": 0.6611, + "step": 540 + }, + { + "epoch": 4.362903225806452, + "grad_norm": 6.538976669311523, + "learning_rate": 2.4766840494644654e-06, + "loss": 0.0663, + "step": 541 + }, + { + "epoch": 4.370967741935484, + "grad_norm": 23.64634895324707, + "learning_rate": 2.445333618458586e-06, + "loss": 0.2443, + "step": 542 + }, + { + "epoch": 4.379032258064516, + "grad_norm": 42.18586349487305, + "learning_rate": 2.4139831874527067e-06, + "loss": 0.4006, + "step": 543 + }, + { + "epoch": 4.387096774193548, + "grad_norm": 213.9248504638672, + "learning_rate": 2.3826327564468273e-06, + "loss": 8.2247, + "step": 544 + }, + { + "epoch": 4.395161290322581, + "grad_norm": 7.639063835144043, + "learning_rate": 2.3512823254409484e-06, + "loss": 0.0266, + "step": 545 + }, + { + "epoch": 4.403225806451613, + "grad_norm": 13.752813339233398, + "learning_rate": 2.319931894435069e-06, + "loss": 0.1823, + "step": 546 + }, + { + "epoch": 4.411290322580645, + "grad_norm": 47.70906066894531, + "learning_rate": 2.2885814634291897e-06, + "loss": 0.9809, + "step": 547 + }, + { + "epoch": 4.419354838709677, + "grad_norm": 45.133907318115234, + "learning_rate": 2.2572310324233103e-06, + "loss": 1.2876, + "step": 548 + }, + { + "epoch": 4.42741935483871, + "grad_norm": 45.30345153808594, + "learning_rate": 2.225880601417431e-06, + "loss": 0.4589, + "step": 549 + }, + { + "epoch": 4.435483870967742, + "grad_norm": 143.42005920410156, + "learning_rate": 2.1945301704115516e-06, + "loss": 2.8116, + "step": 550 + }, + { + "epoch": 4.443548387096774, + "grad_norm": 43.46016311645508, + "learning_rate": 2.1631797394056726e-06, + "loss": 0.4703, + "step": 551 + }, + { + "epoch": 4.451612903225806, + "grad_norm": 129.6931610107422, + "learning_rate": 2.1318293083997933e-06, + "loss": 3.5121, + "step": 552 + }, + { + "epoch": 4.459677419354839, + "grad_norm": 21.558963775634766, + "learning_rate": 2.100478877393914e-06, + "loss": 0.3465, + "step": 553 + }, + { + "epoch": 4.467741935483871, + "grad_norm": 36.43219757080078, + "learning_rate": 2.0691284463880346e-06, + "loss": 0.6322, + "step": 554 + }, + { + "epoch": 4.475806451612903, + "grad_norm": 35.63532257080078, + "learning_rate": 2.037778015382155e-06, + "loss": 0.2085, + "step": 555 + }, + { + "epoch": 4.483870967741936, + "grad_norm": 19.665674209594727, + "learning_rate": 2.006427584376276e-06, + "loss": 0.1428, + "step": 556 + }, + { + "epoch": 4.491935483870968, + "grad_norm": 26.67451286315918, + "learning_rate": 1.9750771533703965e-06, + "loss": 0.2116, + "step": 557 + }, + { + "epoch": 4.5, + "grad_norm": 231.28211975097656, + "learning_rate": 1.9437267223645175e-06, + "loss": 10.8706, + "step": 558 + }, + { + "epoch": 4.508064516129032, + "grad_norm": 16.69761085510254, + "learning_rate": 1.912376291358638e-06, + "loss": 0.0591, + "step": 559 + }, + { + "epoch": 4.516129032258064, + "grad_norm": 142.25442504882812, + "learning_rate": 1.8810258603527586e-06, + "loss": 6.6835, + "step": 560 + }, + { + "epoch": 4.524193548387097, + "grad_norm": 19.17723274230957, + "learning_rate": 1.8496754293468792e-06, + "loss": 0.0427, + "step": 561 + }, + { + "epoch": 4.532258064516129, + "grad_norm": 33.36142349243164, + "learning_rate": 1.818324998341e-06, + "loss": 0.5271, + "step": 562 + }, + { + "epoch": 4.540322580645161, + "grad_norm": 279.6239929199219, + "learning_rate": 1.7869745673351207e-06, + "loss": 7.1391, + "step": 563 + }, + { + "epoch": 4.548387096774194, + "grad_norm": 9.864730834960938, + "learning_rate": 1.7556241363292414e-06, + "loss": 0.2604, + "step": 564 + }, + { + "epoch": 4.556451612903226, + "grad_norm": 53.1966438293457, + "learning_rate": 1.7242737053233622e-06, + "loss": 0.9147, + "step": 565 + }, + { + "epoch": 4.564516129032258, + "grad_norm": 123.42327880859375, + "learning_rate": 1.6929232743174828e-06, + "loss": 1.8798, + "step": 566 + }, + { + "epoch": 4.57258064516129, + "grad_norm": 135.15972900390625, + "learning_rate": 1.6615728433116035e-06, + "loss": 1.6816, + "step": 567 + }, + { + "epoch": 4.580645161290323, + "grad_norm": 145.2718963623047, + "learning_rate": 1.6302224123057243e-06, + "loss": 7.8278, + "step": 568 + }, + { + "epoch": 4.588709677419355, + "grad_norm": 53.99338912963867, + "learning_rate": 1.598871981299845e-06, + "loss": 0.6349, + "step": 569 + }, + { + "epoch": 4.596774193548387, + "grad_norm": 196.27590942382812, + "learning_rate": 1.5675215502939656e-06, + "loss": 4.1074, + "step": 570 + }, + { + "epoch": 4.604838709677419, + "grad_norm": 18.781539916992188, + "learning_rate": 1.536171119288086e-06, + "loss": 0.8987, + "step": 571 + }, + { + "epoch": 4.612903225806452, + "grad_norm": 15.959029197692871, + "learning_rate": 1.504820688282207e-06, + "loss": 0.3374, + "step": 572 + }, + { + "epoch": 4.620967741935484, + "grad_norm": 77.43952178955078, + "learning_rate": 1.4734702572763275e-06, + "loss": 2.507, + "step": 573 + }, + { + "epoch": 4.629032258064516, + "grad_norm": 12.534307479858398, + "learning_rate": 1.4421198262704481e-06, + "loss": 0.07, + "step": 574 + }, + { + "epoch": 4.637096774193548, + "grad_norm": 62.35894775390625, + "learning_rate": 1.410769395264569e-06, + "loss": 1.2078, + "step": 575 + }, + { + "epoch": 4.645161290322581, + "grad_norm": 28.931297302246094, + "learning_rate": 1.3794189642586896e-06, + "loss": 0.4481, + "step": 576 + }, + { + "epoch": 4.653225806451613, + "grad_norm": 22.307531356811523, + "learning_rate": 1.3480685332528103e-06, + "loss": 0.2238, + "step": 577 + }, + { + "epoch": 4.661290322580645, + "grad_norm": 235.91934204101562, + "learning_rate": 1.3167181022469311e-06, + "loss": 12.7111, + "step": 578 + }, + { + "epoch": 4.669354838709677, + "grad_norm": 12.003568649291992, + "learning_rate": 1.2853676712410518e-06, + "loss": 0.3155, + "step": 579 + }, + { + "epoch": 4.67741935483871, + "grad_norm": 115.58039855957031, + "learning_rate": 1.2540172402351724e-06, + "loss": 2.4697, + "step": 580 + }, + { + "epoch": 4.685483870967742, + "grad_norm": 7.92896032333374, + "learning_rate": 1.222666809229293e-06, + "loss": 0.1071, + "step": 581 + }, + { + "epoch": 4.693548387096774, + "grad_norm": 62.47394943237305, + "learning_rate": 1.1913163782234137e-06, + "loss": 0.4068, + "step": 582 + }, + { + "epoch": 4.701612903225806, + "grad_norm": 64.04085540771484, + "learning_rate": 1.1599659472175345e-06, + "loss": 0.5876, + "step": 583 + }, + { + "epoch": 4.709677419354839, + "grad_norm": 167.1731719970703, + "learning_rate": 1.1286155162116552e-06, + "loss": 7.7562, + "step": 584 + }, + { + "epoch": 4.717741935483871, + "grad_norm": 110.1922836303711, + "learning_rate": 1.0972650852057758e-06, + "loss": 1.7847, + "step": 585 + }, + { + "epoch": 4.725806451612903, + "grad_norm": 144.95281982421875, + "learning_rate": 1.0659146541998966e-06, + "loss": 9.9987, + "step": 586 + }, + { + "epoch": 4.733870967741936, + "grad_norm": 26.671306610107422, + "learning_rate": 1.0345642231940173e-06, + "loss": 0.1145, + "step": 587 + }, + { + "epoch": 4.741935483870968, + "grad_norm": 14.88308048248291, + "learning_rate": 1.003213792188138e-06, + "loss": 0.2181, + "step": 588 + }, + { + "epoch": 4.75, + "grad_norm": 117.62973022460938, + "learning_rate": 9.718633611822588e-07, + "loss": 1.5278, + "step": 589 + }, + { + "epoch": 4.758064516129032, + "grad_norm": 130.04318237304688, + "learning_rate": 9.405129301763793e-07, + "loss": 0.4744, + "step": 590 + }, + { + "epoch": 4.766129032258064, + "grad_norm": 184.98939514160156, + "learning_rate": 9.091624991705e-07, + "loss": 3.6068, + "step": 591 + }, + { + "epoch": 4.774193548387097, + "grad_norm": 48.318660736083984, + "learning_rate": 8.778120681646207e-07, + "loss": 0.4334, + "step": 592 + }, + { + "epoch": 4.782258064516129, + "grad_norm": 70.92566680908203, + "learning_rate": 8.464616371587414e-07, + "loss": 0.6742, + "step": 593 + }, + { + "epoch": 4.790322580645161, + "grad_norm": 48.69011306762695, + "learning_rate": 8.151112061528622e-07, + "loss": 0.403, + "step": 594 + }, + { + "epoch": 4.798387096774194, + "grad_norm": 168.84817504882812, + "learning_rate": 7.837607751469828e-07, + "loss": 2.1519, + "step": 595 + }, + { + "epoch": 4.806451612903226, + "grad_norm": 12.404576301574707, + "learning_rate": 7.524103441411035e-07, + "loss": 0.0748, + "step": 596 + }, + { + "epoch": 4.814516129032258, + "grad_norm": 11.056885719299316, + "learning_rate": 7.210599131352241e-07, + "loss": 0.1587, + "step": 597 + }, + { + "epoch": 4.82258064516129, + "grad_norm": 176.21572875976562, + "learning_rate": 6.897094821293448e-07, + "loss": 2.582, + "step": 598 + }, + { + "epoch": 4.830645161290323, + "grad_norm": 233.09788513183594, + "learning_rate": 6.583590511234656e-07, + "loss": 2.2739, + "step": 599 + }, + { + "epoch": 4.838709677419355, + "grad_norm": 64.253662109375, + "learning_rate": 6.270086201175862e-07, + "loss": 3.2137, + "step": 600 + }, + { + "epoch": 4.846774193548387, + "grad_norm": 91.62824249267578, + "learning_rate": 5.956581891117068e-07, + "loss": 1.013, + "step": 601 + }, + { + "epoch": 4.854838709677419, + "grad_norm": 8.819452285766602, + "learning_rate": 5.643077581058276e-07, + "loss": 0.1805, + "step": 602 + }, + { + "epoch": 4.862903225806452, + "grad_norm": 166.83380126953125, + "learning_rate": 5.329573270999483e-07, + "loss": 1.2785, + "step": 603 + }, + { + "epoch": 4.870967741935484, + "grad_norm": 21.492931365966797, + "learning_rate": 5.01606896094069e-07, + "loss": 0.154, + "step": 604 + }, + { + "epoch": 4.879032258064516, + "grad_norm": 50.13455581665039, + "learning_rate": 4.7025646508818965e-07, + "loss": 0.406, + "step": 605 + }, + { + "epoch": 4.887096774193548, + "grad_norm": 259.16168212890625, + "learning_rate": 4.3890603408231034e-07, + "loss": 10.8365, + "step": 606 + }, + { + "epoch": 4.895161290322581, + "grad_norm": 119.88123321533203, + "learning_rate": 4.075556030764311e-07, + "loss": 1.7068, + "step": 607 + }, + { + "epoch": 4.903225806451613, + "grad_norm": 60.6947135925293, + "learning_rate": 3.7620517207055177e-07, + "loss": 0.558, + "step": 608 + }, + { + "epoch": 4.911290322580645, + "grad_norm": 760.4828491210938, + "learning_rate": 3.448547410646724e-07, + "loss": 108.7089, + "step": 609 + }, + { + "epoch": 4.919354838709677, + "grad_norm": 27.476360321044922, + "learning_rate": 3.135043100587931e-07, + "loss": 0.1452, + "step": 610 + }, + { + "epoch": 4.92741935483871, + "grad_norm": 47.5902214050293, + "learning_rate": 2.821538790529138e-07, + "loss": 0.8969, + "step": 611 + }, + { + "epoch": 4.935483870967742, + "grad_norm": 128.73582458496094, + "learning_rate": 2.508034480470345e-07, + "loss": 9.6038, + "step": 612 + }, + { + "epoch": 4.943548387096774, + "grad_norm": 288.2920227050781, + "learning_rate": 2.1945301704115517e-07, + "loss": 5.6554, + "step": 613 + }, + { + "epoch": 4.951612903225806, + "grad_norm": 22.636474609375, + "learning_rate": 1.8810258603527589e-07, + "loss": 0.6051, + "step": 614 + }, + { + "epoch": 4.959677419354839, + "grad_norm": 17.700489044189453, + "learning_rate": 1.5675215502939655e-07, + "loss": 0.3193, + "step": 615 + }, + { + "epoch": 4.967741935483871, + "grad_norm": 20.34910774230957, + "learning_rate": 1.2540172402351724e-07, + "loss": 0.0564, + "step": 616 + }, + { + "epoch": 4.975806451612903, + "grad_norm": 21.71570587158203, + "learning_rate": 9.405129301763794e-08, + "loss": 0.1759, + "step": 617 + }, + { + "epoch": 4.983870967741936, + "grad_norm": 9.695738792419434, + "learning_rate": 6.270086201175862e-08, + "loss": 0.1146, + "step": 618 + }, + { + "epoch": 4.991935483870968, + "grad_norm": 246.5233612060547, + "learning_rate": 3.135043100587931e-08, + "loss": 10.0593, + "step": 619 + }, + { + "epoch": 5.0, + "grad_norm": 9.63797378540039, + "learning_rate": 0.0, + "loss": 0.0203, + "step": 620 + }, + { + "epoch": 5.0, + "eval_loss": 2.733250379562378, + "eval_mae": 1.0786534547805786, + "eval_mse": 2.7332499027252197, + "eval_r2": 0.25168371200561523, + "eval_rmse": 1.6532543369745685, + "eval_runtime": 1.3402, + "eval_samples_per_second": 41.038, + "eval_smape": 54.187166690826416, + "eval_steps_per_second": 10.446, + "step": 620 + } + ], + "logging_steps": 1, + "max_steps": 620, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3359849068769280.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 1.9437267223645173e-05 + } +}