{ "best_metric": 0.5751292109489441, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.3508771929824561, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0017543859649122807, "grad_norm": 0.7633857131004333, "learning_rate": 1.004e-05, "loss": 0.6942, "step": 1 }, { "epoch": 0.0017543859649122807, "eval_loss": 0.9683788418769836, "eval_runtime": 18.1228, "eval_samples_per_second": 13.243, "eval_steps_per_second": 3.311, "step": 1 }, { "epoch": 0.0035087719298245615, "grad_norm": 0.6051109433174133, "learning_rate": 2.008e-05, "loss": 0.4636, "step": 2 }, { "epoch": 0.005263157894736842, "grad_norm": 0.9793151021003723, "learning_rate": 3.012e-05, "loss": 0.5969, "step": 3 }, { "epoch": 0.007017543859649123, "grad_norm": 1.0058985948562622, "learning_rate": 4.016e-05, "loss": 0.7309, "step": 4 }, { "epoch": 0.008771929824561403, "grad_norm": 0.7041476964950562, "learning_rate": 5.02e-05, "loss": 0.523, "step": 5 }, { "epoch": 0.010526315789473684, "grad_norm": 0.606087327003479, "learning_rate": 6.024e-05, "loss": 0.6758, "step": 6 }, { "epoch": 0.012280701754385965, "grad_norm": 0.5665040612220764, "learning_rate": 7.028e-05, "loss": 0.6966, "step": 7 }, { "epoch": 0.014035087719298246, "grad_norm": 0.3772867023944855, "learning_rate": 8.032e-05, "loss": 0.5263, "step": 8 }, { "epoch": 0.015789473684210527, "grad_norm": 0.6435827016830444, "learning_rate": 9.036000000000001e-05, "loss": 0.5862, "step": 9 }, { "epoch": 0.017543859649122806, "grad_norm": 0.35413938760757446, "learning_rate": 0.0001004, "loss": 0.5889, "step": 10 }, { "epoch": 0.01929824561403509, "grad_norm": 0.4691724479198456, "learning_rate": 9.987157894736842e-05, "loss": 0.5193, "step": 11 }, { "epoch": 0.021052631578947368, "grad_norm": 0.3856242001056671, "learning_rate": 9.934315789473684e-05, "loss": 0.6366, "step": 12 }, { "epoch": 0.02280701754385965, "grad_norm": 0.5143347978591919, "learning_rate": 9.881473684210525e-05, "loss": 0.5594, "step": 13 }, { "epoch": 0.02456140350877193, "grad_norm": 0.40349557995796204, "learning_rate": 9.828631578947369e-05, "loss": 0.5083, "step": 14 }, { "epoch": 0.02631578947368421, "grad_norm": 0.43898674845695496, "learning_rate": 9.77578947368421e-05, "loss": 0.5141, "step": 15 }, { "epoch": 0.028070175438596492, "grad_norm": 0.5078945159912109, "learning_rate": 9.722947368421052e-05, "loss": 0.5504, "step": 16 }, { "epoch": 0.02982456140350877, "grad_norm": 0.40516263246536255, "learning_rate": 9.670105263157895e-05, "loss": 0.7386, "step": 17 }, { "epoch": 0.031578947368421054, "grad_norm": 0.4835037291049957, "learning_rate": 9.617263157894737e-05, "loss": 0.7497, "step": 18 }, { "epoch": 0.03333333333333333, "grad_norm": 0.4733222723007202, "learning_rate": 9.564421052631579e-05, "loss": 0.609, "step": 19 }, { "epoch": 0.03508771929824561, "grad_norm": 0.4226079285144806, "learning_rate": 9.511578947368421e-05, "loss": 0.6306, "step": 20 }, { "epoch": 0.03684210526315789, "grad_norm": 0.609290361404419, "learning_rate": 9.458736842105264e-05, "loss": 0.7647, "step": 21 }, { "epoch": 0.03859649122807018, "grad_norm": 0.45616650581359863, "learning_rate": 9.405894736842106e-05, "loss": 0.7182, "step": 22 }, { "epoch": 0.04035087719298246, "grad_norm": 0.36661219596862793, "learning_rate": 9.353052631578947e-05, "loss": 0.6727, "step": 23 }, { "epoch": 0.042105263157894736, "grad_norm": 0.45223456621170044, "learning_rate": 9.300210526315789e-05, "loss": 0.7213, "step": 24 }, { "epoch": 0.043859649122807015, "grad_norm": 0.42050930857658386, "learning_rate": 9.247368421052631e-05, "loss": 0.6136, "step": 25 }, { "epoch": 0.0456140350877193, "grad_norm": 0.5387095212936401, "learning_rate": 9.194526315789473e-05, "loss": 0.7309, "step": 26 }, { "epoch": 0.04736842105263158, "grad_norm": 0.3532935678958893, "learning_rate": 9.141684210526316e-05, "loss": 0.5167, "step": 27 }, { "epoch": 0.04912280701754386, "grad_norm": 0.451276957988739, "learning_rate": 9.088842105263158e-05, "loss": 0.7522, "step": 28 }, { "epoch": 0.05087719298245614, "grad_norm": 0.46688026189804077, "learning_rate": 9.036000000000001e-05, "loss": 0.7287, "step": 29 }, { "epoch": 0.05263157894736842, "grad_norm": 0.48501458764076233, "learning_rate": 8.983157894736843e-05, "loss": 0.7622, "step": 30 }, { "epoch": 0.054385964912280704, "grad_norm": 0.3821016550064087, "learning_rate": 8.930315789473684e-05, "loss": 0.6337, "step": 31 }, { "epoch": 0.056140350877192984, "grad_norm": 0.405115008354187, "learning_rate": 8.877473684210526e-05, "loss": 0.5717, "step": 32 }, { "epoch": 0.05789473684210526, "grad_norm": 0.4410609304904938, "learning_rate": 8.824631578947368e-05, "loss": 0.7316, "step": 33 }, { "epoch": 0.05964912280701754, "grad_norm": 0.4633833169937134, "learning_rate": 8.771789473684211e-05, "loss": 0.5911, "step": 34 }, { "epoch": 0.06140350877192982, "grad_norm": 0.4802892208099365, "learning_rate": 8.718947368421053e-05, "loss": 0.6533, "step": 35 }, { "epoch": 0.06315789473684211, "grad_norm": 0.5321075320243835, "learning_rate": 8.666105263157895e-05, "loss": 0.6759, "step": 36 }, { "epoch": 0.06491228070175438, "grad_norm": 0.5048239827156067, "learning_rate": 8.613263157894737e-05, "loss": 0.6365, "step": 37 }, { "epoch": 0.06666666666666667, "grad_norm": 0.4456152319908142, "learning_rate": 8.560421052631578e-05, "loss": 0.6302, "step": 38 }, { "epoch": 0.06842105263157895, "grad_norm": 0.5115567445755005, "learning_rate": 8.50757894736842e-05, "loss": 0.7867, "step": 39 }, { "epoch": 0.07017543859649122, "grad_norm": 0.5804420113563538, "learning_rate": 8.454736842105263e-05, "loss": 0.6771, "step": 40 }, { "epoch": 0.07192982456140351, "grad_norm": 0.5980835556983948, "learning_rate": 8.401894736842106e-05, "loss": 0.7108, "step": 41 }, { "epoch": 0.07368421052631578, "grad_norm": 0.6573796272277832, "learning_rate": 8.349052631578948e-05, "loss": 0.6749, "step": 42 }, { "epoch": 0.07543859649122807, "grad_norm": 0.5555533170700073, "learning_rate": 8.29621052631579e-05, "loss": 0.5844, "step": 43 }, { "epoch": 0.07719298245614035, "grad_norm": 0.7092603445053101, "learning_rate": 8.243368421052632e-05, "loss": 0.6703, "step": 44 }, { "epoch": 0.07894736842105263, "grad_norm": 0.5733909606933594, "learning_rate": 8.190526315789474e-05, "loss": 0.5058, "step": 45 }, { "epoch": 0.08070175438596491, "grad_norm": 0.7782843708992004, "learning_rate": 8.137684210526315e-05, "loss": 0.6775, "step": 46 }, { "epoch": 0.0824561403508772, "grad_norm": 0.8868169784545898, "learning_rate": 8.084842105263157e-05, "loss": 0.528, "step": 47 }, { "epoch": 0.08421052631578947, "grad_norm": 0.6333807706832886, "learning_rate": 8.032e-05, "loss": 0.5128, "step": 48 }, { "epoch": 0.08596491228070176, "grad_norm": 0.6676380634307861, "learning_rate": 7.979157894736842e-05, "loss": 0.604, "step": 49 }, { "epoch": 0.08771929824561403, "grad_norm": 0.6961976289749146, "learning_rate": 7.926315789473684e-05, "loss": 0.4115, "step": 50 }, { "epoch": 0.08771929824561403, "eval_loss": 0.5966642498970032, "eval_runtime": 18.5398, "eval_samples_per_second": 12.945, "eval_steps_per_second": 3.236, "step": 50 }, { "epoch": 0.08947368421052632, "grad_norm": 0.2961053252220154, "learning_rate": 7.873473684210526e-05, "loss": 0.5178, "step": 51 }, { "epoch": 0.0912280701754386, "grad_norm": 0.34033486247062683, "learning_rate": 7.820631578947369e-05, "loss": 0.516, "step": 52 }, { "epoch": 0.09298245614035087, "grad_norm": 0.25171908736228943, "learning_rate": 7.76778947368421e-05, "loss": 0.4852, "step": 53 }, { "epoch": 0.09473684210526316, "grad_norm": 0.38084715604782104, "learning_rate": 7.714947368421052e-05, "loss": 0.7436, "step": 54 }, { "epoch": 0.09649122807017543, "grad_norm": 0.25944605469703674, "learning_rate": 7.662105263157896e-05, "loss": 0.4129, "step": 55 }, { "epoch": 0.09824561403508772, "grad_norm": 0.26878178119659424, "learning_rate": 7.609263157894737e-05, "loss": 0.5345, "step": 56 }, { "epoch": 0.1, "grad_norm": 0.32609230279922485, "learning_rate": 7.556421052631579e-05, "loss": 0.514, "step": 57 }, { "epoch": 0.10175438596491228, "grad_norm": 0.28511008620262146, "learning_rate": 7.503578947368421e-05, "loss": 0.6131, "step": 58 }, { "epoch": 0.10350877192982456, "grad_norm": 0.30672985315322876, "learning_rate": 7.450736842105263e-05, "loss": 0.5703, "step": 59 }, { "epoch": 0.10526315789473684, "grad_norm": 0.4861604869365692, "learning_rate": 7.397894736842105e-05, "loss": 0.4703, "step": 60 }, { "epoch": 0.10701754385964912, "grad_norm": 0.3522111177444458, "learning_rate": 7.345052631578948e-05, "loss": 0.5839, "step": 61 }, { "epoch": 0.10877192982456141, "grad_norm": 0.30936574935913086, "learning_rate": 7.29221052631579e-05, "loss": 0.6505, "step": 62 }, { "epoch": 0.11052631578947368, "grad_norm": 0.30598902702331543, "learning_rate": 7.239368421052631e-05, "loss": 0.6192, "step": 63 }, { "epoch": 0.11228070175438597, "grad_norm": 0.30793705582618713, "learning_rate": 7.186526315789474e-05, "loss": 0.7445, "step": 64 }, { "epoch": 0.11403508771929824, "grad_norm": 0.35435715317726135, "learning_rate": 7.133684210526316e-05, "loss": 0.6768, "step": 65 }, { "epoch": 0.11578947368421053, "grad_norm": 0.3139382302761078, "learning_rate": 7.080842105263158e-05, "loss": 0.6297, "step": 66 }, { "epoch": 0.11754385964912281, "grad_norm": 0.44019240140914917, "learning_rate": 7.028e-05, "loss": 0.6443, "step": 67 }, { "epoch": 0.11929824561403508, "grad_norm": 0.39933502674102783, "learning_rate": 6.975157894736843e-05, "loss": 0.621, "step": 68 }, { "epoch": 0.12105263157894737, "grad_norm": 0.3180493116378784, "learning_rate": 6.922315789473685e-05, "loss": 0.5866, "step": 69 }, { "epoch": 0.12280701754385964, "grad_norm": 0.41940295696258545, "learning_rate": 6.869473684210527e-05, "loss": 0.8168, "step": 70 }, { "epoch": 0.12456140350877193, "grad_norm": 0.3845388889312744, "learning_rate": 6.816631578947368e-05, "loss": 0.5362, "step": 71 }, { "epoch": 0.12631578947368421, "grad_norm": 0.4456861913204193, "learning_rate": 6.76378947368421e-05, "loss": 0.5595, "step": 72 }, { "epoch": 0.1280701754385965, "grad_norm": 0.37462830543518066, "learning_rate": 6.710947368421052e-05, "loss": 0.5738, "step": 73 }, { "epoch": 0.12982456140350876, "grad_norm": 0.3942214250564575, "learning_rate": 6.658105263157894e-05, "loss": 0.6381, "step": 74 }, { "epoch": 0.13157894736842105, "grad_norm": 0.3449639081954956, "learning_rate": 6.605263157894737e-05, "loss": 0.6181, "step": 75 }, { "epoch": 0.13333333333333333, "grad_norm": 0.37448635697364807, "learning_rate": 6.55242105263158e-05, "loss": 0.5384, "step": 76 }, { "epoch": 0.13508771929824562, "grad_norm": 0.44382399320602417, "learning_rate": 6.499578947368422e-05, "loss": 0.6007, "step": 77 }, { "epoch": 0.1368421052631579, "grad_norm": 0.43044313788414, "learning_rate": 6.446736842105264e-05, "loss": 0.6722, "step": 78 }, { "epoch": 0.13859649122807016, "grad_norm": 0.4483705461025238, "learning_rate": 6.393894736842105e-05, "loss": 0.6315, "step": 79 }, { "epoch": 0.14035087719298245, "grad_norm": 0.4215352237224579, "learning_rate": 6.341052631578947e-05, "loss": 0.6254, "step": 80 }, { "epoch": 0.14210526315789473, "grad_norm": 0.42797961831092834, "learning_rate": 6.288210526315789e-05, "loss": 0.6096, "step": 81 }, { "epoch": 0.14385964912280702, "grad_norm": 0.4556654393672943, "learning_rate": 6.235368421052632e-05, "loss": 0.6797, "step": 82 }, { "epoch": 0.1456140350877193, "grad_norm": 0.41465988755226135, "learning_rate": 6.182526315789474e-05, "loss": 0.6022, "step": 83 }, { "epoch": 0.14736842105263157, "grad_norm": 0.38033217191696167, "learning_rate": 6.129684210526316e-05, "loss": 0.6254, "step": 84 }, { "epoch": 0.14912280701754385, "grad_norm": 0.3971835672855377, "learning_rate": 6.076842105263158e-05, "loss": 0.6715, "step": 85 }, { "epoch": 0.15087719298245614, "grad_norm": 0.49248117208480835, "learning_rate": 6.024e-05, "loss": 0.7178, "step": 86 }, { "epoch": 0.15263157894736842, "grad_norm": 0.4233746826648712, "learning_rate": 5.971157894736842e-05, "loss": 0.6413, "step": 87 }, { "epoch": 0.1543859649122807, "grad_norm": 0.42368537187576294, "learning_rate": 5.9183157894736835e-05, "loss": 0.7474, "step": 88 }, { "epoch": 0.156140350877193, "grad_norm": 0.4115893840789795, "learning_rate": 5.8654736842105267e-05, "loss": 0.6968, "step": 89 }, { "epoch": 0.15789473684210525, "grad_norm": 0.41116833686828613, "learning_rate": 5.8126315789473684e-05, "loss": 0.4591, "step": 90 }, { "epoch": 0.15964912280701754, "grad_norm": 0.5285294055938721, "learning_rate": 5.759789473684211e-05, "loss": 0.7422, "step": 91 }, { "epoch": 0.16140350877192983, "grad_norm": 0.6145169734954834, "learning_rate": 5.706947368421053e-05, "loss": 0.7716, "step": 92 }, { "epoch": 0.1631578947368421, "grad_norm": 0.47066769003868103, "learning_rate": 5.6541052631578945e-05, "loss": 0.6486, "step": 93 }, { "epoch": 0.1649122807017544, "grad_norm": 0.5414472818374634, "learning_rate": 5.601263157894736e-05, "loss": 0.5117, "step": 94 }, { "epoch": 0.16666666666666666, "grad_norm": 0.4897823631763458, "learning_rate": 5.5484210526315794e-05, "loss": 0.5896, "step": 95 }, { "epoch": 0.16842105263157894, "grad_norm": 0.8132259249687195, "learning_rate": 5.495578947368421e-05, "loss": 0.5201, "step": 96 }, { "epoch": 0.17017543859649123, "grad_norm": 0.6366084218025208, "learning_rate": 5.442736842105264e-05, "loss": 0.6222, "step": 97 }, { "epoch": 0.17192982456140352, "grad_norm": 0.5401890873908997, "learning_rate": 5.3898947368421055e-05, "loss": 0.6846, "step": 98 }, { "epoch": 0.1736842105263158, "grad_norm": 0.7210225462913513, "learning_rate": 5.337052631578947e-05, "loss": 0.5031, "step": 99 }, { "epoch": 0.17543859649122806, "grad_norm": 0.8404173254966736, "learning_rate": 5.284210526315789e-05, "loss": 0.5672, "step": 100 }, { "epoch": 0.17543859649122806, "eval_loss": 0.5870506763458252, "eval_runtime": 18.2818, "eval_samples_per_second": 13.128, "eval_steps_per_second": 3.282, "step": 100 }, { "epoch": 0.17719298245614035, "grad_norm": 0.2164190709590912, "learning_rate": 5.231368421052631e-05, "loss": 0.4833, "step": 101 }, { "epoch": 0.17894736842105263, "grad_norm": 0.21264716982841492, "learning_rate": 5.178526315789474e-05, "loss": 0.5487, "step": 102 }, { "epoch": 0.18070175438596492, "grad_norm": 0.2364315688610077, "learning_rate": 5.1256842105263165e-05, "loss": 0.4288, "step": 103 }, { "epoch": 0.1824561403508772, "grad_norm": 0.2573755979537964, "learning_rate": 5.072842105263158e-05, "loss": 0.4423, "step": 104 }, { "epoch": 0.18421052631578946, "grad_norm": 0.2713335156440735, "learning_rate": 5.02e-05, "loss": 0.5011, "step": 105 }, { "epoch": 0.18596491228070175, "grad_norm": 0.3541422486305237, "learning_rate": 4.967157894736842e-05, "loss": 0.6443, "step": 106 }, { "epoch": 0.18771929824561404, "grad_norm": 0.2984892725944519, "learning_rate": 4.914315789473684e-05, "loss": 0.6267, "step": 107 }, { "epoch": 0.18947368421052632, "grad_norm": 0.29001638293266296, "learning_rate": 4.861473684210526e-05, "loss": 0.406, "step": 108 }, { "epoch": 0.1912280701754386, "grad_norm": 0.3922370672225952, "learning_rate": 4.8086315789473686e-05, "loss": 0.5156, "step": 109 }, { "epoch": 0.19298245614035087, "grad_norm": 0.24790653586387634, "learning_rate": 4.7557894736842104e-05, "loss": 0.5068, "step": 110 }, { "epoch": 0.19473684210526315, "grad_norm": 0.28521355986595154, "learning_rate": 4.702947368421053e-05, "loss": 0.6439, "step": 111 }, { "epoch": 0.19649122807017544, "grad_norm": 0.3181811571121216, "learning_rate": 4.6501052631578946e-05, "loss": 0.7475, "step": 112 }, { "epoch": 0.19824561403508772, "grad_norm": 0.3269704580307007, "learning_rate": 4.5972631578947364e-05, "loss": 0.5917, "step": 113 }, { "epoch": 0.2, "grad_norm": 0.3818875551223755, "learning_rate": 4.544421052631579e-05, "loss": 0.535, "step": 114 }, { "epoch": 0.20175438596491227, "grad_norm": 0.32152608036994934, "learning_rate": 4.4915789473684213e-05, "loss": 0.5504, "step": 115 }, { "epoch": 0.20350877192982456, "grad_norm": 0.2853567898273468, "learning_rate": 4.438736842105263e-05, "loss": 0.5734, "step": 116 }, { "epoch": 0.20526315789473684, "grad_norm": 0.3862977623939514, "learning_rate": 4.3858947368421056e-05, "loss": 0.6241, "step": 117 }, { "epoch": 0.20701754385964913, "grad_norm": 0.2862344980239868, "learning_rate": 4.3330526315789474e-05, "loss": 0.6307, "step": 118 }, { "epoch": 0.20877192982456141, "grad_norm": 0.3162228763103485, "learning_rate": 4.280210526315789e-05, "loss": 0.4818, "step": 119 }, { "epoch": 0.21052631578947367, "grad_norm": 0.3464224934577942, "learning_rate": 4.2273684210526317e-05, "loss": 0.389, "step": 120 }, { "epoch": 0.21228070175438596, "grad_norm": 0.2816769480705261, "learning_rate": 4.174526315789474e-05, "loss": 0.4168, "step": 121 }, { "epoch": 0.21403508771929824, "grad_norm": 0.30498382449150085, "learning_rate": 4.121684210526316e-05, "loss": 0.5781, "step": 122 }, { "epoch": 0.21578947368421053, "grad_norm": 0.34201303124427795, "learning_rate": 4.068842105263158e-05, "loss": 0.7485, "step": 123 }, { "epoch": 0.21754385964912282, "grad_norm": 0.3817812502384186, "learning_rate": 4.016e-05, "loss": 0.7546, "step": 124 }, { "epoch": 0.21929824561403508, "grad_norm": 0.3733166754245758, "learning_rate": 3.963157894736842e-05, "loss": 0.6956, "step": 125 }, { "epoch": 0.22105263157894736, "grad_norm": 0.34608200192451477, "learning_rate": 3.9103157894736844e-05, "loss": 0.5985, "step": 126 }, { "epoch": 0.22280701754385965, "grad_norm": 0.3796122968196869, "learning_rate": 3.857473684210526e-05, "loss": 0.5721, "step": 127 }, { "epoch": 0.22456140350877193, "grad_norm": 0.4093203544616699, "learning_rate": 3.804631578947369e-05, "loss": 0.6252, "step": 128 }, { "epoch": 0.22631578947368422, "grad_norm": 0.3871783912181854, "learning_rate": 3.7517894736842105e-05, "loss": 0.5049, "step": 129 }, { "epoch": 0.22807017543859648, "grad_norm": 0.47319653630256653, "learning_rate": 3.698947368421052e-05, "loss": 0.6355, "step": 130 }, { "epoch": 0.22982456140350876, "grad_norm": 0.36782076954841614, "learning_rate": 3.646105263157895e-05, "loss": 0.5886, "step": 131 }, { "epoch": 0.23157894736842105, "grad_norm": 0.40341702103614807, "learning_rate": 3.593263157894737e-05, "loss": 0.7229, "step": 132 }, { "epoch": 0.23333333333333334, "grad_norm": 0.42723962664604187, "learning_rate": 3.540421052631579e-05, "loss": 0.7543, "step": 133 }, { "epoch": 0.23508771929824562, "grad_norm": 0.453500360250473, "learning_rate": 3.4875789473684215e-05, "loss": 0.5612, "step": 134 }, { "epoch": 0.23684210526315788, "grad_norm": 0.34924155473709106, "learning_rate": 3.434736842105263e-05, "loss": 0.5513, "step": 135 }, { "epoch": 0.23859649122807017, "grad_norm": 0.5097976922988892, "learning_rate": 3.381894736842105e-05, "loss": 0.5897, "step": 136 }, { "epoch": 0.24035087719298245, "grad_norm": 0.4261370301246643, "learning_rate": 3.329052631578947e-05, "loss": 0.6187, "step": 137 }, { "epoch": 0.24210526315789474, "grad_norm": 0.4436197876930237, "learning_rate": 3.27621052631579e-05, "loss": 0.6153, "step": 138 }, { "epoch": 0.24385964912280703, "grad_norm": 0.4517599642276764, "learning_rate": 3.223368421052632e-05, "loss": 0.7161, "step": 139 }, { "epoch": 0.24561403508771928, "grad_norm": 0.48819050192832947, "learning_rate": 3.1705263157894736e-05, "loss": 0.6747, "step": 140 }, { "epoch": 0.24736842105263157, "grad_norm": 0.5019546747207642, "learning_rate": 3.117684210526316e-05, "loss": 0.6966, "step": 141 }, { "epoch": 0.24912280701754386, "grad_norm": 0.43949094414711, "learning_rate": 3.064842105263158e-05, "loss": 0.5501, "step": 142 }, { "epoch": 0.25087719298245614, "grad_norm": 0.5558748841285706, "learning_rate": 3.012e-05, "loss": 0.7356, "step": 143 }, { "epoch": 0.25263157894736843, "grad_norm": 0.5377136468887329, "learning_rate": 2.9591578947368418e-05, "loss": 0.6438, "step": 144 }, { "epoch": 0.2543859649122807, "grad_norm": 0.6635644435882568, "learning_rate": 2.9063157894736842e-05, "loss": 0.5251, "step": 145 }, { "epoch": 0.256140350877193, "grad_norm": 0.5351884961128235, "learning_rate": 2.8534736842105264e-05, "loss": 0.5957, "step": 146 }, { "epoch": 0.2578947368421053, "grad_norm": 0.5116856694221497, "learning_rate": 2.800631578947368e-05, "loss": 0.49, "step": 147 }, { "epoch": 0.2596491228070175, "grad_norm": 0.5415542125701904, "learning_rate": 2.7477894736842106e-05, "loss": 0.5215, "step": 148 }, { "epoch": 0.2614035087719298, "grad_norm": 0.8459779024124146, "learning_rate": 2.6949473684210527e-05, "loss": 0.433, "step": 149 }, { "epoch": 0.2631578947368421, "grad_norm": 1.0002394914627075, "learning_rate": 2.6421052631578945e-05, "loss": 0.4335, "step": 150 }, { "epoch": 0.2631578947368421, "eval_loss": 0.5780511498451233, "eval_runtime": 18.5068, "eval_samples_per_second": 12.968, "eval_steps_per_second": 3.242, "step": 150 }, { "epoch": 0.2649122807017544, "grad_norm": 0.17568722367286682, "learning_rate": 2.589263157894737e-05, "loss": 0.546, "step": 151 }, { "epoch": 0.26666666666666666, "grad_norm": 0.3429137170314789, "learning_rate": 2.536421052631579e-05, "loss": 0.3165, "step": 152 }, { "epoch": 0.26842105263157895, "grad_norm": 0.21474985778331757, "learning_rate": 2.483578947368421e-05, "loss": 0.415, "step": 153 }, { "epoch": 0.27017543859649124, "grad_norm": 0.29766225814819336, "learning_rate": 2.430736842105263e-05, "loss": 0.5736, "step": 154 }, { "epoch": 0.2719298245614035, "grad_norm": 0.268090158700943, "learning_rate": 2.3778947368421052e-05, "loss": 0.5472, "step": 155 }, { "epoch": 0.2736842105263158, "grad_norm": 0.2689472436904907, "learning_rate": 2.3250526315789473e-05, "loss": 0.5009, "step": 156 }, { "epoch": 0.2754385964912281, "grad_norm": 0.23201614618301392, "learning_rate": 2.2722105263157894e-05, "loss": 0.4179, "step": 157 }, { "epoch": 0.2771929824561403, "grad_norm": 0.3329578936100006, "learning_rate": 2.2193684210526316e-05, "loss": 0.3145, "step": 158 }, { "epoch": 0.2789473684210526, "grad_norm": 0.30881157517433167, "learning_rate": 2.1665263157894737e-05, "loss": 0.6029, "step": 159 }, { "epoch": 0.2807017543859649, "grad_norm": 0.3386708199977875, "learning_rate": 2.1136842105263158e-05, "loss": 0.7325, "step": 160 }, { "epoch": 0.2824561403508772, "grad_norm": 0.2562579810619354, "learning_rate": 2.060842105263158e-05, "loss": 0.402, "step": 161 }, { "epoch": 0.28421052631578947, "grad_norm": 0.27218446135520935, "learning_rate": 2.008e-05, "loss": 0.4634, "step": 162 }, { "epoch": 0.28596491228070176, "grad_norm": 0.340582013130188, "learning_rate": 1.9551578947368422e-05, "loss": 0.4829, "step": 163 }, { "epoch": 0.28771929824561404, "grad_norm": 0.4173396825790405, "learning_rate": 1.9023157894736843e-05, "loss": 0.6958, "step": 164 }, { "epoch": 0.2894736842105263, "grad_norm": 0.293707013130188, "learning_rate": 1.849473684210526e-05, "loss": 0.5717, "step": 165 }, { "epoch": 0.2912280701754386, "grad_norm": 0.3465547561645508, "learning_rate": 1.7966315789473686e-05, "loss": 0.6538, "step": 166 }, { "epoch": 0.2929824561403509, "grad_norm": 0.4900147020816803, "learning_rate": 1.7437894736842107e-05, "loss": 0.4976, "step": 167 }, { "epoch": 0.29473684210526313, "grad_norm": 0.41154950857162476, "learning_rate": 1.6909473684210525e-05, "loss": 0.7079, "step": 168 }, { "epoch": 0.2964912280701754, "grad_norm": 0.37281063199043274, "learning_rate": 1.638105263157895e-05, "loss": 0.5842, "step": 169 }, { "epoch": 0.2982456140350877, "grad_norm": 0.2995673716068268, "learning_rate": 1.5852631578947368e-05, "loss": 0.4967, "step": 170 }, { "epoch": 0.3, "grad_norm": 0.35212984681129456, "learning_rate": 1.532421052631579e-05, "loss": 0.47, "step": 171 }, { "epoch": 0.3017543859649123, "grad_norm": 0.3804338872432709, "learning_rate": 1.4795789473684209e-05, "loss": 0.6148, "step": 172 }, { "epoch": 0.30350877192982456, "grad_norm": 0.35750070214271545, "learning_rate": 1.4267368421052632e-05, "loss": 0.6549, "step": 173 }, { "epoch": 0.30526315789473685, "grad_norm": 0.3266410529613495, "learning_rate": 1.3738947368421053e-05, "loss": 0.6267, "step": 174 }, { "epoch": 0.30701754385964913, "grad_norm": 0.40702515840530396, "learning_rate": 1.3210526315789473e-05, "loss": 0.6736, "step": 175 }, { "epoch": 0.3087719298245614, "grad_norm": 0.35786283016204834, "learning_rate": 1.2682105263157896e-05, "loss": 0.6763, "step": 176 }, { "epoch": 0.3105263157894737, "grad_norm": 0.3948259949684143, "learning_rate": 1.2153684210526315e-05, "loss": 0.6734, "step": 177 }, { "epoch": 0.312280701754386, "grad_norm": 0.37104299664497375, "learning_rate": 1.1625263157894737e-05, "loss": 0.7661, "step": 178 }, { "epoch": 0.3140350877192982, "grad_norm": 0.3601257801055908, "learning_rate": 1.1096842105263158e-05, "loss": 0.6679, "step": 179 }, { "epoch": 0.3157894736842105, "grad_norm": 0.45755448937416077, "learning_rate": 1.0568421052631579e-05, "loss": 0.8599, "step": 180 }, { "epoch": 0.3175438596491228, "grad_norm": 0.40678536891937256, "learning_rate": 1.004e-05, "loss": 0.7713, "step": 181 }, { "epoch": 0.3192982456140351, "grad_norm": 0.3657882809638977, "learning_rate": 9.511578947368422e-06, "loss": 0.6317, "step": 182 }, { "epoch": 0.32105263157894737, "grad_norm": 0.752341091632843, "learning_rate": 8.983157894736843e-06, "loss": 0.7552, "step": 183 }, { "epoch": 0.32280701754385965, "grad_norm": 0.4538853168487549, "learning_rate": 8.454736842105263e-06, "loss": 0.7126, "step": 184 }, { "epoch": 0.32456140350877194, "grad_norm": 0.5402305722236633, "learning_rate": 7.926315789473684e-06, "loss": 0.7343, "step": 185 }, { "epoch": 0.3263157894736842, "grad_norm": 0.43681031465530396, "learning_rate": 7.397894736842104e-06, "loss": 0.6744, "step": 186 }, { "epoch": 0.3280701754385965, "grad_norm": 0.3734006881713867, "learning_rate": 6.8694736842105265e-06, "loss": 0.4868, "step": 187 }, { "epoch": 0.3298245614035088, "grad_norm": 0.4675087034702301, "learning_rate": 6.341052631578948e-06, "loss": 0.69, "step": 188 }, { "epoch": 0.33157894736842103, "grad_norm": 0.41553711891174316, "learning_rate": 5.812631578947368e-06, "loss": 0.6899, "step": 189 }, { "epoch": 0.3333333333333333, "grad_norm": 0.42875123023986816, "learning_rate": 5.2842105263157896e-06, "loss": 0.5848, "step": 190 }, { "epoch": 0.3350877192982456, "grad_norm": 0.49000978469848633, "learning_rate": 4.755789473684211e-06, "loss": 0.6394, "step": 191 }, { "epoch": 0.3368421052631579, "grad_norm": 0.5385306477546692, "learning_rate": 4.227368421052631e-06, "loss": 0.6832, "step": 192 }, { "epoch": 0.3385964912280702, "grad_norm": 0.5240684151649475, "learning_rate": 3.698947368421052e-06, "loss": 0.6332, "step": 193 }, { "epoch": 0.34035087719298246, "grad_norm": 0.5050160884857178, "learning_rate": 3.170526315789474e-06, "loss": 0.6008, "step": 194 }, { "epoch": 0.34210526315789475, "grad_norm": 0.41370368003845215, "learning_rate": 2.6421052631578948e-06, "loss": 0.4953, "step": 195 }, { "epoch": 0.34385964912280703, "grad_norm": 0.5040670037269592, "learning_rate": 2.1136842105263157e-06, "loss": 0.7212, "step": 196 }, { "epoch": 0.3456140350877193, "grad_norm": 0.48486071825027466, "learning_rate": 1.585263157894737e-06, "loss": 0.4893, "step": 197 }, { "epoch": 0.3473684210526316, "grad_norm": 0.5510401725769043, "learning_rate": 1.0568421052631578e-06, "loss": 0.6578, "step": 198 }, { "epoch": 0.34912280701754383, "grad_norm": 0.7318386435508728, "learning_rate": 5.284210526315789e-07, "loss": 0.4288, "step": 199 }, { "epoch": 0.3508771929824561, "grad_norm": 0.6111555099487305, "learning_rate": 0.0, "loss": 0.4614, "step": 200 }, { "epoch": 0.3508771929824561, "eval_loss": 0.5751292109489441, "eval_runtime": 18.4962, "eval_samples_per_second": 12.976, "eval_steps_per_second": 3.244, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.871152053157888e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }