{ "best_metric": 1.3729252815246582, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.2238388360380526, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001119194180190263, "grad_norm": 0.8333172798156738, "learning_rate": 1.0060000000000002e-05, "loss": 1.9023, "step": 1 }, { "epoch": 0.001119194180190263, "eval_loss": 1.6400463581085205, "eval_runtime": 48.1693, "eval_samples_per_second": 7.827, "eval_steps_per_second": 1.972, "step": 1 }, { "epoch": 0.002238388360380526, "grad_norm": 0.8827718496322632, "learning_rate": 2.0120000000000004e-05, "loss": 1.9114, "step": 2 }, { "epoch": 0.003357582540570789, "grad_norm": 0.991491436958313, "learning_rate": 3.018e-05, "loss": 2.1815, "step": 3 }, { "epoch": 0.004476776720761052, "grad_norm": 0.9564715623855591, "learning_rate": 4.024000000000001e-05, "loss": 2.5454, "step": 4 }, { "epoch": 0.005595970900951315, "grad_norm": 0.9266034960746765, "learning_rate": 5.03e-05, "loss": 2.5874, "step": 5 }, { "epoch": 0.006715165081141578, "grad_norm": 0.7990221977233887, "learning_rate": 6.036e-05, "loss": 1.5337, "step": 6 }, { "epoch": 0.00783435926133184, "grad_norm": 0.9239597916603088, "learning_rate": 7.042e-05, "loss": 1.7556, "step": 7 }, { "epoch": 0.008953553441522105, "grad_norm": 0.9033121466636658, "learning_rate": 8.048000000000002e-05, "loss": 1.777, "step": 8 }, { "epoch": 0.010072747621712367, "grad_norm": 0.8066126108169556, "learning_rate": 9.054000000000001e-05, "loss": 1.5976, "step": 9 }, { "epoch": 0.01119194180190263, "grad_norm": 0.7422162294387817, "learning_rate": 0.0001006, "loss": 1.4408, "step": 10 }, { "epoch": 0.012311135982092894, "grad_norm": 0.9349278211593628, "learning_rate": 0.00010007052631578948, "loss": 1.6931, "step": 11 }, { "epoch": 0.013430330162283156, "grad_norm": 0.6395450830459595, "learning_rate": 9.954105263157895e-05, "loss": 1.1895, "step": 12 }, { "epoch": 0.014549524342473419, "grad_norm": 0.6857991814613342, "learning_rate": 9.901157894736842e-05, "loss": 1.5118, "step": 13 }, { "epoch": 0.01566871852266368, "grad_norm": 0.7936204075813293, "learning_rate": 9.84821052631579e-05, "loss": 1.7452, "step": 14 }, { "epoch": 0.016787912702853944, "grad_norm": 0.7593660950660706, "learning_rate": 9.795263157894737e-05, "loss": 1.2994, "step": 15 }, { "epoch": 0.01790710688304421, "grad_norm": 0.8076448440551758, "learning_rate": 9.742315789473686e-05, "loss": 1.683, "step": 16 }, { "epoch": 0.019026301063234472, "grad_norm": 0.6509789228439331, "learning_rate": 9.689368421052633e-05, "loss": 0.9289, "step": 17 }, { "epoch": 0.020145495243424735, "grad_norm": 0.7229270935058594, "learning_rate": 9.63642105263158e-05, "loss": 1.0062, "step": 18 }, { "epoch": 0.021264689423614997, "grad_norm": 0.6909345388412476, "learning_rate": 9.583473684210527e-05, "loss": 1.4466, "step": 19 }, { "epoch": 0.02238388360380526, "grad_norm": 0.5661738514900208, "learning_rate": 9.530526315789474e-05, "loss": 0.9758, "step": 20 }, { "epoch": 0.023503077783995522, "grad_norm": 0.6417646408081055, "learning_rate": 9.477578947368422e-05, "loss": 0.8591, "step": 21 }, { "epoch": 0.024622271964185788, "grad_norm": 0.7619107365608215, "learning_rate": 9.424631578947369e-05, "loss": 1.3434, "step": 22 }, { "epoch": 0.02574146614437605, "grad_norm": 0.6540561318397522, "learning_rate": 9.371684210526316e-05, "loss": 1.025, "step": 23 }, { "epoch": 0.026860660324566313, "grad_norm": 0.7674716711044312, "learning_rate": 9.318736842105263e-05, "loss": 1.3302, "step": 24 }, { "epoch": 0.027979854504756575, "grad_norm": 0.5855326056480408, "learning_rate": 9.26578947368421e-05, "loss": 0.8368, "step": 25 }, { "epoch": 0.029099048684946838, "grad_norm": 0.8187544345855713, "learning_rate": 9.212842105263159e-05, "loss": 1.3082, "step": 26 }, { "epoch": 0.0302182428651371, "grad_norm": 0.67572021484375, "learning_rate": 9.159894736842107e-05, "loss": 1.0997, "step": 27 }, { "epoch": 0.03133743704532736, "grad_norm": 0.7818107604980469, "learning_rate": 9.106947368421054e-05, "loss": 1.3211, "step": 28 }, { "epoch": 0.032456631225517625, "grad_norm": 0.6680915355682373, "learning_rate": 9.054000000000001e-05, "loss": 1.0876, "step": 29 }, { "epoch": 0.03357582540570789, "grad_norm": 0.7992515563964844, "learning_rate": 9.001052631578948e-05, "loss": 1.073, "step": 30 }, { "epoch": 0.03469501958589815, "grad_norm": 0.7344330549240112, "learning_rate": 8.948105263157895e-05, "loss": 1.1912, "step": 31 }, { "epoch": 0.03581421376608842, "grad_norm": 0.7862792611122131, "learning_rate": 8.895157894736842e-05, "loss": 1.3947, "step": 32 }, { "epoch": 0.03693340794627868, "grad_norm": 0.7691140174865723, "learning_rate": 8.842210526315789e-05, "loss": 1.3881, "step": 33 }, { "epoch": 0.038052602126468944, "grad_norm": 0.6874685287475586, "learning_rate": 8.789263157894738e-05, "loss": 1.0838, "step": 34 }, { "epoch": 0.03917179630665921, "grad_norm": 0.7623674869537354, "learning_rate": 8.736315789473685e-05, "loss": 1.3063, "step": 35 }, { "epoch": 0.04029099048684947, "grad_norm": 0.8765361309051514, "learning_rate": 8.683368421052632e-05, "loss": 1.0613, "step": 36 }, { "epoch": 0.04141018466703973, "grad_norm": 0.8110713958740234, "learning_rate": 8.63042105263158e-05, "loss": 1.3362, "step": 37 }, { "epoch": 0.042529378847229994, "grad_norm": 0.7469275593757629, "learning_rate": 8.577473684210527e-05, "loss": 1.2636, "step": 38 }, { "epoch": 0.04364857302742026, "grad_norm": 0.692807674407959, "learning_rate": 8.524526315789474e-05, "loss": 1.0179, "step": 39 }, { "epoch": 0.04476776720761052, "grad_norm": 0.750609278678894, "learning_rate": 8.471578947368421e-05, "loss": 1.2124, "step": 40 }, { "epoch": 0.04588696138780078, "grad_norm": 0.8829929828643799, "learning_rate": 8.41863157894737e-05, "loss": 1.1853, "step": 41 }, { "epoch": 0.047006155567991044, "grad_norm": 0.8921776413917542, "learning_rate": 8.365684210526317e-05, "loss": 1.4338, "step": 42 }, { "epoch": 0.048125349748181306, "grad_norm": 0.8418493270874023, "learning_rate": 8.312736842105264e-05, "loss": 1.2823, "step": 43 }, { "epoch": 0.049244543928371576, "grad_norm": 0.6852906942367554, "learning_rate": 8.259789473684211e-05, "loss": 1.1343, "step": 44 }, { "epoch": 0.05036373810856184, "grad_norm": 0.8703294396400452, "learning_rate": 8.206842105263158e-05, "loss": 1.4008, "step": 45 }, { "epoch": 0.0514829322887521, "grad_norm": 1.12958824634552, "learning_rate": 8.153894736842105e-05, "loss": 1.8617, "step": 46 }, { "epoch": 0.05260212646894236, "grad_norm": 1.4514553546905518, "learning_rate": 8.100947368421053e-05, "loss": 1.5494, "step": 47 }, { "epoch": 0.053721320649132626, "grad_norm": 1.2124923467636108, "learning_rate": 8.048000000000002e-05, "loss": 1.7901, "step": 48 }, { "epoch": 0.05484051482932289, "grad_norm": 1.474413275718689, "learning_rate": 7.995052631578949e-05, "loss": 1.7945, "step": 49 }, { "epoch": 0.05595970900951315, "grad_norm": 2.1778807640075684, "learning_rate": 7.942105263157896e-05, "loss": 1.9731, "step": 50 }, { "epoch": 0.05595970900951315, "eval_loss": 1.4635015726089478, "eval_runtime": 47.7507, "eval_samples_per_second": 7.895, "eval_steps_per_second": 1.989, "step": 50 }, { "epoch": 0.05707890318970341, "grad_norm": 0.8516172170639038, "learning_rate": 7.889157894736843e-05, "loss": 1.9615, "step": 51 }, { "epoch": 0.058198097369893675, "grad_norm": 0.7036626935005188, "learning_rate": 7.83621052631579e-05, "loss": 2.1477, "step": 52 }, { "epoch": 0.05931729155008394, "grad_norm": 0.7194252014160156, "learning_rate": 7.783263157894737e-05, "loss": 2.3045, "step": 53 }, { "epoch": 0.0604364857302742, "grad_norm": 0.7690565586090088, "learning_rate": 7.730315789473684e-05, "loss": 2.2782, "step": 54 }, { "epoch": 0.06155567991046446, "grad_norm": 0.6265071630477905, "learning_rate": 7.677368421052632e-05, "loss": 1.7766, "step": 55 }, { "epoch": 0.06267487409065473, "grad_norm": 0.6072700619697571, "learning_rate": 7.624421052631579e-05, "loss": 1.8207, "step": 56 }, { "epoch": 0.063794068270845, "grad_norm": 0.5296346545219421, "learning_rate": 7.571473684210526e-05, "loss": 1.2979, "step": 57 }, { "epoch": 0.06491326245103525, "grad_norm": 0.6143375635147095, "learning_rate": 7.518526315789475e-05, "loss": 1.7014, "step": 58 }, { "epoch": 0.06603245663122552, "grad_norm": 0.5131465196609497, "learning_rate": 7.465578947368422e-05, "loss": 1.1046, "step": 59 }, { "epoch": 0.06715165081141578, "grad_norm": 0.5977643728256226, "learning_rate": 7.412631578947369e-05, "loss": 1.6154, "step": 60 }, { "epoch": 0.06827084499160604, "grad_norm": 0.5476318001747131, "learning_rate": 7.359684210526317e-05, "loss": 1.3177, "step": 61 }, { "epoch": 0.0693900391717963, "grad_norm": 0.6704787611961365, "learning_rate": 7.306736842105264e-05, "loss": 1.7062, "step": 62 }, { "epoch": 0.07050923335198657, "grad_norm": 0.6229788064956665, "learning_rate": 7.253789473684211e-05, "loss": 1.4367, "step": 63 }, { "epoch": 0.07162842753217684, "grad_norm": 0.6314613819122314, "learning_rate": 7.200842105263158e-05, "loss": 1.5649, "step": 64 }, { "epoch": 0.0727476217123671, "grad_norm": 0.6403206586837769, "learning_rate": 7.147894736842105e-05, "loss": 1.8579, "step": 65 }, { "epoch": 0.07386681589255736, "grad_norm": 0.5655908584594727, "learning_rate": 7.094947368421052e-05, "loss": 1.119, "step": 66 }, { "epoch": 0.07498601007274762, "grad_norm": 0.502298891544342, "learning_rate": 7.042e-05, "loss": 0.8837, "step": 67 }, { "epoch": 0.07610520425293789, "grad_norm": 0.5558024048805237, "learning_rate": 6.989052631578948e-05, "loss": 1.1857, "step": 68 }, { "epoch": 0.07722439843312814, "grad_norm": 0.5176172852516174, "learning_rate": 6.936105263157896e-05, "loss": 0.9946, "step": 69 }, { "epoch": 0.07834359261331841, "grad_norm": 0.576248824596405, "learning_rate": 6.883157894736843e-05, "loss": 0.9529, "step": 70 }, { "epoch": 0.07946278679350867, "grad_norm": 0.5664923787117004, "learning_rate": 6.83021052631579e-05, "loss": 1.1525, "step": 71 }, { "epoch": 0.08058198097369894, "grad_norm": 0.6141046285629272, "learning_rate": 6.777263157894737e-05, "loss": 1.2382, "step": 72 }, { "epoch": 0.0817011751538892, "grad_norm": 0.5944051146507263, "learning_rate": 6.724315789473684e-05, "loss": 1.1227, "step": 73 }, { "epoch": 0.08282036933407946, "grad_norm": 0.6328909397125244, "learning_rate": 6.671368421052631e-05, "loss": 1.1311, "step": 74 }, { "epoch": 0.08393956351426973, "grad_norm": 0.5099555850028992, "learning_rate": 6.61842105263158e-05, "loss": 1.0304, "step": 75 }, { "epoch": 0.08505875769445999, "grad_norm": 0.6328510642051697, "learning_rate": 6.565473684210527e-05, "loss": 0.997, "step": 76 }, { "epoch": 0.08617795187465026, "grad_norm": 0.5725546479225159, "learning_rate": 6.512526315789474e-05, "loss": 0.9623, "step": 77 }, { "epoch": 0.08729714605484051, "grad_norm": 0.6395103931427002, "learning_rate": 6.459578947368421e-05, "loss": 1.5526, "step": 78 }, { "epoch": 0.08841634023503078, "grad_norm": 0.6277320981025696, "learning_rate": 6.406631578947369e-05, "loss": 1.1281, "step": 79 }, { "epoch": 0.08953553441522104, "grad_norm": 0.589504063129425, "learning_rate": 6.353684210526316e-05, "loss": 1.0345, "step": 80 }, { "epoch": 0.09065472859541131, "grad_norm": 0.6192626357078552, "learning_rate": 6.300736842105263e-05, "loss": 0.9688, "step": 81 }, { "epoch": 0.09177392277560156, "grad_norm": 0.6085880994796753, "learning_rate": 6.247789473684212e-05, "loss": 0.8814, "step": 82 }, { "epoch": 0.09289311695579183, "grad_norm": 0.6605837345123291, "learning_rate": 6.194842105263159e-05, "loss": 1.4382, "step": 83 }, { "epoch": 0.09401231113598209, "grad_norm": 0.6139897108078003, "learning_rate": 6.141894736842106e-05, "loss": 1.091, "step": 84 }, { "epoch": 0.09513150531617236, "grad_norm": 0.6851410269737244, "learning_rate": 6.088947368421053e-05, "loss": 1.1183, "step": 85 }, { "epoch": 0.09625069949636261, "grad_norm": 0.6170366406440735, "learning_rate": 6.036e-05, "loss": 1.105, "step": 86 }, { "epoch": 0.09736989367655288, "grad_norm": 0.84287029504776, "learning_rate": 5.9830526315789475e-05, "loss": 1.0071, "step": 87 }, { "epoch": 0.09848908785674315, "grad_norm": 0.6654536128044128, "learning_rate": 5.9301052631578946e-05, "loss": 1.1307, "step": 88 }, { "epoch": 0.09960828203693341, "grad_norm": 0.6223897933959961, "learning_rate": 5.877157894736843e-05, "loss": 0.885, "step": 89 }, { "epoch": 0.10072747621712368, "grad_norm": 0.7293018102645874, "learning_rate": 5.82421052631579e-05, "loss": 1.1569, "step": 90 }, { "epoch": 0.10184667039731393, "grad_norm": 0.654303789138794, "learning_rate": 5.771263157894737e-05, "loss": 0.9901, "step": 91 }, { "epoch": 0.1029658645775042, "grad_norm": 0.7520757913589478, "learning_rate": 5.718315789473685e-05, "loss": 1.4022, "step": 92 }, { "epoch": 0.10408505875769446, "grad_norm": 0.7011393904685974, "learning_rate": 5.665368421052632e-05, "loss": 1.18, "step": 93 }, { "epoch": 0.10520425293788473, "grad_norm": 0.7474265098571777, "learning_rate": 5.612421052631579e-05, "loss": 1.3109, "step": 94 }, { "epoch": 0.10632344711807498, "grad_norm": 0.8840892910957336, "learning_rate": 5.559473684210527e-05, "loss": 1.4722, "step": 95 }, { "epoch": 0.10744264129826525, "grad_norm": 0.831760048866272, "learning_rate": 5.506526315789474e-05, "loss": 1.3972, "step": 96 }, { "epoch": 0.1085618354784555, "grad_norm": 1.0217140913009644, "learning_rate": 5.453578947368421e-05, "loss": 1.6376, "step": 97 }, { "epoch": 0.10968102965864578, "grad_norm": 2.1897904872894287, "learning_rate": 5.400631578947369e-05, "loss": 1.3989, "step": 98 }, { "epoch": 0.11080022383883603, "grad_norm": 1.0960580110549927, "learning_rate": 5.347684210526316e-05, "loss": 1.8274, "step": 99 }, { "epoch": 0.1119194180190263, "grad_norm": 1.469871163368225, "learning_rate": 5.294736842105263e-05, "loss": 2.0208, "step": 100 }, { "epoch": 0.1119194180190263, "eval_loss": 1.4105467796325684, "eval_runtime": 48.5917, "eval_samples_per_second": 7.759, "eval_steps_per_second": 1.955, "step": 100 }, { "epoch": 0.11303861219921657, "grad_norm": 0.5879180431365967, "learning_rate": 5.24178947368421e-05, "loss": 1.5242, "step": 101 }, { "epoch": 0.11415780637940683, "grad_norm": 0.5801900625228882, "learning_rate": 5.1888421052631585e-05, "loss": 1.4539, "step": 102 }, { "epoch": 0.1152770005595971, "grad_norm": 0.6456881165504456, "learning_rate": 5.135894736842106e-05, "loss": 1.8406, "step": 103 }, { "epoch": 0.11639619473978735, "grad_norm": 0.6166803240776062, "learning_rate": 5.082947368421053e-05, "loss": 1.8484, "step": 104 }, { "epoch": 0.11751538891997762, "grad_norm": 0.6689649820327759, "learning_rate": 5.03e-05, "loss": 2.47, "step": 105 }, { "epoch": 0.11863458310016788, "grad_norm": 0.6608978509902954, "learning_rate": 4.977052631578947e-05, "loss": 1.9793, "step": 106 }, { "epoch": 0.11975377728035815, "grad_norm": 0.6113259792327881, "learning_rate": 4.924105263157895e-05, "loss": 2.0245, "step": 107 }, { "epoch": 0.1208729714605484, "grad_norm": 0.45949867367744446, "learning_rate": 4.871157894736843e-05, "loss": 1.0876, "step": 108 }, { "epoch": 0.12199216564073867, "grad_norm": 0.5350001454353333, "learning_rate": 4.81821052631579e-05, "loss": 1.6227, "step": 109 }, { "epoch": 0.12311135982092893, "grad_norm": 0.5312851667404175, "learning_rate": 4.765263157894737e-05, "loss": 1.4318, "step": 110 }, { "epoch": 0.1242305540011192, "grad_norm": 0.4572623670101166, "learning_rate": 4.7123157894736845e-05, "loss": 1.0106, "step": 111 }, { "epoch": 0.12534974818130945, "grad_norm": 0.5980809330940247, "learning_rate": 4.6593684210526316e-05, "loss": 1.7434, "step": 112 }, { "epoch": 0.12646894236149972, "grad_norm": 0.548912763595581, "learning_rate": 4.606421052631579e-05, "loss": 1.2903, "step": 113 }, { "epoch": 0.12758813654169, "grad_norm": 0.5556053519248962, "learning_rate": 4.553473684210527e-05, "loss": 1.4555, "step": 114 }, { "epoch": 0.12870733072188026, "grad_norm": 0.47583720088005066, "learning_rate": 4.500526315789474e-05, "loss": 0.9821, "step": 115 }, { "epoch": 0.1298265249020705, "grad_norm": 0.5786464810371399, "learning_rate": 4.447578947368421e-05, "loss": 1.3264, "step": 116 }, { "epoch": 0.13094571908226077, "grad_norm": 0.7116870284080505, "learning_rate": 4.394631578947369e-05, "loss": 1.7898, "step": 117 }, { "epoch": 0.13206491326245104, "grad_norm": 0.5318160057067871, "learning_rate": 4.341684210526316e-05, "loss": 1.2532, "step": 118 }, { "epoch": 0.1331841074426413, "grad_norm": 0.6466919183731079, "learning_rate": 4.2887368421052636e-05, "loss": 1.8143, "step": 119 }, { "epoch": 0.13430330162283155, "grad_norm": 0.5290660262107849, "learning_rate": 4.2357894736842106e-05, "loss": 1.2196, "step": 120 }, { "epoch": 0.13542249580302182, "grad_norm": 0.5391547679901123, "learning_rate": 4.182842105263158e-05, "loss": 1.2362, "step": 121 }, { "epoch": 0.1365416899832121, "grad_norm": 0.5914385914802551, "learning_rate": 4.1298947368421053e-05, "loss": 1.1101, "step": 122 }, { "epoch": 0.13766088416340236, "grad_norm": 0.49526429176330566, "learning_rate": 4.0769473684210524e-05, "loss": 1.0053, "step": 123 }, { "epoch": 0.1387800783435926, "grad_norm": 0.5738490223884583, "learning_rate": 4.024000000000001e-05, "loss": 1.3063, "step": 124 }, { "epoch": 0.13989927252378287, "grad_norm": 0.5793554186820984, "learning_rate": 3.971052631578948e-05, "loss": 1.1082, "step": 125 }, { "epoch": 0.14101846670397314, "grad_norm": 0.5960007905960083, "learning_rate": 3.918105263157895e-05, "loss": 1.2714, "step": 126 }, { "epoch": 0.1421376608841634, "grad_norm": 0.5454027056694031, "learning_rate": 3.865157894736842e-05, "loss": 0.9059, "step": 127 }, { "epoch": 0.14325685506435368, "grad_norm": 0.5426894426345825, "learning_rate": 3.8122105263157896e-05, "loss": 0.983, "step": 128 }, { "epoch": 0.14437604924454392, "grad_norm": 0.5231617093086243, "learning_rate": 3.759263157894737e-05, "loss": 0.893, "step": 129 }, { "epoch": 0.1454952434247342, "grad_norm": 0.5522717237472534, "learning_rate": 3.7063157894736844e-05, "loss": 0.9089, "step": 130 }, { "epoch": 0.14661443760492446, "grad_norm": 0.5636381506919861, "learning_rate": 3.653368421052632e-05, "loss": 0.8846, "step": 131 }, { "epoch": 0.14773363178511473, "grad_norm": 0.6152870655059814, "learning_rate": 3.600421052631579e-05, "loss": 1.1705, "step": 132 }, { "epoch": 0.14885282596530497, "grad_norm": 0.6401809453964233, "learning_rate": 3.547473684210526e-05, "loss": 1.1054, "step": 133 }, { "epoch": 0.14997202014549524, "grad_norm": 0.6023024320602417, "learning_rate": 3.494526315789474e-05, "loss": 1.0486, "step": 134 }, { "epoch": 0.1510912143256855, "grad_norm": 0.6276597380638123, "learning_rate": 3.4415789473684216e-05, "loss": 1.0336, "step": 135 }, { "epoch": 0.15221040850587578, "grad_norm": 0.6555151343345642, "learning_rate": 3.3886315789473686e-05, "loss": 1.1, "step": 136 }, { "epoch": 0.15332960268606602, "grad_norm": 0.6589622497558594, "learning_rate": 3.3356842105263156e-05, "loss": 1.1521, "step": 137 }, { "epoch": 0.1544487968662563, "grad_norm": 0.6621150970458984, "learning_rate": 3.2827368421052634e-05, "loss": 1.1907, "step": 138 }, { "epoch": 0.15556799104644656, "grad_norm": 0.7542665004730225, "learning_rate": 3.2297894736842104e-05, "loss": 1.2919, "step": 139 }, { "epoch": 0.15668718522663683, "grad_norm": 0.6534504890441895, "learning_rate": 3.176842105263158e-05, "loss": 1.2927, "step": 140 }, { "epoch": 0.1578063794068271, "grad_norm": 0.7612457275390625, "learning_rate": 3.123894736842106e-05, "loss": 1.4274, "step": 141 }, { "epoch": 0.15892557358701734, "grad_norm": 0.7387349009513855, "learning_rate": 3.070947368421053e-05, "loss": 1.1477, "step": 142 }, { "epoch": 0.1600447677672076, "grad_norm": 0.7960942983627319, "learning_rate": 3.018e-05, "loss": 1.1789, "step": 143 }, { "epoch": 0.16116396194739788, "grad_norm": 0.8818754553794861, "learning_rate": 2.9650526315789473e-05, "loss": 1.4173, "step": 144 }, { "epoch": 0.16228315612758815, "grad_norm": 0.8571878671646118, "learning_rate": 2.912105263157895e-05, "loss": 1.3802, "step": 145 }, { "epoch": 0.1634023503077784, "grad_norm": 0.9936959147453308, "learning_rate": 2.8591578947368424e-05, "loss": 1.7175, "step": 146 }, { "epoch": 0.16452154448796866, "grad_norm": 0.9072629809379578, "learning_rate": 2.8062105263157894e-05, "loss": 1.3749, "step": 147 }, { "epoch": 0.16564073866815893, "grad_norm": 1.0150874853134155, "learning_rate": 2.753263157894737e-05, "loss": 1.5923, "step": 148 }, { "epoch": 0.1667599328483492, "grad_norm": 1.203032374382019, "learning_rate": 2.7003157894736845e-05, "loss": 1.9182, "step": 149 }, { "epoch": 0.16787912702853947, "grad_norm": 1.8546819686889648, "learning_rate": 2.6473684210526315e-05, "loss": 2.1847, "step": 150 }, { "epoch": 0.16787912702853947, "eval_loss": 1.3877595663070679, "eval_runtime": 48.2138, "eval_samples_per_second": 7.819, "eval_steps_per_second": 1.97, "step": 150 }, { "epoch": 0.1689983212087297, "grad_norm": 0.5888712406158447, "learning_rate": 2.5944210526315793e-05, "loss": 1.4846, "step": 151 }, { "epoch": 0.17011751538891998, "grad_norm": 0.6555752754211426, "learning_rate": 2.5414736842105266e-05, "loss": 2.1498, "step": 152 }, { "epoch": 0.17123670956911025, "grad_norm": 0.5300792455673218, "learning_rate": 2.4885263157894737e-05, "loss": 1.4384, "step": 153 }, { "epoch": 0.17235590374930052, "grad_norm": 0.6344078183174133, "learning_rate": 2.4355789473684214e-05, "loss": 1.8914, "step": 154 }, { "epoch": 0.17347509792949076, "grad_norm": 0.6226881742477417, "learning_rate": 2.3826315789473684e-05, "loss": 2.2678, "step": 155 }, { "epoch": 0.17459429210968103, "grad_norm": 0.5832647085189819, "learning_rate": 2.3296842105263158e-05, "loss": 1.9514, "step": 156 }, { "epoch": 0.1757134862898713, "grad_norm": 0.7049195170402527, "learning_rate": 2.2767368421052635e-05, "loss": 2.2213, "step": 157 }, { "epoch": 0.17683268047006157, "grad_norm": 0.5463729500770569, "learning_rate": 2.2237894736842105e-05, "loss": 1.7612, "step": 158 }, { "epoch": 0.1779518746502518, "grad_norm": 0.48315393924713135, "learning_rate": 2.170842105263158e-05, "loss": 1.2555, "step": 159 }, { "epoch": 0.17907106883044208, "grad_norm": 0.5512372851371765, "learning_rate": 2.1178947368421053e-05, "loss": 1.5839, "step": 160 }, { "epoch": 0.18019026301063235, "grad_norm": 0.5438306927680969, "learning_rate": 2.0649473684210527e-05, "loss": 1.6096, "step": 161 }, { "epoch": 0.18130945719082261, "grad_norm": 0.45787855982780457, "learning_rate": 2.0120000000000004e-05, "loss": 1.1068, "step": 162 }, { "epoch": 0.18242865137101288, "grad_norm": 0.4952705204486847, "learning_rate": 1.9590526315789474e-05, "loss": 1.1894, "step": 163 }, { "epoch": 0.18354784555120313, "grad_norm": 0.6114853024482727, "learning_rate": 1.9061052631578948e-05, "loss": 1.7787, "step": 164 }, { "epoch": 0.1846670397313934, "grad_norm": 0.4656667709350586, "learning_rate": 1.8531578947368422e-05, "loss": 0.971, "step": 165 }, { "epoch": 0.18578623391158366, "grad_norm": 0.6445344686508179, "learning_rate": 1.8002105263157896e-05, "loss": 1.7756, "step": 166 }, { "epoch": 0.18690542809177393, "grad_norm": 0.5453393459320068, "learning_rate": 1.747263157894737e-05, "loss": 1.2964, "step": 167 }, { "epoch": 0.18802462227196418, "grad_norm": 0.5461987853050232, "learning_rate": 1.6943157894736843e-05, "loss": 1.2804, "step": 168 }, { "epoch": 0.18914381645215445, "grad_norm": 0.5559154748916626, "learning_rate": 1.6413684210526317e-05, "loss": 1.3295, "step": 169 }, { "epoch": 0.19026301063234471, "grad_norm": 0.521136462688446, "learning_rate": 1.588421052631579e-05, "loss": 1.1399, "step": 170 }, { "epoch": 0.19138220481253498, "grad_norm": 0.5922938585281372, "learning_rate": 1.5354736842105264e-05, "loss": 1.2445, "step": 171 }, { "epoch": 0.19250139899272523, "grad_norm": 0.5375987887382507, "learning_rate": 1.4825263157894736e-05, "loss": 1.0522, "step": 172 }, { "epoch": 0.1936205931729155, "grad_norm": 0.5461782217025757, "learning_rate": 1.4295789473684212e-05, "loss": 1.1601, "step": 173 }, { "epoch": 0.19473978735310576, "grad_norm": 0.5203652381896973, "learning_rate": 1.3766315789473686e-05, "loss": 1.1082, "step": 174 }, { "epoch": 0.19585898153329603, "grad_norm": 0.707985520362854, "learning_rate": 1.3236842105263158e-05, "loss": 1.8355, "step": 175 }, { "epoch": 0.1969781757134863, "grad_norm": 0.5729698538780212, "learning_rate": 1.2707368421052633e-05, "loss": 1.0793, "step": 176 }, { "epoch": 0.19809736989367654, "grad_norm": 0.5605016350746155, "learning_rate": 1.2177894736842107e-05, "loss": 1.0786, "step": 177 }, { "epoch": 0.19921656407386681, "grad_norm": 0.495593398809433, "learning_rate": 1.1648421052631579e-05, "loss": 0.803, "step": 178 }, { "epoch": 0.20033575825405708, "grad_norm": 0.5838123559951782, "learning_rate": 1.1118947368421053e-05, "loss": 1.1495, "step": 179 }, { "epoch": 0.20145495243424735, "grad_norm": 0.5868124961853027, "learning_rate": 1.0589473684210526e-05, "loss": 1.1091, "step": 180 }, { "epoch": 0.2025741466144376, "grad_norm": 0.6182821393013, "learning_rate": 1.0060000000000002e-05, "loss": 1.2361, "step": 181 }, { "epoch": 0.20369334079462786, "grad_norm": 0.516312301158905, "learning_rate": 9.530526315789474e-06, "loss": 0.9179, "step": 182 }, { "epoch": 0.20481253497481813, "grad_norm": 0.6000572443008423, "learning_rate": 9.001052631578948e-06, "loss": 1.0504, "step": 183 }, { "epoch": 0.2059317291550084, "grad_norm": 0.632702112197876, "learning_rate": 8.471578947368422e-06, "loss": 1.2226, "step": 184 }, { "epoch": 0.20705092333519864, "grad_norm": 0.587997317314148, "learning_rate": 7.942105263157895e-06, "loss": 1.0234, "step": 185 }, { "epoch": 0.2081701175153889, "grad_norm": 0.6361390948295593, "learning_rate": 7.412631578947368e-06, "loss": 1.2129, "step": 186 }, { "epoch": 0.20928931169557918, "grad_norm": 0.5540517568588257, "learning_rate": 6.883157894736843e-06, "loss": 0.8871, "step": 187 }, { "epoch": 0.21040850587576945, "grad_norm": 0.6559872031211853, "learning_rate": 6.3536842105263166e-06, "loss": 1.1548, "step": 188 }, { "epoch": 0.21152770005595972, "grad_norm": 0.7367547750473022, "learning_rate": 5.8242105263157895e-06, "loss": 1.4792, "step": 189 }, { "epoch": 0.21264689423614996, "grad_norm": 0.6602901816368103, "learning_rate": 5.294736842105263e-06, "loss": 1.1934, "step": 190 }, { "epoch": 0.21376608841634023, "grad_norm": 0.6574941873550415, "learning_rate": 4.765263157894737e-06, "loss": 0.9984, "step": 191 }, { "epoch": 0.2148852825965305, "grad_norm": 0.6636906266212463, "learning_rate": 4.235789473684211e-06, "loss": 0.9586, "step": 192 }, { "epoch": 0.21600447677672077, "grad_norm": 0.7810147404670715, "learning_rate": 3.706315789473684e-06, "loss": 1.3688, "step": 193 }, { "epoch": 0.217123670956911, "grad_norm": 0.6754685640335083, "learning_rate": 3.1768421052631583e-06, "loss": 1.1149, "step": 194 }, { "epoch": 0.21824286513710128, "grad_norm": 0.7550037503242493, "learning_rate": 2.6473684210526316e-06, "loss": 1.172, "step": 195 }, { "epoch": 0.21936205931729155, "grad_norm": 0.8898350596427917, "learning_rate": 2.1178947368421054e-06, "loss": 1.4463, "step": 196 }, { "epoch": 0.22048125349748182, "grad_norm": 0.8928287625312805, "learning_rate": 1.5884210526315791e-06, "loss": 1.1914, "step": 197 }, { "epoch": 0.22160044767767206, "grad_norm": 1.2467180490493774, "learning_rate": 1.0589473684210527e-06, "loss": 1.2538, "step": 198 }, { "epoch": 0.22271964185786233, "grad_norm": 1.032840371131897, "learning_rate": 5.294736842105263e-07, "loss": 1.4659, "step": 199 }, { "epoch": 0.2238388360380526, "grad_norm": 1.5969254970550537, "learning_rate": 0.0, "loss": 2.3837, "step": 200 }, { "epoch": 0.2238388360380526, "eval_loss": 1.3729252815246582, "eval_runtime": 47.7822, "eval_samples_per_second": 7.89, "eval_steps_per_second": 1.988, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.48538888044544e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }