{ "best_metric": 1.9441132545471191, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.20629190304280556, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010314595152140279, "grad_norm": 0.16474565863609314, "learning_rate": 1.009e-05, "loss": 2.0835, "step": 1 }, { "epoch": 0.0010314595152140279, "eval_loss": 2.683706045150757, "eval_runtime": 10.327, "eval_samples_per_second": 39.605, "eval_steps_per_second": 9.974, "step": 1 }, { "epoch": 0.0020629190304280558, "grad_norm": 0.2366841733455658, "learning_rate": 2.018e-05, "loss": 2.1664, "step": 2 }, { "epoch": 0.0030943785456420837, "grad_norm": 0.22200703620910645, "learning_rate": 3.027e-05, "loss": 2.087, "step": 3 }, { "epoch": 0.0041258380608561115, "grad_norm": 0.25771886110305786, "learning_rate": 4.036e-05, "loss": 2.2804, "step": 4 }, { "epoch": 0.005157297576070139, "grad_norm": 0.2574929893016815, "learning_rate": 5.045e-05, "loss": 2.278, "step": 5 }, { "epoch": 0.006188757091284167, "grad_norm": 0.2646543085575104, "learning_rate": 6.054e-05, "loss": 2.2671, "step": 6 }, { "epoch": 0.007220216606498195, "grad_norm": 0.2854577898979187, "learning_rate": 7.062999999999999e-05, "loss": 2.1952, "step": 7 }, { "epoch": 0.008251676121712223, "grad_norm": 0.25475743412971497, "learning_rate": 8.072e-05, "loss": 2.2634, "step": 8 }, { "epoch": 0.009283135636926251, "grad_norm": 0.28131428360939026, "learning_rate": 9.081e-05, "loss": 2.3816, "step": 9 }, { "epoch": 0.010314595152140279, "grad_norm": 0.26251721382141113, "learning_rate": 0.0001009, "loss": 2.164, "step": 10 }, { "epoch": 0.011346054667354307, "grad_norm": 0.2883453071117401, "learning_rate": 0.00010036894736842106, "loss": 2.3787, "step": 11 }, { "epoch": 0.012377514182568335, "grad_norm": 0.3053663969039917, "learning_rate": 9.98378947368421e-05, "loss": 2.3184, "step": 12 }, { "epoch": 0.013408973697782363, "grad_norm": 0.33314481377601624, "learning_rate": 9.930684210526315e-05, "loss": 2.1834, "step": 13 }, { "epoch": 0.01444043321299639, "grad_norm": 0.3067542314529419, "learning_rate": 9.877578947368421e-05, "loss": 2.3435, "step": 14 }, { "epoch": 0.015471892728210418, "grad_norm": 0.315849244594574, "learning_rate": 9.824473684210527e-05, "loss": 2.4042, "step": 15 }, { "epoch": 0.016503352243424446, "grad_norm": 0.36983856558799744, "learning_rate": 9.771368421052632e-05, "loss": 2.2645, "step": 16 }, { "epoch": 0.017534811758638472, "grad_norm": 0.34806883335113525, "learning_rate": 9.718263157894736e-05, "loss": 2.2981, "step": 17 }, { "epoch": 0.018566271273852502, "grad_norm": 0.4065679907798767, "learning_rate": 9.665157894736842e-05, "loss": 2.464, "step": 18 }, { "epoch": 0.019597730789066528, "grad_norm": 0.36784544587135315, "learning_rate": 9.612052631578948e-05, "loss": 2.3295, "step": 19 }, { "epoch": 0.020629190304280558, "grad_norm": 0.43711021542549133, "learning_rate": 9.558947368421052e-05, "loss": 2.391, "step": 20 }, { "epoch": 0.021660649819494584, "grad_norm": 0.43961966037750244, "learning_rate": 9.505842105263159e-05, "loss": 2.2478, "step": 21 }, { "epoch": 0.022692109334708613, "grad_norm": 0.4583075940608978, "learning_rate": 9.452736842105263e-05, "loss": 2.1811, "step": 22 }, { "epoch": 0.02372356884992264, "grad_norm": 0.4827490746974945, "learning_rate": 9.399631578947368e-05, "loss": 2.3106, "step": 23 }, { "epoch": 0.02475502836513667, "grad_norm": 0.4489187002182007, "learning_rate": 9.346526315789474e-05, "loss": 2.2137, "step": 24 }, { "epoch": 0.025786487880350695, "grad_norm": 0.46533113718032837, "learning_rate": 9.293421052631578e-05, "loss": 2.2674, "step": 25 }, { "epoch": 0.026817947395564725, "grad_norm": 0.5313922762870789, "learning_rate": 9.240315789473684e-05, "loss": 2.2478, "step": 26 }, { "epoch": 0.02784940691077875, "grad_norm": 0.5256475210189819, "learning_rate": 9.18721052631579e-05, "loss": 2.3217, "step": 27 }, { "epoch": 0.02888086642599278, "grad_norm": 0.5857870578765869, "learning_rate": 9.134105263157895e-05, "loss": 2.412, "step": 28 }, { "epoch": 0.029912325941206807, "grad_norm": 0.5662283897399902, "learning_rate": 9.081e-05, "loss": 2.1491, "step": 29 }, { "epoch": 0.030943785456420837, "grad_norm": 0.6469690203666687, "learning_rate": 9.027894736842105e-05, "loss": 2.2009, "step": 30 }, { "epoch": 0.03197524497163486, "grad_norm": 0.5636357069015503, "learning_rate": 8.97478947368421e-05, "loss": 2.3953, "step": 31 }, { "epoch": 0.03300670448684889, "grad_norm": 0.5752595663070679, "learning_rate": 8.921684210526316e-05, "loss": 1.9881, "step": 32 }, { "epoch": 0.03403816400206292, "grad_norm": 0.6073081493377686, "learning_rate": 8.86857894736842e-05, "loss": 2.1195, "step": 33 }, { "epoch": 0.035069623517276945, "grad_norm": 0.6931130886077881, "learning_rate": 8.815473684210527e-05, "loss": 2.0223, "step": 34 }, { "epoch": 0.036101083032490974, "grad_norm": 0.6980631351470947, "learning_rate": 8.762368421052631e-05, "loss": 2.2645, "step": 35 }, { "epoch": 0.037132542547705004, "grad_norm": 0.6957001686096191, "learning_rate": 8.709263157894737e-05, "loss": 2.2118, "step": 36 }, { "epoch": 0.038164002062919034, "grad_norm": 0.7358052730560303, "learning_rate": 8.656157894736843e-05, "loss": 1.9432, "step": 37 }, { "epoch": 0.039195461578133056, "grad_norm": 0.8302746415138245, "learning_rate": 8.603052631578947e-05, "loss": 2.1474, "step": 38 }, { "epoch": 0.040226921093347086, "grad_norm": 0.831340491771698, "learning_rate": 8.549947368421052e-05, "loss": 2.2282, "step": 39 }, { "epoch": 0.041258380608561115, "grad_norm": 0.8497560620307922, "learning_rate": 8.496842105263158e-05, "loss": 2.3349, "step": 40 }, { "epoch": 0.042289840123775145, "grad_norm": 0.9393477439880371, "learning_rate": 8.443736842105264e-05, "loss": 2.1167, "step": 41 }, { "epoch": 0.04332129963898917, "grad_norm": 0.9122183322906494, "learning_rate": 8.390631578947369e-05, "loss": 1.9331, "step": 42 }, { "epoch": 0.0443527591542032, "grad_norm": 0.9890502691268921, "learning_rate": 8.337526315789473e-05, "loss": 2.1503, "step": 43 }, { "epoch": 0.04538421866941723, "grad_norm": 0.960683286190033, "learning_rate": 8.284421052631579e-05, "loss": 2.0222, "step": 44 }, { "epoch": 0.04641567818463126, "grad_norm": 1.041608214378357, "learning_rate": 8.231315789473685e-05, "loss": 2.5014, "step": 45 }, { "epoch": 0.04744713769984528, "grad_norm": 1.0373979806900024, "learning_rate": 8.178210526315789e-05, "loss": 2.2135, "step": 46 }, { "epoch": 0.04847859721505931, "grad_norm": 1.0630627870559692, "learning_rate": 8.125105263157894e-05, "loss": 2.1207, "step": 47 }, { "epoch": 0.04951005673027334, "grad_norm": 1.1794432401657104, "learning_rate": 8.072e-05, "loss": 1.977, "step": 48 }, { "epoch": 0.05054151624548736, "grad_norm": 1.2990717887878418, "learning_rate": 8.018894736842106e-05, "loss": 2.3018, "step": 49 }, { "epoch": 0.05157297576070139, "grad_norm": 2.5085411071777344, "learning_rate": 7.965789473684211e-05, "loss": 2.3615, "step": 50 }, { "epoch": 0.05157297576070139, "eval_loss": 2.1803929805755615, "eval_runtime": 10.3528, "eval_samples_per_second": 39.506, "eval_steps_per_second": 9.949, "step": 50 }, { "epoch": 0.05260443527591542, "grad_norm": 1.5375233888626099, "learning_rate": 7.912684210526315e-05, "loss": 2.2371, "step": 51 }, { "epoch": 0.05363589479112945, "grad_norm": 1.507387638092041, "learning_rate": 7.859578947368421e-05, "loss": 2.1881, "step": 52 }, { "epoch": 0.05466735430634347, "grad_norm": 1.2837008237838745, "learning_rate": 7.806473684210527e-05, "loss": 2.212, "step": 53 }, { "epoch": 0.0556988138215575, "grad_norm": 1.143942952156067, "learning_rate": 7.753368421052631e-05, "loss": 2.1828, "step": 54 }, { "epoch": 0.05673027333677153, "grad_norm": 0.754581868648529, "learning_rate": 7.700263157894738e-05, "loss": 2.1049, "step": 55 }, { "epoch": 0.05776173285198556, "grad_norm": 0.5540018677711487, "learning_rate": 7.647157894736842e-05, "loss": 2.0066, "step": 56 }, { "epoch": 0.058793192367199584, "grad_norm": 0.47162699699401855, "learning_rate": 7.594052631578948e-05, "loss": 1.9498, "step": 57 }, { "epoch": 0.059824651882413614, "grad_norm": 0.4960596263408661, "learning_rate": 7.540947368421053e-05, "loss": 1.9481, "step": 58 }, { "epoch": 0.060856111397627644, "grad_norm": 0.5030167698860168, "learning_rate": 7.487842105263157e-05, "loss": 2.1481, "step": 59 }, { "epoch": 0.06188757091284167, "grad_norm": 0.5442874431610107, "learning_rate": 7.434736842105263e-05, "loss": 2.0262, "step": 60 }, { "epoch": 0.0629190304280557, "grad_norm": 0.5849094986915588, "learning_rate": 7.381631578947368e-05, "loss": 2.1238, "step": 61 }, { "epoch": 0.06395048994326973, "grad_norm": 0.572014331817627, "learning_rate": 7.328526315789474e-05, "loss": 1.9832, "step": 62 }, { "epoch": 0.06498194945848375, "grad_norm": 0.5268442034721375, "learning_rate": 7.27542105263158e-05, "loss": 2.0507, "step": 63 }, { "epoch": 0.06601340897369778, "grad_norm": 0.49900418519973755, "learning_rate": 7.222315789473684e-05, "loss": 2.0956, "step": 64 }, { "epoch": 0.06704486848891181, "grad_norm": 0.6159475445747375, "learning_rate": 7.16921052631579e-05, "loss": 2.1018, "step": 65 }, { "epoch": 0.06807632800412584, "grad_norm": 0.495156466960907, "learning_rate": 7.116105263157895e-05, "loss": 2.0261, "step": 66 }, { "epoch": 0.06910778751933987, "grad_norm": 0.5375419855117798, "learning_rate": 7.062999999999999e-05, "loss": 2.0484, "step": 67 }, { "epoch": 0.07013924703455389, "grad_norm": 0.52076655626297, "learning_rate": 7.009894736842106e-05, "loss": 2.0502, "step": 68 }, { "epoch": 0.07117070654976793, "grad_norm": 0.4397340714931488, "learning_rate": 6.95678947368421e-05, "loss": 2.0678, "step": 69 }, { "epoch": 0.07220216606498195, "grad_norm": 0.4669431746006012, "learning_rate": 6.903684210526316e-05, "loss": 2.1046, "step": 70 }, { "epoch": 0.07323362558019597, "grad_norm": 0.5103170871734619, "learning_rate": 6.850578947368422e-05, "loss": 1.9856, "step": 71 }, { "epoch": 0.07426508509541001, "grad_norm": 0.47135624289512634, "learning_rate": 6.797473684210526e-05, "loss": 2.0057, "step": 72 }, { "epoch": 0.07529654461062403, "grad_norm": 0.5028303861618042, "learning_rate": 6.744368421052631e-05, "loss": 1.9072, "step": 73 }, { "epoch": 0.07632800412583807, "grad_norm": 0.5187187194824219, "learning_rate": 6.691263157894736e-05, "loss": 2.0914, "step": 74 }, { "epoch": 0.07735946364105209, "grad_norm": 0.5368776321411133, "learning_rate": 6.638157894736843e-05, "loss": 2.0192, "step": 75 }, { "epoch": 0.07839092315626611, "grad_norm": 0.5615275502204895, "learning_rate": 6.585052631578948e-05, "loss": 2.0382, "step": 76 }, { "epoch": 0.07942238267148015, "grad_norm": 0.5335748791694641, "learning_rate": 6.531947368421052e-05, "loss": 2.3065, "step": 77 }, { "epoch": 0.08045384218669417, "grad_norm": 0.5279916524887085, "learning_rate": 6.478842105263158e-05, "loss": 2.0679, "step": 78 }, { "epoch": 0.0814853017019082, "grad_norm": 0.5749956369400024, "learning_rate": 6.425736842105264e-05, "loss": 1.8294, "step": 79 }, { "epoch": 0.08251676121712223, "grad_norm": 0.5677089691162109, "learning_rate": 6.372631578947368e-05, "loss": 2.0554, "step": 80 }, { "epoch": 0.08354822073233625, "grad_norm": 0.6165634989738464, "learning_rate": 6.319526315789473e-05, "loss": 1.9838, "step": 81 }, { "epoch": 0.08457968024755029, "grad_norm": 0.6698500514030457, "learning_rate": 6.266421052631579e-05, "loss": 1.9543, "step": 82 }, { "epoch": 0.08561113976276431, "grad_norm": 0.6537290215492249, "learning_rate": 6.213315789473685e-05, "loss": 1.9906, "step": 83 }, { "epoch": 0.08664259927797834, "grad_norm": 0.7046699523925781, "learning_rate": 6.16021052631579e-05, "loss": 1.9848, "step": 84 }, { "epoch": 0.08767405879319237, "grad_norm": 0.8004752397537231, "learning_rate": 6.107105263157894e-05, "loss": 2.2463, "step": 85 }, { "epoch": 0.0887055183084064, "grad_norm": 0.7303731441497803, "learning_rate": 6.054e-05, "loss": 2.1705, "step": 86 }, { "epoch": 0.08973697782362042, "grad_norm": 0.7392880320549011, "learning_rate": 6.000894736842105e-05, "loss": 1.9809, "step": 87 }, { "epoch": 0.09076843733883445, "grad_norm": 0.8272145986557007, "learning_rate": 5.94778947368421e-05, "loss": 2.0598, "step": 88 }, { "epoch": 0.09179989685404848, "grad_norm": 0.8388079404830933, "learning_rate": 5.894684210526316e-05, "loss": 1.9314, "step": 89 }, { "epoch": 0.09283135636926251, "grad_norm": 0.8744633793830872, "learning_rate": 5.841578947368421e-05, "loss": 2.0824, "step": 90 }, { "epoch": 0.09386281588447654, "grad_norm": 0.9463280439376831, "learning_rate": 5.7884736842105265e-05, "loss": 1.7114, "step": 91 }, { "epoch": 0.09489427539969056, "grad_norm": 1.031968355178833, "learning_rate": 5.7353684210526314e-05, "loss": 2.1825, "step": 92 }, { "epoch": 0.0959257349149046, "grad_norm": 0.8773642778396606, "learning_rate": 5.6822631578947364e-05, "loss": 1.7444, "step": 93 }, { "epoch": 0.09695719443011862, "grad_norm": 1.0959527492523193, "learning_rate": 5.629157894736842e-05, "loss": 2.3406, "step": 94 }, { "epoch": 0.09798865394533264, "grad_norm": 1.0502650737762451, "learning_rate": 5.576052631578948e-05, "loss": 2.1773, "step": 95 }, { "epoch": 0.09902011346054668, "grad_norm": 1.0509074926376343, "learning_rate": 5.522947368421053e-05, "loss": 1.9535, "step": 96 }, { "epoch": 0.1000515729757607, "grad_norm": 1.4844653606414795, "learning_rate": 5.469842105263158e-05, "loss": 2.2157, "step": 97 }, { "epoch": 0.10108303249097472, "grad_norm": 1.425537347793579, "learning_rate": 5.416736842105263e-05, "loss": 2.223, "step": 98 }, { "epoch": 0.10211449200618876, "grad_norm": 1.6812279224395752, "learning_rate": 5.3636315789473685e-05, "loss": 2.0671, "step": 99 }, { "epoch": 0.10314595152140278, "grad_norm": 3.453334093093872, "learning_rate": 5.3105263157894734e-05, "loss": 2.3347, "step": 100 }, { "epoch": 0.10314595152140278, "eval_loss": 2.0188324451446533, "eval_runtime": 10.1014, "eval_samples_per_second": 40.489, "eval_steps_per_second": 10.197, "step": 100 }, { "epoch": 0.10417741103661682, "grad_norm": 0.45920103788375854, "learning_rate": 5.257421052631578e-05, "loss": 2.0059, "step": 101 }, { "epoch": 0.10520887055183084, "grad_norm": 0.5362871885299683, "learning_rate": 5.2043157894736846e-05, "loss": 1.9553, "step": 102 }, { "epoch": 0.10624033006704486, "grad_norm": 0.5611453056335449, "learning_rate": 5.1512105263157895e-05, "loss": 1.9685, "step": 103 }, { "epoch": 0.1072717895822589, "grad_norm": 0.5878576040267944, "learning_rate": 5.098105263157895e-05, "loss": 2.1462, "step": 104 }, { "epoch": 0.10830324909747292, "grad_norm": 0.5360469222068787, "learning_rate": 5.045e-05, "loss": 2.0391, "step": 105 }, { "epoch": 0.10933470861268695, "grad_norm": 0.49366244673728943, "learning_rate": 4.991894736842105e-05, "loss": 1.9675, "step": 106 }, { "epoch": 0.11036616812790098, "grad_norm": 0.5151717662811279, "learning_rate": 4.9387894736842105e-05, "loss": 2.0115, "step": 107 }, { "epoch": 0.111397627643115, "grad_norm": 0.454767644405365, "learning_rate": 4.885684210526316e-05, "loss": 1.9593, "step": 108 }, { "epoch": 0.11242908715832904, "grad_norm": 0.4281928539276123, "learning_rate": 4.832578947368421e-05, "loss": 2.1606, "step": 109 }, { "epoch": 0.11346054667354306, "grad_norm": 0.44227486848831177, "learning_rate": 4.779473684210526e-05, "loss": 1.9297, "step": 110 }, { "epoch": 0.11449200618875709, "grad_norm": 0.42532879114151, "learning_rate": 4.7263684210526315e-05, "loss": 1.9166, "step": 111 }, { "epoch": 0.11552346570397112, "grad_norm": 0.4027937650680542, "learning_rate": 4.673263157894737e-05, "loss": 1.9755, "step": 112 }, { "epoch": 0.11655492521918515, "grad_norm": 0.4236574172973633, "learning_rate": 4.620157894736842e-05, "loss": 2.044, "step": 113 }, { "epoch": 0.11758638473439917, "grad_norm": 0.4736270308494568, "learning_rate": 4.5670526315789475e-05, "loss": 2.1852, "step": 114 }, { "epoch": 0.1186178442496132, "grad_norm": 0.4548782706260681, "learning_rate": 4.5139473684210524e-05, "loss": 1.9487, "step": 115 }, { "epoch": 0.11964930376482723, "grad_norm": 0.43922939896583557, "learning_rate": 4.460842105263158e-05, "loss": 1.8432, "step": 116 }, { "epoch": 0.12068076328004126, "grad_norm": 0.46339842677116394, "learning_rate": 4.4077368421052636e-05, "loss": 1.9553, "step": 117 }, { "epoch": 0.12171222279525529, "grad_norm": 0.4777732789516449, "learning_rate": 4.3546315789473685e-05, "loss": 2.0235, "step": 118 }, { "epoch": 0.12274368231046931, "grad_norm": 0.5178118348121643, "learning_rate": 4.3015263157894734e-05, "loss": 2.1566, "step": 119 }, { "epoch": 0.12377514182568335, "grad_norm": 0.5071201324462891, "learning_rate": 4.248421052631579e-05, "loss": 2.0107, "step": 120 }, { "epoch": 0.12480660134089737, "grad_norm": 0.567829430103302, "learning_rate": 4.1953157894736846e-05, "loss": 1.9559, "step": 121 }, { "epoch": 0.1258380608561114, "grad_norm": 0.5134384036064148, "learning_rate": 4.1422105263157895e-05, "loss": 1.903, "step": 122 }, { "epoch": 0.12686952037132543, "grad_norm": 0.5110701322555542, "learning_rate": 4.0891052631578944e-05, "loss": 1.8692, "step": 123 }, { "epoch": 0.12790097988653945, "grad_norm": 0.6005079746246338, "learning_rate": 4.036e-05, "loss": 1.8922, "step": 124 }, { "epoch": 0.12893243940175347, "grad_norm": 0.5652072429656982, "learning_rate": 3.9828947368421056e-05, "loss": 1.9826, "step": 125 }, { "epoch": 0.1299638989169675, "grad_norm": 0.6075225472450256, "learning_rate": 3.9297894736842105e-05, "loss": 2.0459, "step": 126 }, { "epoch": 0.13099535843218155, "grad_norm": 0.5819191932678223, "learning_rate": 3.8766842105263154e-05, "loss": 2.0138, "step": 127 }, { "epoch": 0.13202681794739557, "grad_norm": 0.5533136129379272, "learning_rate": 3.823578947368421e-05, "loss": 1.8657, "step": 128 }, { "epoch": 0.1330582774626096, "grad_norm": 0.5914553999900818, "learning_rate": 3.7704736842105265e-05, "loss": 1.9528, "step": 129 }, { "epoch": 0.13408973697782361, "grad_norm": 0.6647089123725891, "learning_rate": 3.7173684210526315e-05, "loss": 1.8903, "step": 130 }, { "epoch": 0.13512119649303764, "grad_norm": 0.6322739124298096, "learning_rate": 3.664263157894737e-05, "loss": 1.9933, "step": 131 }, { "epoch": 0.1361526560082517, "grad_norm": 0.6538094878196716, "learning_rate": 3.611157894736842e-05, "loss": 1.9274, "step": 132 }, { "epoch": 0.1371841155234657, "grad_norm": 0.8205069899559021, "learning_rate": 3.5580526315789475e-05, "loss": 2.1654, "step": 133 }, { "epoch": 0.13821557503867973, "grad_norm": 0.6460577249526978, "learning_rate": 3.504947368421053e-05, "loss": 1.8722, "step": 134 }, { "epoch": 0.13924703455389376, "grad_norm": 0.685218334197998, "learning_rate": 3.451842105263158e-05, "loss": 1.9776, "step": 135 }, { "epoch": 0.14027849406910778, "grad_norm": 0.7692356705665588, "learning_rate": 3.398736842105263e-05, "loss": 1.8696, "step": 136 }, { "epoch": 0.14130995358432183, "grad_norm": 0.8303024172782898, "learning_rate": 3.345631578947368e-05, "loss": 2.122, "step": 137 }, { "epoch": 0.14234141309953585, "grad_norm": 0.8787012696266174, "learning_rate": 3.292526315789474e-05, "loss": 2.3107, "step": 138 }, { "epoch": 0.14337287261474987, "grad_norm": 0.7676186561584473, "learning_rate": 3.239421052631579e-05, "loss": 1.9315, "step": 139 }, { "epoch": 0.1444043321299639, "grad_norm": 0.8150516152381897, "learning_rate": 3.186315789473684e-05, "loss": 2.016, "step": 140 }, { "epoch": 0.14543579164517792, "grad_norm": 0.9549844264984131, "learning_rate": 3.1332105263157895e-05, "loss": 1.8365, "step": 141 }, { "epoch": 0.14646725116039194, "grad_norm": 0.8961436152458191, "learning_rate": 3.080105263157895e-05, "loss": 2.0348, "step": 142 }, { "epoch": 0.147498710675606, "grad_norm": 0.9285019636154175, "learning_rate": 3.027e-05, "loss": 1.9354, "step": 143 }, { "epoch": 0.14853017019082002, "grad_norm": 0.8840616345405579, "learning_rate": 2.973894736842105e-05, "loss": 2.0117, "step": 144 }, { "epoch": 0.14956162970603404, "grad_norm": 1.013698935508728, "learning_rate": 2.9207894736842105e-05, "loss": 2.0418, "step": 145 }, { "epoch": 0.15059308922124806, "grad_norm": 1.0343754291534424, "learning_rate": 2.8676842105263157e-05, "loss": 1.9242, "step": 146 }, { "epoch": 0.15162454873646208, "grad_norm": 1.0633248090744019, "learning_rate": 2.814578947368421e-05, "loss": 1.9922, "step": 147 }, { "epoch": 0.15265600825167613, "grad_norm": 1.0766022205352783, "learning_rate": 2.7614736842105266e-05, "loss": 1.9753, "step": 148 }, { "epoch": 0.15368746776689016, "grad_norm": 1.3664469718933105, "learning_rate": 2.7083684210526315e-05, "loss": 2.1614, "step": 149 }, { "epoch": 0.15471892728210418, "grad_norm": 1.9069710969924927, "learning_rate": 2.6552631578947367e-05, "loss": 1.9754, "step": 150 }, { "epoch": 0.15471892728210418, "eval_loss": 1.9592078924179077, "eval_runtime": 10.1232, "eval_samples_per_second": 40.402, "eval_steps_per_second": 10.175, "step": 150 }, { "epoch": 0.1557503867973182, "grad_norm": 0.270033597946167, "learning_rate": 2.6021578947368423e-05, "loss": 1.8414, "step": 151 }, { "epoch": 0.15678184631253222, "grad_norm": 0.26924872398376465, "learning_rate": 2.5490526315789475e-05, "loss": 1.8932, "step": 152 }, { "epoch": 0.15781330582774625, "grad_norm": 0.2842865288257599, "learning_rate": 2.4959473684210524e-05, "loss": 1.8881, "step": 153 }, { "epoch": 0.1588447653429603, "grad_norm": 0.3822712004184723, "learning_rate": 2.442842105263158e-05, "loss": 1.798, "step": 154 }, { "epoch": 0.15987622485817432, "grad_norm": 0.3421807587146759, "learning_rate": 2.389736842105263e-05, "loss": 1.9053, "step": 155 }, { "epoch": 0.16090768437338834, "grad_norm": 0.35757681727409363, "learning_rate": 2.3366315789473685e-05, "loss": 1.9624, "step": 156 }, { "epoch": 0.16193914388860237, "grad_norm": 0.3699004352092743, "learning_rate": 2.2835263157894738e-05, "loss": 1.9436, "step": 157 }, { "epoch": 0.1629706034038164, "grad_norm": 0.34937629103660583, "learning_rate": 2.230421052631579e-05, "loss": 1.9536, "step": 158 }, { "epoch": 0.16400206291903044, "grad_norm": 0.37245842814445496, "learning_rate": 2.1773157894736843e-05, "loss": 1.8913, "step": 159 }, { "epoch": 0.16503352243424446, "grad_norm": 0.4117273986339569, "learning_rate": 2.1242105263157895e-05, "loss": 2.0267, "step": 160 }, { "epoch": 0.16606498194945848, "grad_norm": 0.4274907112121582, "learning_rate": 2.0711052631578947e-05, "loss": 1.945, "step": 161 }, { "epoch": 0.1670964414646725, "grad_norm": 0.40705499053001404, "learning_rate": 2.018e-05, "loss": 1.9219, "step": 162 }, { "epoch": 0.16812790097988653, "grad_norm": 0.4329587519168854, "learning_rate": 1.9648947368421052e-05, "loss": 1.9467, "step": 163 }, { "epoch": 0.16915936049510058, "grad_norm": 0.49915069341659546, "learning_rate": 1.9117894736842105e-05, "loss": 1.7944, "step": 164 }, { "epoch": 0.1701908200103146, "grad_norm": 0.4588720202445984, "learning_rate": 1.8586842105263157e-05, "loss": 1.9091, "step": 165 }, { "epoch": 0.17122227952552863, "grad_norm": 0.46588757634162903, "learning_rate": 1.805578947368421e-05, "loss": 1.9556, "step": 166 }, { "epoch": 0.17225373904074265, "grad_norm": 0.4667753279209137, "learning_rate": 1.7524736842105266e-05, "loss": 2.1494, "step": 167 }, { "epoch": 0.17328519855595667, "grad_norm": 0.4560595154762268, "learning_rate": 1.6993684210526315e-05, "loss": 1.9755, "step": 168 }, { "epoch": 0.1743166580711707, "grad_norm": 0.5003567337989807, "learning_rate": 1.646263157894737e-05, "loss": 2.0585, "step": 169 }, { "epoch": 0.17534811758638474, "grad_norm": 0.5197983980178833, "learning_rate": 1.593157894736842e-05, "loss": 2.0608, "step": 170 }, { "epoch": 0.17637957710159877, "grad_norm": 0.5022768378257751, "learning_rate": 1.5400526315789475e-05, "loss": 1.9698, "step": 171 }, { "epoch": 0.1774110366168128, "grad_norm": 0.5685702562332153, "learning_rate": 1.4869473684210524e-05, "loss": 1.8608, "step": 172 }, { "epoch": 0.1784424961320268, "grad_norm": 0.5398536324501038, "learning_rate": 1.4338421052631579e-05, "loss": 1.8953, "step": 173 }, { "epoch": 0.17947395564724083, "grad_norm": 0.5804785490036011, "learning_rate": 1.3807368421052633e-05, "loss": 2.004, "step": 174 }, { "epoch": 0.18050541516245489, "grad_norm": 0.6018512845039368, "learning_rate": 1.3276315789473684e-05, "loss": 1.8623, "step": 175 }, { "epoch": 0.1815368746776689, "grad_norm": 0.5564123392105103, "learning_rate": 1.2745263157894738e-05, "loss": 1.8873, "step": 176 }, { "epoch": 0.18256833419288293, "grad_norm": 0.5896154046058655, "learning_rate": 1.221421052631579e-05, "loss": 1.948, "step": 177 }, { "epoch": 0.18359979370809695, "grad_norm": 0.6125632524490356, "learning_rate": 1.1683157894736843e-05, "loss": 1.9461, "step": 178 }, { "epoch": 0.18463125322331098, "grad_norm": 0.6259174346923828, "learning_rate": 1.1152105263157895e-05, "loss": 2.0347, "step": 179 }, { "epoch": 0.18566271273852503, "grad_norm": 0.6763845682144165, "learning_rate": 1.0621052631578948e-05, "loss": 1.9662, "step": 180 }, { "epoch": 0.18669417225373905, "grad_norm": 0.6786672472953796, "learning_rate": 1.009e-05, "loss": 1.7026, "step": 181 }, { "epoch": 0.18772563176895307, "grad_norm": 0.690301775932312, "learning_rate": 9.558947368421052e-06, "loss": 1.8956, "step": 182 }, { "epoch": 0.1887570912841671, "grad_norm": 0.7268269062042236, "learning_rate": 9.027894736842105e-06, "loss": 1.6767, "step": 183 }, { "epoch": 0.18978855079938112, "grad_norm": 0.7444143295288086, "learning_rate": 8.496842105263157e-06, "loss": 1.8293, "step": 184 }, { "epoch": 0.19082001031459514, "grad_norm": 0.7371475696563721, "learning_rate": 7.96578947368421e-06, "loss": 1.9327, "step": 185 }, { "epoch": 0.1918514698298092, "grad_norm": 0.7923402190208435, "learning_rate": 7.434736842105262e-06, "loss": 1.8772, "step": 186 }, { "epoch": 0.1928829293450232, "grad_norm": 0.7950240969657898, "learning_rate": 6.903684210526316e-06, "loss": 1.836, "step": 187 }, { "epoch": 0.19391438886023724, "grad_norm": 0.7987897992134094, "learning_rate": 6.372631578947369e-06, "loss": 1.9805, "step": 188 }, { "epoch": 0.19494584837545126, "grad_norm": 0.8482062816619873, "learning_rate": 5.841578947368421e-06, "loss": 1.7972, "step": 189 }, { "epoch": 0.19597730789066528, "grad_norm": 0.9001215100288391, "learning_rate": 5.310526315789474e-06, "loss": 2.2765, "step": 190 }, { "epoch": 0.19700876740587933, "grad_norm": 0.8068826198577881, "learning_rate": 4.779473684210526e-06, "loss": 1.7912, "step": 191 }, { "epoch": 0.19804022692109335, "grad_norm": 0.9369192123413086, "learning_rate": 4.248421052631579e-06, "loss": 1.8483, "step": 192 }, { "epoch": 0.19907168643630738, "grad_norm": 1.030530571937561, "learning_rate": 3.717368421052631e-06, "loss": 1.8683, "step": 193 }, { "epoch": 0.2001031459515214, "grad_norm": 1.0830963850021362, "learning_rate": 3.1863157894736844e-06, "loss": 2.1431, "step": 194 }, { "epoch": 0.20113460546673542, "grad_norm": 1.0480977296829224, "learning_rate": 2.655263157894737e-06, "loss": 1.8002, "step": 195 }, { "epoch": 0.20216606498194944, "grad_norm": 1.6289103031158447, "learning_rate": 2.1242105263157893e-06, "loss": 2.1941, "step": 196 }, { "epoch": 0.2031975244971635, "grad_norm": 1.3260751962661743, "learning_rate": 1.5931578947368422e-06, "loss": 1.8132, "step": 197 }, { "epoch": 0.20422898401237752, "grad_norm": 1.3191272020339966, "learning_rate": 1.0621052631578947e-06, "loss": 1.8345, "step": 198 }, { "epoch": 0.20526044352759154, "grad_norm": 1.8387064933776855, "learning_rate": 5.310526315789473e-07, "loss": 2.3713, "step": 199 }, { "epoch": 0.20629190304280556, "grad_norm": 3.070661783218384, "learning_rate": 0.0, "loss": 2.5448, "step": 200 }, { "epoch": 0.20629190304280556, "eval_loss": 1.9441132545471191, "eval_runtime": 10.2833, "eval_samples_per_second": 39.773, "eval_steps_per_second": 10.016, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3494542466088960.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }