{ "best_metric": 1.2496984004974365, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.006256158405930838, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.128079202965419e-05, "grad_norm": 8.603960990905762, "learning_rate": 1.013e-05, "loss": 2.4085, "step": 1 }, { "epoch": 3.128079202965419e-05, "eval_loss": 2.1622259616851807, "eval_runtime": 1453.4897, "eval_samples_per_second": 9.261, "eval_steps_per_second": 2.316, "step": 1 }, { "epoch": 6.256158405930838e-05, "grad_norm": 9.24347972869873, "learning_rate": 2.026e-05, "loss": 2.3298, "step": 2 }, { "epoch": 9.384237608896257e-05, "grad_norm": 10.167364120483398, "learning_rate": 3.039e-05, "loss": 2.3114, "step": 3 }, { "epoch": 0.00012512316811861675, "grad_norm": 8.730388641357422, "learning_rate": 4.052e-05, "loss": 2.4593, "step": 4 }, { "epoch": 0.00015640396014827095, "grad_norm": 11.115665435791016, "learning_rate": 5.065e-05, "loss": 2.6282, "step": 5 }, { "epoch": 0.00018768475217792514, "grad_norm": 9.856693267822266, "learning_rate": 6.078e-05, "loss": 3.0035, "step": 6 }, { "epoch": 0.00021896554420757934, "grad_norm": 8.807425498962402, "learning_rate": 7.091e-05, "loss": 3.0714, "step": 7 }, { "epoch": 0.0002502463362372335, "grad_norm": 9.438488006591797, "learning_rate": 8.104e-05, "loss": 3.9226, "step": 8 }, { "epoch": 0.00028152712826688773, "grad_norm": 8.250779151916504, "learning_rate": 9.117e-05, "loss": 3.3859, "step": 9 }, { "epoch": 0.0003128079202965419, "grad_norm": 9.410152435302734, "learning_rate": 0.0001013, "loss": 3.3786, "step": 10 }, { "epoch": 0.0003440887123261961, "grad_norm": 9.531462669372559, "learning_rate": 0.00010076684210526316, "loss": 3.4903, "step": 11 }, { "epoch": 0.0003753695043558503, "grad_norm": 7.381387710571289, "learning_rate": 0.0001002336842105263, "loss": 3.4717, "step": 12 }, { "epoch": 0.00040665029638550445, "grad_norm": 8.890748023986816, "learning_rate": 9.970052631578946e-05, "loss": 2.8161, "step": 13 }, { "epoch": 0.0004379310884151587, "grad_norm": 7.502996921539307, "learning_rate": 9.916736842105263e-05, "loss": 3.6355, "step": 14 }, { "epoch": 0.00046921188044481284, "grad_norm": 8.14076042175293, "learning_rate": 9.863421052631579e-05, "loss": 2.8635, "step": 15 }, { "epoch": 0.000500492672474467, "grad_norm": 8.197126388549805, "learning_rate": 9.810105263157895e-05, "loss": 3.1866, "step": 16 }, { "epoch": 0.0005317734645041213, "grad_norm": 10.944892883300781, "learning_rate": 9.756789473684211e-05, "loss": 3.1276, "step": 17 }, { "epoch": 0.0005630542565337755, "grad_norm": 9.762085914611816, "learning_rate": 9.703473684210525e-05, "loss": 3.2915, "step": 18 }, { "epoch": 0.0005943350485634296, "grad_norm": 7.906670570373535, "learning_rate": 9.650157894736842e-05, "loss": 2.3633, "step": 19 }, { "epoch": 0.0006256158405930838, "grad_norm": 12.739398002624512, "learning_rate": 9.596842105263158e-05, "loss": 2.7844, "step": 20 }, { "epoch": 0.000656896632622738, "grad_norm": 9.012168884277344, "learning_rate": 9.543526315789474e-05, "loss": 2.5847, "step": 21 }, { "epoch": 0.0006881774246523922, "grad_norm": 6.85991096496582, "learning_rate": 9.49021052631579e-05, "loss": 2.707, "step": 22 }, { "epoch": 0.0007194582166820464, "grad_norm": 8.993796348571777, "learning_rate": 9.436894736842105e-05, "loss": 2.8682, "step": 23 }, { "epoch": 0.0007507390087117006, "grad_norm": 9.29892635345459, "learning_rate": 9.38357894736842e-05, "loss": 2.8197, "step": 24 }, { "epoch": 0.0007820198007413547, "grad_norm": 7.194595813751221, "learning_rate": 9.330263157894737e-05, "loss": 2.5708, "step": 25 }, { "epoch": 0.0008133005927710089, "grad_norm": 8.387815475463867, "learning_rate": 9.276947368421051e-05, "loss": 2.749, "step": 26 }, { "epoch": 0.0008445813848006632, "grad_norm": 6.011841773986816, "learning_rate": 9.223631578947369e-05, "loss": 2.7911, "step": 27 }, { "epoch": 0.0008758621768303174, "grad_norm": 9.871502876281738, "learning_rate": 9.170315789473684e-05, "loss": 2.7641, "step": 28 }, { "epoch": 0.0009071429688599715, "grad_norm": 5.9927191734313965, "learning_rate": 9.117e-05, "loss": 2.3526, "step": 29 }, { "epoch": 0.0009384237608896257, "grad_norm": 7.498050212860107, "learning_rate": 9.063684210526316e-05, "loss": 2.2624, "step": 30 }, { "epoch": 0.00096970455291928, "grad_norm": 9.01799201965332, "learning_rate": 9.010368421052632e-05, "loss": 2.1719, "step": 31 }, { "epoch": 0.001000985344948934, "grad_norm": 7.544229030609131, "learning_rate": 8.957052631578946e-05, "loss": 2.6878, "step": 32 }, { "epoch": 0.0010322661369785882, "grad_norm": 8.978902816772461, "learning_rate": 8.903736842105263e-05, "loss": 2.812, "step": 33 }, { "epoch": 0.0010635469290082426, "grad_norm": 6.842141628265381, "learning_rate": 8.850421052631579e-05, "loss": 2.7445, "step": 34 }, { "epoch": 0.0010948277210378967, "grad_norm": 10.93300724029541, "learning_rate": 8.797105263157895e-05, "loss": 2.7108, "step": 35 }, { "epoch": 0.001126108513067551, "grad_norm": 6.697794437408447, "learning_rate": 8.743789473684211e-05, "loss": 2.6349, "step": 36 }, { "epoch": 0.001157389305097205, "grad_norm": 6.207401752471924, "learning_rate": 8.690473684210526e-05, "loss": 2.124, "step": 37 }, { "epoch": 0.0011886700971268593, "grad_norm": 9.39468002319336, "learning_rate": 8.637157894736842e-05, "loss": 2.4196, "step": 38 }, { "epoch": 0.0012199508891565134, "grad_norm": 6.997471809387207, "learning_rate": 8.583842105263158e-05, "loss": 2.4107, "step": 39 }, { "epoch": 0.0012512316811861676, "grad_norm": 9.71256160736084, "learning_rate": 8.530526315789472e-05, "loss": 2.7412, "step": 40 }, { "epoch": 0.0012825124732158218, "grad_norm": 6.489169120788574, "learning_rate": 8.47721052631579e-05, "loss": 2.3819, "step": 41 }, { "epoch": 0.001313793265245476, "grad_norm": 6.8009748458862305, "learning_rate": 8.423894736842105e-05, "loss": 2.1071, "step": 42 }, { "epoch": 0.0013450740572751303, "grad_norm": 7.14219856262207, "learning_rate": 8.37057894736842e-05, "loss": 2.1881, "step": 43 }, { "epoch": 0.0013763548493047845, "grad_norm": 8.540878295898438, "learning_rate": 8.317263157894737e-05, "loss": 2.8623, "step": 44 }, { "epoch": 0.0014076356413344386, "grad_norm": 11.647428512573242, "learning_rate": 8.263947368421053e-05, "loss": 2.7257, "step": 45 }, { "epoch": 0.0014389164333640928, "grad_norm": 7.605653285980225, "learning_rate": 8.210631578947368e-05, "loss": 2.7827, "step": 46 }, { "epoch": 0.001470197225393747, "grad_norm": 9.34484577178955, "learning_rate": 8.157315789473684e-05, "loss": 3.075, "step": 47 }, { "epoch": 0.0015014780174234011, "grad_norm": 9.603285789489746, "learning_rate": 8.104e-05, "loss": 2.6011, "step": 48 }, { "epoch": 0.0015327588094530553, "grad_norm": 15.829987525939941, "learning_rate": 8.050684210526316e-05, "loss": 2.9921, "step": 49 }, { "epoch": 0.0015640396014827095, "grad_norm": 12.170818328857422, "learning_rate": 7.997368421052632e-05, "loss": 3.409, "step": 50 }, { "epoch": 0.0015640396014827095, "eval_loss": 1.3799381256103516, "eval_runtime": 1454.095, "eval_samples_per_second": 9.257, "eval_steps_per_second": 2.315, "step": 50 }, { "epoch": 0.0015953203935123637, "grad_norm": 3.6396615505218506, "learning_rate": 7.944052631578947e-05, "loss": 1.8916, "step": 51 }, { "epoch": 0.0016266011855420178, "grad_norm": 5.276243686676025, "learning_rate": 7.890736842105263e-05, "loss": 2.152, "step": 52 }, { "epoch": 0.0016578819775716722, "grad_norm": 3.8016412258148193, "learning_rate": 7.837421052631579e-05, "loss": 2.4937, "step": 53 }, { "epoch": 0.0016891627696013264, "grad_norm": 5.2608866691589355, "learning_rate": 7.784105263157893e-05, "loss": 2.9087, "step": 54 }, { "epoch": 0.0017204435616309805, "grad_norm": 6.502185821533203, "learning_rate": 7.730789473684211e-05, "loss": 2.9029, "step": 55 }, { "epoch": 0.0017517243536606347, "grad_norm": 8.368294715881348, "learning_rate": 7.677473684210526e-05, "loss": 3.5712, "step": 56 }, { "epoch": 0.0017830051456902889, "grad_norm": 7.264784336090088, "learning_rate": 7.624157894736842e-05, "loss": 3.641, "step": 57 }, { "epoch": 0.001814285937719943, "grad_norm": 5.740438938140869, "learning_rate": 7.570842105263158e-05, "loss": 3.4282, "step": 58 }, { "epoch": 0.0018455667297495972, "grad_norm": 8.016144752502441, "learning_rate": 7.517526315789474e-05, "loss": 2.8895, "step": 59 }, { "epoch": 0.0018768475217792514, "grad_norm": 5.884403705596924, "learning_rate": 7.464210526315789e-05, "loss": 3.003, "step": 60 }, { "epoch": 0.0019081283138089055, "grad_norm": 4.902200698852539, "learning_rate": 7.410894736842106e-05, "loss": 2.6912, "step": 61 }, { "epoch": 0.00193940910583856, "grad_norm": 5.990637302398682, "learning_rate": 7.35757894736842e-05, "loss": 3.2658, "step": 62 }, { "epoch": 0.001970689897868214, "grad_norm": 6.27999210357666, "learning_rate": 7.304263157894737e-05, "loss": 2.6262, "step": 63 }, { "epoch": 0.002001970689897868, "grad_norm": 6.293524742126465, "learning_rate": 7.250947368421053e-05, "loss": 2.9332, "step": 64 }, { "epoch": 0.0020332514819275224, "grad_norm": 5.076088905334473, "learning_rate": 7.197631578947368e-05, "loss": 2.4266, "step": 65 }, { "epoch": 0.0020645322739571764, "grad_norm": 8.73829174041748, "learning_rate": 7.144315789473684e-05, "loss": 2.8764, "step": 66 }, { "epoch": 0.0020958130659868308, "grad_norm": 6.070854663848877, "learning_rate": 7.091e-05, "loss": 2.3547, "step": 67 }, { "epoch": 0.002127093858016485, "grad_norm": 6.758303642272949, "learning_rate": 7.037684210526316e-05, "loss": 2.4258, "step": 68 }, { "epoch": 0.002158374650046139, "grad_norm": 6.710521221160889, "learning_rate": 6.984368421052632e-05, "loss": 2.8411, "step": 69 }, { "epoch": 0.0021896554420757935, "grad_norm": 5.664976119995117, "learning_rate": 6.931052631578947e-05, "loss": 2.7535, "step": 70 }, { "epoch": 0.0022209362341054474, "grad_norm": 5.253357887268066, "learning_rate": 6.877736842105263e-05, "loss": 2.4245, "step": 71 }, { "epoch": 0.002252217026135102, "grad_norm": 7.573156833648682, "learning_rate": 6.824421052631579e-05, "loss": 2.8877, "step": 72 }, { "epoch": 0.0022834978181647558, "grad_norm": 7.890182971954346, "learning_rate": 6.771105263157895e-05, "loss": 2.8596, "step": 73 }, { "epoch": 0.00231477861019441, "grad_norm": 6.137823104858398, "learning_rate": 6.71778947368421e-05, "loss": 2.5406, "step": 74 }, { "epoch": 0.002346059402224064, "grad_norm": 6.550741195678711, "learning_rate": 6.664473684210527e-05, "loss": 2.3237, "step": 75 }, { "epoch": 0.0023773401942537185, "grad_norm": 7.004899978637695, "learning_rate": 6.611157894736842e-05, "loss": 2.1531, "step": 76 }, { "epoch": 0.002408620986283373, "grad_norm": 6.756047248840332, "learning_rate": 6.557842105263158e-05, "loss": 2.5744, "step": 77 }, { "epoch": 0.002439901778313027, "grad_norm": 8.45261287689209, "learning_rate": 6.504526315789474e-05, "loss": 2.2788, "step": 78 }, { "epoch": 0.0024711825703426812, "grad_norm": 7.253743648529053, "learning_rate": 6.451210526315789e-05, "loss": 2.7263, "step": 79 }, { "epoch": 0.002502463362372335, "grad_norm": 7.247540473937988, "learning_rate": 6.397894736842105e-05, "loss": 2.0866, "step": 80 }, { "epoch": 0.0025337441544019896, "grad_norm": 6.436933994293213, "learning_rate": 6.344578947368421e-05, "loss": 2.6969, "step": 81 }, { "epoch": 0.0025650249464316435, "grad_norm": 6.152482509613037, "learning_rate": 6.291263157894737e-05, "loss": 2.7461, "step": 82 }, { "epoch": 0.002596305738461298, "grad_norm": 7.333639621734619, "learning_rate": 6.237947368421053e-05, "loss": 2.6492, "step": 83 }, { "epoch": 0.002627586530490952, "grad_norm": 5.620179176330566, "learning_rate": 6.184631578947368e-05, "loss": 2.432, "step": 84 }, { "epoch": 0.0026588673225206062, "grad_norm": 6.900599479675293, "learning_rate": 6.131315789473684e-05, "loss": 2.4198, "step": 85 }, { "epoch": 0.0026901481145502606, "grad_norm": 7.6465559005737305, "learning_rate": 6.078e-05, "loss": 2.1368, "step": 86 }, { "epoch": 0.0027214289065799146, "grad_norm": 7.627400875091553, "learning_rate": 6.024684210526315e-05, "loss": 2.3818, "step": 87 }, { "epoch": 0.002752709698609569, "grad_norm": 7.236347198486328, "learning_rate": 5.9713684210526305e-05, "loss": 2.2773, "step": 88 }, { "epoch": 0.002783990490639223, "grad_norm": 9.498893737792969, "learning_rate": 5.918052631578947e-05, "loss": 2.6348, "step": 89 }, { "epoch": 0.0028152712826688773, "grad_norm": 6.960134983062744, "learning_rate": 5.8647368421052634e-05, "loss": 2.4035, "step": 90 }, { "epoch": 0.0028465520746985312, "grad_norm": 7.996723651885986, "learning_rate": 5.811421052631579e-05, "loss": 2.4397, "step": 91 }, { "epoch": 0.0028778328667281856, "grad_norm": 8.388598442077637, "learning_rate": 5.758105263157894e-05, "loss": 2.3052, "step": 92 }, { "epoch": 0.0029091136587578396, "grad_norm": 9.838214874267578, "learning_rate": 5.70478947368421e-05, "loss": 2.1201, "step": 93 }, { "epoch": 0.002940394450787494, "grad_norm": 7.014636039733887, "learning_rate": 5.6514736842105256e-05, "loss": 1.9283, "step": 94 }, { "epoch": 0.0029716752428171483, "grad_norm": 6.765852928161621, "learning_rate": 5.5981578947368424e-05, "loss": 2.1875, "step": 95 }, { "epoch": 0.0030029560348468023, "grad_norm": 7.371057033538818, "learning_rate": 5.544842105263158e-05, "loss": 2.3463, "step": 96 }, { "epoch": 0.0030342368268764567, "grad_norm": 9.242403030395508, "learning_rate": 5.491526315789474e-05, "loss": 2.9248, "step": 97 }, { "epoch": 0.0030655176189061106, "grad_norm": 7.47991943359375, "learning_rate": 5.438210526315789e-05, "loss": 2.5732, "step": 98 }, { "epoch": 0.003096798410935765, "grad_norm": 10.116243362426758, "learning_rate": 5.384894736842105e-05, "loss": 3.2258, "step": 99 }, { "epoch": 0.003128079202965419, "grad_norm": 10.563674926757812, "learning_rate": 5.331578947368421e-05, "loss": 2.8873, "step": 100 }, { "epoch": 0.003128079202965419, "eval_loss": 1.3124351501464844, "eval_runtime": 1453.2201, "eval_samples_per_second": 9.263, "eval_steps_per_second": 2.316, "step": 100 }, { "epoch": 0.0031593599949950734, "grad_norm": 2.628980875015259, "learning_rate": 5.278263157894736e-05, "loss": 1.5805, "step": 101 }, { "epoch": 0.0031906407870247273, "grad_norm": 3.0475142002105713, "learning_rate": 5.224947368421053e-05, "loss": 2.2909, "step": 102 }, { "epoch": 0.0032219215790543817, "grad_norm": 3.8600575923919678, "learning_rate": 5.171631578947368e-05, "loss": 2.6855, "step": 103 }, { "epoch": 0.0032532023710840356, "grad_norm": 6.607161998748779, "learning_rate": 5.1183157894736844e-05, "loss": 3.4535, "step": 104 }, { "epoch": 0.00328448316311369, "grad_norm": 5.392179012298584, "learning_rate": 5.065e-05, "loss": 2.8729, "step": 105 }, { "epoch": 0.0033157639551433444, "grad_norm": 5.193509578704834, "learning_rate": 5.011684210526315e-05, "loss": 2.9775, "step": 106 }, { "epoch": 0.0033470447471729984, "grad_norm": 5.64705228805542, "learning_rate": 4.958368421052631e-05, "loss": 3.152, "step": 107 }, { "epoch": 0.0033783255392026527, "grad_norm": 6.723267555236816, "learning_rate": 4.9050526315789473e-05, "loss": 2.7698, "step": 108 }, { "epoch": 0.0034096063312323067, "grad_norm": 6.680335521697998, "learning_rate": 4.851736842105263e-05, "loss": 3.5541, "step": 109 }, { "epoch": 0.003440887123261961, "grad_norm": 5.122313976287842, "learning_rate": 4.798421052631579e-05, "loss": 2.6375, "step": 110 }, { "epoch": 0.003472167915291615, "grad_norm": 6.740786075592041, "learning_rate": 4.745105263157895e-05, "loss": 2.9484, "step": 111 }, { "epoch": 0.0035034487073212694, "grad_norm": 6.559206962585449, "learning_rate": 4.69178947368421e-05, "loss": 3.0081, "step": 112 }, { "epoch": 0.0035347294993509234, "grad_norm": 6.304297924041748, "learning_rate": 4.638473684210526e-05, "loss": 2.3097, "step": 113 }, { "epoch": 0.0035660102913805778, "grad_norm": 7.039328098297119, "learning_rate": 4.585157894736842e-05, "loss": 2.5406, "step": 114 }, { "epoch": 0.003597291083410232, "grad_norm": 6.471532344818115, "learning_rate": 4.531842105263158e-05, "loss": 2.4596, "step": 115 }, { "epoch": 0.003628571875439886, "grad_norm": 5.916082859039307, "learning_rate": 4.478526315789473e-05, "loss": 3.4104, "step": 116 }, { "epoch": 0.0036598526674695405, "grad_norm": 6.180294990539551, "learning_rate": 4.425210526315789e-05, "loss": 2.7326, "step": 117 }, { "epoch": 0.0036911334594991944, "grad_norm": 5.961937427520752, "learning_rate": 4.3718947368421054e-05, "loss": 2.6678, "step": 118 }, { "epoch": 0.003722414251528849, "grad_norm": 6.331954002380371, "learning_rate": 4.318578947368421e-05, "loss": 2.4229, "step": 119 }, { "epoch": 0.0037536950435585028, "grad_norm": 4.922019958496094, "learning_rate": 4.265263157894736e-05, "loss": 3.1542, "step": 120 }, { "epoch": 0.003784975835588157, "grad_norm": 7.65913724899292, "learning_rate": 4.211947368421052e-05, "loss": 3.0389, "step": 121 }, { "epoch": 0.003816256627617811, "grad_norm": 6.049108505249023, "learning_rate": 4.1586315789473684e-05, "loss": 2.3807, "step": 122 }, { "epoch": 0.0038475374196474655, "grad_norm": 5.221220016479492, "learning_rate": 4.105315789473684e-05, "loss": 2.1381, "step": 123 }, { "epoch": 0.00387881821167712, "grad_norm": 5.57716178894043, "learning_rate": 4.052e-05, "loss": 2.2677, "step": 124 }, { "epoch": 0.003910099003706774, "grad_norm": 7.513245582580566, "learning_rate": 3.998684210526316e-05, "loss": 2.5971, "step": 125 }, { "epoch": 0.003941379795736428, "grad_norm": 6.161846160888672, "learning_rate": 3.945368421052631e-05, "loss": 2.1936, "step": 126 }, { "epoch": 0.003972660587766083, "grad_norm": 4.7585530281066895, "learning_rate": 3.892052631578947e-05, "loss": 1.966, "step": 127 }, { "epoch": 0.004003941379795736, "grad_norm": 7.348730087280273, "learning_rate": 3.838736842105263e-05, "loss": 2.5792, "step": 128 }, { "epoch": 0.0040352221718253905, "grad_norm": 4.698526382446289, "learning_rate": 3.785421052631579e-05, "loss": 2.139, "step": 129 }, { "epoch": 0.004066502963855045, "grad_norm": 8.570194244384766, "learning_rate": 3.732105263157894e-05, "loss": 2.1969, "step": 130 }, { "epoch": 0.004097783755884699, "grad_norm": 5.913156032562256, "learning_rate": 3.67878947368421e-05, "loss": 2.7538, "step": 131 }, { "epoch": 0.004129064547914353, "grad_norm": 7.469818592071533, "learning_rate": 3.6254736842105264e-05, "loss": 2.0199, "step": 132 }, { "epoch": 0.004160345339944007, "grad_norm": 6.480471134185791, "learning_rate": 3.572157894736842e-05, "loss": 2.316, "step": 133 }, { "epoch": 0.0041916261319736615, "grad_norm": 6.787581443786621, "learning_rate": 3.518842105263158e-05, "loss": 2.2553, "step": 134 }, { "epoch": 0.004222906924003316, "grad_norm": 5.075686454772949, "learning_rate": 3.465526315789473e-05, "loss": 2.6243, "step": 135 }, { "epoch": 0.00425418771603297, "grad_norm": 7.515790939331055, "learning_rate": 3.4122105263157894e-05, "loss": 2.215, "step": 136 }, { "epoch": 0.004285468508062624, "grad_norm": 6.6318840980529785, "learning_rate": 3.358894736842105e-05, "loss": 2.2177, "step": 137 }, { "epoch": 0.004316749300092278, "grad_norm": 5.295938968658447, "learning_rate": 3.305578947368421e-05, "loss": 2.0566, "step": 138 }, { "epoch": 0.004348030092121933, "grad_norm": 6.72779655456543, "learning_rate": 3.252263157894737e-05, "loss": 2.3354, "step": 139 }, { "epoch": 0.004379310884151587, "grad_norm": 8.442112922668457, "learning_rate": 3.198947368421052e-05, "loss": 3.0371, "step": 140 }, { "epoch": 0.0044105916761812405, "grad_norm": 6.13646125793457, "learning_rate": 3.1456315789473684e-05, "loss": 2.3442, "step": 141 }, { "epoch": 0.004441872468210895, "grad_norm": 6.592789649963379, "learning_rate": 3.092315789473684e-05, "loss": 1.933, "step": 142 }, { "epoch": 0.004473153260240549, "grad_norm": 6.862227439880371, "learning_rate": 3.039e-05, "loss": 2.5313, "step": 143 }, { "epoch": 0.004504434052270204, "grad_norm": 6.412154197692871, "learning_rate": 2.9856842105263153e-05, "loss": 2.1372, "step": 144 }, { "epoch": 0.004535714844299858, "grad_norm": 9.697041511535645, "learning_rate": 2.9323684210526317e-05, "loss": 2.9306, "step": 145 }, { "epoch": 0.0045669956363295116, "grad_norm": 5.902824878692627, "learning_rate": 2.879052631578947e-05, "loss": 1.9473, "step": 146 }, { "epoch": 0.004598276428359166, "grad_norm": 7.2379889488220215, "learning_rate": 2.8257368421052628e-05, "loss": 2.64, "step": 147 }, { "epoch": 0.00462955722038882, "grad_norm": 6.2485809326171875, "learning_rate": 2.772421052631579e-05, "loss": 2.1195, "step": 148 }, { "epoch": 0.004660838012418475, "grad_norm": 8.572454452514648, "learning_rate": 2.7191052631578946e-05, "loss": 2.3414, "step": 149 }, { "epoch": 0.004692118804448128, "grad_norm": 8.547698974609375, "learning_rate": 2.6657894736842104e-05, "loss": 2.8386, "step": 150 }, { "epoch": 0.004692118804448128, "eval_loss": 1.270316481590271, "eval_runtime": 1454.4582, "eval_samples_per_second": 9.255, "eval_steps_per_second": 2.314, "step": 150 }, { "epoch": 0.004723399596477783, "grad_norm": 2.42592716217041, "learning_rate": 2.6124736842105265e-05, "loss": 2.3603, "step": 151 }, { "epoch": 0.004754680388507437, "grad_norm": 2.775956153869629, "learning_rate": 2.5591578947368422e-05, "loss": 2.0025, "step": 152 }, { "epoch": 0.004785961180537091, "grad_norm": 4.130958080291748, "learning_rate": 2.5058421052631576e-05, "loss": 2.4844, "step": 153 }, { "epoch": 0.004817241972566746, "grad_norm": 4.278428554534912, "learning_rate": 2.4525263157894737e-05, "loss": 2.6427, "step": 154 }, { "epoch": 0.004848522764596399, "grad_norm": 5.410045146942139, "learning_rate": 2.3992105263157894e-05, "loss": 2.6757, "step": 155 }, { "epoch": 0.004879803556626054, "grad_norm": 5.518387794494629, "learning_rate": 2.345894736842105e-05, "loss": 3.4032, "step": 156 }, { "epoch": 0.004911084348655708, "grad_norm": 5.258603572845459, "learning_rate": 2.292578947368421e-05, "loss": 3.3476, "step": 157 }, { "epoch": 0.0049423651406853624, "grad_norm": 4.674137115478516, "learning_rate": 2.2392631578947366e-05, "loss": 2.8687, "step": 158 }, { "epoch": 0.004973645932715016, "grad_norm": 5.20580530166626, "learning_rate": 2.1859473684210527e-05, "loss": 3.3849, "step": 159 }, { "epoch": 0.00500492672474467, "grad_norm": 5.6337361335754395, "learning_rate": 2.132631578947368e-05, "loss": 3.2894, "step": 160 }, { "epoch": 0.005036207516774325, "grad_norm": 6.477571964263916, "learning_rate": 2.0793157894736842e-05, "loss": 3.4826, "step": 161 }, { "epoch": 0.005067488308803979, "grad_norm": 5.67515754699707, "learning_rate": 2.026e-05, "loss": 3.011, "step": 162 }, { "epoch": 0.0050987691008336335, "grad_norm": 5.429590702056885, "learning_rate": 1.9726842105263157e-05, "loss": 2.9377, "step": 163 }, { "epoch": 0.005130049892863287, "grad_norm": 6.45125150680542, "learning_rate": 1.9193684210526314e-05, "loss": 2.7044, "step": 164 }, { "epoch": 0.005161330684892941, "grad_norm": 7.281853675842285, "learning_rate": 1.866052631578947e-05, "loss": 3.0561, "step": 165 }, { "epoch": 0.005192611476922596, "grad_norm": 5.888491153717041, "learning_rate": 1.8127368421052632e-05, "loss": 2.7701, "step": 166 }, { "epoch": 0.00522389226895225, "grad_norm": 6.112412452697754, "learning_rate": 1.759421052631579e-05, "loss": 3.0395, "step": 167 }, { "epoch": 0.005255173060981904, "grad_norm": 5.047494888305664, "learning_rate": 1.7061052631578947e-05, "loss": 2.5301, "step": 168 }, { "epoch": 0.005286453853011558, "grad_norm": 6.8131914138793945, "learning_rate": 1.6527894736842104e-05, "loss": 3.253, "step": 169 }, { "epoch": 0.0053177346450412125, "grad_norm": 5.729398250579834, "learning_rate": 1.599473684210526e-05, "loss": 2.606, "step": 170 }, { "epoch": 0.005349015437070867, "grad_norm": 4.419203758239746, "learning_rate": 1.546157894736842e-05, "loss": 2.3328, "step": 171 }, { "epoch": 0.005380296229100521, "grad_norm": 4.286160945892334, "learning_rate": 1.4928421052631576e-05, "loss": 2.4632, "step": 172 }, { "epoch": 0.005411577021130175, "grad_norm": 5.851687431335449, "learning_rate": 1.4395263157894735e-05, "loss": 2.5339, "step": 173 }, { "epoch": 0.005442857813159829, "grad_norm": 5.407496452331543, "learning_rate": 1.3862105263157895e-05, "loss": 2.6327, "step": 174 }, { "epoch": 0.0054741386051894835, "grad_norm": 5.361586570739746, "learning_rate": 1.3328947368421052e-05, "loss": 2.4593, "step": 175 }, { "epoch": 0.005505419397219138, "grad_norm": 4.834081172943115, "learning_rate": 1.2795789473684211e-05, "loss": 2.297, "step": 176 }, { "epoch": 0.005536700189248791, "grad_norm": 7.238943576812744, "learning_rate": 1.2262631578947368e-05, "loss": 2.4705, "step": 177 }, { "epoch": 0.005567980981278446, "grad_norm": 6.222843647003174, "learning_rate": 1.1729473684210526e-05, "loss": 2.139, "step": 178 }, { "epoch": 0.0055992617733081, "grad_norm": 5.301895618438721, "learning_rate": 1.1196315789473683e-05, "loss": 2.551, "step": 179 }, { "epoch": 0.005630542565337755, "grad_norm": 6.8632049560546875, "learning_rate": 1.066315789473684e-05, "loss": 2.5802, "step": 180 }, { "epoch": 0.005661823357367409, "grad_norm": 7.063612937927246, "learning_rate": 1.013e-05, "loss": 2.2323, "step": 181 }, { "epoch": 0.0056931041493970625, "grad_norm": 7.207292079925537, "learning_rate": 9.596842105263157e-06, "loss": 2.2983, "step": 182 }, { "epoch": 0.005724384941426717, "grad_norm": 6.4148454666137695, "learning_rate": 9.063684210526316e-06, "loss": 2.3184, "step": 183 }, { "epoch": 0.005755665733456371, "grad_norm": 5.327024936676025, "learning_rate": 8.530526315789473e-06, "loss": 2.4618, "step": 184 }, { "epoch": 0.005786946525486026, "grad_norm": 5.244446277618408, "learning_rate": 7.99736842105263e-06, "loss": 2.0683, "step": 185 }, { "epoch": 0.005818227317515679, "grad_norm": 7.9136962890625, "learning_rate": 7.464210526315788e-06, "loss": 2.5013, "step": 186 }, { "epoch": 0.0058495081095453335, "grad_norm": 5.471396446228027, "learning_rate": 6.931052631578947e-06, "loss": 1.962, "step": 187 }, { "epoch": 0.005880788901574988, "grad_norm": 7.096596717834473, "learning_rate": 6.3978947368421055e-06, "loss": 2.5139, "step": 188 }, { "epoch": 0.005912069693604642, "grad_norm": 8.217840194702148, "learning_rate": 5.864736842105263e-06, "loss": 3.1278, "step": 189 }, { "epoch": 0.005943350485634297, "grad_norm": 5.830233573913574, "learning_rate": 5.33157894736842e-06, "loss": 2.1642, "step": 190 }, { "epoch": 0.00597463127766395, "grad_norm": 5.79893159866333, "learning_rate": 4.7984210526315785e-06, "loss": 2.2242, "step": 191 }, { "epoch": 0.006005912069693605, "grad_norm": 6.665280818939209, "learning_rate": 4.265263157894737e-06, "loss": 2.3143, "step": 192 }, { "epoch": 0.006037192861723259, "grad_norm": 7.00280237197876, "learning_rate": 3.732105263157894e-06, "loss": 2.9935, "step": 193 }, { "epoch": 0.006068473653752913, "grad_norm": 8.092278480529785, "learning_rate": 3.1989473684210527e-06, "loss": 2.3867, "step": 194 }, { "epoch": 0.006099754445782567, "grad_norm": 8.693373680114746, "learning_rate": 2.66578947368421e-06, "loss": 2.6186, "step": 195 }, { "epoch": 0.006131035237812221, "grad_norm": 8.179140090942383, "learning_rate": 2.1326315789473684e-06, "loss": 2.4638, "step": 196 }, { "epoch": 0.006162316029841876, "grad_norm": 6.230535984039307, "learning_rate": 1.5994736842105264e-06, "loss": 2.2426, "step": 197 }, { "epoch": 0.00619359682187153, "grad_norm": 7.633178234100342, "learning_rate": 1.0663157894736842e-06, "loss": 2.4731, "step": 198 }, { "epoch": 0.0062248776139011835, "grad_norm": 7.5978193283081055, "learning_rate": 5.331578947368421e-07, "loss": 3.0887, "step": 199 }, { "epoch": 0.006256158405930838, "grad_norm": 7.267704963684082, "learning_rate": 0.0, "loss": 2.6678, "step": 200 }, { "epoch": 0.006256158405930838, "eval_loss": 1.2496984004974365, "eval_runtime": 1455.5271, "eval_samples_per_second": 9.248, "eval_steps_per_second": 2.313, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.22654807898194e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }