{ "best_metric": 1.4315483570098877, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.14836795252225518, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000741839762611276, "grad_norm": 0.34722599387168884, "learning_rate": 1.0017e-05, "loss": 1.6873, "step": 1 }, { "epoch": 0.000741839762611276, "eval_loss": 1.8154631853103638, "eval_runtime": 24.6231, "eval_samples_per_second": 23.068, "eval_steps_per_second": 5.767, "step": 1 }, { "epoch": 0.001483679525222552, "grad_norm": 0.3773996829986572, "learning_rate": 2.0034e-05, "loss": 1.6132, "step": 2 }, { "epoch": 0.002225519287833828, "grad_norm": 0.3886512517929077, "learning_rate": 3.0050999999999997e-05, "loss": 1.7477, "step": 3 }, { "epoch": 0.002967359050445104, "grad_norm": 0.3726235628128052, "learning_rate": 4.0068e-05, "loss": 1.6803, "step": 4 }, { "epoch": 0.00370919881305638, "grad_norm": 0.41099005937576294, "learning_rate": 5.0085e-05, "loss": 1.7182, "step": 5 }, { "epoch": 0.004451038575667656, "grad_norm": 0.41989028453826904, "learning_rate": 6.0101999999999995e-05, "loss": 1.7672, "step": 6 }, { "epoch": 0.0051928783382789315, "grad_norm": 0.4218914806842804, "learning_rate": 7.0119e-05, "loss": 1.6714, "step": 7 }, { "epoch": 0.005934718100890208, "grad_norm": 0.40673041343688965, "learning_rate": 8.0136e-05, "loss": 1.7538, "step": 8 }, { "epoch": 0.0066765578635014835, "grad_norm": 0.40596580505371094, "learning_rate": 9.0153e-05, "loss": 1.6588, "step": 9 }, { "epoch": 0.00741839762611276, "grad_norm": 0.3768528997898102, "learning_rate": 0.00010017, "loss": 1.6586, "step": 10 }, { "epoch": 0.008160237388724036, "grad_norm": 0.37419360876083374, "learning_rate": 9.964278947368421e-05, "loss": 1.594, "step": 11 }, { "epoch": 0.008902077151335312, "grad_norm": 0.3967873156070709, "learning_rate": 9.911557894736841e-05, "loss": 1.6244, "step": 12 }, { "epoch": 0.009643916913946587, "grad_norm": 0.42619067430496216, "learning_rate": 9.858836842105263e-05, "loss": 1.5205, "step": 13 }, { "epoch": 0.010385756676557863, "grad_norm": 0.42936253547668457, "learning_rate": 9.806115789473684e-05, "loss": 1.5066, "step": 14 }, { "epoch": 0.01112759643916914, "grad_norm": 0.42404335737228394, "learning_rate": 9.753394736842106e-05, "loss": 1.6706, "step": 15 }, { "epoch": 0.011869436201780416, "grad_norm": 0.4178953468799591, "learning_rate": 9.700673684210526e-05, "loss": 1.6554, "step": 16 }, { "epoch": 0.012611275964391691, "grad_norm": 0.42441174387931824, "learning_rate": 9.647952631578948e-05, "loss": 1.6094, "step": 17 }, { "epoch": 0.013353115727002967, "grad_norm": 0.4085826277732849, "learning_rate": 9.595231578947368e-05, "loss": 1.8246, "step": 18 }, { "epoch": 0.014094955489614243, "grad_norm": 0.4366580843925476, "learning_rate": 9.542510526315789e-05, "loss": 1.7384, "step": 19 }, { "epoch": 0.01483679525222552, "grad_norm": 0.4653952419757843, "learning_rate": 9.48978947368421e-05, "loss": 1.8026, "step": 20 }, { "epoch": 0.015578635014836795, "grad_norm": 0.41364946961402893, "learning_rate": 9.437068421052632e-05, "loss": 1.4229, "step": 21 }, { "epoch": 0.016320474777448073, "grad_norm": 0.4450036883354187, "learning_rate": 9.384347368421052e-05, "loss": 1.5319, "step": 22 }, { "epoch": 0.017062314540059347, "grad_norm": 0.4304330348968506, "learning_rate": 9.331626315789474e-05, "loss": 1.5087, "step": 23 }, { "epoch": 0.017804154302670624, "grad_norm": 0.4402780830860138, "learning_rate": 9.278905263157894e-05, "loss": 1.527, "step": 24 }, { "epoch": 0.018545994065281898, "grad_norm": 0.5134778618812561, "learning_rate": 9.226184210526316e-05, "loss": 1.7225, "step": 25 }, { "epoch": 0.019287833827893175, "grad_norm": 0.44076353311538696, "learning_rate": 9.173463157894736e-05, "loss": 1.448, "step": 26 }, { "epoch": 0.020029673590504452, "grad_norm": 0.48980656266212463, "learning_rate": 9.120742105263159e-05, "loss": 1.6529, "step": 27 }, { "epoch": 0.020771513353115726, "grad_norm": 0.46377643942832947, "learning_rate": 9.068021052631579e-05, "loss": 1.4986, "step": 28 }, { "epoch": 0.021513353115727003, "grad_norm": 0.472007155418396, "learning_rate": 9.0153e-05, "loss": 1.7628, "step": 29 }, { "epoch": 0.02225519287833828, "grad_norm": 0.45802855491638184, "learning_rate": 8.96257894736842e-05, "loss": 1.6237, "step": 30 }, { "epoch": 0.022997032640949554, "grad_norm": 0.46878162026405334, "learning_rate": 8.909857894736842e-05, "loss": 1.6249, "step": 31 }, { "epoch": 0.02373887240356083, "grad_norm": 0.47090795636177063, "learning_rate": 8.857136842105263e-05, "loss": 1.5893, "step": 32 }, { "epoch": 0.024480712166172106, "grad_norm": 0.5051029920578003, "learning_rate": 8.804415789473684e-05, "loss": 1.5688, "step": 33 }, { "epoch": 0.025222551928783383, "grad_norm": 0.4550938606262207, "learning_rate": 8.751694736842105e-05, "loss": 1.4056, "step": 34 }, { "epoch": 0.02596439169139466, "grad_norm": 0.540347158908844, "learning_rate": 8.698973684210527e-05, "loss": 1.658, "step": 35 }, { "epoch": 0.026706231454005934, "grad_norm": 0.4885530173778534, "learning_rate": 8.646252631578948e-05, "loss": 1.4331, "step": 36 }, { "epoch": 0.02744807121661721, "grad_norm": 0.49800577759742737, "learning_rate": 8.593531578947368e-05, "loss": 1.3833, "step": 37 }, { "epoch": 0.028189910979228485, "grad_norm": 0.5188278555870056, "learning_rate": 8.54081052631579e-05, "loss": 1.5612, "step": 38 }, { "epoch": 0.028931750741839762, "grad_norm": 0.5572861433029175, "learning_rate": 8.48808947368421e-05, "loss": 1.6761, "step": 39 }, { "epoch": 0.02967359050445104, "grad_norm": 0.5264113545417786, "learning_rate": 8.435368421052631e-05, "loss": 1.3864, "step": 40 }, { "epoch": 0.030415430267062313, "grad_norm": 0.5517453551292419, "learning_rate": 8.382647368421053e-05, "loss": 1.5269, "step": 41 }, { "epoch": 0.03115727002967359, "grad_norm": 0.5187894105911255, "learning_rate": 8.329926315789474e-05, "loss": 1.4188, "step": 42 }, { "epoch": 0.031899109792284865, "grad_norm": 0.5830979943275452, "learning_rate": 8.277205263157894e-05, "loss": 1.7822, "step": 43 }, { "epoch": 0.032640949554896145, "grad_norm": 0.5921754837036133, "learning_rate": 8.224484210526316e-05, "loss": 1.5062, "step": 44 }, { "epoch": 0.03338278931750742, "grad_norm": 0.5693333148956299, "learning_rate": 8.171763157894736e-05, "loss": 1.5407, "step": 45 }, { "epoch": 0.03412462908011869, "grad_norm": 0.6015297770500183, "learning_rate": 8.119042105263158e-05, "loss": 1.5718, "step": 46 }, { "epoch": 0.034866468842729974, "grad_norm": 0.6420146226882935, "learning_rate": 8.066321052631578e-05, "loss": 1.6665, "step": 47 }, { "epoch": 0.03560830860534125, "grad_norm": 0.6055085062980652, "learning_rate": 8.0136e-05, "loss": 1.5059, "step": 48 }, { "epoch": 0.03635014836795252, "grad_norm": 0.7061499953269958, "learning_rate": 7.960878947368421e-05, "loss": 1.8428, "step": 49 }, { "epoch": 0.037091988130563795, "grad_norm": 0.7242527604103088, "learning_rate": 7.908157894736842e-05, "loss": 1.5827, "step": 50 }, { "epoch": 0.037091988130563795, "eval_loss": 1.5464621782302856, "eval_runtime": 24.5932, "eval_samples_per_second": 23.096, "eval_steps_per_second": 5.774, "step": 50 }, { "epoch": 0.037833827893175076, "grad_norm": 0.4841320216655731, "learning_rate": 7.855436842105262e-05, "loss": 1.4443, "step": 51 }, { "epoch": 0.03857566765578635, "grad_norm": 0.46595099568367004, "learning_rate": 7.802715789473684e-05, "loss": 1.4447, "step": 52 }, { "epoch": 0.039317507418397624, "grad_norm": 0.4743313491344452, "learning_rate": 7.749994736842104e-05, "loss": 1.4073, "step": 53 }, { "epoch": 0.040059347181008904, "grad_norm": 0.46219533681869507, "learning_rate": 7.697273684210526e-05, "loss": 1.4843, "step": 54 }, { "epoch": 0.04080118694362018, "grad_norm": 0.4498580992221832, "learning_rate": 7.644552631578947e-05, "loss": 1.3828, "step": 55 }, { "epoch": 0.04154302670623145, "grad_norm": 0.4310692250728607, "learning_rate": 7.591831578947369e-05, "loss": 1.5251, "step": 56 }, { "epoch": 0.04228486646884273, "grad_norm": 0.44536563754081726, "learning_rate": 7.539110526315789e-05, "loss": 1.5329, "step": 57 }, { "epoch": 0.04302670623145401, "grad_norm": 0.4388580620288849, "learning_rate": 7.48638947368421e-05, "loss": 1.4534, "step": 58 }, { "epoch": 0.04376854599406528, "grad_norm": 0.44389694929122925, "learning_rate": 7.433668421052632e-05, "loss": 1.6085, "step": 59 }, { "epoch": 0.04451038575667656, "grad_norm": 0.42019519209861755, "learning_rate": 7.380947368421052e-05, "loss": 1.3532, "step": 60 }, { "epoch": 0.045252225519287835, "grad_norm": 0.4290321469306946, "learning_rate": 7.328226315789473e-05, "loss": 1.4509, "step": 61 }, { "epoch": 0.04599406528189911, "grad_norm": 0.44131532311439514, "learning_rate": 7.275505263157895e-05, "loss": 1.4341, "step": 62 }, { "epoch": 0.04673590504451038, "grad_norm": 0.4584609866142273, "learning_rate": 7.222784210526316e-05, "loss": 1.4711, "step": 63 }, { "epoch": 0.04747774480712166, "grad_norm": 0.45380136370658875, "learning_rate": 7.170063157894737e-05, "loss": 1.5905, "step": 64 }, { "epoch": 0.04821958456973294, "grad_norm": 0.44095584750175476, "learning_rate": 7.117342105263158e-05, "loss": 1.3695, "step": 65 }, { "epoch": 0.04896142433234421, "grad_norm": 0.43939009308815, "learning_rate": 7.064621052631578e-05, "loss": 1.4743, "step": 66 }, { "epoch": 0.04970326409495549, "grad_norm": 0.44385743141174316, "learning_rate": 7.0119e-05, "loss": 1.3362, "step": 67 }, { "epoch": 0.050445103857566766, "grad_norm": 0.46653878688812256, "learning_rate": 6.959178947368421e-05, "loss": 1.353, "step": 68 }, { "epoch": 0.05118694362017804, "grad_norm": 0.49922919273376465, "learning_rate": 6.906457894736843e-05, "loss": 1.5815, "step": 69 }, { "epoch": 0.05192878338278932, "grad_norm": 0.48967719078063965, "learning_rate": 6.853736842105263e-05, "loss": 1.6117, "step": 70 }, { "epoch": 0.052670623145400594, "grad_norm": 0.5269822478294373, "learning_rate": 6.801015789473684e-05, "loss": 1.7096, "step": 71 }, { "epoch": 0.05341246290801187, "grad_norm": 0.5190030932426453, "learning_rate": 6.748294736842105e-05, "loss": 1.5746, "step": 72 }, { "epoch": 0.05415430267062315, "grad_norm": 0.5140092372894287, "learning_rate": 6.695573684210526e-05, "loss": 1.5915, "step": 73 }, { "epoch": 0.05489614243323442, "grad_norm": 0.5112202167510986, "learning_rate": 6.642852631578946e-05, "loss": 1.5368, "step": 74 }, { "epoch": 0.055637982195845696, "grad_norm": 0.45904073119163513, "learning_rate": 6.590131578947369e-05, "loss": 1.3563, "step": 75 }, { "epoch": 0.05637982195845697, "grad_norm": 0.4861205816268921, "learning_rate": 6.537410526315789e-05, "loss": 1.4552, "step": 76 }, { "epoch": 0.05712166172106825, "grad_norm": 0.5068615674972534, "learning_rate": 6.484689473684211e-05, "loss": 1.4687, "step": 77 }, { "epoch": 0.057863501483679525, "grad_norm": 0.49406206607818604, "learning_rate": 6.431968421052631e-05, "loss": 1.4759, "step": 78 }, { "epoch": 0.0586053412462908, "grad_norm": 0.4920863211154938, "learning_rate": 6.379247368421052e-05, "loss": 1.4521, "step": 79 }, { "epoch": 0.05934718100890208, "grad_norm": 0.5270341038703918, "learning_rate": 6.326526315789474e-05, "loss": 1.6008, "step": 80 }, { "epoch": 0.06008902077151335, "grad_norm": 0.5248571038246155, "learning_rate": 6.273805263157894e-05, "loss": 1.6934, "step": 81 }, { "epoch": 0.06083086053412463, "grad_norm": 0.519171416759491, "learning_rate": 6.221084210526315e-05, "loss": 1.5087, "step": 82 }, { "epoch": 0.06157270029673591, "grad_norm": 0.4932954013347626, "learning_rate": 6.168363157894737e-05, "loss": 1.3543, "step": 83 }, { "epoch": 0.06231454005934718, "grad_norm": 0.5216118097305298, "learning_rate": 6.115642105263159e-05, "loss": 1.4453, "step": 84 }, { "epoch": 0.06305637982195846, "grad_norm": 0.5293753147125244, "learning_rate": 6.0629210526315787e-05, "loss": 1.4051, "step": 85 }, { "epoch": 0.06379821958456973, "grad_norm": 0.5512006878852844, "learning_rate": 6.0101999999999995e-05, "loss": 1.4588, "step": 86 }, { "epoch": 0.064540059347181, "grad_norm": 0.5609034895896912, "learning_rate": 5.95747894736842e-05, "loss": 1.5523, "step": 87 }, { "epoch": 0.06528189910979229, "grad_norm": 0.6228796243667603, "learning_rate": 5.904757894736841e-05, "loss": 1.563, "step": 88 }, { "epoch": 0.06602373887240356, "grad_norm": 0.6174453496932983, "learning_rate": 5.852036842105263e-05, "loss": 1.6785, "step": 89 }, { "epoch": 0.06676557863501484, "grad_norm": 0.560930609703064, "learning_rate": 5.799315789473684e-05, "loss": 1.5602, "step": 90 }, { "epoch": 0.06750741839762611, "grad_norm": 0.5620979070663452, "learning_rate": 5.746594736842105e-05, "loss": 1.4652, "step": 91 }, { "epoch": 0.06824925816023739, "grad_norm": 0.5614945888519287, "learning_rate": 5.693873684210526e-05, "loss": 1.4533, "step": 92 }, { "epoch": 0.06899109792284866, "grad_norm": 0.6170912384986877, "learning_rate": 5.641152631578947e-05, "loss": 1.5405, "step": 93 }, { "epoch": 0.06973293768545995, "grad_norm": 0.6051465272903442, "learning_rate": 5.588431578947368e-05, "loss": 1.3788, "step": 94 }, { "epoch": 0.07047477744807122, "grad_norm": 0.612392008304596, "learning_rate": 5.5357105263157896e-05, "loss": 1.513, "step": 95 }, { "epoch": 0.0712166172106825, "grad_norm": 0.5922538638114929, "learning_rate": 5.482989473684211e-05, "loss": 1.6693, "step": 96 }, { "epoch": 0.07195845697329377, "grad_norm": 0.6305238604545593, "learning_rate": 5.430268421052632e-05, "loss": 1.5546, "step": 97 }, { "epoch": 0.07270029673590504, "grad_norm": 0.6214465498924255, "learning_rate": 5.377547368421053e-05, "loss": 1.4541, "step": 98 }, { "epoch": 0.07344213649851632, "grad_norm": 0.6209021806716919, "learning_rate": 5.3248263157894736e-05, "loss": 1.4974, "step": 99 }, { "epoch": 0.07418397626112759, "grad_norm": 0.7504584193229675, "learning_rate": 5.2721052631578944e-05, "loss": 1.7205, "step": 100 }, { "epoch": 0.07418397626112759, "eval_loss": 1.4849724769592285, "eval_runtime": 24.627, "eval_samples_per_second": 23.064, "eval_steps_per_second": 5.766, "step": 100 }, { "epoch": 0.07492581602373888, "grad_norm": 0.4530433416366577, "learning_rate": 5.219384210526315e-05, "loss": 1.514, "step": 101 }, { "epoch": 0.07566765578635015, "grad_norm": 0.4724753201007843, "learning_rate": 5.1666631578947374e-05, "loss": 1.4021, "step": 102 }, { "epoch": 0.07640949554896143, "grad_norm": 0.47871169447898865, "learning_rate": 5.113942105263158e-05, "loss": 1.5589, "step": 103 }, { "epoch": 0.0771513353115727, "grad_norm": 0.45524775981903076, "learning_rate": 5.061221052631579e-05, "loss": 1.3405, "step": 104 }, { "epoch": 0.07789317507418397, "grad_norm": 0.4814228415489197, "learning_rate": 5.0085e-05, "loss": 1.4676, "step": 105 }, { "epoch": 0.07863501483679525, "grad_norm": 0.4835189878940582, "learning_rate": 4.955778947368421e-05, "loss": 1.4522, "step": 106 }, { "epoch": 0.07937685459940653, "grad_norm": 0.4658392071723938, "learning_rate": 4.903057894736842e-05, "loss": 1.3801, "step": 107 }, { "epoch": 0.08011869436201781, "grad_norm": 0.47330623865127563, "learning_rate": 4.850336842105263e-05, "loss": 1.4066, "step": 108 }, { "epoch": 0.08086053412462908, "grad_norm": 0.46363088488578796, "learning_rate": 4.797615789473684e-05, "loss": 1.3805, "step": 109 }, { "epoch": 0.08160237388724036, "grad_norm": 0.4817906618118286, "learning_rate": 4.744894736842105e-05, "loss": 1.4796, "step": 110 }, { "epoch": 0.08234421364985163, "grad_norm": 0.4742972254753113, "learning_rate": 4.692173684210526e-05, "loss": 1.4028, "step": 111 }, { "epoch": 0.0830860534124629, "grad_norm": 0.4543997049331665, "learning_rate": 4.639452631578947e-05, "loss": 1.3328, "step": 112 }, { "epoch": 0.08382789317507418, "grad_norm": 0.4936276376247406, "learning_rate": 4.586731578947368e-05, "loss": 1.5478, "step": 113 }, { "epoch": 0.08456973293768547, "grad_norm": 0.48722100257873535, "learning_rate": 4.5340105263157894e-05, "loss": 1.3693, "step": 114 }, { "epoch": 0.08531157270029674, "grad_norm": 0.4936085641384125, "learning_rate": 4.48128947368421e-05, "loss": 1.4177, "step": 115 }, { "epoch": 0.08605341246290801, "grad_norm": 0.5257976055145264, "learning_rate": 4.428568421052632e-05, "loss": 1.4234, "step": 116 }, { "epoch": 0.08679525222551929, "grad_norm": 0.5145537853240967, "learning_rate": 4.3758473684210525e-05, "loss": 1.4445, "step": 117 }, { "epoch": 0.08753709198813056, "grad_norm": 0.5173904895782471, "learning_rate": 4.323126315789474e-05, "loss": 1.3109, "step": 118 }, { "epoch": 0.08827893175074183, "grad_norm": 0.5231949090957642, "learning_rate": 4.270405263157895e-05, "loss": 1.4202, "step": 119 }, { "epoch": 0.08902077151335312, "grad_norm": 0.5481486320495605, "learning_rate": 4.217684210526316e-05, "loss": 1.6027, "step": 120 }, { "epoch": 0.0897626112759644, "grad_norm": 0.5185007452964783, "learning_rate": 4.164963157894737e-05, "loss": 1.4227, "step": 121 }, { "epoch": 0.09050445103857567, "grad_norm": 0.5656704306602478, "learning_rate": 4.112242105263158e-05, "loss": 1.7125, "step": 122 }, { "epoch": 0.09124629080118694, "grad_norm": 0.5431631207466125, "learning_rate": 4.059521052631579e-05, "loss": 1.5132, "step": 123 }, { "epoch": 0.09198813056379822, "grad_norm": 0.5578521490097046, "learning_rate": 4.0068e-05, "loss": 1.4467, "step": 124 }, { "epoch": 0.09272997032640949, "grad_norm": 0.535007119178772, "learning_rate": 3.954078947368421e-05, "loss": 1.2726, "step": 125 }, { "epoch": 0.09347181008902077, "grad_norm": 0.5330731272697449, "learning_rate": 3.901357894736842e-05, "loss": 1.4208, "step": 126 }, { "epoch": 0.09421364985163205, "grad_norm": 0.567901611328125, "learning_rate": 3.848636842105263e-05, "loss": 1.3921, "step": 127 }, { "epoch": 0.09495548961424333, "grad_norm": 0.5680362582206726, "learning_rate": 3.795915789473684e-05, "loss": 1.3536, "step": 128 }, { "epoch": 0.0956973293768546, "grad_norm": 0.5580912828445435, "learning_rate": 3.743194736842105e-05, "loss": 1.3825, "step": 129 }, { "epoch": 0.09643916913946587, "grad_norm": 0.5955318808555603, "learning_rate": 3.690473684210526e-05, "loss": 1.5347, "step": 130 }, { "epoch": 0.09718100890207715, "grad_norm": 0.5580132007598877, "learning_rate": 3.6377526315789475e-05, "loss": 1.4045, "step": 131 }, { "epoch": 0.09792284866468842, "grad_norm": 0.6038758158683777, "learning_rate": 3.585031578947368e-05, "loss": 1.4749, "step": 132 }, { "epoch": 0.09866468842729971, "grad_norm": 0.5841488242149353, "learning_rate": 3.532310526315789e-05, "loss": 1.4079, "step": 133 }, { "epoch": 0.09940652818991098, "grad_norm": 0.5694416165351868, "learning_rate": 3.4795894736842106e-05, "loss": 1.483, "step": 134 }, { "epoch": 0.10014836795252226, "grad_norm": 0.6277908682823181, "learning_rate": 3.4268684210526314e-05, "loss": 1.42, "step": 135 }, { "epoch": 0.10089020771513353, "grad_norm": 0.6425288319587708, "learning_rate": 3.374147368421052e-05, "loss": 1.5528, "step": 136 }, { "epoch": 0.1016320474777448, "grad_norm": 0.5806187391281128, "learning_rate": 3.321426315789473e-05, "loss": 1.5093, "step": 137 }, { "epoch": 0.10237388724035608, "grad_norm": 0.6284953355789185, "learning_rate": 3.2687052631578946e-05, "loss": 1.3592, "step": 138 }, { "epoch": 0.10311572700296735, "grad_norm": 0.6375062465667725, "learning_rate": 3.2159842105263154e-05, "loss": 1.281, "step": 139 }, { "epoch": 0.10385756676557864, "grad_norm": 0.5946156978607178, "learning_rate": 3.163263157894737e-05, "loss": 1.2957, "step": 140 }, { "epoch": 0.10459940652818991, "grad_norm": 0.6250626444816589, "learning_rate": 3.110542105263158e-05, "loss": 1.455, "step": 141 }, { "epoch": 0.10534124629080119, "grad_norm": 0.6201031804084778, "learning_rate": 3.057821052631579e-05, "loss": 1.5049, "step": 142 }, { "epoch": 0.10608308605341246, "grad_norm": 0.6878780126571655, "learning_rate": 3.0050999999999997e-05, "loss": 1.4236, "step": 143 }, { "epoch": 0.10682492581602374, "grad_norm": 0.6680196523666382, "learning_rate": 2.9523789473684206e-05, "loss": 1.4347, "step": 144 }, { "epoch": 0.10756676557863501, "grad_norm": 0.7300444841384888, "learning_rate": 2.899657894736842e-05, "loss": 1.7655, "step": 145 }, { "epoch": 0.1083086053412463, "grad_norm": 0.7111066579818726, "learning_rate": 2.846936842105263e-05, "loss": 1.6347, "step": 146 }, { "epoch": 0.10905044510385757, "grad_norm": 0.663420557975769, "learning_rate": 2.794215789473684e-05, "loss": 1.345, "step": 147 }, { "epoch": 0.10979228486646884, "grad_norm": 0.7585017681121826, "learning_rate": 2.7414947368421056e-05, "loss": 1.4643, "step": 148 }, { "epoch": 0.11053412462908012, "grad_norm": 0.7578019499778748, "learning_rate": 2.6887736842105264e-05, "loss": 1.6194, "step": 149 }, { "epoch": 0.11127596439169139, "grad_norm": 0.8730563521385193, "learning_rate": 2.6360526315789472e-05, "loss": 1.6065, "step": 150 }, { "epoch": 0.11127596439169139, "eval_loss": 1.4480029344558716, "eval_runtime": 24.6337, "eval_samples_per_second": 23.058, "eval_steps_per_second": 5.764, "step": 150 }, { "epoch": 0.11201780415430267, "grad_norm": 0.4206100106239319, "learning_rate": 2.5833315789473687e-05, "loss": 1.1387, "step": 151 }, { "epoch": 0.11275964391691394, "grad_norm": 0.4989394247531891, "learning_rate": 2.5306105263157895e-05, "loss": 1.4917, "step": 152 }, { "epoch": 0.11350148367952523, "grad_norm": 0.5074396729469299, "learning_rate": 2.4778894736842104e-05, "loss": 1.686, "step": 153 }, { "epoch": 0.1142433234421365, "grad_norm": 0.5123466849327087, "learning_rate": 2.4251684210526315e-05, "loss": 1.3703, "step": 154 }, { "epoch": 0.11498516320474778, "grad_norm": 0.5191718935966492, "learning_rate": 2.3724473684210524e-05, "loss": 1.4312, "step": 155 }, { "epoch": 0.11572700296735905, "grad_norm": 0.47905129194259644, "learning_rate": 2.3197263157894735e-05, "loss": 1.4236, "step": 156 }, { "epoch": 0.11646884272997032, "grad_norm": 0.4941026270389557, "learning_rate": 2.2670052631578947e-05, "loss": 1.2631, "step": 157 }, { "epoch": 0.1172106824925816, "grad_norm": 0.5065926909446716, "learning_rate": 2.214284210526316e-05, "loss": 1.4134, "step": 158 }, { "epoch": 0.11795252225519288, "grad_norm": 0.5101008415222168, "learning_rate": 2.161563157894737e-05, "loss": 1.3647, "step": 159 }, { "epoch": 0.11869436201780416, "grad_norm": 0.5099448561668396, "learning_rate": 2.108842105263158e-05, "loss": 1.3093, "step": 160 }, { "epoch": 0.11943620178041543, "grad_norm": 0.5270506143569946, "learning_rate": 2.056121052631579e-05, "loss": 1.4776, "step": 161 }, { "epoch": 0.1201780415430267, "grad_norm": 0.519027829170227, "learning_rate": 2.0034e-05, "loss": 1.4011, "step": 162 }, { "epoch": 0.12091988130563798, "grad_norm": 0.520959734916687, "learning_rate": 1.950678947368421e-05, "loss": 1.3618, "step": 163 }, { "epoch": 0.12166172106824925, "grad_norm": 0.5643500685691833, "learning_rate": 1.897957894736842e-05, "loss": 1.5463, "step": 164 }, { "epoch": 0.12240356083086053, "grad_norm": 0.5543540716171265, "learning_rate": 1.845236842105263e-05, "loss": 1.3988, "step": 165 }, { "epoch": 0.12314540059347182, "grad_norm": 0.5814954042434692, "learning_rate": 1.792515789473684e-05, "loss": 1.5149, "step": 166 }, { "epoch": 0.12388724035608309, "grad_norm": 0.5395814180374146, "learning_rate": 1.7397947368421053e-05, "loss": 1.4446, "step": 167 }, { "epoch": 0.12462908011869436, "grad_norm": 0.5087323784828186, "learning_rate": 1.687073684210526e-05, "loss": 1.3358, "step": 168 }, { "epoch": 0.12537091988130564, "grad_norm": 0.5564747452735901, "learning_rate": 1.6343526315789473e-05, "loss": 1.416, "step": 169 }, { "epoch": 0.1261127596439169, "grad_norm": 0.5298991799354553, "learning_rate": 1.5816315789473685e-05, "loss": 1.3435, "step": 170 }, { "epoch": 0.12685459940652818, "grad_norm": 0.554205060005188, "learning_rate": 1.5289105263157896e-05, "loss": 1.4239, "step": 171 }, { "epoch": 0.12759643916913946, "grad_norm": 0.5437472462654114, "learning_rate": 1.4761894736842103e-05, "loss": 1.5082, "step": 172 }, { "epoch": 0.12833827893175073, "grad_norm": 0.5654957294464111, "learning_rate": 1.4234684210526314e-05, "loss": 1.4074, "step": 173 }, { "epoch": 0.129080118694362, "grad_norm": 0.5292185544967651, "learning_rate": 1.3707473684210528e-05, "loss": 1.4173, "step": 174 }, { "epoch": 0.1298219584569733, "grad_norm": 0.5802279114723206, "learning_rate": 1.3180263157894736e-05, "loss": 1.4608, "step": 175 }, { "epoch": 0.13056379821958458, "grad_norm": 0.5980931520462036, "learning_rate": 1.2653052631578948e-05, "loss": 1.6759, "step": 176 }, { "epoch": 0.13130563798219586, "grad_norm": 0.5813962817192078, "learning_rate": 1.2125842105263158e-05, "loss": 1.4882, "step": 177 }, { "epoch": 0.13204747774480713, "grad_norm": 0.6063507199287415, "learning_rate": 1.1598631578947368e-05, "loss": 1.518, "step": 178 }, { "epoch": 0.1327893175074184, "grad_norm": 0.5826941132545471, "learning_rate": 1.107142105263158e-05, "loss": 1.4704, "step": 179 }, { "epoch": 0.13353115727002968, "grad_norm": 0.6097956895828247, "learning_rate": 1.054421052631579e-05, "loss": 1.4025, "step": 180 }, { "epoch": 0.13427299703264095, "grad_norm": 0.6548146605491638, "learning_rate": 1.0017e-05, "loss": 1.548, "step": 181 }, { "epoch": 0.13501483679525222, "grad_norm": 0.663008987903595, "learning_rate": 9.48978947368421e-06, "loss": 1.764, "step": 182 }, { "epoch": 0.1357566765578635, "grad_norm": 0.5668148994445801, "learning_rate": 8.96257894736842e-06, "loss": 1.351, "step": 183 }, { "epoch": 0.13649851632047477, "grad_norm": 0.6421298384666443, "learning_rate": 8.43536842105263e-06, "loss": 1.4227, "step": 184 }, { "epoch": 0.13724035608308605, "grad_norm": 0.6036615967750549, "learning_rate": 7.908157894736842e-06, "loss": 1.3897, "step": 185 }, { "epoch": 0.13798219584569732, "grad_norm": 0.6182149052619934, "learning_rate": 7.380947368421051e-06, "loss": 1.7172, "step": 186 }, { "epoch": 0.1387240356083086, "grad_norm": 0.6109817624092102, "learning_rate": 6.853736842105264e-06, "loss": 1.4867, "step": 187 }, { "epoch": 0.1394658753709199, "grad_norm": 0.696526288986206, "learning_rate": 6.326526315789474e-06, "loss": 1.5992, "step": 188 }, { "epoch": 0.14020771513353117, "grad_norm": 0.6494670510292053, "learning_rate": 5.799315789473684e-06, "loss": 1.3784, "step": 189 }, { "epoch": 0.14094955489614244, "grad_norm": 0.6380143761634827, "learning_rate": 5.272105263157895e-06, "loss": 1.4366, "step": 190 }, { "epoch": 0.14169139465875372, "grad_norm": 0.6222284436225891, "learning_rate": 4.744894736842105e-06, "loss": 1.379, "step": 191 }, { "epoch": 0.142433234421365, "grad_norm": 0.6166477203369141, "learning_rate": 4.217684210526315e-06, "loss": 1.3242, "step": 192 }, { "epoch": 0.14317507418397626, "grad_norm": 0.7048677206039429, "learning_rate": 3.6904736842105257e-06, "loss": 1.5888, "step": 193 }, { "epoch": 0.14391691394658754, "grad_norm": 0.708202600479126, "learning_rate": 3.163263157894737e-06, "loss": 1.5138, "step": 194 }, { "epoch": 0.1446587537091988, "grad_norm": 0.6921345591545105, "learning_rate": 2.6360526315789473e-06, "loss": 1.2819, "step": 195 }, { "epoch": 0.14540059347181009, "grad_norm": 0.7002333998680115, "learning_rate": 2.1088421052631577e-06, "loss": 1.3984, "step": 196 }, { "epoch": 0.14614243323442136, "grad_norm": 0.7143130898475647, "learning_rate": 1.5816315789473685e-06, "loss": 1.5338, "step": 197 }, { "epoch": 0.14688427299703263, "grad_norm": 0.7665969133377075, "learning_rate": 1.0544210526315788e-06, "loss": 1.5382, "step": 198 }, { "epoch": 0.1476261127596439, "grad_norm": 0.8401339650154114, "learning_rate": 5.272105263157894e-07, "loss": 1.8398, "step": 199 }, { "epoch": 0.14836795252225518, "grad_norm": 0.8993075489997864, "learning_rate": 0.0, "loss": 1.6225, "step": 200 }, { "epoch": 0.14836795252225518, "eval_loss": 1.4315483570098877, "eval_runtime": 25.1319, "eval_samples_per_second": 22.601, "eval_steps_per_second": 5.65, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.65452005244928e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }