diff --git "a/checkpoint-965/trainer_state.json" "b/checkpoint-965/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-965/trainer_state.json" @@ -0,0 +1,11420 @@ +{ + "best_metric": 0.8643116540112248, + "best_model_checkpoint": "/tmp/logs/binary_classification_model_v3.1.5_Junction/checkpoint-965", + "epoch": 386.0, + "eval_steps": 500, + "global_step": 965, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "grad_norm": 1347475.125, + "learning_rate": 1.1307100859339665e-09, + "loss": 0.8565, + "step": 1 + }, + { + "epoch": 0.8, + "grad_norm": 1321344.875, + "learning_rate": 2.261420171867933e-09, + "loss": 0.8385, + "step": 2 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8175734281539917, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.3661, + "eval_samples_per_second": 934.45, + "eval_steps_per_second": 0.845, + "step": 2 + }, + { + "epoch": 1.2, + "grad_norm": 1193755.625, + "learning_rate": 3.3921302578018997e-09, + "loss": 0.8013, + "step": 3 + }, + { + "epoch": 1.6, + "grad_norm": 1351931.125, + "learning_rate": 4.522840343735866e-09, + "loss": 0.8474, + "step": 4 + }, + { + "epoch": 2.0, + "grad_norm": 1330190.0, + "learning_rate": 5.653550429669833e-09, + "loss": 0.8528, + "step": 5 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.817514181137085, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4097, + "eval_samples_per_second": 917.557, + "eval_steps_per_second": 0.83, + "step": 5 + }, + { + "epoch": 2.4, + "grad_norm": 1290867.375, + "learning_rate": 6.7842605156037995e-09, + "loss": 0.8322, + "step": 6 + }, + { + "epoch": 2.8, + "grad_norm": 1347207.125, + "learning_rate": 7.914970601537766e-09, + "loss": 0.8524, + "step": 7 + }, + { + "epoch": 2.8, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8174384832382202, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4233, + "eval_samples_per_second": 912.393, + "eval_steps_per_second": 0.825, + "step": 7 + }, + { + "epoch": 3.2, + "grad_norm": 1392734.625, + "learning_rate": 9.045680687471732e-09, + "loss": 0.8594, + "step": 8 + }, + { + "epoch": 3.6, + "grad_norm": 1319739.25, + "learning_rate": 1.0176390773405698e-08, + "loss": 0.8436, + "step": 9 + }, + { + "epoch": 4.0, + "grad_norm": 1361536.75, + "learning_rate": 1.1307100859339666e-08, + "loss": 0.8429, + "step": 10 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.817280650138855, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4356, + "eval_samples_per_second": 907.787, + "eval_steps_per_second": 0.821, + "step": 10 + }, + { + "epoch": 4.4, + "grad_norm": 1258248.75, + "learning_rate": 1.2437810945273631e-08, + "loss": 0.8201, + "step": 11 + }, + { + "epoch": 4.8, + "grad_norm": 1320385.75, + "learning_rate": 1.3568521031207599e-08, + "loss": 0.8441, + "step": 12 + }, + { + "epoch": 4.8, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.817141592502594, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4338, + "eval_samples_per_second": 908.439, + "eval_steps_per_second": 0.822, + "step": 12 + }, + { + "epoch": 5.2, + "grad_norm": 1337900.125, + "learning_rate": 1.4699231117141565e-08, + "loss": 0.8399, + "step": 13 + }, + { + "epoch": 5.6, + "grad_norm": 1313904.125, + "learning_rate": 1.582994120307553e-08, + "loss": 0.8432, + "step": 14 + }, + { + "epoch": 6.0, + "grad_norm": 1366665.25, + "learning_rate": 1.69606512890095e-08, + "loss": 0.8525, + "step": 15 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8168814778327942, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4624, + "eval_samples_per_second": 897.909, + "eval_steps_per_second": 0.812, + "step": 15 + }, + { + "epoch": 6.4, + "grad_norm": 1317935.0, + "learning_rate": 1.8091361374943464e-08, + "loss": 0.8453, + "step": 16 + }, + { + "epoch": 6.8, + "grad_norm": 1309251.0, + "learning_rate": 1.922207146087743e-08, + "loss": 0.8348, + "step": 17 + }, + { + "epoch": 6.8, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8166739344596863, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4455, + "eval_samples_per_second": 904.098, + "eval_steps_per_second": 0.818, + "step": 17 + }, + { + "epoch": 7.2, + "grad_norm": 1365110.5, + "learning_rate": 2.0352781546811397e-08, + "loss": 0.8539, + "step": 18 + }, + { + "epoch": 7.6, + "grad_norm": 1226102.0, + "learning_rate": 2.1483491632745366e-08, + "loss": 0.8161, + "step": 19 + }, + { + "epoch": 8.0, + "grad_norm": 1312686.625, + "learning_rate": 2.2614201718679333e-08, + "loss": 0.8415, + "step": 20 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8163145780563354, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4471, + "eval_samples_per_second": 903.517, + "eval_steps_per_second": 0.817, + "step": 20 + }, + { + "epoch": 8.4, + "grad_norm": 1262680.75, + "learning_rate": 2.37449118046133e-08, + "loss": 0.8269, + "step": 21 + }, + { + "epoch": 8.8, + "grad_norm": 1245756.625, + "learning_rate": 2.4875621890547262e-08, + "loss": 0.8162, + "step": 22 + }, + { + "epoch": 8.8, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8160423040390015, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4559, + "eval_samples_per_second": 900.269, + "eval_steps_per_second": 0.814, + "step": 22 + }, + { + "epoch": 9.2, + "grad_norm": 1293811.75, + "learning_rate": 2.6006331976481228e-08, + "loss": 0.8395, + "step": 23 + }, + { + "epoch": 9.6, + "grad_norm": 1314988.25, + "learning_rate": 2.7137042062415198e-08, + "loss": 0.8437, + "step": 24 + }, + { + "epoch": 10.0, + "grad_norm": 1367454.25, + "learning_rate": 2.8267752148349164e-08, + "loss": 0.8437, + "step": 25 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8155835866928101, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4408, + "eval_samples_per_second": 905.867, + "eval_steps_per_second": 0.819, + "step": 25 + }, + { + "epoch": 10.4, + "grad_norm": 1250999.625, + "learning_rate": 2.939846223428313e-08, + "loss": 0.8165, + "step": 26 + }, + { + "epoch": 10.8, + "grad_norm": 1274523.5, + "learning_rate": 3.05291723202171e-08, + "loss": 0.8331, + "step": 27 + }, + { + "epoch": 10.8, + "eval_accuracy": 0.5924920850293984, + "eval_f1": 0.6792452830188679, + "eval_loss": 0.8152433037757874, + "eval_precision": 0.7429906542056075, + "eval_recall": 0.6255737704918033, + "eval_runtime": 2.4629, + "eval_samples_per_second": 897.719, + "eval_steps_per_second": 0.812, + "step": 27 + }, + { + "epoch": 11.2, + "grad_norm": 1383570.75, + "learning_rate": 3.165988240615106e-08, + "loss": 0.8576, + "step": 28 + }, + { + "epoch": 11.6, + "grad_norm": 1317998.375, + "learning_rate": 3.279059249208503e-08, + "loss": 0.8542, + "step": 29 + }, + { + "epoch": 12.0, + "grad_norm": 1367335.875, + "learning_rate": 3.3921302578019e-08, + "loss": 0.8469, + "step": 30 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.592944369063772, + "eval_f1": 0.6797153024911032, + "eval_loss": 0.8146856427192688, + "eval_precision": 0.7431906614785992, + "eval_recall": 0.6262295081967213, + "eval_runtime": 2.4588, + "eval_samples_per_second": 899.214, + "eval_steps_per_second": 0.813, + "step": 30 + }, + { + "epoch": 12.4, + "grad_norm": 1296686.75, + "learning_rate": 3.505201266395296e-08, + "loss": 0.8298, + "step": 31 + }, + { + "epoch": 12.8, + "grad_norm": 1361290.5, + "learning_rate": 3.618272274988693e-08, + "loss": 0.8523, + "step": 32 + }, + { + "epoch": 12.8, + "eval_accuracy": 0.592944369063772, + "eval_f1": 0.6797153024911032, + "eval_loss": 0.8142819404602051, + "eval_precision": 0.7431906614785992, + "eval_recall": 0.6262295081967213, + "eval_runtime": 2.4602, + "eval_samples_per_second": 898.703, + "eval_steps_per_second": 0.813, + "step": 32 + }, + { + "epoch": 13.2, + "grad_norm": 1428386.625, + "learning_rate": 3.7313432835820895e-08, + "loss": 0.8721, + "step": 33 + }, + { + "epoch": 13.6, + "grad_norm": 1248038.125, + "learning_rate": 3.844414292175486e-08, + "loss": 0.8136, + "step": 34 + }, + { + "epoch": 14.0, + "grad_norm": 1431628.625, + "learning_rate": 3.957485300768883e-08, + "loss": 0.8614, + "step": 35 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.592944369063772, + "eval_f1": 0.6797153024911032, + "eval_loss": 0.8136261701583862, + "eval_precision": 0.7431906614785992, + "eval_recall": 0.6262295081967213, + "eval_runtime": 2.4887, + "eval_samples_per_second": 888.402, + "eval_steps_per_second": 0.804, + "step": 35 + }, + { + "epoch": 14.4, + "grad_norm": 1225990.25, + "learning_rate": 4.0705563093622794e-08, + "loss": 0.8136, + "step": 36 + }, + { + "epoch": 14.8, + "grad_norm": 1302060.5, + "learning_rate": 4.183627317955676e-08, + "loss": 0.8343, + "step": 37 + }, + { + "epoch": 14.8, + "eval_accuracy": 0.592944369063772, + "eval_f1": 0.6797153024911032, + "eval_loss": 0.8131555914878845, + "eval_precision": 0.7431906614785992, + "eval_recall": 0.6262295081967213, + "eval_runtime": 2.4699, + "eval_samples_per_second": 895.176, + "eval_steps_per_second": 0.81, + "step": 37 + }, + { + "epoch": 15.2, + "grad_norm": 1185428.875, + "learning_rate": 4.296698326549073e-08, + "loss": 0.7977, + "step": 38 + }, + { + "epoch": 15.6, + "grad_norm": 1342989.25, + "learning_rate": 4.40976933514247e-08, + "loss": 0.8457, + "step": 39 + }, + { + "epoch": 16.0, + "grad_norm": 1284317.0, + "learning_rate": 4.5228403437358665e-08, + "loss": 0.8347, + "step": 40 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.5933966530981456, + "eval_f1": 0.680184987548915, + "eval_loss": 0.8124008178710938, + "eval_precision": 0.7433903576982893, + "eval_recall": 0.6268852459016393, + "eval_runtime": 2.4309, + "eval_samples_per_second": 909.547, + "eval_steps_per_second": 0.823, + "step": 40 + }, + { + "epoch": 16.4, + "grad_norm": 1285609.625, + "learning_rate": 4.635911352329263e-08, + "loss": 0.8317, + "step": 41 + }, + { + "epoch": 16.8, + "grad_norm": 1277085.375, + "learning_rate": 4.74898236092266e-08, + "loss": 0.8299, + "step": 42 + }, + { + "epoch": 16.8, + "eval_accuracy": 0.5933966530981456, + "eval_f1": 0.680184987548915, + "eval_loss": 0.811866819858551, + "eval_precision": 0.7433903576982893, + "eval_recall": 0.6268852459016393, + "eval_runtime": 2.9873, + "eval_samples_per_second": 740.138, + "eval_steps_per_second": 0.67, + "step": 42 + }, + { + "epoch": 17.2, + "grad_norm": 1270486.25, + "learning_rate": 4.862053369516056e-08, + "loss": 0.8186, + "step": 43 + }, + { + "epoch": 17.6, + "grad_norm": 1266373.25, + "learning_rate": 4.9751243781094524e-08, + "loss": 0.8274, + "step": 44 + }, + { + "epoch": 18.0, + "grad_norm": 1351487.25, + "learning_rate": 5.088195386702849e-08, + "loss": 0.8469, + "step": 45 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.5938489371325192, + "eval_f1": 0.6806543385490754, + "eval_loss": 0.8110163807868958, + "eval_precision": 0.7435897435897436, + "eval_recall": 0.6275409836065574, + "eval_runtime": 2.4737, + "eval_samples_per_second": 893.803, + "eval_steps_per_second": 0.809, + "step": 45 + }, + { + "epoch": 18.4, + "grad_norm": 1335458.25, + "learning_rate": 5.2012663952962457e-08, + "loss": 0.847, + "step": 46 + }, + { + "epoch": 18.8, + "grad_norm": 1281995.5, + "learning_rate": 5.314337403889643e-08, + "loss": 0.8254, + "step": 47 + }, + { + "epoch": 18.8, + "eval_accuracy": 0.5938489371325192, + "eval_f1": 0.6806543385490754, + "eval_loss": 0.8104168772697449, + "eval_precision": 0.7435897435897436, + "eval_recall": 0.6275409836065574, + "eval_runtime": 2.4778, + "eval_samples_per_second": 892.325, + "eval_steps_per_second": 0.807, + "step": 47 + }, + { + "epoch": 19.2, + "grad_norm": 1321242.375, + "learning_rate": 5.4274084124830396e-08, + "loss": 0.853, + "step": 48 + }, + { + "epoch": 19.6, + "grad_norm": 1209938.625, + "learning_rate": 5.540479421076436e-08, + "loss": 0.8029, + "step": 49 + }, + { + "epoch": 20.0, + "grad_norm": 1300366.25, + "learning_rate": 5.653550429669833e-08, + "loss": 0.8246, + "step": 50 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.5943012211668928, + "eval_f1": 0.6811233558478492, + "eval_loss": 0.8094692826271057, + "eval_precision": 0.7437888198757764, + "eval_recall": 0.6281967213114754, + "eval_runtime": 2.4932, + "eval_samples_per_second": 886.811, + "eval_steps_per_second": 0.802, + "step": 50 + }, + { + "epoch": 20.4, + "grad_norm": 1343847.0, + "learning_rate": 5.7666214382632295e-08, + "loss": 0.853, + "step": 51 + }, + { + "epoch": 20.8, + "grad_norm": 1277880.625, + "learning_rate": 5.879692446856626e-08, + "loss": 0.8152, + "step": 52 + }, + { + "epoch": 20.8, + "eval_accuracy": 0.5947535052012664, + "eval_f1": 0.681592039800995, + "eval_loss": 0.8088060617446899, + "eval_precision": 0.7439875872769589, + "eval_recall": 0.6288524590163934, + "eval_runtime": 2.4813, + "eval_samples_per_second": 891.061, + "eval_steps_per_second": 0.806, + "step": 52 + }, + { + "epoch": 21.2, + "grad_norm": 1212167.125, + "learning_rate": 5.992763455450022e-08, + "loss": 0.8013, + "step": 53 + }, + { + "epoch": 21.6, + "grad_norm": 1303442.375, + "learning_rate": 6.10583446404342e-08, + "loss": 0.8366, + "step": 54 + }, + { + "epoch": 22.0, + "grad_norm": 1318704.125, + "learning_rate": 6.218905472636815e-08, + "loss": 0.8397, + "step": 55 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.5947535052012664, + "eval_f1": 0.681592039800995, + "eval_loss": 0.8077627420425415, + "eval_precision": 0.7439875872769589, + "eval_recall": 0.6288524590163934, + "eval_runtime": 2.4802, + "eval_samples_per_second": 891.467, + "eval_steps_per_second": 0.806, + "step": 55 + }, + { + "epoch": 22.4, + "grad_norm": 1253801.375, + "learning_rate": 6.331976481230213e-08, + "loss": 0.8223, + "step": 56 + }, + { + "epoch": 22.8, + "grad_norm": 1365371.0, + "learning_rate": 6.445047489823609e-08, + "loss": 0.8534, + "step": 57 + }, + { + "epoch": 22.8, + "eval_accuracy": 0.5952057892356399, + "eval_f1": 0.6820603907637656, + "eval_loss": 0.8070356845855713, + "eval_precision": 0.7441860465116279, + "eval_recall": 0.6295081967213115, + "eval_runtime": 2.5027, + "eval_samples_per_second": 883.442, + "eval_steps_per_second": 0.799, + "step": 57 + }, + { + "epoch": 23.2, + "grad_norm": 1199946.25, + "learning_rate": 6.558118498417006e-08, + "loss": 0.7965, + "step": 58 + }, + { + "epoch": 23.6, + "grad_norm": 1218013.375, + "learning_rate": 6.671189507010403e-08, + "loss": 0.8048, + "step": 59 + }, + { + "epoch": 24.0, + "grad_norm": 1212390.875, + "learning_rate": 6.7842605156038e-08, + "loss": 0.7992, + "step": 60 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.5956580732700135, + "eval_f1": 0.6825284090909091, + "eval_loss": 0.8058956265449524, + "eval_precision": 0.7443841982958946, + "eval_recall": 0.6301639344262295, + "eval_runtime": 2.4848, + "eval_samples_per_second": 889.806, + "eval_steps_per_second": 0.805, + "step": 60 + }, + { + "epoch": 24.4, + "grad_norm": 1233087.875, + "learning_rate": 6.897331524197196e-08, + "loss": 0.8142, + "step": 61 + }, + { + "epoch": 24.8, + "grad_norm": 1321215.375, + "learning_rate": 7.010402532790592e-08, + "loss": 0.8405, + "step": 62 + }, + { + "epoch": 24.8, + "eval_accuracy": 0.5956580732700135, + "eval_f1": 0.6825284090909091, + "eval_loss": 0.8051030039787292, + "eval_precision": 0.7443841982958946, + "eval_recall": 0.6301639344262295, + "eval_runtime": 2.7332, + "eval_samples_per_second": 808.932, + "eval_steps_per_second": 0.732, + "step": 62 + }, + { + "epoch": 25.2, + "grad_norm": 1345781.75, + "learning_rate": 7.12347354138399e-08, + "loss": 0.8502, + "step": 63 + }, + { + "epoch": 25.6, + "grad_norm": 1297744.25, + "learning_rate": 7.236544549977386e-08, + "loss": 0.8325, + "step": 64 + }, + { + "epoch": 26.0, + "grad_norm": 1232136.0, + "learning_rate": 7.349615558570783e-08, + "loss": 0.8095, + "step": 65 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.5961103573043871, + "eval_f1": 0.6829960951366703, + "eval_loss": 0.8038693070411682, + "eval_precision": 0.7445820433436533, + "eval_recall": 0.6308196721311475, + "eval_runtime": 2.4815, + "eval_samples_per_second": 890.982, + "eval_steps_per_second": 0.806, + "step": 65 + }, + { + "epoch": 26.4, + "grad_norm": 1275956.625, + "learning_rate": 7.462686567164179e-08, + "loss": 0.8265, + "step": 66 + }, + { + "epoch": 26.8, + "grad_norm": 1204233.75, + "learning_rate": 7.575757575757576e-08, + "loss": 0.8053, + "step": 67 + }, + { + "epoch": 26.8, + "eval_accuracy": 0.5961103573043871, + "eval_f1": 0.6829960951366703, + "eval_loss": 0.8030155897140503, + "eval_precision": 0.7445820433436533, + "eval_recall": 0.6308196721311475, + "eval_runtime": 2.5349, + "eval_samples_per_second": 872.209, + "eval_steps_per_second": 0.789, + "step": 67 + }, + { + "epoch": 27.2, + "grad_norm": 1270287.25, + "learning_rate": 7.688828584350972e-08, + "loss": 0.8254, + "step": 68 + }, + { + "epoch": 27.6, + "grad_norm": 1262557.375, + "learning_rate": 7.80189959294437e-08, + "loss": 0.8153, + "step": 69 + }, + { + "epoch": 28.0, + "grad_norm": 1237714.375, + "learning_rate": 7.914970601537765e-08, + "loss": 0.8129, + "step": 70 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.5961103573043871, + "eval_f1": 0.6829960951366703, + "eval_loss": 0.8016895651817322, + "eval_precision": 0.7445820433436533, + "eval_recall": 0.6308196721311475, + "eval_runtime": 2.4729, + "eval_samples_per_second": 894.086, + "eval_steps_per_second": 0.809, + "step": 70 + }, + { + "epoch": 28.4, + "grad_norm": 1308664.625, + "learning_rate": 8.028041610131163e-08, + "loss": 0.8351, + "step": 71 + }, + { + "epoch": 28.8, + "grad_norm": 1254259.625, + "learning_rate": 8.141112618724559e-08, + "loss": 0.8186, + "step": 72 + }, + { + "epoch": 28.8, + "eval_accuracy": 0.5961103573043871, + "eval_f1": 0.6829960951366703, + "eval_loss": 0.8007768392562866, + "eval_precision": 0.7445820433436533, + "eval_recall": 0.6308196721311475, + "eval_runtime": 2.4961, + "eval_samples_per_second": 885.772, + "eval_steps_per_second": 0.801, + "step": 72 + }, + { + "epoch": 29.2, + "grad_norm": 1274678.875, + "learning_rate": 8.254183627317956e-08, + "loss": 0.8094, + "step": 73 + }, + { + "epoch": 29.6, + "grad_norm": 1269523.0, + "learning_rate": 8.367254635911352e-08, + "loss": 0.8242, + "step": 74 + }, + { + "epoch": 30.0, + "grad_norm": 1350845.5, + "learning_rate": 8.480325644504748e-08, + "loss": 0.8417, + "step": 75 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.5961103573043871, + "eval_f1": 0.6829960951366703, + "eval_loss": 0.7993631958961487, + "eval_precision": 0.7445820433436533, + "eval_recall": 0.6308196721311475, + "eval_runtime": 2.4877, + "eval_samples_per_second": 888.781, + "eval_steps_per_second": 0.804, + "step": 75 + }, + { + "epoch": 30.4, + "grad_norm": 1211039.875, + "learning_rate": 8.593396653098147e-08, + "loss": 0.801, + "step": 76 + }, + { + "epoch": 30.8, + "grad_norm": 1243437.25, + "learning_rate": 8.706467661691543e-08, + "loss": 0.8095, + "step": 77 + }, + { + "epoch": 30.8, + "eval_accuracy": 0.5965626413387607, + "eval_f1": 0.6834634492547906, + "eval_loss": 0.7983915209770203, + "eval_precision": 0.7447795823665894, + "eval_recall": 0.6314754098360655, + "eval_runtime": 2.4918, + "eval_samples_per_second": 887.313, + "eval_steps_per_second": 0.803, + "step": 77 + }, + { + "epoch": 31.2, + "grad_norm": 1268283.875, + "learning_rate": 8.81953867028494e-08, + "loss": 0.8172, + "step": 78 + }, + { + "epoch": 31.6, + "grad_norm": 1183910.0, + "learning_rate": 8.932609678878336e-08, + "loss": 0.8001, + "step": 79 + }, + { + "epoch": 32.0, + "grad_norm": 1299882.5, + "learning_rate": 9.045680687471733e-08, + "loss": 0.8212, + "step": 80 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.5970149253731343, + "eval_f1": 0.6839304717985101, + "eval_loss": 0.7968913912773132, + "eval_precision": 0.7449768160741885, + "eval_recall": 0.6321311475409837, + "eval_runtime": 2.4997, + "eval_samples_per_second": 884.508, + "eval_steps_per_second": 0.8, + "step": 80 + }, + { + "epoch": 32.4, + "grad_norm": 1252672.875, + "learning_rate": 9.158751696065129e-08, + "loss": 0.8138, + "step": 81 + }, + { + "epoch": 32.8, + "grad_norm": 1247188.125, + "learning_rate": 9.271822704658526e-08, + "loss": 0.8094, + "step": 82 + }, + { + "epoch": 32.8, + "eval_accuracy": 0.5970149253731343, + "eval_f1": 0.6839304717985101, + "eval_loss": 0.7958608865737915, + "eval_precision": 0.7449768160741885, + "eval_recall": 0.6321311475409837, + "eval_runtime": 2.7169, + "eval_samples_per_second": 813.799, + "eval_steps_per_second": 0.736, + "step": 82 + }, + { + "epoch": 33.2, + "grad_norm": 1258466.0, + "learning_rate": 9.384893713251922e-08, + "loss": 0.8178, + "step": 83 + }, + { + "epoch": 33.6, + "grad_norm": 1256538.75, + "learning_rate": 9.49796472184532e-08, + "loss": 0.8108, + "step": 84 + }, + { + "epoch": 34.0, + "grad_norm": 1231118.0, + "learning_rate": 9.611035730438716e-08, + "loss": 0.8074, + "step": 85 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.5974672094075079, + "eval_f1": 0.6843971631205674, + "eval_loss": 0.7942715883255005, + "eval_precision": 0.7451737451737451, + "eval_recall": 0.6327868852459017, + "eval_runtime": 2.4685, + "eval_samples_per_second": 895.682, + "eval_steps_per_second": 0.81, + "step": 85 + }, + { + "epoch": 34.4, + "grad_norm": 1253341.875, + "learning_rate": 9.724106739032112e-08, + "loss": 0.8132, + "step": 86 + }, + { + "epoch": 34.8, + "grad_norm": 1229837.5, + "learning_rate": 9.837177747625509e-08, + "loss": 0.8085, + "step": 87 + }, + { + "epoch": 34.8, + "eval_accuracy": 0.5974672094075079, + "eval_f1": 0.6843971631205674, + "eval_loss": 0.7931840419769287, + "eval_precision": 0.7451737451737451, + "eval_recall": 0.6327868852459017, + "eval_runtime": 2.5536, + "eval_samples_per_second": 865.824, + "eval_steps_per_second": 0.783, + "step": 87 + }, + { + "epoch": 35.2, + "grad_norm": 1301174.75, + "learning_rate": 9.950248756218905e-08, + "loss": 0.8233, + "step": 88 + }, + { + "epoch": 35.6, + "grad_norm": 1183638.125, + "learning_rate": 1.0063319764812302e-07, + "loss": 0.795, + "step": 89 + }, + { + "epoch": 36.0, + "grad_norm": 1298239.125, + "learning_rate": 1.0176390773405698e-07, + "loss": 0.8192, + "step": 90 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.5974672094075079, + "eval_f1": 0.6843971631205674, + "eval_loss": 0.7915147542953491, + "eval_precision": 0.7451737451737451, + "eval_recall": 0.6327868852459017, + "eval_runtime": 2.5067, + "eval_samples_per_second": 882.047, + "eval_steps_per_second": 0.798, + "step": 90 + }, + { + "epoch": 36.4, + "grad_norm": 1284581.75, + "learning_rate": 1.0289461781999095e-07, + "loss": 0.8239, + "step": 91 + }, + { + "epoch": 36.8, + "grad_norm": 1227722.75, + "learning_rate": 1.0402532790592491e-07, + "loss": 0.7977, + "step": 92 + }, + { + "epoch": 36.8, + "eval_accuracy": 0.5974672094075079, + "eval_f1": 0.6843971631205674, + "eval_loss": 0.7903743386268616, + "eval_precision": 0.7451737451737451, + "eval_recall": 0.6327868852459017, + "eval_runtime": 2.4905, + "eval_samples_per_second": 887.77, + "eval_steps_per_second": 0.803, + "step": 92 + }, + { + "epoch": 37.2, + "grad_norm": 1243118.5, + "learning_rate": 1.051560379918589e-07, + "loss": 0.8107, + "step": 93 + }, + { + "epoch": 37.6, + "grad_norm": 1201387.375, + "learning_rate": 1.0628674807779286e-07, + "loss": 0.7946, + "step": 94 + }, + { + "epoch": 38.0, + "grad_norm": 1246958.75, + "learning_rate": 1.0741745816372683e-07, + "loss": 0.8158, + "step": 95 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.5979194934418816, + "eval_f1": 0.684863523573201, + "eval_loss": 0.788622260093689, + "eval_precision": 0.7453703703703703, + "eval_recall": 0.6334426229508197, + "eval_runtime": 2.4909, + "eval_samples_per_second": 887.63, + "eval_steps_per_second": 0.803, + "step": 95 + }, + { + "epoch": 38.4, + "grad_norm": 1147869.5, + "learning_rate": 1.0854816824966079e-07, + "loss": 0.7821, + "step": 96 + }, + { + "epoch": 38.8, + "grad_norm": 1267812.25, + "learning_rate": 1.0967887833559476e-07, + "loss": 0.8169, + "step": 97 + }, + { + "epoch": 38.8, + "eval_accuracy": 0.5979194934418816, + "eval_f1": 0.684863523573201, + "eval_loss": 0.7874265909194946, + "eval_precision": 0.7453703703703703, + "eval_recall": 0.6334426229508197, + "eval_runtime": 2.4871, + "eval_samples_per_second": 888.995, + "eval_steps_per_second": 0.804, + "step": 97 + }, + { + "epoch": 39.2, + "grad_norm": 1325768.125, + "learning_rate": 1.1080958842152872e-07, + "loss": 0.8324, + "step": 98 + }, + { + "epoch": 39.6, + "grad_norm": 1198321.125, + "learning_rate": 1.1194029850746268e-07, + "loss": 0.7988, + "step": 99 + }, + { + "epoch": 40.0, + "grad_norm": 1354937.125, + "learning_rate": 1.1307100859339666e-07, + "loss": 0.8365, + "step": 100 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.5974672094075079, + "eval_f1": 0.6843971631205674, + "eval_loss": 0.785596489906311, + "eval_precision": 0.7451737451737451, + "eval_recall": 0.6327868852459017, + "eval_runtime": 2.4823, + "eval_samples_per_second": 890.69, + "eval_steps_per_second": 0.806, + "step": 100 + }, + { + "epoch": 40.4, + "grad_norm": 1245198.375, + "learning_rate": 1.1420171867933062e-07, + "loss": 0.8083, + "step": 101 + }, + { + "epoch": 40.8, + "grad_norm": 1215457.25, + "learning_rate": 1.1533242876526459e-07, + "loss": 0.8001, + "step": 102 + }, + { + "epoch": 40.8, + "eval_accuracy": 0.5974672094075079, + "eval_f1": 0.6843971631205674, + "eval_loss": 0.7843508124351501, + "eval_precision": 0.7451737451737451, + "eval_recall": 0.6327868852459017, + "eval_runtime": 2.7623, + "eval_samples_per_second": 800.423, + "eval_steps_per_second": 0.724, + "step": 102 + }, + { + "epoch": 41.2, + "grad_norm": 1239205.375, + "learning_rate": 1.1646313885119855e-07, + "loss": 0.8124, + "step": 103 + }, + { + "epoch": 41.6, + "grad_norm": 1206158.625, + "learning_rate": 1.1759384893713252e-07, + "loss": 0.8017, + "step": 104 + }, + { + "epoch": 42.0, + "grad_norm": 1268651.375, + "learning_rate": 1.1872455902306648e-07, + "loss": 0.8133, + "step": 105 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.5979194934418816, + "eval_f1": 0.6846399432422845, + "eval_loss": 0.7824400663375854, + "eval_precision": 0.7457496136012365, + "eval_recall": 0.6327868852459017, + "eval_runtime": 2.4796, + "eval_samples_per_second": 891.684, + "eval_steps_per_second": 0.807, + "step": 105 + }, + { + "epoch": 42.4, + "grad_norm": 1282343.0, + "learning_rate": 1.1985526910900044e-07, + "loss": 0.8245, + "step": 106 + }, + { + "epoch": 42.8, + "grad_norm": 1153320.125, + "learning_rate": 1.2098597919493441e-07, + "loss": 0.7792, + "step": 107 + }, + { + "epoch": 42.8, + "eval_accuracy": 0.5992763455450023, + "eval_f1": 0.6858156028368795, + "eval_loss": 0.7811415791511536, + "eval_precision": 0.7467181467181467, + "eval_recall": 0.6340983606557377, + "eval_runtime": 2.4803, + "eval_samples_per_second": 891.413, + "eval_steps_per_second": 0.806, + "step": 107 + }, + { + "epoch": 43.2, + "grad_norm": 1243040.625, + "learning_rate": 1.221166892808684e-07, + "loss": 0.8033, + "step": 108 + }, + { + "epoch": 43.6, + "grad_norm": 1224342.0, + "learning_rate": 1.2324739936680236e-07, + "loss": 0.8028, + "step": 109 + }, + { + "epoch": 44.0, + "grad_norm": 1212647.875, + "learning_rate": 1.243781094527363e-07, + "loss": 0.7915, + "step": 110 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.6001809136137495, + "eval_f1": 0.6867469879518072, + "eval_loss": 0.7791528105735779, + "eval_precision": 0.7471087124132614, + "eval_recall": 0.6354098360655738, + "eval_runtime": 2.4869, + "eval_samples_per_second": 889.065, + "eval_steps_per_second": 0.804, + "step": 110 + }, + { + "epoch": 44.4, + "grad_norm": 1208327.625, + "learning_rate": 1.2550881953867028e-07, + "loss": 0.798, + "step": 111 + }, + { + "epoch": 44.8, + "grad_norm": 1197239.375, + "learning_rate": 1.2663952962460425e-07, + "loss": 0.7926, + "step": 112 + }, + { + "epoch": 44.8, + "eval_accuracy": 0.6006331976481231, + "eval_f1": 0.6872121856181367, + "eval_loss": 0.7778016924858093, + "eval_precision": 0.7473035439137135, + "eval_recall": 0.6360655737704918, + "eval_runtime": 2.4887, + "eval_samples_per_second": 888.419, + "eval_steps_per_second": 0.804, + "step": 112 + }, + { + "epoch": 45.2, + "grad_norm": 1275864.25, + "learning_rate": 1.277702397105382e-07, + "loss": 0.8027, + "step": 113 + }, + { + "epoch": 45.6, + "grad_norm": 1174152.75, + "learning_rate": 1.2890094979647217e-07, + "loss": 0.7915, + "step": 114 + }, + { + "epoch": 46.0, + "grad_norm": 1337862.375, + "learning_rate": 1.3003165988240614e-07, + "loss": 0.8298, + "step": 115 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.6010854816824966, + "eval_f1": 0.6876770538243626, + "eval_loss": 0.7757393717765808, + "eval_precision": 0.7474980754426482, + "eval_recall": 0.6367213114754099, + "eval_runtime": 2.5031, + "eval_samples_per_second": 883.291, + "eval_steps_per_second": 0.799, + "step": 115 + }, + { + "epoch": 46.4, + "grad_norm": 1199864.875, + "learning_rate": 1.3116236996834012e-07, + "loss": 0.7934, + "step": 116 + }, + { + "epoch": 46.8, + "grad_norm": 1141296.0, + "learning_rate": 1.3229308005427406e-07, + "loss": 0.771, + "step": 117 + }, + { + "epoch": 46.8, + "eval_accuracy": 0.6010854816824966, + "eval_f1": 0.6876770538243626, + "eval_loss": 0.7743386626243591, + "eval_precision": 0.7474980754426482, + "eval_recall": 0.6367213114754099, + "eval_runtime": 2.5061, + "eval_samples_per_second": 882.258, + "eval_steps_per_second": 0.798, + "step": 117 + }, + { + "epoch": 47.2, + "grad_norm": 1282701.25, + "learning_rate": 1.3342379014020806e-07, + "loss": 0.8084, + "step": 118 + }, + { + "epoch": 47.6, + "grad_norm": 1206960.0, + "learning_rate": 1.3455450022614204e-07, + "loss": 0.7995, + "step": 119 + }, + { + "epoch": 48.0, + "grad_norm": 1210567.125, + "learning_rate": 1.35685210312076e-07, + "loss": 0.7814, + "step": 120 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.6015377657168702, + "eval_f1": 0.6881415929203539, + "eval_loss": 0.7721933722496033, + "eval_precision": 0.7476923076923077, + "eval_recall": 0.6373770491803279, + "eval_runtime": 2.4888, + "eval_samples_per_second": 888.388, + "eval_steps_per_second": 0.804, + "step": 120 + }, + { + "epoch": 48.4, + "grad_norm": 1169100.75, + "learning_rate": 1.3681592039800996e-07, + "loss": 0.7824, + "step": 121 + }, + { + "epoch": 48.8, + "grad_norm": 1195867.5, + "learning_rate": 1.3794663048394393e-07, + "loss": 0.7926, + "step": 122 + }, + { + "epoch": 48.8, + "eval_accuracy": 0.6015377657168702, + "eval_f1": 0.6881415929203539, + "eval_loss": 0.7707341909408569, + "eval_precision": 0.7476923076923077, + "eval_recall": 0.6373770491803279, + "eval_runtime": 2.739, + "eval_samples_per_second": 807.236, + "eval_steps_per_second": 0.73, + "step": 122 + }, + { + "epoch": 49.2, + "grad_norm": 1219558.375, + "learning_rate": 1.390773405698779e-07, + "loss": 0.7978, + "step": 123 + }, + { + "epoch": 49.6, + "grad_norm": 1196311.625, + "learning_rate": 1.4020805065581185e-07, + "loss": 0.79, + "step": 124 + }, + { + "epoch": 50.0, + "grad_norm": 1175967.375, + "learning_rate": 1.4133876074174582e-07, + "loss": 0.7906, + "step": 125 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.6019900497512438, + "eval_f1": 0.6886058032554848, + "eval_loss": 0.7685143947601318, + "eval_precision": 0.7478862413528056, + "eval_recall": 0.6380327868852459, + "eval_runtime": 2.4632, + "eval_samples_per_second": 897.606, + "eval_steps_per_second": 0.812, + "step": 125 + }, + { + "epoch": 50.4, + "grad_norm": 1171851.625, + "learning_rate": 1.424694708276798e-07, + "loss": 0.7801, + "step": 126 + }, + { + "epoch": 50.8, + "grad_norm": 1102515.5, + "learning_rate": 1.4360018091361377e-07, + "loss": 0.7697, + "step": 127 + }, + { + "epoch": 50.8, + "eval_accuracy": 0.6019900497512438, + "eval_f1": 0.6886058032554848, + "eval_loss": 0.7670114636421204, + "eval_precision": 0.7478862413528056, + "eval_recall": 0.6380327868852459, + "eval_runtime": 2.4662, + "eval_samples_per_second": 896.518, + "eval_steps_per_second": 0.811, + "step": 127 + }, + { + "epoch": 51.2, + "grad_norm": 1186457.875, + "learning_rate": 1.4473089099954771e-07, + "loss": 0.7849, + "step": 128 + }, + { + "epoch": 51.6, + "grad_norm": 1147469.125, + "learning_rate": 1.4586160108548169e-07, + "loss": 0.7751, + "step": 129 + }, + { + "epoch": 52.0, + "grad_norm": 1110517.375, + "learning_rate": 1.4699231117141566e-07, + "loss": 0.7676, + "step": 130 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.602894617819991, + "eval_f1": 0.6895332390381895, + "eval_loss": 0.7647180557250977, + "eval_precision": 0.7482732156561781, + "eval_recall": 0.639344262295082, + "eval_runtime": 2.479, + "eval_samples_per_second": 891.899, + "eval_steps_per_second": 0.807, + "step": 130 + }, + { + "epoch": 52.4, + "grad_norm": 1184108.25, + "learning_rate": 1.4812302125734963e-07, + "loss": 0.7922, + "step": 131 + }, + { + "epoch": 52.8, + "grad_norm": 1135117.5, + "learning_rate": 1.4925373134328358e-07, + "loss": 0.7701, + "step": 132 + }, + { + "epoch": 52.8, + "eval_accuracy": 0.602894617819991, + "eval_f1": 0.6897526501766784, + "eval_loss": 0.7631660103797913, + "eval_precision": 0.7478927203065134, + "eval_recall": 0.64, + "eval_runtime": 2.4695, + "eval_samples_per_second": 895.336, + "eval_steps_per_second": 0.81, + "step": 132 + }, + { + "epoch": 53.2, + "grad_norm": 1208228.875, + "learning_rate": 1.5038444142921755e-07, + "loss": 0.783, + "step": 133 + }, + { + "epoch": 53.6, + "grad_norm": 1206833.125, + "learning_rate": 1.5151515151515152e-07, + "loss": 0.7939, + "step": 134 + }, + { + "epoch": 54.0, + "grad_norm": 1339050.75, + "learning_rate": 1.5264586160108547e-07, + "loss": 0.8177, + "step": 135 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.6047037539574853, + "eval_f1": 0.6916019760056458, + "eval_loss": 0.760806679725647, + "eval_precision": 0.7486631016042781, + "eval_recall": 0.6426229508196721, + "eval_runtime": 2.4859, + "eval_samples_per_second": 889.4, + "eval_steps_per_second": 0.805, + "step": 135 + }, + { + "epoch": 54.4, + "grad_norm": 1242468.875, + "learning_rate": 1.5377657168701944e-07, + "loss": 0.8017, + "step": 136 + }, + { + "epoch": 54.8, + "grad_norm": 1137151.75, + "learning_rate": 1.5490728177295342e-07, + "loss": 0.768, + "step": 137 + }, + { + "epoch": 54.8, + "eval_accuracy": 0.6047037539574853, + "eval_f1": 0.6916019760056458, + "eval_loss": 0.7592155337333679, + "eval_precision": 0.7486631016042781, + "eval_recall": 0.6426229508196721, + "eval_runtime": 2.4695, + "eval_samples_per_second": 895.315, + "eval_steps_per_second": 0.81, + "step": 137 + }, + { + "epoch": 55.2, + "grad_norm": 1227273.5, + "learning_rate": 1.560379918588874e-07, + "loss": 0.796, + "step": 138 + }, + { + "epoch": 55.6, + "grad_norm": 1101658.0, + "learning_rate": 1.5716870194482134e-07, + "loss": 0.7588, + "step": 139 + }, + { + "epoch": 56.0, + "grad_norm": 1225613.25, + "learning_rate": 1.582994120307553e-07, + "loss": 0.7872, + "step": 140 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.6051560379918589, + "eval_f1": 0.692063492063492, + "eval_loss": 0.7567971348762512, + "eval_precision": 0.748854961832061, + "eval_recall": 0.6432786885245901, + "eval_runtime": 2.4753, + "eval_samples_per_second": 893.227, + "eval_steps_per_second": 0.808, + "step": 140 + }, + { + "epoch": 56.4, + "grad_norm": 1117912.875, + "learning_rate": 1.5943012211668928e-07, + "loss": 0.7668, + "step": 141 + }, + { + "epoch": 56.8, + "grad_norm": 1143192.75, + "learning_rate": 1.6056083220262325e-07, + "loss": 0.7737, + "step": 142 + }, + { + "epoch": 56.8, + "eval_accuracy": 0.6051560379918589, + "eval_f1": 0.692063492063492, + "eval_loss": 0.7551662921905518, + "eval_precision": 0.748854961832061, + "eval_recall": 0.6432786885245901, + "eval_runtime": 2.4897, + "eval_samples_per_second": 888.056, + "eval_steps_per_second": 0.803, + "step": 142 + }, + { + "epoch": 57.2, + "grad_norm": 1116478.25, + "learning_rate": 1.616915422885572e-07, + "loss": 0.7512, + "step": 143 + }, + { + "epoch": 57.6, + "grad_norm": 1096629.125, + "learning_rate": 1.6282225237449117e-07, + "loss": 0.7518, + "step": 144 + }, + { + "epoch": 58.0, + "grad_norm": 1115783.25, + "learning_rate": 1.6395296246042515e-07, + "loss": 0.7697, + "step": 145 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.6056083220262325, + "eval_f1": 0.692524682651622, + "eval_loss": 0.7526903748512268, + "eval_precision": 0.7490465293668955, + "eval_recall": 0.6439344262295082, + "eval_runtime": 2.4641, + "eval_samples_per_second": 897.289, + "eval_steps_per_second": 0.812, + "step": 145 + }, + { + "epoch": 58.4, + "grad_norm": 1192107.25, + "learning_rate": 1.6508367254635912e-07, + "loss": 0.7783, + "step": 146 + }, + { + "epoch": 58.8, + "grad_norm": 1115252.375, + "learning_rate": 1.6621438263229307e-07, + "loss": 0.7669, + "step": 147 + }, + { + "epoch": 58.8, + "eval_accuracy": 0.6056083220262325, + "eval_f1": 0.6927413671599718, + "eval_loss": 0.7510228753089905, + "eval_precision": 0.7486671744097486, + "eval_recall": 0.6445901639344263, + "eval_runtime": 2.4836, + "eval_samples_per_second": 890.23, + "eval_steps_per_second": 0.805, + "step": 147 + }, + { + "epoch": 59.2, + "grad_norm": 1212152.875, + "learning_rate": 1.6734509271822704e-07, + "loss": 0.7826, + "step": 148 + }, + { + "epoch": 59.6, + "grad_norm": 1125442.875, + "learning_rate": 1.68475802804161e-07, + "loss": 0.7668, + "step": 149 + }, + { + "epoch": 60.0, + "grad_norm": 1034413.875, + "learning_rate": 1.6960651289009496e-07, + "loss": 0.7456, + "step": 150 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.6056083220262325, + "eval_f1": 0.6927413671599718, + "eval_loss": 0.748491108417511, + "eval_precision": 0.7486671744097486, + "eval_recall": 0.6445901639344263, + "eval_runtime": 2.4647, + "eval_samples_per_second": 897.077, + "eval_steps_per_second": 0.811, + "step": 150 + }, + { + "epoch": 60.4, + "grad_norm": 1138107.0, + "learning_rate": 1.7073722297602896e-07, + "loss": 0.7709, + "step": 151 + }, + { + "epoch": 60.8, + "grad_norm": 1086832.625, + "learning_rate": 1.7186793306196293e-07, + "loss": 0.7518, + "step": 152 + }, + { + "epoch": 60.8, + "eval_accuracy": 0.6060606060606061, + "eval_f1": 0.6932018316308559, + "eval_loss": 0.7467884421348572, + "eval_precision": 0.7488584474885844, + "eval_recall": 0.6452459016393443, + "eval_runtime": 2.4715, + "eval_samples_per_second": 894.582, + "eval_steps_per_second": 0.809, + "step": 152 + }, + { + "epoch": 61.2, + "grad_norm": 1223040.625, + "learning_rate": 1.729986431478969e-07, + "loss": 0.7802, + "step": 153 + }, + { + "epoch": 61.6, + "grad_norm": 1070359.25, + "learning_rate": 1.7412935323383085e-07, + "loss": 0.7531, + "step": 154 + }, + { + "epoch": 62.0, + "grad_norm": 1053263.625, + "learning_rate": 1.7526006331976482e-07, + "loss": 0.748, + "step": 155 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.6056083220262325, + "eval_f1": 0.6929577464788732, + "eval_loss": 0.7442070245742798, + "eval_precision": 0.7482889733840304, + "eval_recall": 0.6452459016393443, + "eval_runtime": 2.4952, + "eval_samples_per_second": 886.11, + "eval_steps_per_second": 0.802, + "step": 155 + }, + { + "epoch": 62.4, + "grad_norm": 1176378.25, + "learning_rate": 1.763907734056988e-07, + "loss": 0.7831, + "step": 156 + }, + { + "epoch": 62.8, + "grad_norm": 1069603.25, + "learning_rate": 1.7752148349163277e-07, + "loss": 0.7469, + "step": 157 + }, + { + "epoch": 62.8, + "eval_accuracy": 0.6056083220262325, + "eval_f1": 0.6929577464788732, + "eval_loss": 0.742469072341919, + "eval_precision": 0.7482889733840304, + "eval_recall": 0.6452459016393443, + "eval_runtime": 2.4849, + "eval_samples_per_second": 889.773, + "eval_steps_per_second": 0.805, + "step": 157 + }, + { + "epoch": 63.2, + "grad_norm": 1125731.875, + "learning_rate": 1.7865219357756672e-07, + "loss": 0.7628, + "step": 158 + }, + { + "epoch": 63.6, + "grad_norm": 1181959.375, + "learning_rate": 1.797829036635007e-07, + "loss": 0.7849, + "step": 159 + }, + { + "epoch": 64.0, + "grad_norm": 1140660.0, + "learning_rate": 1.8091361374943466e-07, + "loss": 0.7583, + "step": 160 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.6060606060606061, + "eval_f1": 0.6934178106300598, + "eval_loss": 0.7398371696472168, + "eval_precision": 0.7484802431610942, + "eval_recall": 0.6459016393442623, + "eval_runtime": 2.5189, + "eval_samples_per_second": 877.765, + "eval_steps_per_second": 0.794, + "step": 160 + }, + { + "epoch": 64.4, + "grad_norm": 1045759.4375, + "learning_rate": 1.820443238353686e-07, + "loss": 0.7412, + "step": 161 + }, + { + "epoch": 64.8, + "grad_norm": 1091364.625, + "learning_rate": 1.8317503392130258e-07, + "loss": 0.7486, + "step": 162 + }, + { + "epoch": 64.8, + "eval_accuracy": 0.6060606060606061, + "eval_f1": 0.6934178106300598, + "eval_loss": 0.7380635738372803, + "eval_precision": 0.7484802431610942, + "eval_recall": 0.6459016393442623, + "eval_runtime": 2.5038, + "eval_samples_per_second": 883.066, + "eval_steps_per_second": 0.799, + "step": 162 + }, + { + "epoch": 65.2, + "grad_norm": 1122266.25, + "learning_rate": 1.8430574400723655e-07, + "loss": 0.7573, + "step": 163 + }, + { + "epoch": 65.6, + "grad_norm": 1064930.0, + "learning_rate": 1.8543645409317053e-07, + "loss": 0.7428, + "step": 164 + }, + { + "epoch": 66.0, + "grad_norm": 1149179.0, + "learning_rate": 1.8656716417910447e-07, + "loss": 0.7722, + "step": 165 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.6069651741293532, + "eval_f1": 0.6943369679915582, + "eval_loss": 0.7353787422180176, + "eval_precision": 0.7488619119878603, + "eval_recall": 0.6472131147540984, + "eval_runtime": 2.7405, + "eval_samples_per_second": 806.777, + "eval_steps_per_second": 0.73, + "step": 165 + }, + { + "epoch": 66.4, + "grad_norm": 1054817.75, + "learning_rate": 1.8769787426503845e-07, + "loss": 0.7445, + "step": 166 + }, + { + "epoch": 66.8, + "grad_norm": 1124153.125, + "learning_rate": 1.8882858435097242e-07, + "loss": 0.7555, + "step": 167 + }, + { + "epoch": 66.8, + "eval_accuracy": 0.608322026232474, + "eval_f1": 0.6957132817990161, + "eval_loss": 0.7335732579231262, + "eval_precision": 0.7494322482967449, + "eval_recall": 0.6491803278688525, + "eval_runtime": 2.5149, + "eval_samples_per_second": 879.164, + "eval_steps_per_second": 0.795, + "step": 167 + }, + { + "epoch": 67.2, + "grad_norm": 1039910.6875, + "learning_rate": 1.899592944369064e-07, + "loss": 0.7431, + "step": 168 + }, + { + "epoch": 67.6, + "grad_norm": 1048712.125, + "learning_rate": 1.9109000452284034e-07, + "loss": 0.7414, + "step": 169 + }, + { + "epoch": 68.0, + "grad_norm": 1044318.0625, + "learning_rate": 1.922207146087743e-07, + "loss": 0.7391, + "step": 170 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.608322026232474, + "eval_f1": 0.6957132817990161, + "eval_loss": 0.7308428287506104, + "eval_precision": 0.7494322482967449, + "eval_recall": 0.6491803278688525, + "eval_runtime": 2.4809, + "eval_samples_per_second": 891.206, + "eval_steps_per_second": 0.806, + "step": 170 + }, + { + "epoch": 68.4, + "grad_norm": 1067868.25, + "learning_rate": 1.9335142469470828e-07, + "loss": 0.7471, + "step": 171 + }, + { + "epoch": 68.8, + "grad_norm": 1109802.375, + "learning_rate": 1.9448213478064223e-07, + "loss": 0.7614, + "step": 172 + }, + { + "epoch": 68.8, + "eval_accuracy": 0.6092265943012212, + "eval_f1": 0.6966292134831461, + "eval_loss": 0.7290031909942627, + "eval_precision": 0.7498110355253212, + "eval_recall": 0.6504918032786885, + "eval_runtime": 2.4726, + "eval_samples_per_second": 894.214, + "eval_steps_per_second": 0.809, + "step": 172 + }, + { + "epoch": 69.2, + "grad_norm": 1116069.75, + "learning_rate": 1.956128448665762e-07, + "loss": 0.7504, + "step": 173 + }, + { + "epoch": 69.6, + "grad_norm": 1057257.75, + "learning_rate": 1.9674355495251018e-07, + "loss": 0.7434, + "step": 174 + }, + { + "epoch": 70.0, + "grad_norm": 1177426.625, + "learning_rate": 1.9787426503844415e-07, + "loss": 0.769, + "step": 175 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.6092265943012212, + "eval_f1": 0.6968421052631579, + "eval_loss": 0.7262230515480042, + "eval_precision": 0.7494339622641509, + "eval_recall": 0.6511475409836066, + "eval_runtime": 2.4729, + "eval_samples_per_second": 894.088, + "eval_steps_per_second": 0.809, + "step": 175 + }, + { + "epoch": 70.4, + "grad_norm": 1058332.75, + "learning_rate": 1.990049751243781e-07, + "loss": 0.737, + "step": 176 + }, + { + "epoch": 70.8, + "grad_norm": 1083335.125, + "learning_rate": 2.0013568521031207e-07, + "loss": 0.7498, + "step": 177 + }, + { + "epoch": 70.8, + "eval_accuracy": 0.6101311623699683, + "eval_f1": 0.6977559607293128, + "eval_loss": 0.7243533730506897, + "eval_precision": 0.7498116051243406, + "eval_recall": 0.6524590163934426, + "eval_runtime": 2.4792, + "eval_samples_per_second": 891.815, + "eval_steps_per_second": 0.807, + "step": 177 + }, + { + "epoch": 71.2, + "grad_norm": 1045006.6875, + "learning_rate": 2.0126639529624604e-07, + "loss": 0.7467, + "step": 178 + }, + { + "epoch": 71.6, + "grad_norm": 1026289.9375, + "learning_rate": 2.0239710538218001e-07, + "loss": 0.7362, + "step": 179 + }, + { + "epoch": 72.0, + "grad_norm": 1030249.3125, + "learning_rate": 2.0352781546811396e-07, + "loss": 0.7209, + "step": 180 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.6110357304387155, + "eval_f1": 0.6986685353889278, + "eval_loss": 0.7215294241905212, + "eval_precision": 0.7501881113619263, + "eval_recall": 0.6537704918032787, + "eval_runtime": 2.4791, + "eval_samples_per_second": 891.846, + "eval_steps_per_second": 0.807, + "step": 180 + }, + { + "epoch": 72.4, + "grad_norm": 1063388.25, + "learning_rate": 2.0465852555404793e-07, + "loss": 0.746, + "step": 181 + }, + { + "epoch": 72.8, + "grad_norm": 1066436.25, + "learning_rate": 2.057892356399819e-07, + "loss": 0.7396, + "step": 182 + }, + { + "epoch": 72.8, + "eval_accuracy": 0.6110357304387155, + "eval_f1": 0.6986685353889278, + "eval_loss": 0.7196335792541504, + "eval_precision": 0.7501881113619263, + "eval_recall": 0.6537704918032787, + "eval_runtime": 2.4833, + "eval_samples_per_second": 890.338, + "eval_steps_per_second": 0.805, + "step": 182 + }, + { + "epoch": 73.2, + "grad_norm": 1024191.6875, + "learning_rate": 2.0691994572591588e-07, + "loss": 0.7351, + "step": 183 + }, + { + "epoch": 73.6, + "grad_norm": 1051571.125, + "learning_rate": 2.0805065581184983e-07, + "loss": 0.7386, + "step": 184 + }, + { + "epoch": 74.0, + "grad_norm": 1027219.625, + "learning_rate": 2.0918136589778383e-07, + "loss": 0.7319, + "step": 185 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.6119402985074627, + "eval_f1": 0.6995798319327731, + "eval_loss": 0.7167752385139465, + "eval_precision": 0.7505634861006761, + "eval_recall": 0.6550819672131147, + "eval_runtime": 2.8098, + "eval_samples_per_second": 786.899, + "eval_steps_per_second": 0.712, + "step": 185 + }, + { + "epoch": 74.4, + "grad_norm": 1022479.125, + "learning_rate": 2.103120759837178e-07, + "loss": 0.7291, + "step": 186 + }, + { + "epoch": 74.8, + "grad_norm": 1061070.125, + "learning_rate": 2.1144278606965175e-07, + "loss": 0.739, + "step": 187 + }, + { + "epoch": 74.8, + "eval_accuracy": 0.6123925825418363, + "eval_f1": 0.6998248686514886, + "eval_loss": 0.7148576974868774, + "eval_precision": 0.7511278195488722, + "eval_recall": 0.6550819672131147, + "eval_runtime": 2.5102, + "eval_samples_per_second": 880.811, + "eval_steps_per_second": 0.797, + "step": 187 + }, + { + "epoch": 75.2, + "grad_norm": 1046827.75, + "learning_rate": 2.1257349615558572e-07, + "loss": 0.7441, + "step": 188 + }, + { + "epoch": 75.6, + "grad_norm": 1032456.4375, + "learning_rate": 2.137042062415197e-07, + "loss": 0.7306, + "step": 189 + }, + { + "epoch": 76.0, + "grad_norm": 986433.0, + "learning_rate": 2.1483491632745366e-07, + "loss": 0.7208, + "step": 190 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.6132971506105834, + "eval_f1": 0.7007350367518376, + "eval_loss": 0.7119712829589844, + "eval_precision": 0.7515015015015015, + "eval_recall": 0.6563934426229509, + "eval_runtime": 2.4945, + "eval_samples_per_second": 886.339, + "eval_steps_per_second": 0.802, + "step": 190 + }, + { + "epoch": 76.4, + "grad_norm": 1053484.25, + "learning_rate": 2.159656264133876e-07, + "loss": 0.7389, + "step": 191 + }, + { + "epoch": 76.8, + "grad_norm": 1020256.4375, + "learning_rate": 2.1709633649932158e-07, + "loss": 0.7281, + "step": 192 + }, + { + "epoch": 76.8, + "eval_accuracy": 0.6132971506105834, + "eval_f1": 0.7007350367518376, + "eval_loss": 0.7100365161895752, + "eval_precision": 0.7515015015015015, + "eval_recall": 0.6563934426229509, + "eval_runtime": 2.5034, + "eval_samples_per_second": 883.181, + "eval_steps_per_second": 0.799, + "step": 192 + }, + { + "epoch": 77.2, + "grad_norm": 1018250.3125, + "learning_rate": 2.1822704658525556e-07, + "loss": 0.7251, + "step": 193 + }, + { + "epoch": 77.6, + "grad_norm": 1020130.0, + "learning_rate": 2.1935775667118953e-07, + "loss": 0.7299, + "step": 194 + }, + { + "epoch": 78.0, + "grad_norm": 1005927.0, + "learning_rate": 2.2048846675712348e-07, + "loss": 0.7224, + "step": 195 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.613749434644957, + "eval_f1": 0.7011896431070679, + "eval_loss": 0.7071288824081421, + "eval_precision": 0.7516879219804952, + "eval_recall": 0.6570491803278689, + "eval_runtime": 2.4917, + "eval_samples_per_second": 887.336, + "eval_steps_per_second": 0.803, + "step": 195 + }, + { + "epoch": 78.4, + "grad_norm": 1000251.3125, + "learning_rate": 2.2161917684305745e-07, + "loss": 0.7263, + "step": 196 + }, + { + "epoch": 78.8, + "grad_norm": 938441.0625, + "learning_rate": 2.2274988692899142e-07, + "loss": 0.7042, + "step": 197 + }, + { + "epoch": 78.8, + "eval_accuracy": 0.6146540027137042, + "eval_f1": 0.701889433170049, + "eval_loss": 0.705178439617157, + "eval_precision": 0.7524381095273819, + "eval_recall": 0.6577049180327869, + "eval_runtime": 2.4896, + "eval_samples_per_second": 888.099, + "eval_steps_per_second": 0.803, + "step": 197 + }, + { + "epoch": 79.2, + "grad_norm": 1057050.125, + "learning_rate": 2.2388059701492537e-07, + "loss": 0.7249, + "step": 198 + }, + { + "epoch": 79.6, + "grad_norm": 982639.6875, + "learning_rate": 2.2501130710085934e-07, + "loss": 0.7196, + "step": 199 + }, + { + "epoch": 80.0, + "grad_norm": 1043456.625, + "learning_rate": 2.2614201718679331e-07, + "loss": 0.7297, + "step": 200 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.613749434644957, + "eval_f1": 0.7009803921568627, + "eval_loss": 0.7022386789321899, + "eval_precision": 0.7520661157024794, + "eval_recall": 0.6563934426229509, + "eval_runtime": 2.5094, + "eval_samples_per_second": 881.07, + "eval_steps_per_second": 0.797, + "step": 200 + }, + { + "epoch": 80.4, + "grad_norm": 1004165.3125, + "learning_rate": 2.2727272727272729e-07, + "loss": 0.7258, + "step": 201 + }, + { + "epoch": 80.8, + "grad_norm": 966657.125, + "learning_rate": 2.2840343735866123e-07, + "loss": 0.7134, + "step": 202 + }, + { + "epoch": 80.8, + "eval_accuracy": 0.6155585707824514, + "eval_f1": 0.7025892232330301, + "eval_loss": 0.700272262096405, + "eval_precision": 0.7531882970742686, + "eval_recall": 0.658360655737705, + "eval_runtime": 2.4461, + "eval_samples_per_second": 903.877, + "eval_steps_per_second": 0.818, + "step": 202 + }, + { + "epoch": 81.2, + "grad_norm": 1000407.375, + "learning_rate": 2.295341474445952e-07, + "loss": 0.7167, + "step": 203 + }, + { + "epoch": 81.6, + "grad_norm": 931028.875, + "learning_rate": 2.3066485753052918e-07, + "loss": 0.706, + "step": 204 + }, + { + "epoch": 82.0, + "grad_norm": 949758.4375, + "learning_rate": 2.3179556761646315e-07, + "loss": 0.698, + "step": 205 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.6164631388511985, + "eval_f1": 0.7032890132960112, + "eval_loss": 0.6973046660423279, + "eval_precision": 0.7539384846211553, + "eval_recall": 0.659016393442623, + "eval_runtime": 2.4624, + "eval_samples_per_second": 897.887, + "eval_steps_per_second": 0.812, + "step": 205 + }, + { + "epoch": 82.4, + "grad_norm": 999580.6875, + "learning_rate": 2.329262777023971e-07, + "loss": 0.7209, + "step": 206 + }, + { + "epoch": 82.8, + "grad_norm": 907035.6875, + "learning_rate": 2.3405698778833107e-07, + "loss": 0.6979, + "step": 207 + }, + { + "epoch": 82.8, + "eval_accuracy": 0.6160108548168249, + "eval_f1": 0.7030430220356768, + "eval_loss": 0.6953163743019104, + "eval_precision": 0.7533733133433284, + "eval_recall": 0.659016393442623, + "eval_runtime": 2.4667, + "eval_samples_per_second": 896.33, + "eval_steps_per_second": 0.811, + "step": 207 + }, + { + "epoch": 83.2, + "grad_norm": 907903.4375, + "learning_rate": 2.3518769787426504e-07, + "loss": 0.7055, + "step": 208 + }, + { + "epoch": 83.6, + "grad_norm": 865137.8125, + "learning_rate": 2.36318407960199e-07, + "loss": 0.6919, + "step": 209 + }, + { + "epoch": 84.0, + "grad_norm": 1010854.625, + "learning_rate": 2.3744911804613296e-07, + "loss": 0.7229, + "step": 210 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.6164631388511985, + "eval_f1": 0.7034965034965035, + "eval_loss": 0.6923225522041321, + "eval_precision": 0.753558052434457, + "eval_recall": 0.659672131147541, + "eval_runtime": 2.4597, + "eval_samples_per_second": 898.891, + "eval_steps_per_second": 0.813, + "step": 210 + }, + { + "epoch": 84.4, + "grad_norm": 930195.25, + "learning_rate": 2.385798281320669e-07, + "loss": 0.7081, + "step": 211 + }, + { + "epoch": 84.8, + "grad_norm": 942246.75, + "learning_rate": 2.397105382180009e-07, + "loss": 0.7093, + "step": 212 + }, + { + "epoch": 84.8, + "eval_accuracy": 0.6155585707824514, + "eval_f1": 0.7030048916841369, + "eval_loss": 0.6903232336044312, + "eval_precision": 0.7524308152580403, + "eval_recall": 0.659672131147541, + "eval_runtime": 2.462, + "eval_samples_per_second": 898.057, + "eval_steps_per_second": 0.812, + "step": 212 + }, + { + "epoch": 85.2, + "grad_norm": 963994.75, + "learning_rate": 2.4084124830393486e-07, + "loss": 0.7084, + "step": 213 + }, + { + "epoch": 85.6, + "grad_norm": 967073.0, + "learning_rate": 2.4197195838986883e-07, + "loss": 0.7045, + "step": 214 + }, + { + "epoch": 86.0, + "grad_norm": 990376.4375, + "learning_rate": 2.431026684758028e-07, + "loss": 0.7048, + "step": 215 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.6178199909543193, + "eval_f1": 0.7050610820244329, + "eval_loss": 0.6873271465301514, + "eval_precision": 0.753731343283582, + "eval_recall": 0.6622950819672131, + "eval_runtime": 2.4724, + "eval_samples_per_second": 894.275, + "eval_steps_per_second": 0.809, + "step": 215 + }, + { + "epoch": 86.4, + "grad_norm": 916908.125, + "learning_rate": 2.442333785617368e-07, + "loss": 0.7013, + "step": 216 + }, + { + "epoch": 86.8, + "grad_norm": 939352.375, + "learning_rate": 2.4536408864767075e-07, + "loss": 0.7035, + "step": 217 + }, + { + "epoch": 86.8, + "eval_accuracy": 0.6182722749886929, + "eval_f1": 0.7055129099790649, + "eval_loss": 0.6853281259536743, + "eval_precision": 0.7539149888143176, + "eval_recall": 0.6629508196721311, + "eval_runtime": 2.4633, + "eval_samples_per_second": 897.582, + "eval_steps_per_second": 0.812, + "step": 217 + }, + { + "epoch": 87.2, + "grad_norm": 993506.0, + "learning_rate": 2.464947987336047e-07, + "loss": 0.7045, + "step": 218 + }, + { + "epoch": 87.6, + "grad_norm": 887710.5625, + "learning_rate": 2.4762550881953864e-07, + "loss": 0.6927, + "step": 219 + }, + { + "epoch": 88.0, + "grad_norm": 925844.125, + "learning_rate": 2.487562189054726e-07, + "loss": 0.7058, + "step": 220 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.6196291270918136, + "eval_f1": 0.706662016044646, + "eval_loss": 0.6823265552520752, + "eval_precision": 0.7548435171385991, + "eval_recall": 0.6642622950819672, + "eval_runtime": 2.4592, + "eval_samples_per_second": 899.065, + "eval_steps_per_second": 0.813, + "step": 220 + }, + { + "epoch": 88.4, + "grad_norm": 968884.8125, + "learning_rate": 2.498869289914066e-07, + "loss": 0.7045, + "step": 221 + }, + { + "epoch": 88.8, + "grad_norm": 898378.5, + "learning_rate": 2.5101763907734056e-07, + "loss": 0.6918, + "step": 222 + }, + { + "epoch": 88.8, + "eval_accuracy": 0.621438263229308, + "eval_f1": 0.7084639498432602, + "eval_loss": 0.6803296804428101, + "eval_precision": 0.7555720653789004, + "eval_recall": 0.6668852459016393, + "eval_runtime": 2.474, + "eval_samples_per_second": 893.696, + "eval_steps_per_second": 0.808, + "step": 222 + }, + { + "epoch": 89.2, + "grad_norm": 872072.4375, + "learning_rate": 2.5214834916327453e-07, + "loss": 0.6837, + "step": 223 + }, + { + "epoch": 89.6, + "grad_norm": 807226.875, + "learning_rate": 2.532790592492085e-07, + "loss": 0.6794, + "step": 224 + }, + { + "epoch": 90.0, + "grad_norm": 959766.0, + "learning_rate": 2.544097693351425e-07, + "loss": 0.7094, + "step": 225 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.6250565355042967, + "eval_f1": 0.7118526242613834, + "eval_loss": 0.6773191094398499, + "eval_precision": 0.757396449704142, + "eval_recall": 0.6714754098360656, + "eval_runtime": 2.4675, + "eval_samples_per_second": 896.032, + "eval_steps_per_second": 0.811, + "step": 225 + }, + { + "epoch": 90.4, + "grad_norm": 854262.6875, + "learning_rate": 2.555404794210764e-07, + "loss": 0.6947, + "step": 226 + }, + { + "epoch": 90.8, + "grad_norm": 865658.4375, + "learning_rate": 2.5667118950701037e-07, + "loss": 0.682, + "step": 227 + }, + { + "epoch": 90.8, + "eval_accuracy": 0.6259611035730439, + "eval_f1": 0.7123478260869566, + "eval_loss": 0.6753115057945251, + "eval_precision": 0.7585185185185185, + "eval_recall": 0.6714754098360656, + "eval_runtime": 2.7279, + "eval_samples_per_second": 810.501, + "eval_steps_per_second": 0.733, + "step": 227 + }, + { + "epoch": 91.2, + "grad_norm": 901009.875, + "learning_rate": 2.5780189959294434e-07, + "loss": 0.6845, + "step": 228 + }, + { + "epoch": 91.6, + "grad_norm": 861602.6875, + "learning_rate": 2.589326096788783e-07, + "loss": 0.6888, + "step": 229 + }, + { + "epoch": 92.0, + "grad_norm": 926635.375, + "learning_rate": 2.600633197648123e-07, + "loss": 0.6894, + "step": 230 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.6277702397105382, + "eval_f1": 0.7137391304347827, + "eval_loss": 0.6722956895828247, + "eval_precision": 0.76, + "eval_recall": 0.6727868852459017, + "eval_runtime": 2.4762, + "eval_samples_per_second": 892.897, + "eval_steps_per_second": 0.808, + "step": 230 + }, + { + "epoch": 92.4, + "grad_norm": 831033.75, + "learning_rate": 2.6119402985074626e-07, + "loss": 0.6812, + "step": 231 + }, + { + "epoch": 92.8, + "grad_norm": 877895.4375, + "learning_rate": 2.6232473993668024e-07, + "loss": 0.6914, + "step": 232 + }, + { + "epoch": 92.8, + "eval_accuracy": 0.6295793758480326, + "eval_f1": 0.7151304347826087, + "eval_loss": 0.6702900528907776, + "eval_precision": 0.7614814814814815, + "eval_recall": 0.6740983606557377, + "eval_runtime": 2.4823, + "eval_samples_per_second": 890.704, + "eval_steps_per_second": 0.806, + "step": 232 + }, + { + "epoch": 93.2, + "grad_norm": 932523.4375, + "learning_rate": 2.6345545002261416e-07, + "loss": 0.6942, + "step": 233 + }, + { + "epoch": 93.6, + "grad_norm": 899756.4375, + "learning_rate": 2.6458616010854813e-07, + "loss": 0.6945, + "step": 234 + }, + { + "epoch": 94.0, + "grad_norm": 920719.25, + "learning_rate": 2.6571687019448215e-07, + "loss": 0.7007, + "step": 235 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.6322930800542741, + "eval_f1": 0.7172173913043478, + "eval_loss": 0.6672995090484619, + "eval_precision": 0.7637037037037037, + "eval_recall": 0.6760655737704918, + "eval_runtime": 2.4904, + "eval_samples_per_second": 887.796, + "eval_steps_per_second": 0.803, + "step": 235 + }, + { + "epoch": 94.4, + "grad_norm": 867441.5, + "learning_rate": 2.6684758028041613e-07, + "loss": 0.6878, + "step": 236 + }, + { + "epoch": 94.8, + "grad_norm": 817898.4375, + "learning_rate": 2.679782903663501e-07, + "loss": 0.6729, + "step": 237 + }, + { + "epoch": 94.8, + "eval_accuracy": 0.6331976481230213, + "eval_f1": 0.7177166724678037, + "eval_loss": 0.665316104888916, + "eval_precision": 0.7648367952522255, + "eval_recall": 0.6760655737704918, + "eval_runtime": 2.4869, + "eval_samples_per_second": 889.063, + "eval_steps_per_second": 0.804, + "step": 237 + }, + { + "epoch": 95.2, + "grad_norm": 880826.8125, + "learning_rate": 2.6910900045228407e-07, + "loss": 0.6852, + "step": 238 + }, + { + "epoch": 95.6, + "grad_norm": 816809.0, + "learning_rate": 2.7023971053821805e-07, + "loss": 0.6755, + "step": 239 + }, + { + "epoch": 96.0, + "grad_norm": 932376.875, + "learning_rate": 2.71370420624152e-07, + "loss": 0.6929, + "step": 240 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.6350067842605156, + "eval_f1": 0.7189132706374086, + "eval_loss": 0.6623416543006897, + "eval_precision": 0.7667161961367014, + "eval_recall": 0.6767213114754098, + "eval_runtime": 2.4807, + "eval_samples_per_second": 891.268, + "eval_steps_per_second": 0.806, + "step": 240 + }, + { + "epoch": 96.4, + "grad_norm": 797217.6875, + "learning_rate": 2.7250113071008594e-07, + "loss": 0.6731, + "step": 241 + }, + { + "epoch": 96.8, + "grad_norm": 818184.0625, + "learning_rate": 2.736318407960199e-07, + "loss": 0.6748, + "step": 242 + }, + { + "epoch": 96.8, + "eval_accuracy": 0.6372682044323835, + "eval_f1": 0.720946416144746, + "eval_loss": 0.6603572964668274, + "eval_precision": 0.7679762787249814, + "eval_recall": 0.6793442622950819, + "eval_runtime": 2.4961, + "eval_samples_per_second": 885.764, + "eval_steps_per_second": 0.801, + "step": 242 + }, + { + "epoch": 97.2, + "grad_norm": 817781.875, + "learning_rate": 2.747625508819539e-07, + "loss": 0.687, + "step": 243 + }, + { + "epoch": 97.6, + "grad_norm": 811900.5, + "learning_rate": 2.7589326096788786e-07, + "loss": 0.6745, + "step": 244 + }, + { + "epoch": 98.0, + "grad_norm": 833195.0625, + "learning_rate": 2.7702397105382183e-07, + "loss": 0.6769, + "step": 245 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.6404341926729986, + "eval_f1": 0.7236704900938478, + "eval_loss": 0.6573842763900757, + "eval_precision": 0.7699704142011834, + "eval_recall": 0.6826229508196722, + "eval_runtime": 2.4684, + "eval_samples_per_second": 895.718, + "eval_steps_per_second": 0.81, + "step": 245 + }, + { + "epoch": 98.4, + "grad_norm": 758542.125, + "learning_rate": 2.781546811397558e-07, + "loss": 0.6673, + "step": 246 + }, + { + "epoch": 98.8, + "grad_norm": 834586.6875, + "learning_rate": 2.792853912256898e-07, + "loss": 0.6811, + "step": 247 + }, + { + "epoch": 98.8, + "eval_accuracy": 0.6417910447761194, + "eval_f1": 0.7248088950660181, + "eval_loss": 0.6554076671600342, + "eval_precision": 0.770879526977088, + "eval_recall": 0.6839344262295082, + "eval_runtime": 2.7427, + "eval_samples_per_second": 806.146, + "eval_steps_per_second": 0.729, + "step": 247 + }, + { + "epoch": 99.2, + "grad_norm": 849685.875, + "learning_rate": 2.804161013116237e-07, + "loss": 0.6838, + "step": 248 + }, + { + "epoch": 99.6, + "grad_norm": 769543.1875, + "learning_rate": 2.8154681139755767e-07, + "loss": 0.6616, + "step": 249 + }, + { + "epoch": 100.0, + "grad_norm": 738949.625, + "learning_rate": 2.8267752148349164e-07, + "loss": 0.6628, + "step": 250 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.6431478968792401, + "eval_f1": 0.7263267429760666, + "eval_loss": 0.6524556875228882, + "eval_precision": 0.7709867452135494, + "eval_recall": 0.6865573770491803, + "eval_runtime": 2.4714, + "eval_samples_per_second": 894.639, + "eval_steps_per_second": 0.809, + "step": 250 + }, + { + "epoch": 100.4, + "grad_norm": 768420.25, + "learning_rate": 2.838082315694256e-07, + "loss": 0.6697, + "step": 251 + }, + { + "epoch": 100.8, + "grad_norm": 727670.5625, + "learning_rate": 2.849389416553596e-07, + "loss": 0.6557, + "step": 252 + }, + { + "epoch": 100.8, + "eval_accuracy": 0.6449570330167345, + "eval_f1": 0.7280914444059577, + "eval_loss": 0.6504951119422913, + "eval_precision": 0.7716593245227606, + "eval_recall": 0.6891803278688524, + "eval_runtime": 2.4796, + "eval_samples_per_second": 891.66, + "eval_steps_per_second": 0.807, + "step": 252 + }, + { + "epoch": 101.2, + "grad_norm": 751682.6875, + "learning_rate": 2.8606965174129356e-07, + "loss": 0.6706, + "step": 253 + }, + { + "epoch": 101.6, + "grad_norm": 691285.625, + "learning_rate": 2.8720036182722753e-07, + "loss": 0.6511, + "step": 254 + }, + { + "epoch": 102.0, + "grad_norm": 701323.8125, + "learning_rate": 2.883310719131615e-07, + "loss": 0.6599, + "step": 255 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.6472184531886025, + "eval_f1": 0.7301038062283737, + "eval_loss": 0.6475762128829956, + "eval_precision": 0.7728937728937729, + "eval_recall": 0.6918032786885245, + "eval_runtime": 2.5082, + "eval_samples_per_second": 881.511, + "eval_steps_per_second": 0.797, + "step": 255 + }, + { + "epoch": 102.4, + "grad_norm": 745999.5625, + "learning_rate": 2.8946178199909543e-07, + "loss": 0.6648, + "step": 256 + }, + { + "epoch": 102.8, + "grad_norm": 746598.875, + "learning_rate": 2.905924920850294e-07, + "loss": 0.6622, + "step": 257 + }, + { + "epoch": 102.8, + "eval_accuracy": 0.6476707372229761, + "eval_f1": 0.7301697263595428, + "eval_loss": 0.6456369757652283, + "eval_precision": 0.7738619676945668, + "eval_recall": 0.6911475409836065, + "eval_runtime": 2.4902, + "eval_samples_per_second": 887.897, + "eval_steps_per_second": 0.803, + "step": 257 + }, + { + "epoch": 103.2, + "grad_norm": 704690.875, + "learning_rate": 2.9172320217096337e-07, + "loss": 0.6525, + "step": 258 + }, + { + "epoch": 103.6, + "grad_norm": 747314.3125, + "learning_rate": 2.9285391225689735e-07, + "loss": 0.6655, + "step": 259 + }, + { + "epoch": 104.0, + "grad_norm": 679271.9375, + "learning_rate": 2.939846223428313e-07, + "loss": 0.6593, + "step": 260 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.649932157394844, + "eval_f1": 0.7323651452282157, + "eval_loss": 0.642740786075592, + "eval_precision": 0.7746891002194587, + "eval_recall": 0.6944262295081968, + "eval_runtime": 2.5131, + "eval_samples_per_second": 879.78, + "eval_steps_per_second": 0.796, + "step": 260 + }, + { + "epoch": 104.4, + "grad_norm": 659205.625, + "learning_rate": 2.951153324287653e-07, + "loss": 0.6461, + "step": 261 + }, + { + "epoch": 104.8, + "grad_norm": 743526.125, + "learning_rate": 2.9624604251469926e-07, + "loss": 0.6589, + "step": 262 + }, + { + "epoch": 104.8, + "eval_accuracy": 0.6517412935323383, + "eval_f1": 0.7341160220994475, + "eval_loss": 0.6408228874206543, + "eval_precision": 0.775346462436178, + "eval_recall": 0.6970491803278689, + "eval_runtime": 2.5084, + "eval_samples_per_second": 881.436, + "eval_steps_per_second": 0.797, + "step": 262 + }, + { + "epoch": 105.2, + "grad_norm": 759219.375, + "learning_rate": 2.973767526006332e-07, + "loss": 0.6658, + "step": 263 + }, + { + "epoch": 105.6, + "grad_norm": 665255.75, + "learning_rate": 2.9850746268656716e-07, + "loss": 0.6437, + "step": 264 + }, + { + "epoch": 106.0, + "grad_norm": 748501.375, + "learning_rate": 2.9963817277250113e-07, + "loss": 0.658, + "step": 265 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.6558118498417006, + "eval_f1": 0.7380378657487091, + "eval_loss": 0.637968897819519, + "eval_precision": 0.7768115942028986, + "eval_recall": 0.7029508196721311, + "eval_runtime": 2.5122, + "eval_samples_per_second": 880.089, + "eval_steps_per_second": 0.796, + "step": 265 + }, + { + "epoch": 106.4, + "grad_norm": 639286.3125, + "learning_rate": 3.007688828584351e-07, + "loss": 0.6432, + "step": 266 + }, + { + "epoch": 106.8, + "grad_norm": 681826.125, + "learning_rate": 3.018995929443691e-07, + "loss": 0.651, + "step": 267 + }, + { + "epoch": 106.8, + "eval_accuracy": 0.6576209859791949, + "eval_f1": 0.7397731179099347, + "eval_loss": 0.6360702514648438, + "eval_precision": 0.7774566473988439, + "eval_recall": 0.7055737704918033, + "eval_runtime": 2.7881, + "eval_samples_per_second": 793.014, + "eval_steps_per_second": 0.717, + "step": 267 + }, + { + "epoch": 107.2, + "grad_norm": 734258.875, + "learning_rate": 3.0303030303030305e-07, + "loss": 0.6475, + "step": 268 + }, + { + "epoch": 107.6, + "grad_norm": 640675.4375, + "learning_rate": 3.04161013116237e-07, + "loss": 0.6472, + "step": 269 + }, + { + "epoch": 108.0, + "grad_norm": 692492.5625, + "learning_rate": 3.0529172320217094e-07, + "loss": 0.6464, + "step": 270 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.6612392582541836, + "eval_f1": 0.7432293452176894, + "eval_loss": 0.6332418322563171, + "eval_precision": 0.7787356321839081, + "eval_recall": 0.7108196721311475, + "eval_runtime": 2.4697, + "eval_samples_per_second": 895.247, + "eval_steps_per_second": 0.81, + "step": 270 + }, + { + "epoch": 108.4, + "grad_norm": 664465.9375, + "learning_rate": 3.064224332881049e-07, + "loss": 0.6449, + "step": 271 + }, + { + "epoch": 108.8, + "grad_norm": 622949.6875, + "learning_rate": 3.075531433740389e-07, + "loss": 0.6435, + "step": 272 + }, + { + "epoch": 108.8, + "eval_accuracy": 0.6644052464947987, + "eval_f1": 0.7458904109589041, + "eval_loss": 0.6313766837120056, + "eval_precision": 0.7806451612903226, + "eval_recall": 0.7140983606557377, + "eval_runtime": 2.4699, + "eval_samples_per_second": 895.169, + "eval_steps_per_second": 0.81, + "step": 272 + }, + { + "epoch": 109.2, + "grad_norm": 703384.625, + "learning_rate": 3.0868385345997286e-07, + "loss": 0.6448, + "step": 273 + }, + { + "epoch": 109.6, + "grad_norm": 645732.25, + "learning_rate": 3.0981456354590683e-07, + "loss": 0.6486, + "step": 274 + }, + { + "epoch": 110.0, + "grad_norm": 678895.0, + "learning_rate": 3.109452736318408e-07, + "loss": 0.6413, + "step": 275 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.6666666666666666, + "eval_f1": 0.7478617858364693, + "eval_loss": 0.6286011338233948, + "eval_precision": 0.7818311874105865, + "eval_recall": 0.7167213114754099, + "eval_runtime": 2.4752, + "eval_samples_per_second": 893.251, + "eval_steps_per_second": 0.808, + "step": 275 + }, + { + "epoch": 110.4, + "grad_norm": 630542.25, + "learning_rate": 3.120759837177748e-07, + "loss": 0.6366, + "step": 276 + }, + { + "epoch": 110.8, + "grad_norm": 609661.25, + "learning_rate": 3.1320669380370875e-07, + "loss": 0.6372, + "step": 277 + }, + { + "epoch": 110.8, + "eval_accuracy": 0.668475802804161, + "eval_f1": 0.7492302429011289, + "eval_loss": 0.626769483089447, + "eval_precision": 0.7832618025751072, + "eval_recall": 0.7180327868852459, + "eval_runtime": 2.4793, + "eval_samples_per_second": 891.787, + "eval_steps_per_second": 0.807, + "step": 277 + }, + { + "epoch": 111.2, + "grad_norm": 581969.5, + "learning_rate": 3.1433740388964267e-07, + "loss": 0.6314, + "step": 278 + }, + { + "epoch": 111.6, + "grad_norm": 671316.0, + "learning_rate": 3.1546811397557664e-07, + "loss": 0.6408, + "step": 279 + }, + { + "epoch": 112.0, + "grad_norm": 699136.0, + "learning_rate": 3.165988240615106e-07, + "loss": 0.6559, + "step": 280 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.6725463591135233, + "eval_f1": 0.7527322404371585, + "eval_loss": 0.6240522265434265, + "eval_precision": 0.7854597291518175, + "eval_recall": 0.7226229508196721, + "eval_runtime": 2.4766, + "eval_samples_per_second": 892.742, + "eval_steps_per_second": 0.808, + "step": 280 + }, + { + "epoch": 112.4, + "grad_norm": 648870.5, + "learning_rate": 3.177295341474446e-07, + "loss": 0.6434, + "step": 281 + }, + { + "epoch": 112.8, + "grad_norm": 570294.875, + "learning_rate": 3.1886024423337856e-07, + "loss": 0.6346, + "step": 282 + }, + { + "epoch": 112.8, + "eval_accuracy": 0.6743554952510177, + "eval_f1": 0.754601226993865, + "eval_loss": 0.6222699880599976, + "eval_precision": 0.7856635911994322, + "eval_recall": 0.7259016393442623, + "eval_runtime": 2.5428, + "eval_samples_per_second": 869.528, + "eval_steps_per_second": 0.787, + "step": 282 + }, + { + "epoch": 113.2, + "grad_norm": 673931.3125, + "learning_rate": 3.1999095431931254e-07, + "loss": 0.6405, + "step": 283 + }, + { + "epoch": 113.6, + "grad_norm": 585639.0, + "learning_rate": 3.211216644052465e-07, + "loss": 0.6329, + "step": 284 + }, + { + "epoch": 114.0, + "grad_norm": 563106.8125, + "learning_rate": 3.2225237449118043e-07, + "loss": 0.6296, + "step": 285 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.6784260515603799, + "eval_f1": 0.7579162410623085, + "eval_loss": 0.6196380257606506, + "eval_precision": 0.7882436260623229, + "eval_recall": 0.7298360655737705, + "eval_runtime": 2.4619, + "eval_samples_per_second": 898.101, + "eval_steps_per_second": 0.812, + "step": 285 + }, + { + "epoch": 114.4, + "grad_norm": 586742.4375, + "learning_rate": 3.233830845771144e-07, + "loss": 0.6388, + "step": 286 + }, + { + "epoch": 114.8, + "grad_norm": 557021.4375, + "learning_rate": 3.245137946630484e-07, + "loss": 0.6238, + "step": 287 + }, + { + "epoch": 114.8, + "eval_accuracy": 0.6829488919041158, + "eval_f1": 0.7616456987419246, + "eval_loss": 0.6179050207138062, + "eval_precision": 0.7909604519774012, + "eval_recall": 0.7344262295081967, + "eval_runtime": 2.4767, + "eval_samples_per_second": 892.711, + "eval_steps_per_second": 0.808, + "step": 287 + }, + { + "epoch": 115.2, + "grad_norm": 574071.1875, + "learning_rate": 3.2564450474898235e-07, + "loss": 0.6233, + "step": 288 + }, + { + "epoch": 115.6, + "grad_norm": 592520.625, + "learning_rate": 3.267752148349163e-07, + "loss": 0.633, + "step": 289 + }, + { + "epoch": 116.0, + "grad_norm": 609240.25, + "learning_rate": 3.279059249208503e-07, + "loss": 0.6379, + "step": 290 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.6847580280416101, + "eval_f1": 0.7633276740237691, + "eval_loss": 0.6153302192687988, + "eval_precision": 0.7915492957746478, + "eval_recall": 0.7370491803278688, + "eval_runtime": 2.4784, + "eval_samples_per_second": 892.094, + "eval_steps_per_second": 0.807, + "step": 290 + }, + { + "epoch": 116.4, + "grad_norm": 542336.5, + "learning_rate": 3.2903663500678427e-07, + "loss": 0.6193, + "step": 291 + }, + { + "epoch": 116.8, + "grad_norm": 586441.75, + "learning_rate": 3.3016734509271824e-07, + "loss": 0.6336, + "step": 292 + }, + { + "epoch": 116.8, + "eval_accuracy": 0.689280868385346, + "eval_f1": 0.766723259762309, + "eval_loss": 0.6136343479156494, + "eval_precision": 0.7950704225352113, + "eval_recall": 0.740327868852459, + "eval_runtime": 2.4588, + "eval_samples_per_second": 899.216, + "eval_steps_per_second": 0.813, + "step": 292 + }, + { + "epoch": 117.2, + "grad_norm": 507091.625, + "learning_rate": 3.3129805517865216e-07, + "loss": 0.6139, + "step": 293 + }, + { + "epoch": 117.6, + "grad_norm": 546646.5625, + "learning_rate": 3.3242876526458613e-07, + "loss": 0.628, + "step": 294 + }, + { + "epoch": 118.0, + "grad_norm": 510370.21875, + "learning_rate": 3.335594753505201e-07, + "loss": 0.6179, + "step": 295 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.6901854364540931, + "eval_f1": 0.7677178704645643, + "eval_loss": 0.6111193299293518, + "eval_precision": 0.7949438202247191, + "eval_recall": 0.7422950819672132, + "eval_runtime": 2.4632, + "eval_samples_per_second": 897.621, + "eval_steps_per_second": 0.812, + "step": 295 + }, + { + "epoch": 118.4, + "grad_norm": 492218.6875, + "learning_rate": 3.346901854364541e-07, + "loss": 0.6193, + "step": 296 + }, + { + "epoch": 118.8, + "grad_norm": 505481.96875, + "learning_rate": 3.3582089552238805e-07, + "loss": 0.62, + "step": 297 + }, + { + "epoch": 118.8, + "eval_accuracy": 0.6933514246947082, + "eval_f1": 0.7700135685210312, + "eval_loss": 0.6094594597816467, + "eval_precision": 0.797610681658468, + "eval_recall": 0.7442622950819672, + "eval_runtime": 2.4932, + "eval_samples_per_second": 886.82, + "eval_steps_per_second": 0.802, + "step": 297 + }, + { + "epoch": 119.2, + "grad_norm": 575668.9375, + "learning_rate": 3.36951605608322e-07, + "loss": 0.64, + "step": 298 + }, + { + "epoch": 119.6, + "grad_norm": 531922.75, + "learning_rate": 3.38082315694256e-07, + "loss": 0.6252, + "step": 299 + }, + { + "epoch": 120.0, + "grad_norm": 474776.3125, + "learning_rate": 3.392130257801899e-07, + "loss": 0.615, + "step": 300 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.6996834011759385, + "eval_f1": 0.77552400270453, + "eval_loss": 0.6070261597633362, + "eval_precision": 0.8004187020237264, + "eval_recall": 0.7521311475409836, + "eval_runtime": 2.4763, + "eval_samples_per_second": 892.854, + "eval_steps_per_second": 0.808, + "step": 300 + }, + { + "epoch": 120.4, + "grad_norm": 492731.3125, + "learning_rate": 3.403437358661239e-07, + "loss": 0.6202, + "step": 301 + }, + { + "epoch": 120.8, + "grad_norm": 469136.78125, + "learning_rate": 3.414744459520579e-07, + "loss": 0.6138, + "step": 302 + }, + { + "epoch": 120.8, + "eval_accuracy": 0.7019448213478064, + "eval_f1": 0.7781891618983507, + "eval_loss": 0.6054213643074036, + "eval_precision": 0.7994467496542186, + "eval_recall": 0.7580327868852459, + "eval_runtime": 2.5404, + "eval_samples_per_second": 870.326, + "eval_steps_per_second": 0.787, + "step": 302 + }, + { + "epoch": 121.2, + "grad_norm": 461077.1875, + "learning_rate": 3.426051560379919e-07, + "loss": 0.6212, + "step": 303 + }, + { + "epoch": 121.6, + "grad_norm": 455926.34375, + "learning_rate": 3.4373586612392586e-07, + "loss": 0.612, + "step": 304 + }, + { + "epoch": 122.0, + "grad_norm": 465265.46875, + "learning_rate": 3.4486657620985983e-07, + "loss": 0.6183, + "step": 305 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.7087290818634102, + "eval_f1": 0.7841823056300268, + "eval_loss": 0.6030628085136414, + "eval_precision": 0.8019191226867718, + "eval_recall": 0.7672131147540984, + "eval_runtime": 2.48, + "eval_samples_per_second": 891.533, + "eval_steps_per_second": 0.806, + "step": 305 + }, + { + "epoch": 122.4, + "grad_norm": 458229.625, + "learning_rate": 3.459972862957938e-07, + "loss": 0.6097, + "step": 306 + }, + { + "epoch": 122.8, + "grad_norm": 434778.3125, + "learning_rate": 3.471279963817278e-07, + "loss": 0.6094, + "step": 307 + }, + { + "epoch": 122.8, + "eval_accuracy": 0.7114427860696517, + "eval_f1": 0.7870493991989319, + "eval_loss": 0.6015117168426514, + "eval_precision": 0.8014955812372536, + "eval_recall": 0.7731147540983606, + "eval_runtime": 2.4894, + "eval_samples_per_second": 888.166, + "eval_steps_per_second": 0.803, + "step": 307 + }, + { + "epoch": 123.2, + "grad_norm": 512501.28125, + "learning_rate": 3.482587064676617e-07, + "loss": 0.6178, + "step": 308 + }, + { + "epoch": 123.6, + "grad_norm": 486719.5625, + "learning_rate": 3.493894165535957e-07, + "loss": 0.611, + "step": 309 + }, + { + "epoch": 124.0, + "grad_norm": 400487.28125, + "learning_rate": 3.5052012663952965e-07, + "loss": 0.6187, + "step": 310 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.7150610583446404, + "eval_f1": 0.7901399067288475, + "eval_loss": 0.5992367267608643, + "eval_precision": 0.8029790115098172, + "eval_recall": 0.7777049180327869, + "eval_runtime": 2.7478, + "eval_samples_per_second": 804.651, + "eval_steps_per_second": 0.728, + "step": 310 + }, + { + "epoch": 124.4, + "grad_norm": 459478.0, + "learning_rate": 3.516508367254636e-07, + "loss": 0.6104, + "step": 311 + }, + { + "epoch": 124.8, + "grad_norm": 438219.40625, + "learning_rate": 3.527815468113976e-07, + "loss": 0.6149, + "step": 312 + }, + { + "epoch": 124.8, + "eval_accuracy": 0.7200361827227499, + "eval_f1": 0.7942838152210037, + "eval_loss": 0.5977560877799988, + "eval_precision": 0.805256064690027, + "eval_recall": 0.7836065573770492, + "eval_runtime": 2.4698, + "eval_samples_per_second": 895.2, + "eval_steps_per_second": 0.81, + "step": 312 + }, + { + "epoch": 125.2, + "grad_norm": 465141.84375, + "learning_rate": 3.5391225689733157e-07, + "loss": 0.6125, + "step": 313 + }, + { + "epoch": 125.6, + "grad_norm": 411344.875, + "learning_rate": 3.5504296698326554e-07, + "loss": 0.6072, + "step": 314 + }, + { + "epoch": 126.0, + "grad_norm": 497907.0625, + "learning_rate": 3.5617367706919946e-07, + "loss": 0.6102, + "step": 315 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.725463591135233, + "eval_f1": 0.7981376787495843, + "eval_loss": 0.5956078171730042, + "eval_precision": 0.8097165991902834, + "eval_recall": 0.7868852459016393, + "eval_runtime": 2.4747, + "eval_samples_per_second": 893.453, + "eval_steps_per_second": 0.808, + "step": 315 + }, + { + "epoch": 126.4, + "grad_norm": 361023.15625, + "learning_rate": 3.5730438715513343e-07, + "loss": 0.6043, + "step": 316 + }, + { + "epoch": 126.8, + "grad_norm": 460765.03125, + "learning_rate": 3.584350972410674e-07, + "loss": 0.6106, + "step": 317 + }, + { + "epoch": 126.8, + "eval_accuracy": 0.7268204432383537, + "eval_f1": 0.799468791500664, + "eval_loss": 0.5942061543464661, + "eval_precision": 0.8096839273705447, + "eval_recall": 0.7895081967213115, + "eval_runtime": 2.4816, + "eval_samples_per_second": 890.973, + "eval_steps_per_second": 0.806, + "step": 317 + }, + { + "epoch": 127.2, + "grad_norm": 386567.15625, + "learning_rate": 3.595658073270014e-07, + "loss": 0.6055, + "step": 318 + }, + { + "epoch": 127.6, + "grad_norm": 357971.5625, + "learning_rate": 3.6069651741293535e-07, + "loss": 0.6021, + "step": 319 + }, + { + "epoch": 128.0, + "grad_norm": 343736.15625, + "learning_rate": 3.618272274988693e-07, + "loss": 0.5944, + "step": 320 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.7304387155133424, + "eval_f1": 0.8019933554817276, + "eval_loss": 0.5921348929405212, + "eval_precision": 0.8127946127946128, + "eval_recall": 0.7914754098360656, + "eval_runtime": 2.4924, + "eval_samples_per_second": 887.095, + "eval_steps_per_second": 0.802, + "step": 320 + }, + { + "epoch": 128.4, + "grad_norm": 362144.46875, + "learning_rate": 3.629579375848033e-07, + "loss": 0.6057, + "step": 321 + }, + { + "epoch": 128.8, + "grad_norm": 340872.46875, + "learning_rate": 3.640886476707372e-07, + "loss": 0.5979, + "step": 322 + }, + { + "epoch": 128.8, + "eval_accuracy": 0.7345092718227046, + "eval_f1": 0.8055647565419013, + "eval_loss": 0.5907615423202515, + "eval_precision": 0.8139223560910308, + "eval_recall": 0.7973770491803279, + "eval_runtime": 2.4925, + "eval_samples_per_second": 887.066, + "eval_steps_per_second": 0.802, + "step": 322 + }, + { + "epoch": 129.2, + "grad_norm": 438782.875, + "learning_rate": 3.652193577566712e-07, + "loss": 0.6037, + "step": 323 + }, + { + "epoch": 129.6, + "grad_norm": 341456.1875, + "learning_rate": 3.6635006784260516e-07, + "loss": 0.6026, + "step": 324 + }, + { + "epoch": 130.0, + "grad_norm": 361043.78125, + "learning_rate": 3.6748077792853913e-07, + "loss": 0.5953, + "step": 325 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.7376752600633197, + "eval_f1": 0.8088332234673699, + "eval_loss": 0.588759183883667, + "eval_precision": 0.8131212723658051, + "eval_recall": 0.8045901639344263, + "eval_runtime": 2.4542, + "eval_samples_per_second": 900.894, + "eval_steps_per_second": 0.815, + "step": 325 + }, + { + "epoch": 130.4, + "grad_norm": 351720.90625, + "learning_rate": 3.686114880144731e-07, + "loss": 0.6071, + "step": 326 + }, + { + "epoch": 130.8, + "grad_norm": 289779.5625, + "learning_rate": 3.697421981004071e-07, + "loss": 0.59, + "step": 327 + }, + { + "epoch": 130.8, + "eval_accuracy": 0.7458163726820444, + "eval_f1": 0.8159790438768828, + "eval_loss": 0.5874533653259277, + "eval_precision": 0.8149117069980379, + "eval_recall": 0.8170491803278689, + "eval_runtime": 2.4859, + "eval_samples_per_second": 889.415, + "eval_steps_per_second": 0.805, + "step": 327 + }, + { + "epoch": 131.2, + "grad_norm": 335866.5, + "learning_rate": 3.7087290818634105e-07, + "loss": 0.5976, + "step": 328 + }, + { + "epoch": 131.6, + "grad_norm": 320890.75, + "learning_rate": 3.72003618272275e-07, + "loss": 0.6013, + "step": 329 + }, + { + "epoch": 132.0, + "grad_norm": 360011.65625, + "learning_rate": 3.7313432835820895e-07, + "loss": 0.5947, + "step": 330 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.7526006331976481, + "eval_f1": 0.8216498206716661, + "eval_loss": 0.5855240821838379, + "eval_precision": 0.8171206225680934, + "eval_recall": 0.8262295081967214, + "eval_runtime": 2.7496, + "eval_samples_per_second": 804.125, + "eval_steps_per_second": 0.727, + "step": 330 + }, + { + "epoch": 132.4, + "grad_norm": 321731.59375, + "learning_rate": 3.742650384441429e-07, + "loss": 0.5906, + "step": 331 + }, + { + "epoch": 132.8, + "grad_norm": 333490.03125, + "learning_rate": 3.753957485300769e-07, + "loss": 0.598, + "step": 332 + }, + { + "epoch": 132.8, + "eval_accuracy": 0.7553143374038896, + "eval_f1": 0.8233757753836108, + "eval_loss": 0.5842717885971069, + "eval_precision": 0.8198959687906372, + "eval_recall": 0.8268852459016394, + "eval_runtime": 2.4659, + "eval_samples_per_second": 896.646, + "eval_steps_per_second": 0.811, + "step": 332 + }, + { + "epoch": 133.2, + "grad_norm": 277266.40625, + "learning_rate": 3.7652645861601087e-07, + "loss": 0.5978, + "step": 333 + }, + { + "epoch": 133.6, + "grad_norm": 344736.0, + "learning_rate": 3.7765716870194484e-07, + "loss": 0.6004, + "step": 334 + }, + { + "epoch": 134.0, + "grad_norm": 302644.9375, + "learning_rate": 3.787878787878788e-07, + "loss": 0.5986, + "step": 335 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.7575757575757576, + "eval_f1": 0.8244924688932548, + "eval_loss": 0.5824664235115051, + "eval_precision": 0.8234139960758666, + "eval_recall": 0.8255737704918032, + "eval_runtime": 2.4743, + "eval_samples_per_second": 893.603, + "eval_steps_per_second": 0.808, + "step": 335 + }, + { + "epoch": 134.4, + "grad_norm": 290450.3125, + "learning_rate": 3.799185888738128e-07, + "loss": 0.5941, + "step": 336 + }, + { + "epoch": 134.8, + "grad_norm": 261067.46875, + "learning_rate": 3.810492989597467e-07, + "loss": 0.5924, + "step": 337 + }, + { + "epoch": 134.8, + "eval_accuracy": 0.7557666214382632, + "eval_f1": 0.8229508196721311, + "eval_loss": 0.5812984108924866, + "eval_precision": 0.8229508196721311, + "eval_recall": 0.8229508196721311, + "eval_runtime": 2.4952, + "eval_samples_per_second": 886.111, + "eval_steps_per_second": 0.802, + "step": 337 + }, + { + "epoch": 135.2, + "grad_norm": 283241.875, + "learning_rate": 3.821800090456807e-07, + "loss": 0.5928, + "step": 338 + }, + { + "epoch": 135.6, + "grad_norm": 249194.96875, + "learning_rate": 3.8331071913161465e-07, + "loss": 0.588, + "step": 339 + }, + { + "epoch": 136.0, + "grad_norm": 333682.96875, + "learning_rate": 3.844414292175486e-07, + "loss": 0.5848, + "step": 340 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.7598371777476255, + "eval_f1": 0.8258445391931781, + "eval_loss": 0.5795804858207703, + "eval_precision": 0.8261154855643045, + "eval_recall": 0.8255737704918032, + "eval_runtime": 2.5022, + "eval_samples_per_second": 883.627, + "eval_steps_per_second": 0.799, + "step": 340 + }, + { + "epoch": 136.4, + "grad_norm": 272966.625, + "learning_rate": 3.855721393034826e-07, + "loss": 0.5847, + "step": 341 + }, + { + "epoch": 136.8, + "grad_norm": 309474.125, + "learning_rate": 3.8670284938941657e-07, + "loss": 0.5926, + "step": 342 + }, + { + "epoch": 136.8, + "eval_accuracy": 0.7611940298507462, + "eval_f1": 0.8268852459016394, + "eval_loss": 0.5784401297569275, + "eval_precision": 0.8268852459016394, + "eval_recall": 0.8268852459016394, + "eval_runtime": 2.4914, + "eval_samples_per_second": 887.465, + "eval_steps_per_second": 0.803, + "step": 342 + }, + { + "epoch": 137.2, + "grad_norm": 255214.625, + "learning_rate": 3.8783355947535054e-07, + "loss": 0.5943, + "step": 343 + }, + { + "epoch": 137.6, + "grad_norm": 244797.453125, + "learning_rate": 3.8896426956128446e-07, + "loss": 0.5914, + "step": 344 + }, + { + "epoch": 138.0, + "grad_norm": 211911.59375, + "learning_rate": 3.9009497964721843e-07, + "loss": 0.5867, + "step": 345 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.7607417458163727, + "eval_f1": 0.8263866097801116, + "eval_loss": 0.5767723321914673, + "eval_precision": 0.8272010512483574, + "eval_recall": 0.8255737704918032, + "eval_runtime": 2.5219, + "eval_samples_per_second": 876.734, + "eval_steps_per_second": 0.793, + "step": 345 + }, + { + "epoch": 138.4, + "grad_norm": 238862.828125, + "learning_rate": 3.912256897331524e-07, + "loss": 0.5872, + "step": 346 + }, + { + "epoch": 138.8, + "grad_norm": 233874.015625, + "learning_rate": 3.923563998190864e-07, + "loss": 0.5876, + "step": 347 + }, + { + "epoch": 138.8, + "eval_accuracy": 0.7634554500226142, + "eval_f1": 0.8282430213464697, + "eval_loss": 0.5756635665893555, + "eval_precision": 0.8296052631578947, + "eval_recall": 0.8268852459016394, + "eval_runtime": 2.5554, + "eval_samples_per_second": 865.21, + "eval_steps_per_second": 0.783, + "step": 347 + }, + { + "epoch": 139.2, + "grad_norm": 232289.125, + "learning_rate": 3.9348710990502035e-07, + "loss": 0.59, + "step": 348 + }, + { + "epoch": 139.6, + "grad_norm": 225021.859375, + "learning_rate": 3.946178199909543e-07, + "loss": 0.5874, + "step": 349 + }, + { + "epoch": 140.0, + "grad_norm": 318561.9375, + "learning_rate": 3.957485300768883e-07, + "loss": 0.5886, + "step": 350 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.7688828584350973, + "eval_f1": 0.832623648869964, + "eval_loss": 0.5740185976028442, + "eval_precision": 0.8318062827225131, + "eval_recall": 0.8334426229508197, + "eval_runtime": 2.7829, + "eval_samples_per_second": 794.504, + "eval_steps_per_second": 0.719, + "step": 350 + }, + { + "epoch": 140.4, + "grad_norm": 246395.90625, + "learning_rate": 3.9687924016282227e-07, + "loss": 0.5798, + "step": 351 + }, + { + "epoch": 140.8, + "grad_norm": 220087.296875, + "learning_rate": 3.980099502487562e-07, + "loss": 0.5827, + "step": 352 + }, + { + "epoch": 140.8, + "eval_accuracy": 0.7697874265038445, + "eval_f1": 0.8334968923781485, + "eval_loss": 0.5729594230651855, + "eval_precision": 0.8315926892950392, + "eval_recall": 0.8354098360655737, + "eval_runtime": 2.5072, + "eval_samples_per_second": 881.86, + "eval_steps_per_second": 0.798, + "step": 352 + }, + { + "epoch": 141.2, + "grad_norm": 248733.953125, + "learning_rate": 3.9914066033469016e-07, + "loss": 0.5849, + "step": 353 + }, + { + "epoch": 141.6, + "grad_norm": 223019.4375, + "learning_rate": 4.0027137042062414e-07, + "loss": 0.5791, + "step": 354 + }, + { + "epoch": 142.0, + "grad_norm": 207991.953125, + "learning_rate": 4.014020805065581e-07, + "loss": 0.5871, + "step": 355 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.7761194029850746, + "eval_f1": 0.8382881411303496, + "eval_loss": 0.5713860988616943, + "eval_precision": 0.8352864583333334, + "eval_recall": 0.8413114754098361, + "eval_runtime": 2.5049, + "eval_samples_per_second": 882.673, + "eval_steps_per_second": 0.798, + "step": 355 + }, + { + "epoch": 142.4, + "grad_norm": 213412.484375, + "learning_rate": 4.025327905924921e-07, + "loss": 0.5833, + "step": 356 + }, + { + "epoch": 142.8, + "grad_norm": 210475.453125, + "learning_rate": 4.0366350067842606e-07, + "loss": 0.5819, + "step": 357 + }, + { + "epoch": 142.8, + "eval_accuracy": 0.7743102668475803, + "eval_f1": 0.8368747956848643, + "eval_loss": 0.5703323483467102, + "eval_precision": 0.834419817470665, + "eval_recall": 0.839344262295082, + "eval_runtime": 2.5171, + "eval_samples_per_second": 878.391, + "eval_steps_per_second": 0.795, + "step": 357 + }, + { + "epoch": 143.2, + "grad_norm": 200121.875, + "learning_rate": 4.0479421076436003e-07, + "loss": 0.5709, + "step": 358 + }, + { + "epoch": 143.6, + "grad_norm": 225925.140625, + "learning_rate": 4.0592492085029395e-07, + "loss": 0.5763, + "step": 359 + }, + { + "epoch": 144.0, + "grad_norm": 244748.8125, + "learning_rate": 4.070556309362279e-07, + "loss": 0.5705, + "step": 360 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.772501130710086, + "eval_f1": 0.8354596009159306, + "eval_loss": 0.5687353610992432, + "eval_precision": 0.8335509138381201, + "eval_recall": 0.8373770491803278, + "eval_runtime": 2.5212, + "eval_samples_per_second": 876.95, + "eval_steps_per_second": 0.793, + "step": 360 + }, + { + "epoch": 144.4, + "grad_norm": 265342.84375, + "learning_rate": 4.081863410221619e-07, + "loss": 0.5682, + "step": 361 + }, + { + "epoch": 144.8, + "grad_norm": 203337.0, + "learning_rate": 4.0931705110809587e-07, + "loss": 0.5792, + "step": 362 + }, + { + "epoch": 144.8, + "eval_accuracy": 0.7738579828132067, + "eval_f1": 0.8367080339647289, + "eval_loss": 0.5676863789558411, + "eval_precision": 0.8334417696811971, + "eval_recall": 0.84, + "eval_runtime": 2.5271, + "eval_samples_per_second": 874.926, + "eval_steps_per_second": 0.791, + "step": 362 + }, + { + "epoch": 145.2, + "grad_norm": 204154.828125, + "learning_rate": 4.1044776119402984e-07, + "loss": 0.5893, + "step": 363 + }, + { + "epoch": 145.6, + "grad_norm": 223704.296875, + "learning_rate": 4.115784712799638e-07, + "loss": 0.5783, + "step": 364 + }, + { + "epoch": 146.0, + "grad_norm": 251296.75, + "learning_rate": 4.127091813658978e-07, + "loss": 0.5612, + "step": 365 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.7734056987788331, + "eval_f1": 0.8361138370951914, + "eval_loss": 0.5661319494247437, + "eval_precision": 0.8342036553524804, + "eval_recall": 0.8380327868852459, + "eval_runtime": 2.434, + "eval_samples_per_second": 908.388, + "eval_steps_per_second": 0.822, + "step": 365 + }, + { + "epoch": 146.4, + "grad_norm": 207324.359375, + "learning_rate": 4.1383989145183176e-07, + "loss": 0.5721, + "step": 366 + }, + { + "epoch": 146.8, + "grad_norm": 223203.78125, + "learning_rate": 4.149706015377657e-07, + "loss": 0.5734, + "step": 367 + }, + { + "epoch": 146.8, + "eval_accuracy": 0.7747625508819539, + "eval_f1": 0.8370418848167539, + "eval_loss": 0.5650973320007324, + "eval_precision": 0.8354016982364467, + "eval_recall": 0.838688524590164, + "eval_runtime": 2.4663, + "eval_samples_per_second": 896.47, + "eval_steps_per_second": 0.811, + "step": 367 + }, + { + "epoch": 147.2, + "grad_norm": 315914.4375, + "learning_rate": 4.1610131162369965e-07, + "loss": 0.5591, + "step": 368 + }, + { + "epoch": 147.6, + "grad_norm": 220799.109375, + "learning_rate": 4.172320217096337e-07, + "loss": 0.5618, + "step": 369 + }, + { + "epoch": 148.0, + "grad_norm": 221213.8125, + "learning_rate": 4.1836273179556765e-07, + "loss": 0.5894, + "step": 370 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.777928539122569, + "eval_f1": 0.8394900294213795, + "eval_loss": 0.5635684132575989, + "eval_precision": 0.8370273794002607, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.5032, + "eval_samples_per_second": 883.26, + "eval_steps_per_second": 0.799, + "step": 370 + }, + { + "epoch": 148.4, + "grad_norm": 207135.640625, + "learning_rate": 4.194934418815016e-07, + "loss": 0.572, + "step": 371 + }, + { + "epoch": 148.8, + "grad_norm": 233456.703125, + "learning_rate": 4.206241519674356e-07, + "loss": 0.5618, + "step": 372 + }, + { + "epoch": 148.8, + "eval_accuracy": 0.7792853912256897, + "eval_f1": 0.8406270411495754, + "eval_loss": 0.5625642538070679, + "eval_precision": 0.8373454782042941, + "eval_recall": 0.8439344262295082, + "eval_runtime": 2.4774, + "eval_samples_per_second": 892.484, + "eval_steps_per_second": 0.807, + "step": 372 + }, + { + "epoch": 149.2, + "grad_norm": 228610.34375, + "learning_rate": 4.2175486205336957e-07, + "loss": 0.5697, + "step": 373 + }, + { + "epoch": 149.6, + "grad_norm": 205543.796875, + "learning_rate": 4.228855721393035e-07, + "loss": 0.5718, + "step": 374 + }, + { + "epoch": 150.0, + "grad_norm": 257913.8125, + "learning_rate": 4.2401628222523746e-07, + "loss": 0.5682, + "step": 375 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.7819990954319312, + "eval_f1": 0.842483660130719, + "eval_loss": 0.5610406398773193, + "eval_precision": 0.8397394136807818, + "eval_recall": 0.8452459016393442, + "eval_runtime": 2.4791, + "eval_samples_per_second": 891.869, + "eval_steps_per_second": 0.807, + "step": 375 + }, + { + "epoch": 150.4, + "grad_norm": 219972.265625, + "learning_rate": 4.2514699231117144e-07, + "loss": 0.5708, + "step": 376 + }, + { + "epoch": 150.8, + "grad_norm": 213017.140625, + "learning_rate": 4.262777023971054e-07, + "loss": 0.5683, + "step": 377 + }, + { + "epoch": 150.8, + "eval_accuracy": 0.783355947535052, + "eval_f1": 0.8437194127243067, + "eval_loss": 0.5600233674049377, + "eval_precision": 0.8396103896103896, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.4855, + "eval_samples_per_second": 889.551, + "eval_steps_per_second": 0.805, + "step": 377 + }, + { + "epoch": 151.2, + "grad_norm": 205141.96875, + "learning_rate": 4.274084124830394e-07, + "loss": 0.5716, + "step": 378 + }, + { + "epoch": 151.6, + "grad_norm": 239268.078125, + "learning_rate": 4.2853912256897335e-07, + "loss": 0.5645, + "step": 379 + }, + { + "epoch": 152.0, + "grad_norm": 240900.03125, + "learning_rate": 4.2966983265490733e-07, + "loss": 0.5672, + "step": 380 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.7869742198100407, + "eval_f1": 0.8466297622924129, + "eval_loss": 0.5585131645202637, + "eval_precision": 0.8408796895213454, + "eval_recall": 0.8524590163934426, + "eval_runtime": 2.4914, + "eval_samples_per_second": 887.456, + "eval_steps_per_second": 0.803, + "step": 380 + }, + { + "epoch": 152.4, + "grad_norm": 199751.296875, + "learning_rate": 4.3080054274084125e-07, + "loss": 0.5682, + "step": 381 + }, + { + "epoch": 152.8, + "grad_norm": 213552.8125, + "learning_rate": 4.319312528267752e-07, + "loss": 0.5708, + "step": 382 + }, + { + "epoch": 152.8, + "eval_accuracy": 0.7860696517412935, + "eval_f1": 0.8457776328659928, + "eval_loss": 0.5575217008590698, + "eval_precision": 0.8411154345006485, + "eval_recall": 0.8504918032786886, + "eval_runtime": 2.4946, + "eval_samples_per_second": 886.302, + "eval_steps_per_second": 0.802, + "step": 382 + }, + { + "epoch": 153.2, + "grad_norm": 212492.875, + "learning_rate": 4.330619629127092e-07, + "loss": 0.568, + "step": 383 + }, + { + "epoch": 153.6, + "grad_norm": 215492.6875, + "learning_rate": 4.3419267299864317e-07, + "loss": 0.572, + "step": 384 + }, + { + "epoch": 154.0, + "grad_norm": 262360.0, + "learning_rate": 4.3532338308457714e-07, + "loss": 0.5571, + "step": 385 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.7842605156037992, + "eval_f1": 0.8437602358336063, + "eval_loss": 0.5560232400894165, + "eval_precision": 0.8429319371727748, + "eval_recall": 0.8445901639344262, + "eval_runtime": 2.4882, + "eval_samples_per_second": 888.598, + "eval_steps_per_second": 0.804, + "step": 385 + }, + { + "epoch": 154.4, + "grad_norm": 216765.359375, + "learning_rate": 4.364540931705111e-07, + "loss": 0.5599, + "step": 386 + }, + { + "epoch": 154.8, + "grad_norm": 226638.59375, + "learning_rate": 4.375848032564451e-07, + "loss": 0.561, + "step": 387 + }, + { + "epoch": 154.8, + "eval_accuracy": 0.7829036635006784, + "eval_f1": 0.8427260812581914, + "eval_loss": 0.5549867749214172, + "eval_precision": 0.842174197773412, + "eval_recall": 0.8432786885245902, + "eval_runtime": 2.5021, + "eval_samples_per_second": 883.661, + "eval_steps_per_second": 0.799, + "step": 387 + }, + { + "epoch": 155.2, + "grad_norm": 227603.171875, + "learning_rate": 4.3871551334237906e-07, + "loss": 0.5622, + "step": 388 + }, + { + "epoch": 155.6, + "grad_norm": 202963.296875, + "learning_rate": 4.39846223428313e-07, + "loss": 0.5611, + "step": 389 + }, + { + "epoch": 156.0, + "grad_norm": 249987.25, + "learning_rate": 4.4097693351424695e-07, + "loss": 0.5727, + "step": 390 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.7838082315694256, + "eval_f1": 0.8432786885245902, + "eval_loss": 0.5534038543701172, + "eval_precision": 0.8432786885245902, + "eval_recall": 0.8432786885245902, + "eval_runtime": 2.5024, + "eval_samples_per_second": 883.567, + "eval_steps_per_second": 0.799, + "step": 390 + }, + { + "epoch": 156.4, + "grad_norm": 217987.90625, + "learning_rate": 4.421076436001809e-07, + "loss": 0.5609, + "step": 391 + }, + { + "epoch": 156.8, + "grad_norm": 212561.125, + "learning_rate": 4.432383536861149e-07, + "loss": 0.5609, + "step": 392 + }, + { + "epoch": 156.8, + "eval_accuracy": 0.7847127996381728, + "eval_f1": 0.8435239973701513, + "eval_loss": 0.5523592233657837, + "eval_precision": 0.8457481872116018, + "eval_recall": 0.8413114754098361, + "eval_runtime": 2.4853, + "eval_samples_per_second": 889.633, + "eval_steps_per_second": 0.805, + "step": 392 + }, + { + "epoch": 157.2, + "grad_norm": 306891.65625, + "learning_rate": 4.4436906377204887e-07, + "loss": 0.5469, + "step": 393 + }, + { + "epoch": 157.6, + "grad_norm": 217219.0625, + "learning_rate": 4.4549977385798284e-07, + "loss": 0.5633, + "step": 394 + }, + { + "epoch": 158.0, + "grad_norm": 242299.203125, + "learning_rate": 4.466304839439168e-07, + "loss": 0.5558, + "step": 395 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.7842605156037992, + "eval_f1": 0.8434525763045618, + "eval_loss": 0.5507713556289673, + "eval_precision": 0.8442838370565046, + "eval_recall": 0.8426229508196721, + "eval_runtime": 2.477, + "eval_samples_per_second": 892.604, + "eval_steps_per_second": 0.807, + "step": 395 + }, + { + "epoch": 158.4, + "grad_norm": 231001.5, + "learning_rate": 4.4776119402985074e-07, + "loss": 0.5522, + "step": 396 + }, + { + "epoch": 158.8, + "grad_norm": 214909.875, + "learning_rate": 4.488919041157847e-07, + "loss": 0.5656, + "step": 397 + }, + { + "epoch": 158.8, + "eval_accuracy": 0.7874265038444143, + "eval_f1": 0.8462041884816754, + "eval_loss": 0.5497022271156311, + "eval_precision": 0.8445460483344219, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.4755, + "eval_samples_per_second": 893.148, + "eval_steps_per_second": 0.808, + "step": 397 + }, + { + "epoch": 159.2, + "grad_norm": 222137.5625, + "learning_rate": 4.500226142017187e-07, + "loss": 0.5583, + "step": 398 + }, + { + "epoch": 159.6, + "grad_norm": 247690.078125, + "learning_rate": 4.5115332428765265e-07, + "loss": 0.5508, + "step": 399 + }, + { + "epoch": 160.0, + "grad_norm": 211568.90625, + "learning_rate": 4.5228403437358663e-07, + "loss": 0.5617, + "step": 400 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.7905924920850294, + "eval_f1": 0.8488410055501142, + "eval_loss": 0.548139750957489, + "eval_precision": 0.8452535760728218, + "eval_recall": 0.8524590163934426, + "eval_runtime": 2.4692, + "eval_samples_per_second": 895.434, + "eval_steps_per_second": 0.81, + "step": 400 + }, + { + "epoch": 160.4, + "grad_norm": 230823.421875, + "learning_rate": 4.534147444595206e-07, + "loss": 0.5562, + "step": 401 + }, + { + "epoch": 160.8, + "grad_norm": 201258.375, + "learning_rate": 4.5454545454545457e-07, + "loss": 0.5555, + "step": 402 + }, + { + "epoch": 160.8, + "eval_accuracy": 0.7914970601537765, + "eval_f1": 0.8497882046269143, + "eval_loss": 0.5470873117446899, + "eval_precision": 0.844559585492228, + "eval_recall": 0.8550819672131148, + "eval_runtime": 2.5077, + "eval_samples_per_second": 881.676, + "eval_steps_per_second": 0.798, + "step": 402 + }, + { + "epoch": 161.2, + "grad_norm": 242294.515625, + "learning_rate": 4.5567616463138855e-07, + "loss": 0.5572, + "step": 403 + }, + { + "epoch": 161.6, + "grad_norm": 218842.609375, + "learning_rate": 4.5680687471732247e-07, + "loss": 0.559, + "step": 404 + }, + { + "epoch": 162.0, + "grad_norm": 263410.21875, + "learning_rate": 4.5793758480325644e-07, + "loss": 0.5371, + "step": 405 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.7937584803256446, + "eval_f1": 0.8513689700130378, + "eval_loss": 0.5455149412155151, + "eval_precision": 0.846403110823072, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.4348, + "eval_samples_per_second": 908.086, + "eval_steps_per_second": 0.821, + "step": 405 + }, + { + "epoch": 162.4, + "grad_norm": 203526.765625, + "learning_rate": 4.590682948891904e-07, + "loss": 0.5556, + "step": 406 + }, + { + "epoch": 162.8, + "grad_norm": 214933.140625, + "learning_rate": 4.601990049751244e-07, + "loss": 0.5553, + "step": 407 + }, + { + "epoch": 162.8, + "eval_accuracy": 0.7937584803256446, + "eval_f1": 0.8513689700130378, + "eval_loss": 0.5444439649581909, + "eval_precision": 0.846403110823072, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.4684, + "eval_samples_per_second": 895.716, + "eval_steps_per_second": 0.81, + "step": 407 + }, + { + "epoch": 163.2, + "grad_norm": 233460.765625, + "learning_rate": 4.6132971506105836e-07, + "loss": 0.5597, + "step": 408 + }, + { + "epoch": 163.6, + "grad_norm": 216099.515625, + "learning_rate": 4.6246042514699233e-07, + "loss": 0.5483, + "step": 409 + }, + { + "epoch": 164.0, + "grad_norm": 262248.125, + "learning_rate": 4.635911352329263e-07, + "loss": 0.5634, + "step": 410 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.7946630483943917, + "eval_f1": 0.8516339869281045, + "eval_loss": 0.5428987145423889, + "eval_precision": 0.8488599348534202, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.4918, + "eval_samples_per_second": 887.297, + "eval_steps_per_second": 0.803, + "step": 410 + }, + { + "epoch": 164.4, + "grad_norm": 200545.140625, + "learning_rate": 4.647218453188602e-07, + "loss": 0.5545, + "step": 411 + }, + { + "epoch": 164.8, + "grad_norm": 204986.21875, + "learning_rate": 4.658525554047942e-07, + "loss": 0.5534, + "step": 412 + }, + { + "epoch": 164.8, + "eval_accuracy": 0.7924016282225237, + "eval_f1": 0.8494588389635946, + "eval_loss": 0.5419096946716309, + "eval_precision": 0.8497375328083989, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.8042, + "eval_samples_per_second": 788.469, + "eval_steps_per_second": 0.713, + "step": 412 + }, + { + "epoch": 165.2, + "grad_norm": 234929.875, + "learning_rate": 4.6698326549072817e-07, + "loss": 0.5626, + "step": 413 + }, + { + "epoch": 165.6, + "grad_norm": 245492.96875, + "learning_rate": 4.6811397557666214e-07, + "loss": 0.5421, + "step": 414 + }, + { + "epoch": 166.0, + "grad_norm": 235446.71875, + "learning_rate": 4.692446856625961e-07, + "loss": 0.5587, + "step": 415 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.7896879240162822, + "eval_f1": 0.8471902727571475, + "eval_loss": 0.5404443740844727, + "eval_precision": 0.8491436100131752, + "eval_recall": 0.8452459016393442, + "eval_runtime": 2.4948, + "eval_samples_per_second": 886.253, + "eval_steps_per_second": 0.802, + "step": 415 + }, + { + "epoch": 166.4, + "grad_norm": 272630.84375, + "learning_rate": 4.703753957485301e-07, + "loss": 0.5409, + "step": 416 + }, + { + "epoch": 166.8, + "grad_norm": 208385.078125, + "learning_rate": 4.7150610583446406e-07, + "loss": 0.5498, + "step": 417 + }, + { + "epoch": 166.8, + "eval_accuracy": 0.7919493441881501, + "eval_f1": 0.8491803278688524, + "eval_loss": 0.5394257307052612, + "eval_precision": 0.8491803278688524, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.509, + "eval_samples_per_second": 881.219, + "eval_steps_per_second": 0.797, + "step": 417 + }, + { + "epoch": 167.2, + "grad_norm": 205137.46875, + "learning_rate": 4.72636815920398e-07, + "loss": 0.5577, + "step": 418 + }, + { + "epoch": 167.6, + "grad_norm": 204032.609375, + "learning_rate": 4.7376752600633195e-07, + "loss": 0.5444, + "step": 419 + }, + { + "epoch": 168.0, + "grad_norm": 219412.8125, + "learning_rate": 4.7489823609226593e-07, + "loss": 0.5427, + "step": 420 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.7946630483943917, + "eval_f1": 0.8517308948399739, + "eval_loss": 0.53790283203125, + "eval_precision": 0.8484059856864021, + "eval_recall": 0.8550819672131148, + "eval_runtime": 2.4876, + "eval_samples_per_second": 888.818, + "eval_steps_per_second": 0.804, + "step": 420 + }, + { + "epoch": 168.4, + "grad_norm": 209186.84375, + "learning_rate": 4.760289461781999e-07, + "loss": 0.5437, + "step": 421 + }, + { + "epoch": 168.8, + "grad_norm": 222802.6875, + "learning_rate": 4.771596562641338e-07, + "loss": 0.5479, + "step": 422 + }, + { + "epoch": 168.8, + "eval_accuracy": 0.7973767526006332, + "eval_f1": 0.8537859007832899, + "eval_loss": 0.5368810892105103, + "eval_precision": 0.8499025341130604, + "eval_recall": 0.8577049180327869, + "eval_runtime": 2.4978, + "eval_samples_per_second": 885.174, + "eval_steps_per_second": 0.801, + "step": 422 + }, + { + "epoch": 169.2, + "grad_norm": 262970.1875, + "learning_rate": 4.782903663500678e-07, + "loss": 0.5484, + "step": 423 + }, + { + "epoch": 169.6, + "grad_norm": 226655.203125, + "learning_rate": 4.794210764360018e-07, + "loss": 0.5437, + "step": 424 + }, + { + "epoch": 170.0, + "grad_norm": 210747.578125, + "learning_rate": 4.805517865219358e-07, + "loss": 0.5465, + "step": 425 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.7978290366350068, + "eval_f1": 0.8544448062520351, + "eval_loss": 0.5353887677192688, + "eval_precision": 0.8486416558861578, + "eval_recall": 0.860327868852459, + "eval_runtime": 2.5104, + "eval_samples_per_second": 880.737, + "eval_steps_per_second": 0.797, + "step": 425 + }, + { + "epoch": 170.4, + "grad_norm": 228828.96875, + "learning_rate": 4.816824966078697e-07, + "loss": 0.543, + "step": 426 + }, + { + "epoch": 170.8, + "grad_norm": 233804.828125, + "learning_rate": 4.828132066938037e-07, + "loss": 0.5362, + "step": 427 + }, + { + "epoch": 170.8, + "eval_accuracy": 0.7978290366350068, + "eval_f1": 0.8543499511241447, + "eval_loss": 0.5343641042709351, + "eval_precision": 0.8490932642487047, + "eval_recall": 0.8596721311475409, + "eval_runtime": 2.5154, + "eval_samples_per_second": 878.998, + "eval_steps_per_second": 0.795, + "step": 427 + }, + { + "epoch": 171.2, + "grad_norm": 254422.46875, + "learning_rate": 4.839439167797377e-07, + "loss": 0.5287, + "step": 428 + }, + { + "epoch": 171.6, + "grad_norm": 204172.34375, + "learning_rate": 4.850746268656716e-07, + "loss": 0.5488, + "step": 429 + }, + { + "epoch": 172.0, + "grad_norm": 254852.96875, + "learning_rate": 4.862053369516056e-07, + "loss": 0.5338, + "step": 430 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.798733604703754, + "eval_f1": 0.8543371522094927, + "eval_loss": 0.5328470468521118, + "eval_precision": 0.8529411764705882, + "eval_recall": 0.8557377049180328, + "eval_runtime": 2.4911, + "eval_samples_per_second": 887.564, + "eval_steps_per_second": 0.803, + "step": 430 + }, + { + "epoch": 172.4, + "grad_norm": 213317.859375, + "learning_rate": 4.873360470375395e-07, + "loss": 0.547, + "step": 431 + }, + { + "epoch": 172.8, + "grad_norm": 221640.984375, + "learning_rate": 4.884667571234735e-07, + "loss": 0.5452, + "step": 432 + }, + { + "epoch": 172.8, + "eval_accuracy": 0.7960199004975125, + "eval_f1": 0.8516935218678067, + "eval_loss": 0.5318477749824524, + "eval_precision": 0.854221635883905, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.5181, + "eval_samples_per_second": 878.04, + "eval_steps_per_second": 0.794, + "step": 432 + }, + { + "epoch": 173.2, + "grad_norm": 275340.4375, + "learning_rate": 4.895974672094075e-07, + "loss": 0.5418, + "step": 433 + }, + { + "epoch": 173.6, + "grad_norm": 252673.578125, + "learning_rate": 4.907281772953415e-07, + "loss": 0.5378, + "step": 434 + }, + { + "epoch": 174.0, + "grad_norm": 318623.96875, + "learning_rate": 4.918588873812754e-07, + "loss": 0.5307, + "step": 435 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.798733604703754, + "eval_f1": 0.8540505083633978, + "eval_loss": 0.5303488373756409, + "eval_precision": 0.8543307086614174, + "eval_recall": 0.8537704918032787, + "eval_runtime": 3.0401, + "eval_samples_per_second": 727.277, + "eval_steps_per_second": 0.658, + "step": 435 + }, + { + "epoch": 174.4, + "grad_norm": 198950.0, + "learning_rate": 4.929895974672094e-07, + "loss": 0.5384, + "step": 436 + }, + { + "epoch": 174.8, + "grad_norm": 217245.015625, + "learning_rate": 4.941203075531434e-07, + "loss": 0.5373, + "step": 437 + }, + { + "epoch": 174.8, + "eval_accuracy": 0.798733604703754, + "eval_f1": 0.8545276234063419, + "eval_loss": 0.5293256044387817, + "eval_precision": 0.8520208604954368, + "eval_recall": 0.8570491803278688, + "eval_runtime": 2.5087, + "eval_samples_per_second": 881.323, + "eval_steps_per_second": 0.797, + "step": 437 + }, + { + "epoch": 175.2, + "grad_norm": 263284.71875, + "learning_rate": 4.952510176390773e-07, + "loss": 0.5337, + "step": 438 + }, + { + "epoch": 175.6, + "grad_norm": 204618.203125, + "learning_rate": 4.963817277250113e-07, + "loss": 0.5323, + "step": 439 + }, + { + "epoch": 176.0, + "grad_norm": 277305.75, + "learning_rate": 4.975124378109452e-07, + "loss": 0.5379, + "step": 440 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.8000904568068747, + "eval_f1": 0.8558382257012394, + "eval_loss": 0.5278460383415222, + "eval_precision": 0.8513951979234263, + "eval_recall": 0.860327868852459, + "eval_runtime": 2.5357, + "eval_samples_per_second": 871.936, + "eval_steps_per_second": 0.789, + "step": 440 + }, + { + "epoch": 176.4, + "grad_norm": 219414.78125, + "learning_rate": 4.986431478968793e-07, + "loss": 0.531, + "step": 441 + }, + { + "epoch": 176.8, + "grad_norm": 209936.75, + "learning_rate": 4.997738579828132e-07, + "loss": 0.5325, + "step": 442 + }, + { + "epoch": 176.8, + "eval_accuracy": 0.8005427408412483, + "eval_f1": 0.8560235063663075, + "eval_loss": 0.5268487930297852, + "eval_precision": 0.852405721716515, + "eval_recall": 0.8596721311475409, + "eval_runtime": 2.5339, + "eval_samples_per_second": 872.561, + "eval_steps_per_second": 0.789, + "step": 442 + }, + { + "epoch": 177.2, + "grad_norm": 256585.703125, + "learning_rate": 5.009045680687472e-07, + "loss": 0.5276, + "step": 443 + }, + { + "epoch": 177.6, + "grad_norm": 218345.6875, + "learning_rate": 5.020352781546811e-07, + "loss": 0.5292, + "step": 444 + }, + { + "epoch": 178.0, + "grad_norm": 223323.90625, + "learning_rate": 5.03165988240615e-07, + "loss": 0.5395, + "step": 445 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.8009950248756219, + "eval_f1": 0.856020942408377, + "eval_loss": 0.5253640413284302, + "eval_precision": 0.8543435662965382, + "eval_recall": 0.8577049180327869, + "eval_runtime": 2.4551, + "eval_samples_per_second": 900.58, + "eval_steps_per_second": 0.815, + "step": 445 + }, + { + "epoch": 178.4, + "grad_norm": 216661.640625, + "learning_rate": 5.042966983265491e-07, + "loss": 0.5323, + "step": 446 + }, + { + "epoch": 178.8, + "grad_norm": 239189.953125, + "learning_rate": 5.05427408412483e-07, + "loss": 0.5355, + "step": 447 + }, + { + "epoch": 178.8, + "eval_accuracy": 0.8009950248756219, + "eval_f1": 0.8557377049180328, + "eval_loss": 0.5243934392929077, + "eval_precision": 0.8557377049180328, + "eval_recall": 0.8557377049180328, + "eval_runtime": 2.4973, + "eval_samples_per_second": 885.35, + "eval_steps_per_second": 0.801, + "step": 447 + }, + { + "epoch": 179.2, + "grad_norm": 317579.5, + "learning_rate": 5.06558118498417e-07, + "loss": 0.5134, + "step": 448 + }, + { + "epoch": 179.6, + "grad_norm": 227783.15625, + "learning_rate": 5.076888285843509e-07, + "loss": 0.5373, + "step": 449 + }, + { + "epoch": 180.0, + "grad_norm": 209218.421875, + "learning_rate": 5.08819538670285e-07, + "loss": 0.5375, + "step": 450 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.8005427408412483, + "eval_f1": 0.8555519161480512, + "eval_loss": 0.5228968858718872, + "eval_precision": 0.8547120418848168, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.5009, + "eval_samples_per_second": 884.072, + "eval_steps_per_second": 0.8, + "step": 450 + }, + { + "epoch": 180.4, + "grad_norm": 224249.1875, + "learning_rate": 5.099502487562189e-07, + "loss": 0.5271, + "step": 451 + }, + { + "epoch": 180.8, + "grad_norm": 200887.03125, + "learning_rate": 5.110809588421528e-07, + "loss": 0.5353, + "step": 452 + }, + { + "epoch": 180.8, + "eval_accuracy": 0.8009950248756219, + "eval_f1": 0.8558322411533421, + "eval_loss": 0.5219218730926514, + "eval_precision": 0.8552717747216765, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.5188, + "eval_samples_per_second": 877.783, + "eval_steps_per_second": 0.794, + "step": 452 + }, + { + "epoch": 181.2, + "grad_norm": 230493.28125, + "learning_rate": 5.122116689280868e-07, + "loss": 0.5299, + "step": 453 + }, + { + "epoch": 181.6, + "grad_norm": 224306.234375, + "learning_rate": 5.133423790140207e-07, + "loss": 0.53, + "step": 454 + }, + { + "epoch": 182.0, + "grad_norm": 250761.015625, + "learning_rate": 5.144730890999548e-07, + "loss": 0.5275, + "step": 455 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.7991858887381276, + "eval_f1": 0.8542350623768877, + "eval_loss": 0.5203860402107239, + "eval_precision": 0.8553583168967784, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.7743, + "eval_samples_per_second": 796.944, + "eval_steps_per_second": 0.721, + "step": 455 + }, + { + "epoch": 182.4, + "grad_norm": 221164.53125, + "learning_rate": 5.156037991858887e-07, + "loss": 0.5236, + "step": 456 + }, + { + "epoch": 182.8, + "grad_norm": 252011.53125, + "learning_rate": 5.167345092718227e-07, + "loss": 0.5348, + "step": 457 + }, + { + "epoch": 182.8, + "eval_accuracy": 0.7996381727725012, + "eval_f1": 0.8545155993431856, + "eval_loss": 0.5193630456924438, + "eval_precision": 0.8559210526315789, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.5358, + "eval_samples_per_second": 871.906, + "eval_steps_per_second": 0.789, + "step": 457 + }, + { + "epoch": 183.2, + "grad_norm": 293829.875, + "learning_rate": 5.178652193577566e-07, + "loss": 0.5212, + "step": 458 + }, + { + "epoch": 183.6, + "grad_norm": 209784.625, + "learning_rate": 5.189959294436906e-07, + "loss": 0.5325, + "step": 459 + }, + { + "epoch": 184.0, + "grad_norm": 274777.0, + "learning_rate": 5.201266395296246e-07, + "loss": 0.5169, + "step": 460 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.8005427408412483, + "eval_f1": 0.8557409224730128, + "eval_loss": 0.5179092884063721, + "eval_precision": 0.8537859007832899, + "eval_recall": 0.8577049180327869, + "eval_runtime": 2.5059, + "eval_samples_per_second": 882.33, + "eval_steps_per_second": 0.798, + "step": 460 + }, + { + "epoch": 184.4, + "grad_norm": 224043.984375, + "learning_rate": 5.212573496155585e-07, + "loss": 0.5211, + "step": 461 + }, + { + "epoch": 184.8, + "grad_norm": 229075.34375, + "learning_rate": 5.223880597014925e-07, + "loss": 0.5172, + "step": 462 + }, + { + "epoch": 184.8, + "eval_accuracy": 0.8023518769787427, + "eval_f1": 0.8576083414793092, + "eval_loss": 0.5169763565063477, + "eval_precision": 0.8523316062176166, + "eval_recall": 0.8629508196721312, + "eval_runtime": 2.5363, + "eval_samples_per_second": 871.752, + "eval_steps_per_second": 0.789, + "step": 462 + }, + { + "epoch": 185.2, + "grad_norm": 284090.65625, + "learning_rate": 5.235187697874264e-07, + "loss": 0.522, + "step": 463 + }, + { + "epoch": 185.6, + "grad_norm": 237456.28125, + "learning_rate": 5.246494798733605e-07, + "loss": 0.5271, + "step": 464 + }, + { + "epoch": 186.0, + "grad_norm": 320703.125, + "learning_rate": 5.257801899592944e-07, + "loss": 0.5311, + "step": 465 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.8591869918699186, + "eval_loss": 0.5156320929527283, + "eval_precision": 0.8522580645161291, + "eval_recall": 0.8662295081967213, + "eval_runtime": 2.5172, + "eval_samples_per_second": 878.346, + "eval_steps_per_second": 0.795, + "step": 465 + }, + { + "epoch": 186.4, + "grad_norm": 194421.125, + "learning_rate": 5.269109000452283e-07, + "loss": 0.5226, + "step": 466 + }, + { + "epoch": 186.8, + "grad_norm": 297692.53125, + "learning_rate": 5.280416101311623e-07, + "loss": 0.5287, + "step": 467 + }, + { + "epoch": 186.8, + "eval_accuracy": 0.8028041610131162, + "eval_f1": 0.8577023498694517, + "eval_loss": 0.5147150158882141, + "eval_precision": 0.8538011695906432, + "eval_recall": 0.861639344262295, + "eval_runtime": 2.5402, + "eval_samples_per_second": 870.399, + "eval_steps_per_second": 0.787, + "step": 467 + }, + { + "epoch": 187.2, + "grad_norm": 243043.96875, + "learning_rate": 5.291723202170963e-07, + "loss": 0.5271, + "step": 468 + }, + { + "epoch": 187.6, + "grad_norm": 212785.734375, + "learning_rate": 5.303030303030304e-07, + "loss": 0.5208, + "step": 469 + }, + { + "epoch": 188.0, + "grad_norm": 276103.9375, + "learning_rate": 5.314337403889643e-07, + "loss": 0.524, + "step": 470 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.798733604703754, + "eval_f1": 0.8533772652388797, + "eval_loss": 0.5134932398796082, + "eval_precision": 0.8576158940397351, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.5412, + "eval_samples_per_second": 870.069, + "eval_steps_per_second": 0.787, + "step": 470 + }, + { + "epoch": 188.4, + "grad_norm": 224670.828125, + "learning_rate": 5.325644504748983e-07, + "loss": 0.521, + "step": 471 + }, + { + "epoch": 188.8, + "grad_norm": 249966.90625, + "learning_rate": 5.336951605608323e-07, + "loss": 0.5198, + "step": 472 + }, + { + "epoch": 188.8, + "eval_accuracy": 0.7978290366350068, + "eval_f1": 0.8524265434136679, + "eval_loss": 0.5126535892486572, + "eval_precision": 0.8583776595744681, + "eval_recall": 0.8465573770491803, + "eval_runtime": 2.5216, + "eval_samples_per_second": 876.837, + "eval_steps_per_second": 0.793, + "step": 472 + }, + { + "epoch": 189.2, + "grad_norm": 506244.1875, + "learning_rate": 5.348258706467663e-07, + "loss": 0.5182, + "step": 473 + }, + { + "epoch": 189.6, + "grad_norm": 224370.671875, + "learning_rate": 5.359565807327002e-07, + "loss": 0.5257, + "step": 474 + }, + { + "epoch": 190.0, + "grad_norm": 302643.78125, + "learning_rate": 5.370872908186341e-07, + "loss": 0.5198, + "step": 475 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.8018995929443691, + "eval_f1": 0.8563934426229508, + "eval_loss": 0.5112202167510986, + "eval_precision": 0.8563934426229508, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.7807, + "eval_samples_per_second": 795.114, + "eval_steps_per_second": 0.719, + "step": 475 + }, + { + "epoch": 190.4, + "grad_norm": 208851.203125, + "learning_rate": 5.382180009045681e-07, + "loss": 0.5094, + "step": 476 + }, + { + "epoch": 190.8, + "grad_norm": 216616.171875, + "learning_rate": 5.393487109905021e-07, + "loss": 0.5065, + "step": 477 + }, + { + "epoch": 190.8, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8608667318344738, + "eval_loss": 0.5103092193603516, + "eval_precision": 0.8555699481865285, + "eval_recall": 0.8662295081967213, + "eval_runtime": 2.516, + "eval_samples_per_second": 878.787, + "eval_steps_per_second": 0.795, + "step": 477 + }, + { + "epoch": 191.2, + "grad_norm": 192390.78125, + "learning_rate": 5.404794210764361e-07, + "loss": 0.5294, + "step": 478 + }, + { + "epoch": 191.6, + "grad_norm": 231283.171875, + "learning_rate": 5.4161013116237e-07, + "loss": 0.5121, + "step": 479 + }, + { + "epoch": 192.0, + "grad_norm": 292600.03125, + "learning_rate": 5.42740841248304e-07, + "loss": 0.523, + "step": 480 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8614183474300585, + "eval_loss": 0.5089964270591736, + "eval_precision": 0.854744996772111, + "eval_recall": 0.8681967213114754, + "eval_runtime": 2.5185, + "eval_samples_per_second": 877.915, + "eval_steps_per_second": 0.794, + "step": 480 + }, + { + "epoch": 192.4, + "grad_norm": 412421.09375, + "learning_rate": 5.43871551334238e-07, + "loss": 0.5278, + "step": 481 + }, + { + "epoch": 192.8, + "grad_norm": 216862.015625, + "learning_rate": 5.450022614201719e-07, + "loss": 0.515, + "step": 482 + }, + { + "epoch": 192.8, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8600326264274062, + "eval_loss": 0.5081061720848083, + "eval_precision": 0.8558441558441559, + "eval_recall": 0.8642622950819672, + "eval_runtime": 2.513, + "eval_samples_per_second": 879.821, + "eval_steps_per_second": 0.796, + "step": 482 + }, + { + "epoch": 193.2, + "grad_norm": 242027.171875, + "learning_rate": 5.461329715061059e-07, + "loss": 0.5105, + "step": 483 + }, + { + "epoch": 193.6, + "grad_norm": 219185.734375, + "learning_rate": 5.472636815920398e-07, + "loss": 0.5173, + "step": 484 + }, + { + "epoch": 194.0, + "grad_norm": 236967.1875, + "learning_rate": 5.483943916779738e-07, + "loss": 0.5118, + "step": 485 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.8000904568068747, + "eval_f1": 0.85498687664042, + "eval_loss": 0.506868839263916, + "eval_precision": 0.855548260013132, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.5512, + "eval_samples_per_second": 866.656, + "eval_steps_per_second": 0.784, + "step": 485 + }, + { + "epoch": 194.4, + "grad_norm": 233484.84375, + "learning_rate": 5.495251017639078e-07, + "loss": 0.5178, + "step": 486 + }, + { + "epoch": 194.8, + "grad_norm": 315429.78125, + "learning_rate": 5.506558118498418e-07, + "loss": 0.5077, + "step": 487 + }, + { + "epoch": 194.8, + "eval_accuracy": 0.7991858887381276, + "eval_f1": 0.8539473684210527, + "eval_loss": 0.506022036075592, + "eval_precision": 0.8567656765676568, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.4885, + "eval_samples_per_second": 888.48, + "eval_steps_per_second": 0.804, + "step": 487 + }, + { + "epoch": 195.2, + "grad_norm": 380493.71875, + "learning_rate": 5.517865219357757e-07, + "loss": 0.5266, + "step": 488 + }, + { + "epoch": 195.6, + "grad_norm": 337072.21875, + "learning_rate": 5.529172320217096e-07, + "loss": 0.5058, + "step": 489 + }, + { + "epoch": 196.0, + "grad_norm": 270835.9375, + "learning_rate": 5.540479421076437e-07, + "loss": 0.5071, + "step": 490 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.8037087290818634, + "eval_f1": 0.8579842931937173, + "eval_loss": 0.5047447085380554, + "eval_precision": 0.8563030698889614, + "eval_recall": 0.8596721311475409, + "eval_runtime": 2.4874, + "eval_samples_per_second": 888.862, + "eval_steps_per_second": 0.804, + "step": 490 + }, + { + "epoch": 196.4, + "grad_norm": 201281.53125, + "learning_rate": 5.551786521935776e-07, + "loss": 0.5151, + "step": 491 + }, + { + "epoch": 196.8, + "grad_norm": 244919.53125, + "learning_rate": 5.563093622795116e-07, + "loss": 0.5022, + "step": 492 + }, + { + "epoch": 196.8, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.8613376835236541, + "eval_loss": 0.5039097666740417, + "eval_precision": 0.8571428571428571, + "eval_recall": 0.8655737704918033, + "eval_runtime": 2.5014, + "eval_samples_per_second": 883.916, + "eval_steps_per_second": 0.8, + "step": 492 + }, + { + "epoch": 197.2, + "grad_norm": 295290.5, + "learning_rate": 5.574400723654455e-07, + "loss": 0.5131, + "step": 493 + }, + { + "epoch": 197.6, + "grad_norm": 206901.0, + "learning_rate": 5.585707824513796e-07, + "loss": 0.5059, + "step": 494 + }, + { + "epoch": 198.0, + "grad_norm": 366658.90625, + "learning_rate": 5.597014925373135e-07, + "loss": 0.5103, + "step": 495 + }, + { + "epoch": 198.0, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8604878048780488, + "eval_loss": 0.5027384161949158, + "eval_precision": 0.8535483870967742, + "eval_recall": 0.8675409836065574, + "eval_runtime": 2.7643, + "eval_samples_per_second": 799.827, + "eval_steps_per_second": 0.723, + "step": 495 + }, + { + "epoch": 198.4, + "grad_norm": 271813.40625, + "learning_rate": 5.608322026232474e-07, + "loss": 0.5142, + "step": 496 + }, + { + "epoch": 198.8, + "grad_norm": 225646.53125, + "learning_rate": 5.619629127091814e-07, + "loss": 0.506, + "step": 497 + }, + { + "epoch": 198.8, + "eval_accuracy": 0.8050655811849842, + "eval_f1": 0.8595633756924079, + "eval_loss": 0.5019496083259583, + "eval_precision": 0.8542746113989638, + "eval_recall": 0.8649180327868853, + "eval_runtime": 2.4964, + "eval_samples_per_second": 885.69, + "eval_steps_per_second": 0.801, + "step": 497 + }, + { + "epoch": 199.2, + "grad_norm": 299855.03125, + "learning_rate": 5.630936227951153e-07, + "loss": 0.5051, + "step": 498 + }, + { + "epoch": 199.6, + "grad_norm": 183178.734375, + "learning_rate": 5.642243328810494e-07, + "loss": 0.5115, + "step": 499 + }, + { + "epoch": 200.0, + "grad_norm": 320236.0625, + "learning_rate": 5.653550429669833e-07, + "loss": 0.5089, + "step": 500 + }, + { + "epoch": 200.0, + "eval_accuracy": 0.8032564450474898, + "eval_f1": 0.8577036310107949, + "eval_loss": 0.500873863697052, + "eval_precision": 0.8557441253263708, + "eval_recall": 0.8596721311475409, + "eval_runtime": 2.4749, + "eval_samples_per_second": 893.357, + "eval_steps_per_second": 0.808, + "step": 500 + }, + { + "epoch": 200.4, + "grad_norm": 206347.203125, + "learning_rate": 5.664857530529173e-07, + "loss": 0.5019, + "step": 501 + }, + { + "epoch": 200.8, + "grad_norm": 324467.71875, + "learning_rate": 5.676164631388512e-07, + "loss": 0.5043, + "step": 502 + }, + { + "epoch": 200.8, + "eval_accuracy": 0.8009950248756219, + "eval_f1": 0.8556430446194225, + "eval_loss": 0.5001436471939087, + "eval_precision": 0.8562048588312541, + "eval_recall": 0.8550819672131148, + "eval_runtime": 2.4688, + "eval_samples_per_second": 895.588, + "eval_steps_per_second": 0.81, + "step": 502 + }, + { + "epoch": 201.2, + "grad_norm": 428580.4375, + "learning_rate": 5.687471732247853e-07, + "loss": 0.5197, + "step": 503 + }, + { + "epoch": 201.6, + "grad_norm": 209718.65625, + "learning_rate": 5.698778833107192e-07, + "loss": 0.5047, + "step": 504 + }, + { + "epoch": 202.0, + "grad_norm": 254994.1875, + "learning_rate": 5.710085933966531e-07, + "loss": 0.4972, + "step": 505 + }, + { + "epoch": 202.0, + "eval_accuracy": 0.7982813206693804, + "eval_f1": 0.8530961791831357, + "eval_loss": 0.499112993478775, + "eval_precision": 0.85704831237591, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.4911, + "eval_samples_per_second": 887.544, + "eval_steps_per_second": 0.803, + "step": 505 + }, + { + "epoch": 202.4, + "grad_norm": 435272.75, + "learning_rate": 5.721393034825871e-07, + "loss": 0.5043, + "step": 506 + }, + { + "epoch": 202.8, + "grad_norm": 234882.15625, + "learning_rate": 5.73270013568521e-07, + "loss": 0.5082, + "step": 507 + }, + { + "epoch": 202.8, + "eval_accuracy": 0.8005427408412483, + "eval_f1": 0.8550772264212948, + "eval_loss": 0.4982452392578125, + "eval_precision": 0.8570487483530962, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.4988, + "eval_samples_per_second": 884.839, + "eval_steps_per_second": 0.8, + "step": 507 + }, + { + "epoch": 203.2, + "grad_norm": 195765.09375, + "learning_rate": 5.744007236544551e-07, + "loss": 0.5148, + "step": 508 + }, + { + "epoch": 203.6, + "grad_norm": 197004.671875, + "learning_rate": 5.75531433740389e-07, + "loss": 0.4999, + "step": 509 + }, + { + "epoch": 204.0, + "grad_norm": 341697.3125, + "learning_rate": 5.76662143826323e-07, + "loss": 0.5038, + "step": 510 + }, + { + "epoch": 204.0, + "eval_accuracy": 0.8023518769787427, + "eval_f1": 0.8570493948315342, + "eval_loss": 0.4969598352909088, + "eval_precision": 0.8550913838120104, + "eval_recall": 0.8590163934426229, + "eval_runtime": 2.4912, + "eval_samples_per_second": 887.517, + "eval_steps_per_second": 0.803, + "step": 510 + }, + { + "epoch": 204.4, + "grad_norm": 204581.765625, + "learning_rate": 5.777928539122569e-07, + "loss": 0.4991, + "step": 511 + }, + { + "epoch": 204.8, + "grad_norm": 209599.796875, + "learning_rate": 5.789235639981909e-07, + "loss": 0.5007, + "step": 512 + }, + { + "epoch": 204.8, + "eval_accuracy": 0.8032564450474898, + "eval_f1": 0.8580750407830342, + "eval_loss": 0.4960744380950928, + "eval_precision": 0.8538961038961039, + "eval_recall": 0.8622950819672132, + "eval_runtime": 2.6003, + "eval_samples_per_second": 850.288, + "eval_steps_per_second": 0.769, + "step": 512 + }, + { + "epoch": 205.2, + "grad_norm": 227953.109375, + "learning_rate": 5.800542740841249e-07, + "loss": 0.5032, + "step": 513 + }, + { + "epoch": 205.6, + "grad_norm": 268922.84375, + "learning_rate": 5.811849841700588e-07, + "loss": 0.5073, + "step": 514 + }, + { + "epoch": 206.0, + "grad_norm": 263893.53125, + "learning_rate": 5.823156942559928e-07, + "loss": 0.4832, + "step": 515 + }, + { + "epoch": 206.0, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.8585429598170532, + "eval_loss": 0.49470046162605286, + "eval_precision": 0.85546875, + "eval_recall": 0.861639344262295, + "eval_runtime": 2.8543, + "eval_samples_per_second": 774.631, + "eval_steps_per_second": 0.701, + "step": 515 + }, + { + "epoch": 206.4, + "grad_norm": 219369.21875, + "learning_rate": 5.834464043419267e-07, + "loss": 0.5012, + "step": 516 + }, + { + "epoch": 206.8, + "grad_norm": 236650.90625, + "learning_rate": 5.845771144278608e-07, + "loss": 0.4991, + "step": 517 + }, + { + "epoch": 206.8, + "eval_accuracy": 0.8037087290818634, + "eval_f1": 0.8579842931937173, + "eval_loss": 0.4938981533050537, + "eval_precision": 0.8563030698889614, + "eval_recall": 0.8596721311475409, + "eval_runtime": 2.4832, + "eval_samples_per_second": 890.39, + "eval_steps_per_second": 0.805, + "step": 517 + }, + { + "epoch": 207.2, + "grad_norm": 329440.0, + "learning_rate": 5.857078245137947e-07, + "loss": 0.4826, + "step": 518 + }, + { + "epoch": 207.6, + "grad_norm": 253343.3125, + "learning_rate": 5.868385345997286e-07, + "loss": 0.4934, + "step": 519 + }, + { + "epoch": 208.0, + "grad_norm": 194635.25, + "learning_rate": 5.879692446856626e-07, + "loss": 0.5004, + "step": 520 + }, + { + "epoch": 208.0, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8598497223129696, + "eval_loss": 0.4927881956100464, + "eval_precision": 0.8567708333333334, + "eval_recall": 0.8629508196721312, + "eval_runtime": 2.4986, + "eval_samples_per_second": 884.892, + "eval_steps_per_second": 0.8, + "step": 520 + }, + { + "epoch": 208.4, + "grad_norm": 302530.28125, + "learning_rate": 5.890999547715966e-07, + "loss": 0.4992, + "step": 521 + }, + { + "epoch": 208.8, + "grad_norm": 237518.453125, + "learning_rate": 5.902306648575306e-07, + "loss": 0.4976, + "step": 522 + }, + { + "epoch": 208.8, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8597580908793724, + "eval_loss": 0.49204859137535095, + "eval_precision": 0.8572359843546284, + "eval_recall": 0.8622950819672132, + "eval_runtime": 2.4921, + "eval_samples_per_second": 887.203, + "eval_steps_per_second": 0.803, + "step": 522 + }, + { + "epoch": 209.2, + "grad_norm": 406642.21875, + "learning_rate": 5.913613749434645e-07, + "loss": 0.5013, + "step": 523 + }, + { + "epoch": 209.6, + "grad_norm": 214598.640625, + "learning_rate": 5.924920850293985e-07, + "loss": 0.4945, + "step": 524 + }, + { + "epoch": 210.0, + "grad_norm": 214259.375, + "learning_rate": 5.936227951153324e-07, + "loss": 0.504, + "step": 525 + }, + { + "epoch": 210.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8587385019710907, + "eval_loss": 0.49099186062812805, + "eval_precision": 0.8604344963791969, + "eval_recall": 0.8570491803278688, + "eval_runtime": 2.5178, + "eval_samples_per_second": 878.16, + "eval_steps_per_second": 0.794, + "step": 525 + }, + { + "epoch": 210.4, + "grad_norm": 192658.875, + "learning_rate": 5.947535052012664e-07, + "loss": 0.4999, + "step": 526 + }, + { + "epoch": 210.8, + "grad_norm": 343408.9375, + "learning_rate": 5.958842152872004e-07, + "loss": 0.4816, + "step": 527 + }, + { + "epoch": 210.8, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8585526315789473, + "eval_loss": 0.49033817648887634, + "eval_precision": 0.8613861386138614, + "eval_recall": 0.8557377049180328, + "eval_runtime": 2.4545, + "eval_samples_per_second": 900.804, + "eval_steps_per_second": 0.815, + "step": 527 + }, + { + "epoch": 211.2, + "grad_norm": 284826.0625, + "learning_rate": 5.970149253731343e-07, + "loss": 0.5065, + "step": 528 + }, + { + "epoch": 211.6, + "grad_norm": 195378.671875, + "learning_rate": 5.981456354590683e-07, + "loss": 0.4854, + "step": 529 + }, + { + "epoch": 212.0, + "grad_norm": 372135.46875, + "learning_rate": 5.992763455450023e-07, + "loss": 0.4789, + "step": 530 + }, + { + "epoch": 212.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8596721311475409, + "eval_loss": 0.48923805356025696, + "eval_precision": 0.8596721311475409, + "eval_recall": 0.8596721311475409, + "eval_runtime": 2.4719, + "eval_samples_per_second": 894.439, + "eval_steps_per_second": 0.809, + "step": 530 + }, + { + "epoch": 212.4, + "grad_norm": 237260.65625, + "learning_rate": 6.004070556309363e-07, + "loss": 0.4944, + "step": 531 + }, + { + "epoch": 212.8, + "grad_norm": 162423.765625, + "learning_rate": 6.015377657168702e-07, + "loss": 0.489, + "step": 532 + }, + { + "epoch": 212.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8608752449379491, + "eval_loss": 0.4886242151260376, + "eval_precision": 0.8575146389069617, + "eval_recall": 0.8642622950819672, + "eval_runtime": 2.4712, + "eval_samples_per_second": 894.717, + "eval_steps_per_second": 0.809, + "step": 532 + }, + { + "epoch": 213.2, + "grad_norm": 262113.234375, + "learning_rate": 6.026684758028041e-07, + "loss": 0.4925, + "step": 533 + }, + { + "epoch": 213.6, + "grad_norm": 235438.34375, + "learning_rate": 6.037991858887382e-07, + "loss": 0.4866, + "step": 534 + }, + { + "epoch": 214.0, + "grad_norm": 464044.1875, + "learning_rate": 6.049298959746721e-07, + "loss": 0.4941, + "step": 535 + }, + { + "epoch": 214.0, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8612377850162867, + "eval_loss": 0.48778578639030457, + "eval_precision": 0.855663430420712, + "eval_recall": 0.8668852459016393, + "eval_runtime": 2.485, + "eval_samples_per_second": 889.733, + "eval_steps_per_second": 0.805, + "step": 535 + }, + { + "epoch": 214.4, + "grad_norm": 241509.46875, + "learning_rate": 6.060606060606061e-07, + "loss": 0.4831, + "step": 536 + }, + { + "epoch": 214.8, + "grad_norm": 427266.71875, + "learning_rate": 6.0719131614654e-07, + "loss": 0.5005, + "step": 537 + }, + { + "epoch": 214.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8610567514677103, + "eval_loss": 0.4872070848941803, + "eval_precision": 0.8565866320571057, + "eval_recall": 0.8655737704918033, + "eval_runtime": 2.759, + "eval_samples_per_second": 801.368, + "eval_steps_per_second": 0.725, + "step": 537 + }, + { + "epoch": 215.2, + "grad_norm": 402486.4375, + "learning_rate": 6.08322026232474e-07, + "loss": 0.4821, + "step": 538 + }, + { + "epoch": 215.6, + "grad_norm": 195180.578125, + "learning_rate": 6.09452736318408e-07, + "loss": 0.4877, + "step": 539 + }, + { + "epoch": 216.0, + "grad_norm": 298034.875, + "learning_rate": 6.105834464043419e-07, + "loss": 0.4965, + "step": 540 + }, + { + "epoch": 216.0, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.8576126274251891, + "eval_loss": 0.48633021116256714, + "eval_precision": 0.8601583113456465, + "eval_recall": 0.8550819672131148, + "eval_runtime": 2.4878, + "eval_samples_per_second": 888.747, + "eval_steps_per_second": 0.804, + "step": 540 + }, + { + "epoch": 216.4, + "grad_norm": 253560.890625, + "learning_rate": 6.117141564902759e-07, + "loss": 0.4909, + "step": 541 + }, + { + "epoch": 216.8, + "grad_norm": 212034.40625, + "learning_rate": 6.128448665762098e-07, + "loss": 0.4876, + "step": 542 + }, + { + "epoch": 216.8, + "eval_accuracy": 0.8018995929443691, + "eval_f1": 0.8554455445544554, + "eval_loss": 0.4857812225818634, + "eval_precision": 0.8611295681063122, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.4856, + "eval_samples_per_second": 889.51, + "eval_steps_per_second": 0.805, + "step": 542 + }, + { + "epoch": 217.2, + "grad_norm": 190556.796875, + "learning_rate": 6.139755766621439e-07, + "loss": 0.5037, + "step": 543 + }, + { + "epoch": 217.6, + "grad_norm": 274599.0, + "learning_rate": 6.151062867480778e-07, + "loss": 0.4936, + "step": 544 + }, + { + "epoch": 218.0, + "grad_norm": 547190.75, + "learning_rate": 6.162369968340118e-07, + "loss": 0.4748, + "step": 545 + }, + { + "epoch": 218.0, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.857331136738056, + "eval_loss": 0.48480984568595886, + "eval_precision": 0.8615894039735099, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.5582, + "eval_samples_per_second": 864.293, + "eval_steps_per_second": 0.782, + "step": 545 + }, + { + "epoch": 218.4, + "grad_norm": 187737.90625, + "learning_rate": 6.173677069199457e-07, + "loss": 0.4845, + "step": 546 + }, + { + "epoch": 218.8, + "grad_norm": 352544.34375, + "learning_rate": 6.184984170058797e-07, + "loss": 0.4833, + "step": 547 + }, + { + "epoch": 218.8, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8586456278763971, + "eval_loss": 0.48411914706230164, + "eval_precision": 0.8609096901779829, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.5069, + "eval_samples_per_second": 881.961, + "eval_steps_per_second": 0.798, + "step": 547 + }, + { + "epoch": 219.2, + "grad_norm": 182616.109375, + "learning_rate": 6.196291270918137e-07, + "loss": 0.4929, + "step": 548 + }, + { + "epoch": 219.6, + "grad_norm": 317325.5625, + "learning_rate": 6.207598371777476e-07, + "loss": 0.4771, + "step": 549 + }, + { + "epoch": 220.0, + "grad_norm": 448115.9375, + "learning_rate": 6.218905472636816e-07, + "loss": 0.4884, + "step": 550 + }, + { + "epoch": 220.0, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8608752449379491, + "eval_loss": 0.4833013713359833, + "eval_precision": 0.8575146389069617, + "eval_recall": 0.8642622950819672, + "eval_runtime": 2.5065, + "eval_samples_per_second": 882.114, + "eval_steps_per_second": 0.798, + "step": 550 + }, + { + "epoch": 220.4, + "grad_norm": 297530.96875, + "learning_rate": 6.230212573496155e-07, + "loss": 0.4933, + "step": 551 + }, + { + "epoch": 220.8, + "grad_norm": 242869.09375, + "learning_rate": 6.241519674355496e-07, + "loss": 0.4874, + "step": 552 + }, + { + "epoch": 220.8, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.8608837970540099, + "eval_loss": 0.4826597273349762, + "eval_precision": 0.8594771241830066, + "eval_recall": 0.8622950819672132, + "eval_runtime": 2.5718, + "eval_samples_per_second": 859.717, + "eval_steps_per_second": 0.778, + "step": 552 + }, + { + "epoch": 221.2, + "grad_norm": 217103.25, + "learning_rate": 6.252826775214835e-07, + "loss": 0.4926, + "step": 553 + }, + { + "epoch": 221.6, + "grad_norm": 193852.265625, + "learning_rate": 6.264133876074175e-07, + "loss": 0.4829, + "step": 554 + }, + { + "epoch": 222.0, + "grad_norm": 266223.0, + "learning_rate": 6.275440976933514e-07, + "loss": 0.4853, + "step": 555 + }, + { + "epoch": 222.0, + "eval_accuracy": 0.8050655811849842, + "eval_f1": 0.8580836351662825, + "eval_loss": 0.4817172884941101, + "eval_precision": 0.8617724867724867, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.4965, + "eval_samples_per_second": 885.627, + "eval_steps_per_second": 0.801, + "step": 555 + }, + { + "epoch": 222.4, + "grad_norm": 182271.890625, + "learning_rate": 6.286748077792853e-07, + "loss": 0.4815, + "step": 556 + }, + { + "epoch": 222.8, + "grad_norm": 201292.390625, + "learning_rate": 6.298055178652194e-07, + "loss": 0.4889, + "step": 557 + }, + { + "epoch": 222.8, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.8568595041322314, + "eval_loss": 0.48110201954841614, + "eval_precision": 0.864, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.7603, + "eval_samples_per_second": 800.987, + "eval_steps_per_second": 0.725, + "step": 557 + }, + { + "epoch": 223.2, + "grad_norm": 212099.90625, + "learning_rate": 6.309362279511533e-07, + "loss": 0.4913, + "step": 558 + }, + { + "epoch": 223.6, + "grad_norm": 218010.96875, + "learning_rate": 6.320669380370873e-07, + "loss": 0.4843, + "step": 559 + }, + { + "epoch": 224.0, + "grad_norm": 230897.953125, + "learning_rate": 6.331976481230212e-07, + "loss": 0.4835, + "step": 560 + }, + { + "epoch": 224.0, + "eval_accuracy": 0.8050655811849842, + "eval_f1": 0.8577088147903599, + "eval_loss": 0.48005273938179016, + "eval_precision": 0.8636968085106383, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.4875, + "eval_samples_per_second": 888.835, + "eval_steps_per_second": 0.804, + "step": 560 + }, + { + "epoch": 224.4, + "grad_norm": 259064.140625, + "learning_rate": 6.343283582089553e-07, + "loss": 0.4871, + "step": 561 + }, + { + "epoch": 224.8, + "grad_norm": 182099.09375, + "learning_rate": 6.354590682948892e-07, + "loss": 0.4807, + "step": 562 + }, + { + "epoch": 224.8, + "eval_accuracy": 0.8046132971506106, + "eval_f1": 0.8579881656804734, + "eval_loss": 0.4793483018875122, + "eval_precision": 0.8602504943968359, + "eval_recall": 0.8557377049180328, + "eval_runtime": 2.4981, + "eval_samples_per_second": 885.07, + "eval_steps_per_second": 0.801, + "step": 562 + }, + { + "epoch": 225.2, + "grad_norm": 280379.53125, + "learning_rate": 6.365897783808231e-07, + "loss": 0.4681, + "step": 563 + }, + { + "epoch": 225.6, + "grad_norm": 308695.0, + "learning_rate": 6.377204884667571e-07, + "loss": 0.4833, + "step": 564 + }, + { + "epoch": 226.0, + "grad_norm": 220856.84375, + "learning_rate": 6.38851198552691e-07, + "loss": 0.473, + "step": 565 + }, + { + "epoch": 226.0, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.858172289551261, + "eval_loss": 0.4785252809524536, + "eval_precision": 0.8573298429319371, + "eval_recall": 0.8590163934426229, + "eval_runtime": 2.4929, + "eval_samples_per_second": 886.904, + "eval_steps_per_second": 0.802, + "step": 565 + }, + { + "epoch": 226.4, + "grad_norm": 249560.78125, + "learning_rate": 6.399819086386251e-07, + "loss": 0.4806, + "step": 566 + }, + { + "epoch": 226.8, + "grad_norm": 278894.90625, + "learning_rate": 6.41112618724559e-07, + "loss": 0.4811, + "step": 567 + }, + { + "epoch": 226.8, + "eval_accuracy": 0.8046132971506106, + "eval_f1": 0.8584534731323722, + "eval_loss": 0.4779183566570282, + "eval_precision": 0.8578912901113294, + "eval_recall": 0.8590163934426229, + "eval_runtime": 2.4503, + "eval_samples_per_second": 902.341, + "eval_steps_per_second": 0.816, + "step": 567 + }, + { + "epoch": 227.2, + "grad_norm": 460061.1875, + "learning_rate": 6.42243328810493e-07, + "loss": 0.4833, + "step": 568 + }, + { + "epoch": 227.6, + "grad_norm": 188528.796875, + "learning_rate": 6.433740388964269e-07, + "loss": 0.4792, + "step": 569 + }, + { + "epoch": 228.0, + "grad_norm": 248069.4375, + "learning_rate": 6.445047489823609e-07, + "loss": 0.4894, + "step": 570 + }, + { + "epoch": 228.0, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8599735799207398, + "eval_loss": 0.47714099287986755, + "eval_precision": 0.8662674650698603, + "eval_recall": 0.8537704918032787, + "eval_runtime": 2.4584, + "eval_samples_per_second": 899.347, + "eval_steps_per_second": 0.814, + "step": 570 + }, + { + "epoch": 228.4, + "grad_norm": 231829.640625, + "learning_rate": 6.456354590682949e-07, + "loss": 0.4789, + "step": 571 + }, + { + "epoch": 228.8, + "grad_norm": 170569.71875, + "learning_rate": 6.467661691542288e-07, + "loss": 0.4883, + "step": 572 + }, + { + "epoch": 228.8, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8580901856763926, + "eval_loss": 0.47680607438087463, + "eval_precision": 0.8678739101274312, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.4818, + "eval_samples_per_second": 890.868, + "eval_steps_per_second": 0.806, + "step": 572 + }, + { + "epoch": 229.2, + "grad_norm": 255278.828125, + "learning_rate": 6.478968792401628e-07, + "loss": 0.5022, + "step": 573 + }, + { + "epoch": 229.6, + "grad_norm": 344751.375, + "learning_rate": 6.490275893260968e-07, + "loss": 0.4797, + "step": 574 + }, + { + "epoch": 230.0, + "grad_norm": 220228.6875, + "learning_rate": 6.501582994120308e-07, + "loss": 0.4839, + "step": 575 + }, + { + "epoch": 230.0, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8609184010571523, + "eval_loss": 0.476155161857605, + "eval_precision": 0.8675099866844208, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.4901, + "eval_samples_per_second": 887.913, + "eval_steps_per_second": 0.803, + "step": 575 + }, + { + "epoch": 230.4, + "grad_norm": 172148.625, + "learning_rate": 6.512890094979647e-07, + "loss": 0.4816, + "step": 576 + }, + { + "epoch": 230.8, + "grad_norm": 188908.828125, + "learning_rate": 6.524197195838986e-07, + "loss": 0.4796, + "step": 577 + }, + { + "epoch": 230.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8617405582922825, + "eval_loss": 0.4757920801639557, + "eval_precision": 0.8631578947368421, + "eval_recall": 0.860327868852459, + "eval_runtime": 2.7352, + "eval_samples_per_second": 808.351, + "eval_steps_per_second": 0.731, + "step": 577 + }, + { + "epoch": 231.2, + "grad_norm": 316602.96875, + "learning_rate": 6.535504296698326e-07, + "loss": 0.4745, + "step": 578 + }, + { + "epoch": 231.6, + "grad_norm": 240924.296875, + "learning_rate": 6.546811397557666e-07, + "loss": 0.4722, + "step": 579 + }, + { + "epoch": 232.0, + "grad_norm": 326533.59375, + "learning_rate": 6.558118498417006e-07, + "loss": 0.4874, + "step": 580 + }, + { + "epoch": 232.0, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8614675880223758, + "eval_loss": 0.47530966997146606, + "eval_precision": 0.8645970937912814, + "eval_recall": 0.8583606557377049, + "eval_runtime": 2.4701, + "eval_samples_per_second": 895.107, + "eval_steps_per_second": 0.81, + "step": 580 + }, + { + "epoch": 232.4, + "grad_norm": 155423.90625, + "learning_rate": 6.569425599276345e-07, + "loss": 0.4823, + "step": 581 + }, + { + "epoch": 232.8, + "grad_norm": 202716.609375, + "learning_rate": 6.580732700135685e-07, + "loss": 0.474, + "step": 582 + }, + { + "epoch": 232.8, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.860442098317387, + "eval_loss": 0.4748690724372864, + "eval_precision": 0.8658698539176627, + "eval_recall": 0.8550819672131148, + "eval_runtime": 2.4773, + "eval_samples_per_second": 892.507, + "eval_steps_per_second": 0.807, + "step": 582 + }, + { + "epoch": 233.2, + "grad_norm": 473744.4375, + "learning_rate": 6.592039800995025e-07, + "loss": 0.4733, + "step": 583 + }, + { + "epoch": 233.6, + "grad_norm": 333283.90625, + "learning_rate": 6.603346901854365e-07, + "loss": 0.4728, + "step": 584 + }, + { + "epoch": 234.0, + "grad_norm": 281623.28125, + "learning_rate": 6.614654002713704e-07, + "loss": 0.4858, + "step": 585 + }, + { + "epoch": 234.0, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8593130779392338, + "eval_loss": 0.47398996353149414, + "eval_precision": 0.865602129075183, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.5241, + "eval_samples_per_second": 875.94, + "eval_steps_per_second": 0.792, + "step": 585 + }, + { + "epoch": 234.4, + "grad_norm": 323270.65625, + "learning_rate": 6.625961103573043e-07, + "loss": 0.4713, + "step": 586 + }, + { + "epoch": 234.8, + "grad_norm": 186935.859375, + "learning_rate": 6.637268204432383e-07, + "loss": 0.4725, + "step": 587 + }, + { + "epoch": 234.8, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.8600592690154758, + "eval_loss": 0.4733979403972626, + "eval_precision": 0.8637566137566137, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.4653, + "eval_samples_per_second": 896.842, + "eval_steps_per_second": 0.811, + "step": 587 + }, + { + "epoch": 235.2, + "grad_norm": 194582.5625, + "learning_rate": 6.648575305291723e-07, + "loss": 0.4948, + "step": 588 + }, + { + "epoch": 235.6, + "grad_norm": 150390.40625, + "learning_rate": 6.659882406151063e-07, + "loss": 0.474, + "step": 589 + }, + { + "epoch": 236.0, + "grad_norm": 192008.53125, + "learning_rate": 6.671189507010402e-07, + "loss": 0.4679, + "step": 590 + }, + { + "epoch": 236.0, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8596837944664032, + "eval_loss": 0.47262346744537354, + "eval_precision": 0.8636664460622104, + "eval_recall": 0.8557377049180328, + "eval_runtime": 2.4875, + "eval_samples_per_second": 888.851, + "eval_steps_per_second": 0.804, + "step": 590 + }, + { + "epoch": 236.4, + "grad_norm": 172567.640625, + "learning_rate": 6.682496607869742e-07, + "loss": 0.4665, + "step": 591 + }, + { + "epoch": 236.8, + "grad_norm": 173962.53125, + "learning_rate": 6.693803708729082e-07, + "loss": 0.4726, + "step": 592 + }, + { + "epoch": 236.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8607095926412615, + "eval_loss": 0.47206926345825195, + "eval_precision": 0.8624094799210007, + "eval_recall": 0.8590163934426229, + "eval_runtime": 2.4755, + "eval_samples_per_second": 893.145, + "eval_steps_per_second": 0.808, + "step": 592 + }, + { + "epoch": 237.2, + "grad_norm": 495151.21875, + "learning_rate": 6.705110809588421e-07, + "loss": 0.4808, + "step": 593 + }, + { + "epoch": 237.6, + "grad_norm": 155773.65625, + "learning_rate": 6.716417910447761e-07, + "loss": 0.4725, + "step": 594 + }, + { + "epoch": 238.0, + "grad_norm": 255707.78125, + "learning_rate": 6.7277250113071e-07, + "loss": 0.4602, + "step": 595 + }, + { + "epoch": 238.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8587458745874588, + "eval_loss": 0.4712398946285248, + "eval_precision": 0.8644518272425249, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.4777, + "eval_samples_per_second": 892.353, + "eval_steps_per_second": 0.807, + "step": 595 + }, + { + "epoch": 238.4, + "grad_norm": 160374.453125, + "learning_rate": 6.73903211216644e-07, + "loss": 0.4733, + "step": 596 + }, + { + "epoch": 238.8, + "grad_norm": 181442.640625, + "learning_rate": 6.75033921302578e-07, + "loss": 0.4667, + "step": 597 + }, + { + "epoch": 238.8, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8587495865034733, + "eval_loss": 0.47077810764312744, + "eval_precision": 0.8664886515353805, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.7471, + "eval_samples_per_second": 804.84, + "eval_steps_per_second": 0.728, + "step": 597 + }, + { + "epoch": 239.2, + "grad_norm": 248275.0625, + "learning_rate": 6.76164631388512e-07, + "loss": 0.4715, + "step": 598 + }, + { + "epoch": 239.6, + "grad_norm": 137917.375, + "learning_rate": 6.772953414744459e-07, + "loss": 0.4774, + "step": 599 + }, + { + "epoch": 240.0, + "grad_norm": 282006.84375, + "learning_rate": 6.784260515603798e-07, + "loss": 0.4496, + "step": 600 + }, + { + "epoch": 240.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8584656084656085, + "eval_loss": 0.4700579345226288, + "eval_precision": 0.8659106070713809, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.474, + "eval_samples_per_second": 893.698, + "eval_steps_per_second": 0.808, + "step": 600 + }, + { + "epoch": 240.4, + "grad_norm": 239822.90625, + "learning_rate": 6.795567616463139e-07, + "loss": 0.4607, + "step": 601 + }, + { + "epoch": 240.8, + "grad_norm": 279273.90625, + "learning_rate": 6.806874717322478e-07, + "loss": 0.485, + "step": 602 + }, + { + "epoch": 240.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8593130779392338, + "eval_loss": 0.4696570932865143, + "eval_precision": 0.865602129075183, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.5005, + "eval_samples_per_second": 884.224, + "eval_steps_per_second": 0.8, + "step": 602 + }, + { + "epoch": 241.2, + "grad_norm": 292187.15625, + "learning_rate": 6.818181818181819e-07, + "loss": 0.4738, + "step": 603 + }, + { + "epoch": 241.6, + "grad_norm": 167098.625, + "learning_rate": 6.829488919041158e-07, + "loss": 0.4637, + "step": 604 + }, + { + "epoch": 242.0, + "grad_norm": 453471.0, + "learning_rate": 6.840796019900499e-07, + "loss": 0.4745, + "step": 605 + }, + { + "epoch": 242.0, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.859782250082481, + "eval_loss": 0.46918076276779175, + "eval_precision": 0.8652058432934927, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.4585, + "eval_samples_per_second": 899.325, + "eval_steps_per_second": 0.814, + "step": 605 + }, + { + "epoch": 242.4, + "grad_norm": 241546.796875, + "learning_rate": 6.852103120759838e-07, + "loss": 0.4704, + "step": 606 + }, + { + "epoch": 242.8, + "grad_norm": 160962.296875, + "learning_rate": 6.863410221619177e-07, + "loss": 0.4584, + "step": 607 + }, + { + "epoch": 242.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8596955658504302, + "eval_loss": 0.4687780439853668, + "eval_precision": 0.8677354709418837, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.4337, + "eval_samples_per_second": 908.495, + "eval_steps_per_second": 0.822, + "step": 607 + }, + { + "epoch": 243.2, + "grad_norm": 749479.75, + "learning_rate": 6.874717322478517e-07, + "loss": 0.4641, + "step": 608 + }, + { + "epoch": 243.6, + "grad_norm": 190660.015625, + "learning_rate": 6.886024423337856e-07, + "loss": 0.469, + "step": 609 + }, + { + "epoch": 244.0, + "grad_norm": 425220.34375, + "learning_rate": 6.897331524197197e-07, + "loss": 0.4747, + "step": 610 + }, + { + "epoch": 244.0, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8590337524818001, + "eval_loss": 0.4680376648902893, + "eval_precision": 0.8670674682698731, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.4696, + "eval_samples_per_second": 895.281, + "eval_steps_per_second": 0.81, + "step": 610 + }, + { + "epoch": 244.4, + "grad_norm": 191264.609375, + "learning_rate": 6.908638625056536e-07, + "loss": 0.4696, + "step": 611 + }, + { + "epoch": 244.8, + "grad_norm": 164585.703125, + "learning_rate": 6.919945725915876e-07, + "loss": 0.4686, + "step": 612 + }, + { + "epoch": 244.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8590337524818001, + "eval_loss": 0.4675431549549103, + "eval_precision": 0.8670674682698731, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.4783, + "eval_samples_per_second": 892.147, + "eval_steps_per_second": 0.807, + "step": 612 + }, + { + "epoch": 245.2, + "grad_norm": 366283.3125, + "learning_rate": 6.931252826775215e-07, + "loss": 0.4636, + "step": 613 + }, + { + "epoch": 245.6, + "grad_norm": 148165.28125, + "learning_rate": 6.942559927634556e-07, + "loss": 0.462, + "step": 614 + }, + { + "epoch": 246.0, + "grad_norm": 219624.1875, + "learning_rate": 6.953867028493895e-07, + "loss": 0.466, + "step": 615 + }, + { + "epoch": 246.0, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8588469184890656, + "eval_loss": 0.46681851148605347, + "eval_precision": 0.8680509042196919, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.4819, + "eval_samples_per_second": 890.856, + "eval_steps_per_second": 0.806, + "step": 615 + }, + { + "epoch": 246.4, + "grad_norm": 218808.984375, + "learning_rate": 6.965174129353234e-07, + "loss": 0.4799, + "step": 616 + }, + { + "epoch": 246.8, + "grad_norm": 229882.1875, + "learning_rate": 6.976481230212574e-07, + "loss": 0.4652, + "step": 617 + }, + { + "epoch": 246.8, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8585591539986781, + "eval_loss": 0.466217041015625, + "eval_precision": 0.8654230512991339, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.7589, + "eval_samples_per_second": 801.399, + "eval_steps_per_second": 0.725, + "step": 617 + }, + { + "epoch": 247.2, + "grad_norm": 339791.84375, + "learning_rate": 6.987788331071913e-07, + "loss": 0.4569, + "step": 618 + }, + { + "epoch": 247.6, + "grad_norm": 316491.0625, + "learning_rate": 6.999095431931254e-07, + "loss": 0.4814, + "step": 619 + }, + { + "epoch": 248.0, + "grad_norm": 263313.34375, + "learning_rate": 7.010402532790593e-07, + "loss": 0.4449, + "step": 620 + }, + { + "epoch": 248.0, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8609010194015126, + "eval_loss": 0.46577316522598267, + "eval_precision": 0.8634564643799473, + "eval_recall": 0.8583606557377049, + "eval_runtime": 2.4782, + "eval_samples_per_second": 892.166, + "eval_steps_per_second": 0.807, + "step": 620 + }, + { + "epoch": 248.4, + "grad_norm": 446421.3125, + "learning_rate": 7.021709633649933e-07, + "loss": 0.478, + "step": 621 + }, + { + "epoch": 248.8, + "grad_norm": 323639.25, + "learning_rate": 7.033016734509272e-07, + "loss": 0.4556, + "step": 622 + }, + { + "epoch": 248.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8605263157894737, + "eval_loss": 0.4653334617614746, + "eval_precision": 0.8633663366336634, + "eval_recall": 0.8577049180327869, + "eval_runtime": 2.4804, + "eval_samples_per_second": 891.397, + "eval_steps_per_second": 0.806, + "step": 622 + }, + { + "epoch": 249.2, + "grad_norm": 249739.265625, + "learning_rate": 7.044323835368612e-07, + "loss": 0.4551, + "step": 623 + }, + { + "epoch": 249.6, + "grad_norm": 381267.8125, + "learning_rate": 7.055630936227952e-07, + "loss": 0.462, + "step": 624 + }, + { + "epoch": 250.0, + "grad_norm": 235284.21875, + "learning_rate": 7.066938037087291e-07, + "loss": 0.4631, + "step": 625 + }, + { + "epoch": 250.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8579960185799602, + "eval_loss": 0.4646071195602417, + "eval_precision": 0.8683680322364002, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.4951, + "eval_samples_per_second": 886.14, + "eval_steps_per_second": 0.802, + "step": 625 + }, + { + "epoch": 250.4, + "grad_norm": 206644.25, + "learning_rate": 7.078245137946631e-07, + "loss": 0.4699, + "step": 626 + }, + { + "epoch": 250.8, + "grad_norm": 241577.375, + "learning_rate": 7.08955223880597e-07, + "loss": 0.458, + "step": 627 + }, + { + "epoch": 250.8, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.8557147617460846, + "eval_loss": 0.4643935561180115, + "eval_precision": 0.8699186991869918, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.5266, + "eval_samples_per_second": 875.076, + "eval_steps_per_second": 0.792, + "step": 627 + }, + { + "epoch": 251.2, + "grad_norm": 639594.3125, + "learning_rate": 7.100859339665311e-07, + "loss": 0.451, + "step": 628 + }, + { + "epoch": 251.6, + "grad_norm": 380814.0625, + "learning_rate": 7.11216644052465e-07, + "loss": 0.4702, + "step": 629 + }, + { + "epoch": 252.0, + "grad_norm": 297280.1875, + "learning_rate": 7.123473541383989e-07, + "loss": 0.4545, + "step": 630 + }, + { + "epoch": 252.0, + "eval_accuracy": 0.8100407055630936, + "eval_f1": 0.8612029081295439, + "eval_loss": 0.46381983160972595, + "eval_precision": 0.8680879413724184, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.4872, + "eval_samples_per_second": 888.936, + "eval_steps_per_second": 0.804, + "step": 630 + }, + { + "epoch": 252.4, + "grad_norm": 212339.234375, + "learning_rate": 7.134780642243329e-07, + "loss": 0.4551, + "step": 631 + }, + { + "epoch": 252.8, + "grad_norm": 373783.625, + "learning_rate": 7.146087743102669e-07, + "loss": 0.4743, + "step": 632 + }, + { + "epoch": 252.8, + "eval_accuracy": 0.8100407055630936, + "eval_f1": 0.8618421052631579, + "eval_loss": 0.4636715352535248, + "eval_precision": 0.8646864686468647, + "eval_recall": 0.8590163934426229, + "eval_runtime": 2.4914, + "eval_samples_per_second": 887.437, + "eval_steps_per_second": 0.803, + "step": 632 + }, + { + "epoch": 253.2, + "grad_norm": 322612.625, + "learning_rate": 7.157394843962009e-07, + "loss": 0.4456, + "step": 633 + }, + { + "epoch": 253.6, + "grad_norm": 593569.8125, + "learning_rate": 7.168701944821348e-07, + "loss": 0.4639, + "step": 634 + }, + { + "epoch": 254.0, + "grad_norm": 648172.9375, + "learning_rate": 7.180009045680688e-07, + "loss": 0.4622, + "step": 635 + }, + { + "epoch": 254.0, + "eval_accuracy": 0.8104929895974672, + "eval_f1": 0.862306933946763, + "eval_loss": 0.46346327662467957, + "eval_precision": 0.8642951251646904, + "eval_recall": 0.860327868852459, + "eval_runtime": 2.5178, + "eval_samples_per_second": 878.16, + "eval_steps_per_second": 0.794, + "step": 635 + }, + { + "epoch": 254.4, + "grad_norm": 382733.09375, + "learning_rate": 7.191316146540028e-07, + "loss": 0.4666, + "step": 636 + }, + { + "epoch": 254.8, + "grad_norm": 257156.796875, + "learning_rate": 7.202623247399367e-07, + "loss": 0.4602, + "step": 637 + }, + { + "epoch": 254.8, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8603499504787059, + "eval_loss": 0.4629066586494446, + "eval_precision": 0.8663563829787234, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.7371, + "eval_samples_per_second": 807.8, + "eval_steps_per_second": 0.731, + "step": 637 + }, + { + "epoch": 255.2, + "grad_norm": 286541.09375, + "learning_rate": 7.213930348258707e-07, + "loss": 0.4665, + "step": 638 + }, + { + "epoch": 255.6, + "grad_norm": 217243.484375, + "learning_rate": 7.225237449118046e-07, + "loss": 0.4652, + "step": 639 + }, + { + "epoch": 256.0, + "grad_norm": 395152.28125, + "learning_rate": 7.236544549977386e-07, + "loss": 0.4485, + "step": 640 + }, + { + "epoch": 256.0, + "eval_accuracy": 0.804161013116237, + "eval_f1": 0.8557147617460846, + "eval_loss": 0.46256428956985474, + "eval_precision": 0.8699186991869918, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.4704, + "eval_samples_per_second": 894.994, + "eval_steps_per_second": 0.81, + "step": 640 + }, + { + "epoch": 256.4, + "grad_norm": 309504.875, + "learning_rate": 7.247851650836726e-07, + "loss": 0.4671, + "step": 641 + }, + { + "epoch": 256.8, + "grad_norm": 340173.84375, + "learning_rate": 7.259158751696066e-07, + "loss": 0.4611, + "step": 642 + }, + { + "epoch": 256.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8594164456233422, + "eval_loss": 0.4621763527393341, + "eval_precision": 0.869215291750503, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.4847, + "eval_samples_per_second": 889.84, + "eval_steps_per_second": 0.805, + "step": 642 + }, + { + "epoch": 257.2, + "grad_norm": 377696.0, + "learning_rate": 7.270465852555405e-07, + "loss": 0.4506, + "step": 643 + }, + { + "epoch": 257.6, + "grad_norm": 145286.25, + "learning_rate": 7.281772953414744e-07, + "loss": 0.463, + "step": 644 + }, + { + "epoch": 258.0, + "grad_norm": 661658.75, + "learning_rate": 7.293080054274085e-07, + "loss": 0.4757, + "step": 645 + }, + { + "epoch": 258.0, + "eval_accuracy": 0.8100407055630936, + "eval_f1": 0.8614775725593667, + "eval_loss": 0.4619855582714081, + "eval_precision": 0.8666224286662243, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.4953, + "eval_samples_per_second": 886.075, + "eval_steps_per_second": 0.802, + "step": 645 + }, + { + "epoch": 258.4, + "grad_norm": 339166.65625, + "learning_rate": 7.304387155133424e-07, + "loss": 0.4556, + "step": 646 + }, + { + "epoch": 258.8, + "grad_norm": 182106.59375, + "learning_rate": 7.315694255992764e-07, + "loss": 0.473, + "step": 647 + }, + { + "epoch": 258.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8610102344007924, + "eval_loss": 0.4616989493370056, + "eval_precision": 0.8670212765957447, + "eval_recall": 0.8550819672131148, + "eval_runtime": 2.509, + "eval_samples_per_second": 881.221, + "eval_steps_per_second": 0.797, + "step": 647 + }, + { + "epoch": 259.2, + "grad_norm": 621142.625, + "learning_rate": 7.327001356852103e-07, + "loss": 0.4847, + "step": 648 + }, + { + "epoch": 259.6, + "grad_norm": 243196.171875, + "learning_rate": 7.338308457711443e-07, + "loss": 0.4678, + "step": 649 + }, + { + "epoch": 260.0, + "grad_norm": 407930.65625, + "learning_rate": 7.349615558570783e-07, + "loss": 0.4579, + "step": 650 + }, + { + "epoch": 260.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8570478723404256, + "eval_loss": 0.4614317715167999, + "eval_precision": 0.8691840863115307, + "eval_recall": 0.8452459016393442, + "eval_runtime": 2.4712, + "eval_samples_per_second": 894.723, + "eval_steps_per_second": 0.809, + "step": 650 + }, + { + "epoch": 260.4, + "grad_norm": 405669.0625, + "learning_rate": 7.360922659430122e-07, + "loss": 0.4668, + "step": 651 + }, + { + "epoch": 260.8, + "grad_norm": 176683.984375, + "learning_rate": 7.372229760289462e-07, + "loss": 0.4504, + "step": 652 + }, + { + "epoch": 260.8, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8602576808721506, + "eval_loss": 0.4610856771469116, + "eval_precision": 0.8668442077230359, + "eval_recall": 0.8537704918032787, + "eval_runtime": 2.4721, + "eval_samples_per_second": 894.37, + "eval_steps_per_second": 0.809, + "step": 652 + }, + { + "epoch": 261.2, + "grad_norm": 389106.15625, + "learning_rate": 7.383536861148801e-07, + "loss": 0.4517, + "step": 653 + }, + { + "epoch": 261.6, + "grad_norm": 261994.578125, + "learning_rate": 7.394843962008142e-07, + "loss": 0.4623, + "step": 654 + }, + { + "epoch": 262.0, + "grad_norm": 344850.90625, + "learning_rate": 7.406151062867481e-07, + "loss": 0.4664, + "step": 655 + }, + { + "epoch": 262.0, + "eval_accuracy": 0.8109452736318408, + "eval_f1": 0.8623188405797102, + "eval_loss": 0.4607904553413391, + "eval_precision": 0.8663136995367307, + "eval_recall": 0.8583606557377049, + "eval_runtime": 2.4869, + "eval_samples_per_second": 889.07, + "eval_steps_per_second": 0.804, + "step": 655 + }, + { + "epoch": 262.4, + "grad_norm": 169693.234375, + "learning_rate": 7.417458163726821e-07, + "loss": 0.4457, + "step": 656 + }, + { + "epoch": 262.8, + "grad_norm": 195420.765625, + "learning_rate": 7.42876526458616e-07, + "loss": 0.463, + "step": 657 + }, + { + "epoch": 262.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8598810310641111, + "eval_loss": 0.4605135917663574, + "eval_precision": 0.8667554963357762, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.7894, + "eval_samples_per_second": 792.64, + "eval_steps_per_second": 0.717, + "step": 657 + }, + { + "epoch": 263.2, + "grad_norm": 211825.28125, + "learning_rate": 7.4400723654455e-07, + "loss": 0.4821, + "step": 658 + }, + { + "epoch": 263.6, + "grad_norm": 190842.546875, + "learning_rate": 7.45137946630484e-07, + "loss": 0.4644, + "step": 659 + }, + { + "epoch": 264.0, + "grad_norm": 303416.5, + "learning_rate": 7.462686567164179e-07, + "loss": 0.4575, + "step": 660 + }, + { + "epoch": 264.0, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8606421714664019, + "eval_loss": 0.45989668369293213, + "eval_precision": 0.8689839572192514, + "eval_recall": 0.8524590163934426, + "eval_runtime": 2.4706, + "eval_samples_per_second": 894.919, + "eval_steps_per_second": 0.81, + "step": 660 + }, + { + "epoch": 264.4, + "grad_norm": 365086.0, + "learning_rate": 7.473993668023519e-07, + "loss": 0.4575, + "step": 661 + }, + { + "epoch": 264.8, + "grad_norm": 338152.46875, + "learning_rate": 7.485300768882858e-07, + "loss": 0.4685, + "step": 662 + }, + { + "epoch": 264.8, + "eval_accuracy": 0.8109452736318408, + "eval_f1": 0.8621372031662269, + "eval_loss": 0.45946934819221497, + "eval_precision": 0.8672859986728599, + "eval_recall": 0.8570491803278688, + "eval_runtime": 2.4796, + "eval_samples_per_second": 891.676, + "eval_steps_per_second": 0.807, + "step": 662 + }, + { + "epoch": 265.2, + "grad_norm": 350104.96875, + "learning_rate": 7.496607869742199e-07, + "loss": 0.4408, + "step": 663 + }, + { + "epoch": 265.6, + "grad_norm": 270339.75, + "learning_rate": 7.507914970601538e-07, + "loss": 0.4715, + "step": 664 + }, + { + "epoch": 266.0, + "grad_norm": 310568.75, + "learning_rate": 7.519222071460878e-07, + "loss": 0.4487, + "step": 665 + }, + { + "epoch": 266.0, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8604497354497355, + "eval_loss": 0.45888736844062805, + "eval_precision": 0.8679119412941961, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.4789, + "eval_samples_per_second": 891.939, + "eval_steps_per_second": 0.807, + "step": 665 + }, + { + "epoch": 266.4, + "grad_norm": 256675.59375, + "learning_rate": 7.530529172320217e-07, + "loss": 0.4589, + "step": 666 + }, + { + "epoch": 266.8, + "grad_norm": 158313.5, + "learning_rate": 7.541836273179556e-07, + "loss": 0.465, + "step": 667 + }, + { + "epoch": 266.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8608264462809917, + "eval_loss": 0.45848509669303894, + "eval_precision": 0.868, + "eval_recall": 0.8537704918032787, + "eval_runtime": 2.4818, + "eval_samples_per_second": 890.889, + "eval_steps_per_second": 0.806, + "step": 667 + }, + { + "epoch": 267.2, + "grad_norm": 208482.078125, + "learning_rate": 7.553143374038897e-07, + "loss": 0.4537, + "step": 668 + }, + { + "epoch": 267.6, + "grad_norm": 236345.125, + "learning_rate": 7.564450474898236e-07, + "loss": 0.453, + "step": 669 + }, + { + "epoch": 268.0, + "grad_norm": 292772.625, + "learning_rate": 7.575757575757576e-07, + "loss": 0.4647, + "step": 670 + }, + { + "epoch": 268.0, + "eval_accuracy": 0.8109452736318408, + "eval_f1": 0.8622280817402769, + "eval_loss": 0.4580378532409668, + "eval_precision": 0.8667992047713717, + "eval_recall": 0.8577049180327869, + "eval_runtime": 2.4888, + "eval_samples_per_second": 888.375, + "eval_steps_per_second": 0.804, + "step": 670 + }, + { + "epoch": 268.4, + "grad_norm": 160654.484375, + "learning_rate": 7.587064676616915e-07, + "loss": 0.4575, + "step": 671 + }, + { + "epoch": 268.8, + "grad_norm": 357645.78125, + "learning_rate": 7.598371777476256e-07, + "loss": 0.4461, + "step": 672 + }, + { + "epoch": 268.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8612850082372323, + "eval_loss": 0.4577026665210724, + "eval_precision": 0.8655629139072848, + "eval_recall": 0.8570491803278688, + "eval_runtime": 2.4879, + "eval_samples_per_second": 888.698, + "eval_steps_per_second": 0.804, + "step": 672 + }, + { + "epoch": 269.2, + "grad_norm": 271843.90625, + "learning_rate": 7.609678878335595e-07, + "loss": 0.4547, + "step": 673 + }, + { + "epoch": 269.6, + "grad_norm": 162252.40625, + "learning_rate": 7.620985979194934e-07, + "loss": 0.4448, + "step": 674 + }, + { + "epoch": 270.0, + "grad_norm": 178901.796875, + "learning_rate": 7.632293080054274e-07, + "loss": 0.4622, + "step": 675 + }, + { + "epoch": 270.0, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8609184010571523, + "eval_loss": 0.457194447517395, + "eval_precision": 0.8675099866844208, + "eval_recall": 0.8544262295081967, + "eval_runtime": 2.4955, + "eval_samples_per_second": 885.979, + "eval_steps_per_second": 0.801, + "step": 675 + }, + { + "epoch": 270.4, + "grad_norm": 144960.828125, + "learning_rate": 7.643600180913614e-07, + "loss": 0.4438, + "step": 676 + }, + { + "epoch": 270.8, + "grad_norm": 418054.65625, + "learning_rate": 7.654907281772954e-07, + "loss": 0.469, + "step": 677 + }, + { + "epoch": 270.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8608264462809917, + "eval_loss": 0.4568876028060913, + "eval_precision": 0.868, + "eval_recall": 0.8537704918032787, + "eval_runtime": 2.7518, + "eval_samples_per_second": 803.477, + "eval_steps_per_second": 0.727, + "step": 677 + }, + { + "epoch": 271.2, + "grad_norm": 319575.09375, + "learning_rate": 7.666214382632293e-07, + "loss": 0.4496, + "step": 678 + }, + { + "epoch": 271.6, + "grad_norm": 186134.78125, + "learning_rate": 7.677521483491633e-07, + "loss": 0.4411, + "step": 679 + }, + { + "epoch": 272.0, + "grad_norm": 463792.46875, + "learning_rate": 7.688828584350972e-07, + "loss": 0.4565, + "step": 680 + }, + { + "epoch": 272.0, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8600727753886868, + "eval_loss": 0.4564747214317322, + "eval_precision": 0.8678237650200267, + "eval_recall": 0.8524590163934426, + "eval_runtime": 2.4859, + "eval_samples_per_second": 889.399, + "eval_steps_per_second": 0.805, + "step": 680 + }, + { + "epoch": 272.4, + "grad_norm": 199833.953125, + "learning_rate": 7.700135685210312e-07, + "loss": 0.4558, + "step": 681 + }, + { + "epoch": 272.8, + "grad_norm": 175922.890625, + "learning_rate": 7.711442786069652e-07, + "loss": 0.4499, + "step": 682 + }, + { + "epoch": 272.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8586595885865959, + "eval_loss": 0.45632436871528625, + "eval_precision": 0.8690396239086635, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.4993, + "eval_samples_per_second": 884.652, + "eval_steps_per_second": 0.8, + "step": 682 + }, + { + "epoch": 273.2, + "grad_norm": 388655.3125, + "learning_rate": 7.722749886928991e-07, + "loss": 0.4376, + "step": 683 + }, + { + "epoch": 273.6, + "grad_norm": 174775.125, + "learning_rate": 7.734056987788331e-07, + "loss": 0.4521, + "step": 684 + }, + { + "epoch": 274.0, + "grad_norm": 256503.71875, + "learning_rate": 7.745364088647671e-07, + "loss": 0.454, + "step": 685 + }, + { + "epoch": 274.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8582781456953642, + "eval_loss": 0.4561382830142975, + "eval_precision": 0.8668896321070234, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.484, + "eval_samples_per_second": 890.09, + "eval_steps_per_second": 0.805, + "step": 685 + }, + { + "epoch": 274.4, + "grad_norm": 201554.96875, + "learning_rate": 7.756671189507011e-07, + "loss": 0.459, + "step": 686 + }, + { + "epoch": 274.8, + "grad_norm": 141052.90625, + "learning_rate": 7.76797829036635e-07, + "loss": 0.4512, + "step": 687 + }, + { + "epoch": 274.8, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8575215374420145, + "eval_loss": 0.4558574855327606, + "eval_precision": 0.8667113194909578, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.4965, + "eval_samples_per_second": 885.65, + "eval_steps_per_second": 0.801, + "step": 687 + }, + { + "epoch": 275.2, + "grad_norm": 445788.65625, + "learning_rate": 7.779285391225689e-07, + "loss": 0.4599, + "step": 688 + }, + { + "epoch": 275.6, + "grad_norm": 169196.609375, + "learning_rate": 7.79059249208503e-07, + "loss": 0.4564, + "step": 689 + }, + { + "epoch": 276.0, + "grad_norm": 488866.34375, + "learning_rate": 7.801899592944369e-07, + "loss": 0.452, + "step": 690 + }, + { + "epoch": 276.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8574270557029178, + "eval_loss": 0.4553672671318054, + "eval_precision": 0.8672032193158954, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.4637, + "eval_samples_per_second": 897.448, + "eval_steps_per_second": 0.812, + "step": 690 + }, + { + "epoch": 276.4, + "grad_norm": 291939.8125, + "learning_rate": 7.813206693803709e-07, + "loss": 0.449, + "step": 691 + }, + { + "epoch": 276.8, + "grad_norm": 281883.40625, + "learning_rate": 7.824513794663048e-07, + "loss": 0.4611, + "step": 692 + }, + { + "epoch": 276.8, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8576169930302024, + "eval_loss": 0.4550659656524658, + "eval_precision": 0.8682795698924731, + "eval_recall": 0.8472131147540983, + "eval_runtime": 2.5462, + "eval_samples_per_second": 868.348, + "eval_steps_per_second": 0.785, + "step": 692 + }, + { + "epoch": 277.2, + "grad_norm": 232455.921875, + "learning_rate": 7.835820895522388e-07, + "loss": 0.4474, + "step": 693 + }, + { + "epoch": 277.6, + "grad_norm": 251892.421875, + "learning_rate": 7.847127996381728e-07, + "loss": 0.4425, + "step": 694 + }, + { + "epoch": 278.0, + "grad_norm": 288920.28125, + "learning_rate": 7.858435097241068e-07, + "loss": 0.4766, + "step": 695 + }, + { + "epoch": 278.0, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8576169930302024, + "eval_loss": 0.4543924033641815, + "eval_precision": 0.8682795698924731, + "eval_recall": 0.8472131147540983, + "eval_runtime": 2.4754, + "eval_samples_per_second": 893.203, + "eval_steps_per_second": 0.808, + "step": 695 + }, + { + "epoch": 278.4, + "grad_norm": 217284.390625, + "learning_rate": 7.869742198100407e-07, + "loss": 0.4464, + "step": 696 + }, + { + "epoch": 278.8, + "grad_norm": 250549.03125, + "learning_rate": 7.881049298959746e-07, + "loss": 0.4466, + "step": 697 + }, + { + "epoch": 278.8, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.8592249089102352, + "eval_loss": 0.4539850354194641, + "eval_precision": 0.8681392235609103, + "eval_recall": 0.8504918032786886, + "eval_runtime": 2.7376, + "eval_samples_per_second": 807.643, + "eval_steps_per_second": 0.731, + "step": 697 + }, + { + "epoch": 279.2, + "grad_norm": 380141.5, + "learning_rate": 7.892356399819087e-07, + "loss": 0.4728, + "step": 698 + }, + { + "epoch": 279.6, + "grad_norm": 174293.578125, + "learning_rate": 7.903663500678426e-07, + "loss": 0.4528, + "step": 699 + }, + { + "epoch": 280.0, + "grad_norm": 321556.21875, + "learning_rate": 7.914970601537766e-07, + "loss": 0.4473, + "step": 700 + }, + { + "epoch": 280.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8577127659574468, + "eval_loss": 0.45340919494628906, + "eval_precision": 0.8698583951449764, + "eval_recall": 0.8459016393442623, + "eval_runtime": 2.4835, + "eval_samples_per_second": 890.262, + "eval_steps_per_second": 0.805, + "step": 700 + }, + { + "epoch": 280.4, + "grad_norm": 205691.4375, + "learning_rate": 7.926277702397105e-07, + "loss": 0.4585, + "step": 701 + }, + { + "epoch": 280.8, + "grad_norm": 168076.984375, + "learning_rate": 7.937584803256445e-07, + "loss": 0.4557, + "step": 702 + }, + { + "epoch": 280.8, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8579980046558031, + "eval_loss": 0.4532695412635803, + "eval_precision": 0.8704453441295547, + "eval_recall": 0.8459016393442623, + "eval_runtime": 2.4784, + "eval_samples_per_second": 892.105, + "eval_steps_per_second": 0.807, + "step": 702 + }, + { + "epoch": 281.2, + "grad_norm": 173831.6875, + "learning_rate": 7.948891904115785e-07, + "loss": 0.4559, + "step": 703 + }, + { + "epoch": 281.6, + "grad_norm": 411432.46875, + "learning_rate": 7.960199004975124e-07, + "loss": 0.4447, + "step": 704 + }, + { + "epoch": 282.0, + "grad_norm": 275648.09375, + "learning_rate": 7.971506105834464e-07, + "loss": 0.4735, + "step": 705 + }, + { + "epoch": 282.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8568575233022636, + "eval_loss": 0.45308932662010193, + "eval_precision": 0.8701825557809331, + "eval_recall": 0.8439344262295082, + "eval_runtime": 2.5003, + "eval_samples_per_second": 884.299, + "eval_steps_per_second": 0.8, + "step": 705 + }, + { + "epoch": 282.4, + "grad_norm": 193838.4375, + "learning_rate": 7.982813206693803e-07, + "loss": 0.45, + "step": 706 + }, + { + "epoch": 282.8, + "grad_norm": 552722.875, + "learning_rate": 7.994120307553144e-07, + "loss": 0.4429, + "step": 707 + }, + { + "epoch": 282.8, + "eval_accuracy": 0.8037087290818634, + "eval_f1": 0.8554297135243171, + "eval_loss": 0.4530775249004364, + "eval_precision": 0.8693297224102912, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.4784, + "eval_samples_per_second": 892.104, + "eval_steps_per_second": 0.807, + "step": 707 + }, + { + "epoch": 283.2, + "grad_norm": 254215.5625, + "learning_rate": 8.005427408412483e-07, + "loss": 0.4425, + "step": 708 + }, + { + "epoch": 283.6, + "grad_norm": 401322.15625, + "learning_rate": 8.016734509271823e-07, + "loss": 0.4462, + "step": 709 + }, + { + "epoch": 284.0, + "grad_norm": 301335.75, + "learning_rate": 8.028041610131162e-07, + "loss": 0.4607, + "step": 710 + }, + { + "epoch": 284.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8579920739762219, + "eval_loss": 0.4528961777687073, + "eval_precision": 0.8642714570858283, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.4858, + "eval_samples_per_second": 889.468, + "eval_steps_per_second": 0.805, + "step": 710 + }, + { + "epoch": 284.4, + "grad_norm": 184265.625, + "learning_rate": 8.039348710990501e-07, + "loss": 0.4458, + "step": 711 + }, + { + "epoch": 284.8, + "grad_norm": 194442.703125, + "learning_rate": 8.050655811849842e-07, + "loss": 0.4494, + "step": 712 + }, + { + "epoch": 284.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8603425559947299, + "eval_loss": 0.4529063105583191, + "eval_precision": 0.8643282594308405, + "eval_recall": 0.8563934426229508, + "eval_runtime": 2.4859, + "eval_samples_per_second": 889.405, + "eval_steps_per_second": 0.805, + "step": 712 + }, + { + "epoch": 285.2, + "grad_norm": 716429.4375, + "learning_rate": 8.061962912709181e-07, + "loss": 0.4722, + "step": 713 + }, + { + "epoch": 285.6, + "grad_norm": 286030.75, + "learning_rate": 8.073270013568521e-07, + "loss": 0.4399, + "step": 714 + }, + { + "epoch": 286.0, + "grad_norm": 342507.28125, + "learning_rate": 8.08457711442786e-07, + "loss": 0.4445, + "step": 715 + }, + { + "epoch": 286.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8588429752066116, + "eval_loss": 0.45232611894607544, + "eval_precision": 0.866, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.497, + "eval_samples_per_second": 885.462, + "eval_steps_per_second": 0.801, + "step": 715 + }, + { + "epoch": 286.4, + "grad_norm": 388690.28125, + "learning_rate": 8.095884215287201e-07, + "loss": 0.4569, + "step": 716 + }, + { + "epoch": 286.8, + "grad_norm": 154744.421875, + "learning_rate": 8.10719131614654e-07, + "loss": 0.4527, + "step": 717 + }, + { + "epoch": 286.8, + "eval_accuracy": 0.8046132971506106, + "eval_f1": 0.856, + "eval_loss": 0.45205047726631165, + "eval_precision": 0.8705084745762712, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.7464, + "eval_samples_per_second": 805.048, + "eval_steps_per_second": 0.728, + "step": 717 + }, + { + "epoch": 287.2, + "grad_norm": 502549.1875, + "learning_rate": 8.118498417005879e-07, + "loss": 0.4346, + "step": 718 + }, + { + "epoch": 287.6, + "grad_norm": 359308.96875, + "learning_rate": 8.129805517865219e-07, + "loss": 0.4418, + "step": 719 + }, + { + "epoch": 288.0, + "grad_norm": 334255.90625, + "learning_rate": 8.141112618724558e-07, + "loss": 0.4498, + "step": 720 + }, + { + "epoch": 288.0, + "eval_accuracy": 0.8046132971506106, + "eval_f1": 0.8557114228456913, + "eval_loss": 0.4517686367034912, + "eval_precision": 0.8720217835262083, + "eval_recall": 0.84, + "eval_runtime": 2.4856, + "eval_samples_per_second": 889.508, + "eval_steps_per_second": 0.805, + "step": 720 + }, + { + "epoch": 288.4, + "grad_norm": 148530.125, + "learning_rate": 8.152419719583899e-07, + "loss": 0.4593, + "step": 721 + }, + { + "epoch": 288.8, + "grad_norm": 621375.5, + "learning_rate": 8.163726820443238e-07, + "loss": 0.4323, + "step": 722 + }, + { + "epoch": 288.8, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8574283810792804, + "eval_loss": 0.45146864652633667, + "eval_precision": 0.8713608666215301, + "eval_recall": 0.8439344262295082, + "eval_runtime": 2.4831, + "eval_samples_per_second": 890.407, + "eval_steps_per_second": 0.805, + "step": 722 + }, + { + "epoch": 289.2, + "grad_norm": 203011.203125, + "learning_rate": 8.175033921302578e-07, + "loss": 0.4706, + "step": 723 + }, + { + "epoch": 289.6, + "grad_norm": 279329.875, + "learning_rate": 8.186341022161917e-07, + "loss": 0.4463, + "step": 724 + }, + { + "epoch": 290.0, + "grad_norm": 474836.5, + "learning_rate": 8.197648123021257e-07, + "loss": 0.4355, + "step": 725 + }, + { + "epoch": 290.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8587495865034733, + "eval_loss": 0.45117512345314026, + "eval_precision": 0.8664886515353805, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.4895, + "eval_samples_per_second": 888.146, + "eval_steps_per_second": 0.803, + "step": 725 + }, + { + "epoch": 290.4, + "grad_norm": 288094.40625, + "learning_rate": 8.208955223880597e-07, + "loss": 0.4434, + "step": 726 + }, + { + "epoch": 290.8, + "grad_norm": 199928.875, + "learning_rate": 8.220262324739936e-07, + "loss": 0.4474, + "step": 727 + }, + { + "epoch": 290.8, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8588429752066116, + "eval_loss": 0.4510100781917572, + "eval_precision": 0.866, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.4858, + "eval_samples_per_second": 889.437, + "eval_steps_per_second": 0.805, + "step": 727 + }, + { + "epoch": 291.2, + "grad_norm": 716672.375, + "learning_rate": 8.231569425599276e-07, + "loss": 0.4535, + "step": 728 + }, + { + "epoch": 291.6, + "grad_norm": 160894.828125, + "learning_rate": 8.242876526458615e-07, + "loss": 0.4362, + "step": 729 + }, + { + "epoch": 292.0, + "grad_norm": 280799.0, + "learning_rate": 8.254183627317956e-07, + "loss": 0.4412, + "step": 730 + }, + { + "epoch": 292.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8582781456953642, + "eval_loss": 0.4507199227809906, + "eval_precision": 0.8668896321070234, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.4537, + "eval_samples_per_second": 901.097, + "eval_steps_per_second": 0.815, + "step": 730 + }, + { + "epoch": 292.4, + "grad_norm": 277298.8125, + "learning_rate": 8.265490728177295e-07, + "loss": 0.4537, + "step": 731 + }, + { + "epoch": 292.8, + "grad_norm": 388103.375, + "learning_rate": 8.276797829036635e-07, + "loss": 0.4477, + "step": 732 + }, + { + "epoch": 292.8, + "eval_accuracy": 0.8050655811849842, + "eval_f1": 0.856953202787919, + "eval_loss": 0.45069363713264465, + "eval_precision": 0.8676075268817204, + "eval_recall": 0.8465573770491803, + "eval_runtime": 2.4747, + "eval_samples_per_second": 893.431, + "eval_steps_per_second": 0.808, + "step": 732 + }, + { + "epoch": 293.2, + "grad_norm": 334652.25, + "learning_rate": 8.288104929895974e-07, + "loss": 0.4465, + "step": 733 + }, + { + "epoch": 293.6, + "grad_norm": 178698.75, + "learning_rate": 8.299412030755314e-07, + "loss": 0.4471, + "step": 734 + }, + { + "epoch": 294.0, + "grad_norm": 185949.140625, + "learning_rate": 8.310719131614654e-07, + "loss": 0.4494, + "step": 735 + }, + { + "epoch": 294.0, + "eval_accuracy": 0.8050655811849842, + "eval_f1": 0.8565723793677205, + "eval_loss": 0.4504994750022888, + "eval_precision": 0.8695945945945946, + "eval_recall": 0.8439344262295082, + "eval_runtime": 2.4672, + "eval_samples_per_second": 896.159, + "eval_steps_per_second": 0.811, + "step": 735 + }, + { + "epoch": 294.4, + "grad_norm": 188772.703125, + "learning_rate": 8.322026232473993e-07, + "loss": 0.4538, + "step": 736 + }, + { + "epoch": 294.8, + "grad_norm": 233858.3125, + "learning_rate": 8.333333333333334e-07, + "loss": 0.434, + "step": 737 + }, + { + "epoch": 294.8, + "eval_accuracy": 0.8046132971506106, + "eval_f1": 0.8565737051792829, + "eval_loss": 0.45057186484336853, + "eval_precision": 0.867518493611298, + "eval_recall": 0.8459016393442623, + "eval_runtime": 2.7356, + "eval_samples_per_second": 808.243, + "eval_steps_per_second": 0.731, + "step": 737 + }, + { + "epoch": 295.2, + "grad_norm": 204070.578125, + "learning_rate": 8.344640434192674e-07, + "loss": 0.4394, + "step": 738 + }, + { + "epoch": 295.6, + "grad_norm": 266195.5, + "learning_rate": 8.355947535052014e-07, + "loss": 0.449, + "step": 739 + }, + { + "epoch": 296.0, + "grad_norm": 242642.90625, + "learning_rate": 8.367254635911353e-07, + "loss": 0.4444, + "step": 740 + }, + { + "epoch": 296.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8590293826345329, + "eval_loss": 0.4508047103881836, + "eval_precision": 0.8650265957446809, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.4739, + "eval_samples_per_second": 893.734, + "eval_steps_per_second": 0.808, + "step": 740 + }, + { + "epoch": 296.4, + "grad_norm": 173511.9375, + "learning_rate": 8.378561736770692e-07, + "loss": 0.4459, + "step": 741 + }, + { + "epoch": 296.8, + "grad_norm": 601479.3125, + "learning_rate": 8.389868837630032e-07, + "loss": 0.4412, + "step": 742 + }, + { + "epoch": 296.8, + "eval_accuracy": 0.8050655811849842, + "eval_f1": 0.85742639761826, + "eval_loss": 0.4503806531429291, + "eval_precision": 0.8651535380507344, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.5041, + "eval_samples_per_second": 882.942, + "eval_steps_per_second": 0.799, + "step": 742 + }, + { + "epoch": 297.2, + "grad_norm": 255373.75, + "learning_rate": 8.401175938489372e-07, + "loss": 0.4429, + "step": 743 + }, + { + "epoch": 297.6, + "grad_norm": 676720.6875, + "learning_rate": 8.412483039348712e-07, + "loss": 0.4622, + "step": 744 + }, + { + "epoch": 298.0, + "grad_norm": 455248.3125, + "learning_rate": 8.423790140208051e-07, + "loss": 0.4419, + "step": 745 + }, + { + "epoch": 298.0, + "eval_accuracy": 0.8032564450474898, + "eval_f1": 0.8554336989032901, + "eval_loss": 0.4496366083621979, + "eval_precision": 0.8672506738544474, + "eval_recall": 0.8439344262295082, + "eval_runtime": 2.5318, + "eval_samples_per_second": 873.302, + "eval_steps_per_second": 0.79, + "step": 745 + }, + { + "epoch": 298.4, + "grad_norm": 271670.5625, + "learning_rate": 8.435097241067391e-07, + "loss": 0.4441, + "step": 746 + }, + { + "epoch": 298.8, + "grad_norm": 596610.6875, + "learning_rate": 8.446404341926731e-07, + "loss": 0.4413, + "step": 747 + }, + { + "epoch": 298.8, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8567612687813022, + "eval_loss": 0.449266642332077, + "eval_precision": 0.8727891156462585, + "eval_recall": 0.8413114754098361, + "eval_runtime": 2.4865, + "eval_samples_per_second": 889.204, + "eval_steps_per_second": 0.804, + "step": 747 + }, + { + "epoch": 299.2, + "grad_norm": 218748.34375, + "learning_rate": 8.45771144278607e-07, + "loss": 0.461, + "step": 748 + }, + { + "epoch": 299.6, + "grad_norm": 225671.28125, + "learning_rate": 8.46901854364541e-07, + "loss": 0.4505, + "step": 749 + }, + { + "epoch": 300.0, + "grad_norm": 1034110.625, + "learning_rate": 8.480325644504749e-07, + "loss": 0.4252, + "step": 750 + }, + { + "epoch": 300.0, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8581939799331104, + "eval_loss": 0.4484832286834717, + "eval_precision": 0.8757679180887372, + "eval_recall": 0.8413114754098361, + "eval_runtime": 2.4895, + "eval_samples_per_second": 888.143, + "eval_steps_per_second": 0.803, + "step": 750 + }, + { + "epoch": 300.4, + "grad_norm": 272961.8125, + "learning_rate": 8.49163274536409e-07, + "loss": 0.4359, + "step": 751 + }, + { + "epoch": 300.8, + "grad_norm": 272969.71875, + "learning_rate": 8.502939846223429e-07, + "loss": 0.4461, + "step": 752 + }, + { + "epoch": 300.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8582834331337326, + "eval_loss": 0.44803541898727417, + "eval_precision": 0.8710330857528696, + "eval_recall": 0.8459016393442623, + "eval_runtime": 2.4836, + "eval_samples_per_second": 890.241, + "eval_steps_per_second": 0.805, + "step": 752 + }, + { + "epoch": 301.2, + "grad_norm": 304389.1875, + "learning_rate": 8.514246947082769e-07, + "loss": 0.4285, + "step": 753 + }, + { + "epoch": 301.6, + "grad_norm": 278434.9375, + "learning_rate": 8.525554047942108e-07, + "loss": 0.4466, + "step": 754 + }, + { + "epoch": 302.0, + "grad_norm": 393407.625, + "learning_rate": 8.536861148801447e-07, + "loss": 0.4461, + "step": 755 + }, + { + "epoch": 302.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8578982154659617, + "eval_loss": 0.44794103503227234, + "eval_precision": 0.8647568287808128, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.4855, + "eval_samples_per_second": 889.564, + "eval_steps_per_second": 0.805, + "step": 755 + }, + { + "epoch": 302.4, + "grad_norm": 293303.28125, + "learning_rate": 8.548168249660788e-07, + "loss": 0.4397, + "step": 756 + }, + { + "epoch": 302.8, + "grad_norm": 304587.4375, + "learning_rate": 8.559475350520127e-07, + "loss": 0.439, + "step": 757 + }, + { + "epoch": 302.8, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8574277168494516, + "eval_loss": 0.44752252101898193, + "eval_precision": 0.8692722371967655, + "eval_recall": 0.8459016393442623, + "eval_runtime": 2.7927, + "eval_samples_per_second": 791.708, + "eval_steps_per_second": 0.716, + "step": 757 + }, + { + "epoch": 303.2, + "grad_norm": 353619.46875, + "learning_rate": 8.570782451379467e-07, + "loss": 0.4612, + "step": 758 + }, + { + "epoch": 303.6, + "grad_norm": 143032.515625, + "learning_rate": 8.582089552238806e-07, + "loss": 0.4425, + "step": 759 + }, + { + "epoch": 304.0, + "grad_norm": 320153.09375, + "learning_rate": 8.593396653098147e-07, + "loss": 0.4339, + "step": 760 + }, + { + "epoch": 304.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8556077904633983, + "eval_loss": 0.4476761221885681, + "eval_precision": 0.8768066070199587, + "eval_recall": 0.8354098360655737, + "eval_runtime": 2.5004, + "eval_samples_per_second": 884.272, + "eval_steps_per_second": 0.8, + "step": 760 + }, + { + "epoch": 304.4, + "grad_norm": 892651.375, + "learning_rate": 8.604703753957486e-07, + "loss": 0.4247, + "step": 761 + }, + { + "epoch": 304.8, + "grad_norm": 328343.9375, + "learning_rate": 8.616010854816825e-07, + "loss": 0.4584, + "step": 762 + }, + { + "epoch": 304.8, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8559946416610851, + "eval_loss": 0.44757744669914246, + "eval_precision": 0.8747433264887063, + "eval_recall": 0.8380327868852459, + "eval_runtime": 2.4799, + "eval_samples_per_second": 891.573, + "eval_steps_per_second": 0.806, + "step": 762 + }, + { + "epoch": 305.2, + "grad_norm": 264985.78125, + "learning_rate": 8.627317955676165e-07, + "loss": 0.4384, + "step": 763 + }, + { + "epoch": 305.6, + "grad_norm": 611297.875, + "learning_rate": 8.638625056535504e-07, + "loss": 0.4311, + "step": 764 + }, + { + "epoch": 306.0, + "grad_norm": 358614.375, + "learning_rate": 8.649932157394845e-07, + "loss": 0.4356, + "step": 765 + }, + { + "epoch": 306.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8582807832724859, + "eval_loss": 0.44771361351013184, + "eval_precision": 0.8689516129032258, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.4965, + "eval_samples_per_second": 885.654, + "eval_steps_per_second": 0.801, + "step": 765 + }, + { + "epoch": 306.4, + "grad_norm": 239071.453125, + "learning_rate": 8.661239258254184e-07, + "loss": 0.4452, + "step": 766 + }, + { + "epoch": 306.8, + "grad_norm": 157339.9375, + "learning_rate": 8.672546359113524e-07, + "loss": 0.4458, + "step": 767 + }, + { + "epoch": 306.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8586595885865959, + "eval_loss": 0.44780877232551575, + "eval_precision": 0.8690396239086635, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.4974, + "eval_samples_per_second": 885.316, + "eval_steps_per_second": 0.801, + "step": 767 + }, + { + "epoch": 307.2, + "grad_norm": 360731.90625, + "learning_rate": 8.683853459972863e-07, + "loss": 0.4412, + "step": 768 + }, + { + "epoch": 307.6, + "grad_norm": 211905.0625, + "learning_rate": 8.695160560832204e-07, + "loss": 0.4283, + "step": 769 + }, + { + "epoch": 308.0, + "grad_norm": 347296.40625, + "learning_rate": 8.706467661691543e-07, + "loss": 0.4341, + "step": 770 + }, + { + "epoch": 308.0, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8593231585932316, + "eval_loss": 0.44787055253982544, + "eval_precision": 0.8697112155809268, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.4367, + "eval_samples_per_second": 907.387, + "eval_steps_per_second": 0.821, + "step": 770 + }, + { + "epoch": 308.4, + "grad_norm": 204874.46875, + "learning_rate": 8.717774762550882e-07, + "loss": 0.444, + "step": 771 + }, + { + "epoch": 308.8, + "grad_norm": 190759.3125, + "learning_rate": 8.729081863410222e-07, + "loss": 0.4425, + "step": 772 + }, + { + "epoch": 308.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8583776595744681, + "eval_loss": 0.4473639726638794, + "eval_precision": 0.8705327039784221, + "eval_recall": 0.8465573770491803, + "eval_runtime": 2.4746, + "eval_samples_per_second": 893.463, + "eval_steps_per_second": 0.808, + "step": 772 + }, + { + "epoch": 309.2, + "grad_norm": 354653.28125, + "learning_rate": 8.740388964269561e-07, + "loss": 0.4473, + "step": 773 + }, + { + "epoch": 309.6, + "grad_norm": 308037.875, + "learning_rate": 8.751696065128902e-07, + "loss": 0.4401, + "step": 774 + }, + { + "epoch": 310.0, + "grad_norm": 231459.5, + "learning_rate": 8.763003165988241e-07, + "loss": 0.4371, + "step": 775 + }, + { + "epoch": 310.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8570474281897128, + "eval_loss": 0.4467235207557678, + "eval_precision": 0.8733832539142273, + "eval_recall": 0.8413114754098361, + "eval_runtime": 2.4762, + "eval_samples_per_second": 892.902, + "eval_steps_per_second": 0.808, + "step": 775 + }, + { + "epoch": 310.4, + "grad_norm": 266996.40625, + "learning_rate": 8.774310266847581e-07, + "loss": 0.44, + "step": 776 + }, + { + "epoch": 310.8, + "grad_norm": 246007.703125, + "learning_rate": 8.78561736770692e-07, + "loss": 0.4479, + "step": 777 + }, + { + "epoch": 310.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8581890812250332, + "eval_loss": 0.4461815059185028, + "eval_precision": 0.8715348208248817, + "eval_recall": 0.8452459016393442, + "eval_runtime": 2.7606, + "eval_samples_per_second": 800.924, + "eval_steps_per_second": 0.724, + "step": 777 + }, + { + "epoch": 311.2, + "grad_norm": 205842.75, + "learning_rate": 8.79692446856626e-07, + "loss": 0.4448, + "step": 778 + }, + { + "epoch": 311.6, + "grad_norm": 387395.53125, + "learning_rate": 8.8082315694256e-07, + "loss": 0.4485, + "step": 779 + }, + { + "epoch": 312.0, + "grad_norm": 506083.21875, + "learning_rate": 8.819538670284939e-07, + "loss": 0.4201, + "step": 780 + }, + { + "epoch": 312.0, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8594217347956131, + "eval_loss": 0.44546598196029663, + "eval_precision": 0.8712938005390836, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.4838, + "eval_samples_per_second": 890.181, + "eval_steps_per_second": 0.805, + "step": 780 + }, + { + "epoch": 312.4, + "grad_norm": 338485.8125, + "learning_rate": 8.830845771144279e-07, + "loss": 0.4473, + "step": 781 + }, + { + "epoch": 312.8, + "grad_norm": 236398.390625, + "learning_rate": 8.842152872003618e-07, + "loss": 0.4329, + "step": 782 + }, + { + "epoch": 312.8, + "eval_accuracy": 0.8100407055630936, + "eval_f1": 0.8603723404255319, + "eval_loss": 0.44510501623153687, + "eval_precision": 0.8725556304787593, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.4764, + "eval_samples_per_second": 892.815, + "eval_steps_per_second": 0.808, + "step": 782 + }, + { + "epoch": 313.2, + "grad_norm": 526343.9375, + "learning_rate": 8.853459972862959e-07, + "loss": 0.4215, + "step": 783 + }, + { + "epoch": 313.6, + "grad_norm": 723854.5, + "learning_rate": 8.864767073722298e-07, + "loss": 0.4594, + "step": 784 + }, + { + "epoch": 314.0, + "grad_norm": 460697.375, + "learning_rate": 8.876074174581637e-07, + "loss": 0.4507, + "step": 785 + }, + { + "epoch": 314.0, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8598937583001328, + "eval_loss": 0.4448266923427582, + "eval_precision": 0.8708809683927371, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.5276, + "eval_samples_per_second": 874.755, + "eval_steps_per_second": 0.791, + "step": 785 + }, + { + "epoch": 314.4, + "grad_norm": 228980.5625, + "learning_rate": 8.887381275440977e-07, + "loss": 0.4393, + "step": 786 + }, + { + "epoch": 314.8, + "grad_norm": 210040.71875, + "learning_rate": 8.898688376300317e-07, + "loss": 0.4404, + "step": 787 + }, + { + "epoch": 314.8, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8586702305379218, + "eval_loss": 0.44456908106803894, + "eval_precision": 0.8753405994550408, + "eval_recall": 0.8426229508196721, + "eval_runtime": 2.4812, + "eval_samples_per_second": 891.085, + "eval_steps_per_second": 0.806, + "step": 787 + }, + { + "epoch": 315.2, + "grad_norm": 196176.171875, + "learning_rate": 8.909995477159657e-07, + "loss": 0.4306, + "step": 788 + }, + { + "epoch": 315.6, + "grad_norm": 244933.984375, + "learning_rate": 8.921302578018996e-07, + "loss": 0.4459, + "step": 789 + }, + { + "epoch": 316.0, + "grad_norm": 247792.78125, + "learning_rate": 8.932609678878336e-07, + "loss": 0.4348, + "step": 790 + }, + { + "epoch": 316.0, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8586666666666667, + "eval_loss": 0.44440197944641113, + "eval_precision": 0.8732203389830508, + "eval_recall": 0.8445901639344262, + "eval_runtime": 2.4753, + "eval_samples_per_second": 893.239, + "eval_steps_per_second": 0.808, + "step": 790 + }, + { + "epoch": 316.4, + "grad_norm": 258818.28125, + "learning_rate": 8.943916779737676e-07, + "loss": 0.4397, + "step": 791 + }, + { + "epoch": 316.8, + "grad_norm": 222210.265625, + "learning_rate": 8.955223880597015e-07, + "loss": 0.4433, + "step": 792 + }, + { + "epoch": 316.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8589487691284099, + "eval_loss": 0.44444018602371216, + "eval_precision": 0.8717083051991897, + "eval_recall": 0.8465573770491803, + "eval_runtime": 2.4822, + "eval_samples_per_second": 890.748, + "eval_steps_per_second": 0.806, + "step": 792 + }, + { + "epoch": 317.2, + "grad_norm": 424005.1875, + "learning_rate": 8.966530981456355e-07, + "loss": 0.4285, + "step": 793 + }, + { + "epoch": 317.6, + "grad_norm": 346626.28125, + "learning_rate": 8.977838082315694e-07, + "loss": 0.455, + "step": 794 + }, + { + "epoch": 318.0, + "grad_norm": 549482.3125, + "learning_rate": 8.989145183175034e-07, + "loss": 0.4431, + "step": 795 + }, + { + "epoch": 318.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8578088578088578, + "eval_loss": 0.44444674253463745, + "eval_precision": 0.871447902571042, + "eval_recall": 0.8445901639344262, + "eval_runtime": 2.4759, + "eval_samples_per_second": 892.994, + "eval_steps_per_second": 0.808, + "step": 795 + }, + { + "epoch": 318.4, + "grad_norm": 235853.625, + "learning_rate": 9.000452284034374e-07, + "loss": 0.4365, + "step": 796 + }, + { + "epoch": 318.8, + "grad_norm": 227093.484375, + "learning_rate": 9.011759384893714e-07, + "loss": 0.4447, + "step": 797 + }, + { + "epoch": 318.8, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8597074468085106, + "eval_loss": 0.444330632686615, + "eval_precision": 0.8718813216453135, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.7547, + "eval_samples_per_second": 802.631, + "eval_steps_per_second": 0.726, + "step": 797 + }, + { + "epoch": 319.2, + "grad_norm": 260591.75, + "learning_rate": 9.023066485753053e-07, + "loss": 0.4245, + "step": 798 + }, + { + "epoch": 319.6, + "grad_norm": 245414.0625, + "learning_rate": 9.034373586612392e-07, + "loss": 0.4426, + "step": 799 + }, + { + "epoch": 320.0, + "grad_norm": 171731.1875, + "learning_rate": 9.045680687471733e-07, + "loss": 0.4273, + "step": 800 + }, + { + "epoch": 320.0, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.859515111258718, + "eval_loss": 0.44417697191238403, + "eval_precision": 0.8707940780619112, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.5005, + "eval_samples_per_second": 884.221, + "eval_steps_per_second": 0.8, + "step": 800 + }, + { + "epoch": 320.4, + "grad_norm": 281778.0, + "learning_rate": 9.056987788331072e-07, + "loss": 0.4438, + "step": 801 + }, + { + "epoch": 320.8, + "grad_norm": 356600.5625, + "learning_rate": 9.068294889190412e-07, + "loss": 0.4249, + "step": 802 + }, + { + "epoch": 320.8, + "eval_accuracy": 0.8046132971506106, + "eval_f1": 0.855807743658211, + "eval_loss": 0.44383516907691956, + "eval_precision": 0.8715159755268524, + "eval_recall": 0.8406557377049181, + "eval_runtime": 2.4939, + "eval_samples_per_second": 886.558, + "eval_steps_per_second": 0.802, + "step": 802 + }, + { + "epoch": 321.2, + "grad_norm": 270931.4375, + "learning_rate": 9.079601990049751e-07, + "loss": 0.4342, + "step": 803 + }, + { + "epoch": 321.6, + "grad_norm": 233657.328125, + "learning_rate": 9.090909090909091e-07, + "loss": 0.4388, + "step": 804 + }, + { + "epoch": 322.0, + "grad_norm": 856584.8125, + "learning_rate": 9.102216191768431e-07, + "loss": 0.4207, + "step": 805 + }, + { + "epoch": 322.0, + "eval_accuracy": 0.8050655811849842, + "eval_f1": 0.8551260504201681, + "eval_loss": 0.44370147585868835, + "eval_precision": 0.8772413793103448, + "eval_recall": 0.8340983606557377, + "eval_runtime": 2.5132, + "eval_samples_per_second": 879.749, + "eval_steps_per_second": 0.796, + "step": 805 + }, + { + "epoch": 322.4, + "grad_norm": 506108.03125, + "learning_rate": 9.113523292627771e-07, + "loss": 0.4315, + "step": 806 + }, + { + "epoch": 322.8, + "grad_norm": 718130.25, + "learning_rate": 9.12483039348711e-07, + "loss": 0.4254, + "step": 807 + }, + { + "epoch": 322.8, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.85657104736491, + "eval_loss": 0.4434225261211395, + "eval_precision": 0.8716904276985743, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.4817, + "eval_samples_per_second": 890.938, + "eval_steps_per_second": 0.806, + "step": 807 + }, + { + "epoch": 323.2, + "grad_norm": 191962.78125, + "learning_rate": 9.136137494346449e-07, + "loss": 0.4271, + "step": 808 + }, + { + "epoch": 323.6, + "grad_norm": 157046.125, + "learning_rate": 9.14744459520579e-07, + "loss": 0.4257, + "step": 809 + }, + { + "epoch": 324.0, + "grad_norm": 430524.75, + "learning_rate": 9.158751696065129e-07, + "loss": 0.435, + "step": 810 + }, + { + "epoch": 324.0, + "eval_accuracy": 0.8113975576662144, + "eval_f1": 0.8619662363455809, + "eval_loss": 0.44361042976379395, + "eval_precision": 0.8703208556149733, + "eval_recall": 0.8537704918032787, + "eval_runtime": 2.446, + "eval_samples_per_second": 903.92, + "eval_steps_per_second": 0.818, + "step": 810 + }, + { + "epoch": 324.4, + "grad_norm": 620186.8125, + "learning_rate": 9.170058796924469e-07, + "loss": 0.439, + "step": 811 + }, + { + "epoch": 324.8, + "grad_norm": 181647.328125, + "learning_rate": 9.181365897783808e-07, + "loss": 0.4334, + "step": 812 + }, + { + "epoch": 324.8, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.859515111258718, + "eval_loss": 0.4431088864803314, + "eval_precision": 0.8707940780619112, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.4662, + "eval_samples_per_second": 896.525, + "eval_steps_per_second": 0.811, + "step": 812 + }, + { + "epoch": 325.2, + "grad_norm": 228432.46875, + "learning_rate": 9.192672998643148e-07, + "loss": 0.4313, + "step": 813 + }, + { + "epoch": 325.6, + "grad_norm": 201937.875, + "learning_rate": 9.203980099502488e-07, + "loss": 0.4346, + "step": 814 + }, + { + "epoch": 326.0, + "grad_norm": 201069.640625, + "learning_rate": 9.215287200361827e-07, + "loss": 0.4479, + "step": 815 + }, + { + "epoch": 326.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8562793821356616, + "eval_loss": 0.4427602291107178, + "eval_precision": 0.8774948382656572, + "eval_recall": 0.8360655737704918, + "eval_runtime": 2.5357, + "eval_samples_per_second": 871.96, + "eval_steps_per_second": 0.789, + "step": 815 + }, + { + "epoch": 326.4, + "grad_norm": 402124.9375, + "learning_rate": 9.226594301221167e-07, + "loss": 0.4342, + "step": 816 + }, + { + "epoch": 326.8, + "grad_norm": 306473.46875, + "learning_rate": 9.237901402080506e-07, + "loss": 0.4367, + "step": 817 + }, + { + "epoch": 326.8, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8562814070351759, + "eval_loss": 0.4426453709602356, + "eval_precision": 0.8753424657534247, + "eval_recall": 0.8380327868852459, + "eval_runtime": 2.7534, + "eval_samples_per_second": 803.002, + "eval_steps_per_second": 0.726, + "step": 817 + }, + { + "epoch": 327.2, + "grad_norm": 265891.46875, + "learning_rate": 9.249208502939847e-07, + "loss": 0.4327, + "step": 818 + }, + { + "epoch": 327.6, + "grad_norm": 193623.578125, + "learning_rate": 9.260515603799186e-07, + "loss": 0.4173, + "step": 819 + }, + { + "epoch": 328.0, + "grad_norm": 502849.59375, + "learning_rate": 9.271822704658526e-07, + "loss": 0.4553, + "step": 820 + }, + { + "epoch": 328.0, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.859515111258718, + "eval_loss": 0.4428267776966095, + "eval_precision": 0.8707940780619112, + "eval_recall": 0.8485245901639344, + "eval_runtime": 2.4645, + "eval_samples_per_second": 897.123, + "eval_steps_per_second": 0.812, + "step": 820 + }, + { + "epoch": 328.4, + "grad_norm": 245487.46875, + "learning_rate": 9.283129805517865e-07, + "loss": 0.4275, + "step": 821 + }, + { + "epoch": 328.8, + "grad_norm": 729066.6875, + "learning_rate": 9.294436906377204e-07, + "loss": 0.4454, + "step": 822 + }, + { + "epoch": 328.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8605498509440211, + "eval_loss": 0.4430939853191376, + "eval_precision": 0.8694779116465864, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.4854, + "eval_samples_per_second": 889.601, + "eval_steps_per_second": 0.805, + "step": 822 + }, + { + "epoch": 329.2, + "grad_norm": 278662.40625, + "learning_rate": 9.305744007236545e-07, + "loss": 0.4243, + "step": 823 + }, + { + "epoch": 329.6, + "grad_norm": 421849.6875, + "learning_rate": 9.317051108095884e-07, + "loss": 0.4384, + "step": 824 + }, + { + "epoch": 330.0, + "grad_norm": 313870.03125, + "learning_rate": 9.328358208955224e-07, + "loss": 0.4321, + "step": 825 + }, + { + "epoch": 330.0, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8568568568568569, + "eval_loss": 0.4423777461051941, + "eval_precision": 0.8722826086956522, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.478, + "eval_samples_per_second": 892.241, + "eval_steps_per_second": 0.807, + "step": 825 + }, + { + "epoch": 330.4, + "grad_norm": 466682.84375, + "learning_rate": 9.339665309814563e-07, + "loss": 0.4349, + "step": 826 + }, + { + "epoch": 330.8, + "grad_norm": 290991.75, + "learning_rate": 9.350972410673904e-07, + "loss": 0.426, + "step": 827 + }, + { + "epoch": 330.8, + "eval_accuracy": 0.8032564450474898, + "eval_f1": 0.8541736506872276, + "eval_loss": 0.44210660457611084, + "eval_precision": 0.8737997256515775, + "eval_recall": 0.8354098360655737, + "eval_runtime": 2.5134, + "eval_samples_per_second": 879.699, + "eval_steps_per_second": 0.796, + "step": 827 + }, + { + "epoch": 331.2, + "grad_norm": 319821.3125, + "learning_rate": 9.362279511533243e-07, + "loss": 0.4397, + "step": 828 + }, + { + "epoch": 331.6, + "grad_norm": 257250.71875, + "learning_rate": 9.373586612392582e-07, + "loss": 0.431, + "step": 829 + }, + { + "epoch": 332.0, + "grad_norm": 576118.25, + "learning_rate": 9.384893713251922e-07, + "loss": 0.4208, + "step": 830 + }, + { + "epoch": 332.0, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.859520639147803, + "eval_loss": 0.4416925013065338, + "eval_precision": 0.8728870858688302, + "eval_recall": 0.8465573770491803, + "eval_runtime": 2.486, + "eval_samples_per_second": 889.393, + "eval_steps_per_second": 0.805, + "step": 830 + }, + { + "epoch": 332.4, + "grad_norm": 130394.0, + "learning_rate": 9.396200814111262e-07, + "loss": 0.4319, + "step": 831 + }, + { + "epoch": 332.8, + "grad_norm": 947493.0, + "learning_rate": 9.407507914970602e-07, + "loss": 0.4362, + "step": 832 + }, + { + "epoch": 332.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8608264462809917, + "eval_loss": 0.4421757161617279, + "eval_precision": 0.868, + "eval_recall": 0.8537704918032787, + "eval_runtime": 2.5018, + "eval_samples_per_second": 883.757, + "eval_steps_per_second": 0.799, + "step": 832 + }, + { + "epoch": 333.2, + "grad_norm": 327124.25, + "learning_rate": 9.418815015829941e-07, + "loss": 0.4334, + "step": 833 + }, + { + "epoch": 333.6, + "grad_norm": 567491.5625, + "learning_rate": 9.430122116689281e-07, + "loss": 0.4311, + "step": 834 + }, + { + "epoch": 334.0, + "grad_norm": 610096.125, + "learning_rate": 9.44142921754862e-07, + "loss": 0.4319, + "step": 835 + }, + { + "epoch": 334.0, + "eval_accuracy": 0.8104929895974672, + "eval_f1": 0.8608435735636001, + "eval_loss": 0.44131919741630554, + "eval_precision": 0.8721399730820996, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.4994, + "eval_samples_per_second": 884.606, + "eval_steps_per_second": 0.8, + "step": 835 + }, + { + "epoch": 334.4, + "grad_norm": 567285.3125, + "learning_rate": 9.45273631840796e-07, + "loss": 0.4318, + "step": 836 + }, + { + "epoch": 334.8, + "grad_norm": 217359.671875, + "learning_rate": 9.4640434192673e-07, + "loss": 0.4353, + "step": 837 + }, + { + "epoch": 334.8, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.8573346760657939, + "eval_loss": 0.4407016634941101, + "eval_precision": 0.8782668500687758, + "eval_recall": 0.8373770491803278, + "eval_runtime": 2.4932, + "eval_samples_per_second": 886.819, + "eval_steps_per_second": 0.802, + "step": 837 + }, + { + "epoch": 335.2, + "grad_norm": 501491.09375, + "learning_rate": 9.475350520126639e-07, + "loss": 0.4563, + "step": 838 + }, + { + "epoch": 335.6, + "grad_norm": 465584.1875, + "learning_rate": 9.486657620985979e-07, + "loss": 0.4246, + "step": 839 + }, + { + "epoch": 336.0, + "grad_norm": 898131.6875, + "learning_rate": 9.497964721845319e-07, + "loss": 0.4207, + "step": 840 + }, + { + "epoch": 336.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8558922558922559, + "eval_loss": 0.4403269290924072, + "eval_precision": 0.8795847750865052, + "eval_recall": 0.8334426229508197, + "eval_runtime": 2.7566, + "eval_samples_per_second": 802.071, + "eval_steps_per_second": 0.726, + "step": 840 + }, + { + "epoch": 336.4, + "grad_norm": 505489.125, + "learning_rate": 9.509271822704659e-07, + "loss": 0.4373, + "step": 841 + }, + { + "epoch": 336.8, + "grad_norm": 243068.09375, + "learning_rate": 9.520578923563998e-07, + "loss": 0.4214, + "step": 842 + }, + { + "epoch": 336.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.859619873291097, + "eval_loss": 0.44043654203414917, + "eval_precision": 0.8744911804613297, + "eval_recall": 0.8452459016393442, + "eval_runtime": 2.4977, + "eval_samples_per_second": 885.219, + "eval_steps_per_second": 0.801, + "step": 842 + }, + { + "epoch": 337.2, + "grad_norm": 240294.09375, + "learning_rate": 9.531886024423338e-07, + "loss": 0.4402, + "step": 843 + }, + { + "epoch": 337.6, + "grad_norm": 1072344.375, + "learning_rate": 9.543193125282676e-07, + "loss": 0.442, + "step": 844 + }, + { + "epoch": 338.0, + "grad_norm": 267669.21875, + "learning_rate": 9.554500226142018e-07, + "loss": 0.4148, + "step": 845 + }, + { + "epoch": 338.0, + "eval_accuracy": 0.8100407055630936, + "eval_f1": 0.8608349900596421, + "eval_loss": 0.44077861309051514, + "eval_precision": 0.870060281312793, + "eval_recall": 0.8518032786885246, + "eval_runtime": 2.5137, + "eval_samples_per_second": 879.597, + "eval_steps_per_second": 0.796, + "step": 845 + }, + { + "epoch": 338.4, + "grad_norm": 543666.625, + "learning_rate": 9.565807327001357e-07, + "loss": 0.4317, + "step": 846 + }, + { + "epoch": 338.8, + "grad_norm": 359393.4375, + "learning_rate": 9.577114427860696e-07, + "loss": 0.4294, + "step": 847 + }, + { + "epoch": 338.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8595261928595261, + "eval_loss": 0.4403291642665863, + "eval_precision": 0.875, + "eval_recall": 0.8445901639344262, + "eval_runtime": 2.5147, + "eval_samples_per_second": 879.236, + "eval_steps_per_second": 0.795, + "step": 847 + }, + { + "epoch": 339.2, + "grad_norm": 244418.8125, + "learning_rate": 9.588421528720035e-07, + "loss": 0.4306, + "step": 848 + }, + { + "epoch": 339.6, + "grad_norm": 168253.84375, + "learning_rate": 9.599728629579377e-07, + "loss": 0.4321, + "step": 849 + }, + { + "epoch": 340.0, + "grad_norm": 764364.25, + "learning_rate": 9.611035730438716e-07, + "loss": 0.4109, + "step": 850 + }, + { + "epoch": 340.0, + "eval_accuracy": 0.8014473089099955, + "eval_f1": 0.851135978297728, + "eval_loss": 0.4408320486545563, + "eval_precision": 0.8813202247191011, + "eval_recall": 0.8229508196721311, + "eval_runtime": 2.516, + "eval_samples_per_second": 878.772, + "eval_steps_per_second": 0.795, + "step": 850 + }, + { + "epoch": 340.4, + "grad_norm": 330674.9375, + "learning_rate": 9.622342831298055e-07, + "loss": 0.4384, + "step": 851 + }, + { + "epoch": 340.8, + "grad_norm": 1214516.125, + "learning_rate": 9.633649932157394e-07, + "loss": 0.4291, + "step": 852 + }, + { + "epoch": 340.8, + "eval_accuracy": 0.8023518769787427, + "eval_f1": 0.85191460521857, + "eval_loss": 0.4408922493457794, + "eval_precision": 0.8814866760168303, + "eval_recall": 0.8242622950819672, + "eval_runtime": 2.4699, + "eval_samples_per_second": 895.182, + "eval_steps_per_second": 0.81, + "step": 852 + }, + { + "epoch": 341.2, + "grad_norm": 674754.0, + "learning_rate": 9.644957033016733e-07, + "loss": 0.4479, + "step": 853 + }, + { + "epoch": 341.6, + "grad_norm": 732592.8125, + "learning_rate": 9.656264133876075e-07, + "loss": 0.4302, + "step": 854 + }, + { + "epoch": 342.0, + "grad_norm": 520279.875, + "learning_rate": 9.667571234735414e-07, + "loss": 0.462, + "step": 855 + }, + { + "epoch": 342.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.856951871657754, + "eval_loss": 0.4403921365737915, + "eval_precision": 0.8738922972051807, + "eval_recall": 0.8406557377049181, + "eval_runtime": 2.479, + "eval_samples_per_second": 891.886, + "eval_steps_per_second": 0.807, + "step": 855 + }, + { + "epoch": 342.4, + "grad_norm": 219135.40625, + "learning_rate": 9.678878335594753e-07, + "loss": 0.4303, + "step": 856 + }, + { + "epoch": 342.8, + "grad_norm": 270984.1875, + "learning_rate": 9.690185436454092e-07, + "loss": 0.4342, + "step": 857 + }, + { + "epoch": 342.8, + "eval_accuracy": 0.8104929895974672, + "eval_f1": 0.861028192371476, + "eval_loss": 0.4404546022415161, + "eval_precision": 0.8711409395973154, + "eval_recall": 0.8511475409836066, + "eval_runtime": 2.4919, + "eval_samples_per_second": 887.283, + "eval_steps_per_second": 0.803, + "step": 857 + }, + { + "epoch": 343.2, + "grad_norm": 457149.90625, + "learning_rate": 9.701492537313432e-07, + "loss": 0.4233, + "step": 858 + }, + { + "epoch": 343.6, + "grad_norm": 314495.09375, + "learning_rate": 9.712799638172773e-07, + "loss": 0.4301, + "step": 859 + }, + { + "epoch": 344.0, + "grad_norm": 213350.671875, + "learning_rate": 9.724106739032112e-07, + "loss": 0.4307, + "step": 860 + }, + { + "epoch": 344.0, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8584779706275033, + "eval_loss": 0.4400927424430847, + "eval_precision": 0.8742352141400408, + "eval_recall": 0.8432786885245902, + "eval_runtime": 2.7522, + "eval_samples_per_second": 803.356, + "eval_steps_per_second": 0.727, + "step": 860 + }, + { + "epoch": 344.4, + "grad_norm": 192115.515625, + "learning_rate": 9.735413839891451e-07, + "loss": 0.4247, + "step": 861 + }, + { + "epoch": 344.8, + "grad_norm": 164427.578125, + "learning_rate": 9.74672094075079e-07, + "loss": 0.432, + "step": 862 + }, + { + "epoch": 344.8, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.8581915248581915, + "eval_loss": 0.4402368664741516, + "eval_precision": 0.873641304347826, + "eval_recall": 0.8432786885245902, + "eval_runtime": 2.4644, + "eval_samples_per_second": 897.18, + "eval_steps_per_second": 0.812, + "step": 862 + }, + { + "epoch": 345.2, + "grad_norm": 187468.15625, + "learning_rate": 9.758028041610132e-07, + "loss": 0.4291, + "step": 863 + }, + { + "epoch": 345.6, + "grad_norm": 319015.59375, + "learning_rate": 9.76933514246947e-07, + "loss": 0.4233, + "step": 864 + }, + { + "epoch": 346.0, + "grad_norm": 788967.1875, + "learning_rate": 9.780642243328812e-07, + "loss": 0.4354, + "step": 865 + }, + { + "epoch": 346.0, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8590425531914894, + "eval_loss": 0.4403463900089264, + "eval_precision": 0.8712070128118679, + "eval_recall": 0.8472131147540983, + "eval_runtime": 2.4793, + "eval_samples_per_second": 891.775, + "eval_steps_per_second": 0.807, + "step": 865 + }, + { + "epoch": 346.4, + "grad_norm": 349928.4375, + "learning_rate": 9.79194934418815e-07, + "loss": 0.4319, + "step": 866 + }, + { + "epoch": 346.8, + "grad_norm": 170606.25, + "learning_rate": 9.80325644504749e-07, + "loss": 0.4292, + "step": 867 + }, + { + "epoch": 346.8, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8568561872909699, + "eval_loss": 0.43995940685272217, + "eval_precision": 0.8744027303754266, + "eval_recall": 0.84, + "eval_runtime": 2.4796, + "eval_samples_per_second": 891.667, + "eval_steps_per_second": 0.807, + "step": 867 + }, + { + "epoch": 347.2, + "grad_norm": 639500.875, + "learning_rate": 9.81456354590683e-07, + "loss": 0.4172, + "step": 868 + }, + { + "epoch": 347.6, + "grad_norm": 653074.4375, + "learning_rate": 9.82587064676617e-07, + "loss": 0.4314, + "step": 869 + }, + { + "epoch": 348.0, + "grad_norm": 473536.875, + "learning_rate": 9.837177747625508e-07, + "loss": 0.4297, + "step": 870 + }, + { + "epoch": 348.0, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8592394929953302, + "eval_loss": 0.43931373953819275, + "eval_precision": 0.8744059742023083, + "eval_recall": 0.8445901639344262, + "eval_runtime": 2.4876, + "eval_samples_per_second": 888.792, + "eval_steps_per_second": 0.804, + "step": 870 + }, + { + "epoch": 348.4, + "grad_norm": 358363.53125, + "learning_rate": 9.84848484848485e-07, + "loss": 0.4363, + "step": 871 + }, + { + "epoch": 348.8, + "grad_norm": 233994.125, + "learning_rate": 9.859791949344189e-07, + "loss": 0.4178, + "step": 872 + }, + { + "epoch": 348.8, + "eval_accuracy": 0.8113975576662144, + "eval_f1": 0.8611388611388612, + "eval_loss": 0.43888914585113525, + "eval_precision": 0.874830852503383, + "eval_recall": 0.8478688524590164, + "eval_runtime": 2.4855, + "eval_samples_per_second": 889.561, + "eval_steps_per_second": 0.805, + "step": 872 + }, + { + "epoch": 349.2, + "grad_norm": 344926.6875, + "learning_rate": 9.871099050203528e-07, + "loss": 0.4051, + "step": 873 + }, + { + "epoch": 349.6, + "grad_norm": 685523.125, + "learning_rate": 9.882406151062867e-07, + "loss": 0.4329, + "step": 874 + }, + { + "epoch": 350.0, + "grad_norm": 523134.75, + "learning_rate": 9.893713251922209e-07, + "loss": 0.418, + "step": 875 + }, + { + "epoch": 350.0, + "eval_accuracy": 0.8127544097693351, + "eval_f1": 0.8627320954907162, + "eval_loss": 0.4387931227684021, + "eval_precision": 0.8725687458081824, + "eval_recall": 0.8531147540983607, + "eval_runtime": 2.5068, + "eval_samples_per_second": 882.003, + "eval_steps_per_second": 0.798, + "step": 875 + }, + { + "epoch": 350.4, + "grad_norm": 635162.9375, + "learning_rate": 9.905020352781546e-07, + "loss": 0.4276, + "step": 876 + }, + { + "epoch": 350.8, + "grad_norm": 165741.765625, + "learning_rate": 9.916327453640887e-07, + "loss": 0.4166, + "step": 877 + }, + { + "epoch": 350.8, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8585757271815446, + "eval_loss": 0.43809816241264343, + "eval_precision": 0.8758526603001364, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.4967, + "eval_samples_per_second": 885.554, + "eval_steps_per_second": 0.801, + "step": 877 + }, + { + "epoch": 351.2, + "grad_norm": 306079.5, + "learning_rate": 9.927634554500226e-07, + "loss": 0.4218, + "step": 878 + }, + { + "epoch": 351.6, + "grad_norm": 255655.8125, + "learning_rate": 9.938941655359567e-07, + "loss": 0.4354, + "step": 879 + }, + { + "epoch": 352.0, + "grad_norm": 628245.4375, + "learning_rate": 9.950248756218905e-07, + "loss": 0.4253, + "step": 880 + }, + { + "epoch": 352.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8553162853297442, + "eval_loss": 0.4378787577152252, + "eval_precision": 0.8783690393918452, + "eval_recall": 0.8334426229508197, + "eval_runtime": 2.7545, + "eval_samples_per_second": 802.685, + "eval_steps_per_second": 0.726, + "step": 880 + }, + { + "epoch": 352.4, + "grad_norm": 159147.703125, + "learning_rate": 9.961555857078246e-07, + "loss": 0.4425, + "step": 881 + }, + { + "epoch": 352.8, + "grad_norm": 431613.46875, + "learning_rate": 9.972862957937585e-07, + "loss": 0.417, + "step": 882 + }, + { + "epoch": 352.8, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8558951965065502, + "eval_loss": 0.4377310574054718, + "eval_precision": 0.8774104683195593, + "eval_recall": 0.8354098360655737, + "eval_runtime": 2.4841, + "eval_samples_per_second": 890.048, + "eval_steps_per_second": 0.805, + "step": 882 + }, + { + "epoch": 353.2, + "grad_norm": 827942.9375, + "learning_rate": 9.984170058796924e-07, + "loss": 0.4241, + "step": 883 + }, + { + "epoch": 353.6, + "grad_norm": 566083.875, + "learning_rate": 9.995477159656263e-07, + "loss": 0.4446, + "step": 884 + }, + { + "epoch": 354.0, + "grad_norm": 378065.0, + "learning_rate": 1.0006784260515605e-06, + "loss": 0.4121, + "step": 885 + }, + { + "epoch": 354.0, + "eval_accuracy": 0.8109452736318408, + "eval_f1": 0.8610372340425532, + "eval_loss": 0.4378826916217804, + "eval_precision": 0.8732299393122049, + "eval_recall": 0.8491803278688524, + "eval_runtime": 2.4976, + "eval_samples_per_second": 885.261, + "eval_steps_per_second": 0.801, + "step": 885 + }, + { + "epoch": 354.4, + "grad_norm": 251337.265625, + "learning_rate": 1.0018091361374944e-06, + "loss": 0.4282, + "step": 886 + }, + { + "epoch": 354.8, + "grad_norm": 425018.03125, + "learning_rate": 1.0029398462234283e-06, + "loss": 0.4185, + "step": 887 + }, + { + "epoch": 354.8, + "eval_accuracy": 0.8113975576662144, + "eval_f1": 0.8614157527417746, + "eval_loss": 0.4377213418483734, + "eval_precision": 0.8733153638814016, + "eval_recall": 0.8498360655737704, + "eval_runtime": 2.4924, + "eval_samples_per_second": 887.079, + "eval_steps_per_second": 0.802, + "step": 887 + }, + { + "epoch": 355.2, + "grad_norm": 807615.875, + "learning_rate": 1.0040705563093622e-06, + "loss": 0.4527, + "step": 888 + }, + { + "epoch": 355.6, + "grad_norm": 161284.828125, + "learning_rate": 1.0052012663952964e-06, + "loss": 0.4206, + "step": 889 + }, + { + "epoch": 356.0, + "grad_norm": 326531.75, + "learning_rate": 1.00633197648123e-06, + "loss": 0.4354, + "step": 890 + }, + { + "epoch": 356.0, + "eval_accuracy": 0.8059701492537313, + "eval_f1": 0.8559919436052367, + "eval_loss": 0.4371529817581177, + "eval_precision": 0.8768913342503438, + "eval_recall": 0.8360655737704918, + "eval_runtime": 2.4838, + "eval_samples_per_second": 890.169, + "eval_steps_per_second": 0.805, + "step": 890 + }, + { + "epoch": 356.4, + "grad_norm": 424899.78125, + "learning_rate": 1.0074626865671642e-06, + "loss": 0.4256, + "step": 891 + }, + { + "epoch": 356.8, + "grad_norm": 175927.0, + "learning_rate": 1.0085933966530981e-06, + "loss": 0.4345, + "step": 892 + }, + { + "epoch": 356.8, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.855510752688172, + "eval_loss": 0.4371435344219208, + "eval_precision": 0.8773259820813232, + "eval_recall": 0.8347540983606557, + "eval_runtime": 2.4693, + "eval_samples_per_second": 895.396, + "eval_steps_per_second": 0.81, + "step": 892 + }, + { + "epoch": 357.2, + "grad_norm": 232653.8125, + "learning_rate": 1.0097241067390323e-06, + "loss": 0.4269, + "step": 893 + }, + { + "epoch": 357.6, + "grad_norm": 651609.375, + "learning_rate": 1.010854816824966e-06, + "loss": 0.4363, + "step": 894 + }, + { + "epoch": 358.0, + "grad_norm": 838520.125, + "learning_rate": 1.0119855269109e-06, + "loss": 0.4007, + "step": 895 + }, + { + "epoch": 358.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.855413584398117, + "eval_loss": 0.43710586428642273, + "eval_precision": 0.8778467908902692, + "eval_recall": 0.8340983606557377, + "eval_runtime": 2.469, + "eval_samples_per_second": 895.514, + "eval_steps_per_second": 0.81, + "step": 895 + }, + { + "epoch": 358.4, + "grad_norm": 460935.75, + "learning_rate": 1.013116236996834e-06, + "loss": 0.4303, + "step": 896 + }, + { + "epoch": 358.8, + "grad_norm": 235852.09375, + "learning_rate": 1.014246947082768e-06, + "loss": 0.4353, + "step": 897 + }, + { + "epoch": 358.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8582887700534759, + "eval_loss": 0.4371148645877838, + "eval_precision": 0.8752556237218814, + "eval_recall": 0.8419672131147541, + "eval_runtime": 2.4693, + "eval_samples_per_second": 895.396, + "eval_steps_per_second": 0.81, + "step": 897 + }, + { + "epoch": 359.2, + "grad_norm": 190911.28125, + "learning_rate": 1.0153776571687019e-06, + "loss": 0.4169, + "step": 898 + }, + { + "epoch": 359.6, + "grad_norm": 176680.859375, + "learning_rate": 1.016508367254636e-06, + "loss": 0.4201, + "step": 899 + }, + { + "epoch": 360.0, + "grad_norm": 912547.5, + "learning_rate": 1.01763907734057e-06, + "loss": 0.4409, + "step": 900 + }, + { + "epoch": 360.0, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8588588588588588, + "eval_loss": 0.4371590316295624, + "eval_precision": 0.8743206521739131, + "eval_recall": 0.8439344262295082, + "eval_runtime": 2.4767, + "eval_samples_per_second": 892.731, + "eval_steps_per_second": 0.808, + "step": 900 + }, + { + "epoch": 360.4, + "grad_norm": 166037.515625, + "learning_rate": 1.0187697874265038e-06, + "loss": 0.4192, + "step": 901 + }, + { + "epoch": 360.8, + "grad_norm": 193099.953125, + "learning_rate": 1.0199004975124378e-06, + "loss": 0.4295, + "step": 902 + }, + { + "epoch": 360.8, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8570472045530633, + "eval_loss": 0.4368695020675659, + "eval_precision": 0.8755129958960328, + "eval_recall": 0.839344262295082, + "eval_runtime": 2.9506, + "eval_samples_per_second": 749.348, + "eval_steps_per_second": 0.678, + "step": 902 + }, + { + "epoch": 361.2, + "grad_norm": 577403.3125, + "learning_rate": 1.0210312075983719e-06, + "loss": 0.457, + "step": 903 + }, + { + "epoch": 361.6, + "grad_norm": 274468.65625, + "learning_rate": 1.0221619176843056e-06, + "loss": 0.4185, + "step": 904 + }, + { + "epoch": 362.0, + "grad_norm": 411799.9375, + "learning_rate": 1.0232926277702397e-06, + "loss": 0.4304, + "step": 905 + }, + { + "epoch": 362.0, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8560860793544048, + "eval_loss": 0.43674135208129883, + "eval_precision": 0.8785369220151829, + "eval_recall": 0.8347540983606557, + "eval_runtime": 2.4751, + "eval_samples_per_second": 893.304, + "eval_steps_per_second": 0.808, + "step": 905 + }, + { + "epoch": 362.4, + "grad_norm": 470374.75, + "learning_rate": 1.0244233378561736e-06, + "loss": 0.4289, + "step": 906 + }, + { + "epoch": 362.8, + "grad_norm": 185796.484375, + "learning_rate": 1.0255540479421078e-06, + "loss": 0.4272, + "step": 907 + }, + { + "epoch": 362.8, + "eval_accuracy": 0.8073270013568521, + "eval_f1": 0.8573342263898192, + "eval_loss": 0.4365463852882385, + "eval_precision": 0.8761122518822724, + "eval_recall": 0.839344262295082, + "eval_runtime": 2.4843, + "eval_samples_per_second": 889.98, + "eval_steps_per_second": 0.805, + "step": 907 + }, + { + "epoch": 363.2, + "grad_norm": 298455.71875, + "learning_rate": 1.0266847580280415e-06, + "loss": 0.4189, + "step": 908 + }, + { + "epoch": 363.6, + "grad_norm": 524565.0625, + "learning_rate": 1.0278154681139756e-06, + "loss": 0.4259, + "step": 909 + }, + { + "epoch": 364.0, + "grad_norm": 289053.21875, + "learning_rate": 1.0289461781999095e-06, + "loss": 0.4324, + "step": 910 + }, + { + "epoch": 364.0, + "eval_accuracy": 0.8055178652193578, + "eval_f1": 0.8557046979865772, + "eval_loss": 0.4361265301704407, + "eval_precision": 0.8762886597938144, + "eval_recall": 0.8360655737704918, + "eval_runtime": 2.4853, + "eval_samples_per_second": 889.648, + "eval_steps_per_second": 0.805, + "step": 910 + }, + { + "epoch": 364.4, + "grad_norm": 329714.1875, + "learning_rate": 1.0300768882858435e-06, + "loss": 0.4113, + "step": 911 + }, + { + "epoch": 364.8, + "grad_norm": 177020.890625, + "learning_rate": 1.0312075983717774e-06, + "loss": 0.4366, + "step": 912 + }, + { + "epoch": 364.8, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.856855514582635, + "eval_loss": 0.43579068779945374, + "eval_precision": 0.8765432098765432, + "eval_recall": 0.8380327868852459, + "eval_runtime": 2.4796, + "eval_samples_per_second": 891.677, + "eval_steps_per_second": 0.807, + "step": 912 + }, + { + "epoch": 365.2, + "grad_norm": 614417.3125, + "learning_rate": 1.0323383084577115e-06, + "loss": 0.4196, + "step": 913 + }, + { + "epoch": 365.6, + "grad_norm": 404374.5, + "learning_rate": 1.0334690185436454e-06, + "loss": 0.4356, + "step": 914 + }, + { + "epoch": 366.0, + "grad_norm": 269530.46875, + "learning_rate": 1.0345997286295793e-06, + "loss": 0.4179, + "step": 915 + }, + { + "epoch": 366.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.856855514582635, + "eval_loss": 0.4353279769420624, + "eval_precision": 0.8765432098765432, + "eval_recall": 0.8380327868852459, + "eval_runtime": 2.485, + "eval_samples_per_second": 889.746, + "eval_steps_per_second": 0.805, + "step": 915 + }, + { + "epoch": 366.4, + "grad_norm": 164748.375, + "learning_rate": 1.0357304387155133e-06, + "loss": 0.4278, + "step": 916 + }, + { + "epoch": 366.8, + "grad_norm": 212671.046875, + "learning_rate": 1.0368611488014474e-06, + "loss": 0.4284, + "step": 917 + }, + { + "epoch": 366.8, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.8566610455311973, + "eval_loss": 0.43516167998313904, + "eval_precision": 0.8819444444444444, + "eval_recall": 0.8327868852459016, + "eval_runtime": 2.4778, + "eval_samples_per_second": 892.323, + "eval_steps_per_second": 0.807, + "step": 917 + }, + { + "epoch": 367.2, + "grad_norm": 1011349.875, + "learning_rate": 1.0379918588873811e-06, + "loss": 0.4055, + "step": 918 + }, + { + "epoch": 367.6, + "grad_norm": 326463.1875, + "learning_rate": 1.0391225689733152e-06, + "loss": 0.4208, + "step": 919 + }, + { + "epoch": 368.0, + "grad_norm": 937918.625, + "learning_rate": 1.0402532790592492e-06, + "loss": 0.4472, + "step": 920 + }, + { + "epoch": 368.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8569514237855946, + "eval_loss": 0.4349691569805145, + "eval_precision": 0.876027397260274, + "eval_recall": 0.838688524590164, + "eval_runtime": 2.4863, + "eval_samples_per_second": 889.273, + "eval_steps_per_second": 0.804, + "step": 920 + }, + { + "epoch": 368.4, + "grad_norm": 153484.984375, + "learning_rate": 1.0413839891451833e-06, + "loss": 0.4262, + "step": 921 + }, + { + "epoch": 368.8, + "grad_norm": 255542.1875, + "learning_rate": 1.042514699231117e-06, + "loss": 0.4118, + "step": 922 + }, + { + "epoch": 368.8, + "eval_accuracy": 0.8118498417005879, + "eval_f1": 0.8619774386197744, + "eval_loss": 0.4354184567928314, + "eval_precision": 0.8723975822699799, + "eval_recall": 0.8518032786885246, + "eval_runtime": 3.0324, + "eval_samples_per_second": 729.117, + "eval_steps_per_second": 0.66, + "step": 922 + }, + { + "epoch": 369.2, + "grad_norm": 939860.5, + "learning_rate": 1.0436454093170511e-06, + "loss": 0.4558, + "step": 923 + }, + { + "epoch": 369.6, + "grad_norm": 486501.34375, + "learning_rate": 1.044776119402985e-06, + "loss": 0.4179, + "step": 924 + }, + { + "epoch": 370.0, + "grad_norm": 538656.5625, + "learning_rate": 1.045906829488919e-06, + "loss": 0.4149, + "step": 925 + }, + { + "epoch": 370.0, + "eval_accuracy": 0.8068747173224785, + "eval_f1": 0.8571428571428571, + "eval_loss": 0.4347803294658661, + "eval_precision": 0.875, + "eval_recall": 0.84, + "eval_runtime": 2.489, + "eval_samples_per_second": 888.3, + "eval_steps_per_second": 0.804, + "step": 925 + }, + { + "epoch": 370.4, + "grad_norm": 196223.59375, + "learning_rate": 1.0470375395748529e-06, + "loss": 0.4318, + "step": 926 + }, + { + "epoch": 370.8, + "grad_norm": 498700.5, + "learning_rate": 1.048168249660787e-06, + "loss": 0.4257, + "step": 927 + }, + { + "epoch": 370.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.8568534773801485, + "eval_loss": 0.4348140060901642, + "eval_precision": 0.8830897703549061, + "eval_recall": 0.8321311475409836, + "eval_runtime": 2.5137, + "eval_samples_per_second": 879.593, + "eval_steps_per_second": 0.796, + "step": 927 + }, + { + "epoch": 371.2, + "grad_norm": 242288.265625, + "learning_rate": 1.049298959746721e-06, + "loss": 0.4412, + "step": 928 + }, + { + "epoch": 371.6, + "grad_norm": 347515.125, + "learning_rate": 1.0504296698326549e-06, + "loss": 0.4316, + "step": 929 + }, + { + "epoch": 372.0, + "grad_norm": 380393.4375, + "learning_rate": 1.0515603799185888e-06, + "loss": 0.4161, + "step": 930 + }, + { + "epoch": 372.0, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8582941571524513, + "eval_loss": 0.43459975719451904, + "eval_precision": 0.8795595320027529, + "eval_recall": 0.8380327868852459, + "eval_runtime": 2.5414, + "eval_samples_per_second": 869.986, + "eval_steps_per_second": 0.787, + "step": 930 + }, + { + "epoch": 372.4, + "grad_norm": 452785.59375, + "learning_rate": 1.052691090004523e-06, + "loss": 0.4168, + "step": 931 + }, + { + "epoch": 372.8, + "grad_norm": 331910.15625, + "learning_rate": 1.0538218000904566e-06, + "loss": 0.4214, + "step": 932 + }, + { + "epoch": 372.8, + "eval_accuracy": 0.8086838534599728, + "eval_f1": 0.8592346089850249, + "eval_loss": 0.43512341380119324, + "eval_precision": 0.8722972972972973, + "eval_recall": 0.8465573770491803, + "eval_runtime": 2.4677, + "eval_samples_per_second": 895.969, + "eval_steps_per_second": 0.81, + "step": 932 + }, + { + "epoch": 373.2, + "grad_norm": 624111.4375, + "learning_rate": 1.0549525101763908e-06, + "loss": 0.4253, + "step": 933 + }, + { + "epoch": 373.6, + "grad_norm": 406841.59375, + "learning_rate": 1.0560832202623247e-06, + "loss": 0.4217, + "step": 934 + }, + { + "epoch": 374.0, + "grad_norm": 469528.8125, + "learning_rate": 1.0572139303482588e-06, + "loss": 0.4297, + "step": 935 + }, + { + "epoch": 374.0, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8586738111185532, + "eval_loss": 0.4345199763774872, + "eval_precision": 0.8774811772758385, + "eval_recall": 0.8406557377049181, + "eval_runtime": 2.4769, + "eval_samples_per_second": 892.634, + "eval_steps_per_second": 0.807, + "step": 935 + }, + { + "epoch": 374.4, + "grad_norm": 210462.40625, + "learning_rate": 1.0583446404341925e-06, + "loss": 0.4283, + "step": 936 + }, + { + "epoch": 374.8, + "grad_norm": 283468.34375, + "learning_rate": 1.0594753505201266e-06, + "loss": 0.4227, + "step": 937 + }, + { + "epoch": 374.8, + "eval_accuracy": 0.8064224332881049, + "eval_f1": 0.8555030384875084, + "eval_loss": 0.4343184530735016, + "eval_precision": 0.8816979819067502, + "eval_recall": 0.8308196721311475, + "eval_runtime": 2.4873, + "eval_samples_per_second": 888.904, + "eval_steps_per_second": 0.804, + "step": 937 + }, + { + "epoch": 375.2, + "grad_norm": 619449.1875, + "learning_rate": 1.0606060606060608e-06, + "loss": 0.4168, + "step": 938 + }, + { + "epoch": 375.6, + "grad_norm": 598934.6875, + "learning_rate": 1.0617367706919947e-06, + "loss": 0.4149, + "step": 939 + }, + { + "epoch": 376.0, + "grad_norm": 715631.0, + "learning_rate": 1.0628674807779286e-06, + "loss": 0.4157, + "step": 940 + }, + { + "epoch": 376.0, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.857430392485743, + "eval_loss": 0.4339190423488617, + "eval_precision": 0.8777472527472527, + "eval_recall": 0.8380327868852459, + "eval_runtime": 2.4859, + "eval_samples_per_second": 889.41, + "eval_steps_per_second": 0.805, + "step": 940 + }, + { + "epoch": 376.4, + "grad_norm": 281253.0625, + "learning_rate": 1.0639981908638625e-06, + "loss": 0.4104, + "step": 941 + }, + { + "epoch": 376.8, + "grad_norm": 199166.890625, + "learning_rate": 1.0651289009497967e-06, + "loss": 0.4372, + "step": 942 + }, + { + "epoch": 376.8, + "eval_accuracy": 0.8104929895974672, + "eval_f1": 0.8603798733755414, + "eval_loss": 0.4342331886291504, + "eval_precision": 0.8746612466124661, + "eval_recall": 0.8465573770491803, + "eval_runtime": 2.9906, + "eval_samples_per_second": 739.32, + "eval_steps_per_second": 0.669, + "step": 942 + }, + { + "epoch": 377.2, + "grad_norm": 1353188.75, + "learning_rate": 1.0662596110357304e-06, + "loss": 0.4388, + "step": 943 + }, + { + "epoch": 377.6, + "grad_norm": 790053.875, + "learning_rate": 1.0673903211216645e-06, + "loss": 0.4268, + "step": 944 + }, + { + "epoch": 378.0, + "grad_norm": 236884.515625, + "learning_rate": 1.0685210312075984e-06, + "loss": 0.4161, + "step": 945 + }, + { + "epoch": 378.0, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8590559089387345, + "eval_loss": 0.43363067507743835, + "eval_precision": 0.8775649794801642, + "eval_recall": 0.8413114754098361, + "eval_runtime": 2.4913, + "eval_samples_per_second": 887.495, + "eval_steps_per_second": 0.803, + "step": 945 + }, + { + "epoch": 378.4, + "grad_norm": 239936.75, + "learning_rate": 1.0696517412935326e-06, + "loss": 0.4247, + "step": 946 + }, + { + "epoch": 378.8, + "grad_norm": 141048.171875, + "learning_rate": 1.0707824513794663e-06, + "loss": 0.4145, + "step": 947 + }, + { + "epoch": 378.8, + "eval_accuracy": 0.8082315694255993, + "eval_f1": 0.857335127860027, + "eval_loss": 0.4332951307296753, + "eval_precision": 0.8804422944022114, + "eval_recall": 0.8354098360655737, + "eval_runtime": 2.4977, + "eval_samples_per_second": 885.209, + "eval_steps_per_second": 0.801, + "step": 947 + }, + { + "epoch": 379.2, + "grad_norm": 276152.8125, + "learning_rate": 1.0719131614654004e-06, + "loss": 0.4417, + "step": 948 + }, + { + "epoch": 379.6, + "grad_norm": 434721.15625, + "learning_rate": 1.0730438715513343e-06, + "loss": 0.427, + "step": 949 + }, + { + "epoch": 380.0, + "grad_norm": 618086.25, + "learning_rate": 1.0741745816372682e-06, + "loss": 0.4141, + "step": 950 + }, + { + "epoch": 380.0, + "eval_accuracy": 0.8077792853912257, + "eval_f1": 0.856950521709862, + "eval_loss": 0.4328944683074951, + "eval_precision": 0.8803596127247579, + "eval_recall": 0.8347540983606557, + "eval_runtime": 2.534, + "eval_samples_per_second": 872.531, + "eval_steps_per_second": 0.789, + "step": 950 + }, + { + "epoch": 380.4, + "grad_norm": 602579.25, + "learning_rate": 1.0753052917232022e-06, + "loss": 0.4167, + "step": 951 + }, + { + "epoch": 380.8, + "grad_norm": 209176.046875, + "learning_rate": 1.0764360018091363e-06, + "loss": 0.4307, + "step": 952 + }, + { + "epoch": 380.8, + "eval_accuracy": 0.8118498417005879, + "eval_f1": 0.8613333333333333, + "eval_loss": 0.4330473840236664, + "eval_precision": 0.8759322033898305, + "eval_recall": 0.8472131147540983, + "eval_runtime": 2.4982, + "eval_samples_per_second": 885.032, + "eval_steps_per_second": 0.801, + "step": 952 + }, + { + "epoch": 381.2, + "grad_norm": 656247.375, + "learning_rate": 1.0775667118950702e-06, + "loss": 0.4228, + "step": 953 + }, + { + "epoch": 381.6, + "grad_norm": 862684.25, + "learning_rate": 1.0786974219810041e-06, + "loss": 0.4352, + "step": 954 + }, + { + "epoch": 382.0, + "grad_norm": 1486578.75, + "learning_rate": 1.079828132066938e-06, + "loss": 0.4239, + "step": 955 + }, + { + "epoch": 382.0, + "eval_accuracy": 0.8123021257349615, + "eval_f1": 0.8616205401800601, + "eval_loss": 0.4326733946800232, + "eval_precision": 0.8765264586160109, + "eval_recall": 0.8472131147540983, + "eval_runtime": 2.4912, + "eval_samples_per_second": 887.524, + "eval_steps_per_second": 0.803, + "step": 955 + }, + { + "epoch": 382.4, + "grad_norm": 475660.34375, + "learning_rate": 1.0809588421528722e-06, + "loss": 0.4149, + "step": 956 + }, + { + "epoch": 382.8, + "grad_norm": 386883.125, + "learning_rate": 1.0820895522388059e-06, + "loss": 0.4296, + "step": 957 + }, + { + "epoch": 382.8, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8575286968264686, + "eval_loss": 0.43244293332099915, + "eval_precision": 0.883785664578984, + "eval_recall": 0.8327868852459016, + "eval_runtime": 2.4881, + "eval_samples_per_second": 888.62, + "eval_steps_per_second": 0.804, + "step": 957 + }, + { + "epoch": 383.2, + "grad_norm": 460601.84375, + "learning_rate": 1.08322026232474e-06, + "loss": 0.4192, + "step": 958 + }, + { + "epoch": 383.6, + "grad_norm": 594017.5, + "learning_rate": 1.084350972410674e-06, + "loss": 0.4196, + "step": 959 + }, + { + "epoch": 384.0, + "grad_norm": 809374.875, + "learning_rate": 1.085481682496608e-06, + "loss": 0.4242, + "step": 960 + }, + { + "epoch": 384.0, + "eval_accuracy": 0.8091361374943464, + "eval_f1": 0.8575286968264686, + "eval_loss": 0.43266159296035767, + "eval_precision": 0.883785664578984, + "eval_recall": 0.8327868852459016, + "eval_runtime": 2.5051, + "eval_samples_per_second": 882.604, + "eval_steps_per_second": 0.798, + "step": 960 + }, + { + "epoch": 384.4, + "grad_norm": 563235.4375, + "learning_rate": 1.0866123925825418e-06, + "loss": 0.4223, + "step": 961 + }, + { + "epoch": 384.8, + "grad_norm": 228576.546875, + "learning_rate": 1.087743102668476e-06, + "loss": 0.4217, + "step": 962 + }, + { + "epoch": 384.8, + "eval_accuracy": 0.80958842152872, + "eval_f1": 0.8595261928595261, + "eval_loss": 0.4328891336917877, + "eval_precision": 0.875, + "eval_recall": 0.8445901639344262, + "eval_runtime": 2.7575, + "eval_samples_per_second": 801.819, + "eval_steps_per_second": 0.725, + "step": 962 + }, + { + "epoch": 385.2, + "grad_norm": 212632.015625, + "learning_rate": 1.0888738127544098e-06, + "loss": 0.4016, + "step": 963 + }, + { + "epoch": 385.6, + "grad_norm": 624929.75, + "learning_rate": 1.0900045228403438e-06, + "loss": 0.4159, + "step": 964 + }, + { + "epoch": 386.0, + "grad_norm": 314454.09375, + "learning_rate": 1.0911352329262777e-06, + "loss": 0.4112, + "step": 965 + }, + { + "epoch": 386.0, + "eval_accuracy": 0.8141112618724559, + "eval_f1": 0.8643116540112248, + "eval_loss": 0.43417075276374817, + "eval_precision": 0.870345744680851, + "eval_recall": 0.8583606557377049, + "eval_runtime": 2.4836, + "eval_samples_per_second": 890.258, + "eval_steps_per_second": 0.805, + "step": 965 + } + ], + "logging_steps": 1, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3458458694736282e+17, + "train_batch_size": 2048, + "trial_name": null, + "trial_params": null +}