diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,26416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 100.0, + "global_step": 17600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06, + "learning_rate": 9.994318181818182e-05, + "loss": 0.9835, + "step": 10 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.8295454382896423, + "eval_loss": 0.7258287072181702, + "eval_runtime": 124.4231, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 10 + }, + { + "epoch": 0.11, + "learning_rate": 9.988636363636364e-05, + "loss": 0.5434, + "step": 20 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.7073863744735718, + "eval_loss": 0.702700674533844, + "eval_runtime": 125.8891, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 20 + }, + { + "epoch": 0.17, + "learning_rate": 9.982954545454546e-05, + "loss": 0.4226, + "step": 30 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.9886363744735718, + "eval_loss": 0.21866631507873535, + "eval_runtime": 125.6286, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 30 + }, + { + "epoch": 0.23, + "learning_rate": 9.977272727272728e-05, + "loss": 0.1114, + "step": 40 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.9886363744735718, + "eval_loss": 0.03101344220340252, + "eval_runtime": 125.4447, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 40 + }, + { + "epoch": 0.28, + "learning_rate": 9.97159090909091e-05, + "loss": 0.0708, + "step": 50 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.9659090638160706, + "eval_loss": 0.14963287115097046, + "eval_runtime": 125.3207, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 50 + }, + { + "epoch": 0.34, + "learning_rate": 9.965909090909091e-05, + "loss": 0.0852, + "step": 60 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.009183489717543125, + "eval_runtime": 126.1109, + "eval_samples_per_second": 2.791, + "eval_steps_per_second": 0.698, + "step": 60 + }, + { + "epoch": 0.4, + "learning_rate": 9.960227272727273e-05, + "loss": 0.0453, + "step": 70 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.9857954382896423, + "eval_loss": 0.04448651894927025, + "eval_runtime": 125.4142, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 9.954545454545455e-05, + "loss": 0.0023, + "step": 80 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.9886363744735718, + "eval_loss": 0.02672048658132553, + "eval_runtime": 125.096, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 80 + }, + { + "epoch": 0.51, + "learning_rate": 9.948863636363637e-05, + "loss": 0.0069, + "step": 90 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.024198301136493683, + "eval_runtime": 124.8276, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 90 + }, + { + "epoch": 0.57, + "learning_rate": 9.943181818181819e-05, + "loss": 0.0022, + "step": 100 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.017196964472532272, + "eval_runtime": 125.0858, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 100 + }, + { + "epoch": 0.62, + "learning_rate": 9.9375e-05, + "loss": 0.0597, + "step": 110 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.009186466224491596, + "eval_runtime": 124.3884, + "eval_samples_per_second": 2.83, + "eval_steps_per_second": 0.707, + "step": 110 + }, + { + "epoch": 0.68, + "learning_rate": 9.931818181818182e-05, + "loss": 0.0494, + "step": 120 + }, + { + "epoch": 0.68, + "eval_accuracy": 1.0, + "eval_loss": 0.0026742105837911367, + "eval_runtime": 123.9933, + "eval_samples_per_second": 2.839, + "eval_steps_per_second": 0.71, + "step": 120 + }, + { + "epoch": 0.74, + "learning_rate": 9.926136363636364e-05, + "loss": 0.022, + "step": 130 + }, + { + "epoch": 0.74, + "eval_accuracy": 1.0, + "eval_loss": 0.0014365765964612365, + "eval_runtime": 124.5205, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 130 + }, + { + "epoch": 0.8, + "learning_rate": 9.920454545454546e-05, + "loss": 0.0014, + "step": 140 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.031146906316280365, + "eval_runtime": 124.412, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 140 + }, + { + "epoch": 0.85, + "learning_rate": 9.914772727272728e-05, + "loss": 0.0207, + "step": 150 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02382010966539383, + "eval_runtime": 124.5113, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 150 + }, + { + "epoch": 0.91, + "learning_rate": 9.909090909090911e-05, + "loss": 0.0006, + "step": 160 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.03369242325425148, + "eval_runtime": 124.6718, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 160 + }, + { + "epoch": 0.97, + "learning_rate": 9.903977272727272e-05, + "loss": 0.1216, + "step": 170 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.020197419449687004, + "eval_runtime": 125.0636, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 170 + }, + { + "epoch": 1.02, + "learning_rate": 9.898295454545456e-05, + "loss": 0.0008, + "step": 180 + }, + { + "epoch": 1.02, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0041679213754832745, + "eval_runtime": 125.8106, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 180 + }, + { + "epoch": 1.08, + "learning_rate": 9.892613636363637e-05, + "loss": 0.0011, + "step": 190 + }, + { + "epoch": 1.08, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.004185836296528578, + "eval_runtime": 124.6013, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 190 + }, + { + "epoch": 1.14, + "learning_rate": 9.886931818181818e-05, + "loss": 0.0011, + "step": 200 + }, + { + "epoch": 1.14, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0034860221203416586, + "eval_runtime": 124.9798, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 200 + }, + { + "epoch": 1.19, + "learning_rate": 9.881250000000001e-05, + "loss": 0.0009, + "step": 210 + }, + { + "epoch": 1.19, + "eval_accuracy": 1.0, + "eval_loss": 0.002345512853935361, + "eval_runtime": 124.4281, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 210 + }, + { + "epoch": 1.25, + "learning_rate": 9.875568181818183e-05, + "loss": 0.0008, + "step": 220 + }, + { + "epoch": 1.25, + "eval_accuracy": 1.0, + "eval_loss": 0.0015128519153222442, + "eval_runtime": 124.9487, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 220 + }, + { + "epoch": 1.31, + "learning_rate": 9.869886363636363e-05, + "loss": 0.0004, + "step": 230 + }, + { + "epoch": 1.31, + "eval_accuracy": 1.0, + "eval_loss": 0.0010147449793294072, + "eval_runtime": 124.7957, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 230 + }, + { + "epoch": 1.36, + "learning_rate": 9.864204545454546e-05, + "loss": 0.0005, + "step": 240 + }, + { + "epoch": 1.36, + "eval_accuracy": 1.0, + "eval_loss": 0.0008103376603685319, + "eval_runtime": 125.1575, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 240 + }, + { + "epoch": 1.42, + "learning_rate": 9.858522727272728e-05, + "loss": 0.0004, + "step": 250 + }, + { + "epoch": 1.42, + "eval_accuracy": 1.0, + "eval_loss": 0.0006614239537157118, + "eval_runtime": 124.7263, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 250 + }, + { + "epoch": 1.48, + "learning_rate": 9.852840909090909e-05, + "loss": 0.0005, + "step": 260 + }, + { + "epoch": 1.48, + "eval_accuracy": 1.0, + "eval_loss": 0.0004330395895522088, + "eval_runtime": 124.7514, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 260 + }, + { + "epoch": 1.53, + "learning_rate": 9.847159090909092e-05, + "loss": 0.0004, + "step": 270 + }, + { + "epoch": 1.53, + "eval_accuracy": 1.0, + "eval_loss": 0.0003023791068699211, + "eval_runtime": 124.3657, + "eval_samples_per_second": 2.83, + "eval_steps_per_second": 0.708, + "step": 270 + }, + { + "epoch": 1.59, + "learning_rate": 9.841477272727274e-05, + "loss": 0.0004, + "step": 280 + }, + { + "epoch": 1.59, + "eval_accuracy": 1.0, + "eval_loss": 0.00026740168686956167, + "eval_runtime": 124.8244, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 280 + }, + { + "epoch": 1.65, + "learning_rate": 9.835795454545454e-05, + "loss": 0.0003, + "step": 290 + }, + { + "epoch": 1.65, + "eval_accuracy": 1.0, + "eval_loss": 0.0002450387692078948, + "eval_runtime": 124.9434, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 290 + }, + { + "epoch": 1.7, + "learning_rate": 9.830113636363637e-05, + "loss": 0.0003, + "step": 300 + }, + { + "epoch": 1.7, + "eval_accuracy": 1.0, + "eval_loss": 0.00022594934853259474, + "eval_runtime": 124.5966, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 300 + }, + { + "epoch": 1.76, + "learning_rate": 9.824431818181819e-05, + "loss": 0.0003, + "step": 310 + }, + { + "epoch": 1.76, + "eval_accuracy": 1.0, + "eval_loss": 0.0002115846291417256, + "eval_runtime": 124.7896, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 310 + }, + { + "epoch": 1.82, + "learning_rate": 9.818750000000001e-05, + "loss": 0.0003, + "step": 320 + }, + { + "epoch": 1.82, + "eval_accuracy": 1.0, + "eval_loss": 0.00019845501810777932, + "eval_runtime": 124.8201, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 320 + }, + { + "epoch": 1.88, + "learning_rate": 9.813068181818183e-05, + "loss": 0.0002, + "step": 330 + }, + { + "epoch": 1.88, + "eval_accuracy": 1.0, + "eval_loss": 0.00018765777349472046, + "eval_runtime": 125.5826, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 330 + }, + { + "epoch": 1.93, + "learning_rate": 9.807386363636364e-05, + "loss": 0.0002, + "step": 340 + }, + { + "epoch": 1.93, + "eval_accuracy": 1.0, + "eval_loss": 0.00017884373664855957, + "eval_runtime": 124.7264, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 340 + }, + { + "epoch": 1.99, + "learning_rate": 9.801704545454546e-05, + "loss": 0.0002, + "step": 350 + }, + { + "epoch": 1.99, + "eval_accuracy": 1.0, + "eval_loss": 0.00016994570614770055, + "eval_runtime": 125.1376, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 350 + }, + { + "epoch": 2.05, + "learning_rate": 9.796022727272728e-05, + "loss": 0.0002, + "step": 360 + }, + { + "epoch": 2.05, + "eval_accuracy": 1.0, + "eval_loss": 0.00016127560229506344, + "eval_runtime": 125.0675, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 360 + }, + { + "epoch": 2.1, + "learning_rate": 9.79034090909091e-05, + "loss": 0.0003, + "step": 370 + }, + { + "epoch": 2.1, + "eval_accuracy": 1.0, + "eval_loss": 0.00015327503206208348, + "eval_runtime": 124.9847, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 370 + }, + { + "epoch": 2.16, + "learning_rate": 9.784659090909092e-05, + "loss": 0.0002, + "step": 380 + }, + { + "epoch": 2.16, + "eval_accuracy": 1.0, + "eval_loss": 0.00014579635171685368, + "eval_runtime": 124.6233, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 380 + }, + { + "epoch": 2.22, + "learning_rate": 9.778977272727273e-05, + "loss": 0.0002, + "step": 390 + }, + { + "epoch": 2.22, + "eval_accuracy": 1.0, + "eval_loss": 0.00013981861411593854, + "eval_runtime": 125.2522, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 390 + }, + { + "epoch": 2.27, + "learning_rate": 9.773295454545455e-05, + "loss": 0.0002, + "step": 400 + }, + { + "epoch": 2.27, + "eval_accuracy": 1.0, + "eval_loss": 0.0001345899945590645, + "eval_runtime": 124.7423, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 400 + }, + { + "epoch": 2.33, + "learning_rate": 9.767613636363637e-05, + "loss": 0.0002, + "step": 410 + }, + { + "epoch": 2.33, + "eval_accuracy": 1.0, + "eval_loss": 0.00013018195750191808, + "eval_runtime": 125.105, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 410 + }, + { + "epoch": 2.39, + "learning_rate": 9.761931818181819e-05, + "loss": 0.0002, + "step": 420 + }, + { + "epoch": 2.39, + "eval_accuracy": 1.0, + "eval_loss": 0.00012942471948917955, + "eval_runtime": 124.5947, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 420 + }, + { + "epoch": 2.44, + "learning_rate": 9.75625e-05, + "loss": 0.0001, + "step": 430 + }, + { + "epoch": 2.44, + "eval_accuracy": 1.0, + "eval_loss": 0.0001293772947974503, + "eval_runtime": 124.5521, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 430 + }, + { + "epoch": 2.5, + "learning_rate": 9.750568181818182e-05, + "loss": 0.0001, + "step": 440 + }, + { + "epoch": 2.5, + "eval_accuracy": 1.0, + "eval_loss": 0.00012670186697505414, + "eval_runtime": 124.6589, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 440 + }, + { + "epoch": 2.56, + "learning_rate": 9.744886363636364e-05, + "loss": 0.0002, + "step": 450 + }, + { + "epoch": 2.56, + "eval_accuracy": 1.0, + "eval_loss": 0.00012187321408418939, + "eval_runtime": 124.6413, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 450 + }, + { + "epoch": 2.61, + "learning_rate": 9.739204545454546e-05, + "loss": 0.0001, + "step": 460 + }, + { + "epoch": 2.61, + "eval_accuracy": 1.0, + "eval_loss": 0.0001166594956885092, + "eval_runtime": 124.7275, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 460 + }, + { + "epoch": 2.67, + "learning_rate": 9.733522727272728e-05, + "loss": 0.0001, + "step": 470 + }, + { + "epoch": 2.67, + "eval_accuracy": 1.0, + "eval_loss": 0.00011213665857212618, + "eval_runtime": 124.6644, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 470 + }, + { + "epoch": 2.73, + "learning_rate": 9.72784090909091e-05, + "loss": 0.0001, + "step": 480 + }, + { + "epoch": 2.73, + "eval_accuracy": 1.0, + "eval_loss": 0.00010796433343784884, + "eval_runtime": 125.1169, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 480 + }, + { + "epoch": 2.78, + "learning_rate": 9.722159090909091e-05, + "loss": 0.0001, + "step": 490 + }, + { + "epoch": 2.78, + "eval_accuracy": 1.0, + "eval_loss": 0.0001041415089275688, + "eval_runtime": 124.2928, + "eval_samples_per_second": 2.832, + "eval_steps_per_second": 0.708, + "step": 490 + }, + { + "epoch": 2.84, + "learning_rate": 9.716477272727273e-05, + "loss": 0.0001, + "step": 500 + }, + { + "epoch": 2.84, + "eval_accuracy": 1.0, + "eval_loss": 0.00010053745791083202, + "eval_runtime": 124.4807, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 500 + }, + { + "epoch": 2.9, + "learning_rate": 9.710795454545455e-05, + "loss": 0.0001, + "step": 510 + }, + { + "epoch": 2.9, + "eval_accuracy": 1.0, + "eval_loss": 9.685145050752908e-05, + "eval_runtime": 124.5352, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 510 + }, + { + "epoch": 2.95, + "learning_rate": 9.705113636363637e-05, + "loss": 0.0002, + "step": 520 + }, + { + "epoch": 2.95, + "eval_accuracy": 1.0, + "eval_loss": 9.310990571975708e-05, + "eval_runtime": 124.7236, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 520 + }, + { + "epoch": 3.01, + "learning_rate": 9.699431818181819e-05, + "loss": 0.0001, + "step": 530 + }, + { + "epoch": 3.01, + "eval_accuracy": 1.0, + "eval_loss": 9.156898886431009e-05, + "eval_runtime": 124.5995, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 530 + }, + { + "epoch": 3.07, + "learning_rate": 9.69375e-05, + "loss": 0.0001, + "step": 540 + }, + { + "epoch": 3.07, + "eval_accuracy": 1.0, + "eval_loss": 8.88979557203129e-05, + "eval_runtime": 125.2152, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 540 + }, + { + "epoch": 3.12, + "learning_rate": 9.688068181818182e-05, + "loss": 0.0001, + "step": 550 + }, + { + "epoch": 3.12, + "eval_accuracy": 1.0, + "eval_loss": 8.60603031469509e-05, + "eval_runtime": 125.1712, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 550 + }, + { + "epoch": 3.18, + "learning_rate": 9.682386363636364e-05, + "loss": 0.0001, + "step": 560 + }, + { + "epoch": 3.18, + "eval_accuracy": 1.0, + "eval_loss": 8.364801033167168e-05, + "eval_runtime": 125.0092, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 560 + }, + { + "epoch": 3.24, + "learning_rate": 9.676704545454546e-05, + "loss": 0.0001, + "step": 570 + }, + { + "epoch": 3.24, + "eval_accuracy": 1.0, + "eval_loss": 8.131156937452033e-05, + "eval_runtime": 125.6026, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 570 + }, + { + "epoch": 3.3, + "learning_rate": 9.671022727272728e-05, + "loss": 0.0001, + "step": 580 + }, + { + "epoch": 3.3, + "eval_accuracy": 1.0, + "eval_loss": 7.901543722255155e-05, + "eval_runtime": 124.6461, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 580 + }, + { + "epoch": 3.35, + "learning_rate": 9.66534090909091e-05, + "loss": 0.0001, + "step": 590 + }, + { + "epoch": 3.35, + "eval_accuracy": 1.0, + "eval_loss": 7.633864879608154e-05, + "eval_runtime": 125.1464, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 590 + }, + { + "epoch": 3.41, + "learning_rate": 9.659659090909091e-05, + "loss": 0.0002, + "step": 600 + }, + { + "epoch": 3.41, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02739788219332695, + "eval_runtime": 125.151, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 600 + }, + { + "epoch": 3.47, + "learning_rate": 9.653977272727273e-05, + "loss": 0.0002, + "step": 610 + }, + { + "epoch": 3.47, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0035530910827219486, + "eval_runtime": 125.1084, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 610 + }, + { + "epoch": 3.52, + "learning_rate": 9.648295454545455e-05, + "loss": 0.0001, + "step": 620 + }, + { + "epoch": 3.52, + "eval_accuracy": 1.0, + "eval_loss": 6.85063932905905e-05, + "eval_runtime": 125.3933, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 620 + }, + { + "epoch": 3.58, + "learning_rate": 9.642613636363637e-05, + "loss": 0.0001, + "step": 630 + }, + { + "epoch": 3.58, + "eval_accuracy": 1.0, + "eval_loss": 6.615302845602855e-05, + "eval_runtime": 124.6598, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 630 + }, + { + "epoch": 3.64, + "learning_rate": 9.636931818181819e-05, + "loss": 0.0001, + "step": 640 + }, + { + "epoch": 3.64, + "eval_accuracy": 1.0, + "eval_loss": 6.407906039385125e-05, + "eval_runtime": 124.645, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 640 + }, + { + "epoch": 3.69, + "learning_rate": 9.63125e-05, + "loss": 0.0001, + "step": 650 + }, + { + "epoch": 3.69, + "eval_accuracy": 1.0, + "eval_loss": 6.218389899004251e-05, + "eval_runtime": 124.6068, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 650 + }, + { + "epoch": 3.75, + "learning_rate": 9.625568181818182e-05, + "loss": 0.0001, + "step": 660 + }, + { + "epoch": 3.75, + "eval_accuracy": 1.0, + "eval_loss": 5.854462870047428e-05, + "eval_runtime": 124.4852, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 660 + }, + { + "epoch": 3.81, + "learning_rate": 9.619886363636364e-05, + "loss": 0.0001, + "step": 670 + }, + { + "epoch": 3.81, + "eval_accuracy": 1.0, + "eval_loss": 5.642731048283167e-05, + "eval_runtime": 124.5552, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 670 + }, + { + "epoch": 3.86, + "learning_rate": 9.614204545454546e-05, + "loss": 0.0001, + "step": 680 + }, + { + "epoch": 3.86, + "eval_accuracy": 1.0, + "eval_loss": 5.486133522936143e-05, + "eval_runtime": 124.2343, + "eval_samples_per_second": 2.833, + "eval_steps_per_second": 0.708, + "step": 680 + }, + { + "epoch": 3.92, + "learning_rate": 9.608522727272728e-05, + "loss": 0.0001, + "step": 690 + }, + { + "epoch": 3.92, + "eval_accuracy": 1.0, + "eval_loss": 5.353038795874454e-05, + "eval_runtime": 123.9216, + "eval_samples_per_second": 2.841, + "eval_steps_per_second": 0.71, + "step": 690 + }, + { + "epoch": 3.98, + "learning_rate": 9.60284090909091e-05, + "loss": 0.0001, + "step": 700 + }, + { + "epoch": 3.98, + "eval_accuracy": 1.0, + "eval_loss": 5.2431427320698276e-05, + "eval_runtime": 124.0825, + "eval_samples_per_second": 2.837, + "eval_steps_per_second": 0.709, + "step": 700 + }, + { + "epoch": 4.03, + "learning_rate": 9.597159090909091e-05, + "loss": 0.0001, + "step": 710 + }, + { + "epoch": 4.03, + "eval_accuracy": 1.0, + "eval_loss": 5.1352788432268426e-05, + "eval_runtime": 124.1823, + "eval_samples_per_second": 2.835, + "eval_steps_per_second": 0.709, + "step": 710 + }, + { + "epoch": 4.09, + "learning_rate": 9.591477272727273e-05, + "loss": 0.0001, + "step": 720 + }, + { + "epoch": 4.09, + "eval_accuracy": 1.0, + "eval_loss": 5.032731860410422e-05, + "eval_runtime": 124.2689, + "eval_samples_per_second": 2.833, + "eval_steps_per_second": 0.708, + "step": 720 + }, + { + "epoch": 4.15, + "learning_rate": 9.585795454545455e-05, + "loss": 0.0001, + "step": 730 + }, + { + "epoch": 4.15, + "eval_accuracy": 1.0, + "eval_loss": 4.935535616823472e-05, + "eval_runtime": 124.5267, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 730 + }, + { + "epoch": 4.2, + "learning_rate": 9.580113636363637e-05, + "loss": 0.0001, + "step": 740 + }, + { + "epoch": 4.2, + "eval_accuracy": 1.0, + "eval_loss": 4.844536670134403e-05, + "eval_runtime": 124.2279, + "eval_samples_per_second": 2.834, + "eval_steps_per_second": 0.708, + "step": 740 + }, + { + "epoch": 4.26, + "learning_rate": 9.574431818181818e-05, + "loss": 0.0001, + "step": 750 + }, + { + "epoch": 4.26, + "eval_accuracy": 1.0, + "eval_loss": 4.755400732392445e-05, + "eval_runtime": 124.1201, + "eval_samples_per_second": 2.836, + "eval_steps_per_second": 0.709, + "step": 750 + }, + { + "epoch": 4.32, + "learning_rate": 9.56875e-05, + "loss": 0.0001, + "step": 760 + }, + { + "epoch": 4.32, + "eval_accuracy": 1.0, + "eval_loss": 4.6653844037791714e-05, + "eval_runtime": 124.3199, + "eval_samples_per_second": 2.831, + "eval_steps_per_second": 0.708, + "step": 760 + }, + { + "epoch": 4.38, + "learning_rate": 9.563068181818182e-05, + "loss": 0.0001, + "step": 770 + }, + { + "epoch": 4.38, + "eval_accuracy": 1.0, + "eval_loss": 4.573403566610068e-05, + "eval_runtime": 123.9908, + "eval_samples_per_second": 2.839, + "eval_steps_per_second": 0.71, + "step": 770 + }, + { + "epoch": 4.43, + "learning_rate": 9.557386363636364e-05, + "loss": 0.0001, + "step": 780 + }, + { + "epoch": 4.43, + "eval_accuracy": 1.0, + "eval_loss": 4.478611663216725e-05, + "eval_runtime": 124.2854, + "eval_samples_per_second": 2.832, + "eval_steps_per_second": 0.708, + "step": 780 + }, + { + "epoch": 4.49, + "learning_rate": 9.551704545454546e-05, + "loss": 0.0001, + "step": 790 + }, + { + "epoch": 4.49, + "eval_accuracy": 1.0, + "eval_loss": 4.390085450722836e-05, + "eval_runtime": 123.9649, + "eval_samples_per_second": 2.84, + "eval_steps_per_second": 0.71, + "step": 790 + }, + { + "epoch": 4.55, + "learning_rate": 9.546022727272727e-05, + "loss": 0.0001, + "step": 800 + }, + { + "epoch": 4.55, + "eval_accuracy": 1.0, + "eval_loss": 4.302134402678348e-05, + "eval_runtime": 123.7503, + "eval_samples_per_second": 2.844, + "eval_steps_per_second": 0.711, + "step": 800 + }, + { + "epoch": 4.6, + "learning_rate": 9.540340909090909e-05, + "loss": 0.0001, + "step": 810 + }, + { + "epoch": 4.6, + "eval_accuracy": 1.0, + "eval_loss": 4.222006828058511e-05, + "eval_runtime": 123.7057, + "eval_samples_per_second": 2.845, + "eval_steps_per_second": 0.711, + "step": 810 + }, + { + "epoch": 4.66, + "learning_rate": 9.534659090909091e-05, + "loss": 0.0001, + "step": 820 + }, + { + "epoch": 4.66, + "eval_accuracy": 1.0, + "eval_loss": 4.144520789850503e-05, + "eval_runtime": 124.0381, + "eval_samples_per_second": 2.838, + "eval_steps_per_second": 0.709, + "step": 820 + }, + { + "epoch": 4.72, + "learning_rate": 9.528977272727273e-05, + "loss": 0.0001, + "step": 830 + }, + { + "epoch": 4.72, + "eval_accuracy": 1.0, + "eval_loss": 4.1195613448508084e-05, + "eval_runtime": 124.067, + "eval_samples_per_second": 2.837, + "eval_steps_per_second": 0.709, + "step": 830 + }, + { + "epoch": 4.77, + "learning_rate": 9.523295454545455e-05, + "loss": 0.0001, + "step": 840 + }, + { + "epoch": 4.77, + "eval_accuracy": 1.0, + "eval_loss": 4.0973452996695414e-05, + "eval_runtime": 124.145, + "eval_samples_per_second": 2.835, + "eval_steps_per_second": 0.709, + "step": 840 + }, + { + "epoch": 4.83, + "learning_rate": 9.517613636363636e-05, + "loss": 0.0001, + "step": 850 + }, + { + "epoch": 4.83, + "eval_accuracy": 1.0, + "eval_loss": 4.052269287058152e-05, + "eval_runtime": 123.9384, + "eval_samples_per_second": 2.84, + "eval_steps_per_second": 0.71, + "step": 850 + }, + { + "epoch": 4.89, + "learning_rate": 9.511931818181818e-05, + "loss": 0.0, + "step": 860 + }, + { + "epoch": 4.89, + "eval_accuracy": 1.0, + "eval_loss": 3.994222424807958e-05, + "eval_runtime": 124.6943, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 860 + }, + { + "epoch": 4.94, + "learning_rate": 9.506250000000001e-05, + "loss": 0.0001, + "step": 870 + }, + { + "epoch": 4.94, + "eval_accuracy": 1.0, + "eval_loss": 3.9259142795344815e-05, + "eval_runtime": 124.7191, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 870 + }, + { + "epoch": 5.0, + "learning_rate": 9.500568181818182e-05, + "loss": 0.0001, + "step": 880 + }, + { + "epoch": 5.0, + "eval_accuracy": 1.0, + "eval_loss": 3.84582053811755e-05, + "eval_runtime": 124.0551, + "eval_samples_per_second": 2.837, + "eval_steps_per_second": 0.709, + "step": 880 + }, + { + "epoch": 5.06, + "learning_rate": 9.494886363636364e-05, + "loss": 0.0, + "step": 890 + }, + { + "epoch": 5.06, + "eval_accuracy": 1.0, + "eval_loss": 3.770332477870397e-05, + "eval_runtime": 123.9923, + "eval_samples_per_second": 2.839, + "eval_steps_per_second": 0.71, + "step": 890 + }, + { + "epoch": 5.11, + "learning_rate": 9.489204545454547e-05, + "loss": 0.0001, + "step": 900 + }, + { + "epoch": 5.11, + "eval_accuracy": 1.0, + "eval_loss": 3.6997549614170566e-05, + "eval_runtime": 124.046, + "eval_samples_per_second": 2.838, + "eval_steps_per_second": 0.709, + "step": 900 + }, + { + "epoch": 5.17, + "learning_rate": 9.483522727272727e-05, + "loss": 0.0, + "step": 910 + }, + { + "epoch": 5.17, + "eval_accuracy": 1.0, + "eval_loss": 3.6393714253790677e-05, + "eval_runtime": 123.9806, + "eval_samples_per_second": 2.839, + "eval_steps_per_second": 0.71, + "step": 910 + }, + { + "epoch": 5.23, + "learning_rate": 9.477840909090909e-05, + "loss": 0.0001, + "step": 920 + }, + { + "epoch": 5.23, + "eval_accuracy": 1.0, + "eval_loss": 3.5763125197263435e-05, + "eval_runtime": 123.9937, + "eval_samples_per_second": 2.839, + "eval_steps_per_second": 0.71, + "step": 920 + }, + { + "epoch": 5.28, + "learning_rate": 9.472159090909092e-05, + "loss": 0.0, + "step": 930 + }, + { + "epoch": 5.28, + "eval_accuracy": 1.0, + "eval_loss": 3.5139310057275e-05, + "eval_runtime": 124.8468, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 930 + }, + { + "epoch": 5.34, + "learning_rate": 9.466477272727273e-05, + "loss": 0.0, + "step": 940 + }, + { + "epoch": 5.34, + "eval_accuracy": 1.0, + "eval_loss": 3.462183303781785e-05, + "eval_runtime": 124.4635, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 940 + }, + { + "epoch": 5.4, + "learning_rate": 9.460795454545454e-05, + "loss": 0.0, + "step": 950 + }, + { + "epoch": 5.4, + "eval_accuracy": 1.0, + "eval_loss": 3.411044963286258e-05, + "eval_runtime": 124.0197, + "eval_samples_per_second": 2.838, + "eval_steps_per_second": 0.71, + "step": 950 + }, + { + "epoch": 5.45, + "learning_rate": 9.455113636363638e-05, + "loss": 0.0001, + "step": 960 + }, + { + "epoch": 5.45, + "eval_accuracy": 1.0, + "eval_loss": 3.347071833559312e-05, + "eval_runtime": 124.8135, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 960 + }, + { + "epoch": 5.51, + "learning_rate": 9.449431818181818e-05, + "loss": 0.0, + "step": 970 + }, + { + "epoch": 5.51, + "eval_accuracy": 1.0, + "eval_loss": 3.285875209257938e-05, + "eval_runtime": 124.1875, + "eval_samples_per_second": 2.834, + "eval_steps_per_second": 0.709, + "step": 970 + }, + { + "epoch": 5.57, + "learning_rate": 9.44375e-05, + "loss": 0.0, + "step": 980 + }, + { + "epoch": 5.57, + "eval_accuracy": 1.0, + "eval_loss": 3.2389707484981045e-05, + "eval_runtime": 123.713, + "eval_samples_per_second": 2.845, + "eval_steps_per_second": 0.711, + "step": 980 + }, + { + "epoch": 5.62, + "learning_rate": 9.438068181818183e-05, + "loss": 0.0001, + "step": 990 + }, + { + "epoch": 5.62, + "eval_accuracy": 1.0, + "eval_loss": 3.18857746606227e-05, + "eval_runtime": 124.1113, + "eval_samples_per_second": 2.836, + "eval_steps_per_second": 0.709, + "step": 990 + }, + { + "epoch": 5.68, + "learning_rate": 9.432386363636363e-05, + "loss": 0.0, + "step": 1000 + }, + { + "epoch": 5.68, + "eval_accuracy": 1.0, + "eval_loss": 3.1367620977107435e-05, + "eval_runtime": 124.9427, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 1000 + }, + { + "epoch": 5.74, + "learning_rate": 9.426704545454545e-05, + "loss": 0.0001, + "step": 1010 + }, + { + "epoch": 5.74, + "eval_accuracy": 1.0, + "eval_loss": 3.0681829230161384e-05, + "eval_runtime": 124.3354, + "eval_samples_per_second": 2.831, + "eval_steps_per_second": 0.708, + "step": 1010 + }, + { + "epoch": 5.8, + "learning_rate": 9.421022727272728e-05, + "loss": 0.0, + "step": 1020 + }, + { + "epoch": 5.8, + "eval_accuracy": 1.0, + "eval_loss": 3.010847285622731e-05, + "eval_runtime": 123.7527, + "eval_samples_per_second": 2.844, + "eval_steps_per_second": 0.711, + "step": 1020 + }, + { + "epoch": 5.85, + "learning_rate": 9.415340909090909e-05, + "loss": 0.0, + "step": 1030 + }, + { + "epoch": 5.85, + "eval_accuracy": 1.0, + "eval_loss": 2.962587859656196e-05, + "eval_runtime": 124.0826, + "eval_samples_per_second": 2.837, + "eval_steps_per_second": 0.709, + "step": 1030 + }, + { + "epoch": 5.91, + "learning_rate": 9.40965909090909e-05, + "loss": 0.0, + "step": 1040 + }, + { + "epoch": 5.91, + "eval_accuracy": 1.0, + "eval_loss": 2.9179860575823113e-05, + "eval_runtime": 124.0611, + "eval_samples_per_second": 2.837, + "eval_steps_per_second": 0.709, + "step": 1040 + }, + { + "epoch": 5.97, + "learning_rate": 9.403977272727274e-05, + "loss": 0.0, + "step": 1050 + }, + { + "epoch": 5.97, + "eval_accuracy": 1.0, + "eval_loss": 2.8774480597348884e-05, + "eval_runtime": 123.7212, + "eval_samples_per_second": 2.845, + "eval_steps_per_second": 0.711, + "step": 1050 + }, + { + "epoch": 6.02, + "learning_rate": 9.398295454545454e-05, + "loss": 0.0, + "step": 1060 + }, + { + "epoch": 6.02, + "eval_accuracy": 1.0, + "eval_loss": 2.8411095627234317e-05, + "eval_runtime": 123.7259, + "eval_samples_per_second": 2.845, + "eval_steps_per_second": 0.711, + "step": 1060 + }, + { + "epoch": 6.08, + "learning_rate": 9.392613636363636e-05, + "loss": 0.0, + "step": 1070 + }, + { + "epoch": 6.08, + "eval_accuracy": 1.0, + "eval_loss": 2.8028407541569322e-05, + "eval_runtime": 123.5911, + "eval_samples_per_second": 2.848, + "eval_steps_per_second": 0.712, + "step": 1070 + }, + { + "epoch": 6.14, + "learning_rate": 9.386931818181819e-05, + "loss": 0.0, + "step": 1080 + }, + { + "epoch": 6.14, + "eval_accuracy": 1.0, + "eval_loss": 2.761794712569099e-05, + "eval_runtime": 124.1904, + "eval_samples_per_second": 2.834, + "eval_steps_per_second": 0.709, + "step": 1080 + }, + { + "epoch": 6.19, + "learning_rate": 9.38125e-05, + "loss": 0.0, + "step": 1090 + }, + { + "epoch": 6.19, + "eval_accuracy": 1.0, + "eval_loss": 2.7216970920562744e-05, + "eval_runtime": 123.8019, + "eval_samples_per_second": 2.843, + "eval_steps_per_second": 0.711, + "step": 1090 + }, + { + "epoch": 6.25, + "learning_rate": 9.375568181818181e-05, + "loss": 0.0, + "step": 1100 + }, + { + "epoch": 6.25, + "eval_accuracy": 1.0, + "eval_loss": 2.683699131011963e-05, + "eval_runtime": 124.6983, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 1100 + }, + { + "epoch": 6.31, + "learning_rate": 9.369886363636365e-05, + "loss": 0.0, + "step": 1110 + }, + { + "epoch": 6.31, + "eval_accuracy": 1.0, + "eval_loss": 2.6444819013704546e-05, + "eval_runtime": 124.2605, + "eval_samples_per_second": 2.833, + "eval_steps_per_second": 0.708, + "step": 1110 + }, + { + "epoch": 6.36, + "learning_rate": 9.364204545454545e-05, + "loss": 0.0, + "step": 1120 + }, + { + "epoch": 6.36, + "eval_accuracy": 1.0, + "eval_loss": 2.6035037080873735e-05, + "eval_runtime": 124.157, + "eval_samples_per_second": 2.835, + "eval_steps_per_second": 0.709, + "step": 1120 + }, + { + "epoch": 6.42, + "learning_rate": 9.358522727272728e-05, + "loss": 0.0, + "step": 1130 + }, + { + "epoch": 6.42, + "eval_accuracy": 1.0, + "eval_loss": 2.55727627518354e-05, + "eval_runtime": 124.4467, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 1130 + }, + { + "epoch": 6.48, + "learning_rate": 9.35284090909091e-05, + "loss": 0.0, + "step": 1140 + }, + { + "epoch": 6.48, + "eval_accuracy": 1.0, + "eval_loss": 2.514604784664698e-05, + "eval_runtime": 124.177, + "eval_samples_per_second": 2.835, + "eval_steps_per_second": 0.709, + "step": 1140 + }, + { + "epoch": 6.53, + "learning_rate": 9.34715909090909e-05, + "loss": 0.0, + "step": 1150 + }, + { + "epoch": 6.53, + "eval_accuracy": 1.0, + "eval_loss": 2.4763698093011044e-05, + "eval_runtime": 124.4695, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 1150 + }, + { + "epoch": 6.59, + "learning_rate": 9.341477272727274e-05, + "loss": 0.0, + "step": 1160 + }, + { + "epoch": 6.59, + "eval_accuracy": 1.0, + "eval_loss": 2.440539356030058e-05, + "eval_runtime": 124.5, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 1160 + }, + { + "epoch": 6.65, + "learning_rate": 9.335795454545455e-05, + "loss": 0.0, + "step": 1170 + }, + { + "epoch": 6.65, + "eval_accuracy": 1.0, + "eval_loss": 2.405690793239046e-05, + "eval_runtime": 124.2578, + "eval_samples_per_second": 2.833, + "eval_steps_per_second": 0.708, + "step": 1170 + }, + { + "epoch": 6.7, + "learning_rate": 9.330113636363636e-05, + "loss": 0.0, + "step": 1180 + }, + { + "epoch": 6.7, + "eval_accuracy": 1.0, + "eval_loss": 2.37019903579494e-05, + "eval_runtime": 123.8934, + "eval_samples_per_second": 2.841, + "eval_steps_per_second": 0.71, + "step": 1180 + }, + { + "epoch": 6.76, + "learning_rate": 9.324431818181819e-05, + "loss": 0.0, + "step": 1190 + }, + { + "epoch": 6.76, + "eval_accuracy": 1.0, + "eval_loss": 2.3363327272818424e-05, + "eval_runtime": 124.1347, + "eval_samples_per_second": 2.836, + "eval_steps_per_second": 0.709, + "step": 1190 + }, + { + "epoch": 6.82, + "learning_rate": 9.318750000000001e-05, + "loss": 0.0, + "step": 1200 + }, + { + "epoch": 6.82, + "eval_accuracy": 1.0, + "eval_loss": 2.304430745425634e-05, + "eval_runtime": 124.2938, + "eval_samples_per_second": 2.832, + "eval_steps_per_second": 0.708, + "step": 1200 + }, + { + "epoch": 6.88, + "learning_rate": 9.313068181818181e-05, + "loss": 0.0, + "step": 1210 + }, + { + "epoch": 6.88, + "eval_accuracy": 1.0, + "eval_loss": 2.275237966387067e-05, + "eval_runtime": 124.9269, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 1210 + }, + { + "epoch": 6.93, + "learning_rate": 9.307386363636364e-05, + "loss": 0.0, + "step": 1220 + }, + { + "epoch": 6.93, + "eval_accuracy": 1.0, + "eval_loss": 2.2431328034144826e-05, + "eval_runtime": 123.9363, + "eval_samples_per_second": 2.84, + "eval_steps_per_second": 0.71, + "step": 1220 + }, + { + "epoch": 6.99, + "learning_rate": 9.301704545454546e-05, + "loss": 0.0, + "step": 1230 + }, + { + "epoch": 6.99, + "eval_accuracy": 1.0, + "eval_loss": 2.215294625784736e-05, + "eval_runtime": 124.0994, + "eval_samples_per_second": 2.836, + "eval_steps_per_second": 0.709, + "step": 1230 + }, + { + "epoch": 7.05, + "learning_rate": 9.296022727272727e-05, + "loss": 0.0, + "step": 1240 + }, + { + "epoch": 7.05, + "eval_accuracy": 1.0, + "eval_loss": 2.197108551627025e-05, + "eval_runtime": 124.1454, + "eval_samples_per_second": 2.835, + "eval_steps_per_second": 0.709, + "step": 1240 + }, + { + "epoch": 7.1, + "learning_rate": 9.29034090909091e-05, + "loss": 0.0152, + "step": 1250 + }, + { + "epoch": 7.1, + "eval_accuracy": 0.9801136255264282, + "eval_loss": 0.14261282980442047, + "eval_runtime": 123.7372, + "eval_samples_per_second": 2.845, + "eval_steps_per_second": 0.711, + "step": 1250 + }, + { + "epoch": 7.16, + "learning_rate": 9.284659090909092e-05, + "loss": 0.2832, + "step": 1260 + }, + { + "epoch": 7.16, + "eval_accuracy": 0.8920454382896423, + "eval_loss": 0.9508521556854248, + "eval_runtime": 123.7844, + "eval_samples_per_second": 2.844, + "eval_steps_per_second": 0.711, + "step": 1260 + }, + { + "epoch": 7.22, + "learning_rate": 9.278977272727272e-05, + "loss": 0.167, + "step": 1270 + }, + { + "epoch": 7.22, + "eval_accuracy": 0.9772727489471436, + "eval_loss": 0.08081092685461044, + "eval_runtime": 124.0375, + "eval_samples_per_second": 2.838, + "eval_steps_per_second": 0.709, + "step": 1270 + }, + { + "epoch": 7.27, + "learning_rate": 9.273295454545455e-05, + "loss": 0.1159, + "step": 1280 + }, + { + "epoch": 7.27, + "eval_accuracy": 0.9772727489471436, + "eval_loss": 0.08310722559690475, + "eval_runtime": 124.1607, + "eval_samples_per_second": 2.835, + "eval_steps_per_second": 0.709, + "step": 1280 + }, + { + "epoch": 7.33, + "learning_rate": 9.267613636363637e-05, + "loss": 0.1013, + "step": 1290 + }, + { + "epoch": 7.33, + "eval_accuracy": 0.9630681872367859, + "eval_loss": 0.27514922618865967, + "eval_runtime": 123.8811, + "eval_samples_per_second": 2.841, + "eval_steps_per_second": 0.71, + "step": 1290 + }, + { + "epoch": 7.39, + "learning_rate": 9.261931818181818e-05, + "loss": 0.087, + "step": 1300 + }, + { + "epoch": 7.39, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.04930020496249199, + "eval_runtime": 124.486, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 1300 + }, + { + "epoch": 7.44, + "learning_rate": 9.256250000000001e-05, + "loss": 0.0125, + "step": 1310 + }, + { + "epoch": 7.44, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02058441750705242, + "eval_runtime": 124.4019, + "eval_samples_per_second": 2.83, + "eval_steps_per_second": 0.707, + "step": 1310 + }, + { + "epoch": 7.5, + "learning_rate": 9.250568181818183e-05, + "loss": 0.0235, + "step": 1320 + }, + { + "epoch": 7.5, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.031971585005521774, + "eval_runtime": 124.1968, + "eval_samples_per_second": 2.834, + "eval_steps_per_second": 0.709, + "step": 1320 + }, + { + "epoch": 7.56, + "learning_rate": 9.244886363636363e-05, + "loss": 0.0003, + "step": 1330 + }, + { + "epoch": 7.56, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.032685697078704834, + "eval_runtime": 124.3057, + "eval_samples_per_second": 2.832, + "eval_steps_per_second": 0.708, + "step": 1330 + }, + { + "epoch": 7.61, + "learning_rate": 9.239772727272727e-05, + "loss": 0.0109, + "step": 1340 + }, + { + "epoch": 7.61, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.04043057933449745, + "eval_runtime": 124.3394, + "eval_samples_per_second": 2.831, + "eval_steps_per_second": 0.708, + "step": 1340 + }, + { + "epoch": 7.67, + "learning_rate": 9.23409090909091e-05, + "loss": 0.0003, + "step": 1350 + }, + { + "epoch": 7.67, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.05372649431228638, + "eval_runtime": 124.765, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 1350 + }, + { + "epoch": 7.73, + "learning_rate": 9.228409090909091e-05, + "loss": 0.0001, + "step": 1360 + }, + { + "epoch": 7.73, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.05597938597202301, + "eval_runtime": 124.4405, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 1360 + }, + { + "epoch": 7.78, + "learning_rate": 9.222727272727273e-05, + "loss": 0.0002, + "step": 1370 + }, + { + "epoch": 7.78, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.041254255920648575, + "eval_runtime": 124.2043, + "eval_samples_per_second": 2.834, + "eval_steps_per_second": 0.709, + "step": 1370 + }, + { + "epoch": 7.84, + "learning_rate": 9.217613636363637e-05, + "loss": 0.1607, + "step": 1380 + }, + { + "epoch": 7.84, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022855179384350777, + "eval_runtime": 124.3279, + "eval_samples_per_second": 2.831, + "eval_steps_per_second": 0.708, + "step": 1380 + }, + { + "epoch": 7.9, + "learning_rate": 9.211931818181818e-05, + "loss": 0.0001, + "step": 1390 + }, + { + "epoch": 7.9, + "eval_accuracy": 0.9829545617103577, + "eval_loss": 0.12740494310855865, + "eval_runtime": 124.3624, + "eval_samples_per_second": 2.83, + "eval_steps_per_second": 0.708, + "step": 1390 + }, + { + "epoch": 7.95, + "learning_rate": 9.206250000000001e-05, + "loss": 0.1101, + "step": 1400 + }, + { + "epoch": 7.95, + "eval_accuracy": 0.9829545617103577, + "eval_loss": 0.06436696648597717, + "eval_runtime": 125.1936, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 1400 + }, + { + "epoch": 8.01, + "learning_rate": 9.200568181818183e-05, + "loss": 0.1208, + "step": 1410 + }, + { + "epoch": 8.01, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.01757746934890747, + "eval_runtime": 125.3438, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 1410 + }, + { + "epoch": 8.07, + "learning_rate": 9.194886363636363e-05, + "loss": 0.0017, + "step": 1420 + }, + { + "epoch": 8.07, + "eval_accuracy": 1.0, + "eval_loss": 0.0008121105493046343, + "eval_runtime": 124.5886, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 1420 + }, + { + "epoch": 8.12, + "learning_rate": 9.189204545454546e-05, + "loss": 0.0006, + "step": 1430 + }, + { + "epoch": 8.12, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.006536738481372595, + "eval_runtime": 124.8501, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 1430 + }, + { + "epoch": 8.18, + "learning_rate": 9.183522727272728e-05, + "loss": 0.0034, + "step": 1440 + }, + { + "epoch": 8.18, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0141005152836442, + "eval_runtime": 125.1039, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 1440 + }, + { + "epoch": 8.24, + "learning_rate": 9.177840909090908e-05, + "loss": 0.0003, + "step": 1450 + }, + { + "epoch": 8.24, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.02440841682255268, + "eval_runtime": 124.6568, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 1450 + }, + { + "epoch": 8.3, + "learning_rate": 9.172159090909092e-05, + "loss": 0.0505, + "step": 1460 + }, + { + "epoch": 8.3, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.010290426202118397, + "eval_runtime": 125.0089, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 1460 + }, + { + "epoch": 8.35, + "learning_rate": 9.166477272727273e-05, + "loss": 0.3288, + "step": 1470 + }, + { + "epoch": 8.35, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.014201739802956581, + "eval_runtime": 124.7285, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 1470 + }, + { + "epoch": 8.41, + "learning_rate": 9.160795454545455e-05, + "loss": 0.0055, + "step": 1480 + }, + { + "epoch": 8.41, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.013243576511740685, + "eval_runtime": 124.726, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 1480 + }, + { + "epoch": 8.47, + "learning_rate": 9.155113636363637e-05, + "loss": 0.1189, + "step": 1490 + }, + { + "epoch": 8.47, + "eval_accuracy": 1.0, + "eval_loss": 0.004915300291031599, + "eval_runtime": 124.6749, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 1490 + }, + { + "epoch": 8.52, + "learning_rate": 9.149431818181819e-05, + "loss": 0.005, + "step": 1500 + }, + { + "epoch": 8.52, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.005603615660220385, + "eval_runtime": 125.0546, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 1500 + }, + { + "epoch": 8.58, + "learning_rate": 9.14375e-05, + "loss": 0.0007, + "step": 1510 + }, + { + "epoch": 8.58, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.014124225825071335, + "eval_runtime": 124.7126, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 1510 + }, + { + "epoch": 8.64, + "learning_rate": 9.138068181818182e-05, + "loss": 0.0004, + "step": 1520 + }, + { + "epoch": 8.64, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016052501276135445, + "eval_runtime": 124.9532, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 1520 + }, + { + "epoch": 8.69, + "learning_rate": 9.132386363636364e-05, + "loss": 0.0004, + "step": 1530 + }, + { + "epoch": 8.69, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016775131225585938, + "eval_runtime": 124.6637, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 1530 + }, + { + "epoch": 8.75, + "learning_rate": 9.126704545454546e-05, + "loss": 0.0004, + "step": 1540 + }, + { + "epoch": 8.75, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016452878713607788, + "eval_runtime": 124.9073, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 1540 + }, + { + "epoch": 8.81, + "learning_rate": 9.121022727272728e-05, + "loss": 0.0032, + "step": 1550 + }, + { + "epoch": 8.81, + "eval_accuracy": 0.8920454382896423, + "eval_loss": 0.43611031770706177, + "eval_runtime": 124.7311, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 1550 + }, + { + "epoch": 8.86, + "learning_rate": 9.11534090909091e-05, + "loss": 0.0131, + "step": 1560 + }, + { + "epoch": 8.86, + "eval_accuracy": 1.0, + "eval_loss": 0.0009678713977336884, + "eval_runtime": 124.4853, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 1560 + }, + { + "epoch": 8.92, + "learning_rate": 9.109659090909091e-05, + "loss": 0.0003, + "step": 1570 + }, + { + "epoch": 8.92, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.007599758915603161, + "eval_runtime": 124.9752, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 1570 + }, + { + "epoch": 8.98, + "learning_rate": 9.103977272727273e-05, + "loss": 0.0003, + "step": 1580 + }, + { + "epoch": 8.98, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.016397761180996895, + "eval_runtime": 125.7125, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 1580 + }, + { + "epoch": 9.03, + "learning_rate": 9.098295454545455e-05, + "loss": 0.0005, + "step": 1590 + }, + { + "epoch": 9.03, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.01749684102833271, + "eval_runtime": 125.127, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 1590 + }, + { + "epoch": 9.09, + "learning_rate": 9.092613636363637e-05, + "loss": 0.0003, + "step": 1600 + }, + { + "epoch": 9.09, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.01627124845981598, + "eval_runtime": 124.4898, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 1600 + }, + { + "epoch": 9.15, + "learning_rate": 9.086931818181819e-05, + "loss": 0.0003, + "step": 1610 + }, + { + "epoch": 9.15, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.013749724254012108, + "eval_runtime": 124.661, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 1610 + }, + { + "epoch": 9.2, + "learning_rate": 9.08125e-05, + "loss": 0.0002, + "step": 1620 + }, + { + "epoch": 9.2, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.011139818467199802, + "eval_runtime": 124.6377, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 1620 + }, + { + "epoch": 9.26, + "learning_rate": 9.075568181818182e-05, + "loss": 0.0002, + "step": 1630 + }, + { + "epoch": 9.26, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.007344536948949099, + "eval_runtime": 124.7363, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 1630 + }, + { + "epoch": 9.32, + "learning_rate": 9.069886363636364e-05, + "loss": 0.0001, + "step": 1640 + }, + { + "epoch": 9.32, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0050169010646641254, + "eval_runtime": 125.0169, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 1640 + }, + { + "epoch": 9.38, + "learning_rate": 9.064204545454546e-05, + "loss": 0.0002, + "step": 1650 + }, + { + "epoch": 9.38, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.003527298802509904, + "eval_runtime": 124.5992, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 1650 + }, + { + "epoch": 9.43, + "learning_rate": 9.058522727272728e-05, + "loss": 0.0001, + "step": 1660 + }, + { + "epoch": 9.43, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0028742486611008644, + "eval_runtime": 125.2365, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 1660 + }, + { + "epoch": 9.49, + "learning_rate": 9.05284090909091e-05, + "loss": 0.0001, + "step": 1670 + }, + { + "epoch": 9.49, + "eval_accuracy": 1.0, + "eval_loss": 0.002490126295015216, + "eval_runtime": 125.6634, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 1670 + }, + { + "epoch": 9.55, + "learning_rate": 9.047159090909091e-05, + "loss": 0.0001, + "step": 1680 + }, + { + "epoch": 9.55, + "eval_accuracy": 1.0, + "eval_loss": 0.0018948238575831056, + "eval_runtime": 125.2203, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 1680 + }, + { + "epoch": 9.6, + "learning_rate": 9.041477272727273e-05, + "loss": 0.0001, + "step": 1690 + }, + { + "epoch": 9.6, + "eval_accuracy": 1.0, + "eval_loss": 0.0013538467464968562, + "eval_runtime": 124.8264, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 1690 + }, + { + "epoch": 9.66, + "learning_rate": 9.035795454545455e-05, + "loss": 0.0923, + "step": 1700 + }, + { + "epoch": 9.66, + "eval_accuracy": 1.0, + "eval_loss": 0.0013651837361976504, + "eval_runtime": 125.2225, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 1700 + }, + { + "epoch": 9.72, + "learning_rate": 9.030113636363637e-05, + "loss": 0.0001, + "step": 1710 + }, + { + "epoch": 9.72, + "eval_accuracy": 1.0, + "eval_loss": 0.0017142099095508456, + "eval_runtime": 125.3157, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 1710 + }, + { + "epoch": 9.77, + "learning_rate": 9.024431818181819e-05, + "loss": 0.0001, + "step": 1720 + }, + { + "epoch": 9.77, + "eval_accuracy": 1.0, + "eval_loss": 0.0015917017590254545, + "eval_runtime": 125.2455, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 1720 + }, + { + "epoch": 9.83, + "learning_rate": 9.01875e-05, + "loss": 0.0002, + "step": 1730 + }, + { + "epoch": 9.83, + "eval_accuracy": 1.0, + "eval_loss": 0.0011166059412062168, + "eval_runtime": 125.2464, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 1730 + }, + { + "epoch": 9.89, + "learning_rate": 9.013068181818182e-05, + "loss": 0.0001, + "step": 1740 + }, + { + "epoch": 9.89, + "eval_accuracy": 1.0, + "eval_loss": 0.0006581758498214185, + "eval_runtime": 125.1352, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 1740 + }, + { + "epoch": 9.94, + "learning_rate": 9.007386363636364e-05, + "loss": 0.0001, + "step": 1750 + }, + { + "epoch": 9.94, + "eval_accuracy": 1.0, + "eval_loss": 0.00047917800839059055, + "eval_runtime": 124.7846, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 1750 + }, + { + "epoch": 10.0, + "learning_rate": 9.001704545454546e-05, + "loss": 0.0001, + "step": 1760 + }, + { + "epoch": 10.0, + "eval_accuracy": 1.0, + "eval_loss": 0.000382251018891111, + "eval_runtime": 124.834, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 1760 + }, + { + "epoch": 10.06, + "learning_rate": 8.996022727272728e-05, + "loss": 0.0001, + "step": 1770 + }, + { + "epoch": 10.06, + "eval_accuracy": 1.0, + "eval_loss": 0.00027223912184126675, + "eval_runtime": 124.8266, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 1770 + }, + { + "epoch": 10.11, + "learning_rate": 8.99034090909091e-05, + "loss": 0.0001, + "step": 1780 + }, + { + "epoch": 10.11, + "eval_accuracy": 1.0, + "eval_loss": 0.00022248517780099064, + "eval_runtime": 124.8269, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 1780 + }, + { + "epoch": 10.17, + "learning_rate": 8.984659090909091e-05, + "loss": 0.0001, + "step": 1790 + }, + { + "epoch": 10.17, + "eval_accuracy": 1.0, + "eval_loss": 0.00019708242325577885, + "eval_runtime": 125.1717, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 1790 + }, + { + "epoch": 10.23, + "learning_rate": 8.978977272727273e-05, + "loss": 0.0001, + "step": 1800 + }, + { + "epoch": 10.23, + "eval_accuracy": 1.0, + "eval_loss": 0.0001719675346976146, + "eval_runtime": 124.8232, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 1800 + }, + { + "epoch": 10.28, + "learning_rate": 8.973295454545455e-05, + "loss": 0.0001, + "step": 1810 + }, + { + "epoch": 10.28, + "eval_accuracy": 1.0, + "eval_loss": 0.0001546804851386696, + "eval_runtime": 125.7629, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 1810 + }, + { + "epoch": 10.34, + "learning_rate": 8.967613636363637e-05, + "loss": 0.0001, + "step": 1820 + }, + { + "epoch": 10.34, + "eval_accuracy": 1.0, + "eval_loss": 0.00014202127931639552, + "eval_runtime": 125.1643, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 1820 + }, + { + "epoch": 10.4, + "learning_rate": 8.961931818181818e-05, + "loss": 0.0001, + "step": 1830 + }, + { + "epoch": 10.4, + "eval_accuracy": 1.0, + "eval_loss": 0.00013119998038746417, + "eval_runtime": 125.1334, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 1830 + }, + { + "epoch": 10.45, + "learning_rate": 8.95625e-05, + "loss": 0.1166, + "step": 1840 + }, + { + "epoch": 10.45, + "eval_accuracy": 1.0, + "eval_loss": 0.00037732484634034336, + "eval_runtime": 125.1909, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 1840 + }, + { + "epoch": 10.51, + "learning_rate": 8.950568181818182e-05, + "loss": 0.0004, + "step": 1850 + }, + { + "epoch": 10.51, + "eval_accuracy": 1.0, + "eval_loss": 0.003863053862005472, + "eval_runtime": 125.1158, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 1850 + }, + { + "epoch": 10.57, + "learning_rate": 8.944886363636364e-05, + "loss": 0.0016, + "step": 1860 + }, + { + "epoch": 10.57, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.005520069506019354, + "eval_runtime": 124.8464, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 1860 + }, + { + "epoch": 10.62, + "learning_rate": 8.939204545454546e-05, + "loss": 0.0008, + "step": 1870 + }, + { + "epoch": 10.62, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.002867434872314334, + "eval_runtime": 125.1425, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 1870 + }, + { + "epoch": 10.68, + "learning_rate": 8.933522727272727e-05, + "loss": 0.0003, + "step": 1880 + }, + { + "epoch": 10.68, + "eval_accuracy": 1.0, + "eval_loss": 0.0015116618014872074, + "eval_runtime": 124.8242, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 1880 + }, + { + "epoch": 10.74, + "learning_rate": 8.927840909090909e-05, + "loss": 0.0002, + "step": 1890 + }, + { + "epoch": 10.74, + "eval_accuracy": 1.0, + "eval_loss": 0.0009241331135854125, + "eval_runtime": 124.999, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 1890 + }, + { + "epoch": 10.8, + "learning_rate": 8.922159090909091e-05, + "loss": 0.0001, + "step": 1900 + }, + { + "epoch": 10.8, + "eval_accuracy": 1.0, + "eval_loss": 0.0006819574045948684, + "eval_runtime": 125.2265, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 1900 + }, + { + "epoch": 10.85, + "learning_rate": 8.916477272727273e-05, + "loss": 0.0001, + "step": 1910 + }, + { + "epoch": 10.85, + "eval_accuracy": 1.0, + "eval_loss": 0.0005185363697819412, + "eval_runtime": 124.9976, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 1910 + }, + { + "epoch": 10.91, + "learning_rate": 8.910795454545455e-05, + "loss": 0.0001, + "step": 1920 + }, + { + "epoch": 10.91, + "eval_accuracy": 1.0, + "eval_loss": 0.00042119555291719735, + "eval_runtime": 124.8421, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 1920 + }, + { + "epoch": 10.97, + "learning_rate": 8.905113636363636e-05, + "loss": 0.0001, + "step": 1930 + }, + { + "epoch": 10.97, + "eval_accuracy": 1.0, + "eval_loss": 0.0003791417693719268, + "eval_runtime": 124.6867, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 1930 + }, + { + "epoch": 11.02, + "learning_rate": 8.899431818181818e-05, + "loss": 0.0001, + "step": 1940 + }, + { + "epoch": 11.02, + "eval_accuracy": 1.0, + "eval_loss": 0.0003483309119474143, + "eval_runtime": 125.0165, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 1940 + }, + { + "epoch": 11.08, + "learning_rate": 8.89375e-05, + "loss": 0.0001, + "step": 1950 + }, + { + "epoch": 11.08, + "eval_accuracy": 1.0, + "eval_loss": 0.00032202256261371076, + "eval_runtime": 125.2832, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 1950 + }, + { + "epoch": 11.14, + "learning_rate": 8.888068181818182e-05, + "loss": 0.0001, + "step": 1960 + }, + { + "epoch": 11.14, + "eval_accuracy": 1.0, + "eval_loss": 0.0002962726866826415, + "eval_runtime": 124.7889, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 1960 + }, + { + "epoch": 11.19, + "learning_rate": 8.882386363636364e-05, + "loss": 0.0001, + "step": 1970 + }, + { + "epoch": 11.19, + "eval_accuracy": 1.0, + "eval_loss": 0.00026980246184393764, + "eval_runtime": 125.3115, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 1970 + }, + { + "epoch": 11.25, + "learning_rate": 8.876704545454547e-05, + "loss": 0.0001, + "step": 1980 + }, + { + "epoch": 11.25, + "eval_accuracy": 1.0, + "eval_loss": 0.00024066933838184923, + "eval_runtime": 125.3732, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 1980 + }, + { + "epoch": 11.31, + "learning_rate": 8.871022727272727e-05, + "loss": 0.0001, + "step": 1990 + }, + { + "epoch": 11.31, + "eval_accuracy": 1.0, + "eval_loss": 0.00022344697208609432, + "eval_runtime": 124.7815, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 1990 + }, + { + "epoch": 11.36, + "learning_rate": 8.865340909090909e-05, + "loss": 0.0001, + "step": 2000 + }, + { + "epoch": 11.36, + "eval_accuracy": 1.0, + "eval_loss": 0.0002117438125424087, + "eval_runtime": 124.9597, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 2000 + }, + { + "epoch": 11.42, + "learning_rate": 8.859659090909092e-05, + "loss": 0.0001, + "step": 2010 + }, + { + "epoch": 11.42, + "eval_accuracy": 1.0, + "eval_loss": 0.00019789249927271158, + "eval_runtime": 124.7501, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 2010 + }, + { + "epoch": 11.48, + "learning_rate": 8.853977272727273e-05, + "loss": 0.0001, + "step": 2020 + }, + { + "epoch": 11.48, + "eval_accuracy": 1.0, + "eval_loss": 0.00018351931066717952, + "eval_runtime": 124.6217, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 2020 + }, + { + "epoch": 11.53, + "learning_rate": 8.848295454545456e-05, + "loss": 0.0, + "step": 2030 + }, + { + "epoch": 11.53, + "eval_accuracy": 1.0, + "eval_loss": 0.0001731708034640178, + "eval_runtime": 125.7491, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 2030 + }, + { + "epoch": 11.59, + "learning_rate": 8.842613636363638e-05, + "loss": 0.0001, + "step": 2040 + }, + { + "epoch": 11.59, + "eval_accuracy": 1.0, + "eval_loss": 0.00015948306827340275, + "eval_runtime": 125.3542, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 2040 + }, + { + "epoch": 11.65, + "learning_rate": 8.836931818181818e-05, + "loss": 0.0001, + "step": 2050 + }, + { + "epoch": 11.65, + "eval_accuracy": 1.0, + "eval_loss": 0.000145960264489986, + "eval_runtime": 125.3416, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 2050 + }, + { + "epoch": 11.7, + "learning_rate": 8.831250000000001e-05, + "loss": 0.0001, + "step": 2060 + }, + { + "epoch": 11.7, + "eval_accuracy": 1.0, + "eval_loss": 0.00013693726214114577, + "eval_runtime": 125.0324, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 2060 + }, + { + "epoch": 11.76, + "learning_rate": 8.825568181818183e-05, + "loss": 0.0, + "step": 2070 + }, + { + "epoch": 11.76, + "eval_accuracy": 1.0, + "eval_loss": 0.00013189086166676134, + "eval_runtime": 124.7514, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 2070 + }, + { + "epoch": 11.82, + "learning_rate": 8.819886363636363e-05, + "loss": 0.0, + "step": 2080 + }, + { + "epoch": 11.82, + "eval_accuracy": 1.0, + "eval_loss": 0.00012500652519520372, + "eval_runtime": 125.1584, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 2080 + }, + { + "epoch": 11.88, + "learning_rate": 8.814204545454547e-05, + "loss": 0.0, + "step": 2090 + }, + { + "epoch": 11.88, + "eval_accuracy": 1.0, + "eval_loss": 0.00011977655231021345, + "eval_runtime": 125.2019, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 2090 + }, + { + "epoch": 11.93, + "learning_rate": 8.808522727272728e-05, + "loss": 0.0001, + "step": 2100 + }, + { + "epoch": 11.93, + "eval_accuracy": 1.0, + "eval_loss": 0.00011545927554834634, + "eval_runtime": 124.9281, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 2100 + }, + { + "epoch": 11.99, + "learning_rate": 8.802840909090909e-05, + "loss": 0.0, + "step": 2110 + }, + { + "epoch": 11.99, + "eval_accuracy": 1.0, + "eval_loss": 0.00011268393427599221, + "eval_runtime": 124.7903, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 2110 + }, + { + "epoch": 12.05, + "learning_rate": 8.797159090909092e-05, + "loss": 0.0, + "step": 2120 + }, + { + "epoch": 12.05, + "eval_accuracy": 1.0, + "eval_loss": 0.0001103549511753954, + "eval_runtime": 125.2521, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 2120 + }, + { + "epoch": 12.1, + "learning_rate": 8.791477272727274e-05, + "loss": 0.0, + "step": 2130 + }, + { + "epoch": 12.1, + "eval_accuracy": 1.0, + "eval_loss": 0.00010800327436299995, + "eval_runtime": 124.895, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 2130 + }, + { + "epoch": 12.16, + "learning_rate": 8.785795454545454e-05, + "loss": 0.0, + "step": 2140 + }, + { + "epoch": 12.16, + "eval_accuracy": 1.0, + "eval_loss": 0.00010656260565156117, + "eval_runtime": 125.3055, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 2140 + }, + { + "epoch": 12.22, + "learning_rate": 8.780113636363637e-05, + "loss": 0.0, + "step": 2150 + }, + { + "epoch": 12.22, + "eval_accuracy": 1.0, + "eval_loss": 0.00010102039232151583, + "eval_runtime": 124.4947, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 2150 + }, + { + "epoch": 12.27, + "learning_rate": 8.774431818181819e-05, + "loss": 0.0, + "step": 2160 + }, + { + "epoch": 12.27, + "eval_accuracy": 1.0, + "eval_loss": 9.356709779240191e-05, + "eval_runtime": 125.4562, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 2160 + }, + { + "epoch": 12.33, + "learning_rate": 8.76875e-05, + "loss": 0.0, + "step": 2170 + }, + { + "epoch": 12.33, + "eval_accuracy": 1.0, + "eval_loss": 8.781592623563483e-05, + "eval_runtime": 125.0962, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 2170 + }, + { + "epoch": 12.39, + "learning_rate": 8.763068181818183e-05, + "loss": 0.0, + "step": 2180 + }, + { + "epoch": 12.39, + "eval_accuracy": 1.0, + "eval_loss": 8.271769911516458e-05, + "eval_runtime": 124.8904, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 2180 + }, + { + "epoch": 12.44, + "learning_rate": 8.757386363636365e-05, + "loss": 0.0, + "step": 2190 + }, + { + "epoch": 12.44, + "eval_accuracy": 1.0, + "eval_loss": 7.996098429430276e-05, + "eval_runtime": 124.9251, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 2190 + }, + { + "epoch": 12.5, + "learning_rate": 8.751704545454545e-05, + "loss": 0.0, + "step": 2200 + }, + { + "epoch": 12.5, + "eval_accuracy": 1.0, + "eval_loss": 7.653101056348532e-05, + "eval_runtime": 124.9219, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 2200 + }, + { + "epoch": 12.56, + "learning_rate": 8.746022727272728e-05, + "loss": 0.0, + "step": 2210 + }, + { + "epoch": 12.56, + "eval_accuracy": 1.0, + "eval_loss": 7.469647243851796e-05, + "eval_runtime": 124.7458, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 2210 + }, + { + "epoch": 12.61, + "learning_rate": 8.74034090909091e-05, + "loss": 0.0, + "step": 2220 + }, + { + "epoch": 12.61, + "eval_accuracy": 1.0, + "eval_loss": 7.244233711389825e-05, + "eval_runtime": 125.2475, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 2220 + }, + { + "epoch": 12.67, + "learning_rate": 8.73465909090909e-05, + "loss": 0.0, + "step": 2230 + }, + { + "epoch": 12.67, + "eval_accuracy": 1.0, + "eval_loss": 6.993317947490141e-05, + "eval_runtime": 124.9466, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 2230 + }, + { + "epoch": 12.73, + "learning_rate": 8.728977272727274e-05, + "loss": 0.0, + "step": 2240 + }, + { + "epoch": 12.73, + "eval_accuracy": 1.0, + "eval_loss": 6.771019980078563e-05, + "eval_runtime": 124.8076, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 2240 + }, + { + "epoch": 12.78, + "learning_rate": 8.723295454545455e-05, + "loss": 0.0, + "step": 2250 + }, + { + "epoch": 12.78, + "eval_accuracy": 1.0, + "eval_loss": 6.51644731988199e-05, + "eval_runtime": 124.8286, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 2250 + }, + { + "epoch": 12.84, + "learning_rate": 8.717613636363636e-05, + "loss": 0.0, + "step": 2260 + }, + { + "epoch": 12.84, + "eval_accuracy": 1.0, + "eval_loss": 6.281177775235847e-05, + "eval_runtime": 125.4258, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 2260 + }, + { + "epoch": 12.9, + "learning_rate": 8.711931818181819e-05, + "loss": 0.0001, + "step": 2270 + }, + { + "epoch": 12.9, + "eval_accuracy": 1.0, + "eval_loss": 4.0041792090050876e-05, + "eval_runtime": 125.2972, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 2270 + }, + { + "epoch": 12.95, + "learning_rate": 8.706250000000001e-05, + "loss": 0.0, + "step": 2280 + }, + { + "epoch": 12.95, + "eval_accuracy": 1.0, + "eval_loss": 3.4748154575936496e-05, + "eval_runtime": 124.926, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 2280 + }, + { + "epoch": 13.01, + "learning_rate": 8.700568181818183e-05, + "loss": 0.0, + "step": 2290 + }, + { + "epoch": 13.01, + "eval_accuracy": 1.0, + "eval_loss": 3.283166006440297e-05, + "eval_runtime": 125.3117, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 2290 + }, + { + "epoch": 13.07, + "learning_rate": 8.694886363636364e-05, + "loss": 0.0, + "step": 2300 + }, + { + "epoch": 13.07, + "eval_accuracy": 1.0, + "eval_loss": 3.1736086384626105e-05, + "eval_runtime": 124.895, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 2300 + }, + { + "epoch": 13.12, + "learning_rate": 8.689204545454546e-05, + "loss": 0.0, + "step": 2310 + }, + { + "epoch": 13.12, + "eval_accuracy": 1.0, + "eval_loss": 3.1114301236812025e-05, + "eval_runtime": 124.7252, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 2310 + }, + { + "epoch": 13.18, + "learning_rate": 8.683522727272728e-05, + "loss": 0.0, + "step": 2320 + }, + { + "epoch": 13.18, + "eval_accuracy": 1.0, + "eval_loss": 3.044205550395418e-05, + "eval_runtime": 125.1506, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 2320 + }, + { + "epoch": 13.24, + "learning_rate": 8.67784090909091e-05, + "loss": 0.0, + "step": 2330 + }, + { + "epoch": 13.24, + "eval_accuracy": 1.0, + "eval_loss": 2.9821965654264204e-05, + "eval_runtime": 125.2967, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 2330 + }, + { + "epoch": 13.3, + "learning_rate": 8.672159090909092e-05, + "loss": 0.0, + "step": 2340 + }, + { + "epoch": 13.3, + "eval_accuracy": 1.0, + "eval_loss": 2.930821392510552e-05, + "eval_runtime": 124.867, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 2340 + }, + { + "epoch": 13.35, + "learning_rate": 8.666477272727274e-05, + "loss": 0.0, + "step": 2350 + }, + { + "epoch": 13.35, + "eval_accuracy": 1.0, + "eval_loss": 2.8899108656332828e-05, + "eval_runtime": 124.974, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 2350 + }, + { + "epoch": 13.41, + "learning_rate": 8.660795454545455e-05, + "loss": 0.0, + "step": 2360 + }, + { + "epoch": 13.41, + "eval_accuracy": 1.0, + "eval_loss": 2.8522516004159115e-05, + "eval_runtime": 125.3348, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 2360 + }, + { + "epoch": 13.47, + "learning_rate": 8.655113636363637e-05, + "loss": 0.0, + "step": 2370 + }, + { + "epoch": 13.47, + "eval_accuracy": 1.0, + "eval_loss": 2.8096139430999756e-05, + "eval_runtime": 125.0784, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 2370 + }, + { + "epoch": 13.52, + "learning_rate": 8.649431818181819e-05, + "loss": 0.0, + "step": 2380 + }, + { + "epoch": 13.52, + "eval_accuracy": 1.0, + "eval_loss": 2.7719885110855103e-05, + "eval_runtime": 124.7489, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 2380 + }, + { + "epoch": 13.58, + "learning_rate": 8.643750000000001e-05, + "loss": 0.0, + "step": 2390 + }, + { + "epoch": 13.58, + "eval_accuracy": 1.0, + "eval_loss": 2.7391042749513872e-05, + "eval_runtime": 125.2265, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 2390 + }, + { + "epoch": 13.64, + "learning_rate": 8.638068181818183e-05, + "loss": 0.0, + "step": 2400 + }, + { + "epoch": 13.64, + "eval_accuracy": 1.0, + "eval_loss": 2.7056783437728882e-05, + "eval_runtime": 125.1304, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 2400 + }, + { + "epoch": 13.69, + "learning_rate": 8.632386363636364e-05, + "loss": 0.0, + "step": 2410 + }, + { + "epoch": 13.69, + "eval_accuracy": 1.0, + "eval_loss": 2.6709314624895342e-05, + "eval_runtime": 125.6263, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 2410 + }, + { + "epoch": 13.75, + "learning_rate": 8.626704545454546e-05, + "loss": 0.0, + "step": 2420 + }, + { + "epoch": 13.75, + "eval_accuracy": 1.0, + "eval_loss": 2.6398083718959242e-05, + "eval_runtime": 125.2204, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 2420 + }, + { + "epoch": 13.81, + "learning_rate": 8.621022727272728e-05, + "loss": 0.0, + "step": 2430 + }, + { + "epoch": 13.81, + "eval_accuracy": 1.0, + "eval_loss": 2.6026909836218692e-05, + "eval_runtime": 124.9214, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 2430 + }, + { + "epoch": 13.86, + "learning_rate": 8.61534090909091e-05, + "loss": 0.0, + "step": 2440 + }, + { + "epoch": 13.86, + "eval_accuracy": 1.0, + "eval_loss": 2.5635416022851132e-05, + "eval_runtime": 124.6635, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 2440 + }, + { + "epoch": 13.92, + "learning_rate": 8.609659090909092e-05, + "loss": 0.0, + "step": 2450 + }, + { + "epoch": 13.92, + "eval_accuracy": 1.0, + "eval_loss": 2.5231391191482544e-05, + "eval_runtime": 124.943, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 2450 + }, + { + "epoch": 13.98, + "learning_rate": 8.603977272727273e-05, + "loss": 0.0, + "step": 2460 + }, + { + "epoch": 13.98, + "eval_accuracy": 1.0, + "eval_loss": 2.4845992811606266e-05, + "eval_runtime": 124.6233, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 2460 + }, + { + "epoch": 14.03, + "learning_rate": 8.598295454545455e-05, + "loss": 0.0, + "step": 2470 + }, + { + "epoch": 14.03, + "eval_accuracy": 1.0, + "eval_loss": 2.4439259505015798e-05, + "eval_runtime": 125.875, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 2470 + }, + { + "epoch": 14.09, + "learning_rate": 8.592613636363637e-05, + "loss": 0.0, + "step": 2480 + }, + { + "epoch": 14.09, + "eval_accuracy": 1.0, + "eval_loss": 2.4050812498899177e-05, + "eval_runtime": 125.2049, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 2480 + }, + { + "epoch": 14.15, + "learning_rate": 8.586931818181819e-05, + "loss": 0.0, + "step": 2490 + }, + { + "epoch": 14.15, + "eval_accuracy": 1.0, + "eval_loss": 2.3623759261681698e-05, + "eval_runtime": 125.0033, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 2490 + }, + { + "epoch": 14.2, + "learning_rate": 8.58125e-05, + "loss": 0.0, + "step": 2500 + }, + { + "epoch": 14.2, + "eval_accuracy": 1.0, + "eval_loss": 2.333691190870013e-05, + "eval_runtime": 124.6554, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 2500 + }, + { + "epoch": 14.26, + "learning_rate": 8.575568181818182e-05, + "loss": 0.0, + "step": 2510 + }, + { + "epoch": 14.26, + "eval_accuracy": 1.0, + "eval_loss": 2.306124042661395e-05, + "eval_runtime": 125.2769, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 2510 + }, + { + "epoch": 14.32, + "learning_rate": 8.569886363636364e-05, + "loss": 0.0, + "step": 2520 + }, + { + "epoch": 14.32, + "eval_accuracy": 1.0, + "eval_loss": 2.2737478502676822e-05, + "eval_runtime": 125.1342, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 2520 + }, + { + "epoch": 14.38, + "learning_rate": 8.564204545454546e-05, + "loss": 0.0, + "step": 2530 + }, + { + "epoch": 14.38, + "eval_accuracy": 1.0, + "eval_loss": 2.2442165573011152e-05, + "eval_runtime": 125.1983, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 2530 + }, + { + "epoch": 14.43, + "learning_rate": 8.558522727272728e-05, + "loss": 0.0, + "step": 2540 + }, + { + "epoch": 14.43, + "eval_accuracy": 1.0, + "eval_loss": 2.2234902644413523e-05, + "eval_runtime": 125.0344, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 2540 + }, + { + "epoch": 14.49, + "learning_rate": 8.55284090909091e-05, + "loss": 0.0, + "step": 2550 + }, + { + "epoch": 14.49, + "eval_accuracy": 1.0, + "eval_loss": 2.200867675128393e-05, + "eval_runtime": 125.2513, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 2550 + }, + { + "epoch": 14.55, + "learning_rate": 8.547159090909091e-05, + "loss": 0.0, + "step": 2560 + }, + { + "epoch": 14.55, + "eval_accuracy": 1.0, + "eval_loss": 2.1746212951256894e-05, + "eval_runtime": 125.1342, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 2560 + }, + { + "epoch": 14.6, + "learning_rate": 8.541477272727273e-05, + "loss": 0.0, + "step": 2570 + }, + { + "epoch": 14.6, + "eval_accuracy": 1.0, + "eval_loss": 2.1460042262333445e-05, + "eval_runtime": 125.1712, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 2570 + }, + { + "epoch": 14.66, + "learning_rate": 8.535795454545455e-05, + "loss": 0.0, + "step": 2580 + }, + { + "epoch": 14.66, + "eval_accuracy": 1.0, + "eval_loss": 2.1201643903623335e-05, + "eval_runtime": 124.7023, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 2580 + }, + { + "epoch": 14.72, + "learning_rate": 8.530113636363637e-05, + "loss": 0.0, + "step": 2590 + }, + { + "epoch": 14.72, + "eval_accuracy": 1.0, + "eval_loss": 2.0911747924401425e-05, + "eval_runtime": 125.0113, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 2590 + }, + { + "epoch": 14.77, + "learning_rate": 8.524431818181819e-05, + "loss": 0.0, + "step": 2600 + }, + { + "epoch": 14.77, + "eval_accuracy": 1.0, + "eval_loss": 2.0641156879719347e-05, + "eval_runtime": 125.1738, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 2600 + }, + { + "epoch": 14.83, + "learning_rate": 8.51875e-05, + "loss": 0.0001, + "step": 2610 + }, + { + "epoch": 14.83, + "eval_accuracy": 1.0, + "eval_loss": 2.0041723473696038e-05, + "eval_runtime": 125.0127, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 2610 + }, + { + "epoch": 14.89, + "learning_rate": 8.513068181818182e-05, + "loss": 0.0, + "step": 2620 + }, + { + "epoch": 14.89, + "eval_accuracy": 1.0, + "eval_loss": 1.9451434127404355e-05, + "eval_runtime": 125.4359, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 2620 + }, + { + "epoch": 14.94, + "learning_rate": 8.507386363636364e-05, + "loss": 0.0, + "step": 2630 + }, + { + "epoch": 14.94, + "eval_accuracy": 1.0, + "eval_loss": 1.9157812857883982e-05, + "eval_runtime": 124.8498, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 2630 + }, + { + "epoch": 15.0, + "learning_rate": 8.501704545454546e-05, + "loss": 0.0, + "step": 2640 + }, + { + "epoch": 15.0, + "eval_accuracy": 1.0, + "eval_loss": 1.8913637177320197e-05, + "eval_runtime": 124.7813, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 2640 + }, + { + "epoch": 15.06, + "learning_rate": 8.496022727272728e-05, + "loss": 0.0, + "step": 2650 + }, + { + "epoch": 15.06, + "eval_accuracy": 1.0, + "eval_loss": 1.8691813238547184e-05, + "eval_runtime": 124.8224, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 2650 + }, + { + "epoch": 15.11, + "learning_rate": 8.49034090909091e-05, + "loss": 0.0, + "step": 2660 + }, + { + "epoch": 15.11, + "eval_accuracy": 1.0, + "eval_loss": 1.8500808437238447e-05, + "eval_runtime": 124.7138, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 2660 + }, + { + "epoch": 15.17, + "learning_rate": 8.484659090909091e-05, + "loss": 0.0, + "step": 2670 + }, + { + "epoch": 15.17, + "eval_accuracy": 1.0, + "eval_loss": 1.816349868022371e-05, + "eval_runtime": 125.0339, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 2670 + }, + { + "epoch": 15.23, + "learning_rate": 8.478977272727273e-05, + "loss": 0.0, + "step": 2680 + }, + { + "epoch": 15.23, + "eval_accuracy": 1.0, + "eval_loss": 1.7680904420558363e-05, + "eval_runtime": 124.8969, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 2680 + }, + { + "epoch": 15.28, + "learning_rate": 8.473295454545455e-05, + "loss": 0.0, + "step": 2690 + }, + { + "epoch": 15.28, + "eval_accuracy": 1.0, + "eval_loss": 1.7430633306503296e-05, + "eval_runtime": 125.2344, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 2690 + }, + { + "epoch": 15.34, + "learning_rate": 8.467613636363637e-05, + "loss": 0.0, + "step": 2700 + }, + { + "epoch": 15.34, + "eval_accuracy": 1.0, + "eval_loss": 1.722641900414601e-05, + "eval_runtime": 124.9051, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 2700 + }, + { + "epoch": 15.4, + "learning_rate": 8.461931818181818e-05, + "loss": 0.0, + "step": 2710 + }, + { + "epoch": 15.4, + "eval_accuracy": 1.0, + "eval_loss": 1.7042526451405138e-05, + "eval_runtime": 124.8462, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 2710 + }, + { + "epoch": 15.45, + "learning_rate": 8.45625e-05, + "loss": 0.0, + "step": 2720 + }, + { + "epoch": 15.45, + "eval_accuracy": 1.0, + "eval_loss": 1.687353324086871e-05, + "eval_runtime": 125.1604, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 2720 + }, + { + "epoch": 15.51, + "learning_rate": 8.450568181818182e-05, + "loss": 0.0, + "step": 2730 + }, + { + "epoch": 15.51, + "eval_accuracy": 1.0, + "eval_loss": 1.6705218513379805e-05, + "eval_runtime": 124.7389, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 2730 + }, + { + "epoch": 15.57, + "learning_rate": 8.444886363636364e-05, + "loss": 0.0, + "step": 2740 + }, + { + "epoch": 15.57, + "eval_accuracy": 1.0, + "eval_loss": 1.6531483197468333e-05, + "eval_runtime": 124.8035, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 2740 + }, + { + "epoch": 15.62, + "learning_rate": 8.439204545454546e-05, + "loss": 0.0, + "step": 2750 + }, + { + "epoch": 15.62, + "eval_accuracy": 1.0, + "eval_loss": 1.6382471585529856e-05, + "eval_runtime": 124.7676, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 2750 + }, + { + "epoch": 15.68, + "learning_rate": 8.433522727272727e-05, + "loss": 0.0, + "step": 2760 + }, + { + "epoch": 15.68, + "eval_accuracy": 1.0, + "eval_loss": 1.6204336134251207e-05, + "eval_runtime": 124.7961, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 2760 + }, + { + "epoch": 15.74, + "learning_rate": 8.427840909090909e-05, + "loss": 0.0, + "step": 2770 + }, + { + "epoch": 15.74, + "eval_accuracy": 1.0, + "eval_loss": 1.60394065460423e-05, + "eval_runtime": 125.2266, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 2770 + }, + { + "epoch": 15.8, + "learning_rate": 8.422159090909091e-05, + "loss": 0.0, + "step": 2780 + }, + { + "epoch": 15.8, + "eval_accuracy": 1.0, + "eval_loss": 1.5865334717091173e-05, + "eval_runtime": 124.9661, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 2780 + }, + { + "epoch": 15.85, + "learning_rate": 8.416477272727273e-05, + "loss": 0.0, + "step": 2790 + }, + { + "epoch": 15.85, + "eval_accuracy": 1.0, + "eval_loss": 1.568550396768842e-05, + "eval_runtime": 125.1878, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 2790 + }, + { + "epoch": 15.91, + "learning_rate": 8.410795454545455e-05, + "loss": 0.0, + "step": 2800 + }, + { + "epoch": 15.91, + "eval_accuracy": 1.0, + "eval_loss": 1.5556473954347894e-05, + "eval_runtime": 125.014, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 2800 + }, + { + "epoch": 15.97, + "learning_rate": 8.405113636363636e-05, + "loss": 0.0, + "step": 2810 + }, + { + "epoch": 15.97, + "eval_accuracy": 1.0, + "eval_loss": 1.5364114005933516e-05, + "eval_runtime": 125.4686, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 2810 + }, + { + "epoch": 16.02, + "learning_rate": 8.399431818181818e-05, + "loss": 0.0, + "step": 2820 + }, + { + "epoch": 16.02, + "eval_accuracy": 1.0, + "eval_loss": 1.5224923117784783e-05, + "eval_runtime": 125.2571, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 2820 + }, + { + "epoch": 16.08, + "learning_rate": 8.39375e-05, + "loss": 0.0, + "step": 2830 + }, + { + "epoch": 16.08, + "eval_accuracy": 1.0, + "eval_loss": 1.5076588169904426e-05, + "eval_runtime": 125.0387, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 2830 + }, + { + "epoch": 16.14, + "learning_rate": 8.388068181818183e-05, + "loss": 0.0, + "step": 2840 + }, + { + "epoch": 16.14, + "eval_accuracy": 1.0, + "eval_loss": 1.4913014638295863e-05, + "eval_runtime": 124.8045, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 2840 + }, + { + "epoch": 16.19, + "learning_rate": 8.382386363636364e-05, + "loss": 0.0, + "step": 2850 + }, + { + "epoch": 16.19, + "eval_accuracy": 1.0, + "eval_loss": 1.4775178897252772e-05, + "eval_runtime": 125.6145, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 2850 + }, + { + "epoch": 16.25, + "learning_rate": 8.376704545454545e-05, + "loss": 0.0, + "step": 2860 + }, + { + "epoch": 16.25, + "eval_accuracy": 1.0, + "eval_loss": 1.463666558265686e-05, + "eval_runtime": 126.0472, + "eval_samples_per_second": 2.793, + "eval_steps_per_second": 0.698, + "step": 2860 + }, + { + "epoch": 16.31, + "learning_rate": 8.371022727272729e-05, + "loss": 0.0, + "step": 2870 + }, + { + "epoch": 16.31, + "eval_accuracy": 1.0, + "eval_loss": 1.4458189980359748e-05, + "eval_runtime": 125.3543, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 2870 + }, + { + "epoch": 16.36, + "learning_rate": 8.365340909090909e-05, + "loss": 0.0, + "step": 2880 + }, + { + "epoch": 16.36, + "eval_accuracy": 1.0, + "eval_loss": 1.4309178368421271e-05, + "eval_runtime": 125.0773, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 2880 + }, + { + "epoch": 16.42, + "learning_rate": 8.359659090909091e-05, + "loss": 0.0, + "step": 2890 + }, + { + "epoch": 16.42, + "eval_accuracy": 1.0, + "eval_loss": 1.4213675967766903e-05, + "eval_runtime": 125.1309, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 2890 + }, + { + "epoch": 16.48, + "learning_rate": 8.353977272727274e-05, + "loss": 0.0, + "step": 2900 + }, + { + "epoch": 16.48, + "eval_accuracy": 1.0, + "eval_loss": 1.4047053809917998e-05, + "eval_runtime": 125.0746, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 2900 + }, + { + "epoch": 16.53, + "learning_rate": 8.348295454545454e-05, + "loss": 0.0, + "step": 2910 + }, + { + "epoch": 16.53, + "eval_accuracy": 1.0, + "eval_loss": 1.3901767488277983e-05, + "eval_runtime": 124.6969, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 2910 + }, + { + "epoch": 16.59, + "learning_rate": 8.342613636363636e-05, + "loss": 0.0, + "step": 2920 + }, + { + "epoch": 16.59, + "eval_accuracy": 1.0, + "eval_loss": 1.3806941751681734e-05, + "eval_runtime": 125.7716, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 2920 + }, + { + "epoch": 16.65, + "learning_rate": 8.33693181818182e-05, + "loss": 0.0, + "step": 2930 + }, + { + "epoch": 16.65, + "eval_accuracy": 1.0, + "eval_loss": 1.3683668839803431e-05, + "eval_runtime": 124.6251, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 2930 + }, + { + "epoch": 16.7, + "learning_rate": 8.33125e-05, + "loss": 0.0, + "step": 2940 + }, + { + "epoch": 16.7, + "eval_accuracy": 1.0, + "eval_loss": 1.3582746760221198e-05, + "eval_runtime": 125.3293, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 2940 + }, + { + "epoch": 16.76, + "learning_rate": 8.325568181818182e-05, + "loss": 0.0, + "step": 2950 + }, + { + "epoch": 16.76, + "eval_accuracy": 1.0, + "eval_loss": 1.3478100299835205e-05, + "eval_runtime": 125.47, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 2950 + }, + { + "epoch": 16.82, + "learning_rate": 8.319886363636365e-05, + "loss": 0.0, + "step": 2960 + }, + { + "epoch": 16.82, + "eval_accuracy": 1.0, + "eval_loss": 1.336905097559793e-05, + "eval_runtime": 124.9305, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 2960 + }, + { + "epoch": 16.88, + "learning_rate": 8.314204545454545e-05, + "loss": 0.0, + "step": 2970 + }, + { + "epoch": 16.88, + "eval_accuracy": 1.0, + "eval_loss": 1.3262711036077235e-05, + "eval_runtime": 124.9261, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 2970 + }, + { + "epoch": 16.93, + "learning_rate": 8.308522727272727e-05, + "loss": 0.0, + "step": 2980 + }, + { + "epoch": 16.93, + "eval_accuracy": 1.0, + "eval_loss": 1.3134357686794829e-05, + "eval_runtime": 124.7873, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 2980 + }, + { + "epoch": 16.99, + "learning_rate": 8.30284090909091e-05, + "loss": 0.0, + "step": 2990 + }, + { + "epoch": 16.99, + "eval_accuracy": 1.0, + "eval_loss": 1.3035468327871058e-05, + "eval_runtime": 125.3122, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 2990 + }, + { + "epoch": 17.05, + "learning_rate": 8.297159090909091e-05, + "loss": 0.0, + "step": 3000 + }, + { + "epoch": 17.05, + "eval_accuracy": 1.0, + "eval_loss": 1.2894923202111386e-05, + "eval_runtime": 125.1044, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 3000 + }, + { + "epoch": 17.1, + "learning_rate": 8.291477272727273e-05, + "loss": 0.0, + "step": 3010 + }, + { + "epoch": 17.1, + "eval_accuracy": 1.0, + "eval_loss": 1.276656985282898e-05, + "eval_runtime": 124.8841, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 3010 + }, + { + "epoch": 17.16, + "learning_rate": 8.285795454545456e-05, + "loss": 0.0, + "step": 3020 + }, + { + "epoch": 17.16, + "eval_accuracy": 1.0, + "eval_loss": 1.2659214007726405e-05, + "eval_runtime": 125.2555, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 3020 + }, + { + "epoch": 17.22, + "learning_rate": 8.280113636363636e-05, + "loss": 0.0, + "step": 3030 + }, + { + "epoch": 17.22, + "eval_accuracy": 1.0, + "eval_loss": 1.2559647075249813e-05, + "eval_runtime": 124.8786, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 3030 + }, + { + "epoch": 17.27, + "learning_rate": 8.274431818181818e-05, + "loss": 0.0, + "step": 3040 + }, + { + "epoch": 17.27, + "eval_accuracy": 1.0, + "eval_loss": 1.2452629562176298e-05, + "eval_runtime": 124.8026, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 3040 + }, + { + "epoch": 17.33, + "learning_rate": 8.268750000000001e-05, + "loss": 0.0, + "step": 3050 + }, + { + "epoch": 17.33, + "eval_accuracy": 1.0, + "eval_loss": 1.2341886758804321e-05, + "eval_runtime": 125.591, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 3050 + }, + { + "epoch": 17.39, + "learning_rate": 8.263068181818182e-05, + "loss": 0.0, + "step": 3060 + }, + { + "epoch": 17.39, + "eval_accuracy": 1.0, + "eval_loss": 1.2238932868058328e-05, + "eval_runtime": 125.0607, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 3060 + }, + { + "epoch": 17.44, + "learning_rate": 8.257386363636363e-05, + "loss": 0.0, + "step": 3070 + }, + { + "epoch": 17.44, + "eval_accuracy": 1.0, + "eval_loss": 1.2137334124417976e-05, + "eval_runtime": 125.2836, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 3070 + }, + { + "epoch": 17.5, + "learning_rate": 8.251704545454546e-05, + "loss": 0.0, + "step": 3080 + }, + { + "epoch": 17.5, + "eval_accuracy": 1.0, + "eval_loss": 1.2022527698718477e-05, + "eval_runtime": 125.0346, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 3080 + }, + { + "epoch": 17.56, + "learning_rate": 8.246022727272727e-05, + "loss": 0.0, + "step": 3090 + }, + { + "epoch": 17.56, + "eval_accuracy": 1.0, + "eval_loss": 1.187351608678e-05, + "eval_runtime": 124.9534, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 3090 + }, + { + "epoch": 17.61, + "learning_rate": 8.240340909090909e-05, + "loss": 0.0, + "step": 3100 + }, + { + "epoch": 17.61, + "eval_accuracy": 1.0, + "eval_loss": 1.1740083209588192e-05, + "eval_runtime": 124.6233, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 3100 + }, + { + "epoch": 17.67, + "learning_rate": 8.234659090909092e-05, + "loss": 0.0, + "step": 3110 + }, + { + "epoch": 17.67, + "eval_accuracy": 1.0, + "eval_loss": 1.1663883924484253e-05, + "eval_runtime": 124.5589, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.706, + "step": 3110 + }, + { + "epoch": 17.73, + "learning_rate": 8.228977272727272e-05, + "loss": 0.0, + "step": 3120 + }, + { + "epoch": 17.73, + "eval_accuracy": 1.0, + "eval_loss": 1.1568719855858944e-05, + "eval_runtime": 124.5591, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.706, + "step": 3120 + }, + { + "epoch": 17.78, + "learning_rate": 8.223295454545456e-05, + "loss": 0.0, + "step": 3130 + }, + { + "epoch": 17.78, + "eval_accuracy": 1.0, + "eval_loss": 1.1476265171950217e-05, + "eval_runtime": 124.7473, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 3130 + }, + { + "epoch": 17.84, + "learning_rate": 8.217613636363637e-05, + "loss": 0.0, + "step": 3140 + }, + { + "epoch": 17.84, + "eval_accuracy": 1.0, + "eval_loss": 1.1378390809113625e-05, + "eval_runtime": 125.1191, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 3140 + }, + { + "epoch": 17.9, + "learning_rate": 8.211931818181818e-05, + "loss": 0.0, + "step": 3150 + }, + { + "epoch": 17.9, + "eval_accuracy": 1.0, + "eval_loss": 1.128864550992148e-05, + "eval_runtime": 124.9595, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 3150 + }, + { + "epoch": 17.95, + "learning_rate": 8.206250000000001e-05, + "loss": 0.0, + "step": 3160 + }, + { + "epoch": 17.95, + "eval_accuracy": 1.0, + "eval_loss": 1.1222606190131046e-05, + "eval_runtime": 124.9915, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 3160 + }, + { + "epoch": 18.01, + "learning_rate": 8.200568181818183e-05, + "loss": 0.0, + "step": 3170 + }, + { + "epoch": 18.01, + "eval_accuracy": 1.0, + "eval_loss": 1.1088834980910178e-05, + "eval_runtime": 124.8248, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 3170 + }, + { + "epoch": 18.07, + "learning_rate": 8.194886363636363e-05, + "loss": 0.0, + "step": 3180 + }, + { + "epoch": 18.07, + "eval_accuracy": 1.0, + "eval_loss": 1.0908665899478365e-05, + "eval_runtime": 128.4229, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 0.685, + "step": 3180 + }, + { + "epoch": 18.12, + "learning_rate": 8.189204545454546e-05, + "loss": 0.0, + "step": 3190 + }, + { + "epoch": 18.12, + "eval_accuracy": 1.0, + "eval_loss": 1.0795552952913567e-05, + "eval_runtime": 128.719, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 3190 + }, + { + "epoch": 18.18, + "learning_rate": 8.183522727272728e-05, + "loss": 0.0, + "step": 3200 + }, + { + "epoch": 18.18, + "eval_accuracy": 1.0, + "eval_loss": 1.0704113265092019e-05, + "eval_runtime": 128.7122, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 3200 + }, + { + "epoch": 18.24, + "learning_rate": 8.177840909090909e-05, + "loss": 0.0, + "step": 3210 + }, + { + "epoch": 18.24, + "eval_accuracy": 1.0, + "eval_loss": 1.0583888069959357e-05, + "eval_runtime": 129.0763, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 3210 + }, + { + "epoch": 18.3, + "learning_rate": 8.172159090909092e-05, + "loss": 0.0, + "step": 3220 + }, + { + "epoch": 18.3, + "eval_accuracy": 1.0, + "eval_loss": 1.0486692190170288e-05, + "eval_runtime": 129.056, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 3220 + }, + { + "epoch": 18.35, + "learning_rate": 8.166477272727274e-05, + "loss": 0.0, + "step": 3230 + }, + { + "epoch": 18.35, + "eval_accuracy": 1.0, + "eval_loss": 1.0401010513305664e-05, + "eval_runtime": 129.0881, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 3230 + }, + { + "epoch": 18.41, + "learning_rate": 8.160795454545454e-05, + "loss": 0.0, + "step": 3240 + }, + { + "epoch": 18.41, + "eval_accuracy": 1.0, + "eval_loss": 1.031499050441198e-05, + "eval_runtime": 129.3117, + "eval_samples_per_second": 2.722, + "eval_steps_per_second": 0.681, + "step": 3240 + }, + { + "epoch": 18.47, + "learning_rate": 8.155113636363637e-05, + "loss": 0.0, + "step": 3250 + }, + { + "epoch": 18.47, + "eval_accuracy": 1.0, + "eval_loss": 1.0253014806949068e-05, + "eval_runtime": 128.541, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 3250 + }, + { + "epoch": 18.52, + "learning_rate": 8.149431818181819e-05, + "loss": 0.0, + "step": 3260 + }, + { + "epoch": 18.52, + "eval_accuracy": 1.0, + "eval_loss": 1.013820838124957e-05, + "eval_runtime": 128.9704, + "eval_samples_per_second": 2.729, + "eval_steps_per_second": 0.682, + "step": 3260 + }, + { + "epoch": 18.58, + "learning_rate": 8.14375e-05, + "loss": 0.0, + "step": 3270 + }, + { + "epoch": 18.58, + "eval_accuracy": 1.0, + "eval_loss": 1.0105358342116233e-05, + "eval_runtime": 129.1004, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 3270 + }, + { + "epoch": 18.64, + "learning_rate": 8.138068181818183e-05, + "loss": 0.0, + "step": 3280 + }, + { + "epoch": 18.64, + "eval_accuracy": 1.0, + "eval_loss": 1.0027803909906652e-05, + "eval_runtime": 128.5928, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 3280 + }, + { + "epoch": 18.69, + "learning_rate": 8.132386363636364e-05, + "loss": 0.0, + "step": 3290 + }, + { + "epoch": 18.69, + "eval_accuracy": 1.0, + "eval_loss": 9.950250387191772e-06, + "eval_runtime": 128.6881, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 3290 + }, + { + "epoch": 18.75, + "learning_rate": 8.126704545454545e-05, + "loss": 0.0, + "step": 3300 + }, + { + "epoch": 18.75, + "eval_accuracy": 1.0, + "eval_loss": 9.877776392386295e-06, + "eval_runtime": 128.461, + "eval_samples_per_second": 2.74, + "eval_steps_per_second": 0.685, + "step": 3300 + }, + { + "epoch": 18.81, + "learning_rate": 8.121022727272728e-05, + "loss": 0.0, + "step": 3310 + }, + { + "epoch": 18.81, + "eval_accuracy": 1.0, + "eval_loss": 9.79141714196885e-06, + "eval_runtime": 128.5425, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 3310 + }, + { + "epoch": 18.86, + "learning_rate": 8.11534090909091e-05, + "loss": 0.0, + "step": 3320 + }, + { + "epoch": 18.86, + "eval_accuracy": 1.0, + "eval_loss": 9.720298294269014e-06, + "eval_runtime": 129.2891, + "eval_samples_per_second": 2.723, + "eval_steps_per_second": 0.681, + "step": 3320 + }, + { + "epoch": 18.92, + "learning_rate": 8.10965909090909e-05, + "loss": 0.0, + "step": 3330 + }, + { + "epoch": 18.92, + "eval_accuracy": 1.0, + "eval_loss": 9.641728865972254e-06, + "eval_runtime": 128.8641, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 3330 + }, + { + "epoch": 18.98, + "learning_rate": 8.103977272727273e-05, + "loss": 0.0, + "step": 3340 + }, + { + "epoch": 18.98, + "eval_accuracy": 1.0, + "eval_loss": 9.57805968937464e-06, + "eval_runtime": 129.3949, + "eval_samples_per_second": 2.72, + "eval_steps_per_second": 0.68, + "step": 3340 + }, + { + "epoch": 19.03, + "learning_rate": 8.098295454545455e-05, + "loss": 0.0, + "step": 3350 + }, + { + "epoch": 19.03, + "eval_accuracy": 1.0, + "eval_loss": 9.501522072241642e-06, + "eval_runtime": 128.867, + "eval_samples_per_second": 2.731, + "eval_steps_per_second": 0.683, + "step": 3350 + }, + { + "epoch": 19.09, + "learning_rate": 8.092613636363636e-05, + "loss": 0.0, + "step": 3360 + }, + { + "epoch": 19.09, + "eval_accuracy": 1.0, + "eval_loss": 9.435821993974969e-06, + "eval_runtime": 128.9324, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.683, + "step": 3360 + }, + { + "epoch": 19.15, + "learning_rate": 8.086931818181819e-05, + "loss": 0.0, + "step": 3370 + }, + { + "epoch": 19.15, + "eval_accuracy": 1.0, + "eval_loss": 9.351832886750344e-06, + "eval_runtime": 128.3919, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.685, + "step": 3370 + }, + { + "epoch": 19.2, + "learning_rate": 8.08125e-05, + "loss": 0.0, + "step": 3380 + }, + { + "epoch": 19.2, + "eval_accuracy": 1.0, + "eval_loss": 9.278004654333927e-06, + "eval_runtime": 128.6225, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 3380 + }, + { + "epoch": 19.26, + "learning_rate": 8.075568181818182e-05, + "loss": 0.0, + "step": 3390 + }, + { + "epoch": 19.26, + "eval_accuracy": 1.0, + "eval_loss": 9.210611096932553e-06, + "eval_runtime": 128.7858, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 3390 + }, + { + "epoch": 19.32, + "learning_rate": 8.069886363636364e-05, + "loss": 0.0, + "step": 3400 + }, + { + "epoch": 19.32, + "eval_accuracy": 1.0, + "eval_loss": 9.141524060396478e-06, + "eval_runtime": 128.3549, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.686, + "step": 3400 + }, + { + "epoch": 19.38, + "learning_rate": 8.064204545454546e-05, + "loss": 0.0, + "step": 3410 + }, + { + "epoch": 19.38, + "eval_accuracy": 1.0, + "eval_loss": 9.060244337888435e-06, + "eval_runtime": 129.1155, + "eval_samples_per_second": 2.726, + "eval_steps_per_second": 0.682, + "step": 3410 + }, + { + "epoch": 19.43, + "learning_rate": 8.058522727272728e-05, + "loss": 0.0, + "step": 3420 + }, + { + "epoch": 19.43, + "eval_accuracy": 1.0, + "eval_loss": 9.001317266665865e-06, + "eval_runtime": 129.2984, + "eval_samples_per_second": 2.722, + "eval_steps_per_second": 0.681, + "step": 3420 + }, + { + "epoch": 19.49, + "learning_rate": 8.05284090909091e-05, + "loss": 0.0, + "step": 3430 + }, + { + "epoch": 19.49, + "eval_accuracy": 1.0, + "eval_loss": 8.897686711861752e-06, + "eval_runtime": 129.0934, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 3430 + }, + { + "epoch": 19.55, + "learning_rate": 8.047159090909091e-05, + "loss": 0.0, + "step": 3440 + }, + { + "epoch": 19.55, + "eval_accuracy": 1.0, + "eval_loss": 8.805908692011144e-06, + "eval_runtime": 129.3713, + "eval_samples_per_second": 2.721, + "eval_steps_per_second": 0.68, + "step": 3440 + }, + { + "epoch": 19.6, + "learning_rate": 8.041477272727273e-05, + "loss": 0.0, + "step": 3450 + }, + { + "epoch": 19.6, + "eval_accuracy": 1.0, + "eval_loss": 8.725984116608743e-06, + "eval_runtime": 128.4963, + "eval_samples_per_second": 2.739, + "eval_steps_per_second": 0.685, + "step": 3450 + }, + { + "epoch": 19.66, + "learning_rate": 8.035795454545455e-05, + "loss": 0.0, + "step": 3460 + }, + { + "epoch": 19.66, + "eval_accuracy": 1.0, + "eval_loss": 8.633529432700016e-06, + "eval_runtime": 128.5769, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.684, + "step": 3460 + }, + { + "epoch": 19.72, + "learning_rate": 8.030113636363637e-05, + "loss": 0.0, + "step": 3470 + }, + { + "epoch": 19.72, + "eval_accuracy": 1.0, + "eval_loss": 8.559023626730777e-06, + "eval_runtime": 129.2285, + "eval_samples_per_second": 2.724, + "eval_steps_per_second": 0.681, + "step": 3470 + }, + { + "epoch": 19.77, + "learning_rate": 8.024431818181819e-05, + "loss": 0.0, + "step": 3480 + }, + { + "epoch": 19.77, + "eval_accuracy": 1.0, + "eval_loss": 8.496709597238805e-06, + "eval_runtime": 128.8669, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 3480 + }, + { + "epoch": 19.83, + "learning_rate": 8.01875e-05, + "loss": 0.0, + "step": 3490 + }, + { + "epoch": 19.83, + "eval_accuracy": 1.0, + "eval_loss": 8.44523310661316e-06, + "eval_runtime": 128.3928, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.685, + "step": 3490 + }, + { + "epoch": 19.89, + "learning_rate": 8.013068181818182e-05, + "loss": 0.0, + "step": 3500 + }, + { + "epoch": 19.89, + "eval_accuracy": 1.0, + "eval_loss": 8.37106563267298e-06, + "eval_runtime": 128.2404, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3500 + }, + { + "epoch": 19.94, + "learning_rate": 8.007386363636364e-05, + "loss": 0.0, + "step": 3510 + }, + { + "epoch": 19.94, + "eval_accuracy": 1.0, + "eval_loss": 8.313493708556052e-06, + "eval_runtime": 128.7172, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 3510 + }, + { + "epoch": 20.0, + "learning_rate": 8.001704545454546e-05, + "loss": 0.0, + "step": 3520 + }, + { + "epoch": 20.0, + "eval_accuracy": 1.0, + "eval_loss": 8.246776815212797e-06, + "eval_runtime": 128.6402, + "eval_samples_per_second": 2.736, + "eval_steps_per_second": 0.684, + "step": 3520 + }, + { + "epoch": 20.06, + "learning_rate": 7.996022727272728e-05, + "loss": 0.0, + "step": 3530 + }, + { + "epoch": 20.06, + "eval_accuracy": 1.0, + "eval_loss": 8.150935173034668e-06, + "eval_runtime": 128.2124, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3530 + }, + { + "epoch": 20.11, + "learning_rate": 7.99034090909091e-05, + "loss": 0.0, + "step": 3540 + }, + { + "epoch": 20.11, + "eval_accuracy": 1.0, + "eval_loss": 8.067962880886625e-06, + "eval_runtime": 128.6987, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 3540 + }, + { + "epoch": 20.17, + "learning_rate": 7.984659090909091e-05, + "loss": 0.0, + "step": 3550 + }, + { + "epoch": 20.17, + "eval_accuracy": 1.0, + "eval_loss": 8.006664756976534e-06, + "eval_runtime": 128.5795, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.684, + "step": 3550 + }, + { + "epoch": 20.23, + "learning_rate": 7.978977272727273e-05, + "loss": 0.0, + "step": 3560 + }, + { + "epoch": 20.23, + "eval_accuracy": 1.0, + "eval_loss": 7.779760380799416e-06, + "eval_runtime": 128.3965, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.685, + "step": 3560 + }, + { + "epoch": 20.28, + "learning_rate": 7.973295454545455e-05, + "loss": 0.0, + "step": 3570 + }, + { + "epoch": 20.28, + "eval_accuracy": 1.0, + "eval_loss": 7.664615623070858e-06, + "eval_runtime": 128.5607, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 3570 + }, + { + "epoch": 20.34, + "learning_rate": 7.967613636363637e-05, + "loss": 0.0, + "step": 3580 + }, + { + "epoch": 20.34, + "eval_accuracy": 1.0, + "eval_loss": 7.573515176773071e-06, + "eval_runtime": 128.5445, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 3580 + }, + { + "epoch": 20.4, + "learning_rate": 7.961931818181818e-05, + "loss": 0.0, + "step": 3590 + }, + { + "epoch": 20.4, + "eval_accuracy": 1.0, + "eval_loss": 7.514587650803151e-06, + "eval_runtime": 128.2392, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3590 + }, + { + "epoch": 20.45, + "learning_rate": 7.95625e-05, + "loss": 0.0, + "step": 3600 + }, + { + "epoch": 20.45, + "eval_accuracy": 1.0, + "eval_loss": 7.4607405622373335e-06, + "eval_runtime": 128.1995, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.686, + "step": 3600 + }, + { + "epoch": 20.51, + "learning_rate": 7.950568181818182e-05, + "loss": 0.0, + "step": 3610 + }, + { + "epoch": 20.51, + "eval_accuracy": 1.0, + "eval_loss": 7.405199994536815e-06, + "eval_runtime": 128.6601, + "eval_samples_per_second": 2.736, + "eval_steps_per_second": 0.684, + "step": 3610 + }, + { + "epoch": 20.57, + "learning_rate": 7.944886363636364e-05, + "loss": 0.0, + "step": 3620 + }, + { + "epoch": 20.57, + "eval_accuracy": 1.0, + "eval_loss": 7.370317689492367e-06, + "eval_runtime": 128.3397, + "eval_samples_per_second": 2.743, + "eval_steps_per_second": 0.686, + "step": 3620 + }, + { + "epoch": 20.62, + "learning_rate": 7.939204545454546e-05, + "loss": 0.0, + "step": 3630 + }, + { + "epoch": 20.62, + "eval_accuracy": 1.0, + "eval_loss": 7.323581939999713e-06, + "eval_runtime": 128.4132, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 0.685, + "step": 3630 + }, + { + "epoch": 20.68, + "learning_rate": 7.933522727272727e-05, + "loss": 0.0, + "step": 3640 + }, + { + "epoch": 20.68, + "eval_accuracy": 1.0, + "eval_loss": 7.263300176418852e-06, + "eval_runtime": 128.4918, + "eval_samples_per_second": 2.739, + "eval_steps_per_second": 0.685, + "step": 3640 + }, + { + "epoch": 20.74, + "learning_rate": 7.927840909090909e-05, + "loss": 0.0, + "step": 3650 + }, + { + "epoch": 20.74, + "eval_accuracy": 1.0, + "eval_loss": 7.210130206658505e-06, + "eval_runtime": 128.1784, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.687, + "step": 3650 + }, + { + "epoch": 20.8, + "learning_rate": 7.922159090909091e-05, + "loss": 0.0, + "step": 3660 + }, + { + "epoch": 20.8, + "eval_accuracy": 1.0, + "eval_loss": 7.16339445716585e-06, + "eval_runtime": 128.4265, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 0.685, + "step": 3660 + }, + { + "epoch": 20.85, + "learning_rate": 7.916477272727273e-05, + "loss": 0.0, + "step": 3670 + }, + { + "epoch": 20.85, + "eval_accuracy": 1.0, + "eval_loss": 7.094307420629775e-06, + "eval_runtime": 128.86, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 3670 + }, + { + "epoch": 20.91, + "learning_rate": 7.910795454545456e-05, + "loss": 0.0, + "step": 3680 + }, + { + "epoch": 20.91, + "eval_accuracy": 1.0, + "eval_loss": 7.042492143227719e-06, + "eval_runtime": 128.1572, + "eval_samples_per_second": 2.747, + "eval_steps_per_second": 0.687, + "step": 3680 + }, + { + "epoch": 20.97, + "learning_rate": 7.905113636363636e-05, + "loss": 0.0, + "step": 3690 + }, + { + "epoch": 20.97, + "eval_accuracy": 1.0, + "eval_loss": 6.994063369347714e-06, + "eval_runtime": 128.252, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3690 + }, + { + "epoch": 21.02, + "learning_rate": 7.899431818181818e-05, + "loss": 0.0, + "step": 3700 + }, + { + "epoch": 21.02, + "eval_accuracy": 1.0, + "eval_loss": 6.964599378989078e-06, + "eval_runtime": 128.2304, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3700 + }, + { + "epoch": 21.08, + "learning_rate": 7.893750000000001e-05, + "loss": 0.0, + "step": 3710 + }, + { + "epoch": 21.08, + "eval_accuracy": 1.0, + "eval_loss": 6.906688213348389e-06, + "eval_runtime": 127.7789, + "eval_samples_per_second": 2.755, + "eval_steps_per_second": 0.689, + "step": 3710 + }, + { + "epoch": 21.14, + "learning_rate": 7.888068181818182e-05, + "loss": 0.0, + "step": 3720 + }, + { + "epoch": 21.14, + "eval_accuracy": 1.0, + "eval_loss": 6.861307610961376e-06, + "eval_runtime": 128.9392, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.682, + "step": 3720 + }, + { + "epoch": 21.19, + "learning_rate": 7.882386363636364e-05, + "loss": 0.0, + "step": 3730 + }, + { + "epoch": 21.19, + "eval_accuracy": 1.0, + "eval_loss": 6.829134235886158e-06, + "eval_runtime": 128.2542, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3730 + }, + { + "epoch": 21.25, + "learning_rate": 7.876704545454547e-05, + "loss": 0.0, + "step": 3740 + }, + { + "epoch": 21.25, + "eval_accuracy": 1.0, + "eval_loss": 6.7844307523046155e-06, + "eval_runtime": 128.5418, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 3740 + }, + { + "epoch": 21.31, + "learning_rate": 7.871022727272727e-05, + "loss": 0.0, + "step": 3750 + }, + { + "epoch": 21.31, + "eval_accuracy": 1.0, + "eval_loss": 6.743452559021534e-06, + "eval_runtime": 128.1975, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.686, + "step": 3750 + }, + { + "epoch": 21.36, + "learning_rate": 7.865340909090909e-05, + "loss": 0.0, + "step": 3760 + }, + { + "epoch": 21.36, + "eval_accuracy": 1.0, + "eval_loss": 6.684186701022554e-06, + "eval_runtime": 128.1411, + "eval_samples_per_second": 2.747, + "eval_steps_per_second": 0.687, + "step": 3760 + }, + { + "epoch": 21.42, + "learning_rate": 7.859659090909092e-05, + "loss": 0.0, + "step": 3770 + }, + { + "epoch": 21.42, + "eval_accuracy": 1.0, + "eval_loss": 6.635757927142549e-06, + "eval_runtime": 128.4864, + "eval_samples_per_second": 2.74, + "eval_steps_per_second": 0.685, + "step": 3770 + }, + { + "epoch": 21.48, + "learning_rate": 7.853977272727273e-05, + "loss": 0.0, + "step": 3780 + }, + { + "epoch": 21.48, + "eval_accuracy": 1.0, + "eval_loss": 6.587667940038955e-06, + "eval_runtime": 128.3559, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.686, + "step": 3780 + }, + { + "epoch": 21.53, + "learning_rate": 7.848295454545455e-05, + "loss": 0.0, + "step": 3790 + }, + { + "epoch": 21.53, + "eval_accuracy": 1.0, + "eval_loss": 6.549399131472455e-06, + "eval_runtime": 128.3917, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.685, + "step": 3790 + }, + { + "epoch": 21.59, + "learning_rate": 7.842613636363638e-05, + "loss": 0.0, + "step": 3800 + }, + { + "epoch": 21.59, + "eval_accuracy": 1.0, + "eval_loss": 6.492503871413646e-06, + "eval_runtime": 128.7316, + "eval_samples_per_second": 2.734, + "eval_steps_per_second": 0.684, + "step": 3800 + }, + { + "epoch": 21.65, + "learning_rate": 7.836931818181818e-05, + "loss": 0.0, + "step": 3810 + }, + { + "epoch": 21.65, + "eval_accuracy": 1.0, + "eval_loss": 6.444752216339111e-06, + "eval_runtime": 128.131, + "eval_samples_per_second": 2.747, + "eval_steps_per_second": 0.687, + "step": 3810 + }, + { + "epoch": 21.7, + "learning_rate": 7.83125e-05, + "loss": 0.0, + "step": 3820 + }, + { + "epoch": 21.7, + "eval_accuracy": 1.0, + "eval_loss": 6.415965799533296e-06, + "eval_runtime": 128.5374, + "eval_samples_per_second": 2.739, + "eval_steps_per_second": 0.685, + "step": 3820 + }, + { + "epoch": 21.76, + "learning_rate": 7.825568181818183e-05, + "loss": 0.0, + "step": 3830 + }, + { + "epoch": 21.76, + "eval_accuracy": 1.0, + "eval_loss": 6.3766810853849165e-06, + "eval_runtime": 128.2193, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3830 + }, + { + "epoch": 21.82, + "learning_rate": 7.819886363636364e-05, + "loss": 0.0, + "step": 3840 + }, + { + "epoch": 21.82, + "eval_accuracy": 1.0, + "eval_loss": 6.3387506088474765e-06, + "eval_runtime": 127.9363, + "eval_samples_per_second": 2.751, + "eval_steps_per_second": 0.688, + "step": 3840 + }, + { + "epoch": 21.88, + "learning_rate": 7.814204545454545e-05, + "loss": 0.0, + "step": 3850 + }, + { + "epoch": 21.88, + "eval_accuracy": 1.0, + "eval_loss": 6.301497705862857e-06, + "eval_runtime": 128.202, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.686, + "step": 3850 + }, + { + "epoch": 21.93, + "learning_rate": 7.808522727272729e-05, + "loss": 0.0, + "step": 3860 + }, + { + "epoch": 21.93, + "eval_accuracy": 1.0, + "eval_loss": 6.256455890252255e-06, + "eval_runtime": 128.1911, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.686, + "step": 3860 + }, + { + "epoch": 21.99, + "learning_rate": 7.802840909090909e-05, + "loss": 0.0, + "step": 3870 + }, + { + "epoch": 21.99, + "eval_accuracy": 1.0, + "eval_loss": 6.233765361685073e-06, + "eval_runtime": 128.1577, + "eval_samples_per_second": 2.747, + "eval_steps_per_second": 0.687, + "step": 3870 + }, + { + "epoch": 22.05, + "learning_rate": 7.797159090909091e-05, + "loss": 0.0, + "step": 3880 + }, + { + "epoch": 22.05, + "eval_accuracy": 1.0, + "eval_loss": 6.188384304550709e-06, + "eval_runtime": 128.2292, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3880 + }, + { + "epoch": 22.1, + "learning_rate": 7.791477272727274e-05, + "loss": 0.0, + "step": 3890 + }, + { + "epoch": 22.1, + "eval_accuracy": 1.0, + "eval_loss": 6.1446971812983975e-06, + "eval_runtime": 128.3511, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.686, + "step": 3890 + }, + { + "epoch": 22.16, + "learning_rate": 7.785795454545454e-05, + "loss": 0.0, + "step": 3900 + }, + { + "epoch": 22.16, + "eval_accuracy": 1.0, + "eval_loss": 6.122006652731216e-06, + "eval_runtime": 128.3888, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.685, + "step": 3900 + }, + { + "epoch": 22.22, + "learning_rate": 7.780113636363636e-05, + "loss": 0.0, + "step": 3910 + }, + { + "epoch": 22.22, + "eval_accuracy": 1.0, + "eval_loss": 6.085430868552066e-06, + "eval_runtime": 128.6091, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 3910 + }, + { + "epoch": 22.27, + "learning_rate": 7.77443181818182e-05, + "loss": 0.0, + "step": 3920 + }, + { + "epoch": 22.27, + "eval_accuracy": 1.0, + "eval_loss": 6.062740794732235e-06, + "eval_runtime": 128.5995, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 3920 + }, + { + "epoch": 22.33, + "learning_rate": 7.76875e-05, + "loss": 0.0, + "step": 3930 + }, + { + "epoch": 22.33, + "eval_accuracy": 1.0, + "eval_loss": 6.031583779986249e-06, + "eval_runtime": 128.0217, + "eval_samples_per_second": 2.75, + "eval_steps_per_second": 0.687, + "step": 3930 + }, + { + "epoch": 22.39, + "learning_rate": 7.763068181818183e-05, + "loss": 0.0, + "step": 3940 + }, + { + "epoch": 22.39, + "eval_accuracy": 1.0, + "eval_loss": 6.0021197896276135e-06, + "eval_runtime": 128.2568, + "eval_samples_per_second": 2.744, + "eval_steps_per_second": 0.686, + "step": 3940 + }, + { + "epoch": 22.44, + "learning_rate": 7.757386363636365e-05, + "loss": 0.0, + "step": 3950 + }, + { + "epoch": 22.44, + "eval_accuracy": 1.0, + "eval_loss": 5.964189767837524e-06, + "eval_runtime": 128.2432, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3950 + }, + { + "epoch": 22.5, + "learning_rate": 7.751704545454545e-05, + "loss": 0.0, + "step": 3960 + }, + { + "epoch": 22.5, + "eval_accuracy": 1.0, + "eval_loss": 5.927275651629316e-06, + "eval_runtime": 128.1276, + "eval_samples_per_second": 2.747, + "eval_steps_per_second": 0.687, + "step": 3960 + }, + { + "epoch": 22.56, + "learning_rate": 7.746022727272728e-05, + "loss": 0.0, + "step": 3970 + }, + { + "epoch": 22.56, + "eval_accuracy": 1.0, + "eval_loss": 5.905601028644014e-06, + "eval_runtime": 127.998, + "eval_samples_per_second": 2.75, + "eval_steps_per_second": 0.688, + "step": 3970 + }, + { + "epoch": 22.61, + "learning_rate": 7.74034090909091e-05, + "loss": 0.0, + "step": 3980 + }, + { + "epoch": 22.61, + "eval_accuracy": 1.0, + "eval_loss": 5.846335170645034e-06, + "eval_runtime": 128.2393, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 3980 + }, + { + "epoch": 22.67, + "learning_rate": 7.73465909090909e-05, + "loss": 0.0, + "step": 3990 + }, + { + "epoch": 22.67, + "eval_accuracy": 1.0, + "eval_loss": 5.81991935177939e-06, + "eval_runtime": 127.85, + "eval_samples_per_second": 2.753, + "eval_steps_per_second": 0.688, + "step": 3990 + }, + { + "epoch": 22.73, + "learning_rate": 7.728977272727274e-05, + "loss": 0.0, + "step": 4000 + }, + { + "epoch": 22.73, + "eval_accuracy": 1.0, + "eval_loss": 5.803663498227252e-06, + "eval_runtime": 128.8013, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 4000 + }, + { + "epoch": 22.78, + "learning_rate": 7.723295454545456e-05, + "loss": 0.0, + "step": 4010 + }, + { + "epoch": 22.78, + "eval_accuracy": 1.0, + "eval_loss": 5.777248134108959e-06, + "eval_runtime": 128.6086, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 4010 + }, + { + "epoch": 22.84, + "learning_rate": 7.717613636363636e-05, + "loss": 0.0, + "step": 4020 + }, + { + "epoch": 22.84, + "eval_accuracy": 1.0, + "eval_loss": 5.76065349378041e-06, + "eval_runtime": 128.971, + "eval_samples_per_second": 2.729, + "eval_steps_per_second": 0.682, + "step": 4020 + }, + { + "epoch": 22.9, + "learning_rate": 7.711931818181819e-05, + "loss": 0.0, + "step": 4030 + }, + { + "epoch": 22.9, + "eval_accuracy": 1.0, + "eval_loss": 5.730512384616304e-06, + "eval_runtime": 128.3645, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.686, + "step": 4030 + }, + { + "epoch": 22.95, + "learning_rate": 7.706250000000001e-05, + "loss": 0.0, + "step": 4040 + }, + { + "epoch": 22.95, + "eval_accuracy": 1.0, + "eval_loss": 5.6966459851537365e-06, + "eval_runtime": 129.1383, + "eval_samples_per_second": 2.726, + "eval_steps_per_second": 0.681, + "step": 4040 + }, + { + "epoch": 23.01, + "learning_rate": 7.700568181818181e-05, + "loss": 0.0, + "step": 4050 + }, + { + "epoch": 23.01, + "eval_accuracy": 1.0, + "eval_loss": 5.673278337781085e-06, + "eval_runtime": 129.5839, + "eval_samples_per_second": 2.716, + "eval_steps_per_second": 0.679, + "step": 4050 + }, + { + "epoch": 23.07, + "learning_rate": 7.694886363636365e-05, + "loss": 0.0, + "step": 4060 + }, + { + "epoch": 23.07, + "eval_accuracy": 1.0, + "eval_loss": 5.633654836856294e-06, + "eval_runtime": 128.7794, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 4060 + }, + { + "epoch": 23.12, + "learning_rate": 7.689204545454546e-05, + "loss": 0.0, + "step": 4070 + }, + { + "epoch": 23.12, + "eval_accuracy": 1.0, + "eval_loss": 5.6041913012450095e-06, + "eval_runtime": 128.6275, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 4070 + }, + { + "epoch": 23.18, + "learning_rate": 7.683522727272727e-05, + "loss": 0.0, + "step": 4080 + }, + { + "epoch": 23.18, + "eval_accuracy": 1.0, + "eval_loss": 5.567954303842271e-06, + "eval_runtime": 128.9985, + "eval_samples_per_second": 2.729, + "eval_steps_per_second": 0.682, + "step": 4080 + }, + { + "epoch": 23.24, + "learning_rate": 7.67784090909091e-05, + "loss": 0.0, + "step": 4090 + }, + { + "epoch": 23.24, + "eval_accuracy": 1.0, + "eval_loss": 5.528669589693891e-06, + "eval_runtime": 128.4264, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 0.685, + "step": 4090 + }, + { + "epoch": 23.3, + "learning_rate": 7.672159090909092e-05, + "loss": 0.0, + "step": 4100 + }, + { + "epoch": 23.3, + "eval_accuracy": 1.0, + "eval_loss": 5.4964966693660244e-06, + "eval_runtime": 129.1372, + "eval_samples_per_second": 2.726, + "eval_steps_per_second": 0.681, + "step": 4100 + }, + { + "epoch": 23.35, + "learning_rate": 7.666477272727272e-05, + "loss": 0.0, + "step": 4110 + }, + { + "epoch": 23.35, + "eval_accuracy": 1.0, + "eval_loss": 5.459920885186875e-06, + "eval_runtime": 129.3604, + "eval_samples_per_second": 2.721, + "eval_steps_per_second": 0.68, + "step": 4110 + }, + { + "epoch": 23.41, + "learning_rate": 7.660795454545455e-05, + "loss": 0.0, + "step": 4120 + }, + { + "epoch": 23.41, + "eval_accuracy": 1.0, + "eval_loss": 5.420974503067555e-06, + "eval_runtime": 129.1516, + "eval_samples_per_second": 2.725, + "eval_steps_per_second": 0.681, + "step": 4120 + }, + { + "epoch": 23.47, + "learning_rate": 7.655113636363637e-05, + "loss": 0.0, + "step": 4130 + }, + { + "epoch": 23.47, + "eval_accuracy": 1.0, + "eval_loss": 5.397606855694903e-06, + "eval_runtime": 128.3667, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.686, + "step": 4130 + }, + { + "epoch": 23.52, + "learning_rate": 7.649431818181818e-05, + "loss": 0.0, + "step": 4140 + }, + { + "epoch": 23.52, + "eval_accuracy": 1.0, + "eval_loss": 5.361708645068575e-06, + "eval_runtime": 124.7909, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 4140 + }, + { + "epoch": 23.58, + "learning_rate": 7.643750000000001e-05, + "loss": 0.0, + "step": 4150 + }, + { + "epoch": 23.58, + "eval_accuracy": 1.0, + "eval_loss": 5.324116955307545e-06, + "eval_runtime": 124.7802, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 4150 + }, + { + "epoch": 23.64, + "learning_rate": 7.638068181818183e-05, + "loss": 0.0, + "step": 4160 + }, + { + "epoch": 23.64, + "eval_accuracy": 1.0, + "eval_loss": 5.27568818142754e-06, + "eval_runtime": 124.8571, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 4160 + }, + { + "epoch": 23.69, + "learning_rate": 7.632386363636363e-05, + "loss": 0.0, + "step": 4170 + }, + { + "epoch": 23.69, + "eval_accuracy": 1.0, + "eval_loss": 5.240806331130443e-06, + "eval_runtime": 125.5928, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 4170 + }, + { + "epoch": 23.75, + "learning_rate": 7.626704545454546e-05, + "loss": 0.0, + "step": 4180 + }, + { + "epoch": 23.75, + "eval_accuracy": 1.0, + "eval_loss": 5.204907665756764e-06, + "eval_runtime": 125.516, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 4180 + }, + { + "epoch": 23.81, + "learning_rate": 7.621022727272728e-05, + "loss": 0.0, + "step": 4190 + }, + { + "epoch": 23.81, + "eval_accuracy": 1.0, + "eval_loss": 5.3532421588897705e-06, + "eval_runtime": 124.869, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 4190 + }, + { + "epoch": 23.86, + "learning_rate": 7.615340909090908e-05, + "loss": 0.0, + "step": 4200 + }, + { + "epoch": 23.86, + "eval_accuracy": 1.0, + "eval_loss": 5.557794338528765e-06, + "eval_runtime": 125.1015, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 4200 + }, + { + "epoch": 23.92, + "learning_rate": 7.609659090909092e-05, + "loss": 0.0, + "step": 4210 + }, + { + "epoch": 23.92, + "eval_accuracy": 1.0, + "eval_loss": 5.618414888886036e-06, + "eval_runtime": 125.0142, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 4210 + }, + { + "epoch": 23.98, + "learning_rate": 7.603977272727273e-05, + "loss": 0.0, + "step": 4220 + }, + { + "epoch": 23.98, + "eval_accuracy": 1.0, + "eval_loss": 5.612657787423814e-06, + "eval_runtime": 125.3438, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 4220 + }, + { + "epoch": 24.03, + "learning_rate": 7.598295454545455e-05, + "loss": 0.0, + "step": 4230 + }, + { + "epoch": 24.03, + "eval_accuracy": 1.0, + "eval_loss": 5.5889513532747515e-06, + "eval_runtime": 125.9608, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 0.699, + "step": 4230 + }, + { + "epoch": 24.09, + "learning_rate": 7.592613636363637e-05, + "loss": 0.0, + "step": 4240 + }, + { + "epoch": 24.09, + "eval_accuracy": 1.0, + "eval_loss": 5.557794338528765e-06, + "eval_runtime": 125.4089, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 4240 + }, + { + "epoch": 24.15, + "learning_rate": 7.586931818181819e-05, + "loss": 0.0, + "step": 4250 + }, + { + "epoch": 24.15, + "eval_accuracy": 1.0, + "eval_loss": 5.515123120858334e-06, + "eval_runtime": 125.0024, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 4250 + }, + { + "epoch": 24.2, + "learning_rate": 7.58125e-05, + "loss": 0.0, + "step": 4260 + }, + { + "epoch": 24.2, + "eval_accuracy": 1.0, + "eval_loss": 5.4744832596043125e-06, + "eval_runtime": 125.3418, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 4260 + }, + { + "epoch": 24.26, + "learning_rate": 7.575568181818182e-05, + "loss": 0.0, + "step": 4270 + }, + { + "epoch": 24.26, + "eval_accuracy": 1.0, + "eval_loss": 5.439939741336275e-06, + "eval_runtime": 125.0071, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 4270 + }, + { + "epoch": 24.32, + "learning_rate": 7.569886363636364e-05, + "loss": 0.0, + "step": 4280 + }, + { + "epoch": 24.32, + "eval_accuracy": 1.0, + "eval_loss": 5.3918497542326804e-06, + "eval_runtime": 125.0933, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 4280 + }, + { + "epoch": 24.38, + "learning_rate": 7.564204545454546e-05, + "loss": 0.0, + "step": 4290 + }, + { + "epoch": 24.38, + "eval_accuracy": 1.0, + "eval_loss": 5.362724550650455e-06, + "eval_runtime": 125.1291, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 4290 + }, + { + "epoch": 24.43, + "learning_rate": 7.558522727272728e-05, + "loss": 0.0, + "step": 4300 + }, + { + "epoch": 24.43, + "eval_accuracy": 1.0, + "eval_loss": 5.305152171786176e-06, + "eval_runtime": 124.7683, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 4300 + }, + { + "epoch": 24.49, + "learning_rate": 7.55284090909091e-05, + "loss": 0.0, + "step": 4310 + }, + { + "epoch": 24.49, + "eval_accuracy": 1.0, + "eval_loss": 5.261125807010103e-06, + "eval_runtime": 125.2589, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 4310 + }, + { + "epoch": 24.55, + "learning_rate": 7.547159090909091e-05, + "loss": 0.0569, + "step": 4320 + }, + { + "epoch": 24.55, + "eval_accuracy": 1.0, + "eval_loss": 6.383623258443549e-05, + "eval_runtime": 125.1077, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 4320 + }, + { + "epoch": 24.6, + "learning_rate": 7.541477272727273e-05, + "loss": 0.1075, + "step": 4330 + }, + { + "epoch": 24.6, + "eval_accuracy": 0.9886363744735718, + "eval_loss": 0.06387602537870407, + "eval_runtime": 125.2171, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4330 + }, + { + "epoch": 24.66, + "learning_rate": 7.535795454545455e-05, + "loss": 0.0, + "step": 4340 + }, + { + "epoch": 24.66, + "eval_accuracy": 0.9715909361839294, + "eval_loss": 0.14371606707572937, + "eval_runtime": 125.2097, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4340 + }, + { + "epoch": 24.72, + "learning_rate": 7.530113636363637e-05, + "loss": 0.03, + "step": 4350 + }, + { + "epoch": 24.72, + "eval_accuracy": 1.0, + "eval_loss": 0.00018699229985941201, + "eval_runtime": 125.1948, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 4350 + }, + { + "epoch": 24.77, + "learning_rate": 7.524431818181819e-05, + "loss": 0.0016, + "step": 4360 + }, + { + "epoch": 24.77, + "eval_accuracy": 0.9801136255264282, + "eval_loss": 0.12097599357366562, + "eval_runtime": 125.1159, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 4360 + }, + { + "epoch": 24.83, + "learning_rate": 7.51875e-05, + "loss": 0.0249, + "step": 4370 + }, + { + "epoch": 24.83, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.017367534339427948, + "eval_runtime": 125.0644, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 4370 + }, + { + "epoch": 24.89, + "learning_rate": 7.513068181818182e-05, + "loss": 0.0018, + "step": 4380 + }, + { + "epoch": 24.89, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.03312718868255615, + "eval_runtime": 125.2488, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 4380 + }, + { + "epoch": 24.94, + "learning_rate": 7.507386363636364e-05, + "loss": 0.0001, + "step": 4390 + }, + { + "epoch": 24.94, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.006286283954977989, + "eval_runtime": 125.1478, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 4390 + }, + { + "epoch": 25.0, + "learning_rate": 7.501704545454546e-05, + "loss": 0.0001, + "step": 4400 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9886363744735718, + "eval_loss": 0.04271954298019409, + "eval_runtime": 125.2659, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 4400 + }, + { + "epoch": 25.06, + "learning_rate": 7.496022727272728e-05, + "loss": 0.0319, + "step": 4410 + }, + { + "epoch": 25.06, + "eval_accuracy": 1.0, + "eval_loss": 0.0003254816692788154, + "eval_runtime": 125.2785, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 4410 + }, + { + "epoch": 25.11, + "learning_rate": 7.49034090909091e-05, + "loss": 0.0055, + "step": 4420 + }, + { + "epoch": 25.11, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.039574023336172104, + "eval_runtime": 125.1178, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 4420 + }, + { + "epoch": 25.17, + "learning_rate": 7.484659090909091e-05, + "loss": 0.105, + "step": 4430 + }, + { + "epoch": 25.17, + "eval_accuracy": 1.0, + "eval_loss": 3.78740114683751e-05, + "eval_runtime": 125.5546, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 4430 + }, + { + "epoch": 25.23, + "learning_rate": 7.478977272727273e-05, + "loss": 0.0004, + "step": 4440 + }, + { + "epoch": 25.23, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.012259497307240963, + "eval_runtime": 125.5825, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 4440 + }, + { + "epoch": 25.28, + "learning_rate": 7.473295454545455e-05, + "loss": 0.0714, + "step": 4450 + }, + { + "epoch": 25.28, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.014759432524442673, + "eval_runtime": 125.4882, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 4450 + }, + { + "epoch": 25.34, + "learning_rate": 7.467613636363637e-05, + "loss": 0.0105, + "step": 4460 + }, + { + "epoch": 25.34, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.013050341978669167, + "eval_runtime": 125.3731, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 4460 + }, + { + "epoch": 25.4, + "learning_rate": 7.461931818181819e-05, + "loss": 0.0767, + "step": 4470 + }, + { + "epoch": 25.4, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.015735168009996414, + "eval_runtime": 125.2034, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4470 + }, + { + "epoch": 25.45, + "learning_rate": 7.45625e-05, + "loss": 0.0011, + "step": 4480 + }, + { + "epoch": 25.45, + "eval_accuracy": 1.0, + "eval_loss": 0.002707006176933646, + "eval_runtime": 126.016, + "eval_samples_per_second": 2.793, + "eval_steps_per_second": 0.698, + "step": 4480 + }, + { + "epoch": 25.51, + "learning_rate": 7.450568181818182e-05, + "loss": 0.0031, + "step": 4490 + }, + { + "epoch": 25.51, + "eval_accuracy": 1.0, + "eval_loss": 0.002346001798287034, + "eval_runtime": 125.214, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4490 + }, + { + "epoch": 25.57, + "learning_rate": 7.444886363636364e-05, + "loss": 0.0004, + "step": 4500 + }, + { + "epoch": 25.57, + "eval_accuracy": 1.0, + "eval_loss": 0.0009429536294192076, + "eval_runtime": 125.4978, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 4500 + }, + { + "epoch": 25.62, + "learning_rate": 7.439204545454546e-05, + "loss": 0.0011, + "step": 4510 + }, + { + "epoch": 25.62, + "eval_accuracy": 1.0, + "eval_loss": 0.0003920014714822173, + "eval_runtime": 125.2314, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4510 + }, + { + "epoch": 25.68, + "learning_rate": 7.433522727272728e-05, + "loss": 0.0004, + "step": 4520 + }, + { + "epoch": 25.68, + "eval_accuracy": 1.0, + "eval_loss": 0.0002152259403374046, + "eval_runtime": 125.3591, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 4520 + }, + { + "epoch": 25.74, + "learning_rate": 7.42784090909091e-05, + "loss": 0.0003, + "step": 4530 + }, + { + "epoch": 25.74, + "eval_accuracy": 1.0, + "eval_loss": 0.0001528618740849197, + "eval_runtime": 125.5781, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 4530 + }, + { + "epoch": 25.8, + "learning_rate": 7.422159090909091e-05, + "loss": 0.0002, + "step": 4540 + }, + { + "epoch": 25.8, + "eval_accuracy": 1.0, + "eval_loss": 0.00011554767115740106, + "eval_runtime": 125.4126, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 4540 + }, + { + "epoch": 25.85, + "learning_rate": 7.416477272727273e-05, + "loss": 0.0001, + "step": 4550 + }, + { + "epoch": 25.85, + "eval_accuracy": 1.0, + "eval_loss": 9.85810038400814e-05, + "eval_runtime": 125.4555, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 4550 + }, + { + "epoch": 25.91, + "learning_rate": 7.410795454545455e-05, + "loss": 0.0001, + "step": 4560 + }, + { + "epoch": 25.91, + "eval_accuracy": 1.0, + "eval_loss": 9.03877371456474e-05, + "eval_runtime": 125.0427, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 4560 + }, + { + "epoch": 25.97, + "learning_rate": 7.405113636363637e-05, + "loss": 0.0001, + "step": 4570 + }, + { + "epoch": 25.97, + "eval_accuracy": 1.0, + "eval_loss": 8.399073703913018e-05, + "eval_runtime": 125.2698, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 4570 + }, + { + "epoch": 26.02, + "learning_rate": 7.399431818181818e-05, + "loss": 0.0001, + "step": 4580 + }, + { + "epoch": 26.02, + "eval_accuracy": 1.0, + "eval_loss": 7.698312401771545e-05, + "eval_runtime": 125.0705, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 4580 + }, + { + "epoch": 26.08, + "learning_rate": 7.39375e-05, + "loss": 0.0001, + "step": 4590 + }, + { + "epoch": 26.08, + "eval_accuracy": 1.0, + "eval_loss": 7.066300167934969e-05, + "eval_runtime": 124.9955, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 4590 + }, + { + "epoch": 26.14, + "learning_rate": 7.388068181818182e-05, + "loss": 0.0001, + "step": 4600 + }, + { + "epoch": 26.14, + "eval_accuracy": 1.0, + "eval_loss": 6.626071990467608e-05, + "eval_runtime": 125.4641, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 4600 + }, + { + "epoch": 26.19, + "learning_rate": 7.382386363636364e-05, + "loss": 0.0001, + "step": 4610 + }, + { + "epoch": 26.19, + "eval_accuracy": 1.0, + "eval_loss": 6.20887367404066e-05, + "eval_runtime": 125.3418, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 4610 + }, + { + "epoch": 26.25, + "learning_rate": 7.376704545454546e-05, + "loss": 0.0, + "step": 4620 + }, + { + "epoch": 26.25, + "eval_accuracy": 1.0, + "eval_loss": 5.780566789326258e-05, + "eval_runtime": 125.4932, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 4620 + }, + { + "epoch": 26.31, + "learning_rate": 7.371022727272727e-05, + "loss": 0.0001, + "step": 4630 + }, + { + "epoch": 26.31, + "eval_accuracy": 1.0, + "eval_loss": 5.506114393938333e-05, + "eval_runtime": 124.9413, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 4630 + }, + { + "epoch": 26.36, + "learning_rate": 7.365340909090909e-05, + "loss": 0.0001, + "step": 4640 + }, + { + "epoch": 26.36, + "eval_accuracy": 1.0, + "eval_loss": 5.241144754108973e-05, + "eval_runtime": 125.4587, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 4640 + }, + { + "epoch": 26.42, + "learning_rate": 7.359659090909091e-05, + "loss": 0.0, + "step": 4650 + }, + { + "epoch": 26.42, + "eval_accuracy": 1.0, + "eval_loss": 5.0265341997146606e-05, + "eval_runtime": 125.2226, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4650 + }, + { + "epoch": 26.48, + "learning_rate": 7.353977272727273e-05, + "loss": 0.0001, + "step": 4660 + }, + { + "epoch": 26.48, + "eval_accuracy": 1.0, + "eval_loss": 4.8022378905443475e-05, + "eval_runtime": 125.4096, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 4660 + }, + { + "epoch": 26.53, + "learning_rate": 7.348295454545455e-05, + "loss": 0.0001, + "step": 4670 + }, + { + "epoch": 26.53, + "eval_accuracy": 1.0, + "eval_loss": 4.580820314004086e-05, + "eval_runtime": 125.0251, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 4670 + }, + { + "epoch": 26.59, + "learning_rate": 7.342613636363636e-05, + "loss": 0.0, + "step": 4680 + }, + { + "epoch": 26.59, + "eval_accuracy": 1.0, + "eval_loss": 4.3646516132866964e-05, + "eval_runtime": 124.983, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 4680 + }, + { + "epoch": 26.65, + "learning_rate": 7.336931818181818e-05, + "loss": 0.1038, + "step": 4690 + }, + { + "epoch": 26.65, + "eval_accuracy": 1.0, + "eval_loss": 6.768682942492887e-05, + "eval_runtime": 125.5609, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 4690 + }, + { + "epoch": 26.7, + "learning_rate": 7.33125e-05, + "loss": 0.0002, + "step": 4700 + }, + { + "epoch": 26.7, + "eval_accuracy": 1.0, + "eval_loss": 0.0001422773057129234, + "eval_runtime": 125.0235, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 4700 + }, + { + "epoch": 26.76, + "learning_rate": 7.325568181818182e-05, + "loss": 0.0516, + "step": 4710 + }, + { + "epoch": 26.76, + "eval_accuracy": 1.0, + "eval_loss": 7.315725088119507e-05, + "eval_runtime": 125.1332, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 4710 + }, + { + "epoch": 26.82, + "learning_rate": 7.320454545454546e-05, + "loss": 0.1014, + "step": 4720 + }, + { + "epoch": 26.82, + "eval_accuracy": 1.0, + "eval_loss": 7.035075395833701e-05, + "eval_runtime": 125.2123, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4720 + }, + { + "epoch": 26.88, + "learning_rate": 7.314772727272727e-05, + "loss": 0.0633, + "step": 4730 + }, + { + "epoch": 26.88, + "eval_accuracy": 1.0, + "eval_loss": 6.294284685282037e-05, + "eval_runtime": 125.2781, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 4730 + }, + { + "epoch": 26.93, + "learning_rate": 7.30909090909091e-05, + "loss": 0.0001, + "step": 4740 + }, + { + "epoch": 26.93, + "eval_accuracy": 1.0, + "eval_loss": 6.204436795087531e-05, + "eval_runtime": 125.1314, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 4740 + }, + { + "epoch": 26.99, + "learning_rate": 7.303409090909092e-05, + "loss": 0.1523, + "step": 4750 + }, + { + "epoch": 26.99, + "eval_accuracy": 1.0, + "eval_loss": 0.00040512430132366717, + "eval_runtime": 125.2051, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 4750 + }, + { + "epoch": 27.05, + "learning_rate": 7.297727272727272e-05, + "loss": 0.0008, + "step": 4760 + }, + { + "epoch": 27.05, + "eval_accuracy": 1.0, + "eval_loss": 0.0007869113469496369, + "eval_runtime": 125.2734, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 4760 + }, + { + "epoch": 27.1, + "learning_rate": 7.292045454545455e-05, + "loss": 0.0003, + "step": 4770 + }, + { + "epoch": 27.1, + "eval_accuracy": 1.0, + "eval_loss": 0.00020626187324523926, + "eval_runtime": 125.0626, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 4770 + }, + { + "epoch": 27.16, + "learning_rate": 7.286363636363637e-05, + "loss": 0.0362, + "step": 4780 + }, + { + "epoch": 27.16, + "eval_accuracy": 1.0, + "eval_loss": 6.136095180409029e-05, + "eval_runtime": 124.9346, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 4780 + }, + { + "epoch": 27.22, + "learning_rate": 7.280681818181817e-05, + "loss": 0.0001, + "step": 4790 + }, + { + "epoch": 27.22, + "eval_accuracy": 1.0, + "eval_loss": 0.00013784556358586997, + "eval_runtime": 125.3731, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 4790 + }, + { + "epoch": 27.27, + "learning_rate": 7.275e-05, + "loss": 0.0002, + "step": 4800 + }, + { + "epoch": 27.27, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.01473953202366829, + "eval_runtime": 125.3187, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 4800 + }, + { + "epoch": 27.33, + "learning_rate": 7.269318181818182e-05, + "loss": 0.0001, + "step": 4810 + }, + { + "epoch": 27.33, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.017152711749076843, + "eval_runtime": 125.1722, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 4810 + }, + { + "epoch": 27.39, + "learning_rate": 7.263636363636363e-05, + "loss": 0.0001, + "step": 4820 + }, + { + "epoch": 27.39, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.017775364220142365, + "eval_runtime": 125.2621, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 4820 + }, + { + "epoch": 27.44, + "learning_rate": 7.257954545454546e-05, + "loss": 0.0001, + "step": 4830 + }, + { + "epoch": 27.44, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.017923938110470772, + "eval_runtime": 125.5006, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 4830 + }, + { + "epoch": 27.5, + "learning_rate": 7.252272727272728e-05, + "loss": 0.0001, + "step": 4840 + }, + { + "epoch": 27.5, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.017952509224414825, + "eval_runtime": 124.9251, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 4840 + }, + { + "epoch": 27.56, + "learning_rate": 7.246590909090908e-05, + "loss": 0.0852, + "step": 4850 + }, + { + "epoch": 27.56, + "eval_accuracy": 1.0, + "eval_loss": 7.263672887347639e-05, + "eval_runtime": 125.1647, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 4850 + }, + { + "epoch": 27.61, + "learning_rate": 7.240909090909091e-05, + "loss": 0.0001, + "step": 4860 + }, + { + "epoch": 27.61, + "eval_accuracy": 1.0, + "eval_loss": 0.00024259192286990583, + "eval_runtime": 125.3947, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 4860 + }, + { + "epoch": 27.67, + "learning_rate": 7.235227272727273e-05, + "loss": 0.0002, + "step": 4870 + }, + { + "epoch": 27.67, + "eval_accuracy": 1.0, + "eval_loss": 0.00032938001095317304, + "eval_runtime": 125.5401, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 4870 + }, + { + "epoch": 27.73, + "learning_rate": 7.229545454545455e-05, + "loss": 0.0005, + "step": 4880 + }, + { + "epoch": 27.73, + "eval_accuracy": 1.0, + "eval_loss": 0.0001963444665307179, + "eval_runtime": 125.6119, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 4880 + }, + { + "epoch": 27.78, + "learning_rate": 7.223863636363637e-05, + "loss": 0.0002, + "step": 4890 + }, + { + "epoch": 27.78, + "eval_accuracy": 1.0, + "eval_loss": 0.00010629777534632012, + "eval_runtime": 125.6233, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 4890 + }, + { + "epoch": 27.84, + "learning_rate": 7.218181818181819e-05, + "loss": 0.0001, + "step": 4900 + }, + { + "epoch": 27.84, + "eval_accuracy": 1.0, + "eval_loss": 7.651712076039985e-05, + "eval_runtime": 125.0706, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 4900 + }, + { + "epoch": 27.9, + "learning_rate": 7.2125e-05, + "loss": 0.0001, + "step": 4910 + }, + { + "epoch": 27.9, + "eval_accuracy": 1.0, + "eval_loss": 6.487830250989646e-05, + "eval_runtime": 125.4064, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 4910 + }, + { + "epoch": 27.95, + "learning_rate": 7.206818181818182e-05, + "loss": 0.2166, + "step": 4920 + }, + { + "epoch": 27.95, + "eval_accuracy": 1.0, + "eval_loss": 8.850104495650157e-05, + "eval_runtime": 125.4294, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 4920 + }, + { + "epoch": 28.01, + "learning_rate": 7.201136363636364e-05, + "loss": 0.0001, + "step": 4930 + }, + { + "epoch": 28.01, + "eval_accuracy": 1.0, + "eval_loss": 0.000107141378975939, + "eval_runtime": 125.1899, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 4930 + }, + { + "epoch": 28.07, + "learning_rate": 7.195454545454546e-05, + "loss": 0.0001, + "step": 4940 + }, + { + "epoch": 28.07, + "eval_accuracy": 1.0, + "eval_loss": 0.00010262395517202094, + "eval_runtime": 125.417, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 4940 + }, + { + "epoch": 28.12, + "learning_rate": 7.189772727272728e-05, + "loss": 0.0001, + "step": 4950 + }, + { + "epoch": 28.12, + "eval_accuracy": 1.0, + "eval_loss": 8.707188681000844e-05, + "eval_runtime": 125.1622, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 4950 + }, + { + "epoch": 28.18, + "learning_rate": 7.18409090909091e-05, + "loss": 0.2783, + "step": 4960 + }, + { + "epoch": 28.18, + "eval_accuracy": 1.0, + "eval_loss": 7.78687244746834e-05, + "eval_runtime": 125.2755, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 4960 + }, + { + "epoch": 28.24, + "learning_rate": 7.178409090909091e-05, + "loss": 0.0008, + "step": 4970 + }, + { + "epoch": 28.24, + "eval_accuracy": 1.0, + "eval_loss": 0.0005910281324759126, + "eval_runtime": 125.4466, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 4970 + }, + { + "epoch": 28.3, + "learning_rate": 7.172727272727273e-05, + "loss": 0.0006, + "step": 4980 + }, + { + "epoch": 28.3, + "eval_accuracy": 1.0, + "eval_loss": 0.0004354332631919533, + "eval_runtime": 125.1912, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 4980 + }, + { + "epoch": 28.35, + "learning_rate": 7.167045454545455e-05, + "loss": 0.0005, + "step": 4990 + }, + { + "epoch": 28.35, + "eval_accuracy": 1.0, + "eval_loss": 0.00029325587092898786, + "eval_runtime": 125.674, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 4990 + }, + { + "epoch": 28.41, + "learning_rate": 7.161363636363637e-05, + "loss": 0.0003, + "step": 5000 + }, + { + "epoch": 28.41, + "eval_accuracy": 1.0, + "eval_loss": 0.00019672005146276206, + "eval_runtime": 124.9872, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5000 + }, + { + "epoch": 28.47, + "learning_rate": 7.155681818181819e-05, + "loss": 0.0002, + "step": 5010 + }, + { + "epoch": 28.47, + "eval_accuracy": 1.0, + "eval_loss": 0.00015122747572604567, + "eval_runtime": 125.2825, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 5010 + }, + { + "epoch": 28.52, + "learning_rate": 7.15e-05, + "loss": 0.0002, + "step": 5020 + }, + { + "epoch": 28.52, + "eval_accuracy": 1.0, + "eval_loss": 0.00012332234473433346, + "eval_runtime": 125.4079, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5020 + }, + { + "epoch": 28.58, + "learning_rate": 7.144318181818182e-05, + "loss": 0.0001, + "step": 5030 + }, + { + "epoch": 28.58, + "eval_accuracy": 1.0, + "eval_loss": 0.00010597028449410573, + "eval_runtime": 125.2067, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 5030 + }, + { + "epoch": 28.64, + "learning_rate": 7.138636363636364e-05, + "loss": 0.0001, + "step": 5040 + }, + { + "epoch": 28.64, + "eval_accuracy": 1.0, + "eval_loss": 9.461594163440168e-05, + "eval_runtime": 125.1123, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5040 + }, + { + "epoch": 28.69, + "learning_rate": 7.132954545454546e-05, + "loss": 0.0001, + "step": 5050 + }, + { + "epoch": 28.69, + "eval_accuracy": 1.0, + "eval_loss": 8.396228804485872e-05, + "eval_runtime": 125.3181, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 5050 + }, + { + "epoch": 28.75, + "learning_rate": 7.127272727272728e-05, + "loss": 0.0001, + "step": 5060 + }, + { + "epoch": 28.75, + "eval_accuracy": 1.0, + "eval_loss": 7.660416304133832e-05, + "eval_runtime": 124.9398, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 5060 + }, + { + "epoch": 28.81, + "learning_rate": 7.12159090909091e-05, + "loss": 0.0001, + "step": 5070 + }, + { + "epoch": 28.81, + "eval_accuracy": 1.0, + "eval_loss": 7.111409649951383e-05, + "eval_runtime": 125.8253, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 5070 + }, + { + "epoch": 28.86, + "learning_rate": 7.115909090909091e-05, + "loss": 0.0001, + "step": 5080 + }, + { + "epoch": 28.86, + "eval_accuracy": 1.0, + "eval_loss": 6.693974137306213e-05, + "eval_runtime": 125.7519, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 5080 + }, + { + "epoch": 28.92, + "learning_rate": 7.110227272727273e-05, + "loss": 0.0001, + "step": 5090 + }, + { + "epoch": 28.92, + "eval_accuracy": 1.0, + "eval_loss": 6.309151649475098e-05, + "eval_runtime": 125.265, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 5090 + }, + { + "epoch": 28.98, + "learning_rate": 7.104545454545455e-05, + "loss": 0.0001, + "step": 5100 + }, + { + "epoch": 28.98, + "eval_accuracy": 1.0, + "eval_loss": 5.948204852757044e-05, + "eval_runtime": 125.4193, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5100 + }, + { + "epoch": 29.03, + "learning_rate": 7.098863636363637e-05, + "loss": 0.0001, + "step": 5110 + }, + { + "epoch": 29.03, + "eval_accuracy": 1.0, + "eval_loss": 5.6454064178979024e-05, + "eval_runtime": 124.9626, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 5110 + }, + { + "epoch": 29.09, + "learning_rate": 7.093181818181818e-05, + "loss": 0.0009, + "step": 5120 + }, + { + "epoch": 29.09, + "eval_accuracy": 1.0, + "eval_loss": 5.2297658839961514e-05, + "eval_runtime": 125.0728, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 5120 + }, + { + "epoch": 29.15, + "learning_rate": 7.0875e-05, + "loss": 0.0001, + "step": 5130 + }, + { + "epoch": 29.15, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0221712663769722, + "eval_runtime": 124.9967, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5130 + }, + { + "epoch": 29.2, + "learning_rate": 7.081818181818182e-05, + "loss": 0.1262, + "step": 5140 + }, + { + "epoch": 29.2, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.02120029740035534, + "eval_runtime": 125.0495, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 5140 + }, + { + "epoch": 29.26, + "learning_rate": 7.076136363636364e-05, + "loss": 0.0003, + "step": 5150 + }, + { + "epoch": 29.26, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.004268791992217302, + "eval_runtime": 125.0683, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 5150 + }, + { + "epoch": 29.32, + "learning_rate": 7.070454545454546e-05, + "loss": 0.0007, + "step": 5160 + }, + { + "epoch": 29.32, + "eval_accuracy": 1.0, + "eval_loss": 0.001265845145098865, + "eval_runtime": 125.5791, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 5160 + }, + { + "epoch": 29.38, + "learning_rate": 7.064772727272727e-05, + "loss": 0.0003, + "step": 5170 + }, + { + "epoch": 29.38, + "eval_accuracy": 1.0, + "eval_loss": 0.0005623928736895323, + "eval_runtime": 125.2555, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 5170 + }, + { + "epoch": 29.43, + "learning_rate": 7.059090909090909e-05, + "loss": 0.0002, + "step": 5180 + }, + { + "epoch": 29.43, + "eval_accuracy": 1.0, + "eval_loss": 0.0004020898777525872, + "eval_runtime": 125.4732, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 5180 + }, + { + "epoch": 29.49, + "learning_rate": 7.053409090909091e-05, + "loss": 0.0001, + "step": 5190 + }, + { + "epoch": 29.49, + "eval_accuracy": 1.0, + "eval_loss": 0.00033972447272390127, + "eval_runtime": 125.2025, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 5190 + }, + { + "epoch": 29.55, + "learning_rate": 7.047727272727273e-05, + "loss": 0.0001, + "step": 5200 + }, + { + "epoch": 29.55, + "eval_accuracy": 1.0, + "eval_loss": 0.0003047338395845145, + "eval_runtime": 125.1313, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5200 + }, + { + "epoch": 29.6, + "learning_rate": 7.042045454545455e-05, + "loss": 0.0001, + "step": 5210 + }, + { + "epoch": 29.6, + "eval_accuracy": 1.0, + "eval_loss": 0.00028334659873507917, + "eval_runtime": 125.0614, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 5210 + }, + { + "epoch": 29.66, + "learning_rate": 7.036363636363636e-05, + "loss": 0.0001, + "step": 5220 + }, + { + "epoch": 29.66, + "eval_accuracy": 1.0, + "eval_loss": 0.00025112926959991455, + "eval_runtime": 125.5653, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 5220 + }, + { + "epoch": 29.72, + "learning_rate": 7.03068181818182e-05, + "loss": 0.0001, + "step": 5230 + }, + { + "epoch": 29.72, + "eval_accuracy": 1.0, + "eval_loss": 0.00023339247854892164, + "eval_runtime": 125.3237, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 5230 + }, + { + "epoch": 29.77, + "learning_rate": 7.025e-05, + "loss": 0.0001, + "step": 5240 + }, + { + "epoch": 29.77, + "eval_accuracy": 1.0, + "eval_loss": 0.00021613491117022932, + "eval_runtime": 125.2144, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 5240 + }, + { + "epoch": 29.83, + "learning_rate": 7.019318181818182e-05, + "loss": 0.0001, + "step": 5250 + }, + { + "epoch": 29.83, + "eval_accuracy": 1.0, + "eval_loss": 0.00019904394866898656, + "eval_runtime": 124.9554, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 5250 + }, + { + "epoch": 29.89, + "learning_rate": 7.013636363636365e-05, + "loss": 0.0001, + "step": 5260 + }, + { + "epoch": 29.89, + "eval_accuracy": 1.0, + "eval_loss": 0.00018491731316316873, + "eval_runtime": 125.214, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 5260 + }, + { + "epoch": 29.94, + "learning_rate": 7.007954545454545e-05, + "loss": 0.0001, + "step": 5270 + }, + { + "epoch": 29.94, + "eval_accuracy": 1.0, + "eval_loss": 0.00016047264216467738, + "eval_runtime": 125.4432, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 5270 + }, + { + "epoch": 30.0, + "learning_rate": 7.002272727272727e-05, + "loss": 0.0, + "step": 5280 + }, + { + "epoch": 30.0, + "eval_accuracy": 1.0, + "eval_loss": 0.00014507770538330078, + "eval_runtime": 125.226, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 5280 + }, + { + "epoch": 30.06, + "learning_rate": 6.99659090909091e-05, + "loss": 0.0001, + "step": 5290 + }, + { + "epoch": 30.06, + "eval_accuracy": 1.0, + "eval_loss": 0.00013778527500107884, + "eval_runtime": 125.4117, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5290 + }, + { + "epoch": 30.11, + "learning_rate": 6.990909090909091e-05, + "loss": 0.0001, + "step": 5300 + }, + { + "epoch": 30.11, + "eval_accuracy": 1.0, + "eval_loss": 0.0001297200215049088, + "eval_runtime": 124.9887, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5300 + }, + { + "epoch": 30.17, + "learning_rate": 6.985227272727273e-05, + "loss": 0.0, + "step": 5310 + }, + { + "epoch": 30.17, + "eval_accuracy": 1.0, + "eval_loss": 0.0001246587053174153, + "eval_runtime": 125.3104, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 5310 + }, + { + "epoch": 30.23, + "learning_rate": 6.979545454545456e-05, + "loss": 0.0, + "step": 5320 + }, + { + "epoch": 30.23, + "eval_accuracy": 1.0, + "eval_loss": 0.0001159886087407358, + "eval_runtime": 125.0942, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5320 + }, + { + "epoch": 30.28, + "learning_rate": 6.973863636363636e-05, + "loss": 0.0, + "step": 5330 + }, + { + "epoch": 30.28, + "eval_accuracy": 1.0, + "eval_loss": 0.00010932169243460521, + "eval_runtime": 125.0814, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 5330 + }, + { + "epoch": 30.34, + "learning_rate": 6.968181818181818e-05, + "loss": 0.0001, + "step": 5340 + }, + { + "epoch": 30.34, + "eval_accuracy": 1.0, + "eval_loss": 0.00010214745998382568, + "eval_runtime": 125.1739, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 5340 + }, + { + "epoch": 30.4, + "learning_rate": 6.962500000000001e-05, + "loss": 0.0, + "step": 5350 + }, + { + "epoch": 30.4, + "eval_accuracy": 1.0, + "eval_loss": 9.530952229397371e-05, + "eval_runtime": 124.8759, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 5350 + }, + { + "epoch": 30.45, + "learning_rate": 6.956818181818182e-05, + "loss": 0.0, + "step": 5360 + }, + { + "epoch": 30.45, + "eval_accuracy": 1.0, + "eval_loss": 9.028579370351508e-05, + "eval_runtime": 125.182, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 5360 + }, + { + "epoch": 30.51, + "learning_rate": 6.951136363636363e-05, + "loss": 0.0, + "step": 5370 + }, + { + "epoch": 30.51, + "eval_accuracy": 1.0, + "eval_loss": 8.688087837072089e-05, + "eval_runtime": 125.5713, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 5370 + }, + { + "epoch": 30.57, + "learning_rate": 6.945454545454547e-05, + "loss": 0.0, + "step": 5380 + }, + { + "epoch": 30.57, + "eval_accuracy": 1.0, + "eval_loss": 8.329004049301147e-05, + "eval_runtime": 125.3819, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5380 + }, + { + "epoch": 30.62, + "learning_rate": 6.939772727272727e-05, + "loss": 0.0001, + "step": 5390 + }, + { + "epoch": 30.62, + "eval_accuracy": 1.0, + "eval_loss": 7.668916805414483e-05, + "eval_runtime": 124.7562, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 5390 + }, + { + "epoch": 30.68, + "learning_rate": 6.934090909090909e-05, + "loss": 0.0, + "step": 5400 + }, + { + "epoch": 30.68, + "eval_accuracy": 1.0, + "eval_loss": 7.311491935979575e-05, + "eval_runtime": 125.0024, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5400 + }, + { + "epoch": 30.74, + "learning_rate": 6.928409090909092e-05, + "loss": 0.0, + "step": 5410 + }, + { + "epoch": 30.74, + "eval_accuracy": 1.0, + "eval_loss": 7.040730997687206e-05, + "eval_runtime": 125.1275, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5410 + }, + { + "epoch": 30.8, + "learning_rate": 6.922727272727272e-05, + "loss": 0.0002, + "step": 5420 + }, + { + "epoch": 30.8, + "eval_accuracy": 1.0, + "eval_loss": 7.511133298976347e-05, + "eval_runtime": 125.3442, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 5420 + }, + { + "epoch": 30.85, + "learning_rate": 6.917045454545454e-05, + "loss": 0.0, + "step": 5430 + }, + { + "epoch": 30.85, + "eval_accuracy": 1.0, + "eval_loss": 8.386339322896674e-05, + "eval_runtime": 125.4675, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 5430 + }, + { + "epoch": 30.91, + "learning_rate": 6.911363636363637e-05, + "loss": 0.0002, + "step": 5440 + }, + { + "epoch": 30.91, + "eval_accuracy": 1.0, + "eval_loss": 0.00010277093679178506, + "eval_runtime": 125.5118, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 5440 + }, + { + "epoch": 30.97, + "learning_rate": 6.905681818181818e-05, + "loss": 0.0001, + "step": 5450 + }, + { + "epoch": 30.97, + "eval_accuracy": 1.0, + "eval_loss": 0.00010895966261159629, + "eval_runtime": 125.0953, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5450 + }, + { + "epoch": 31.02, + "learning_rate": 6.9e-05, + "loss": 0.0, + "step": 5460 + }, + { + "epoch": 31.02, + "eval_accuracy": 1.0, + "eval_loss": 0.00011004304542439058, + "eval_runtime": 125.3824, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5460 + }, + { + "epoch": 31.08, + "learning_rate": 6.894318181818183e-05, + "loss": 0.0004, + "step": 5470 + }, + { + "epoch": 31.08, + "eval_accuracy": 1.0, + "eval_loss": 1.844966936914716e-05, + "eval_runtime": 125.6726, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 5470 + }, + { + "epoch": 31.14, + "learning_rate": 6.888636363636363e-05, + "loss": 0.0, + "step": 5480 + }, + { + "epoch": 31.14, + "eval_accuracy": 1.0, + "eval_loss": 1.7358835975755937e-05, + "eval_runtime": 125.0901, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5480 + }, + { + "epoch": 31.19, + "learning_rate": 6.882954545454546e-05, + "loss": 0.0, + "step": 5490 + }, + { + "epoch": 31.19, + "eval_accuracy": 1.0, + "eval_loss": 1.7075713913072832e-05, + "eval_runtime": 124.9843, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5490 + }, + { + "epoch": 31.25, + "learning_rate": 6.877272727272728e-05, + "loss": 0.0, + "step": 5500 + }, + { + "epoch": 31.25, + "eval_accuracy": 1.0, + "eval_loss": 1.6918575056479312e-05, + "eval_runtime": 124.6018, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 5500 + }, + { + "epoch": 31.31, + "learning_rate": 6.871590909090909e-05, + "loss": 0.0006, + "step": 5510 + }, + { + "epoch": 31.31, + "eval_accuracy": 1.0, + "eval_loss": 1.7523765563964844e-05, + "eval_runtime": 124.7808, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 5510 + }, + { + "epoch": 31.36, + "learning_rate": 6.865909090909092e-05, + "loss": 0.0001, + "step": 5520 + }, + { + "epoch": 31.36, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.027890264987945557, + "eval_runtime": 125.5946, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 5520 + }, + { + "epoch": 31.42, + "learning_rate": 6.860227272727274e-05, + "loss": 0.0, + "step": 5530 + }, + { + "epoch": 31.42, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03363867849111557, + "eval_runtime": 125.4803, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 5530 + }, + { + "epoch": 31.48, + "learning_rate": 6.854545454545454e-05, + "loss": 0.0001, + "step": 5540 + }, + { + "epoch": 31.48, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03488156571984291, + "eval_runtime": 125.1661, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 5540 + }, + { + "epoch": 31.53, + "learning_rate": 6.848863636363637e-05, + "loss": 0.0, + "step": 5550 + }, + { + "epoch": 31.53, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03532543405890465, + "eval_runtime": 125.2674, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 5550 + }, + { + "epoch": 31.59, + "learning_rate": 6.843181818181819e-05, + "loss": 0.0, + "step": 5560 + }, + { + "epoch": 31.59, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03545850142836571, + "eval_runtime": 125.1434, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5560 + }, + { + "epoch": 31.65, + "learning_rate": 6.8375e-05, + "loss": 0.0, + "step": 5570 + }, + { + "epoch": 31.65, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.035480570048093796, + "eval_runtime": 124.9946, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5570 + }, + { + "epoch": 31.7, + "learning_rate": 6.831818181818183e-05, + "loss": 0.0, + "step": 5580 + }, + { + "epoch": 31.7, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.035347096621990204, + "eval_runtime": 125.5738, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 5580 + }, + { + "epoch": 31.76, + "learning_rate": 6.826136363636364e-05, + "loss": 0.0, + "step": 5590 + }, + { + "epoch": 31.76, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03528031334280968, + "eval_runtime": 125.1019, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5590 + }, + { + "epoch": 31.82, + "learning_rate": 6.820454545454545e-05, + "loss": 0.0, + "step": 5600 + }, + { + "epoch": 31.82, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03528016805648804, + "eval_runtime": 125.3222, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 5600 + }, + { + "epoch": 31.88, + "learning_rate": 6.814772727272728e-05, + "loss": 0.0, + "step": 5610 + }, + { + "epoch": 31.88, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.035280052572488785, + "eval_runtime": 125.4093, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5610 + }, + { + "epoch": 31.93, + "learning_rate": 6.80909090909091e-05, + "loss": 0.0, + "step": 5620 + }, + { + "epoch": 31.93, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03527995944023132, + "eval_runtime": 125.0551, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 5620 + }, + { + "epoch": 31.99, + "learning_rate": 6.80340909090909e-05, + "loss": 0.0, + "step": 5630 + }, + { + "epoch": 31.99, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.035213205963373184, + "eval_runtime": 125.0196, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5630 + }, + { + "epoch": 32.05, + "learning_rate": 6.797727272727273e-05, + "loss": 0.0, + "step": 5640 + }, + { + "epoch": 32.05, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.035168636590242386, + "eval_runtime": 125.3227, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 5640 + }, + { + "epoch": 32.1, + "learning_rate": 6.792045454545455e-05, + "loss": 0.0, + "step": 5650 + }, + { + "epoch": 32.1, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03507964685559273, + "eval_runtime": 124.748, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 5650 + }, + { + "epoch": 32.16, + "learning_rate": 6.786363636363636e-05, + "loss": 0.0, + "step": 5660 + }, + { + "epoch": 32.16, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03499067574739456, + "eval_runtime": 125.4141, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5660 + }, + { + "epoch": 32.22, + "learning_rate": 6.780681818181819e-05, + "loss": 0.0, + "step": 5670 + }, + { + "epoch": 32.22, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494615852832794, + "eval_runtime": 125.3731, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 5670 + }, + { + "epoch": 32.27, + "learning_rate": 6.775000000000001e-05, + "loss": 0.0, + "step": 5680 + }, + { + "epoch": 32.27, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494603559374809, + "eval_runtime": 125.1269, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5680 + }, + { + "epoch": 32.33, + "learning_rate": 6.769318181818181e-05, + "loss": 0.0, + "step": 5690 + }, + { + "epoch": 32.33, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.034945935010910034, + "eval_runtime": 125.0174, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5690 + }, + { + "epoch": 32.39, + "learning_rate": 6.763636363636364e-05, + "loss": 0.0, + "step": 5700 + }, + { + "epoch": 32.39, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494579717516899, + "eval_runtime": 125.299, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 5700 + }, + { + "epoch": 32.44, + "learning_rate": 6.757954545454546e-05, + "loss": 0.0, + "step": 5710 + }, + { + "epoch": 32.44, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494568169116974, + "eval_runtime": 124.7147, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 5710 + }, + { + "epoch": 32.5, + "learning_rate": 6.752272727272727e-05, + "loss": 0.0, + "step": 5720 + }, + { + "epoch": 32.5, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494556248188019, + "eval_runtime": 124.4876, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 5720 + }, + { + "epoch": 32.56, + "learning_rate": 6.74659090909091e-05, + "loss": 0.0, + "step": 5730 + }, + { + "epoch": 32.56, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494545817375183, + "eval_runtime": 124.9493, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 5730 + }, + { + "epoch": 32.61, + "learning_rate": 6.740909090909092e-05, + "loss": 0.0, + "step": 5740 + }, + { + "epoch": 32.61, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03496754541993141, + "eval_runtime": 124.8465, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 5740 + }, + { + "epoch": 32.67, + "learning_rate": 6.735227272727272e-05, + "loss": 0.0, + "step": 5750 + }, + { + "epoch": 32.67, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.034967437386512756, + "eval_runtime": 124.7009, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 5750 + }, + { + "epoch": 32.73, + "learning_rate": 6.729545454545455e-05, + "loss": 0.0, + "step": 5760 + }, + { + "epoch": 32.73, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494513779878616, + "eval_runtime": 125.367, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 5760 + }, + { + "epoch": 32.78, + "learning_rate": 6.723863636363637e-05, + "loss": 0.0, + "step": 5770 + }, + { + "epoch": 32.78, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0349450521171093, + "eval_runtime": 125.1528, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5770 + }, + { + "epoch": 32.84, + "learning_rate": 6.718181818181819e-05, + "loss": 0.0, + "step": 5780 + }, + { + "epoch": 32.84, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03494495153427124, + "eval_runtime": 125.1484, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5780 + }, + { + "epoch": 32.9, + "learning_rate": 6.7125e-05, + "loss": 0.0, + "step": 5790 + }, + { + "epoch": 32.9, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03498926758766174, + "eval_runtime": 125.1032, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5790 + }, + { + "epoch": 32.95, + "learning_rate": 6.706818181818182e-05, + "loss": 0.0, + "step": 5800 + }, + { + "epoch": 32.95, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03498917445540428, + "eval_runtime": 124.9774, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 5800 + }, + { + "epoch": 33.01, + "learning_rate": 6.701136363636364e-05, + "loss": 0.0, + "step": 5810 + }, + { + "epoch": 33.01, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03498907387256622, + "eval_runtime": 125.0925, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5810 + }, + { + "epoch": 33.07, + "learning_rate": 6.695454545454546e-05, + "loss": 0.0, + "step": 5820 + }, + { + "epoch": 33.07, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03498896583914757, + "eval_runtime": 124.995, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5820 + }, + { + "epoch": 33.12, + "learning_rate": 6.689772727272728e-05, + "loss": 0.0, + "step": 5830 + }, + { + "epoch": 33.12, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03498886898159981, + "eval_runtime": 125.7343, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 5830 + }, + { + "epoch": 33.18, + "learning_rate": 6.68409090909091e-05, + "loss": 0.0, + "step": 5840 + }, + { + "epoch": 33.18, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03498878702521324, + "eval_runtime": 124.969, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 5840 + }, + { + "epoch": 33.24, + "learning_rate": 6.678409090909091e-05, + "loss": 0.0, + "step": 5850 + }, + { + "epoch": 33.24, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03416718542575836, + "eval_runtime": 125.3974, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 5850 + }, + { + "epoch": 33.3, + "learning_rate": 6.672727272727273e-05, + "loss": 0.0, + "step": 5860 + }, + { + "epoch": 33.3, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.033589933067560196, + "eval_runtime": 124.962, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 5860 + }, + { + "epoch": 33.35, + "learning_rate": 6.667045454545455e-05, + "loss": 0.0, + "step": 5870 + }, + { + "epoch": 33.35, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.033367861062288284, + "eval_runtime": 125.0933, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5870 + }, + { + "epoch": 33.41, + "learning_rate": 6.661363636363637e-05, + "loss": 0.0, + "step": 5880 + }, + { + "epoch": 33.41, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03332336246967316, + "eval_runtime": 125.0314, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 5880 + }, + { + "epoch": 33.47, + "learning_rate": 6.655681818181819e-05, + "loss": 0.0, + "step": 5890 + }, + { + "epoch": 33.47, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03330105170607567, + "eval_runtime": 125.6404, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 5890 + }, + { + "epoch": 33.52, + "learning_rate": 6.65e-05, + "loss": 0.0, + "step": 5900 + }, + { + "epoch": 33.52, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0333009697496891, + "eval_runtime": 125.3491, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 5900 + }, + { + "epoch": 33.58, + "learning_rate": 6.644318181818182e-05, + "loss": 0.0, + "step": 5910 + }, + { + "epoch": 33.58, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03332308679819107, + "eval_runtime": 125.1471, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 5910 + }, + { + "epoch": 33.64, + "learning_rate": 6.638636363636364e-05, + "loss": 0.0, + "step": 5920 + }, + { + "epoch": 33.64, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0333230160176754, + "eval_runtime": 125.0686, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 5920 + }, + { + "epoch": 33.69, + "learning_rate": 6.632954545454546e-05, + "loss": 0.0, + "step": 5930 + }, + { + "epoch": 33.69, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03332293778657913, + "eval_runtime": 124.763, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 5930 + }, + { + "epoch": 33.75, + "learning_rate": 6.627272727272728e-05, + "loss": 0.0, + "step": 5940 + }, + { + "epoch": 33.75, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03332284837961197, + "eval_runtime": 125.0941, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 5940 + }, + { + "epoch": 33.81, + "learning_rate": 6.62159090909091e-05, + "loss": 0.0, + "step": 5950 + }, + { + "epoch": 33.81, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.033322758972644806, + "eval_runtime": 124.9893, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 5950 + }, + { + "epoch": 33.86, + "learning_rate": 6.615909090909091e-05, + "loss": 0.0, + "step": 5960 + }, + { + "epoch": 33.86, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03332267329096794, + "eval_runtime": 124.8576, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 5960 + }, + { + "epoch": 33.92, + "learning_rate": 6.610227272727273e-05, + "loss": 0.0, + "step": 5970 + }, + { + "epoch": 33.92, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03334479779005051, + "eval_runtime": 124.9288, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 5970 + }, + { + "epoch": 33.98, + "learning_rate": 6.604545454545455e-05, + "loss": 0.0, + "step": 5980 + }, + { + "epoch": 33.98, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.033344727009534836, + "eval_runtime": 124.7591, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 5980 + }, + { + "epoch": 34.03, + "learning_rate": 6.598863636363637e-05, + "loss": 0.0, + "step": 5990 + }, + { + "epoch": 34.03, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03334466740489006, + "eval_runtime": 125.2242, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 5990 + }, + { + "epoch": 34.09, + "learning_rate": 6.593181818181818e-05, + "loss": 0.0, + "step": 6000 + }, + { + "epoch": 34.09, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03334460407495499, + "eval_runtime": 125.3229, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6000 + }, + { + "epoch": 34.15, + "learning_rate": 6.5875e-05, + "loss": 0.0, + "step": 6010 + }, + { + "epoch": 34.15, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03336673974990845, + "eval_runtime": 125.6606, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 6010 + }, + { + "epoch": 34.2, + "learning_rate": 6.581818181818182e-05, + "loss": 0.0, + "step": 6020 + }, + { + "epoch": 34.2, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.03336668387055397, + "eval_runtime": 124.9486, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 6020 + }, + { + "epoch": 34.26, + "learning_rate": 6.576136363636364e-05, + "loss": 0.0, + "step": 6030 + }, + { + "epoch": 34.26, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.0333666168153286, + "eval_runtime": 124.8126, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 6030 + }, + { + "epoch": 34.32, + "learning_rate": 6.570454545454547e-05, + "loss": 0.0001, + "step": 6040 + }, + { + "epoch": 34.32, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.030236991122364998, + "eval_runtime": 125.1675, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 6040 + }, + { + "epoch": 34.38, + "learning_rate": 6.564772727272727e-05, + "loss": 0.0, + "step": 6050 + }, + { + "epoch": 34.38, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02368944324553013, + "eval_runtime": 125.5564, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 6050 + }, + { + "epoch": 34.43, + "learning_rate": 6.559090909090909e-05, + "loss": 0.0, + "step": 6060 + }, + { + "epoch": 34.43, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.018961908295750618, + "eval_runtime": 125.1617, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 6060 + }, + { + "epoch": 34.49, + "learning_rate": 6.553409090909092e-05, + "loss": 0.0, + "step": 6070 + }, + { + "epoch": 34.49, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.017141876742243767, + "eval_runtime": 125.1108, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 6070 + }, + { + "epoch": 34.55, + "learning_rate": 6.547727272727273e-05, + "loss": 0.0, + "step": 6080 + }, + { + "epoch": 34.55, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016620228067040443, + "eval_runtime": 125.7956, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.7, + "step": 6080 + }, + { + "epoch": 34.6, + "learning_rate": 6.542045454545455e-05, + "loss": 0.0, + "step": 6090 + }, + { + "epoch": 34.6, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016464799642562866, + "eval_runtime": 125.5908, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 6090 + }, + { + "epoch": 34.66, + "learning_rate": 6.536363636363638e-05, + "loss": 0.0, + "step": 6100 + }, + { + "epoch": 34.66, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.01640924997627735, + "eval_runtime": 125.5809, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 6100 + }, + { + "epoch": 34.72, + "learning_rate": 6.530681818181818e-05, + "loss": 0.0, + "step": 6110 + }, + { + "epoch": 34.72, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016398094594478607, + "eval_runtime": 125.252, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 6110 + }, + { + "epoch": 34.77, + "learning_rate": 6.525e-05, + "loss": 0.0, + "step": 6120 + }, + { + "epoch": 34.77, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016398044303059578, + "eval_runtime": 125.254, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 6120 + }, + { + "epoch": 34.83, + "learning_rate": 6.519318181818183e-05, + "loss": 0.0, + "step": 6130 + }, + { + "epoch": 34.83, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016353614628314972, + "eval_runtime": 124.9984, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 6130 + }, + { + "epoch": 34.89, + "learning_rate": 6.513636363636364e-05, + "loss": 0.0, + "step": 6140 + }, + { + "epoch": 34.89, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016331372782588005, + "eval_runtime": 124.9586, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 6140 + }, + { + "epoch": 34.94, + "learning_rate": 6.507954545454545e-05, + "loss": 0.0, + "step": 6150 + }, + { + "epoch": 34.94, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016298027709126472, + "eval_runtime": 125.1564, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 6150 + }, + { + "epoch": 35.0, + "learning_rate": 6.502272727272729e-05, + "loss": 0.0, + "step": 6160 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016286874189972878, + "eval_runtime": 125.1196, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 6160 + }, + { + "epoch": 35.06, + "learning_rate": 6.496590909090909e-05, + "loss": 0.0, + "step": 6170 + }, + { + "epoch": 35.06, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016297906637191772, + "eval_runtime": 125.2078, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 6170 + }, + { + "epoch": 35.11, + "learning_rate": 6.490909090909091e-05, + "loss": 0.0, + "step": 6180 + }, + { + "epoch": 35.11, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016297848895192146, + "eval_runtime": 125.3641, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 6180 + }, + { + "epoch": 35.17, + "learning_rate": 6.485227272727274e-05, + "loss": 0.0, + "step": 6190 + }, + { + "epoch": 35.17, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.01630888693034649, + "eval_runtime": 125.3183, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6190 + }, + { + "epoch": 35.23, + "learning_rate": 6.479545454545454e-05, + "loss": 0.0, + "step": 6200 + }, + { + "epoch": 35.23, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016342120245099068, + "eval_runtime": 125.2788, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 6200 + }, + { + "epoch": 35.28, + "learning_rate": 6.473863636363636e-05, + "loss": 0.0, + "step": 6210 + }, + { + "epoch": 35.28, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.01634206622838974, + "eval_runtime": 125.7223, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 6210 + }, + { + "epoch": 35.34, + "learning_rate": 6.46818181818182e-05, + "loss": 0.0, + "step": 6220 + }, + { + "epoch": 35.34, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016308700665831566, + "eval_runtime": 125.4469, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 6220 + }, + { + "epoch": 35.4, + "learning_rate": 6.4625e-05, + "loss": 0.0, + "step": 6230 + }, + { + "epoch": 35.4, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.01631973683834076, + "eval_runtime": 125.0558, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 6230 + }, + { + "epoch": 35.45, + "learning_rate": 6.456818181818182e-05, + "loss": 0.0, + "step": 6240 + }, + { + "epoch": 35.45, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016319680958986282, + "eval_runtime": 126.509, + "eval_samples_per_second": 2.782, + "eval_steps_per_second": 0.696, + "step": 6240 + }, + { + "epoch": 35.51, + "learning_rate": 6.451136363636365e-05, + "loss": 0.0, + "step": 6250 + }, + { + "epoch": 35.51, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016352912411093712, + "eval_runtime": 125.0571, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 6250 + }, + { + "epoch": 35.57, + "learning_rate": 6.445454545454545e-05, + "loss": 0.0, + "step": 6260 + }, + { + "epoch": 35.57, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.016386140137910843, + "eval_runtime": 124.9127, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 6260 + }, + { + "epoch": 35.62, + "learning_rate": 6.439772727272727e-05, + "loss": 0.0, + "step": 6270 + }, + { + "epoch": 35.62, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.01636389084160328, + "eval_runtime": 125.035, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 6270 + }, + { + "epoch": 35.68, + "learning_rate": 6.43409090909091e-05, + "loss": 0.0007, + "step": 6280 + }, + { + "epoch": 35.68, + "eval_accuracy": 1.0, + "eval_loss": 9.598718861525413e-06, + "eval_runtime": 125.3792, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6280 + }, + { + "epoch": 35.74, + "learning_rate": 6.428409090909091e-05, + "loss": 0.0037, + "step": 6290 + }, + { + "epoch": 35.74, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.07262156903743744, + "eval_runtime": 125.3168, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6290 + }, + { + "epoch": 35.8, + "learning_rate": 6.422727272727272e-05, + "loss": 0.0, + "step": 6300 + }, + { + "epoch": 35.8, + "eval_accuracy": 1.0, + "eval_loss": 5.950304512225557e-06, + "eval_runtime": 125.383, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6300 + }, + { + "epoch": 35.85, + "learning_rate": 6.417045454545456e-05, + "loss": 0.0, + "step": 6310 + }, + { + "epoch": 35.85, + "eval_accuracy": 1.0, + "eval_loss": 6.149099590402329e-06, + "eval_runtime": 125.0026, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 6310 + }, + { + "epoch": 35.91, + "learning_rate": 6.411363636363636e-05, + "loss": 0.0, + "step": 6320 + }, + { + "epoch": 35.91, + "eval_accuracy": 1.0, + "eval_loss": 6.238845344341826e-06, + "eval_runtime": 125.3836, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6320 + }, + { + "epoch": 35.97, + "learning_rate": 6.405681818181819e-05, + "loss": 0.0, + "step": 6330 + }, + { + "epoch": 35.97, + "eval_accuracy": 1.0, + "eval_loss": 6.243925326998578e-06, + "eval_runtime": 125.7017, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 6330 + }, + { + "epoch": 36.02, + "learning_rate": 6.400000000000001e-05, + "loss": 0.0, + "step": 6340 + }, + { + "epoch": 36.02, + "eval_accuracy": 1.0, + "eval_loss": 6.22800826022285e-06, + "eval_runtime": 125.9446, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 0.699, + "step": 6340 + }, + { + "epoch": 36.08, + "learning_rate": 6.394318181818182e-05, + "loss": 0.0, + "step": 6350 + }, + { + "epoch": 36.08, + "eval_accuracy": 1.0, + "eval_loss": 6.186691280163359e-06, + "eval_runtime": 125.7245, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 6350 + }, + { + "epoch": 36.14, + "learning_rate": 6.388636363636365e-05, + "loss": 0.0, + "step": 6360 + }, + { + "epoch": 36.14, + "eval_accuracy": 1.0, + "eval_loss": 6.148083684820449e-06, + "eval_runtime": 125.2918, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6360 + }, + { + "epoch": 36.19, + "learning_rate": 6.382954545454546e-05, + "loss": 0.0, + "step": 6370 + }, + { + "epoch": 36.19, + "eval_accuracy": 1.0, + "eval_loss": 6.103380201238906e-06, + "eval_runtime": 125.268, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 6370 + }, + { + "epoch": 36.25, + "learning_rate": 6.377272727272727e-05, + "loss": 0.0, + "step": 6380 + }, + { + "epoch": 36.25, + "eval_accuracy": 1.0, + "eval_loss": 6.067143203836167e-06, + "eval_runtime": 125.2618, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 6380 + }, + { + "epoch": 36.31, + "learning_rate": 6.37159090909091e-05, + "loss": 0.0, + "step": 6390 + }, + { + "epoch": 36.31, + "eval_accuracy": 1.0, + "eval_loss": 6.029890300851548e-06, + "eval_runtime": 124.9628, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 6390 + }, + { + "epoch": 36.36, + "learning_rate": 6.365909090909092e-05, + "loss": 0.0, + "step": 6400 + }, + { + "epoch": 36.36, + "eval_accuracy": 1.0, + "eval_loss": 5.991960279061459e-06, + "eval_runtime": 125.3995, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6400 + }, + { + "epoch": 36.42, + "learning_rate": 6.360227272727272e-05, + "loss": 0.0, + "step": 6410 + }, + { + "epoch": 36.42, + "eval_accuracy": 1.0, + "eval_loss": 5.958770998404361e-06, + "eval_runtime": 125.3195, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6410 + }, + { + "epoch": 36.48, + "learning_rate": 6.354545454545455e-05, + "loss": 0.0, + "step": 6420 + }, + { + "epoch": 36.48, + "eval_accuracy": 1.0, + "eval_loss": 5.912374490435468e-06, + "eval_runtime": 126.1446, + "eval_samples_per_second": 2.79, + "eval_steps_per_second": 0.698, + "step": 6420 + }, + { + "epoch": 36.53, + "learning_rate": 6.348863636363637e-05, + "loss": 0.0, + "step": 6430 + }, + { + "epoch": 36.53, + "eval_accuracy": 1.0, + "eval_loss": 5.888668056286406e-06, + "eval_runtime": 125.6017, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 6430 + }, + { + "epoch": 36.59, + "learning_rate": 6.343181818181818e-05, + "loss": 0.0, + "step": 6440 + }, + { + "epoch": 36.59, + "eval_accuracy": 1.0, + "eval_loss": 5.8551404436002485e-06, + "eval_runtime": 125.5524, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 6440 + }, + { + "epoch": 36.65, + "learning_rate": 6.337500000000001e-05, + "loss": 0.0, + "step": 6450 + }, + { + "epoch": 36.65, + "eval_accuracy": 1.0, + "eval_loss": 5.816194061480928e-06, + "eval_runtime": 125.4871, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 6450 + }, + { + "epoch": 36.7, + "learning_rate": 6.331818181818183e-05, + "loss": 0.0, + "step": 6460 + }, + { + "epoch": 36.7, + "eval_accuracy": 1.0, + "eval_loss": 5.771490577899385e-06, + "eval_runtime": 125.5209, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 6460 + }, + { + "epoch": 36.76, + "learning_rate": 6.326136363636363e-05, + "loss": 0.0, + "step": 6470 + }, + { + "epoch": 36.76, + "eval_accuracy": 1.0, + "eval_loss": 5.733560556109296e-06, + "eval_runtime": 125.4408, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 6470 + }, + { + "epoch": 36.82, + "learning_rate": 6.320454545454546e-05, + "loss": 0.0, + "step": 6480 + }, + { + "epoch": 36.82, + "eval_accuracy": 1.0, + "eval_loss": 5.696307653124677e-06, + "eval_runtime": 125.7105, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 6480 + }, + { + "epoch": 36.88, + "learning_rate": 6.314772727272728e-05, + "loss": 0.0, + "step": 6490 + }, + { + "epoch": 36.88, + "eval_accuracy": 1.0, + "eval_loss": 5.6648118516022805e-06, + "eval_runtime": 125.2532, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 6490 + }, + { + "epoch": 36.93, + "learning_rate": 6.309090909090909e-05, + "loss": 0.0, + "step": 6500 + }, + { + "epoch": 36.93, + "eval_accuracy": 1.0, + "eval_loss": 5.621463060379028e-06, + "eval_runtime": 125.5657, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 6500 + }, + { + "epoch": 36.99, + "learning_rate": 6.303409090909092e-05, + "loss": 0.0, + "step": 6510 + }, + { + "epoch": 36.99, + "eval_accuracy": 1.0, + "eval_loss": 5.583871370617999e-06, + "eval_runtime": 125.4621, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 6510 + }, + { + "epoch": 37.05, + "learning_rate": 6.297727272727274e-05, + "loss": 0.0, + "step": 6520 + }, + { + "epoch": 37.05, + "eval_accuracy": 1.0, + "eval_loss": 5.555084953812184e-06, + "eval_runtime": 125.8203, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 6520 + }, + { + "epoch": 37.1, + "learning_rate": 6.292045454545454e-05, + "loss": 0.0, + "step": 6530 + }, + { + "epoch": 37.1, + "eval_accuracy": 1.0, + "eval_loss": 5.523928393813549e-06, + "eval_runtime": 125.7564, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 6530 + }, + { + "epoch": 37.16, + "learning_rate": 6.286363636363637e-05, + "loss": 0.0, + "step": 6540 + }, + { + "epoch": 37.16, + "eval_accuracy": 1.0, + "eval_loss": 5.48836851521628e-06, + "eval_runtime": 125.8626, + "eval_samples_per_second": 2.797, + "eval_steps_per_second": 0.699, + "step": 6540 + }, + { + "epoch": 37.22, + "learning_rate": 6.280681818181819e-05, + "loss": 0.0, + "step": 6550 + }, + { + "epoch": 37.22, + "eval_accuracy": 1.0, + "eval_loss": 5.4585661928285845e-06, + "eval_runtime": 125.7022, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 6550 + }, + { + "epoch": 37.27, + "learning_rate": 6.275e-05, + "loss": 0.0, + "step": 6560 + }, + { + "epoch": 37.27, + "eval_accuracy": 1.0, + "eval_loss": 5.417587999545503e-06, + "eval_runtime": 125.2556, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 6560 + }, + { + "epoch": 37.33, + "learning_rate": 6.269318181818183e-05, + "loss": 0.0, + "step": 6570 + }, + { + "epoch": 37.33, + "eval_accuracy": 1.0, + "eval_loss": 5.373900421545841e-06, + "eval_runtime": 125.5495, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 6570 + }, + { + "epoch": 37.39, + "learning_rate": 6.263636363636364e-05, + "loss": 0.0, + "step": 6580 + }, + { + "epoch": 37.39, + "eval_accuracy": 1.0, + "eval_loss": 5.288557531457627e-06, + "eval_runtime": 125.9146, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 6580 + }, + { + "epoch": 37.44, + "learning_rate": 6.257954545454546e-05, + "loss": 0.0, + "step": 6590 + }, + { + "epoch": 37.44, + "eval_accuracy": 1.0, + "eval_loss": 5.204230546951294e-06, + "eval_runtime": 125.4214, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6590 + }, + { + "epoch": 37.5, + "learning_rate": 6.252272727272728e-05, + "loss": 0.0, + "step": 6600 + }, + { + "epoch": 37.5, + "eval_accuracy": 1.0, + "eval_loss": 5.156817678653169e-06, + "eval_runtime": 125.6566, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 6600 + }, + { + "epoch": 37.56, + "learning_rate": 6.24659090909091e-05, + "loss": 0.0, + "step": 6610 + }, + { + "epoch": 37.56, + "eval_accuracy": 1.0, + "eval_loss": 5.1243059715488926e-06, + "eval_runtime": 124.9031, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 6610 + }, + { + "epoch": 37.61, + "learning_rate": 6.240909090909092e-05, + "loss": 0.0, + "step": 6620 + }, + { + "epoch": 37.61, + "eval_accuracy": 1.0, + "eval_loss": 5.0978906074306e-06, + "eval_runtime": 125.4774, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 6620 + }, + { + "epoch": 37.67, + "learning_rate": 6.235227272727273e-05, + "loss": 0.0, + "step": 6630 + }, + { + "epoch": 37.67, + "eval_accuracy": 1.0, + "eval_loss": 5.065717687102733e-06, + "eval_runtime": 125.8145, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 6630 + }, + { + "epoch": 37.73, + "learning_rate": 6.229545454545455e-05, + "loss": 0.0, + "step": 6640 + }, + { + "epoch": 37.73, + "eval_accuracy": 1.0, + "eval_loss": 5.038963081460679e-06, + "eval_runtime": 125.2539, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 6640 + }, + { + "epoch": 37.78, + "learning_rate": 6.223863636363637e-05, + "loss": 0.0, + "step": 6650 + }, + { + "epoch": 37.78, + "eval_accuracy": 1.0, + "eval_loss": 5.005435468774522e-06, + "eval_runtime": 125.1413, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 6650 + }, + { + "epoch": 37.84, + "learning_rate": 6.218181818181819e-05, + "loss": 0.0, + "step": 6660 + }, + { + "epoch": 37.84, + "eval_accuracy": 1.0, + "eval_loss": 4.969876044924604e-06, + "eval_runtime": 125.7743, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 6660 + }, + { + "epoch": 37.9, + "learning_rate": 6.2125e-05, + "loss": 0.0, + "step": 6670 + }, + { + "epoch": 37.9, + "eval_accuracy": 1.0, + "eval_loss": 4.9309296628052834e-06, + "eval_runtime": 125.3283, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6670 + }, + { + "epoch": 37.95, + "learning_rate": 6.206818181818182e-05, + "loss": 0.0, + "step": 6680 + }, + { + "epoch": 37.95, + "eval_accuracy": 1.0, + "eval_loss": 4.90383672513417e-06, + "eval_runtime": 125.6514, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 6680 + }, + { + "epoch": 38.01, + "learning_rate": 6.201136363636364e-05, + "loss": 0.0, + "step": 6690 + }, + { + "epoch": 38.01, + "eval_accuracy": 1.0, + "eval_loss": 4.877420906268526e-06, + "eval_runtime": 125.0863, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 6690 + }, + { + "epoch": 38.07, + "learning_rate": 6.195454545454546e-05, + "loss": 0.0, + "step": 6700 + }, + { + "epoch": 38.07, + "eval_accuracy": 1.0, + "eval_loss": 4.837797405343736e-06, + "eval_runtime": 125.7754, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 6700 + }, + { + "epoch": 38.12, + "learning_rate": 6.189772727272728e-05, + "loss": 0.0, + "step": 6710 + }, + { + "epoch": 38.12, + "eval_accuracy": 1.0, + "eval_loss": 4.812397946807323e-06, + "eval_runtime": 125.4681, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 6710 + }, + { + "epoch": 38.18, + "learning_rate": 6.18409090909091e-05, + "loss": 0.0, + "step": 6720 + }, + { + "epoch": 38.18, + "eval_accuracy": 1.0, + "eval_loss": 4.781918050866807e-06, + "eval_runtime": 125.195, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 6720 + }, + { + "epoch": 38.24, + "learning_rate": 6.178409090909091e-05, + "loss": 0.0, + "step": 6730 + }, + { + "epoch": 38.24, + "eval_accuracy": 1.0, + "eval_loss": 4.756518592330394e-06, + "eval_runtime": 125.3461, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 6730 + }, + { + "epoch": 38.3, + "learning_rate": 6.172727272727273e-05, + "loss": 0.0, + "step": 6740 + }, + { + "epoch": 38.3, + "eval_accuracy": 1.0, + "eval_loss": 4.731457465823041e-06, + "eval_runtime": 125.4129, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6740 + }, + { + "epoch": 38.35, + "learning_rate": 6.167045454545455e-05, + "loss": 0.0, + "step": 6750 + }, + { + "epoch": 38.35, + "eval_accuracy": 1.0, + "eval_loss": 4.668465862778248e-06, + "eval_runtime": 125.554, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 6750 + }, + { + "epoch": 38.41, + "learning_rate": 6.161363636363637e-05, + "loss": 0.0, + "step": 6760 + }, + { + "epoch": 38.41, + "eval_accuracy": 1.0, + "eval_loss": 4.607168193615507e-06, + "eval_runtime": 125.5928, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 6760 + }, + { + "epoch": 38.47, + "learning_rate": 6.155681818181819e-05, + "loss": 0.0, + "step": 6770 + }, + { + "epoch": 38.47, + "eval_accuracy": 1.0, + "eval_loss": 4.5709311962127686e-06, + "eval_runtime": 125.9267, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 0.699, + "step": 6770 + }, + { + "epoch": 38.52, + "learning_rate": 6.15e-05, + "loss": 0.0, + "step": 6780 + }, + { + "epoch": 38.52, + "eval_accuracy": 1.0, + "eval_loss": 4.543838258541655e-06, + "eval_runtime": 125.4833, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 6780 + }, + { + "epoch": 38.58, + "learning_rate": 6.144318181818182e-05, + "loss": 0.0, + "step": 6790 + }, + { + "epoch": 38.58, + "eval_accuracy": 1.0, + "eval_loss": 4.519115918810712e-06, + "eval_runtime": 125.2731, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 6790 + }, + { + "epoch": 38.64, + "learning_rate": 6.138636363636364e-05, + "loss": 0.0, + "step": 6800 + }, + { + "epoch": 38.64, + "eval_accuracy": 1.0, + "eval_loss": 4.4950706978852395e-06, + "eval_runtime": 125.7507, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 6800 + }, + { + "epoch": 38.69, + "learning_rate": 6.132954545454546e-05, + "loss": 0.0, + "step": 6810 + }, + { + "epoch": 38.69, + "eval_accuracy": 1.0, + "eval_loss": 4.47644424639293e-06, + "eval_runtime": 125.4019, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6810 + }, + { + "epoch": 38.75, + "learning_rate": 6.127272727272728e-05, + "loss": 0.0, + "step": 6820 + }, + { + "epoch": 38.75, + "eval_accuracy": 1.0, + "eval_loss": 4.460527179617202e-06, + "eval_runtime": 125.554, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 6820 + }, + { + "epoch": 38.81, + "learning_rate": 6.12159090909091e-05, + "loss": 0.0, + "step": 6830 + }, + { + "epoch": 38.81, + "eval_accuracy": 1.0, + "eval_loss": 4.435804839886259e-06, + "eval_runtime": 125.2249, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 6830 + }, + { + "epoch": 38.86, + "learning_rate": 6.115909090909091e-05, + "loss": 0.0, + "step": 6840 + }, + { + "epoch": 38.86, + "eval_accuracy": 1.0, + "eval_loss": 4.411420832184376e-06, + "eval_runtime": 125.528, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 6840 + }, + { + "epoch": 38.92, + "learning_rate": 6.110227272727273e-05, + "loss": 0.0, + "step": 6850 + }, + { + "epoch": 38.92, + "eval_accuracy": 1.0, + "eval_loss": 4.381957296573091e-06, + "eval_runtime": 125.276, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 6850 + }, + { + "epoch": 38.98, + "learning_rate": 6.104545454545455e-05, + "loss": 0.0, + "step": 6860 + }, + { + "epoch": 38.98, + "eval_accuracy": 1.0, + "eval_loss": 4.343349701230181e-06, + "eval_runtime": 125.4622, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 6860 + }, + { + "epoch": 39.03, + "learning_rate": 6.0988636363636366e-05, + "loss": 0.0, + "step": 6870 + }, + { + "epoch": 39.03, + "eval_accuracy": 1.0, + "eval_loss": 4.31930493505206e-06, + "eval_runtime": 125.4155, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6870 + }, + { + "epoch": 39.09, + "learning_rate": 6.093181818181819e-05, + "loss": 0.0, + "step": 6880 + }, + { + "epoch": 39.09, + "eval_accuracy": 1.0, + "eval_loss": 4.305080892663682e-06, + "eval_runtime": 125.294, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6880 + }, + { + "epoch": 39.15, + "learning_rate": 6.0875e-05, + "loss": 0.0, + "step": 6890 + }, + { + "epoch": 39.15, + "eval_accuracy": 1.0, + "eval_loss": 4.284760962036671e-06, + "eval_runtime": 126.0955, + "eval_samples_per_second": 2.792, + "eval_steps_per_second": 0.698, + "step": 6890 + }, + { + "epoch": 39.2, + "learning_rate": 6.081818181818182e-05, + "loss": 0.0, + "step": 6900 + }, + { + "epoch": 39.2, + "eval_accuracy": 1.0, + "eval_loss": 4.268166776455473e-06, + "eval_runtime": 125.3896, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 6900 + }, + { + "epoch": 39.26, + "learning_rate": 6.0761363636363645e-05, + "loss": 0.0, + "step": 6910 + }, + { + "epoch": 39.26, + "eval_accuracy": 1.0, + "eval_loss": 4.249879111739574e-06, + "eval_runtime": 125.1118, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 6910 + }, + { + "epoch": 39.32, + "learning_rate": 6.0704545454545457e-05, + "loss": 0.0, + "step": 6920 + }, + { + "epoch": 39.32, + "eval_accuracy": 1.0, + "eval_loss": 4.233284471411025e-06, + "eval_runtime": 125.3105, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 6920 + }, + { + "epoch": 39.38, + "learning_rate": 6.0647727272727275e-05, + "loss": 0.0, + "step": 6930 + }, + { + "epoch": 39.38, + "eval_accuracy": 1.0, + "eval_loss": 4.215673925500596e-06, + "eval_runtime": 125.1369, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 6930 + }, + { + "epoch": 39.43, + "learning_rate": 6.05909090909091e-05, + "loss": 0.0, + "step": 6940 + }, + { + "epoch": 39.43, + "eval_accuracy": 1.0, + "eval_loss": 4.197725047561107e-06, + "eval_runtime": 125.7828, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.7, + "step": 6940 + }, + { + "epoch": 39.49, + "learning_rate": 6.053409090909091e-05, + "loss": 0.0, + "step": 6950 + }, + { + "epoch": 39.49, + "eval_accuracy": 1.0, + "eval_loss": 4.187903414276661e-06, + "eval_runtime": 125.494, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 6950 + }, + { + "epoch": 39.55, + "learning_rate": 6.047727272727273e-05, + "loss": 0.0, + "step": 6960 + }, + { + "epoch": 39.55, + "eval_accuracy": 1.0, + "eval_loss": 4.169954536337173e-06, + "eval_runtime": 125.4308, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 6960 + }, + { + "epoch": 39.6, + "learning_rate": 6.0420454545454553e-05, + "loss": 0.0, + "step": 6970 + }, + { + "epoch": 39.6, + "eval_accuracy": 1.0, + "eval_loss": 4.156069280725205e-06, + "eval_runtime": 125.0397, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 6970 + }, + { + "epoch": 39.66, + "learning_rate": 6.0363636363636365e-05, + "loss": 0.0, + "step": 6980 + }, + { + "epoch": 39.66, + "eval_accuracy": 1.0, + "eval_loss": 4.127960437472211e-06, + "eval_runtime": 124.782, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 6980 + }, + { + "epoch": 39.72, + "learning_rate": 6.030681818181818e-05, + "loss": 0.0931, + "step": 6990 + }, + { + "epoch": 39.72, + "eval_accuracy": 1.0, + "eval_loss": 4.5292758841242176e-06, + "eval_runtime": 124.9031, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 6990 + }, + { + "epoch": 39.77, + "learning_rate": 6.025000000000001e-05, + "loss": 0.0, + "step": 7000 + }, + { + "epoch": 39.77, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.06971220672130585, + "eval_runtime": 125.1319, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7000 + }, + { + "epoch": 39.83, + "learning_rate": 6.019886363636363e-05, + "loss": 0.1497, + "step": 7010 + }, + { + "epoch": 39.83, + "eval_accuracy": 0.9801136255264282, + "eval_loss": 0.24045677483081818, + "eval_runtime": 124.9307, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 7010 + }, + { + "epoch": 39.89, + "learning_rate": 6.0142045454545456e-05, + "loss": 0.1894, + "step": 7020 + }, + { + "epoch": 39.89, + "eval_accuracy": 1.0, + "eval_loss": 3.619729250203818e-05, + "eval_runtime": 125.2819, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 7020 + }, + { + "epoch": 39.94, + "learning_rate": 6.0085227272727274e-05, + "loss": 0.0001, + "step": 7030 + }, + { + "epoch": 39.94, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.02549550123512745, + "eval_runtime": 125.2288, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7030 + }, + { + "epoch": 40.0, + "learning_rate": 6.00284090909091e-05, + "loss": 0.0003, + "step": 7040 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9886363744735718, + "eval_loss": 0.07287121564149857, + "eval_runtime": 124.7788, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 7040 + }, + { + "epoch": 40.06, + "learning_rate": 5.997159090909091e-05, + "loss": 0.0003, + "step": 7050 + }, + { + "epoch": 40.06, + "eval_accuracy": 0.9857954382896423, + "eval_loss": 0.10117223113775253, + "eval_runtime": 124.8236, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7050 + }, + { + "epoch": 40.11, + "learning_rate": 5.991477272727273e-05, + "loss": 0.1786, + "step": 7060 + }, + { + "epoch": 40.11, + "eval_accuracy": 0.9886363744735718, + "eval_loss": 0.042395737022161484, + "eval_runtime": 124.9496, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7060 + }, + { + "epoch": 40.17, + "learning_rate": 5.985795454545455e-05, + "loss": 0.0058, + "step": 7070 + }, + { + "epoch": 40.17, + "eval_accuracy": 1.0, + "eval_loss": 0.0029279410373419523, + "eval_runtime": 125.5422, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 7070 + }, + { + "epoch": 40.23, + "learning_rate": 5.9801136363636365e-05, + "loss": 0.0018, + "step": 7080 + }, + { + "epoch": 40.23, + "eval_accuracy": 1.0, + "eval_loss": 0.0004978247452527285, + "eval_runtime": 124.8594, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 7080 + }, + { + "epoch": 40.28, + "learning_rate": 5.974431818181818e-05, + "loss": 0.0006, + "step": 7090 + }, + { + "epoch": 40.28, + "eval_accuracy": 1.0, + "eval_loss": 0.00016718222468625754, + "eval_runtime": 125.0547, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 7090 + }, + { + "epoch": 40.34, + "learning_rate": 5.968750000000001e-05, + "loss": 0.0001, + "step": 7100 + }, + { + "epoch": 40.34, + "eval_accuracy": 1.0, + "eval_loss": 0.00010889023542404175, + "eval_runtime": 125.2189, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7100 + }, + { + "epoch": 40.4, + "learning_rate": 5.963068181818182e-05, + "loss": 0.0001, + "step": 7110 + }, + { + "epoch": 40.4, + "eval_accuracy": 1.0, + "eval_loss": 8.916109800338745e-05, + "eval_runtime": 125.1063, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 7110 + }, + { + "epoch": 40.45, + "learning_rate": 5.957386363636364e-05, + "loss": 0.0001, + "step": 7120 + }, + { + "epoch": 40.45, + "eval_accuracy": 1.0, + "eval_loss": 7.886067032814026e-05, + "eval_runtime": 125.2639, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 7120 + }, + { + "epoch": 40.51, + "learning_rate": 5.951704545454546e-05, + "loss": 0.0001, + "step": 7130 + }, + { + "epoch": 40.51, + "eval_accuracy": 1.0, + "eval_loss": 7.29750536265783e-05, + "eval_runtime": 124.9752, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7130 + }, + { + "epoch": 40.57, + "learning_rate": 5.946022727272727e-05, + "loss": 0.0001, + "step": 7140 + }, + { + "epoch": 40.57, + "eval_accuracy": 1.0, + "eval_loss": 6.818433030275628e-05, + "eval_runtime": 1583.1593, + "eval_samples_per_second": 0.222, + "eval_steps_per_second": 0.056, + "step": 7140 + }, + { + "epoch": 40.62, + "learning_rate": 5.940340909090909e-05, + "loss": 0.0001, + "step": 7150 + }, + { + "epoch": 40.62, + "eval_accuracy": 1.0, + "eval_loss": 6.383217260008678e-05, + "eval_runtime": 125.3148, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 7150 + }, + { + "epoch": 40.68, + "learning_rate": 5.9346590909090916e-05, + "loss": 0.0001, + "step": 7160 + }, + { + "epoch": 40.68, + "eval_accuracy": 1.0, + "eval_loss": 5.9960239013889804e-05, + "eval_runtime": 125.4403, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 7160 + }, + { + "epoch": 40.74, + "learning_rate": 5.928977272727273e-05, + "loss": 0.0001, + "step": 7170 + }, + { + "epoch": 40.74, + "eval_accuracy": 1.0, + "eval_loss": 5.706264346372336e-05, + "eval_runtime": 125.1124, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7170 + }, + { + "epoch": 40.8, + "learning_rate": 5.9232954545454545e-05, + "loss": 0.0001, + "step": 7180 + }, + { + "epoch": 40.8, + "eval_accuracy": 1.0, + "eval_loss": 5.438788502942771e-05, + "eval_runtime": 125.0946, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 7180 + }, + { + "epoch": 40.85, + "learning_rate": 5.917613636363637e-05, + "loss": 0.0001, + "step": 7190 + }, + { + "epoch": 40.85, + "eval_accuracy": 1.0, + "eval_loss": 5.215880446485244e-05, + "eval_runtime": 125.4686, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 7190 + }, + { + "epoch": 40.91, + "learning_rate": 5.911931818181818e-05, + "loss": 0.0, + "step": 7200 + }, + { + "epoch": 40.91, + "eval_accuracy": 1.0, + "eval_loss": 5.0537626520963386e-05, + "eval_runtime": 125.2141, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7200 + }, + { + "epoch": 40.97, + "learning_rate": 5.90625e-05, + "loss": 0.0566, + "step": 7210 + }, + { + "epoch": 40.97, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02528224140405655, + "eval_runtime": 125.2069, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7210 + }, + { + "epoch": 41.02, + "learning_rate": 5.9005681818181824e-05, + "loss": 0.1135, + "step": 7220 + }, + { + "epoch": 41.02, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.028770500794053078, + "eval_runtime": 125.3885, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 7220 + }, + { + "epoch": 41.08, + "learning_rate": 5.8948863636363635e-05, + "loss": 0.0002, + "step": 7230 + }, + { + "epoch": 41.08, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.025004040449857712, + "eval_runtime": 125.2085, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7230 + }, + { + "epoch": 41.14, + "learning_rate": 5.889204545454545e-05, + "loss": 0.0787, + "step": 7240 + }, + { + "epoch": 41.14, + "eval_accuracy": 1.0, + "eval_loss": 0.009448859840631485, + "eval_runtime": 125.3403, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 7240 + }, + { + "epoch": 41.19, + "learning_rate": 5.883522727272728e-05, + "loss": 0.0039, + "step": 7250 + }, + { + "epoch": 41.19, + "eval_accuracy": 1.0, + "eval_loss": 0.00023394246818497777, + "eval_runtime": 125.1978, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7250 + }, + { + "epoch": 41.25, + "learning_rate": 5.877840909090909e-05, + "loss": 0.0003, + "step": 7260 + }, + { + "epoch": 41.25, + "eval_accuracy": 1.0, + "eval_loss": 0.00020482559921219945, + "eval_runtime": 125.4808, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 7260 + }, + { + "epoch": 41.31, + "learning_rate": 5.872159090909091e-05, + "loss": 0.0002, + "step": 7270 + }, + { + "epoch": 41.31, + "eval_accuracy": 1.0, + "eval_loss": 0.00018427317263558507, + "eval_runtime": 125.3473, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 7270 + }, + { + "epoch": 41.36, + "learning_rate": 5.866477272727273e-05, + "loss": 0.0002, + "step": 7280 + }, + { + "epoch": 41.36, + "eval_accuracy": 1.0, + "eval_loss": 0.00014931200712453574, + "eval_runtime": 125.1325, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7280 + }, + { + "epoch": 41.42, + "learning_rate": 5.860795454545454e-05, + "loss": 0.0002, + "step": 7290 + }, + { + "epoch": 41.42, + "eval_accuracy": 1.0, + "eval_loss": 0.000118328767712228, + "eval_runtime": 125.5272, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 7290 + }, + { + "epoch": 41.48, + "learning_rate": 5.8551136363636375e-05, + "loss": 0.0001, + "step": 7300 + }, + { + "epoch": 41.48, + "eval_accuracy": 1.0, + "eval_loss": 9.786879672901705e-05, + "eval_runtime": 125.234, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7300 + }, + { + "epoch": 41.53, + "learning_rate": 5.8494318181818186e-05, + "loss": 0.0001, + "step": 7310 + }, + { + "epoch": 41.53, + "eval_accuracy": 1.0, + "eval_loss": 8.637288556201383e-05, + "eval_runtime": 125.5108, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 7310 + }, + { + "epoch": 41.59, + "learning_rate": 5.84375e-05, + "loss": 0.0001, + "step": 7320 + }, + { + "epoch": 41.59, + "eval_accuracy": 1.0, + "eval_loss": 7.888674736022949e-05, + "eval_runtime": 125.3801, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 7320 + }, + { + "epoch": 41.65, + "learning_rate": 5.838068181818183e-05, + "loss": 0.0001, + "step": 7330 + }, + { + "epoch": 41.65, + "eval_accuracy": 1.0, + "eval_loss": 7.27522128727287e-05, + "eval_runtime": 125.0592, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 7330 + }, + { + "epoch": 41.7, + "learning_rate": 5.832386363636364e-05, + "loss": 0.0001, + "step": 7340 + }, + { + "epoch": 41.7, + "eval_accuracy": 1.0, + "eval_loss": 6.797130481572822e-05, + "eval_runtime": 125.1611, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7340 + }, + { + "epoch": 41.76, + "learning_rate": 5.826704545454545e-05, + "loss": 0.0001, + "step": 7350 + }, + { + "epoch": 41.76, + "eval_accuracy": 1.0, + "eval_loss": 6.371566996676847e-05, + "eval_runtime": 124.813, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7350 + }, + { + "epoch": 41.82, + "learning_rate": 5.821022727272728e-05, + "loss": 0.0001, + "step": 7360 + }, + { + "epoch": 41.82, + "eval_accuracy": 1.0, + "eval_loss": 6.009536809870042e-05, + "eval_runtime": 124.76, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 7360 + }, + { + "epoch": 41.88, + "learning_rate": 5.8153409090909094e-05, + "loss": 0.0001, + "step": 7370 + }, + { + "epoch": 41.88, + "eval_accuracy": 1.0, + "eval_loss": 5.722113564843312e-05, + "eval_runtime": 125.603, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 7370 + }, + { + "epoch": 41.93, + "learning_rate": 5.8096590909090906e-05, + "loss": 0.0001, + "step": 7380 + }, + { + "epoch": 41.93, + "eval_accuracy": 1.0, + "eval_loss": 5.4739415645599365e-05, + "eval_runtime": 125.1991, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7380 + }, + { + "epoch": 41.99, + "learning_rate": 5.803977272727274e-05, + "loss": 0.0001, + "step": 7390 + }, + { + "epoch": 41.99, + "eval_accuracy": 1.0, + "eval_loss": 5.317344039212912e-05, + "eval_runtime": 124.9423, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7390 + }, + { + "epoch": 42.05, + "learning_rate": 5.798295454545455e-05, + "loss": 0.0001, + "step": 7400 + }, + { + "epoch": 42.05, + "eval_accuracy": 1.0, + "eval_loss": 5.167011477169581e-05, + "eval_runtime": 124.7907, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 7400 + }, + { + "epoch": 42.1, + "learning_rate": 5.792613636363636e-05, + "loss": 0.0001, + "step": 7410 + }, + { + "epoch": 42.1, + "eval_accuracy": 1.0, + "eval_loss": 5.048107050242834e-05, + "eval_runtime": 125.1549, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7410 + }, + { + "epoch": 42.16, + "learning_rate": 5.786931818181819e-05, + "loss": 0.0001, + "step": 7420 + }, + { + "epoch": 42.16, + "eval_accuracy": 1.0, + "eval_loss": 4.8980455176206306e-05, + "eval_runtime": 124.8207, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7420 + }, + { + "epoch": 42.22, + "learning_rate": 5.78125e-05, + "loss": 0.0001, + "step": 7430 + }, + { + "epoch": 42.22, + "eval_accuracy": 1.0, + "eval_loss": 4.713813177659176e-05, + "eval_runtime": 124.638, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 7430 + }, + { + "epoch": 42.27, + "learning_rate": 5.7755681818181814e-05, + "loss": 0.0001, + "step": 7440 + }, + { + "epoch": 42.27, + "eval_accuracy": 1.0, + "eval_loss": 4.560568049782887e-05, + "eval_runtime": 124.7752, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 7440 + }, + { + "epoch": 42.33, + "learning_rate": 5.7698863636363645e-05, + "loss": 0.0001, + "step": 7450 + }, + { + "epoch": 42.33, + "eval_accuracy": 1.0, + "eval_loss": 4.4262207666179165e-05, + "eval_runtime": 124.8214, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7450 + }, + { + "epoch": 42.39, + "learning_rate": 5.764204545454546e-05, + "loss": 0.0001, + "step": 7460 + }, + { + "epoch": 42.39, + "eval_accuracy": 1.0, + "eval_loss": 4.272061414667405e-05, + "eval_runtime": 125.2794, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 7460 + }, + { + "epoch": 42.44, + "learning_rate": 5.758522727272727e-05, + "loss": 0.0, + "step": 7470 + }, + { + "epoch": 42.44, + "eval_accuracy": 1.0, + "eval_loss": 4.103678293176927e-05, + "eval_runtime": 124.952, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7470 + }, + { + "epoch": 42.5, + "learning_rate": 5.75284090909091e-05, + "loss": 0.0, + "step": 7480 + }, + { + "epoch": 42.5, + "eval_accuracy": 1.0, + "eval_loss": 3.96350551454816e-05, + "eval_runtime": 124.9746, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7480 + }, + { + "epoch": 42.56, + "learning_rate": 5.747159090909091e-05, + "loss": 0.0, + "step": 7490 + }, + { + "epoch": 42.56, + "eval_accuracy": 1.0, + "eval_loss": 3.812665454461239e-05, + "eval_runtime": 124.8406, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7490 + }, + { + "epoch": 42.61, + "learning_rate": 5.741477272727272e-05, + "loss": 0.0, + "step": 7500 + }, + { + "epoch": 42.61, + "eval_accuracy": 1.0, + "eval_loss": 3.6983667087042704e-05, + "eval_runtime": 125.2871, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 7500 + }, + { + "epoch": 42.67, + "learning_rate": 5.7357954545454554e-05, + "loss": 0.0001, + "step": 7510 + }, + { + "epoch": 42.67, + "eval_accuracy": 1.0, + "eval_loss": 3.637508780229837e-05, + "eval_runtime": 125.0412, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 7510 + }, + { + "epoch": 42.73, + "learning_rate": 5.7301136363636365e-05, + "loss": 0.0, + "step": 7520 + }, + { + "epoch": 42.73, + "eval_accuracy": 1.0, + "eval_loss": 3.573399953893386e-05, + "eval_runtime": 124.95, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7520 + }, + { + "epoch": 42.78, + "learning_rate": 5.724431818181818e-05, + "loss": 0.0, + "step": 7530 + }, + { + "epoch": 42.78, + "eval_accuracy": 1.0, + "eval_loss": 3.5352666600374505e-05, + "eval_runtime": 124.9785, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 7530 + }, + { + "epoch": 42.84, + "learning_rate": 5.718750000000001e-05, + "loss": 0.0, + "step": 7540 + }, + { + "epoch": 42.84, + "eval_accuracy": 1.0, + "eval_loss": 3.4850090742111206e-05, + "eval_runtime": 124.8259, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7540 + }, + { + "epoch": 42.9, + "learning_rate": 5.713068181818182e-05, + "loss": 0.0815, + "step": 7550 + }, + { + "epoch": 42.9, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.01628447137773037, + "eval_runtime": 125.0588, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 7550 + }, + { + "epoch": 42.95, + "learning_rate": 5.707386363636364e-05, + "loss": 0.0001, + "step": 7560 + }, + { + "epoch": 42.95, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.023417841643095016, + "eval_runtime": 124.8162, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7560 + }, + { + "epoch": 43.01, + "learning_rate": 5.701704545454546e-05, + "loss": 0.0001, + "step": 7570 + }, + { + "epoch": 43.01, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.024348806589841843, + "eval_runtime": 125.5065, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 7570 + }, + { + "epoch": 43.07, + "learning_rate": 5.696022727272727e-05, + "loss": 0.0002, + "step": 7580 + }, + { + "epoch": 43.07, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.024346329271793365, + "eval_runtime": 125.1446, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7580 + }, + { + "epoch": 43.12, + "learning_rate": 5.69034090909091e-05, + "loss": 0.0001, + "step": 7590 + }, + { + "epoch": 43.12, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02429196424782276, + "eval_runtime": 125.1988, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7590 + }, + { + "epoch": 43.18, + "learning_rate": 5.6846590909090916e-05, + "loss": 0.0001, + "step": 7600 + }, + { + "epoch": 43.18, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.024270594120025635, + "eval_runtime": 125.1161, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7600 + }, + { + "epoch": 43.24, + "learning_rate": 5.678977272727273e-05, + "loss": 0.0001, + "step": 7610 + }, + { + "epoch": 43.24, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.023876434192061424, + "eval_runtime": 125.4831, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 7610 + }, + { + "epoch": 43.3, + "learning_rate": 5.673295454545455e-05, + "loss": 0.0001, + "step": 7620 + }, + { + "epoch": 43.3, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.023419933393597603, + "eval_runtime": 125.0431, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 7620 + }, + { + "epoch": 43.35, + "learning_rate": 5.667613636363637e-05, + "loss": 0.0001, + "step": 7630 + }, + { + "epoch": 43.35, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.023300619795918465, + "eval_runtime": 125.0077, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 7630 + }, + { + "epoch": 43.41, + "learning_rate": 5.661931818181818e-05, + "loss": 0.0001, + "step": 7640 + }, + { + "epoch": 43.41, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.023205911740660667, + "eval_runtime": 125.1987, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7640 + }, + { + "epoch": 43.47, + "learning_rate": 5.6562500000000006e-05, + "loss": 0.0001, + "step": 7650 + }, + { + "epoch": 43.47, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.023089254274964333, + "eval_runtime": 125.1345, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7650 + }, + { + "epoch": 43.52, + "learning_rate": 5.6505681818181824e-05, + "loss": 0.0001, + "step": 7660 + }, + { + "epoch": 43.52, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02297447808086872, + "eval_runtime": 125.7047, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 7660 + }, + { + "epoch": 43.58, + "learning_rate": 5.6448863636363635e-05, + "loss": 0.0001, + "step": 7670 + }, + { + "epoch": 43.58, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022949684411287308, + "eval_runtime": 125.0415, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 7670 + }, + { + "epoch": 43.64, + "learning_rate": 5.639204545454546e-05, + "loss": 0.0001, + "step": 7680 + }, + { + "epoch": 43.64, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022858820855617523, + "eval_runtime": 125.515, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 7680 + }, + { + "epoch": 43.69, + "learning_rate": 5.633522727272728e-05, + "loss": 0.0001, + "step": 7690 + }, + { + "epoch": 43.69, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02275734581053257, + "eval_runtime": 124.8292, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 7690 + }, + { + "epoch": 43.75, + "learning_rate": 5.627840909090909e-05, + "loss": 0.0001, + "step": 7700 + }, + { + "epoch": 43.75, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022678004577755928, + "eval_runtime": 124.9402, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7700 + }, + { + "epoch": 43.81, + "learning_rate": 5.6221590909090914e-05, + "loss": 0.0001, + "step": 7710 + }, + { + "epoch": 43.81, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02262079156935215, + "eval_runtime": 125.8137, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 7710 + }, + { + "epoch": 43.86, + "learning_rate": 5.616477272727273e-05, + "loss": 0.0001, + "step": 7720 + }, + { + "epoch": 43.86, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02267303317785263, + "eval_runtime": 125.2138, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7720 + }, + { + "epoch": 43.92, + "learning_rate": 5.6107954545454544e-05, + "loss": 0.0, + "step": 7730 + }, + { + "epoch": 43.92, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02270560897886753, + "eval_runtime": 125.3306, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 7730 + }, + { + "epoch": 43.98, + "learning_rate": 5.605113636363637e-05, + "loss": 0.0001, + "step": 7740 + }, + { + "epoch": 43.98, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022748924791812897, + "eval_runtime": 125.4791, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 7740 + }, + { + "epoch": 44.03, + "learning_rate": 5.5994318181818186e-05, + "loss": 0.0, + "step": 7750 + }, + { + "epoch": 44.03, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022692037746310234, + "eval_runtime": 125.086, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 7750 + }, + { + "epoch": 44.09, + "learning_rate": 5.59375e-05, + "loss": 0.0, + "step": 7760 + }, + { + "epoch": 44.09, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02253543771803379, + "eval_runtime": 125.022, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 7760 + }, + { + "epoch": 44.15, + "learning_rate": 5.588068181818182e-05, + "loss": 0.0, + "step": 7770 + }, + { + "epoch": 44.15, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022412054240703583, + "eval_runtime": 125.0881, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 7770 + }, + { + "epoch": 44.2, + "learning_rate": 5.582386363636364e-05, + "loss": 0.0, + "step": 7780 + }, + { + "epoch": 44.2, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022355616092681885, + "eval_runtime": 125.3785, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 7780 + }, + { + "epoch": 44.26, + "learning_rate": 5.576704545454545e-05, + "loss": 0.0, + "step": 7790 + }, + { + "epoch": 44.26, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02235487662255764, + "eval_runtime": 124.9176, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 7790 + }, + { + "epoch": 44.32, + "learning_rate": 5.5710227272727277e-05, + "loss": 0.0, + "step": 7800 + }, + { + "epoch": 44.32, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022343160584568977, + "eval_runtime": 125.3059, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 7800 + }, + { + "epoch": 44.38, + "learning_rate": 5.5653409090909095e-05, + "loss": 0.0, + "step": 7810 + }, + { + "epoch": 44.38, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022342540323734283, + "eval_runtime": 124.9989, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 7810 + }, + { + "epoch": 44.43, + "learning_rate": 5.5596590909090906e-05, + "loss": 0.0, + "step": 7820 + }, + { + "epoch": 44.43, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.022375257685780525, + "eval_runtime": 124.7904, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 7820 + }, + { + "epoch": 44.49, + "learning_rate": 5.553977272727273e-05, + "loss": 0.0, + "step": 7830 + }, + { + "epoch": 44.49, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02236352674663067, + "eval_runtime": 125.1174, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7830 + }, + { + "epoch": 44.55, + "learning_rate": 5.548295454545455e-05, + "loss": 0.0, + "step": 7840 + }, + { + "epoch": 44.55, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02237412892282009, + "eval_runtime": 124.7721, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 7840 + }, + { + "epoch": 44.6, + "learning_rate": 5.5426136363636373e-05, + "loss": 0.0048, + "step": 7850 + }, + { + "epoch": 44.6, + "eval_accuracy": 1.0, + "eval_loss": 3.12131924147252e-05, + "eval_runtime": 125.2517, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 7850 + }, + { + "epoch": 44.66, + "learning_rate": 5.5369318181818185e-05, + "loss": 0.0, + "step": 7860 + }, + { + "epoch": 44.66, + "eval_accuracy": 1.0, + "eval_loss": 3.338164970045909e-05, + "eval_runtime": 124.6681, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 7860 + }, + { + "epoch": 44.72, + "learning_rate": 5.53125e-05, + "loss": 0.0, + "step": 7870 + }, + { + "epoch": 44.72, + "eval_accuracy": 1.0, + "eval_loss": 3.153018042212352e-05, + "eval_runtime": 125.8452, + "eval_samples_per_second": 2.797, + "eval_steps_per_second": 0.699, + "step": 7870 + }, + { + "epoch": 44.77, + "learning_rate": 5.525568181818183e-05, + "loss": 0.0, + "step": 7880 + }, + { + "epoch": 44.77, + "eval_accuracy": 1.0, + "eval_loss": 2.977726217068266e-05, + "eval_runtime": 125.4609, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 7880 + }, + { + "epoch": 44.83, + "learning_rate": 5.519886363636364e-05, + "loss": 0.0, + "step": 7890 + }, + { + "epoch": 44.83, + "eval_accuracy": 1.0, + "eval_loss": 2.783164381980896e-05, + "eval_runtime": 125.19, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7890 + }, + { + "epoch": 44.89, + "learning_rate": 5.514204545454546e-05, + "loss": 0.0, + "step": 7900 + }, + { + "epoch": 44.89, + "eval_accuracy": 1.0, + "eval_loss": 2.5875866413116455e-05, + "eval_runtime": 125.0308, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 7900 + }, + { + "epoch": 44.94, + "learning_rate": 5.508522727272728e-05, + "loss": 0.0, + "step": 7910 + }, + { + "epoch": 44.94, + "eval_accuracy": 1.0, + "eval_loss": 2.446330472594127e-05, + "eval_runtime": 125.1854, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7910 + }, + { + "epoch": 45.0, + "learning_rate": 5.502840909090909e-05, + "loss": 0.0, + "step": 7920 + }, + { + "epoch": 45.0, + "eval_accuracy": 1.0, + "eval_loss": 2.360750295338221e-05, + "eval_runtime": 125.1624, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 7920 + }, + { + "epoch": 45.06, + "learning_rate": 5.497159090909091e-05, + "loss": 0.0, + "step": 7930 + }, + { + "epoch": 45.06, + "eval_accuracy": 1.0, + "eval_loss": 2.290918018843513e-05, + "eval_runtime": 125.1511, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7930 + }, + { + "epoch": 45.11, + "learning_rate": 5.4914772727272736e-05, + "loss": 0.0, + "step": 7940 + }, + { + "epoch": 45.11, + "eval_accuracy": 1.0, + "eval_loss": 2.2121450456324965e-05, + "eval_runtime": 125.6274, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 7940 + }, + { + "epoch": 45.17, + "learning_rate": 5.485795454545455e-05, + "loss": 0.0, + "step": 7950 + }, + { + "epoch": 45.17, + "eval_accuracy": 1.0, + "eval_loss": 2.154098365281243e-05, + "eval_runtime": 125.2157, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 7950 + }, + { + "epoch": 45.23, + "learning_rate": 5.4801136363636365e-05, + "loss": 0.0, + "step": 7960 + }, + { + "epoch": 45.23, + "eval_accuracy": 1.0, + "eval_loss": 2.0835886971326545e-05, + "eval_runtime": 124.9553, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 7960 + }, + { + "epoch": 45.28, + "learning_rate": 5.474431818181819e-05, + "loss": 0.0, + "step": 7970 + }, + { + "epoch": 45.28, + "eval_accuracy": 1.0, + "eval_loss": 2.0325860532466322e-05, + "eval_runtime": 124.7069, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 7970 + }, + { + "epoch": 45.34, + "learning_rate": 5.46875e-05, + "loss": 0.0, + "step": 7980 + }, + { + "epoch": 45.34, + "eval_accuracy": 1.0, + "eval_loss": 1.9825318304356188e-05, + "eval_runtime": 124.7933, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 7980 + }, + { + "epoch": 45.4, + "learning_rate": 5.463068181818182e-05, + "loss": 0.0, + "step": 7990 + }, + { + "epoch": 45.4, + "eval_accuracy": 1.0, + "eval_loss": 1.932477425725665e-05, + "eval_runtime": 125.1307, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 7990 + }, + { + "epoch": 45.45, + "learning_rate": 5.4573863636363644e-05, + "loss": 0.0, + "step": 8000 + }, + { + "epoch": 45.45, + "eval_accuracy": 1.0, + "eval_loss": 1.880424861155916e-05, + "eval_runtime": 125.0231, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 8000 + }, + { + "epoch": 45.51, + "learning_rate": 5.4517045454545455e-05, + "loss": 0.0, + "step": 8010 + }, + { + "epoch": 45.51, + "eval_accuracy": 1.0, + "eval_loss": 1.8398532120045274e-05, + "eval_runtime": 124.8611, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 8010 + }, + { + "epoch": 45.57, + "learning_rate": 5.446022727272727e-05, + "loss": 0.0, + "step": 8020 + }, + { + "epoch": 45.57, + "eval_accuracy": 1.0, + "eval_loss": 1.81814484676579e-05, + "eval_runtime": 124.8348, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 8020 + }, + { + "epoch": 45.62, + "learning_rate": 5.44034090909091e-05, + "loss": 0.0, + "step": 8030 + }, + { + "epoch": 45.62, + "eval_accuracy": 1.0, + "eval_loss": 1.8309463484911248e-05, + "eval_runtime": 124.7126, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 8030 + }, + { + "epoch": 45.68, + "learning_rate": 5.434659090909091e-05, + "loss": 0.0, + "step": 8040 + }, + { + "epoch": 45.68, + "eval_accuracy": 1.0, + "eval_loss": 1.8633225408848375e-05, + "eval_runtime": 125.42, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 8040 + }, + { + "epoch": 45.74, + "learning_rate": 5.428977272727273e-05, + "loss": 0.0, + "step": 8050 + }, + { + "epoch": 45.74, + "eval_accuracy": 1.0, + "eval_loss": 1.8968839867739007e-05, + "eval_runtime": 125.0447, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 8050 + }, + { + "epoch": 45.8, + "learning_rate": 5.423295454545455e-05, + "loss": 0.0, + "step": 8060 + }, + { + "epoch": 45.8, + "eval_accuracy": 1.0, + "eval_loss": 1.90335249499185e-05, + "eval_runtime": 125.1811, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 8060 + }, + { + "epoch": 45.85, + "learning_rate": 5.4176136363636363e-05, + "loss": 0.0, + "step": 8070 + }, + { + "epoch": 45.85, + "eval_accuracy": 1.0, + "eval_loss": 1.9684772269101813e-05, + "eval_runtime": 124.9878, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8070 + }, + { + "epoch": 45.91, + "learning_rate": 5.411931818181818e-05, + "loss": 0.0, + "step": 8080 + }, + { + "epoch": 45.91, + "eval_accuracy": 1.0, + "eval_loss": 2.0622868760256097e-05, + "eval_runtime": 124.7586, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 8080 + }, + { + "epoch": 45.97, + "learning_rate": 5.4062500000000006e-05, + "loss": 0.0, + "step": 8090 + }, + { + "epoch": 45.97, + "eval_accuracy": 1.0, + "eval_loss": 2.126903746102471e-05, + "eval_runtime": 124.7572, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 8090 + }, + { + "epoch": 46.02, + "learning_rate": 5.400568181818182e-05, + "loss": 0.0, + "step": 8100 + }, + { + "epoch": 46.02, + "eval_accuracy": 1.0, + "eval_loss": 2.0897185095236637e-05, + "eval_runtime": 124.8707, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 8100 + }, + { + "epoch": 46.08, + "learning_rate": 5.3948863636363636e-05, + "loss": 0.0, + "step": 8110 + }, + { + "epoch": 46.08, + "eval_accuracy": 1.0, + "eval_loss": 2.0890072846668772e-05, + "eval_runtime": 125.1369, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 8110 + }, + { + "epoch": 46.14, + "learning_rate": 5.389204545454546e-05, + "loss": 0.0, + "step": 8120 + }, + { + "epoch": 46.14, + "eval_accuracy": 1.0, + "eval_loss": 2.1188096070545726e-05, + "eval_runtime": 125.4348, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 8120 + }, + { + "epoch": 46.19, + "learning_rate": 5.383522727272727e-05, + "loss": 0.0, + "step": 8130 + }, + { + "epoch": 46.19, + "eval_accuracy": 1.0, + "eval_loss": 2.2670423277304508e-05, + "eval_runtime": 126.2962, + "eval_samples_per_second": 2.787, + "eval_steps_per_second": 0.697, + "step": 8130 + }, + { + "epoch": 46.25, + "learning_rate": 5.3778409090909096e-05, + "loss": 0.0, + "step": 8140 + }, + { + "epoch": 46.25, + "eval_accuracy": 1.0, + "eval_loss": 2.3421916921506636e-05, + "eval_runtime": 125.2327, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 8140 + }, + { + "epoch": 46.31, + "learning_rate": 5.3721590909090914e-05, + "loss": 0.0001, + "step": 8150 + }, + { + "epoch": 46.31, + "eval_accuracy": 1.0, + "eval_loss": 2.1651387214660645e-05, + "eval_runtime": 124.9216, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 8150 + }, + { + "epoch": 46.36, + "learning_rate": 5.3664772727272726e-05, + "loss": 0.0, + "step": 8160 + }, + { + "epoch": 46.36, + "eval_accuracy": 1.0, + "eval_loss": 1.8462200387148187e-05, + "eval_runtime": 124.8778, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 8160 + }, + { + "epoch": 46.42, + "learning_rate": 5.360795454545455e-05, + "loss": 0.0, + "step": 8170 + }, + { + "epoch": 46.42, + "eval_accuracy": 1.0, + "eval_loss": 1.78336413227953e-05, + "eval_runtime": 124.8919, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 8170 + }, + { + "epoch": 46.48, + "learning_rate": 5.355113636363637e-05, + "loss": 0.0, + "step": 8180 + }, + { + "epoch": 46.48, + "eval_accuracy": 1.0, + "eval_loss": 1.768734000506811e-05, + "eval_runtime": 124.8345, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 8180 + }, + { + "epoch": 46.53, + "learning_rate": 5.349431818181818e-05, + "loss": 0.0, + "step": 8190 + }, + { + "epoch": 46.53, + "eval_accuracy": 1.0, + "eval_loss": 1.7619946447666734e-05, + "eval_runtime": 124.9889, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8190 + }, + { + "epoch": 46.59, + "learning_rate": 5.3437500000000005e-05, + "loss": 0.0, + "step": 8200 + }, + { + "epoch": 46.59, + "eval_accuracy": 1.0, + "eval_loss": 1.753900505718775e-05, + "eval_runtime": 125.0091, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8200 + }, + { + "epoch": 46.65, + "learning_rate": 5.338068181818182e-05, + "loss": 0.0, + "step": 8210 + }, + { + "epoch": 46.65, + "eval_accuracy": 1.0, + "eval_loss": 1.7379155906382948e-05, + "eval_runtime": 125.8905, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 8210 + }, + { + "epoch": 46.7, + "learning_rate": 5.3323863636363634e-05, + "loss": 0.0, + "step": 8220 + }, + { + "epoch": 46.7, + "eval_accuracy": 1.0, + "eval_loss": 1.7101452613133006e-05, + "eval_runtime": 125.6084, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 8220 + }, + { + "epoch": 46.76, + "learning_rate": 5.326704545454546e-05, + "loss": 0.0, + "step": 8230 + }, + { + "epoch": 46.76, + "eval_accuracy": 1.0, + "eval_loss": 1.6948039046837948e-05, + "eval_runtime": 124.937, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 8230 + }, + { + "epoch": 46.82, + "learning_rate": 5.321022727272728e-05, + "loss": 0.0, + "step": 8240 + }, + { + "epoch": 46.82, + "eval_accuracy": 1.0, + "eval_loss": 1.6825442799017765e-05, + "eval_runtime": 125.0781, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 8240 + }, + { + "epoch": 46.88, + "learning_rate": 5.315340909090909e-05, + "loss": 0.0, + "step": 8250 + }, + { + "epoch": 46.88, + "eval_accuracy": 1.0, + "eval_loss": 1.6627325749141164e-05, + "eval_runtime": 125.3763, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 8250 + }, + { + "epoch": 46.93, + "learning_rate": 5.309659090909091e-05, + "loss": 0.0, + "step": 8260 + }, + { + "epoch": 46.93, + "eval_accuracy": 1.0, + "eval_loss": 1.6390600649174303e-05, + "eval_runtime": 124.911, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 8260 + }, + { + "epoch": 46.99, + "learning_rate": 5.303977272727273e-05, + "loss": 0.0, + "step": 8270 + }, + { + "epoch": 46.99, + "eval_accuracy": 1.0, + "eval_loss": 1.6166404748219065e-05, + "eval_runtime": 125.3563, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 8270 + }, + { + "epoch": 47.05, + "learning_rate": 5.298295454545454e-05, + "loss": 0.0, + "step": 8280 + }, + { + "epoch": 47.05, + "eval_accuracy": 1.0, + "eval_loss": 1.6006217265385203e-05, + "eval_runtime": 125.3479, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 8280 + }, + { + "epoch": 47.1, + "learning_rate": 5.292613636363637e-05, + "loss": 0.0, + "step": 8290 + }, + { + "epoch": 47.1, + "eval_accuracy": 1.0, + "eval_loss": 1.587820406712126e-05, + "eval_runtime": 125.1652, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 8290 + }, + { + "epoch": 47.16, + "learning_rate": 5.2869318181818185e-05, + "loss": 0.0, + "step": 8300 + }, + { + "epoch": 47.16, + "eval_accuracy": 1.0, + "eval_loss": 1.5694309695390984e-05, + "eval_runtime": 125.3574, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 8300 + }, + { + "epoch": 47.22, + "learning_rate": 5.2812499999999996e-05, + "loss": 0.0, + "step": 8310 + }, + { + "epoch": 47.22, + "eval_accuracy": 1.0, + "eval_loss": 1.5527348296018317e-05, + "eval_runtime": 125.1676, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 8310 + }, + { + "epoch": 47.27, + "learning_rate": 5.275568181818182e-05, + "loss": 0.0, + "step": 8320 + }, + { + "epoch": 47.27, + "eval_accuracy": 1.0, + "eval_loss": 1.5342438928200863e-05, + "eval_runtime": 125.2274, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 8320 + }, + { + "epoch": 47.33, + "learning_rate": 5.269886363636364e-05, + "loss": 0.0, + "step": 8330 + }, + { + "epoch": 47.33, + "eval_accuracy": 1.0, + "eval_loss": 1.5174461623246316e-05, + "eval_runtime": 124.9677, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 8330 + }, + { + "epoch": 47.39, + "learning_rate": 5.264204545454545e-05, + "loss": 0.0, + "step": 8340 + }, + { + "epoch": 47.39, + "eval_accuracy": 1.0, + "eval_loss": 1.5029514543130063e-05, + "eval_runtime": 125.3313, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 8340 + }, + { + "epoch": 47.44, + "learning_rate": 5.2585227272727275e-05, + "loss": 0.0001, + "step": 8350 + }, + { + "epoch": 47.44, + "eval_accuracy": 1.0, + "eval_loss": 1.5135176909097936e-05, + "eval_runtime": 125.0991, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 8350 + }, + { + "epoch": 47.5, + "learning_rate": 5.252840909090909e-05, + "loss": 0.0, + "step": 8360 + }, + { + "epoch": 47.5, + "eval_accuracy": 1.0, + "eval_loss": 1.5425410310854204e-05, + "eval_runtime": 124.7627, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 8360 + }, + { + "epoch": 47.56, + "learning_rate": 5.2471590909090904e-05, + "loss": 0.0, + "step": 8370 + }, + { + "epoch": 47.56, + "eval_accuracy": 1.0, + "eval_loss": 1.5340745449066162e-05, + "eval_runtime": 125.8951, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 8370 + }, + { + "epoch": 47.61, + "learning_rate": 5.241477272727273e-05, + "loss": 0.0002, + "step": 8380 + }, + { + "epoch": 47.61, + "eval_accuracy": 1.0, + "eval_loss": 1.2268396858416963e-05, + "eval_runtime": 125.2202, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 8380 + }, + { + "epoch": 47.67, + "learning_rate": 5.235795454545455e-05, + "loss": 0.0, + "step": 8390 + }, + { + "epoch": 47.67, + "eval_accuracy": 1.0, + "eval_loss": 1.0771507731988095e-05, + "eval_runtime": 124.9898, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8390 + }, + { + "epoch": 47.73, + "learning_rate": 5.230113636363637e-05, + "loss": 0.0, + "step": 8400 + }, + { + "epoch": 47.73, + "eval_accuracy": 1.0, + "eval_loss": 1.0409815331513528e-05, + "eval_runtime": 125.7765, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 8400 + }, + { + "epoch": 47.78, + "learning_rate": 5.224431818181818e-05, + "loss": 0.0, + "step": 8410 + }, + { + "epoch": 47.78, + "eval_accuracy": 1.0, + "eval_loss": 1.023709774017334e-05, + "eval_runtime": 125.3631, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 8410 + }, + { + "epoch": 47.84, + "learning_rate": 5.21875e-05, + "loss": 0.0, + "step": 8420 + }, + { + "epoch": 47.84, + "eval_accuracy": 1.0, + "eval_loss": 1.0128386747965124e-05, + "eval_runtime": 125.2494, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 8420 + }, + { + "epoch": 47.9, + "learning_rate": 5.2130681818181826e-05, + "loss": 0.0, + "step": 8430 + }, + { + "epoch": 47.9, + "eval_accuracy": 1.0, + "eval_loss": 1.0035593732027337e-05, + "eval_runtime": 124.978, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8430 + }, + { + "epoch": 47.95, + "learning_rate": 5.207386363636364e-05, + "loss": 0.0, + "step": 8440 + }, + { + "epoch": 47.95, + "eval_accuracy": 1.0, + "eval_loss": 9.94686342892237e-06, + "eval_runtime": 124.9637, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 8440 + }, + { + "epoch": 48.01, + "learning_rate": 5.2017045454545455e-05, + "loss": 0.0, + "step": 8450 + }, + { + "epoch": 48.01, + "eval_accuracy": 1.0, + "eval_loss": 9.853392839431763e-06, + "eval_runtime": 125.7122, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 8450 + }, + { + "epoch": 48.07, + "learning_rate": 5.196022727272728e-05, + "loss": 0.0, + "step": 8460 + }, + { + "epoch": 48.07, + "eval_accuracy": 1.0, + "eval_loss": 9.754164238984231e-06, + "eval_runtime": 125.1143, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 8460 + }, + { + "epoch": 48.12, + "learning_rate": 5.190340909090909e-05, + "loss": 0.0, + "step": 8470 + }, + { + "epoch": 48.12, + "eval_accuracy": 1.0, + "eval_loss": 9.669160135672428e-06, + "eval_runtime": 125.4071, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 8470 + }, + { + "epoch": 48.18, + "learning_rate": 5.184659090909091e-05, + "loss": 0.0, + "step": 8480 + }, + { + "epoch": 48.18, + "eval_accuracy": 1.0, + "eval_loss": 9.593976756150369e-06, + "eval_runtime": 125.2481, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 8480 + }, + { + "epoch": 48.24, + "learning_rate": 5.1789772727272734e-05, + "loss": 0.0, + "step": 8490 + }, + { + "epoch": 48.24, + "eval_accuracy": 1.0, + "eval_loss": 9.547919034957886e-06, + "eval_runtime": 124.9415, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 8490 + }, + { + "epoch": 48.3, + "learning_rate": 5.1732954545454546e-05, + "loss": 0.0, + "step": 8500 + }, + { + "epoch": 48.3, + "eval_accuracy": 1.0, + "eval_loss": 9.49542663875036e-06, + "eval_runtime": 125.6388, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 8500 + }, + { + "epoch": 48.35, + "learning_rate": 5.1676136363636364e-05, + "loss": 0.0, + "step": 8510 + }, + { + "epoch": 48.35, + "eval_accuracy": 1.0, + "eval_loss": 9.403986950928811e-06, + "eval_runtime": 125.0475, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 8510 + }, + { + "epoch": 48.41, + "learning_rate": 5.161931818181819e-05, + "loss": 0.0, + "step": 8520 + }, + { + "epoch": 48.41, + "eval_accuracy": 1.0, + "eval_loss": 9.304420927946921e-06, + "eval_runtime": 124.9083, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 8520 + }, + { + "epoch": 48.47, + "learning_rate": 5.15625e-05, + "loss": 0.0, + "step": 8530 + }, + { + "epoch": 48.47, + "eval_accuracy": 1.0, + "eval_loss": 9.20620823308127e-06, + "eval_runtime": 125.1947, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 8530 + }, + { + "epoch": 48.52, + "learning_rate": 5.150568181818182e-05, + "loss": 0.0, + "step": 8540 + }, + { + "epoch": 48.52, + "eval_accuracy": 1.0, + "eval_loss": 9.111044164455961e-06, + "eval_runtime": 125.1846, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 8540 + }, + { + "epoch": 48.58, + "learning_rate": 5.144886363636364e-05, + "loss": 0.0, + "step": 8550 + }, + { + "epoch": 48.58, + "eval_accuracy": 1.0, + "eval_loss": 9.05177785170963e-06, + "eval_runtime": 124.8318, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 8550 + }, + { + "epoch": 48.64, + "learning_rate": 5.1392045454545454e-05, + "loss": 0.0, + "step": 8560 + }, + { + "epoch": 48.64, + "eval_accuracy": 1.0, + "eval_loss": 8.993528354039881e-06, + "eval_runtime": 124.6851, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 8560 + }, + { + "epoch": 48.69, + "learning_rate": 5.133522727272727e-05, + "loss": 0.0, + "step": 8570 + }, + { + "epoch": 48.69, + "eval_accuracy": 1.0, + "eval_loss": 8.936971426010132e-06, + "eval_runtime": 125.1773, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 8570 + }, + { + "epoch": 48.75, + "learning_rate": 5.12784090909091e-05, + "loss": 0.0, + "step": 8580 + }, + { + "epoch": 48.75, + "eval_accuracy": 1.0, + "eval_loss": 8.87465739651816e-06, + "eval_runtime": 124.9018, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 8580 + }, + { + "epoch": 48.81, + "learning_rate": 5.122159090909091e-05, + "loss": 0.0, + "step": 8590 + }, + { + "epoch": 48.81, + "eval_accuracy": 1.0, + "eval_loss": 8.793039341981057e-06, + "eval_runtime": 125.4705, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 8590 + }, + { + "epoch": 48.86, + "learning_rate": 5.1164772727272726e-05, + "loss": 0.0, + "step": 8600 + }, + { + "epoch": 48.86, + "eval_accuracy": 1.0, + "eval_loss": 8.724629878997803e-06, + "eval_runtime": 125.5857, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 8600 + }, + { + "epoch": 48.92, + "learning_rate": 5.110795454545455e-05, + "loss": 0.0, + "step": 8610 + }, + { + "epoch": 48.92, + "eval_accuracy": 1.0, + "eval_loss": 8.672814146848395e-06, + "eval_runtime": 125.0857, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 8610 + }, + { + "epoch": 48.98, + "learning_rate": 5.105113636363636e-05, + "loss": 0.0, + "step": 8620 + }, + { + "epoch": 48.98, + "eval_accuracy": 1.0, + "eval_loss": 8.62133765622275e-06, + "eval_runtime": 125.8176, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 8620 + }, + { + "epoch": 49.03, + "learning_rate": 5.099431818181818e-05, + "loss": 0.0, + "step": 8630 + }, + { + "epoch": 49.03, + "eval_accuracy": 1.0, + "eval_loss": 8.527189493179321e-06, + "eval_runtime": 125.2051, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 8630 + }, + { + "epoch": 49.09, + "learning_rate": 5.0937500000000005e-05, + "loss": 0.0, + "step": 8640 + }, + { + "epoch": 49.09, + "eval_accuracy": 1.0, + "eval_loss": 8.44794249132974e-06, + "eval_runtime": 124.8643, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 8640 + }, + { + "epoch": 49.15, + "learning_rate": 5.0880681818181816e-05, + "loss": 0.0, + "step": 8650 + }, + { + "epoch": 49.15, + "eval_accuracy": 1.0, + "eval_loss": 8.383934982703067e-06, + "eval_runtime": 125.5549, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 8650 + }, + { + "epoch": 49.2, + "learning_rate": 5.0823863636363634e-05, + "loss": 0.0, + "step": 8660 + }, + { + "epoch": 49.2, + "eval_accuracy": 1.0, + "eval_loss": 8.343634362972807e-06, + "eval_runtime": 125.4324, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 8660 + }, + { + "epoch": 49.26, + "learning_rate": 5.076704545454546e-05, + "loss": 0.0, + "step": 8670 + }, + { + "epoch": 49.26, + "eval_accuracy": 1.0, + "eval_loss": 8.297575732285623e-06, + "eval_runtime": 125.1799, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 8670 + }, + { + "epoch": 49.32, + "learning_rate": 5.071022727272727e-05, + "loss": 0.0, + "step": 8680 + }, + { + "epoch": 49.32, + "eval_accuracy": 1.0, + "eval_loss": 8.243051524914335e-06, + "eval_runtime": 124.9243, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 8680 + }, + { + "epoch": 49.38, + "learning_rate": 5.06534090909091e-05, + "loss": 0.0, + "step": 8690 + }, + { + "epoch": 49.38, + "eval_accuracy": 1.0, + "eval_loss": 8.191913366317749e-06, + "eval_runtime": 125.2436, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 8690 + }, + { + "epoch": 49.43, + "learning_rate": 5.059659090909091e-05, + "loss": 0.0, + "step": 8700 + }, + { + "epoch": 49.43, + "eval_accuracy": 1.0, + "eval_loss": 8.129261004796717e-06, + "eval_runtime": 125.3747, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 8700 + }, + { + "epoch": 49.49, + "learning_rate": 5.0539772727272724e-05, + "loss": 0.0, + "step": 8710 + }, + { + "epoch": 49.49, + "eval_accuracy": 1.0, + "eval_loss": 8.073042408796027e-06, + "eval_runtime": 125.0678, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 8710 + }, + { + "epoch": 49.55, + "learning_rate": 5.0482954545454556e-05, + "loss": 0.0, + "step": 8720 + }, + { + "epoch": 49.55, + "eval_accuracy": 1.0, + "eval_loss": 8.032064215512946e-06, + "eval_runtime": 125.014, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8720 + }, + { + "epoch": 49.6, + "learning_rate": 5.042613636363637e-05, + "loss": 0.0, + "step": 8730 + }, + { + "epoch": 49.6, + "eval_accuracy": 1.0, + "eval_loss": 7.98024939285824e-06, + "eval_runtime": 125.0069, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8730 + }, + { + "epoch": 49.66, + "learning_rate": 5.036931818181818e-05, + "loss": 0.0, + "step": 8740 + }, + { + "epoch": 49.66, + "eval_accuracy": 1.0, + "eval_loss": 7.926740181574132e-06, + "eval_runtime": 125.6048, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 8740 + }, + { + "epoch": 49.72, + "learning_rate": 5.031250000000001e-05, + "loss": 0.0, + "step": 8750 + }, + { + "epoch": 49.72, + "eval_accuracy": 1.0, + "eval_loss": 7.878650649217889e-06, + "eval_runtime": 124.9708, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 8750 + }, + { + "epoch": 49.77, + "learning_rate": 5.025568181818182e-05, + "loss": 0.0, + "step": 8760 + }, + { + "epoch": 49.77, + "eval_accuracy": 1.0, + "eval_loss": 7.845461368560791e-06, + "eval_runtime": 126.0304, + "eval_samples_per_second": 2.793, + "eval_steps_per_second": 0.698, + "step": 8760 + }, + { + "epoch": 49.83, + "learning_rate": 5.019886363636363e-05, + "loss": 0.0, + "step": 8770 + }, + { + "epoch": 49.83, + "eval_accuracy": 1.0, + "eval_loss": 7.80482150730677e-06, + "eval_runtime": 125.0183, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8770 + }, + { + "epoch": 49.89, + "learning_rate": 5.0142045454545464e-05, + "loss": 0.0, + "step": 8780 + }, + { + "epoch": 49.89, + "eval_accuracy": 1.0, + "eval_loss": 7.76655269874027e-06, + "eval_runtime": 125.202, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 8780 + }, + { + "epoch": 49.94, + "learning_rate": 5.0085227272727275e-05, + "loss": 0.0, + "step": 8790 + }, + { + "epoch": 49.94, + "eval_accuracy": 1.0, + "eval_loss": 7.730993274890352e-06, + "eval_runtime": 124.8508, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 8790 + }, + { + "epoch": 50.0, + "learning_rate": 5.0028409090909087e-05, + "loss": 0.0, + "step": 8800 + }, + { + "epoch": 50.0, + "eval_accuracy": 1.0, + "eval_loss": 7.701190952502657e-06, + "eval_runtime": 125.1213, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 8800 + }, + { + "epoch": 50.06, + "learning_rate": 4.997159090909091e-05, + "loss": 0.0, + "step": 8810 + }, + { + "epoch": 50.06, + "eval_accuracy": 1.0, + "eval_loss": 7.664953955099918e-06, + "eval_runtime": 125.0032, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8810 + }, + { + "epoch": 50.11, + "learning_rate": 4.991477272727273e-05, + "loss": 0.0, + "step": 8820 + }, + { + "epoch": 50.11, + "eval_accuracy": 1.0, + "eval_loss": 7.645649930054788e-06, + "eval_runtime": 125.7492, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 8820 + }, + { + "epoch": 50.17, + "learning_rate": 4.985795454545455e-05, + "loss": 0.0, + "step": 8830 + }, + { + "epoch": 50.17, + "eval_accuracy": 1.0, + "eval_loss": 7.605010978295468e-06, + "eval_runtime": 125.065, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 8830 + }, + { + "epoch": 50.23, + "learning_rate": 4.9801136363636366e-05, + "loss": 0.0, + "step": 8840 + }, + { + "epoch": 50.23, + "eval_accuracy": 1.0, + "eval_loss": 7.563693998235976e-06, + "eval_runtime": 125.4788, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 8840 + }, + { + "epoch": 50.28, + "learning_rate": 4.9744318181818184e-05, + "loss": 0.0, + "step": 8850 + }, + { + "epoch": 50.28, + "eval_accuracy": 1.0, + "eval_loss": 7.5372781793703325e-06, + "eval_runtime": 125.8866, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 8850 + }, + { + "epoch": 50.34, + "learning_rate": 4.96875e-05, + "loss": 0.0, + "step": 8860 + }, + { + "epoch": 50.34, + "eval_accuracy": 1.0, + "eval_loss": 7.504089353460586e-06, + "eval_runtime": 125.1551, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 8860 + }, + { + "epoch": 50.4, + "learning_rate": 4.963068181818182e-05, + "loss": 0.0, + "step": 8870 + }, + { + "epoch": 50.4, + "eval_accuracy": 1.0, + "eval_loss": 7.460062988684513e-06, + "eval_runtime": 124.9982, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8870 + }, + { + "epoch": 50.45, + "learning_rate": 4.957386363636364e-05, + "loss": 0.0, + "step": 8880 + }, + { + "epoch": 50.45, + "eval_accuracy": 1.0, + "eval_loss": 7.413666480715619e-06, + "eval_runtime": 125.0705, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 8880 + }, + { + "epoch": 50.51, + "learning_rate": 4.9517045454545456e-05, + "loss": 0.0, + "step": 8890 + }, + { + "epoch": 50.51, + "eval_accuracy": 1.0, + "eval_loss": 7.35812545826775e-06, + "eval_runtime": 124.8746, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 8890 + }, + { + "epoch": 50.57, + "learning_rate": 4.946022727272728e-05, + "loss": 0.0, + "step": 8900 + }, + { + "epoch": 50.57, + "eval_accuracy": 1.0, + "eval_loss": 7.3151154538209084e-06, + "eval_runtime": 125.0146, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8900 + }, + { + "epoch": 50.62, + "learning_rate": 4.940340909090909e-05, + "loss": 0.0, + "step": 8910 + }, + { + "epoch": 50.62, + "eval_accuracy": 1.0, + "eval_loss": 7.261945484060561e-06, + "eval_runtime": 125.002, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8910 + }, + { + "epoch": 50.68, + "learning_rate": 4.934659090909091e-05, + "loss": 0.0, + "step": 8920 + }, + { + "epoch": 50.68, + "eval_accuracy": 1.0, + "eval_loss": 7.177618499554228e-06, + "eval_runtime": 125.0102, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 8920 + }, + { + "epoch": 50.74, + "learning_rate": 4.9289772727272735e-05, + "loss": 0.0, + "step": 8930 + }, + { + "epoch": 50.74, + "eval_accuracy": 1.0, + "eval_loss": 7.110902060958324e-06, + "eval_runtime": 125.8011, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.7, + "step": 8930 + }, + { + "epoch": 50.8, + "learning_rate": 4.9232954545454546e-05, + "loss": 0.0, + "step": 8940 + }, + { + "epoch": 50.8, + "eval_accuracy": 1.0, + "eval_loss": 7.0695850808988325e-06, + "eval_runtime": 124.8907, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 8940 + }, + { + "epoch": 50.85, + "learning_rate": 4.9176136363636364e-05, + "loss": 0.0, + "step": 8950 + }, + { + "epoch": 50.85, + "eval_accuracy": 1.0, + "eval_loss": 7.036057468212675e-06, + "eval_runtime": 124.925, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 8950 + }, + { + "epoch": 50.91, + "learning_rate": 4.911931818181819e-05, + "loss": 0.0, + "step": 8960 + }, + { + "epoch": 50.91, + "eval_accuracy": 1.0, + "eval_loss": 6.992031103436602e-06, + "eval_runtime": 124.9621, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 8960 + }, + { + "epoch": 50.97, + "learning_rate": 4.90625e-05, + "loss": 0.0, + "step": 8970 + }, + { + "epoch": 50.97, + "eval_accuracy": 1.0, + "eval_loss": 6.945295808691299e-06, + "eval_runtime": 124.9374, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 8970 + }, + { + "epoch": 51.02, + "learning_rate": 4.900568181818182e-05, + "loss": 0.0, + "step": 8980 + }, + { + "epoch": 51.02, + "eval_accuracy": 1.0, + "eval_loss": 6.887722975079669e-06, + "eval_runtime": 125.5028, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 8980 + }, + { + "epoch": 51.08, + "learning_rate": 4.894886363636364e-05, + "loss": 0.0, + "step": 8990 + }, + { + "epoch": 51.08, + "eval_accuracy": 1.0, + "eval_loss": 6.828118330304278e-06, + "eval_runtime": 124.8299, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 8990 + }, + { + "epoch": 51.14, + "learning_rate": 4.8892045454545454e-05, + "loss": 0.0, + "step": 9000 + }, + { + "epoch": 51.14, + "eval_accuracy": 1.0, + "eval_loss": 6.729567758156918e-06, + "eval_runtime": 125.0627, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 9000 + }, + { + "epoch": 51.19, + "learning_rate": 4.883522727272727e-05, + "loss": 0.0, + "step": 9010 + }, + { + "epoch": 51.19, + "eval_accuracy": 1.0, + "eval_loss": 6.632710210396908e-06, + "eval_runtime": 124.9385, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 9010 + }, + { + "epoch": 51.25, + "learning_rate": 4.87784090909091e-05, + "loss": 0.0, + "step": 9020 + }, + { + "epoch": 51.25, + "eval_accuracy": 1.0, + "eval_loss": 6.553801540576387e-06, + "eval_runtime": 125.4996, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 9020 + }, + { + "epoch": 51.31, + "learning_rate": 4.8721590909090915e-05, + "loss": 0.0, + "step": 9030 + }, + { + "epoch": 51.31, + "eval_accuracy": 1.0, + "eval_loss": 6.502325049950741e-06, + "eval_runtime": 124.9288, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 9030 + }, + { + "epoch": 51.36, + "learning_rate": 4.8664772727272726e-05, + "loss": 0.0, + "step": 9040 + }, + { + "epoch": 51.36, + "eval_accuracy": 1.0, + "eval_loss": 6.4623627622495405e-06, + "eval_runtime": 124.974, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 9040 + }, + { + "epoch": 51.42, + "learning_rate": 4.860795454545455e-05, + "loss": 0.0, + "step": 9050 + }, + { + "epoch": 51.42, + "eval_accuracy": 1.0, + "eval_loss": 6.409869911294663e-06, + "eval_runtime": 125.4621, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 9050 + }, + { + "epoch": 51.48, + "learning_rate": 4.855113636363637e-05, + "loss": 0.0, + "step": 9060 + }, + { + "epoch": 51.48, + "eval_accuracy": 1.0, + "eval_loss": 6.360086445056368e-06, + "eval_runtime": 124.8614, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 9060 + }, + { + "epoch": 51.53, + "learning_rate": 4.849431818181818e-05, + "loss": 0.0, + "step": 9070 + }, + { + "epoch": 51.53, + "eval_accuracy": 1.0, + "eval_loss": 6.30556178293773e-06, + "eval_runtime": 124.9994, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9070 + }, + { + "epoch": 51.59, + "learning_rate": 4.8437500000000005e-05, + "loss": 0.0, + "step": 9080 + }, + { + "epoch": 51.59, + "eval_accuracy": 1.0, + "eval_loss": 6.251375907595502e-06, + "eval_runtime": 124.8784, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 9080 + }, + { + "epoch": 51.65, + "learning_rate": 4.838068181818182e-05, + "loss": 0.0, + "step": 9090 + }, + { + "epoch": 51.65, + "eval_accuracy": 1.0, + "eval_loss": 6.192109594849171e-06, + "eval_runtime": 124.6945, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 9090 + }, + { + "epoch": 51.7, + "learning_rate": 4.8323863636363634e-05, + "loss": 0.0, + "step": 9100 + }, + { + "epoch": 51.7, + "eval_accuracy": 1.0, + "eval_loss": 6.133182523626601e-06, + "eval_runtime": 125.8185, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 9100 + }, + { + "epoch": 51.76, + "learning_rate": 4.826704545454546e-05, + "loss": 0.0, + "step": 9110 + }, + { + "epoch": 51.76, + "eval_accuracy": 1.0, + "eval_loss": 6.09288144914899e-06, + "eval_runtime": 125.695, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 9110 + }, + { + "epoch": 51.82, + "learning_rate": 4.821022727272728e-05, + "loss": 0.0, + "step": 9120 + }, + { + "epoch": 51.82, + "eval_accuracy": 1.0, + "eval_loss": 6.044114343239926e-06, + "eval_runtime": 124.9953, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9120 + }, + { + "epoch": 51.88, + "learning_rate": 4.815340909090909e-05, + "loss": 0.0, + "step": 9130 + }, + { + "epoch": 51.88, + "eval_accuracy": 1.0, + "eval_loss": 6.007199772284366e-06, + "eval_runtime": 125.5233, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 9130 + }, + { + "epoch": 51.93, + "learning_rate": 4.809659090909091e-05, + "loss": 0.0, + "step": 9140 + }, + { + "epoch": 51.93, + "eval_accuracy": 1.0, + "eval_loss": 5.963173862255644e-06, + "eval_runtime": 124.6559, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 9140 + }, + { + "epoch": 51.99, + "learning_rate": 4.803977272727273e-05, + "loss": 0.0, + "step": 9150 + }, + { + "epoch": 51.99, + "eval_accuracy": 1.0, + "eval_loss": 5.910681011300767e-06, + "eval_runtime": 124.953, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 9150 + }, + { + "epoch": 52.05, + "learning_rate": 4.798295454545455e-05, + "loss": 0.0, + "step": 9160 + }, + { + "epoch": 52.05, + "eval_accuracy": 1.0, + "eval_loss": 5.868009338882985e-06, + "eval_runtime": 125.2729, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 9160 + }, + { + "epoch": 52.1, + "learning_rate": 4.792613636363637e-05, + "loss": 0.0, + "step": 9170 + }, + { + "epoch": 52.1, + "eval_accuracy": 1.0, + "eval_loss": 5.822289949719561e-06, + "eval_runtime": 125.4153, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 9170 + }, + { + "epoch": 52.16, + "learning_rate": 4.7869318181818185e-05, + "loss": 0.0, + "step": 9180 + }, + { + "epoch": 52.16, + "eval_accuracy": 1.0, + "eval_loss": 5.780973424407421e-06, + "eval_runtime": 125.2372, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 9180 + }, + { + "epoch": 52.22, + "learning_rate": 4.7812500000000003e-05, + "loss": 0.0, + "step": 9190 + }, + { + "epoch": 52.22, + "eval_accuracy": 1.0, + "eval_loss": 5.738640538766049e-06, + "eval_runtime": 125.0039, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9190 + }, + { + "epoch": 52.27, + "learning_rate": 4.775568181818182e-05, + "loss": 0.0, + "step": 9200 + }, + { + "epoch": 52.27, + "eval_accuracy": 1.0, + "eval_loss": 5.707144737243652e-06, + "eval_runtime": 124.755, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 9200 + }, + { + "epoch": 52.33, + "learning_rate": 4.769886363636364e-05, + "loss": 0.0, + "step": 9210 + }, + { + "epoch": 52.33, + "eval_accuracy": 1.0, + "eval_loss": 5.66447306482587e-06, + "eval_runtime": 124.7973, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 9210 + }, + { + "epoch": 52.39, + "learning_rate": 4.764204545454546e-05, + "loss": 0.0, + "step": 9220 + }, + { + "epoch": 52.39, + "eval_accuracy": 1.0, + "eval_loss": 5.635009529214585e-06, + "eval_runtime": 125.2597, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 9220 + }, + { + "epoch": 52.44, + "learning_rate": 4.7585227272727276e-05, + "loss": 0.0, + "step": 9230 + }, + { + "epoch": 52.44, + "eval_accuracy": 1.0, + "eval_loss": 5.609948402707232e-06, + "eval_runtime": 125.395, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 9230 + }, + { + "epoch": 52.5, + "learning_rate": 4.7528409090909094e-05, + "loss": 0.0, + "step": 9240 + }, + { + "epoch": 52.5, + "eval_accuracy": 1.0, + "eval_loss": 5.580484867095947e-06, + "eval_runtime": 125.1235, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9240 + }, + { + "epoch": 52.56, + "learning_rate": 4.747159090909091e-05, + "loss": 0.0, + "step": 9250 + }, + { + "epoch": 52.56, + "eval_accuracy": 1.0, + "eval_loss": 5.5374748626491055e-06, + "eval_runtime": 124.6577, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 9250 + }, + { + "epoch": 52.61, + "learning_rate": 4.741477272727273e-05, + "loss": 0.0, + "step": 9260 + }, + { + "epoch": 52.61, + "eval_accuracy": 1.0, + "eval_loss": 5.491755018738331e-06, + "eval_runtime": 125.1065, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 9260 + }, + { + "epoch": 52.67, + "learning_rate": 4.735795454545455e-05, + "loss": 0.0, + "step": 9270 + }, + { + "epoch": 52.67, + "eval_accuracy": 1.0, + "eval_loss": 5.446713203127729e-06, + "eval_runtime": 124.6713, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 9270 + }, + { + "epoch": 52.73, + "learning_rate": 4.7301136363636366e-05, + "loss": 0.0, + "step": 9280 + }, + { + "epoch": 52.73, + "eval_accuracy": 1.0, + "eval_loss": 5.403702743933536e-06, + "eval_runtime": 125.1063, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 9280 + }, + { + "epoch": 52.78, + "learning_rate": 4.7244318181818184e-05, + "loss": 0.0, + "step": 9290 + }, + { + "epoch": 52.78, + "eval_accuracy": 1.0, + "eval_loss": 5.3691592256654985e-06, + "eval_runtime": 125.5119, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 9290 + }, + { + "epoch": 52.84, + "learning_rate": 4.71875e-05, + "loss": 0.0, + "step": 9300 + }, + { + "epoch": 52.84, + "eval_accuracy": 1.0, + "eval_loss": 5.279413471726002e-06, + "eval_runtime": 125.1978, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 9300 + }, + { + "epoch": 52.9, + "learning_rate": 4.713068181818182e-05, + "loss": 0.0, + "step": 9310 + }, + { + "epoch": 52.9, + "eval_accuracy": 1.0, + "eval_loss": 5.2167611102049705e-06, + "eval_runtime": 124.8427, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 9310 + }, + { + "epoch": 52.95, + "learning_rate": 4.707386363636364e-05, + "loss": 0.0, + "step": 9320 + }, + { + "epoch": 52.95, + "eval_accuracy": 1.0, + "eval_loss": 5.166300525161205e-06, + "eval_runtime": 125.2264, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 9320 + }, + { + "epoch": 53.01, + "learning_rate": 4.7017045454545456e-05, + "loss": 0.0, + "step": 9330 + }, + { + "epoch": 53.01, + "eval_accuracy": 1.0, + "eval_loss": 5.127692929818295e-06, + "eval_runtime": 125.358, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 9330 + }, + { + "epoch": 53.07, + "learning_rate": 4.6960227272727274e-05, + "loss": 0.0, + "step": 9340 + }, + { + "epoch": 53.07, + "eval_accuracy": 1.0, + "eval_loss": 5.085021257400513e-06, + "eval_runtime": 125.5335, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 9340 + }, + { + "epoch": 53.12, + "learning_rate": 4.690340909090909e-05, + "loss": 0.0, + "step": 9350 + }, + { + "epoch": 53.12, + "eval_accuracy": 1.0, + "eval_loss": 5.061653610027861e-06, + "eval_runtime": 125.3441, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 9350 + }, + { + "epoch": 53.18, + "learning_rate": 4.684659090909091e-05, + "loss": 0.0, + "step": 9360 + }, + { + "epoch": 53.18, + "eval_accuracy": 1.0, + "eval_loss": 5.027110091759823e-06, + "eval_runtime": 125.2532, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 9360 + }, + { + "epoch": 53.24, + "learning_rate": 4.678977272727273e-05, + "loss": 0.0, + "step": 9370 + }, + { + "epoch": 53.24, + "eval_accuracy": 1.0, + "eval_loss": 5.0037419896398205e-06, + "eval_runtime": 125.1765, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 9370 + }, + { + "epoch": 53.3, + "learning_rate": 4.6732954545454546e-05, + "loss": 0.0, + "step": 9380 + }, + { + "epoch": 53.3, + "eval_accuracy": 1.0, + "eval_loss": 4.972246642864775e-06, + "eval_runtime": 124.9953, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9380 + }, + { + "epoch": 53.35, + "learning_rate": 4.6676136363636364e-05, + "loss": 0.0, + "step": 9390 + }, + { + "epoch": 53.35, + "eval_accuracy": 1.0, + "eval_loss": 4.9468467295810115e-06, + "eval_runtime": 125.2818, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 9390 + }, + { + "epoch": 53.41, + "learning_rate": 4.661931818181818e-05, + "loss": 0.1317, + "step": 9400 + }, + { + "epoch": 53.41, + "eval_accuracy": 1.0, + "eval_loss": 5.9157609939575195e-05, + "eval_runtime": 124.9934, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9400 + }, + { + "epoch": 53.47, + "learning_rate": 4.65625e-05, + "loss": 0.0001, + "step": 9410 + }, + { + "epoch": 53.47, + "eval_accuracy": 1.0, + "eval_loss": 0.0001642853021621704, + "eval_runtime": 125.7837, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.7, + "step": 9410 + }, + { + "epoch": 53.52, + "learning_rate": 4.650568181818182e-05, + "loss": 0.0001, + "step": 9420 + }, + { + "epoch": 53.52, + "eval_accuracy": 1.0, + "eval_loss": 4.85076816403307e-05, + "eval_runtime": 125.1412, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9420 + }, + { + "epoch": 53.58, + "learning_rate": 4.6448863636363636e-05, + "loss": 0.0, + "step": 9430 + }, + { + "epoch": 53.58, + "eval_accuracy": 1.0, + "eval_loss": 2.5623223336879164e-05, + "eval_runtime": 125.1907, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 9430 + }, + { + "epoch": 53.64, + "learning_rate": 4.6392045454545454e-05, + "loss": 0.0, + "step": 9440 + }, + { + "epoch": 53.64, + "eval_accuracy": 1.0, + "eval_loss": 1.8265436665387824e-05, + "eval_runtime": 125.573, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 9440 + }, + { + "epoch": 53.69, + "learning_rate": 4.634090909090909e-05, + "loss": 0.0, + "step": 9450 + }, + { + "epoch": 53.69, + "eval_accuracy": 1.0, + "eval_loss": 1.6162341125891544e-05, + "eval_runtime": 125.0923, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 9450 + }, + { + "epoch": 53.75, + "learning_rate": 4.628409090909091e-05, + "loss": 0.0, + "step": 9460 + }, + { + "epoch": 53.75, + "eval_accuracy": 1.0, + "eval_loss": 1.5271658412530087e-05, + "eval_runtime": 125.3189, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 9460 + }, + { + "epoch": 53.81, + "learning_rate": 4.623295454545455e-05, + "loss": 0.0, + "step": 9470 + }, + { + "epoch": 53.81, + "eval_accuracy": 1.0, + "eval_loss": 1.4864246622892097e-05, + "eval_runtime": 125.1525, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9470 + }, + { + "epoch": 53.86, + "learning_rate": 4.6176136363636365e-05, + "loss": 0.0, + "step": 9480 + }, + { + "epoch": 53.86, + "eval_accuracy": 1.0, + "eval_loss": 1.469322251068661e-05, + "eval_runtime": 125.4733, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 9480 + }, + { + "epoch": 53.92, + "learning_rate": 4.6125e-05, + "loss": 0.0, + "step": 9490 + }, + { + "epoch": 53.92, + "eval_accuracy": 1.0, + "eval_loss": 1.4593316336686257e-05, + "eval_runtime": 125.0375, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 9490 + }, + { + "epoch": 53.98, + "learning_rate": 4.607954545454546e-05, + "loss": 0.0, + "step": 9500 + }, + { + "epoch": 53.98, + "eval_accuracy": 1.0, + "eval_loss": 1.461532974644797e-05, + "eval_runtime": 124.6945, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 9500 + }, + { + "epoch": 54.03, + "learning_rate": 4.602840909090909e-05, + "loss": 0.0, + "step": 9510 + }, + { + "epoch": 54.03, + "eval_accuracy": 1.0, + "eval_loss": 1.4555386769643519e-05, + "eval_runtime": 125.1478, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9510 + }, + { + "epoch": 54.09, + "learning_rate": 4.597727272727273e-05, + "loss": 0.001, + "step": 9520 + }, + { + "epoch": 54.09, + "eval_accuracy": 1.0, + "eval_loss": 1.2388283721520565e-05, + "eval_runtime": 125.2716, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 9520 + }, + { + "epoch": 54.15, + "learning_rate": 4.5926136363636366e-05, + "loss": 0.0, + "step": 9530 + }, + { + "epoch": 54.15, + "eval_accuracy": 0.9801136255264282, + "eval_loss": NaN, + "eval_runtime": 125.0673, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 9530 + }, + { + "epoch": 54.2, + "learning_rate": 4.5875000000000004e-05, + "loss": 0.0, + "step": 9540 + }, + { + "epoch": 54.2, + "eval_accuracy": 0.9772727489471436, + "eval_loss": NaN, + "eval_runtime": 125.2077, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 9540 + }, + { + "epoch": 54.26, + "learning_rate": 4.582954545454546e-05, + "loss": 0.0, + "step": 9550 + }, + { + "epoch": 54.26, + "eval_accuracy": 0.9744318127632141, + "eval_loss": NaN, + "eval_runtime": 124.9598, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 9550 + }, + { + "epoch": 54.32, + "learning_rate": 4.577272727272727e-05, + "loss": 0.0013, + "step": 9560 + }, + { + "epoch": 54.32, + "eval_accuracy": 1.0, + "eval_loss": 0.00034921549377031624, + "eval_runtime": 125.2842, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 9560 + }, + { + "epoch": 54.38, + "learning_rate": 4.571590909090909e-05, + "loss": 0.0, + "step": 9570 + }, + { + "epoch": 54.38, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.04198275879025459, + "eval_runtime": 124.8942, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 9570 + }, + { + "epoch": 54.43, + "learning_rate": 4.5659090909090915e-05, + "loss": 0.0, + "step": 9580 + }, + { + "epoch": 54.43, + "eval_accuracy": 0.9914772510528564, + "eval_loss": 0.09170820564031601, + "eval_runtime": 124.894, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 9580 + }, + { + "epoch": 54.49, + "learning_rate": 4.5602272727272726e-05, + "loss": 0.1551, + "step": 9590 + }, + { + "epoch": 54.49, + "eval_accuracy": 1.0, + "eval_loss": 0.00015253132733050734, + "eval_runtime": 124.9705, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 9590 + }, + { + "epoch": 54.55, + "learning_rate": 4.5545454545454544e-05, + "loss": 0.0006, + "step": 9600 + }, + { + "epoch": 54.55, + "eval_accuracy": 1.0, + "eval_loss": 0.0008415572810918093, + "eval_runtime": 125.7902, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.7, + "step": 9600 + }, + { + "epoch": 54.6, + "learning_rate": 4.548863636363637e-05, + "loss": 0.0009, + "step": 9610 + }, + { + "epoch": 54.6, + "eval_accuracy": 1.0, + "eval_loss": 0.0010648637544363737, + "eval_runtime": 125.0188, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9610 + }, + { + "epoch": 54.66, + "learning_rate": 4.543181818181819e-05, + "loss": 0.0006, + "step": 9620 + }, + { + "epoch": 54.66, + "eval_accuracy": 1.0, + "eval_loss": 0.0014904364943504333, + "eval_runtime": 125.3634, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 9620 + }, + { + "epoch": 54.72, + "learning_rate": 4.5375e-05, + "loss": 0.0003, + "step": 9630 + }, + { + "epoch": 54.72, + "eval_accuracy": 1.0, + "eval_loss": 0.0017835010075941682, + "eval_runtime": 125.419, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 9630 + }, + { + "epoch": 54.77, + "learning_rate": 4.531818181818182e-05, + "loss": 0.0002, + "step": 9640 + }, + { + "epoch": 54.77, + "eval_accuracy": 1.0, + "eval_loss": 0.0019196458160877228, + "eval_runtime": 125.2988, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 9640 + }, + { + "epoch": 54.83, + "learning_rate": 4.526136363636364e-05, + "loss": 0.0001, + "step": 9650 + }, + { + "epoch": 54.83, + "eval_accuracy": 1.0, + "eval_loss": 0.0019542605150491, + "eval_runtime": 125.1401, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9650 + }, + { + "epoch": 54.89, + "learning_rate": 4.520454545454545e-05, + "loss": 0.0002, + "step": 9660 + }, + { + "epoch": 54.89, + "eval_accuracy": 1.0, + "eval_loss": 0.0019864842761307955, + "eval_runtime": 125.6693, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 9660 + }, + { + "epoch": 54.94, + "learning_rate": 4.514772727272728e-05, + "loss": 0.0001, + "step": 9670 + }, + { + "epoch": 54.94, + "eval_accuracy": 1.0, + "eval_loss": 0.0019668482709676027, + "eval_runtime": 125.632, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 9670 + }, + { + "epoch": 55.0, + "learning_rate": 4.5090909090909095e-05, + "loss": 0.0001, + "step": 9680 + }, + { + "epoch": 55.0, + "eval_accuracy": 1.0, + "eval_loss": 0.001999635249376297, + "eval_runtime": 125.2212, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 9680 + }, + { + "epoch": 55.06, + "learning_rate": 4.5034090909090907e-05, + "loss": 0.1, + "step": 9690 + }, + { + "epoch": 55.06, + "eval_accuracy": 1.0, + "eval_loss": 8.60186482896097e-05, + "eval_runtime": 124.9634, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 9690 + }, + { + "epoch": 55.11, + "learning_rate": 4.497727272727273e-05, + "loss": 0.0001, + "step": 9700 + }, + { + "epoch": 55.11, + "eval_accuracy": 1.0, + "eval_loss": 8.45007598400116e-05, + "eval_runtime": 125.1595, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 9700 + }, + { + "epoch": 55.17, + "learning_rate": 4.492045454545455e-05, + "loss": 0.0001, + "step": 9710 + }, + { + "epoch": 55.17, + "eval_accuracy": 1.0, + "eval_loss": 0.0005408335709944367, + "eval_runtime": 125.4051, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 9710 + }, + { + "epoch": 55.23, + "learning_rate": 4.486363636363636e-05, + "loss": 0.0001, + "step": 9720 + }, + { + "epoch": 55.23, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.014795326627790928, + "eval_runtime": 125.1797, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 9720 + }, + { + "epoch": 55.28, + "learning_rate": 4.4806818181818186e-05, + "loss": 0.0001, + "step": 9730 + }, + { + "epoch": 55.28, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.024879997596144676, + "eval_runtime": 125.2678, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 9730 + }, + { + "epoch": 55.34, + "learning_rate": 4.4750000000000004e-05, + "loss": 0.0001, + "step": 9740 + }, + { + "epoch": 55.34, + "eval_accuracy": 0.9971590638160706, + "eval_loss": 0.02690892107784748, + "eval_runtime": 125.1451, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9740 + }, + { + "epoch": 55.4, + "learning_rate": 4.469318181818182e-05, + "loss": 0.0001, + "step": 9750 + }, + { + "epoch": 55.4, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.027611009776592255, + "eval_runtime": 125.1821, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 9750 + }, + { + "epoch": 55.45, + "learning_rate": 4.463636363636364e-05, + "loss": 0.0001, + "step": 9760 + }, + { + "epoch": 55.45, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.028200460597872734, + "eval_runtime": 125.1071, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 9760 + }, + { + "epoch": 55.51, + "learning_rate": 4.457954545454546e-05, + "loss": 0.0001, + "step": 9770 + }, + { + "epoch": 55.51, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.028465455397963524, + "eval_runtime": 124.865, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 9770 + }, + { + "epoch": 55.57, + "learning_rate": 4.4522727272727276e-05, + "loss": 0.0001, + "step": 9780 + }, + { + "epoch": 55.57, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.028566544875502586, + "eval_runtime": 124.9793, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9780 + }, + { + "epoch": 55.62, + "learning_rate": 4.4465909090909094e-05, + "loss": 0.0001, + "step": 9790 + }, + { + "epoch": 55.62, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.028852349147200584, + "eval_runtime": 125.0472, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 9790 + }, + { + "epoch": 55.68, + "learning_rate": 4.440909090909091e-05, + "loss": 0.0001, + "step": 9800 + }, + { + "epoch": 55.68, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.02900678478181362, + "eval_runtime": 125.387, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 9800 + }, + { + "epoch": 55.74, + "learning_rate": 4.435227272727273e-05, + "loss": 0.0001, + "step": 9810 + }, + { + "epoch": 55.74, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.029142946004867554, + "eval_runtime": 125.1206, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9810 + }, + { + "epoch": 55.8, + "learning_rate": 4.429545454545455e-05, + "loss": 0.0001, + "step": 9820 + }, + { + "epoch": 55.8, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.029204918071627617, + "eval_runtime": 124.9372, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 9820 + }, + { + "epoch": 55.85, + "learning_rate": 4.4238636363636366e-05, + "loss": 0.0008, + "step": 9830 + }, + { + "epoch": 55.85, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.028957514092326164, + "eval_runtime": 125.5304, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 9830 + }, + { + "epoch": 55.91, + "learning_rate": 4.4181818181818184e-05, + "loss": 0.0001, + "step": 9840 + }, + { + "epoch": 55.91, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.027500122785568237, + "eval_runtime": 125.0873, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 9840 + }, + { + "epoch": 55.97, + "learning_rate": 4.4125e-05, + "loss": 0.0001, + "step": 9850 + }, + { + "epoch": 55.97, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.026858974248170853, + "eval_runtime": 125.4227, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 9850 + }, + { + "epoch": 56.02, + "learning_rate": 4.406818181818182e-05, + "loss": 0.0001, + "step": 9860 + }, + { + "epoch": 56.02, + "eval_accuracy": 0.9943181872367859, + "eval_loss": 0.02414500154554844, + "eval_runtime": 125.6916, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 9860 + }, + { + "epoch": 56.08, + "learning_rate": 4.401136363636364e-05, + "loss": 0.0111, + "step": 9870 + }, + { + "epoch": 56.08, + "eval_accuracy": 1.0, + "eval_loss": 8.433515176875517e-05, + "eval_runtime": 125.5652, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 9870 + }, + { + "epoch": 56.14, + "learning_rate": 4.3954545454545456e-05, + "loss": 0.0001, + "step": 9880 + }, + { + "epoch": 56.14, + "eval_accuracy": 1.0, + "eval_loss": 0.00014762309729121625, + "eval_runtime": 125.7903, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.7, + "step": 9880 + }, + { + "epoch": 56.19, + "learning_rate": 4.3897727272727274e-05, + "loss": 0.0084, + "step": 9890 + }, + { + "epoch": 56.19, + "eval_accuracy": 1.0, + "eval_loss": 0.000234335326240398, + "eval_runtime": 125.1548, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 9890 + }, + { + "epoch": 56.25, + "learning_rate": 4.384090909090909e-05, + "loss": 0.0241, + "step": 9900 + }, + { + "epoch": 56.25, + "eval_accuracy": 1.0, + "eval_loss": 0.0002298897015862167, + "eval_runtime": 125.1705, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 9900 + }, + { + "epoch": 56.31, + "learning_rate": 4.378409090909092e-05, + "loss": 0.2374, + "step": 9910 + }, + { + "epoch": 56.31, + "eval_accuracy": 1.0, + "eval_loss": 0.0001003803190542385, + "eval_runtime": 125.765, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 9910 + }, + { + "epoch": 56.36, + "learning_rate": 4.372727272727273e-05, + "loss": 0.0001, + "step": 9920 + }, + { + "epoch": 56.36, + "eval_accuracy": 1.0, + "eval_loss": 0.0001276274269912392, + "eval_runtime": 124.9993, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9920 + }, + { + "epoch": 56.42, + "learning_rate": 4.3670454545454546e-05, + "loss": 0.0001, + "step": 9930 + }, + { + "epoch": 56.42, + "eval_accuracy": 1.0, + "eval_loss": 0.00016014785796869546, + "eval_runtime": 125.8838, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 9930 + }, + { + "epoch": 56.48, + "learning_rate": 4.361363636363637e-05, + "loss": 0.0002, + "step": 9940 + }, + { + "epoch": 56.48, + "eval_accuracy": 1.0, + "eval_loss": 0.00016658684762660414, + "eval_runtime": 125.5118, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 9940 + }, + { + "epoch": 56.53, + "learning_rate": 4.355681818181818e-05, + "loss": 0.0002, + "step": 9950 + }, + { + "epoch": 56.53, + "eval_accuracy": 1.0, + "eval_loss": 0.00015887449262663722, + "eval_runtime": 125.3416, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 9950 + }, + { + "epoch": 56.59, + "learning_rate": 4.35e-05, + "loss": 0.0002, + "step": 9960 + }, + { + "epoch": 56.59, + "eval_accuracy": 1.0, + "eval_loss": 0.00014719671162310988, + "eval_runtime": 125.5348, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 9960 + }, + { + "epoch": 56.65, + "learning_rate": 4.3443181818181825e-05, + "loss": 0.0001, + "step": 9970 + }, + { + "epoch": 56.65, + "eval_accuracy": 1.0, + "eval_loss": 0.00013572383613791317, + "eval_runtime": 124.9778, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 9970 + }, + { + "epoch": 56.7, + "learning_rate": 4.3386363636363636e-05, + "loss": 0.0002, + "step": 9980 + }, + { + "epoch": 56.7, + "eval_accuracy": 1.0, + "eval_loss": 0.00012371722550597042, + "eval_runtime": 125.3066, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 9980 + }, + { + "epoch": 56.76, + "learning_rate": 4.3329545454545454e-05, + "loss": 0.0001, + "step": 9990 + }, + { + "epoch": 56.76, + "eval_accuracy": 1.0, + "eval_loss": 0.00010807270155055448, + "eval_runtime": 125.6185, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 9990 + }, + { + "epoch": 56.82, + "learning_rate": 4.327272727272728e-05, + "loss": 0.0001, + "step": 10000 + }, + { + "epoch": 56.82, + "eval_accuracy": 1.0, + "eval_loss": 0.00010009347170125693, + "eval_runtime": 125.4101, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 10000 + }, + { + "epoch": 56.88, + "learning_rate": 4.321590909090909e-05, + "loss": 0.0001, + "step": 10010 + }, + { + "epoch": 56.88, + "eval_accuracy": 1.0, + "eval_loss": 9.442560985917225e-05, + "eval_runtime": 125.675, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 10010 + }, + { + "epoch": 56.93, + "learning_rate": 4.315909090909091e-05, + "loss": 0.0001, + "step": 10020 + }, + { + "epoch": 56.93, + "eval_accuracy": 1.0, + "eval_loss": 8.971278293756768e-05, + "eval_runtime": 125.3573, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 10020 + }, + { + "epoch": 56.99, + "learning_rate": 4.310227272727273e-05, + "loss": 0.0001, + "step": 10030 + }, + { + "epoch": 56.99, + "eval_accuracy": 1.0, + "eval_loss": 8.597394480602816e-05, + "eval_runtime": 125.4951, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 10030 + }, + { + "epoch": 57.05, + "learning_rate": 4.304545454545455e-05, + "loss": 0.0001, + "step": 10040 + }, + { + "epoch": 57.05, + "eval_accuracy": 1.0, + "eval_loss": 8.212436659960076e-05, + "eval_runtime": 125.2538, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 10040 + }, + { + "epoch": 57.1, + "learning_rate": 4.298863636363636e-05, + "loss": 0.0001, + "step": 10050 + }, + { + "epoch": 57.1, + "eval_accuracy": 1.0, + "eval_loss": 7.894635200500488e-05, + "eval_runtime": 124.9472, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 10050 + }, + { + "epoch": 57.16, + "learning_rate": 4.293181818181819e-05, + "loss": 0.0001, + "step": 10060 + }, + { + "epoch": 57.16, + "eval_accuracy": 1.0, + "eval_loss": 7.629597530467436e-05, + "eval_runtime": 125.1308, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10060 + }, + { + "epoch": 57.22, + "learning_rate": 4.2875000000000005e-05, + "loss": 0.0001, + "step": 10070 + }, + { + "epoch": 57.22, + "eval_accuracy": 1.0, + "eval_loss": 7.404657662846148e-05, + "eval_runtime": 125.2524, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 10070 + }, + { + "epoch": 57.27, + "learning_rate": 4.281818181818182e-05, + "loss": 0.0001, + "step": 10080 + }, + { + "epoch": 57.27, + "eval_accuracy": 1.0, + "eval_loss": 7.166340947151184e-05, + "eval_runtime": 125.5903, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 10080 + }, + { + "epoch": 57.33, + "learning_rate": 4.276136363636364e-05, + "loss": 0.0001, + "step": 10090 + }, + { + "epoch": 57.33, + "eval_accuracy": 1.0, + "eval_loss": 6.922503962414339e-05, + "eval_runtime": 125.586, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 10090 + }, + { + "epoch": 57.39, + "learning_rate": 4.270454545454546e-05, + "loss": 0.0001, + "step": 10100 + }, + { + "epoch": 57.39, + "eval_accuracy": 1.0, + "eval_loss": 6.703152030240744e-05, + "eval_runtime": 125.1157, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10100 + }, + { + "epoch": 57.44, + "learning_rate": 4.264772727272727e-05, + "loss": 0.0001, + "step": 10110 + }, + { + "epoch": 57.44, + "eval_accuracy": 1.0, + "eval_loss": 6.490200757980347e-05, + "eval_runtime": 125.3, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 10110 + }, + { + "epoch": 57.5, + "learning_rate": 4.2590909090909096e-05, + "loss": 0.0001, + "step": 10120 + }, + { + "epoch": 57.5, + "eval_accuracy": 1.0, + "eval_loss": 6.303936243057251e-05, + "eval_runtime": 125.1686, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10120 + }, + { + "epoch": 57.56, + "learning_rate": 4.2534090909090914e-05, + "loss": 0.0001, + "step": 10130 + }, + { + "epoch": 57.56, + "eval_accuracy": 1.0, + "eval_loss": 6.129321991465986e-05, + "eval_runtime": 125.0635, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 10130 + }, + { + "epoch": 57.61, + "learning_rate": 4.2477272727272725e-05, + "loss": 0.0001, + "step": 10140 + }, + { + "epoch": 57.61, + "eval_accuracy": 1.0, + "eval_loss": 5.9356403653509915e-05, + "eval_runtime": 125.2384, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 10140 + }, + { + "epoch": 57.67, + "learning_rate": 4.242045454545455e-05, + "loss": 0.0001, + "step": 10150 + }, + { + "epoch": 57.67, + "eval_accuracy": 1.0, + "eval_loss": 5.757537655881606e-05, + "eval_runtime": 125.0676, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 10150 + }, + { + "epoch": 57.73, + "learning_rate": 4.236363636363637e-05, + "loss": 0.0001, + "step": 10160 + }, + { + "epoch": 57.73, + "eval_accuracy": 1.0, + "eval_loss": 5.593117020907812e-05, + "eval_runtime": 125.5307, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 10160 + }, + { + "epoch": 57.78, + "learning_rate": 4.2306818181818186e-05, + "loss": 0.0, + "step": 10170 + }, + { + "epoch": 57.78, + "eval_accuracy": 1.0, + "eval_loss": 5.447729199659079e-05, + "eval_runtime": 125.34, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 10170 + }, + { + "epoch": 57.84, + "learning_rate": 4.2250000000000004e-05, + "loss": 0.0, + "step": 10180 + }, + { + "epoch": 57.84, + "eval_accuracy": 1.0, + "eval_loss": 5.319138654158451e-05, + "eval_runtime": 125.5264, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 10180 + }, + { + "epoch": 57.9, + "learning_rate": 4.219318181818182e-05, + "loss": 0.0001, + "step": 10190 + }, + { + "epoch": 57.9, + "eval_accuracy": 1.0, + "eval_loss": 5.1897357479901984e-05, + "eval_runtime": 125.3808, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 10190 + }, + { + "epoch": 57.95, + "learning_rate": 4.213636363636364e-05, + "loss": 0.0001, + "step": 10200 + }, + { + "epoch": 57.95, + "eval_accuracy": 1.0, + "eval_loss": 5.050816253060475e-05, + "eval_runtime": 126.0439, + "eval_samples_per_second": 2.793, + "eval_steps_per_second": 0.698, + "step": 10200 + }, + { + "epoch": 58.01, + "learning_rate": 4.207954545454546e-05, + "loss": 0.0, + "step": 10210 + }, + { + "epoch": 58.01, + "eval_accuracy": 1.0, + "eval_loss": 4.929744318360463e-05, + "eval_runtime": 125.3028, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 10210 + }, + { + "epoch": 58.07, + "learning_rate": 4.2022727272727276e-05, + "loss": 0.0, + "step": 10220 + }, + { + "epoch": 58.07, + "eval_accuracy": 1.0, + "eval_loss": 4.834580249735154e-05, + "eval_runtime": 125.306, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 10220 + }, + { + "epoch": 58.12, + "learning_rate": 4.1965909090909094e-05, + "loss": 0.0, + "step": 10230 + }, + { + "epoch": 58.12, + "eval_accuracy": 1.0, + "eval_loss": 4.7322700993390754e-05, + "eval_runtime": 125.4006, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 10230 + }, + { + "epoch": 58.18, + "learning_rate": 4.190909090909091e-05, + "loss": 0.0, + "step": 10240 + }, + { + "epoch": 58.18, + "eval_accuracy": 1.0, + "eval_loss": 4.63520955236163e-05, + "eval_runtime": 125.1475, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10240 + }, + { + "epoch": 58.24, + "learning_rate": 4.185227272727273e-05, + "loss": 0.0, + "step": 10250 + }, + { + "epoch": 58.24, + "eval_accuracy": 1.0, + "eval_loss": 4.534728213911876e-05, + "eval_runtime": 125.3015, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 10250 + }, + { + "epoch": 58.3, + "learning_rate": 4.179545454545455e-05, + "loss": 0.0001, + "step": 10260 + }, + { + "epoch": 58.3, + "eval_accuracy": 1.0, + "eval_loss": 4.42652526544407e-05, + "eval_runtime": 125.4793, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 10260 + }, + { + "epoch": 58.35, + "learning_rate": 4.1738636363636366e-05, + "loss": 0.0, + "step": 10270 + }, + { + "epoch": 58.35, + "eval_accuracy": 1.0, + "eval_loss": 4.3308871681801975e-05, + "eval_runtime": 125.169, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10270 + }, + { + "epoch": 58.41, + "learning_rate": 4.1681818181818184e-05, + "loss": 0.0, + "step": 10280 + }, + { + "epoch": 58.41, + "eval_accuracy": 1.0, + "eval_loss": 4.241344504407607e-05, + "eval_runtime": 125.414, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 10280 + }, + { + "epoch": 58.47, + "learning_rate": 4.1625e-05, + "loss": 0.0, + "step": 10290 + }, + { + "epoch": 58.47, + "eval_accuracy": 1.0, + "eval_loss": 4.163147241342813e-05, + "eval_runtime": 125.202, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 10290 + }, + { + "epoch": 58.52, + "learning_rate": 4.156818181818182e-05, + "loss": 0.0, + "step": 10300 + }, + { + "epoch": 58.52, + "eval_accuracy": 1.0, + "eval_loss": 4.080581493326463e-05, + "eval_runtime": 125.28, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 10300 + }, + { + "epoch": 58.58, + "learning_rate": 4.151136363636364e-05, + "loss": 0.0, + "step": 10310 + }, + { + "epoch": 58.58, + "eval_accuracy": 1.0, + "eval_loss": 4.0010294469539076e-05, + "eval_runtime": 125.6035, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 10310 + }, + { + "epoch": 58.64, + "learning_rate": 4.1454545454545456e-05, + "loss": 0.0, + "step": 10320 + }, + { + "epoch": 58.64, + "eval_accuracy": 1.0, + "eval_loss": 3.9227983506862074e-05, + "eval_runtime": 125.7612, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 10320 + }, + { + "epoch": 58.69, + "learning_rate": 4.1397727272727274e-05, + "loss": 0.0, + "step": 10330 + }, + { + "epoch": 58.69, + "eval_accuracy": 1.0, + "eval_loss": 3.8599766412517056e-05, + "eval_runtime": 125.1141, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10330 + }, + { + "epoch": 58.75, + "learning_rate": 4.134090909090909e-05, + "loss": 0.0, + "step": 10340 + }, + { + "epoch": 58.75, + "eval_accuracy": 1.0, + "eval_loss": 3.7891619285801426e-05, + "eval_runtime": 125.7195, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 10340 + }, + { + "epoch": 58.81, + "learning_rate": 4.128409090909091e-05, + "loss": 0.0, + "step": 10350 + }, + { + "epoch": 58.81, + "eval_accuracy": 1.0, + "eval_loss": 3.7236648495309055e-05, + "eval_runtime": 125.3661, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 10350 + }, + { + "epoch": 58.86, + "learning_rate": 4.122727272727273e-05, + "loss": 0.0, + "step": 10360 + }, + { + "epoch": 58.86, + "eval_accuracy": 1.0, + "eval_loss": 3.6578287108568475e-05, + "eval_runtime": 125.2624, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 10360 + }, + { + "epoch": 58.92, + "learning_rate": 4.1170454545454546e-05, + "loss": 0.0, + "step": 10370 + }, + { + "epoch": 58.92, + "eval_accuracy": 1.0, + "eval_loss": 3.5993754863739014e-05, + "eval_runtime": 125.5659, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 10370 + }, + { + "epoch": 58.98, + "learning_rate": 4.1113636363636364e-05, + "loss": 0.0, + "step": 10380 + }, + { + "epoch": 58.98, + "eval_accuracy": 1.0, + "eval_loss": 3.537468001013622e-05, + "eval_runtime": 125.7133, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 10380 + }, + { + "epoch": 59.03, + "learning_rate": 4.105681818181818e-05, + "loss": 0.0, + "step": 10390 + }, + { + "epoch": 59.03, + "eval_accuracy": 1.0, + "eval_loss": 3.478235885268077e-05, + "eval_runtime": 125.6233, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 10390 + }, + { + "epoch": 59.09, + "learning_rate": 4.1e-05, + "loss": 0.0, + "step": 10400 + }, + { + "epoch": 59.09, + "eval_accuracy": 1.0, + "eval_loss": 3.4150412830058485e-05, + "eval_runtime": 125.9163, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 10400 + }, + { + "epoch": 59.15, + "learning_rate": 4.094318181818182e-05, + "loss": 0.0, + "step": 10410 + }, + { + "epoch": 59.15, + "eval_accuracy": 1.0, + "eval_loss": 3.35933145834133e-05, + "eval_runtime": 125.2219, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 10410 + }, + { + "epoch": 59.2, + "learning_rate": 4.0886363636363637e-05, + "loss": 0.0, + "step": 10420 + }, + { + "epoch": 59.2, + "eval_accuracy": 1.0, + "eval_loss": 3.311512045911513e-05, + "eval_runtime": 125.4783, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 10420 + }, + { + "epoch": 59.26, + "learning_rate": 4.082954545454546e-05, + "loss": 0.0, + "step": 10430 + }, + { + "epoch": 59.26, + "eval_accuracy": 1.0, + "eval_loss": 3.2620675483485684e-05, + "eval_runtime": 125.688, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 10430 + }, + { + "epoch": 59.32, + "learning_rate": 4.077272727272727e-05, + "loss": 0.0, + "step": 10440 + }, + { + "epoch": 59.32, + "eval_accuracy": 1.0, + "eval_loss": 3.211234070477076e-05, + "eval_runtime": 125.0951, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 10440 + }, + { + "epoch": 59.38, + "learning_rate": 4.071590909090909e-05, + "loss": 0.0, + "step": 10450 + }, + { + "epoch": 59.38, + "eval_accuracy": 1.0, + "eval_loss": 3.1547449907520786e-05, + "eval_runtime": 125.2281, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 10450 + }, + { + "epoch": 59.43, + "learning_rate": 4.0659090909090915e-05, + "loss": 0.0, + "step": 10460 + }, + { + "epoch": 59.43, + "eval_accuracy": 1.0, + "eval_loss": 3.0966984922997653e-05, + "eval_runtime": 125.5878, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 10460 + }, + { + "epoch": 59.49, + "learning_rate": 4.060227272727273e-05, + "loss": 0.0, + "step": 10470 + }, + { + "epoch": 59.49, + "eval_accuracy": 1.0, + "eval_loss": 3.0500306820613332e-05, + "eval_runtime": 125.411, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 10470 + }, + { + "epoch": 59.55, + "learning_rate": 4.0545454545454545e-05, + "loss": 0.0, + "step": 10480 + }, + { + "epoch": 59.55, + "eval_accuracy": 1.0, + "eval_loss": 3.0043111109989695e-05, + "eval_runtime": 125.388, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 10480 + }, + { + "epoch": 59.6, + "learning_rate": 4.048863636363637e-05, + "loss": 0.0, + "step": 10490 + }, + { + "epoch": 59.6, + "eval_accuracy": 1.0, + "eval_loss": 2.9613009246531874e-05, + "eval_runtime": 125.2665, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 10490 + }, + { + "epoch": 59.66, + "learning_rate": 4.043181818181818e-05, + "loss": 0.0, + "step": 10500 + }, + { + "epoch": 59.66, + "eval_accuracy": 1.0, + "eval_loss": 2.9245898986118846e-05, + "eval_runtime": 125.2696, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 10500 + }, + { + "epoch": 59.72, + "learning_rate": 4.0375e-05, + "loss": 0.0, + "step": 10510 + }, + { + "epoch": 59.72, + "eval_accuracy": 1.0, + "eval_loss": 2.8873369956272654e-05, + "eval_runtime": 125.4541, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 10510 + }, + { + "epoch": 59.77, + "learning_rate": 4.0318181818181824e-05, + "loss": 0.0, + "step": 10520 + }, + { + "epoch": 59.77, + "eval_accuracy": 1.0, + "eval_loss": 2.8499825930339284e-05, + "eval_runtime": 125.8082, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 10520 + }, + { + "epoch": 59.83, + "learning_rate": 4.0261363636363635e-05, + "loss": 0.0, + "step": 10530 + }, + { + "epoch": 59.83, + "eval_accuracy": 1.0, + "eval_loss": 2.814287472574506e-05, + "eval_runtime": 126.0852, + "eval_samples_per_second": 2.792, + "eval_steps_per_second": 0.698, + "step": 10530 + }, + { + "epoch": 59.89, + "learning_rate": 4.020454545454545e-05, + "loss": 0.0, + "step": 10540 + }, + { + "epoch": 59.89, + "eval_accuracy": 1.0, + "eval_loss": 2.775408938759938e-05, + "eval_runtime": 125.7529, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 10540 + }, + { + "epoch": 59.94, + "learning_rate": 4.014772727272728e-05, + "loss": 0.0, + "step": 10550 + }, + { + "epoch": 59.94, + "eval_accuracy": 1.0, + "eval_loss": 2.7256593966740184e-05, + "eval_runtime": 125.7906, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.7, + "step": 10550 + }, + { + "epoch": 60.0, + "learning_rate": 4.009090909090909e-05, + "loss": 0.0, + "step": 10560 + }, + { + "epoch": 60.0, + "eval_accuracy": 1.0, + "eval_loss": 2.6843765226658434e-05, + "eval_runtime": 126.0571, + "eval_samples_per_second": 2.792, + "eval_steps_per_second": 0.698, + "step": 10560 + }, + { + "epoch": 60.06, + "learning_rate": 4.003409090909091e-05, + "loss": 0.0, + "step": 10570 + }, + { + "epoch": 60.06, + "eval_accuracy": 1.0, + "eval_loss": 2.646378561621532e-05, + "eval_runtime": 125.2543, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 10570 + }, + { + "epoch": 60.11, + "learning_rate": 3.997727272727273e-05, + "loss": 0.0, + "step": 10580 + }, + { + "epoch": 60.11, + "eval_accuracy": 1.0, + "eval_loss": 2.6106496079592034e-05, + "eval_runtime": 125.1985, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10580 + }, + { + "epoch": 60.17, + "learning_rate": 3.992045454545455e-05, + "loss": 0.0, + "step": 10590 + }, + { + "epoch": 60.17, + "eval_accuracy": 1.0, + "eval_loss": 2.5742772777448408e-05, + "eval_runtime": 125.3636, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 10590 + }, + { + "epoch": 60.23, + "learning_rate": 3.986363636363636e-05, + "loss": 0.0, + "step": 10600 + }, + { + "epoch": 60.23, + "eval_accuracy": 1.0, + "eval_loss": 2.5449151507928036e-05, + "eval_runtime": 125.3643, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 10600 + }, + { + "epoch": 60.28, + "learning_rate": 3.9806818181818186e-05, + "loss": 0.0, + "step": 10610 + }, + { + "epoch": 60.28, + "eval_accuracy": 1.0, + "eval_loss": 2.5061044652829878e-05, + "eval_runtime": 125.1344, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10610 + }, + { + "epoch": 60.34, + "learning_rate": 3.9750000000000004e-05, + "loss": 0.0, + "step": 10620 + }, + { + "epoch": 60.34, + "eval_accuracy": 1.0, + "eval_loss": 2.4766406568232924e-05, + "eval_runtime": 125.1695, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10620 + }, + { + "epoch": 60.4, + "learning_rate": 3.9693181818181815e-05, + "loss": 0.0, + "step": 10630 + }, + { + "epoch": 60.4, + "eval_accuracy": 1.0, + "eval_loss": 2.444704841764178e-05, + "eval_runtime": 124.8357, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 10630 + }, + { + "epoch": 60.45, + "learning_rate": 3.963636363636364e-05, + "loss": 0.0, + "step": 10640 + }, + { + "epoch": 60.45, + "eval_accuracy": 1.0, + "eval_loss": 2.4160877728718333e-05, + "eval_runtime": 125.2307, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 10640 + }, + { + "epoch": 60.51, + "learning_rate": 3.957954545454546e-05, + "loss": 0.0, + "step": 10650 + }, + { + "epoch": 60.51, + "eval_accuracy": 1.0, + "eval_loss": 2.3877755666035227e-05, + "eval_runtime": 125.5723, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 10650 + }, + { + "epoch": 60.57, + "learning_rate": 3.952272727272727e-05, + "loss": 0.0, + "step": 10660 + }, + { + "epoch": 60.57, + "eval_accuracy": 1.0, + "eval_loss": 2.36271462199511e-05, + "eval_runtime": 126.0718, + "eval_samples_per_second": 2.792, + "eval_steps_per_second": 0.698, + "step": 10660 + }, + { + "epoch": 60.62, + "learning_rate": 3.9465909090909094e-05, + "loss": 0.0, + "step": 10670 + }, + { + "epoch": 60.62, + "eval_accuracy": 1.0, + "eval_loss": 2.3377551769954152e-05, + "eval_runtime": 125.9006, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 10670 + }, + { + "epoch": 60.68, + "learning_rate": 3.940909090909091e-05, + "loss": 0.0, + "step": 10680 + }, + { + "epoch": 60.68, + "eval_accuracy": 1.0, + "eval_loss": 2.3088672605808824e-05, + "eval_runtime": 125.2108, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 10680 + }, + { + "epoch": 60.74, + "learning_rate": 3.9352272727272723e-05, + "loss": 0.0, + "step": 10690 + }, + { + "epoch": 60.74, + "eval_accuracy": 1.0, + "eval_loss": 2.2800808437750675e-05, + "eval_runtime": 124.9181, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 10690 + }, + { + "epoch": 60.8, + "learning_rate": 3.929545454545455e-05, + "loss": 0.0, + "step": 10700 + }, + { + "epoch": 60.8, + "eval_accuracy": 1.0, + "eval_loss": 2.253969978482928e-05, + "eval_runtime": 125.0746, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 10700 + }, + { + "epoch": 60.85, + "learning_rate": 3.9238636363636366e-05, + "loss": 0.0, + "step": 10710 + }, + { + "epoch": 60.85, + "eval_accuracy": 1.0, + "eval_loss": 2.2275204173638485e-05, + "eval_runtime": 125.1162, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10710 + }, + { + "epoch": 60.91, + "learning_rate": 3.9181818181818184e-05, + "loss": 0.0, + "step": 10720 + }, + { + "epoch": 60.91, + "eval_accuracy": 1.0, + "eval_loss": 2.2017820811015554e-05, + "eval_runtime": 126.029, + "eval_samples_per_second": 2.793, + "eval_steps_per_second": 0.698, + "step": 10720 + }, + { + "epoch": 60.97, + "learning_rate": 3.9125e-05, + "loss": 0.0, + "step": 10730 + }, + { + "epoch": 60.97, + "eval_accuracy": 1.0, + "eval_loss": 2.1747568098362535e-05, + "eval_runtime": 125.8872, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 10730 + }, + { + "epoch": 61.02, + "learning_rate": 3.906818181818182e-05, + "loss": 0.0, + "step": 10740 + }, + { + "epoch": 61.02, + "eval_accuracy": 1.0, + "eval_loss": 2.152879096684046e-05, + "eval_runtime": 125.3713, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 10740 + }, + { + "epoch": 61.08, + "learning_rate": 3.901136363636364e-05, + "loss": 0.0, + "step": 10750 + }, + { + "epoch": 61.08, + "eval_accuracy": 1.0, + "eval_loss": 2.131306246155873e-05, + "eval_runtime": 125.1666, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10750 + }, + { + "epoch": 61.14, + "learning_rate": 3.8954545454545456e-05, + "loss": 0.0, + "step": 10760 + }, + { + "epoch": 61.14, + "eval_accuracy": 1.0, + "eval_loss": 2.1103431208757684e-05, + "eval_runtime": 125.1617, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10760 + }, + { + "epoch": 61.19, + "learning_rate": 3.8897727272727274e-05, + "loss": 0.0, + "step": 10770 + }, + { + "epoch": 61.19, + "eval_accuracy": 1.0, + "eval_loss": 2.0849773136433214e-05, + "eval_runtime": 125.2462, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 10770 + }, + { + "epoch": 61.25, + "learning_rate": 3.884090909090909e-05, + "loss": 0.0, + "step": 10780 + }, + { + "epoch": 61.25, + "eval_accuracy": 1.0, + "eval_loss": 2.060932274616789e-05, + "eval_runtime": 125.1403, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10780 + }, + { + "epoch": 61.31, + "learning_rate": 3.878409090909091e-05, + "loss": 0.0, + "step": 10790 + }, + { + "epoch": 61.31, + "eval_accuracy": 1.0, + "eval_loss": 2.033365126408171e-05, + "eval_runtime": 125.2227, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 10790 + }, + { + "epoch": 61.36, + "learning_rate": 3.872727272727273e-05, + "loss": 0.0, + "step": 10800 + }, + { + "epoch": 61.36, + "eval_accuracy": 1.0, + "eval_loss": 2.0083378331037238e-05, + "eval_runtime": 126.2669, + "eval_samples_per_second": 2.788, + "eval_steps_per_second": 0.697, + "step": 10800 + }, + { + "epoch": 61.42, + "learning_rate": 3.8670454545454547e-05, + "loss": 0.0, + "step": 10810 + }, + { + "epoch": 61.42, + "eval_accuracy": 1.0, + "eval_loss": 1.9875440557370894e-05, + "eval_runtime": 124.9531, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 10810 + }, + { + "epoch": 61.48, + "learning_rate": 3.8613636363636365e-05, + "loss": 0.0, + "step": 10820 + }, + { + "epoch": 61.48, + "eval_accuracy": 1.0, + "eval_loss": 1.9668177628773265e-05, + "eval_runtime": 125.1503, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 10820 + }, + { + "epoch": 61.53, + "learning_rate": 3.855681818181818e-05, + "loss": 0.0, + "step": 10830 + }, + { + "epoch": 61.53, + "eval_accuracy": 1.0, + "eval_loss": 1.944499854289461e-05, + "eval_runtime": 124.9795, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 10830 + }, + { + "epoch": 61.59, + "learning_rate": 3.85e-05, + "loss": 0.0, + "step": 10840 + }, + { + "epoch": 61.59, + "eval_accuracy": 1.0, + "eval_loss": 1.924891330418177e-05, + "eval_runtime": 125.0198, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 10840 + }, + { + "epoch": 61.65, + "learning_rate": 3.8443181818181826e-05, + "loss": 0.0, + "step": 10850 + }, + { + "epoch": 61.65, + "eval_accuracy": 1.0, + "eval_loss": 1.9060278646065854e-05, + "eval_runtime": 125.5125, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 10850 + }, + { + "epoch": 61.7, + "learning_rate": 3.838636363636364e-05, + "loss": 0.0, + "step": 10860 + }, + { + "epoch": 61.7, + "eval_accuracy": 1.0, + "eval_loss": 1.884116318251472e-05, + "eval_runtime": 124.9285, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 10860 + }, + { + "epoch": 61.76, + "learning_rate": 3.8329545454545455e-05, + "loss": 0.0, + "step": 10870 + }, + { + "epoch": 61.76, + "eval_accuracy": 1.0, + "eval_loss": 1.866776801762171e-05, + "eval_runtime": 125.1732, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10870 + }, + { + "epoch": 61.82, + "learning_rate": 3.827272727272728e-05, + "loss": 0.0, + "step": 10880 + }, + { + "epoch": 61.82, + "eval_accuracy": 1.0, + "eval_loss": 1.8517401258577593e-05, + "eval_runtime": 125.5972, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 10880 + }, + { + "epoch": 61.88, + "learning_rate": 3.821590909090909e-05, + "loss": 0.0, + "step": 10890 + }, + { + "epoch": 61.88, + "eval_accuracy": 1.0, + "eval_loss": 1.835518196457997e-05, + "eval_runtime": 124.8975, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 10890 + }, + { + "epoch": 61.93, + "learning_rate": 3.815909090909091e-05, + "loss": 0.0, + "step": 10900 + }, + { + "epoch": 61.93, + "eval_accuracy": 1.0, + "eval_loss": 1.8149276002077386e-05, + "eval_runtime": 125.52, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 10900 + }, + { + "epoch": 61.99, + "learning_rate": 3.8102272727272734e-05, + "loss": 0.0, + "step": 10910 + }, + { + "epoch": 61.99, + "eval_accuracy": 1.0, + "eval_loss": 1.790476198948454e-05, + "eval_runtime": 125.2621, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 10910 + }, + { + "epoch": 62.05, + "learning_rate": 3.8045454545454545e-05, + "loss": 0.0, + "step": 10920 + }, + { + "epoch": 62.05, + "eval_accuracy": 1.0, + "eval_loss": 1.772797986632213e-05, + "eval_runtime": 125.2724, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 10920 + }, + { + "epoch": 62.1, + "learning_rate": 3.798863636363636e-05, + "loss": 0.0, + "step": 10930 + }, + { + "epoch": 62.1, + "eval_accuracy": 1.0, + "eval_loss": 1.755153607518878e-05, + "eval_runtime": 125.3325, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 10930 + }, + { + "epoch": 62.16, + "learning_rate": 3.793181818181819e-05, + "loss": 0.0, + "step": 10940 + }, + { + "epoch": 62.16, + "eval_accuracy": 1.0, + "eval_loss": 1.740049265208654e-05, + "eval_runtime": 125.9882, + "eval_samples_per_second": 2.794, + "eval_steps_per_second": 0.698, + "step": 10940 + }, + { + "epoch": 62.22, + "learning_rate": 3.7875e-05, + "loss": 0.0, + "step": 10950 + }, + { + "epoch": 62.22, + "eval_accuracy": 1.0, + "eval_loss": 1.7235563063877635e-05, + "eval_runtime": 125.1625, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10950 + }, + { + "epoch": 62.27, + "learning_rate": 3.781818181818182e-05, + "loss": 0.0, + "step": 10960 + }, + { + "epoch": 62.27, + "eval_accuracy": 1.0, + "eval_loss": 1.703236557659693e-05, + "eval_runtime": 125.1755, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 10960 + }, + { + "epoch": 62.33, + "learning_rate": 3.776136363636364e-05, + "loss": 0.0, + "step": 10970 + }, + { + "epoch": 62.33, + "eval_accuracy": 1.0, + "eval_loss": 1.6794285329524428e-05, + "eval_runtime": 125.2721, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 10970 + }, + { + "epoch": 62.39, + "learning_rate": 3.770454545454546e-05, + "loss": 0.0, + "step": 10980 + }, + { + "epoch": 62.39, + "eval_accuracy": 1.0, + "eval_loss": 1.6586347555858083e-05, + "eval_runtime": 125.5375, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 10980 + }, + { + "epoch": 62.44, + "learning_rate": 3.764772727272727e-05, + "loss": 0.0, + "step": 10990 + }, + { + "epoch": 62.44, + "eval_accuracy": 1.0, + "eval_loss": 1.6411258911830373e-05, + "eval_runtime": 125.7002, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 10990 + }, + { + "epoch": 62.5, + "learning_rate": 3.7590909090909096e-05, + "loss": 0.0, + "step": 11000 + }, + { + "epoch": 62.5, + "eval_accuracy": 1.0, + "eval_loss": 1.621517185412813e-05, + "eval_runtime": 126.0349, + "eval_samples_per_second": 2.793, + "eval_steps_per_second": 0.698, + "step": 11000 + }, + { + "epoch": 62.56, + "learning_rate": 3.7534090909090914e-05, + "loss": 0.0, + "step": 11010 + }, + { + "epoch": 62.56, + "eval_accuracy": 1.0, + "eval_loss": 1.6052952560130507e-05, + "eval_runtime": 125.3666, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 11010 + }, + { + "epoch": 62.61, + "learning_rate": 3.7477272727272725e-05, + "loss": 0.0, + "step": 11020 + }, + { + "epoch": 62.61, + "eval_accuracy": 1.0, + "eval_loss": 1.592053558852058e-05, + "eval_runtime": 124.8452, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 11020 + }, + { + "epoch": 62.67, + "learning_rate": 3.742045454545455e-05, + "loss": 0.0, + "step": 11030 + }, + { + "epoch": 62.67, + "eval_accuracy": 1.0, + "eval_loss": 1.5814195648999885e-05, + "eval_runtime": 124.7452, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 11030 + }, + { + "epoch": 62.73, + "learning_rate": 3.736363636363637e-05, + "loss": 0.0, + "step": 11040 + }, + { + "epoch": 62.73, + "eval_accuracy": 1.0, + "eval_loss": 1.566823266330175e-05, + "eval_runtime": 125.5015, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 11040 + }, + { + "epoch": 62.78, + "learning_rate": 3.730681818181818e-05, + "loss": 0.0, + "step": 11050 + }, + { + "epoch": 62.78, + "eval_accuracy": 1.0, + "eval_loss": 1.5536492355749942e-05, + "eval_runtime": 124.8782, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 11050 + }, + { + "epoch": 62.84, + "learning_rate": 3.7250000000000004e-05, + "loss": 0.0, + "step": 11060 + }, + { + "epoch": 62.84, + "eval_accuracy": 1.0, + "eval_loss": 1.5400350093841553e-05, + "eval_runtime": 124.9551, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 11060 + }, + { + "epoch": 62.9, + "learning_rate": 3.719318181818182e-05, + "loss": 0.0, + "step": 11070 + }, + { + "epoch": 62.9, + "eval_accuracy": 1.0, + "eval_loss": 1.5287236237782054e-05, + "eval_runtime": 125.6008, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 11070 + }, + { + "epoch": 62.95, + "learning_rate": 3.7136363636363633e-05, + "loss": 0.0, + "step": 11080 + }, + { + "epoch": 62.95, + "eval_accuracy": 1.0, + "eval_loss": 1.5171075574471615e-05, + "eval_runtime": 125.0267, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 11080 + }, + { + "epoch": 63.01, + "learning_rate": 3.707954545454546e-05, + "loss": 0.0, + "step": 11090 + }, + { + "epoch": 63.01, + "eval_accuracy": 1.0, + "eval_loss": 1.500513099017553e-05, + "eval_runtime": 125.2364, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 11090 + }, + { + "epoch": 63.07, + "learning_rate": 3.7022727272727276e-05, + "loss": 0.0, + "step": 11100 + }, + { + "epoch": 63.07, + "eval_accuracy": 1.0, + "eval_loss": 1.4871020539430901e-05, + "eval_runtime": 125.5001, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 11100 + }, + { + "epoch": 63.12, + "learning_rate": 3.696590909090909e-05, + "loss": 0.0, + "step": 11110 + }, + { + "epoch": 63.12, + "eval_accuracy": 1.0, + "eval_loss": 1.4762309547222685e-05, + "eval_runtime": 125.1975, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 11110 + }, + { + "epoch": 63.18, + "learning_rate": 3.690909090909091e-05, + "loss": 0.0, + "step": 11120 + }, + { + "epoch": 63.18, + "eval_accuracy": 1.0, + "eval_loss": 1.4620410183852073e-05, + "eval_runtime": 125.0984, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 11120 + }, + { + "epoch": 63.24, + "learning_rate": 3.685227272727273e-05, + "loss": 0.0, + "step": 11130 + }, + { + "epoch": 63.24, + "eval_accuracy": 1.0, + "eval_loss": 1.4505602848657873e-05, + "eval_runtime": 125.4931, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 11130 + }, + { + "epoch": 63.3, + "learning_rate": 3.679545454545455e-05, + "loss": 0.0, + "step": 11140 + }, + { + "epoch": 63.3, + "eval_accuracy": 1.0, + "eval_loss": 1.4391473996511195e-05, + "eval_runtime": 125.9531, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 0.699, + "step": 11140 + }, + { + "epoch": 63.35, + "learning_rate": 3.6738636363636366e-05, + "loss": 0.0, + "step": 11150 + }, + { + "epoch": 63.35, + "eval_accuracy": 1.0, + "eval_loss": 1.4271925465436652e-05, + "eval_runtime": 124.7912, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 11150 + }, + { + "epoch": 63.41, + "learning_rate": 3.6681818181818185e-05, + "loss": 0.0, + "step": 11160 + }, + { + "epoch": 63.41, + "eval_accuracy": 1.0, + "eval_loss": 1.4161182662064675e-05, + "eval_runtime": 124.8795, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 11160 + }, + { + "epoch": 63.47, + "learning_rate": 3.6625e-05, + "loss": 0.0, + "step": 11170 + }, + { + "epoch": 63.47, + "eval_accuracy": 1.0, + "eval_loss": 1.4015558008395601e-05, + "eval_runtime": 125.9193, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 0.699, + "step": 11170 + }, + { + "epoch": 63.52, + "learning_rate": 3.656818181818182e-05, + "loss": 0.0, + "step": 11180 + }, + { + "epoch": 63.52, + "eval_accuracy": 1.0, + "eval_loss": 1.3866884728486184e-05, + "eval_runtime": 125.1717, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 11180 + }, + { + "epoch": 63.58, + "learning_rate": 3.651136363636364e-05, + "loss": 0.0, + "step": 11190 + }, + { + "epoch": 63.58, + "eval_accuracy": 1.0, + "eval_loss": 1.3740224858338479e-05, + "eval_runtime": 124.9783, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 11190 + }, + { + "epoch": 63.64, + "learning_rate": 3.645454545454546e-05, + "loss": 0.0, + "step": 11200 + }, + { + "epoch": 63.64, + "eval_accuracy": 1.0, + "eval_loss": 1.3598663826996926e-05, + "eval_runtime": 125.4601, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 11200 + }, + { + "epoch": 63.69, + "learning_rate": 3.6397727272727275e-05, + "loss": 0.0, + "step": 11210 + }, + { + "epoch": 63.69, + "eval_accuracy": 1.0, + "eval_loss": 1.3473020771925803e-05, + "eval_runtime": 125.345, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 11210 + }, + { + "epoch": 63.75, + "learning_rate": 3.634090909090909e-05, + "loss": 0.0, + "step": 11220 + }, + { + "epoch": 63.75, + "eval_accuracy": 1.0, + "eval_loss": 1.335177876171656e-05, + "eval_runtime": 125.0847, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 11220 + }, + { + "epoch": 63.81, + "learning_rate": 3.628409090909091e-05, + "loss": 0.0, + "step": 11230 + }, + { + "epoch": 63.81, + "eval_accuracy": 1.0, + "eval_loss": 1.325255107076373e-05, + "eval_runtime": 124.839, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 11230 + }, + { + "epoch": 63.86, + "learning_rate": 3.622727272727273e-05, + "loss": 0.0, + "step": 11240 + }, + { + "epoch": 63.86, + "eval_accuracy": 1.0, + "eval_loss": 1.3150952327123377e-05, + "eval_runtime": 125.2957, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 11240 + }, + { + "epoch": 63.92, + "learning_rate": 3.617045454545455e-05, + "loss": 0.0, + "step": 11250 + }, + { + "epoch": 63.92, + "eval_accuracy": 1.0, + "eval_loss": 1.3038854376645759e-05, + "eval_runtime": 125.2724, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 11250 + }, + { + "epoch": 63.98, + "learning_rate": 3.6113636363636365e-05, + "loss": 0.0, + "step": 11260 + }, + { + "epoch": 63.98, + "eval_accuracy": 1.0, + "eval_loss": 1.2902712114737369e-05, + "eval_runtime": 124.6988, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 11260 + }, + { + "epoch": 64.03, + "learning_rate": 3.605681818181818e-05, + "loss": 0.0, + "step": 11270 + }, + { + "epoch": 64.03, + "eval_accuracy": 1.0, + "eval_loss": 1.2781809346051887e-05, + "eval_runtime": 125.1961, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 11270 + }, + { + "epoch": 64.09, + "learning_rate": 3.6e-05, + "loss": 0.0, + "step": 11280 + }, + { + "epoch": 64.09, + "eval_accuracy": 1.0, + "eval_loss": 1.2672421689785551e-05, + "eval_runtime": 125.2548, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 11280 + }, + { + "epoch": 64.15, + "learning_rate": 3.594318181818182e-05, + "loss": 0.0, + "step": 11290 + }, + { + "epoch": 64.15, + "eval_accuracy": 1.0, + "eval_loss": 1.2578951100294944e-05, + "eval_runtime": 125.4987, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 11290 + }, + { + "epoch": 64.2, + "learning_rate": 3.588636363636364e-05, + "loss": 0.0, + "step": 11300 + }, + { + "epoch": 64.2, + "eval_accuracy": 1.0, + "eval_loss": 1.2470578440115787e-05, + "eval_runtime": 125.7587, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 11300 + }, + { + "epoch": 64.26, + "learning_rate": 3.5829545454545455e-05, + "loss": 0.0, + "step": 11310 + }, + { + "epoch": 64.26, + "eval_accuracy": 1.0, + "eval_loss": 1.2358142157609109e-05, + "eval_runtime": 125.0841, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 11310 + }, + { + "epoch": 64.32, + "learning_rate": 3.577272727272727e-05, + "loss": 0.0, + "step": 11320 + }, + { + "epoch": 64.32, + "eval_accuracy": 1.0, + "eval_loss": 1.224028801516397e-05, + "eval_runtime": 124.8419, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 11320 + }, + { + "epoch": 64.38, + "learning_rate": 3.571590909090909e-05, + "loss": 0.0, + "step": 11330 + }, + { + "epoch": 64.38, + "eval_accuracy": 1.0, + "eval_loss": 1.2129206879762933e-05, + "eval_runtime": 125.1123, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 11330 + }, + { + "epoch": 64.43, + "learning_rate": 3.565909090909091e-05, + "loss": 0.0, + "step": 11340 + }, + { + "epoch": 64.43, + "eval_accuracy": 1.0, + "eval_loss": 1.2011351827823091e-05, + "eval_runtime": 124.9921, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 11340 + }, + { + "epoch": 64.49, + "learning_rate": 3.560227272727273e-05, + "loss": 0.0, + "step": 11350 + }, + { + "epoch": 64.49, + "eval_accuracy": 1.0, + "eval_loss": 1.191280080092838e-05, + "eval_runtime": 125.1028, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 11350 + }, + { + "epoch": 64.55, + "learning_rate": 3.5545454545454545e-05, + "loss": 0.0, + "step": 11360 + }, + { + "epoch": 64.55, + "eval_accuracy": 1.0, + "eval_loss": 1.183253789349692e-05, + "eval_runtime": 124.8547, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 11360 + }, + { + "epoch": 64.6, + "learning_rate": 3.548863636363636e-05, + "loss": 0.0, + "step": 11370 + }, + { + "epoch": 64.6, + "eval_accuracy": 1.0, + "eval_loss": 1.1760063898691442e-05, + "eval_runtime": 123.9413, + "eval_samples_per_second": 2.84, + "eval_steps_per_second": 0.71, + "step": 11370 + }, + { + "epoch": 64.66, + "learning_rate": 3.543181818181818e-05, + "loss": 0.0, + "step": 11380 + }, + { + "epoch": 64.66, + "eval_accuracy": 1.0, + "eval_loss": 1.1670996173052117e-05, + "eval_runtime": 124.4256, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 11380 + }, + { + "epoch": 64.72, + "learning_rate": 3.5375e-05, + "loss": 0.0, + "step": 11390 + }, + { + "epoch": 64.72, + "eval_accuracy": 1.0, + "eval_loss": 1.1577524674066808e-05, + "eval_runtime": 123.873, + "eval_samples_per_second": 2.842, + "eval_steps_per_second": 0.71, + "step": 11390 + }, + { + "epoch": 64.77, + "learning_rate": 3.5318181818181824e-05, + "loss": 0.0, + "step": 11400 + }, + { + "epoch": 64.77, + "eval_accuracy": 1.0, + "eval_loss": 1.148202227341244e-05, + "eval_runtime": 123.6154, + "eval_samples_per_second": 2.848, + "eval_steps_per_second": 0.712, + "step": 11400 + }, + { + "epoch": 64.83, + "learning_rate": 3.5261363636363635e-05, + "loss": 0.0, + "step": 11410 + }, + { + "epoch": 64.83, + "eval_accuracy": 1.0, + "eval_loss": 1.1368570085323881e-05, + "eval_runtime": 123.6317, + "eval_samples_per_second": 2.847, + "eval_steps_per_second": 0.712, + "step": 11410 + }, + { + "epoch": 64.89, + "learning_rate": 3.520454545454545e-05, + "loss": 0.0, + "step": 11420 + }, + { + "epoch": 64.89, + "eval_accuracy": 1.0, + "eval_loss": 1.127001905842917e-05, + "eval_runtime": 123.3901, + "eval_samples_per_second": 2.853, + "eval_steps_per_second": 0.713, + "step": 11420 + }, + { + "epoch": 64.94, + "learning_rate": 3.514772727272728e-05, + "loss": 0.0, + "step": 11430 + }, + { + "epoch": 64.94, + "eval_accuracy": 1.0, + "eval_loss": 1.1195851584488992e-05, + "eval_runtime": 123.4089, + "eval_samples_per_second": 2.852, + "eval_steps_per_second": 0.713, + "step": 11430 + }, + { + "epoch": 65.0, + "learning_rate": 3.509090909090909e-05, + "loss": 0.0, + "step": 11440 + }, + { + "epoch": 65.0, + "eval_accuracy": 1.0, + "eval_loss": 1.1103058568551205e-05, + "eval_runtime": 123.8167, + "eval_samples_per_second": 2.843, + "eval_steps_per_second": 0.711, + "step": 11440 + }, + { + "epoch": 65.06, + "learning_rate": 3.503409090909091e-05, + "loss": 0.0, + "step": 11450 + }, + { + "epoch": 65.06, + "eval_accuracy": 1.0, + "eval_loss": 1.101432917494094e-05, + "eval_runtime": 122.9738, + "eval_samples_per_second": 2.862, + "eval_steps_per_second": 0.716, + "step": 11450 + }, + { + "epoch": 65.11, + "learning_rate": 3.497727272727273e-05, + "loss": 0.0, + "step": 11460 + }, + { + "epoch": 65.11, + "eval_accuracy": 1.0, + "eval_loss": 1.0923567060672212e-05, + "eval_runtime": 123.1488, + "eval_samples_per_second": 2.858, + "eval_steps_per_second": 0.715, + "step": 11460 + }, + { + "epoch": 65.17, + "learning_rate": 3.4920454545454544e-05, + "loss": 0.0, + "step": 11470 + }, + { + "epoch": 65.17, + "eval_accuracy": 1.0, + "eval_loss": 1.083924053091323e-05, + "eval_runtime": 122.8697, + "eval_samples_per_second": 2.865, + "eval_steps_per_second": 0.716, + "step": 11470 + }, + { + "epoch": 65.23, + "learning_rate": 3.486363636363637e-05, + "loss": 0.0, + "step": 11480 + }, + { + "epoch": 65.23, + "eval_accuracy": 1.0, + "eval_loss": 1.0756606570794247e-05, + "eval_runtime": 123.1129, + "eval_samples_per_second": 2.859, + "eval_steps_per_second": 0.715, + "step": 11480 + }, + { + "epoch": 65.28, + "learning_rate": 3.4806818181818186e-05, + "loss": 0.0, + "step": 11490 + }, + { + "epoch": 65.28, + "eval_accuracy": 1.0, + "eval_loss": 1.0676343663362786e-05, + "eval_runtime": 123.2106, + "eval_samples_per_second": 2.857, + "eval_steps_per_second": 0.714, + "step": 11490 + }, + { + "epoch": 65.34, + "learning_rate": 3.475e-05, + "loss": 0.0, + "step": 11500 + }, + { + "epoch": 65.34, + "eval_accuracy": 1.0, + "eval_loss": 1.0604885574139189e-05, + "eval_runtime": 122.5068, + "eval_samples_per_second": 2.873, + "eval_steps_per_second": 0.718, + "step": 11500 + }, + { + "epoch": 65.4, + "learning_rate": 3.469318181818182e-05, + "loss": 0.0, + "step": 11510 + }, + { + "epoch": 65.4, + "eval_accuracy": 1.0, + "eval_loss": 1.0541893971094396e-05, + "eval_runtime": 123.1992, + "eval_samples_per_second": 2.857, + "eval_steps_per_second": 0.714, + "step": 11510 + }, + { + "epoch": 65.45, + "learning_rate": 3.463636363636364e-05, + "loss": 0.0, + "step": 11520 + }, + { + "epoch": 65.45, + "eval_accuracy": 1.0, + "eval_loss": 1.047111345542362e-05, + "eval_runtime": 122.9069, + "eval_samples_per_second": 2.864, + "eval_steps_per_second": 0.716, + "step": 11520 + }, + { + "epoch": 65.51, + "learning_rate": 3.457954545454546e-05, + "loss": 0.0, + "step": 11530 + }, + { + "epoch": 65.51, + "eval_accuracy": 1.0, + "eval_loss": 1.0399994607723784e-05, + "eval_runtime": 123.4069, + "eval_samples_per_second": 2.852, + "eval_steps_per_second": 0.713, + "step": 11530 + }, + { + "epoch": 65.57, + "learning_rate": 3.4522727272727277e-05, + "loss": 0.0, + "step": 11540 + }, + { + "epoch": 65.57, + "eval_accuracy": 1.0, + "eval_loss": 1.0343437679694034e-05, + "eval_runtime": 122.6082, + "eval_samples_per_second": 2.871, + "eval_steps_per_second": 0.718, + "step": 11540 + }, + { + "epoch": 65.62, + "learning_rate": 3.4465909090909095e-05, + "loss": 0.0, + "step": 11550 + }, + { + "epoch": 65.62, + "eval_accuracy": 1.0, + "eval_loss": 1.0288235898769926e-05, + "eval_runtime": 122.768, + "eval_samples_per_second": 2.867, + "eval_steps_per_second": 0.717, + "step": 11550 + }, + { + "epoch": 65.68, + "learning_rate": 3.440909090909091e-05, + "loss": 0.0, + "step": 11560 + }, + { + "epoch": 65.68, + "eval_accuracy": 1.0, + "eval_loss": 1.0222534911008552e-05, + "eval_runtime": 122.7421, + "eval_samples_per_second": 2.868, + "eval_steps_per_second": 0.717, + "step": 11560 + }, + { + "epoch": 65.74, + "learning_rate": 3.435227272727273e-05, + "loss": 0.0, + "step": 11570 + }, + { + "epoch": 65.74, + "eval_accuracy": 1.0, + "eval_loss": 1.0150738489755895e-05, + "eval_runtime": 122.4537, + "eval_samples_per_second": 2.875, + "eval_steps_per_second": 0.719, + "step": 11570 + }, + { + "epoch": 65.8, + "learning_rate": 3.429545454545455e-05, + "loss": 0.0, + "step": 11580 + }, + { + "epoch": 65.8, + "eval_accuracy": 1.0, + "eval_loss": 1.008063554763794e-05, + "eval_runtime": 122.6983, + "eval_samples_per_second": 2.869, + "eval_steps_per_second": 0.717, + "step": 11580 + }, + { + "epoch": 65.85, + "learning_rate": 3.423863636363637e-05, + "loss": 0.0, + "step": 11590 + }, + { + "epoch": 65.85, + "eval_accuracy": 1.0, + "eval_loss": 1.0008839126385283e-05, + "eval_runtime": 122.33, + "eval_samples_per_second": 2.877, + "eval_steps_per_second": 0.719, + "step": 11590 + }, + { + "epoch": 65.91, + "learning_rate": 3.4181818181818185e-05, + "loss": 0.0, + "step": 11600 + }, + { + "epoch": 65.91, + "eval_accuracy": 1.0, + "eval_loss": 9.94550919131143e-06, + "eval_runtime": 122.6642, + "eval_samples_per_second": 2.87, + "eval_steps_per_second": 0.717, + "step": 11600 + }, + { + "epoch": 65.97, + "learning_rate": 3.4125e-05, + "loss": 0.0, + "step": 11610 + }, + { + "epoch": 65.97, + "eval_accuracy": 1.0, + "eval_loss": 9.875744581222534e-06, + "eval_runtime": 122.6718, + "eval_samples_per_second": 2.869, + "eval_steps_per_second": 0.717, + "step": 11610 + }, + { + "epoch": 66.02, + "learning_rate": 3.406818181818182e-05, + "loss": 0.0, + "step": 11620 + }, + { + "epoch": 66.02, + "eval_accuracy": 1.0, + "eval_loss": 9.795142432267312e-06, + "eval_runtime": 122.5052, + "eval_samples_per_second": 2.873, + "eval_steps_per_second": 0.718, + "step": 11620 + }, + { + "epoch": 66.08, + "learning_rate": 3.401136363636364e-05, + "loss": 0.0, + "step": 11630 + }, + { + "epoch": 66.08, + "eval_accuracy": 1.0, + "eval_loss": 9.73249007074628e-06, + "eval_runtime": 122.9666, + "eval_samples_per_second": 2.863, + "eval_steps_per_second": 0.716, + "step": 11630 + }, + { + "epoch": 66.14, + "learning_rate": 3.395454545454546e-05, + "loss": 0.0, + "step": 11640 + }, + { + "epoch": 66.14, + "eval_accuracy": 1.0, + "eval_loss": 9.672207852418069e-06, + "eval_runtime": 122.8602, + "eval_samples_per_second": 2.865, + "eval_steps_per_second": 0.716, + "step": 11640 + }, + { + "epoch": 66.19, + "learning_rate": 3.3897727272727275e-05, + "loss": 0.0, + "step": 11650 + }, + { + "epoch": 66.19, + "eval_accuracy": 1.0, + "eval_loss": 9.59634780883789e-06, + "eval_runtime": 122.9053, + "eval_samples_per_second": 2.864, + "eval_steps_per_second": 0.716, + "step": 11650 + }, + { + "epoch": 66.25, + "learning_rate": 3.384090909090909e-05, + "loss": 0.0, + "step": 11660 + }, + { + "epoch": 66.25, + "eval_accuracy": 1.0, + "eval_loss": 9.508295079285745e-06, + "eval_runtime": 122.909, + "eval_samples_per_second": 2.864, + "eval_steps_per_second": 0.716, + "step": 11660 + }, + { + "epoch": 66.31, + "learning_rate": 3.378409090909091e-05, + "loss": 0.0, + "step": 11670 + }, + { + "epoch": 66.31, + "eval_accuracy": 1.0, + "eval_loss": 9.442256668990012e-06, + "eval_runtime": 122.5621, + "eval_samples_per_second": 2.872, + "eval_steps_per_second": 0.718, + "step": 11670 + }, + { + "epoch": 66.36, + "learning_rate": 3.372727272727273e-05, + "loss": 0.0, + "step": 11680 + }, + { + "epoch": 66.36, + "eval_accuracy": 1.0, + "eval_loss": 9.372152817377355e-06, + "eval_runtime": 122.7691, + "eval_samples_per_second": 2.867, + "eval_steps_per_second": 0.717, + "step": 11680 + }, + { + "epoch": 66.42, + "learning_rate": 3.367045454545455e-05, + "loss": 0.0, + "step": 11690 + }, + { + "epoch": 66.42, + "eval_accuracy": 1.0, + "eval_loss": 9.288164619647432e-06, + "eval_runtime": 122.7698, + "eval_samples_per_second": 2.867, + "eval_steps_per_second": 0.717, + "step": 11690 + }, + { + "epoch": 66.48, + "learning_rate": 3.3613636363636365e-05, + "loss": 0.0, + "step": 11700 + }, + { + "epoch": 66.48, + "eval_accuracy": 1.0, + "eval_loss": 9.215013960783836e-06, + "eval_runtime": 122.7589, + "eval_samples_per_second": 2.867, + "eval_steps_per_second": 0.717, + "step": 11700 + }, + { + "epoch": 66.53, + "learning_rate": 3.355681818181818e-05, + "loss": 0.0, + "step": 11710 + }, + { + "epoch": 66.53, + "eval_accuracy": 1.0, + "eval_loss": 9.158117791230325e-06, + "eval_runtime": 122.9101, + "eval_samples_per_second": 2.864, + "eval_steps_per_second": 0.716, + "step": 11710 + }, + { + "epoch": 66.59, + "learning_rate": 3.35e-05, + "loss": 0.0, + "step": 11720 + }, + { + "epoch": 66.59, + "eval_accuracy": 1.0, + "eval_loss": 9.10698054212844e-06, + "eval_runtime": 124.4663, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 11720 + }, + { + "epoch": 66.65, + "learning_rate": 3.344318181818182e-05, + "loss": 0.0, + "step": 11730 + }, + { + "epoch": 66.65, + "eval_accuracy": 1.0, + "eval_loss": 9.060921911441255e-06, + "eval_runtime": 123.9257, + "eval_samples_per_second": 2.84, + "eval_steps_per_second": 0.71, + "step": 11730 + }, + { + "epoch": 66.7, + "learning_rate": 3.338636363636364e-05, + "loss": 0.0, + "step": 11740 + }, + { + "epoch": 66.7, + "eval_accuracy": 1.0, + "eval_loss": 9.01012299436843e-06, + "eval_runtime": 124.5753, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.706, + "step": 11740 + }, + { + "epoch": 66.76, + "learning_rate": 3.3329545454545455e-05, + "loss": 0.0, + "step": 11750 + }, + { + "epoch": 66.76, + "eval_accuracy": 1.0, + "eval_loss": 8.960000741353724e-06, + "eval_runtime": 124.8554, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 11750 + }, + { + "epoch": 66.82, + "learning_rate": 3.327272727272727e-05, + "loss": 0.0, + "step": 11760 + }, + { + "epoch": 66.82, + "eval_accuracy": 1.0, + "eval_loss": 8.919360880099703e-06, + "eval_runtime": 124.9554, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 11760 + }, + { + "epoch": 66.88, + "learning_rate": 3.321590909090909e-05, + "loss": 0.0, + "step": 11770 + }, + { + "epoch": 66.88, + "eval_accuracy": 1.0, + "eval_loss": 8.87567330210004e-06, + "eval_runtime": 124.8794, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 11770 + }, + { + "epoch": 66.93, + "learning_rate": 3.315909090909091e-05, + "loss": 0.0, + "step": 11780 + }, + { + "epoch": 66.93, + "eval_accuracy": 1.0, + "eval_loss": 8.825890290609095e-06, + "eval_runtime": 124.8126, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 11780 + }, + { + "epoch": 66.99, + "learning_rate": 3.310227272727273e-05, + "loss": 0.0, + "step": 11790 + }, + { + "epoch": 66.99, + "eval_accuracy": 1.0, + "eval_loss": 8.784234523773193e-06, + "eval_runtime": 124.8421, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 11790 + }, + { + "epoch": 67.05, + "learning_rate": 3.3045454545454545e-05, + "loss": 0.0, + "step": 11800 + }, + { + "epoch": 67.05, + "eval_accuracy": 1.0, + "eval_loss": 8.702278137207031e-06, + "eval_runtime": 124.9429, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 11800 + }, + { + "epoch": 67.1, + "learning_rate": 3.2988636363636363e-05, + "loss": 0.0, + "step": 11810 + }, + { + "epoch": 67.1, + "eval_accuracy": 1.0, + "eval_loss": 8.626418093626853e-06, + "eval_runtime": 125.0789, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 11810 + }, + { + "epoch": 67.16, + "learning_rate": 3.293181818181819e-05, + "loss": 0.0, + "step": 11820 + }, + { + "epoch": 67.16, + "eval_accuracy": 1.0, + "eval_loss": 8.550218808522914e-06, + "eval_runtime": 124.685, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 11820 + }, + { + "epoch": 67.22, + "learning_rate": 3.2875e-05, + "loss": 0.0, + "step": 11830 + }, + { + "epoch": 67.22, + "eval_accuracy": 1.0, + "eval_loss": 8.489936590194702e-06, + "eval_runtime": 125.0778, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 11830 + }, + { + "epoch": 67.27, + "learning_rate": 3.281818181818182e-05, + "loss": 0.0, + "step": 11840 + }, + { + "epoch": 67.27, + "eval_accuracy": 1.0, + "eval_loss": 8.439476005150937e-06, + "eval_runtime": 124.8447, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 11840 + }, + { + "epoch": 67.33, + "learning_rate": 3.276136363636364e-05, + "loss": 0.0, + "step": 11850 + }, + { + "epoch": 67.33, + "eval_accuracy": 1.0, + "eval_loss": 8.395450095122214e-06, + "eval_runtime": 124.9646, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 11850 + }, + { + "epoch": 67.39, + "learning_rate": 3.2704545454545454e-05, + "loss": 0.0, + "step": 11860 + }, + { + "epoch": 67.39, + "eval_accuracy": 1.0, + "eval_loss": 8.358873856195714e-06, + "eval_runtime": 124.983, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 11860 + }, + { + "epoch": 67.44, + "learning_rate": 3.264772727272727e-05, + "loss": 0.0, + "step": 11870 + }, + { + "epoch": 67.44, + "eval_accuracy": 1.0, + "eval_loss": 8.339570740645286e-06, + "eval_runtime": 124.7551, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 11870 + }, + { + "epoch": 67.5, + "learning_rate": 3.2590909090909096e-05, + "loss": 0.0, + "step": 11880 + }, + { + "epoch": 67.5, + "eval_accuracy": 1.0, + "eval_loss": 8.278949280793313e-06, + "eval_runtime": 125.4387, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 11880 + }, + { + "epoch": 67.56, + "learning_rate": 3.253409090909091e-05, + "loss": 0.0, + "step": 11890 + }, + { + "epoch": 67.56, + "eval_accuracy": 1.0, + "eval_loss": 8.220699783123564e-06, + "eval_runtime": 125.1784, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 11890 + }, + { + "epoch": 67.61, + "learning_rate": 3.2477272727272726e-05, + "loss": 0.0, + "step": 11900 + }, + { + "epoch": 67.61, + "eval_accuracy": 1.0, + "eval_loss": 8.15330622572219e-06, + "eval_runtime": 124.6566, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 11900 + }, + { + "epoch": 67.67, + "learning_rate": 3.242045454545455e-05, + "loss": 0.0, + "step": 11910 + }, + { + "epoch": 67.67, + "eval_accuracy": 1.0, + "eval_loss": 8.100812920019962e-06, + "eval_runtime": 124.8862, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 11910 + }, + { + "epoch": 67.73, + "learning_rate": 3.236363636363636e-05, + "loss": 0.0, + "step": 11920 + }, + { + "epoch": 67.73, + "eval_accuracy": 1.0, + "eval_loss": 8.061528205871582e-06, + "eval_runtime": 125.4979, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 11920 + }, + { + "epoch": 67.78, + "learning_rate": 3.230681818181818e-05, + "loss": 0.0, + "step": 11930 + }, + { + "epoch": 67.78, + "eval_accuracy": 1.0, + "eval_loss": 8.01648639026098e-06, + "eval_runtime": 125.1711, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 11930 + }, + { + "epoch": 67.84, + "learning_rate": 3.2250000000000005e-05, + "loss": 0.0, + "step": 11940 + }, + { + "epoch": 67.84, + "eval_accuracy": 1.0, + "eval_loss": 7.972798812261317e-06, + "eval_runtime": 125.0023, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 11940 + }, + { + "epoch": 67.9, + "learning_rate": 3.219318181818182e-05, + "loss": 0.0, + "step": 11950 + }, + { + "epoch": 67.9, + "eval_accuracy": 1.0, + "eval_loss": 7.917596121842507e-06, + "eval_runtime": 125.8687, + "eval_samples_per_second": 2.797, + "eval_steps_per_second": 0.699, + "step": 11950 + }, + { + "epoch": 67.95, + "learning_rate": 3.2136363636363634e-05, + "loss": 0.0, + "step": 11960 + }, + { + "epoch": 67.95, + "eval_accuracy": 1.0, + "eval_loss": 7.860700861783698e-06, + "eval_runtime": 125.1761, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 11960 + }, + { + "epoch": 68.01, + "learning_rate": 3.207954545454546e-05, + "loss": 0.0001, + "step": 11970 + }, + { + "epoch": 68.01, + "eval_accuracy": 1.0, + "eval_loss": 5.993653303448809e-06, + "eval_runtime": 124.5436, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 11970 + }, + { + "epoch": 68.07, + "learning_rate": 3.202272727272728e-05, + "loss": 0.0, + "step": 11980 + }, + { + "epoch": 68.07, + "eval_accuracy": 1.0, + "eval_loss": 4.94413734486443e-06, + "eval_runtime": 124.974, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 11980 + }, + { + "epoch": 68.12, + "learning_rate": 3.196590909090909e-05, + "loss": 0.0, + "step": 11990 + }, + { + "epoch": 68.12, + "eval_accuracy": 1.0, + "eval_loss": 5.0606377044459805e-06, + "eval_runtime": 125.1515, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 11990 + }, + { + "epoch": 68.18, + "learning_rate": 3.190909090909091e-05, + "loss": 0.0, + "step": 12000 + }, + { + "epoch": 68.18, + "eval_accuracy": 1.0, + "eval_loss": 5.1500446716090664e-06, + "eval_runtime": 124.76, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12000 + }, + { + "epoch": 68.24, + "learning_rate": 3.185227272727273e-05, + "loss": 0.0, + "step": 12010 + }, + { + "epoch": 68.24, + "eval_accuracy": 1.0, + "eval_loss": 5.166977643966675e-06, + "eval_runtime": 125.166, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12010 + }, + { + "epoch": 68.3, + "learning_rate": 3.179545454545454e-05, + "loss": 0.0, + "step": 12020 + }, + { + "epoch": 68.3, + "eval_accuracy": 1.0, + "eval_loss": 4.3101608753204346e-06, + "eval_runtime": 125.1092, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 12020 + }, + { + "epoch": 68.35, + "learning_rate": 3.173863636363637e-05, + "loss": 0.0, + "step": 12030 + }, + { + "epoch": 68.35, + "eval_accuracy": 1.0, + "eval_loss": 3.939663656638004e-06, + "eval_runtime": 124.6523, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 12030 + }, + { + "epoch": 68.41, + "learning_rate": 3.1681818181818185e-05, + "loss": 0.0, + "step": 12040 + }, + { + "epoch": 68.41, + "eval_accuracy": 1.0, + "eval_loss": 3.838742486550473e-06, + "eval_runtime": 124.6895, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 12040 + }, + { + "epoch": 68.47, + "learning_rate": 3.1624999999999996e-05, + "loss": 0.0, + "step": 12050 + }, + { + "epoch": 68.47, + "eval_accuracy": 1.0, + "eval_loss": 3.796748160311836e-06, + "eval_runtime": 124.6623, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 12050 + }, + { + "epoch": 68.52, + "learning_rate": 3.156818181818182e-05, + "loss": 0.0, + "step": 12060 + }, + { + "epoch": 68.52, + "eval_accuracy": 1.0, + "eval_loss": 3.772364379983628e-06, + "eval_runtime": 124.6901, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 12060 + }, + { + "epoch": 68.58, + "learning_rate": 3.151136363636364e-05, + "loss": 0.0, + "step": 12070 + }, + { + "epoch": 68.58, + "eval_accuracy": 1.0, + "eval_loss": 3.7557699670287548e-06, + "eval_runtime": 124.7214, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 12070 + }, + { + "epoch": 68.64, + "learning_rate": 3.145454545454546e-05, + "loss": 0.0, + "step": 12080 + }, + { + "epoch": 68.64, + "eval_accuracy": 1.0, + "eval_loss": 3.736127609954565e-06, + "eval_runtime": 124.8822, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 12080 + }, + { + "epoch": 68.69, + "learning_rate": 3.1397727272727275e-05, + "loss": 0.0, + "step": 12090 + }, + { + "epoch": 68.69, + "eval_accuracy": 1.0, + "eval_loss": 3.7225809137453325e-06, + "eval_runtime": 124.622, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 12090 + }, + { + "epoch": 68.75, + "learning_rate": 3.134090909090909e-05, + "loss": 0.0, + "step": 12100 + }, + { + "epoch": 68.75, + "eval_accuracy": 1.0, + "eval_loss": 3.705647941387724e-06, + "eval_runtime": 124.9784, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 12100 + }, + { + "epoch": 68.81, + "learning_rate": 3.128409090909091e-05, + "loss": 0.0, + "step": 12110 + }, + { + "epoch": 68.81, + "eval_accuracy": 1.0, + "eval_loss": 3.693794724313193e-06, + "eval_runtime": 125.1943, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12110 + }, + { + "epoch": 68.86, + "learning_rate": 3.122727272727273e-05, + "loss": 0.0, + "step": 12120 + }, + { + "epoch": 68.86, + "eval_accuracy": 1.0, + "eval_loss": 3.6775388707610546e-06, + "eval_runtime": 125.1785, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12120 + }, + { + "epoch": 68.92, + "learning_rate": 3.117045454545455e-05, + "loss": 0.0, + "step": 12130 + }, + { + "epoch": 68.92, + "eval_accuracy": 1.0, + "eval_loss": 3.6653470942837885e-06, + "eval_runtime": 124.9792, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 12130 + }, + { + "epoch": 68.98, + "learning_rate": 3.1113636363636365e-05, + "loss": 0.0, + "step": 12140 + }, + { + "epoch": 68.98, + "eval_accuracy": 1.0, + "eval_loss": 3.6521391848509666e-06, + "eval_runtime": 125.5315, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 12140 + }, + { + "epoch": 69.03, + "learning_rate": 3.105681818181818e-05, + "loss": 0.0, + "step": 12150 + }, + { + "epoch": 69.03, + "eval_accuracy": 1.0, + "eval_loss": 3.6352059851196827e-06, + "eval_runtime": 125.1741, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12150 + }, + { + "epoch": 69.09, + "learning_rate": 3.1e-05, + "loss": 0.0, + "step": 12160 + }, + { + "epoch": 69.09, + "eval_accuracy": 1.0, + "eval_loss": 3.614547495089937e-06, + "eval_runtime": 124.7346, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 12160 + }, + { + "epoch": 69.15, + "learning_rate": 3.094318181818182e-05, + "loss": 0.0, + "step": 12170 + }, + { + "epoch": 69.15, + "eval_accuracy": 1.0, + "eval_loss": 3.597275735955918e-06, + "eval_runtime": 125.5683, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 12170 + }, + { + "epoch": 69.2, + "learning_rate": 3.088636363636364e-05, + "loss": 0.0, + "step": 12180 + }, + { + "epoch": 69.2, + "eval_accuracy": 1.0, + "eval_loss": 3.5820360153593356e-06, + "eval_runtime": 125.5462, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 12180 + }, + { + "epoch": 69.26, + "learning_rate": 3.0829545454545455e-05, + "loss": 0.0, + "step": 12190 + }, + { + "epoch": 69.26, + "eval_accuracy": 1.0, + "eval_loss": 3.571198703866685e-06, + "eval_runtime": 125.0, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 12190 + }, + { + "epoch": 69.32, + "learning_rate": 3.0772727272727273e-05, + "loss": 0.0, + "step": 12200 + }, + { + "epoch": 69.32, + "eval_accuracy": 1.0, + "eval_loss": 3.557652235031128e-06, + "eval_runtime": 125.4739, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 12200 + }, + { + "epoch": 69.38, + "learning_rate": 3.071590909090909e-05, + "loss": 0.0, + "step": 12210 + }, + { + "epoch": 69.38, + "eval_accuracy": 1.0, + "eval_loss": 3.5393643429415533e-06, + "eval_runtime": 125.4351, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 12210 + }, + { + "epoch": 69.43, + "learning_rate": 3.065909090909091e-05, + "loss": 0.0, + "step": 12220 + }, + { + "epoch": 69.43, + "eval_accuracy": 1.0, + "eval_loss": 3.522431370583945e-06, + "eval_runtime": 125.645, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 12220 + }, + { + "epoch": 69.49, + "learning_rate": 3.060227272727273e-05, + "loss": 0.0, + "step": 12230 + }, + { + "epoch": 69.49, + "eval_accuracy": 1.0, + "eval_loss": 3.5061755170318065e-06, + "eval_runtime": 125.1534, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 12230 + }, + { + "epoch": 69.55, + "learning_rate": 3.054545454545455e-05, + "loss": 0.0, + "step": 12240 + }, + { + "epoch": 69.55, + "eval_accuracy": 1.0, + "eval_loss": 3.491612915240694e-06, + "eval_runtime": 124.9685, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 12240 + }, + { + "epoch": 69.6, + "learning_rate": 3.0488636363636364e-05, + "loss": 0.0, + "step": 12250 + }, + { + "epoch": 69.6, + "eval_accuracy": 1.0, + "eval_loss": 3.476034407867701e-06, + "eval_runtime": 125.8257, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 12250 + }, + { + "epoch": 69.66, + "learning_rate": 3.0431818181818185e-05, + "loss": 0.0, + "step": 12260 + }, + { + "epoch": 69.66, + "eval_accuracy": 1.0, + "eval_loss": 3.4651973237487255e-06, + "eval_runtime": 124.7945, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12260 + }, + { + "epoch": 69.72, + "learning_rate": 3.0375000000000003e-05, + "loss": 0.0, + "step": 12270 + }, + { + "epoch": 69.72, + "eval_accuracy": 1.0, + "eval_loss": 3.451312068136758e-06, + "eval_runtime": 125.4816, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 12270 + }, + { + "epoch": 69.77, + "learning_rate": 3.0318181818181818e-05, + "loss": 0.0, + "step": 12280 + }, + { + "epoch": 69.77, + "eval_accuracy": 1.0, + "eval_loss": 3.4374270398984663e-06, + "eval_runtime": 125.3671, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 12280 + }, + { + "epoch": 69.83, + "learning_rate": 3.026136363636364e-05, + "loss": 0.0, + "step": 12290 + }, + { + "epoch": 69.83, + "eval_accuracy": 1.0, + "eval_loss": 3.4245576898683794e-06, + "eval_runtime": 125.2126, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 12290 + }, + { + "epoch": 69.89, + "learning_rate": 3.0204545454545457e-05, + "loss": 0.0, + "step": 12300 + }, + { + "epoch": 69.89, + "eval_accuracy": 1.0, + "eval_loss": 3.400174136913847e-06, + "eval_runtime": 125.1816, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12300 + }, + { + "epoch": 69.94, + "learning_rate": 3.0147727272727272e-05, + "loss": 0.0, + "step": 12310 + }, + { + "epoch": 69.94, + "eval_accuracy": 1.0, + "eval_loss": 3.380192993063247e-06, + "eval_runtime": 125.1294, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 12310 + }, + { + "epoch": 70.0, + "learning_rate": 3.0090909090909093e-05, + "loss": 0.0, + "step": 12320 + }, + { + "epoch": 70.0, + "eval_accuracy": 1.0, + "eval_loss": 3.3659691780485446e-06, + "eval_runtime": 125.3679, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 12320 + }, + { + "epoch": 70.06, + "learning_rate": 3.003409090909091e-05, + "loss": 0.0, + "step": 12330 + }, + { + "epoch": 70.06, + "eval_accuracy": 1.0, + "eval_loss": 3.351406576257432e-06, + "eval_runtime": 125.1455, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 12330 + }, + { + "epoch": 70.11, + "learning_rate": 2.9977272727272726e-05, + "loss": 0.0, + "step": 12340 + }, + { + "epoch": 70.11, + "eval_accuracy": 1.0, + "eval_loss": 3.3392147997801658e-06, + "eval_runtime": 124.8651, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 12340 + }, + { + "epoch": 70.17, + "learning_rate": 2.9920454545454547e-05, + "loss": 0.0, + "step": 12350 + }, + { + "epoch": 70.17, + "eval_accuracy": 1.0, + "eval_loss": 3.326006890347344e-06, + "eval_runtime": 125.1623, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12350 + }, + { + "epoch": 70.23, + "learning_rate": 2.9863636363636365e-05, + "loss": 0.0, + "step": 12360 + }, + { + "epoch": 70.23, + "eval_accuracy": 1.0, + "eval_loss": 3.312460421511787e-06, + "eval_runtime": 124.942, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 12360 + }, + { + "epoch": 70.28, + "learning_rate": 2.9806818181818187e-05, + "loss": 0.0, + "step": 12370 + }, + { + "epoch": 70.28, + "eval_accuracy": 1.0, + "eval_loss": 3.2918019314820413e-06, + "eval_runtime": 125.0036, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 12370 + }, + { + "epoch": 70.34, + "learning_rate": 2.975e-05, + "loss": 0.0, + "step": 12380 + }, + { + "epoch": 70.34, + "eval_accuracy": 1.0, + "eval_loss": 3.2731754799897317e-06, + "eval_runtime": 124.6135, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 12380 + }, + { + "epoch": 70.4, + "learning_rate": 2.969318181818182e-05, + "loss": 0.0, + "step": 12390 + }, + { + "epoch": 70.4, + "eval_accuracy": 1.0, + "eval_loss": 3.2599675705569098e-06, + "eval_runtime": 125.0854, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 12390 + }, + { + "epoch": 70.45, + "learning_rate": 2.963636363636364e-05, + "loss": 0.0, + "step": 12400 + }, + { + "epoch": 70.45, + "eval_accuracy": 1.0, + "eval_loss": 3.2477757940796437e-06, + "eval_runtime": 125.6964, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 12400 + }, + { + "epoch": 70.51, + "learning_rate": 2.9579545454545456e-05, + "loss": 0.0, + "step": 12410 + }, + { + "epoch": 70.51, + "eval_accuracy": 1.0, + "eval_loss": 3.2355840176023776e-06, + "eval_runtime": 125.1853, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12410 + }, + { + "epoch": 70.57, + "learning_rate": 2.9522727272727274e-05, + "loss": 0.0, + "step": 12420 + }, + { + "epoch": 70.57, + "eval_accuracy": 1.0, + "eval_loss": 3.2237308005278464e-06, + "eval_runtime": 124.8603, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 12420 + }, + { + "epoch": 70.62, + "learning_rate": 2.9465909090909095e-05, + "loss": 0.0, + "step": 12430 + }, + { + "epoch": 70.62, + "eval_accuracy": 1.0, + "eval_loss": 3.5027887861360796e-06, + "eval_runtime": 124.9337, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 12430 + }, + { + "epoch": 70.68, + "learning_rate": 2.940909090909091e-05, + "loss": 0.0, + "step": 12440 + }, + { + "epoch": 70.68, + "eval_accuracy": 1.0, + "eval_loss": 3.6622989227907965e-06, + "eval_runtime": 124.8342, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 12440 + }, + { + "epoch": 70.74, + "learning_rate": 2.9352272727272728e-05, + "loss": 0.0, + "step": 12450 + }, + { + "epoch": 70.74, + "eval_accuracy": 1.0, + "eval_loss": 3.719532969626016e-06, + "eval_runtime": 124.6828, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 12450 + }, + { + "epoch": 70.8, + "learning_rate": 2.929545454545455e-05, + "loss": 0.0, + "step": 12460 + }, + { + "epoch": 70.8, + "eval_accuracy": 1.0, + "eval_loss": 3.72698355022294e-06, + "eval_runtime": 125.0295, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12460 + }, + { + "epoch": 70.85, + "learning_rate": 2.9238636363636364e-05, + "loss": 0.0, + "step": 12470 + }, + { + "epoch": 70.85, + "eval_accuracy": 1.0, + "eval_loss": 3.7208876619843068e-06, + "eval_runtime": 124.711, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 12470 + }, + { + "epoch": 70.91, + "learning_rate": 2.9181818181818182e-05, + "loss": 0.0, + "step": 12480 + }, + { + "epoch": 70.91, + "eval_accuracy": 1.0, + "eval_loss": 3.7144529869692633e-06, + "eval_runtime": 125.0418, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12480 + }, + { + "epoch": 70.97, + "learning_rate": 2.9125000000000003e-05, + "loss": 0.0, + "step": 12490 + }, + { + "epoch": 70.97, + "eval_accuracy": 1.0, + "eval_loss": 3.7025999972684076e-06, + "eval_runtime": 125.0348, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12490 + }, + { + "epoch": 71.02, + "learning_rate": 2.906818181818182e-05, + "loss": 0.0, + "step": 12500 + }, + { + "epoch": 71.02, + "eval_accuracy": 1.0, + "eval_loss": 3.6924400319549022e-06, + "eval_runtime": 125.0273, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12500 + }, + { + "epoch": 71.08, + "learning_rate": 2.9011363636363636e-05, + "loss": 0.0, + "step": 12510 + }, + { + "epoch": 71.08, + "eval_accuracy": 1.0, + "eval_loss": 3.674829486044473e-06, + "eval_runtime": 124.7629, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12510 + }, + { + "epoch": 71.14, + "learning_rate": 2.8954545454545457e-05, + "loss": 0.0, + "step": 12520 + }, + { + "epoch": 71.14, + "eval_accuracy": 1.0, + "eval_loss": 3.65925097867148e-06, + "eval_runtime": 124.9206, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 12520 + }, + { + "epoch": 71.19, + "learning_rate": 2.8897727272727275e-05, + "loss": 0.0, + "step": 12530 + }, + { + "epoch": 71.19, + "eval_accuracy": 1.0, + "eval_loss": 3.6443498174776323e-06, + "eval_runtime": 124.4888, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 12530 + }, + { + "epoch": 71.25, + "learning_rate": 2.884090909090909e-05, + "loss": 0.0, + "step": 12540 + }, + { + "epoch": 71.25, + "eval_accuracy": 1.0, + "eval_loss": 3.632158041000366e-06, + "eval_runtime": 125.1146, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 12540 + }, + { + "epoch": 71.31, + "learning_rate": 2.878409090909091e-05, + "loss": 0.0, + "step": 12550 + }, + { + "epoch": 71.31, + "eval_accuracy": 1.0, + "eval_loss": 3.616579533627373e-06, + "eval_runtime": 124.8899, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 12550 + }, + { + "epoch": 71.36, + "learning_rate": 2.872727272727273e-05, + "loss": 0.0, + "step": 12560 + }, + { + "epoch": 71.36, + "eval_accuracy": 1.0, + "eval_loss": 3.602017159209936e-06, + "eval_runtime": 125.3951, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 12560 + }, + { + "epoch": 71.42, + "learning_rate": 2.8670454545454544e-05, + "loss": 0.0, + "step": 12570 + }, + { + "epoch": 71.42, + "eval_accuracy": 1.0, + "eval_loss": 3.5877931168215582e-06, + "eval_runtime": 124.9332, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 12570 + }, + { + "epoch": 71.48, + "learning_rate": 2.8613636363636366e-05, + "loss": 0.0, + "step": 12580 + }, + { + "epoch": 71.48, + "eval_accuracy": 1.0, + "eval_loss": 3.5722148368222406e-06, + "eval_runtime": 126.2249, + "eval_samples_per_second": 2.789, + "eval_steps_per_second": 0.697, + "step": 12580 + }, + { + "epoch": 71.53, + "learning_rate": 2.8556818181818184e-05, + "loss": 0.0, + "step": 12590 + }, + { + "epoch": 71.53, + "eval_accuracy": 1.0, + "eval_loss": 3.555958983270102e-06, + "eval_runtime": 125.313, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 12590 + }, + { + "epoch": 71.59, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.0, + "step": 12600 + }, + { + "epoch": 71.59, + "eval_accuracy": 1.0, + "eval_loss": 3.540719035299844e-06, + "eval_runtime": 125.1849, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 12600 + }, + { + "epoch": 71.65, + "learning_rate": 2.844318181818182e-05, + "loss": 0.0, + "step": 12610 + }, + { + "epoch": 71.65, + "eval_accuracy": 1.0, + "eval_loss": 3.52276992998668e-06, + "eval_runtime": 124.9039, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 12610 + }, + { + "epoch": 71.7, + "learning_rate": 2.8386363636363638e-05, + "loss": 0.0, + "step": 12620 + }, + { + "epoch": 71.7, + "eval_accuracy": 1.0, + "eval_loss": 3.50312757291249e-06, + "eval_runtime": 124.6014, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 12620 + }, + { + "epoch": 71.76, + "learning_rate": 2.832954545454546e-05, + "loss": 0.0, + "step": 12630 + }, + { + "epoch": 71.76, + "eval_accuracy": 1.0, + "eval_loss": 3.4868717193603516e-06, + "eval_runtime": 125.4879, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 12630 + }, + { + "epoch": 71.82, + "learning_rate": 2.8272727272727274e-05, + "loss": 0.0, + "step": 12640 + }, + { + "epoch": 71.82, + "eval_accuracy": 1.0, + "eval_loss": 3.4692611734499224e-06, + "eval_runtime": 125.3984, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 12640 + }, + { + "epoch": 71.88, + "learning_rate": 2.8215909090909092e-05, + "loss": 0.0, + "step": 12650 + }, + { + "epoch": 71.88, + "eval_accuracy": 1.0, + "eval_loss": 3.460456127868383e-06, + "eval_runtime": 124.7626, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12650 + }, + { + "epoch": 71.93, + "learning_rate": 2.8159090909090913e-05, + "loss": 0.0, + "step": 12660 + }, + { + "epoch": 71.93, + "eval_accuracy": 1.0, + "eval_loss": 3.44487762049539e-06, + "eval_runtime": 124.7109, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 12660 + }, + { + "epoch": 71.99, + "learning_rate": 2.8102272727272728e-05, + "loss": 0.0, + "step": 12670 + }, + { + "epoch": 71.99, + "eval_accuracy": 1.0, + "eval_loss": 3.4303150187042775e-06, + "eval_runtime": 124.761, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12670 + }, + { + "epoch": 72.05, + "learning_rate": 2.8045454545454546e-05, + "loss": 0.0, + "step": 12680 + }, + { + "epoch": 72.05, + "eval_accuracy": 1.0, + "eval_loss": 3.419816493988037e-06, + "eval_runtime": 124.7441, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 12680 + }, + { + "epoch": 72.1, + "learning_rate": 2.7988636363636367e-05, + "loss": 0.0, + "step": 12690 + }, + { + "epoch": 72.1, + "eval_accuracy": 1.0, + "eval_loss": 3.4110112210328225e-06, + "eval_runtime": 125.0166, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 12690 + }, + { + "epoch": 72.16, + "learning_rate": 2.7931818181818182e-05, + "loss": 0.0, + "step": 12700 + }, + { + "epoch": 72.16, + "eval_accuracy": 1.0, + "eval_loss": 3.3967874060181202e-06, + "eval_runtime": 125.0258, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12700 + }, + { + "epoch": 72.22, + "learning_rate": 2.7875e-05, + "loss": 0.0, + "step": 12710 + }, + { + "epoch": 72.22, + "eval_accuracy": 1.0, + "eval_loss": 3.38628888130188e-06, + "eval_runtime": 125.0682, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 12710 + }, + { + "epoch": 72.27, + "learning_rate": 2.781818181818182e-05, + "loss": 0.0, + "step": 12720 + }, + { + "epoch": 72.27, + "eval_accuracy": 1.0, + "eval_loss": 3.3700330277497414e-06, + "eval_runtime": 125.1209, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 12720 + }, + { + "epoch": 72.33, + "learning_rate": 2.7761363636363636e-05, + "loss": 0.0, + "step": 12730 + }, + { + "epoch": 72.33, + "eval_accuracy": 1.0, + "eval_loss": 3.3598732898099115e-06, + "eval_runtime": 125.2479, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 12730 + }, + { + "epoch": 72.39, + "learning_rate": 2.7704545454545454e-05, + "loss": 0.0, + "step": 12740 + }, + { + "epoch": 72.39, + "eval_accuracy": 1.0, + "eval_loss": 3.3466653803770896e-06, + "eval_runtime": 124.76, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12740 + }, + { + "epoch": 72.44, + "learning_rate": 2.7647727272727275e-05, + "loss": 0.0, + "step": 12750 + }, + { + "epoch": 72.44, + "eval_accuracy": 1.0, + "eval_loss": 3.335828068884439e-06, + "eval_runtime": 125.3666, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 12750 + }, + { + "epoch": 72.5, + "learning_rate": 2.759090909090909e-05, + "loss": 0.0, + "step": 12760 + }, + { + "epoch": 72.5, + "eval_accuracy": 1.0, + "eval_loss": 3.325668330944609e-06, + "eval_runtime": 124.7933, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12760 + }, + { + "epoch": 72.56, + "learning_rate": 2.7534090909090908e-05, + "loss": 0.0, + "step": 12770 + }, + { + "epoch": 72.56, + "eval_accuracy": 1.0, + "eval_loss": 3.3111057291534962e-06, + "eval_runtime": 125.0498, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12770 + }, + { + "epoch": 72.61, + "learning_rate": 2.747727272727273e-05, + "loss": 0.0, + "step": 12780 + }, + { + "epoch": 72.61, + "eval_accuracy": 1.0, + "eval_loss": 3.2999298582581105e-06, + "eval_runtime": 124.5415, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 12780 + }, + { + "epoch": 72.67, + "learning_rate": 2.7420454545454548e-05, + "loss": 0.0, + "step": 12790 + }, + { + "epoch": 72.67, + "eval_accuracy": 1.0, + "eval_loss": 3.28943133354187e-06, + "eval_runtime": 125.2676, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 12790 + }, + { + "epoch": 72.73, + "learning_rate": 2.7363636363636362e-05, + "loss": 0.0, + "step": 12800 + }, + { + "epoch": 72.73, + "eval_accuracy": 1.0, + "eval_loss": 3.277239557064604e-06, + "eval_runtime": 124.8424, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 12800 + }, + { + "epoch": 72.78, + "learning_rate": 2.7306818181818184e-05, + "loss": 0.0, + "step": 12810 + }, + { + "epoch": 72.78, + "eval_accuracy": 1.0, + "eval_loss": 3.267756937930244e-06, + "eval_runtime": 124.5888, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 12810 + }, + { + "epoch": 72.84, + "learning_rate": 2.725e-05, + "loss": 0.0, + "step": 12820 + }, + { + "epoch": 72.84, + "eval_accuracy": 1.0, + "eval_loss": 3.2528557767363964e-06, + "eval_runtime": 124.7775, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12820 + }, + { + "epoch": 72.9, + "learning_rate": 2.7193181818181816e-05, + "loss": 0.0, + "step": 12830 + }, + { + "epoch": 72.9, + "eval_accuracy": 1.0, + "eval_loss": 3.2376158287661383e-06, + "eval_runtime": 124.8136, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 12830 + }, + { + "epoch": 72.95, + "learning_rate": 2.7136363636363638e-05, + "loss": 0.0, + "step": 12840 + }, + { + "epoch": 72.95, + "eval_accuracy": 1.0, + "eval_loss": 3.2240693599305814e-06, + "eval_runtime": 124.8748, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 12840 + }, + { + "epoch": 73.01, + "learning_rate": 2.7079545454545456e-05, + "loss": 0.0, + "step": 12850 + }, + { + "epoch": 73.01, + "eval_accuracy": 1.0, + "eval_loss": 3.2122161428560503e-06, + "eval_runtime": 124.7348, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 12850 + }, + { + "epoch": 73.07, + "learning_rate": 2.702272727272727e-05, + "loss": 0.0, + "step": 12860 + }, + { + "epoch": 73.07, + "eval_accuracy": 1.0, + "eval_loss": 3.1986696740204934e-06, + "eval_runtime": 125.2576, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 12860 + }, + { + "epoch": 73.12, + "learning_rate": 2.6965909090909092e-05, + "loss": 0.0, + "step": 12870 + }, + { + "epoch": 73.12, + "eval_accuracy": 1.0, + "eval_loss": 3.1830911666475004e-06, + "eval_runtime": 125.0463, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12870 + }, + { + "epoch": 73.18, + "learning_rate": 2.6909090909090913e-05, + "loss": 0.0, + "step": 12880 + }, + { + "epoch": 73.18, + "eval_accuracy": 1.0, + "eval_loss": 3.1732699881104054e-06, + "eval_runtime": 124.7702, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 12880 + }, + { + "epoch": 73.24, + "learning_rate": 2.6852272727272725e-05, + "loss": 0.0, + "step": 12890 + }, + { + "epoch": 73.24, + "eval_accuracy": 1.0, + "eval_loss": 3.1614167710358743e-06, + "eval_runtime": 124.7178, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 12890 + }, + { + "epoch": 73.3, + "learning_rate": 2.6795454545454546e-05, + "loss": 0.0, + "step": 12900 + }, + { + "epoch": 73.3, + "eval_accuracy": 1.0, + "eval_loss": 3.1512570330960443e-06, + "eval_runtime": 124.8475, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 12900 + }, + { + "epoch": 73.35, + "learning_rate": 2.6738636363636367e-05, + "loss": 0.0, + "step": 12910 + }, + { + "epoch": 73.35, + "eval_accuracy": 1.0, + "eval_loss": 3.138726469842368e-06, + "eval_runtime": 125.0222, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 12910 + }, + { + "epoch": 73.41, + "learning_rate": 2.6681818181818185e-05, + "loss": 0.0, + "step": 12920 + }, + { + "epoch": 73.41, + "eval_accuracy": 1.0, + "eval_loss": 3.1295824101107428e-06, + "eval_runtime": 125.0198, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 12920 + }, + { + "epoch": 73.47, + "learning_rate": 2.6625e-05, + "loss": 0.0, + "step": 12930 + }, + { + "epoch": 73.47, + "eval_accuracy": 1.0, + "eval_loss": 3.1167132874543313e-06, + "eval_runtime": 125.007, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 12930 + }, + { + "epoch": 73.52, + "learning_rate": 2.656818181818182e-05, + "loss": 0.0, + "step": 12940 + }, + { + "epoch": 73.52, + "eval_accuracy": 1.0, + "eval_loss": 3.1035053780215094e-06, + "eval_runtime": 125.3978, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 12940 + }, + { + "epoch": 73.58, + "learning_rate": 2.651136363636364e-05, + "loss": 0.0, + "step": 12950 + }, + { + "epoch": 73.58, + "eval_accuracy": 1.0, + "eval_loss": 3.0923295071261236e-06, + "eval_runtime": 124.9489, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 12950 + }, + { + "epoch": 73.64, + "learning_rate": 2.6454545454545454e-05, + "loss": 0.0, + "step": 12960 + }, + { + "epoch": 73.64, + "eval_accuracy": 1.0, + "eval_loss": 3.08420180772373e-06, + "eval_runtime": 124.6355, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 12960 + }, + { + "epoch": 73.69, + "learning_rate": 2.6397727272727276e-05, + "loss": 0.0, + "step": 12970 + }, + { + "epoch": 73.69, + "eval_accuracy": 1.0, + "eval_loss": 3.0720098038727883e-06, + "eval_runtime": 124.6643, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 12970 + }, + { + "epoch": 73.75, + "learning_rate": 2.6340909090909094e-05, + "loss": 0.0, + "step": 12980 + }, + { + "epoch": 73.75, + "eval_accuracy": 1.0, + "eval_loss": 3.0618500659329584e-06, + "eval_runtime": 124.6916, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 12980 + }, + { + "epoch": 73.81, + "learning_rate": 2.6284090909090908e-05, + "loss": 0.0, + "step": 12990 + }, + { + "epoch": 73.81, + "eval_accuracy": 1.0, + "eval_loss": 3.0489807159028715e-06, + "eval_runtime": 125.1246, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 12990 + }, + { + "epoch": 73.86, + "learning_rate": 2.622727272727273e-05, + "loss": 0.0, + "step": 13000 + }, + { + "epoch": 73.86, + "eval_accuracy": 1.0, + "eval_loss": 3.0408527891268022e-06, + "eval_runtime": 124.7159, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 13000 + }, + { + "epoch": 73.92, + "learning_rate": 2.6170454545454548e-05, + "loss": 0.0, + "step": 13010 + }, + { + "epoch": 73.92, + "eval_accuracy": 1.0, + "eval_loss": 3.0293383588286815e-06, + "eval_runtime": 125.1642, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 13010 + }, + { + "epoch": 73.98, + "learning_rate": 2.6113636363636362e-05, + "loss": 0.0, + "step": 13020 + }, + { + "epoch": 73.98, + "eval_accuracy": 1.0, + "eval_loss": 3.0212104320526123e-06, + "eval_runtime": 124.5966, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 13020 + }, + { + "epoch": 74.03, + "learning_rate": 2.6056818181818184e-05, + "loss": 0.0, + "step": 13030 + }, + { + "epoch": 74.03, + "eval_accuracy": 1.0, + "eval_loss": 3.013082505276543e-06, + "eval_runtime": 124.9897, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 13030 + }, + { + "epoch": 74.09, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.0, + "step": 13040 + }, + { + "epoch": 74.09, + "eval_accuracy": 1.0, + "eval_loss": 3.0015680749784224e-06, + "eval_runtime": 125.1209, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 13040 + }, + { + "epoch": 74.15, + "learning_rate": 2.5943181818181823e-05, + "loss": 0.0, + "step": 13050 + }, + { + "epoch": 74.15, + "eval_accuracy": 1.0, + "eval_loss": 2.994794840560644e-06, + "eval_runtime": 125.227, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 13050 + }, + { + "epoch": 74.2, + "learning_rate": 2.5886363636363638e-05, + "loss": 0.0, + "step": 13060 + }, + { + "epoch": 74.2, + "eval_accuracy": 1.0, + "eval_loss": 2.9805707981722662e-06, + "eval_runtime": 125.7571, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 13060 + }, + { + "epoch": 74.26, + "learning_rate": 2.5829545454545456e-05, + "loss": 0.0, + "step": 13070 + }, + { + "epoch": 74.26, + "eval_accuracy": 1.0, + "eval_loss": 2.9670243293367093e-06, + "eval_runtime": 124.5232, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 13070 + }, + { + "epoch": 74.32, + "learning_rate": 2.5772727272727277e-05, + "loss": 0.0, + "step": 13080 + }, + { + "epoch": 74.32, + "eval_accuracy": 1.0, + "eval_loss": 2.957541937576025e-06, + "eval_runtime": 124.6484, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 13080 + }, + { + "epoch": 74.38, + "learning_rate": 2.5715909090909092e-05, + "loss": 0.0, + "step": 13090 + }, + { + "epoch": 74.38, + "eval_accuracy": 1.0, + "eval_loss": 2.9497525702026905e-06, + "eval_runtime": 125.4475, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 13090 + }, + { + "epoch": 74.43, + "learning_rate": 2.565909090909091e-05, + "loss": 0.0, + "step": 13100 + }, + { + "epoch": 74.43, + "eval_accuracy": 1.0, + "eval_loss": 2.943317895187647e-06, + "eval_runtime": 124.7941, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 13100 + }, + { + "epoch": 74.49, + "learning_rate": 2.560227272727273e-05, + "loss": 0.0, + "step": 13110 + }, + { + "epoch": 74.49, + "eval_accuracy": 1.0, + "eval_loss": 2.9318034648895264e-06, + "eval_runtime": 125.3529, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 13110 + }, + { + "epoch": 74.55, + "learning_rate": 2.5545454545454546e-05, + "loss": 0.0, + "step": 13120 + }, + { + "epoch": 74.55, + "eval_accuracy": 1.0, + "eval_loss": 2.9199502478149952e-06, + "eval_runtime": 125.2707, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 13120 + }, + { + "epoch": 74.6, + "learning_rate": 2.5488636363636364e-05, + "loss": 0.0, + "step": 13130 + }, + { + "epoch": 74.6, + "eval_accuracy": 1.0, + "eval_loss": 2.9087743769196095e-06, + "eval_runtime": 124.5659, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.706, + "step": 13130 + }, + { + "epoch": 74.66, + "learning_rate": 2.5431818181818186e-05, + "loss": 0.0, + "step": 13140 + }, + { + "epoch": 74.66, + "eval_accuracy": 1.0, + "eval_loss": 2.8972599466214888e-06, + "eval_runtime": 124.4654, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 13140 + }, + { + "epoch": 74.72, + "learning_rate": 2.5375e-05, + "loss": 0.0, + "step": 13150 + }, + { + "epoch": 74.72, + "eval_accuracy": 1.0, + "eval_loss": 2.882697344830376e-06, + "eval_runtime": 124.945, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 13150 + }, + { + "epoch": 74.77, + "learning_rate": 2.5318181818181818e-05, + "loss": 0.0, + "step": 13160 + }, + { + "epoch": 74.77, + "eval_accuracy": 1.0, + "eval_loss": 2.8677961836365284e-06, + "eval_runtime": 124.9689, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 13160 + }, + { + "epoch": 74.83, + "learning_rate": 2.526136363636364e-05, + "loss": 0.0, + "step": 13170 + }, + { + "epoch": 74.83, + "eval_accuracy": 1.0, + "eval_loss": 2.855265620382852e-06, + "eval_runtime": 125.1035, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 13170 + }, + { + "epoch": 74.89, + "learning_rate": 2.5204545454545458e-05, + "loss": 0.0, + "step": 13180 + }, + { + "epoch": 74.89, + "eval_accuracy": 1.0, + "eval_loss": 2.843073843905586e-06, + "eval_runtime": 124.7213, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 13180 + }, + { + "epoch": 74.94, + "learning_rate": 2.5147727272727272e-05, + "loss": 0.0, + "step": 13190 + }, + { + "epoch": 74.94, + "eval_accuracy": 1.0, + "eval_loss": 2.832236532412935e-06, + "eval_runtime": 124.7745, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 13190 + }, + { + "epoch": 75.0, + "learning_rate": 2.5090909090909094e-05, + "loss": 0.0, + "step": 13200 + }, + { + "epoch": 75.0, + "eval_accuracy": 1.0, + "eval_loss": 2.8207221021148143e-06, + "eval_runtime": 124.9796, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 13200 + }, + { + "epoch": 75.06, + "learning_rate": 2.5034090909090912e-05, + "loss": 0.0, + "step": 13210 + }, + { + "epoch": 75.06, + "eval_accuracy": 1.0, + "eval_loss": 2.81225561593601e-06, + "eval_runtime": 125.176, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 13210 + }, + { + "epoch": 75.11, + "learning_rate": 2.497727272727273e-05, + "loss": 0.0, + "step": 13220 + }, + { + "epoch": 75.11, + "eval_accuracy": 1.0, + "eval_loss": 2.804466248562676e-06, + "eval_runtime": 124.9828, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 13220 + }, + { + "epoch": 75.17, + "learning_rate": 2.4920454545454548e-05, + "loss": 0.0, + "step": 13230 + }, + { + "epoch": 75.17, + "eval_accuracy": 1.0, + "eval_loss": 2.7936291644437006e-06, + "eval_runtime": 125.0789, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 13230 + }, + { + "epoch": 75.23, + "learning_rate": 2.4863636363636362e-05, + "loss": 0.0, + "step": 13240 + }, + { + "epoch": 75.23, + "eval_accuracy": 1.0, + "eval_loss": 2.7817759473691694e-06, + "eval_runtime": 125.3488, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 13240 + }, + { + "epoch": 75.28, + "learning_rate": 2.4806818181818184e-05, + "loss": 0.0, + "step": 13250 + }, + { + "epoch": 75.28, + "eval_accuracy": 1.0, + "eval_loss": 2.7689065973390825e-06, + "eval_runtime": 125.2436, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 13250 + }, + { + "epoch": 75.34, + "learning_rate": 2.4750000000000002e-05, + "loss": 0.0, + "step": 13260 + }, + { + "epoch": 75.34, + "eval_accuracy": 1.0, + "eval_loss": 2.7628107091004495e-06, + "eval_runtime": 125.4938, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 13260 + }, + { + "epoch": 75.4, + "learning_rate": 2.4693181818181817e-05, + "loss": 0.0, + "step": 13270 + }, + { + "epoch": 75.4, + "eval_accuracy": 1.0, + "eval_loss": 2.7570533802645514e-06, + "eval_runtime": 125.8671, + "eval_samples_per_second": 2.797, + "eval_steps_per_second": 0.699, + "step": 13270 + }, + { + "epoch": 75.45, + "learning_rate": 2.4636363636363638e-05, + "loss": 0.0, + "step": 13280 + }, + { + "epoch": 75.45, + "eval_accuracy": 1.0, + "eval_loss": 2.7492642402648926e-06, + "eval_runtime": 125.0894, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 13280 + }, + { + "epoch": 75.51, + "learning_rate": 2.4579545454545456e-05, + "loss": 0.0, + "step": 13290 + }, + { + "epoch": 75.51, + "eval_accuracy": 1.0, + "eval_loss": 2.7414751002652338e-06, + "eval_runtime": 124.8015, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 13290 + }, + { + "epoch": 75.57, + "learning_rate": 2.4522727272727274e-05, + "loss": 0.0, + "step": 13300 + }, + { + "epoch": 75.57, + "eval_accuracy": 1.0, + "eval_loss": 2.7323310405336088e-06, + "eval_runtime": 125.3908, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 13300 + }, + { + "epoch": 75.62, + "learning_rate": 2.4465909090909092e-05, + "loss": 0.0, + "step": 13310 + }, + { + "epoch": 75.62, + "eval_accuracy": 1.0, + "eval_loss": 2.7242031137575395e-06, + "eval_runtime": 125.058, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 13310 + }, + { + "epoch": 75.68, + "learning_rate": 2.440909090909091e-05, + "loss": 0.0, + "step": 13320 + }, + { + "epoch": 75.68, + "eval_accuracy": 1.0, + "eval_loss": 2.7086246063845465e-06, + "eval_runtime": 124.8955, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 13320 + }, + { + "epoch": 75.74, + "learning_rate": 2.4352272727272728e-05, + "loss": 0.0, + "step": 13330 + }, + { + "epoch": 75.74, + "eval_accuracy": 1.0, + "eval_loss": 2.6988034278474515e-06, + "eval_runtime": 125.0881, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 13330 + }, + { + "epoch": 75.8, + "learning_rate": 2.4295454545454546e-05, + "loss": 0.0, + "step": 13340 + }, + { + "epoch": 75.8, + "eval_accuracy": 1.0, + "eval_loss": 2.689321036086767e-06, + "eval_runtime": 124.7833, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 13340 + }, + { + "epoch": 75.85, + "learning_rate": 2.4238636363636368e-05, + "loss": 0.0, + "step": 13350 + }, + { + "epoch": 75.85, + "eval_accuracy": 1.0, + "eval_loss": 2.701851371966768e-06, + "eval_runtime": 125.6804, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 13350 + }, + { + "epoch": 75.91, + "learning_rate": 2.4181818181818182e-05, + "loss": 0.0, + "step": 13360 + }, + { + "epoch": 75.91, + "eval_accuracy": 1.0, + "eval_loss": 2.701512812564033e-06, + "eval_runtime": 125.4014, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 13360 + }, + { + "epoch": 75.97, + "learning_rate": 2.4125e-05, + "loss": 0.0, + "step": 13370 + }, + { + "epoch": 75.97, + "eval_accuracy": 1.0, + "eval_loss": 2.6954169243254e-06, + "eval_runtime": 125.8205, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 13370 + }, + { + "epoch": 76.02, + "learning_rate": 2.406818181818182e-05, + "loss": 0.0, + "step": 13380 + }, + { + "epoch": 76.02, + "eval_accuracy": 1.0, + "eval_loss": 2.687966343728476e-06, + "eval_runtime": 125.413, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 13380 + }, + { + "epoch": 76.08, + "learning_rate": 2.4011363636363636e-05, + "loss": 0.0, + "step": 13390 + }, + { + "epoch": 76.08, + "eval_accuracy": 1.0, + "eval_loss": 2.6811931093106978e-06, + "eval_runtime": 124.8639, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 13390 + }, + { + "epoch": 76.14, + "learning_rate": 2.3954545454545454e-05, + "loss": 0.0, + "step": 13400 + }, + { + "epoch": 76.14, + "eval_accuracy": 1.0, + "eval_loss": 2.672387836355483e-06, + "eval_runtime": 125.408, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 13400 + }, + { + "epoch": 76.19, + "learning_rate": 2.3897727272727276e-05, + "loss": 0.0, + "step": 13410 + }, + { + "epoch": 76.19, + "eval_accuracy": 1.0, + "eval_loss": 2.6632440039975336e-06, + "eval_runtime": 124.9976, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 13410 + }, + { + "epoch": 76.25, + "learning_rate": 2.384090909090909e-05, + "loss": 0.0, + "step": 13420 + }, + { + "epoch": 76.25, + "eval_accuracy": 1.0, + "eval_loss": 2.6588413675199263e-06, + "eval_runtime": 125.0954, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 13420 + }, + { + "epoch": 76.31, + "learning_rate": 2.3784090909090912e-05, + "loss": 0.0, + "step": 13430 + }, + { + "epoch": 76.31, + "eval_accuracy": 1.0, + "eval_loss": 2.6476654966245405e-06, + "eval_runtime": 125.48, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 13430 + }, + { + "epoch": 76.36, + "learning_rate": 2.372727272727273e-05, + "loss": 0.0, + "step": 13440 + }, + { + "epoch": 76.36, + "eval_accuracy": 1.0, + "eval_loss": 2.638860223669326e-06, + "eval_runtime": 125.5905, + "eval_samples_per_second": 2.803, + "eval_steps_per_second": 0.701, + "step": 13440 + }, + { + "epoch": 76.42, + "learning_rate": 2.3670454545454545e-05, + "loss": 0.0, + "step": 13450 + }, + { + "epoch": 76.42, + "eval_accuracy": 1.0, + "eval_loss": 2.631409643072402e-06, + "eval_runtime": 125.2056, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 13450 + }, + { + "epoch": 76.48, + "learning_rate": 2.3613636363636366e-05, + "loss": 0.0, + "step": 13460 + }, + { + "epoch": 76.48, + "eval_accuracy": 1.0, + "eval_loss": 2.6188790798187256e-06, + "eval_runtime": 125.4738, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 13460 + }, + { + "epoch": 76.53, + "learning_rate": 2.3556818181818184e-05, + "loss": 0.0, + "step": 13470 + }, + { + "epoch": 76.53, + "eval_accuracy": 1.0, + "eval_loss": 2.6110899398190668e-06, + "eval_runtime": 124.8928, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 13470 + }, + { + "epoch": 76.59, + "learning_rate": 2.35e-05, + "loss": 0.0, + "step": 13480 + }, + { + "epoch": 76.59, + "eval_accuracy": 1.0, + "eval_loss": 2.6029620130429976e-06, + "eval_runtime": 125.181, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 13480 + }, + { + "epoch": 76.65, + "learning_rate": 2.344318181818182e-05, + "loss": 0.0, + "step": 13490 + }, + { + "epoch": 76.65, + "eval_accuracy": 1.0, + "eval_loss": 2.5853514671325684e-06, + "eval_runtime": 125.2171, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 13490 + }, + { + "epoch": 76.7, + "learning_rate": 2.3386363636363638e-05, + "loss": 0.0, + "step": 13500 + }, + { + "epoch": 76.7, + "eval_accuracy": 1.0, + "eval_loss": 2.5701117465359857e-06, + "eval_runtime": 124.9594, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 13500 + }, + { + "epoch": 76.76, + "learning_rate": 2.3329545454545456e-05, + "loss": 0.0, + "step": 13510 + }, + { + "epoch": 76.76, + "eval_accuracy": 1.0, + "eval_loss": 2.561645032983506e-06, + "eval_runtime": 125.1626, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 13510 + }, + { + "epoch": 76.82, + "learning_rate": 2.3272727272727274e-05, + "loss": 0.0, + "step": 13520 + }, + { + "epoch": 76.82, + "eval_accuracy": 1.0, + "eval_loss": 2.551485295043676e-06, + "eval_runtime": 124.8102, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 13520 + }, + { + "epoch": 76.88, + "learning_rate": 2.3215909090909092e-05, + "loss": 0.0, + "step": 13530 + }, + { + "epoch": 76.88, + "eval_accuracy": 1.0, + "eval_loss": 2.5426800220884616e-06, + "eval_runtime": 125.2515, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 13530 + }, + { + "epoch": 76.93, + "learning_rate": 2.315909090909091e-05, + "loss": 0.0, + "step": 13540 + }, + { + "epoch": 76.93, + "eval_accuracy": 1.0, + "eval_loss": 2.53048801823752e-06, + "eval_runtime": 125.154, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 13540 + }, + { + "epoch": 76.99, + "learning_rate": 2.3102272727272728e-05, + "loss": 0.0, + "step": 13550 + }, + { + "epoch": 76.99, + "eval_accuracy": 1.0, + "eval_loss": 2.5237147838197416e-06, + "eval_runtime": 125.767, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 13550 + }, + { + "epoch": 77.05, + "learning_rate": 2.3045454545454546e-05, + "loss": 0.0, + "step": 13560 + }, + { + "epoch": 77.05, + "eval_accuracy": 1.0, + "eval_loss": 2.514232392059057e-06, + "eval_runtime": 124.7978, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 13560 + }, + { + "epoch": 77.1, + "learning_rate": 2.2988636363636364e-05, + "loss": 0.0, + "step": 13570 + }, + { + "epoch": 77.1, + "eval_accuracy": 1.0, + "eval_loss": 2.507120370864868e-06, + "eval_runtime": 125.2762, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 13570 + }, + { + "epoch": 77.16, + "learning_rate": 2.2931818181818182e-05, + "loss": 0.0, + "step": 13580 + }, + { + "epoch": 77.16, + "eval_accuracy": 1.0, + "eval_loss": 2.5003471364470897e-06, + "eval_runtime": 124.7534, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 13580 + }, + { + "epoch": 77.22, + "learning_rate": 2.2875e-05, + "loss": 0.0, + "step": 13590 + }, + { + "epoch": 77.22, + "eval_accuracy": 1.0, + "eval_loss": 2.49120330408914e-06, + "eval_runtime": 125.0373, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 13590 + }, + { + "epoch": 77.27, + "learning_rate": 2.281818181818182e-05, + "loss": 0.0, + "step": 13600 + }, + { + "epoch": 77.27, + "eval_accuracy": 1.0, + "eval_loss": 2.479350087014609e-06, + "eval_runtime": 124.9192, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 13600 + }, + { + "epoch": 77.33, + "learning_rate": 2.2761363636363636e-05, + "loss": 0.0, + "step": 13610 + }, + { + "epoch": 77.33, + "eval_accuracy": 1.0, + "eval_loss": 2.4715607196412748e-06, + "eval_runtime": 125.0423, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 13610 + }, + { + "epoch": 77.39, + "learning_rate": 2.2704545454545454e-05, + "loss": 0.0, + "step": 13620 + }, + { + "epoch": 77.39, + "eval_accuracy": 1.0, + "eval_loss": 2.461400981701445e-06, + "eval_runtime": 125.7263, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 13620 + }, + { + "epoch": 77.44, + "learning_rate": 2.2647727272727272e-05, + "loss": 0.0, + "step": 13630 + }, + { + "epoch": 77.44, + "eval_accuracy": 1.0, + "eval_loss": 2.452934268148965e-06, + "eval_runtime": 125.1292, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 13630 + }, + { + "epoch": 77.5, + "learning_rate": 2.2590909090909094e-05, + "loss": 0.0, + "step": 13640 + }, + { + "epoch": 77.5, + "eval_accuracy": 1.0, + "eval_loss": 2.4475157260894775e-06, + "eval_runtime": 125.1978, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 13640 + }, + { + "epoch": 77.56, + "learning_rate": 2.253409090909091e-05, + "loss": 0.0, + "step": 13650 + }, + { + "epoch": 77.56, + "eval_accuracy": 1.0, + "eval_loss": 2.4393877993134083e-06, + "eval_runtime": 125.5048, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 13650 + }, + { + "epoch": 77.61, + "learning_rate": 2.2477272727272727e-05, + "loss": 0.0, + "step": 13660 + }, + { + "epoch": 77.61, + "eval_accuracy": 1.0, + "eval_loss": 2.434985162835801e-06, + "eval_runtime": 125.461, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 13660 + }, + { + "epoch": 77.67, + "learning_rate": 2.2420454545454548e-05, + "loss": 0.0, + "step": 13670 + }, + { + "epoch": 77.67, + "eval_accuracy": 1.0, + "eval_loss": 2.426857236059732e-06, + "eval_runtime": 125.0555, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 13670 + }, + { + "epoch": 77.73, + "learning_rate": 2.2363636363636366e-05, + "loss": 0.0, + "step": 13680 + }, + { + "epoch": 77.73, + "eval_accuracy": 1.0, + "eval_loss": 2.4204227884183638e-06, + "eval_runtime": 125.3666, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 13680 + }, + { + "epoch": 77.78, + "learning_rate": 2.230681818181818e-05, + "loss": 0.0, + "step": 13690 + }, + { + "epoch": 77.78, + "eval_accuracy": 1.0, + "eval_loss": 2.4160201519407565e-06, + "eval_runtime": 125.3269, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 13690 + }, + { + "epoch": 77.84, + "learning_rate": 2.2250000000000002e-05, + "loss": 0.0, + "step": 13700 + }, + { + "epoch": 77.84, + "eval_accuracy": 1.0, + "eval_loss": 2.409585476925713e-06, + "eval_runtime": 125.4213, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 13700 + }, + { + "epoch": 77.9, + "learning_rate": 2.219318181818182e-05, + "loss": 0.0, + "step": 13710 + }, + { + "epoch": 77.9, + "eval_accuracy": 1.0, + "eval_loss": 2.4007802039704984e-06, + "eval_runtime": 125.2287, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 13710 + }, + { + "epoch": 77.95, + "learning_rate": 2.2136363636363638e-05, + "loss": 0.0, + "step": 13720 + }, + { + "epoch": 77.95, + "eval_accuracy": 1.0, + "eval_loss": 2.3953616619110107e-06, + "eval_runtime": 125.0989, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 13720 + }, + { + "epoch": 78.01, + "learning_rate": 2.2079545454545456e-05, + "loss": 0.0, + "step": 13730 + }, + { + "epoch": 78.01, + "eval_accuracy": 1.0, + "eval_loss": 2.387911081314087e-06, + "eval_runtime": 125.0719, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 13730 + }, + { + "epoch": 78.07, + "learning_rate": 2.2022727272727274e-05, + "loss": 0.0, + "step": 13740 + }, + { + "epoch": 78.07, + "eval_accuracy": 1.0, + "eval_loss": 2.380460500717163e-06, + "eval_runtime": 125.4065, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 13740 + }, + { + "epoch": 78.12, + "learning_rate": 2.1965909090909092e-05, + "loss": 0.0, + "step": 13750 + }, + { + "epoch": 78.12, + "eval_accuracy": 1.0, + "eval_loss": 2.3733484795229742e-06, + "eval_runtime": 125.2371, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 13750 + }, + { + "epoch": 78.18, + "learning_rate": 2.190909090909091e-05, + "loss": 0.0, + "step": 13760 + }, + { + "epoch": 78.18, + "eval_accuracy": 1.0, + "eval_loss": 2.36488199334417e-06, + "eval_runtime": 125.2656, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 13760 + }, + { + "epoch": 78.24, + "learning_rate": 2.185227272727273e-05, + "loss": 0.0, + "step": 13770 + }, + { + "epoch": 78.24, + "eval_accuracy": 1.0, + "eval_loss": 2.3574314127472462e-06, + "eval_runtime": 124.4528, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 13770 + }, + { + "epoch": 78.3, + "learning_rate": 2.1795454545454546e-05, + "loss": 0.0, + "step": 13780 + }, + { + "epoch": 78.3, + "eval_accuracy": 1.0, + "eval_loss": 2.3506581783294678e-06, + "eval_runtime": 124.8593, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 13780 + }, + { + "epoch": 78.35, + "learning_rate": 2.1738636363636364e-05, + "loss": 0.0, + "step": 13790 + }, + { + "epoch": 78.35, + "eval_accuracy": 1.0, + "eval_loss": 2.3425302515533986e-06, + "eval_runtime": 125.7714, + "eval_samples_per_second": 2.799, + "eval_steps_per_second": 0.7, + "step": 13790 + }, + { + "epoch": 78.41, + "learning_rate": 2.1681818181818182e-05, + "loss": 0.0, + "step": 13800 + }, + { + "epoch": 78.41, + "eval_accuracy": 1.0, + "eval_loss": 2.3377890556730563e-06, + "eval_runtime": 125.1233, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 13800 + }, + { + "epoch": 78.47, + "learning_rate": 2.1625e-05, + "loss": 0.0, + "step": 13810 + }, + { + "epoch": 78.47, + "eval_accuracy": 1.0, + "eval_loss": 2.3316931674344232e-06, + "eval_runtime": 125.377, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 13810 + }, + { + "epoch": 78.52, + "learning_rate": 2.156818181818182e-05, + "loss": 0.0, + "step": 13820 + }, + { + "epoch": 78.52, + "eval_accuracy": 1.0, + "eval_loss": 2.3252584924193798e-06, + "eval_runtime": 124.8292, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 13820 + }, + { + "epoch": 78.58, + "learning_rate": 2.1511363636363637e-05, + "loss": 0.0, + "step": 13830 + }, + { + "epoch": 78.58, + "eval_accuracy": 1.0, + "eval_loss": 2.31611466006143e-06, + "eval_runtime": 125.373, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 13830 + }, + { + "epoch": 78.64, + "learning_rate": 2.1454545454545455e-05, + "loss": 0.0, + "step": 13840 + }, + { + "epoch": 78.64, + "eval_accuracy": 1.0, + "eval_loss": 2.3073093871062156e-06, + "eval_runtime": 125.2802, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 13840 + }, + { + "epoch": 78.69, + "learning_rate": 2.1397727272727276e-05, + "loss": 0.0, + "step": 13850 + }, + { + "epoch": 78.69, + "eval_accuracy": 1.0, + "eval_loss": 2.3049387891660444e-06, + "eval_runtime": 125.1457, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 13850 + }, + { + "epoch": 78.75, + "learning_rate": 2.134090909090909e-05, + "loss": 0.0, + "step": 13860 + }, + { + "epoch": 78.75, + "eval_accuracy": 1.0, + "eval_loss": 2.29714942179271e-06, + "eval_runtime": 124.7684, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 13860 + }, + { + "epoch": 78.81, + "learning_rate": 2.128409090909091e-05, + "loss": 0.0, + "step": 13870 + }, + { + "epoch": 78.81, + "eval_accuracy": 1.0, + "eval_loss": 2.28665089707647e-06, + "eval_runtime": 125.6674, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 13870 + }, + { + "epoch": 78.86, + "learning_rate": 2.122727272727273e-05, + "loss": 0.0, + "step": 13880 + }, + { + "epoch": 78.86, + "eval_accuracy": 1.0, + "eval_loss": 2.2795391032559564e-06, + "eval_runtime": 124.8597, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 13880 + }, + { + "epoch": 78.92, + "learning_rate": 2.1170454545454545e-05, + "loss": 0.0, + "step": 13890 + }, + { + "epoch": 78.92, + "eval_accuracy": 1.0, + "eval_loss": 2.270733830300742e-06, + "eval_runtime": 125.1621, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 13890 + }, + { + "epoch": 78.98, + "learning_rate": 2.1113636363636366e-05, + "loss": 0.0, + "step": 13900 + }, + { + "epoch": 78.98, + "eval_accuracy": 1.0, + "eval_loss": 2.259896518808091e-06, + "eval_runtime": 125.1283, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 13900 + }, + { + "epoch": 79.03, + "learning_rate": 2.1056818181818184e-05, + "loss": 0.0, + "step": 13910 + }, + { + "epoch": 79.03, + "eval_accuracy": 1.0, + "eval_loss": 2.2507526864501415e-06, + "eval_runtime": 124.9125, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 13910 + }, + { + "epoch": 79.09, + "learning_rate": 2.1e-05, + "loss": 0.0, + "step": 13920 + }, + { + "epoch": 79.09, + "eval_accuracy": 1.0, + "eval_loss": 2.2412700673157815e-06, + "eval_runtime": 125.0946, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 13920 + }, + { + "epoch": 79.15, + "learning_rate": 2.094318181818182e-05, + "loss": 0.0, + "step": 13930 + }, + { + "epoch": 79.15, + "eval_accuracy": 1.0, + "eval_loss": 2.2311103293759516e-06, + "eval_runtime": 125.0737, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 13930 + }, + { + "epoch": 79.2, + "learning_rate": 2.0886363636363638e-05, + "loss": 0.0, + "step": 13940 + }, + { + "epoch": 79.2, + "eval_accuracy": 1.0, + "eval_loss": 2.2239983081817627e-06, + "eval_runtime": 124.532, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 13940 + }, + { + "epoch": 79.26, + "learning_rate": 2.0829545454545453e-05, + "loss": 0.0, + "step": 13950 + }, + { + "epoch": 79.26, + "eval_accuracy": 1.0, + "eval_loss": 2.216547727584839e-06, + "eval_runtime": 125.0708, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 13950 + }, + { + "epoch": 79.32, + "learning_rate": 2.0772727272727274e-05, + "loss": 0.0, + "step": 13960 + }, + { + "epoch": 79.32, + "eval_accuracy": 1.0, + "eval_loss": 2.209097146987915e-06, + "eval_runtime": 125.1962, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 13960 + }, + { + "epoch": 79.38, + "learning_rate": 2.0715909090909092e-05, + "loss": 0.0, + "step": 13970 + }, + { + "epoch": 79.38, + "eval_accuracy": 1.0, + "eval_loss": 2.200630660809111e-06, + "eval_runtime": 125.3941, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 13970 + }, + { + "epoch": 79.43, + "learning_rate": 2.065909090909091e-05, + "loss": 0.0, + "step": 13980 + }, + { + "epoch": 79.43, + "eval_accuracy": 1.0, + "eval_loss": 2.1938574263913324e-06, + "eval_runtime": 125.2105, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 13980 + }, + { + "epoch": 79.49, + "learning_rate": 2.060227272727273e-05, + "loss": 0.0, + "step": 13990 + }, + { + "epoch": 79.49, + "eval_accuracy": 1.0, + "eval_loss": 2.1870839645998785e-06, + "eval_runtime": 125.0105, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 13990 + }, + { + "epoch": 79.55, + "learning_rate": 2.0545454545454546e-05, + "loss": 0.0, + "step": 14000 + }, + { + "epoch": 79.55, + "eval_accuracy": 1.0, + "eval_loss": 2.1803107301821e-06, + "eval_runtime": 124.9519, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 14000 + }, + { + "epoch": 79.6, + "learning_rate": 2.0488636363636365e-05, + "loss": 0.0, + "step": 14010 + }, + { + "epoch": 79.6, + "eval_accuracy": 1.0, + "eval_loss": 2.172182803406031e-06, + "eval_runtime": 124.9855, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14010 + }, + { + "epoch": 79.66, + "learning_rate": 2.0431818181818183e-05, + "loss": 0.0, + "step": 14020 + }, + { + "epoch": 79.66, + "eval_accuracy": 1.0, + "eval_loss": 2.1637163172272267e-06, + "eval_runtime": 124.8071, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 14020 + }, + { + "epoch": 79.72, + "learning_rate": 2.0375e-05, + "loss": 0.0, + "step": 14030 + }, + { + "epoch": 79.72, + "eval_accuracy": 1.0, + "eval_loss": 2.1566045234067133e-06, + "eval_runtime": 125.1218, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14030 + }, + { + "epoch": 79.77, + "learning_rate": 2.031818181818182e-05, + "loss": 0.0, + "step": 14040 + }, + { + "epoch": 79.77, + "eval_accuracy": 1.0, + "eval_loss": 2.1498310616152594e-06, + "eval_runtime": 124.9228, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 14040 + }, + { + "epoch": 79.83, + "learning_rate": 2.0261363636363637e-05, + "loss": 0.0, + "step": 14050 + }, + { + "epoch": 79.83, + "eval_accuracy": 1.0, + "eval_loss": 2.1440739601530368e-06, + "eval_runtime": 124.6753, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 14050 + }, + { + "epoch": 79.89, + "learning_rate": 2.0204545454545458e-05, + "loss": 0.0, + "step": 14060 + }, + { + "epoch": 79.89, + "eval_accuracy": 1.0, + "eval_loss": 2.138993977496284e-06, + "eval_runtime": 124.547, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 14060 + }, + { + "epoch": 79.94, + "learning_rate": 2.0147727272727273e-05, + "loss": 0.0, + "step": 14070 + }, + { + "epoch": 79.94, + "eval_accuracy": 1.0, + "eval_loss": 2.13052749131748e-06, + "eval_runtime": 124.6576, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 14070 + }, + { + "epoch": 80.0, + "learning_rate": 2.009090909090909e-05, + "loss": 0.0, + "step": 14080 + }, + { + "epoch": 80.0, + "eval_accuracy": 1.0, + "eval_loss": 2.124770162481582e-06, + "eval_runtime": 125.0136, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14080 + }, + { + "epoch": 80.06, + "learning_rate": 2.0034090909090912e-05, + "loss": 0.0, + "step": 14090 + }, + { + "epoch": 80.06, + "eval_accuracy": 1.0, + "eval_loss": 2.119351620422094e-06, + "eval_runtime": 125.6079, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 14090 + }, + { + "epoch": 80.11, + "learning_rate": 1.9977272727272727e-05, + "loss": 0.0, + "step": 14100 + }, + { + "epoch": 80.11, + "eval_accuracy": 1.0, + "eval_loss": 2.113932850988931e-06, + "eval_runtime": 124.8054, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 14100 + }, + { + "epoch": 80.17, + "learning_rate": 1.9920454545454548e-05, + "loss": 0.0, + "step": 14110 + }, + { + "epoch": 80.17, + "eval_accuracy": 1.0, + "eval_loss": 2.1051275780337164e-06, + "eval_runtime": 124.8148, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 14110 + }, + { + "epoch": 80.23, + "learning_rate": 1.9863636363636366e-05, + "loss": 0.0, + "step": 14120 + }, + { + "epoch": 80.23, + "eval_accuracy": 1.0, + "eval_loss": 2.098354343615938e-06, + "eval_runtime": 124.6405, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 14120 + }, + { + "epoch": 80.28, + "learning_rate": 1.980681818181818e-05, + "loss": 0.0, + "step": 14130 + }, + { + "epoch": 80.28, + "eval_accuracy": 1.0, + "eval_loss": 2.0939517071383307e-06, + "eval_runtime": 124.9259, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 14130 + }, + { + "epoch": 80.34, + "learning_rate": 1.9750000000000002e-05, + "loss": 0.0, + "step": 14140 + }, + { + "epoch": 80.34, + "eval_accuracy": 1.0, + "eval_loss": 2.088533165078843e-06, + "eval_runtime": 125.001, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14140 + }, + { + "epoch": 80.4, + "learning_rate": 1.969318181818182e-05, + "loss": 0.0, + "step": 14150 + }, + { + "epoch": 80.4, + "eval_accuracy": 1.0, + "eval_loss": 2.0841305286012357e-06, + "eval_runtime": 125.6375, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 14150 + }, + { + "epoch": 80.45, + "learning_rate": 1.9636363636363635e-05, + "loss": 0.0, + "step": 14160 + }, + { + "epoch": 80.45, + "eval_accuracy": 1.0, + "eval_loss": 2.0807440250791842e-06, + "eval_runtime": 124.9908, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14160 + }, + { + "epoch": 80.51, + "learning_rate": 1.9579545454545456e-05, + "loss": 0.0, + "step": 14170 + }, + { + "epoch": 80.51, + "eval_accuracy": 1.0, + "eval_loss": 2.07295465770585e-06, + "eval_runtime": 125.0683, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 14170 + }, + { + "epoch": 80.57, + "learning_rate": 1.9522727272727274e-05, + "loss": 0.0, + "step": 14180 + }, + { + "epoch": 80.57, + "eval_accuracy": 1.0, + "eval_loss": 2.066858769467217e-06, + "eval_runtime": 124.8568, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 14180 + }, + { + "epoch": 80.62, + "learning_rate": 1.9465909090909092e-05, + "loss": 0.0, + "step": 14190 + }, + { + "epoch": 80.62, + "eval_accuracy": 1.0, + "eval_loss": 2.0583922832884127e-06, + "eval_runtime": 124.4461, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 14190 + }, + { + "epoch": 80.68, + "learning_rate": 1.940909090909091e-05, + "loss": 0.0, + "step": 14200 + }, + { + "epoch": 80.68, + "eval_accuracy": 1.0, + "eval_loss": 2.0526349544525146e-06, + "eval_runtime": 124.5152, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 14200 + }, + { + "epoch": 80.74, + "learning_rate": 1.935227272727273e-05, + "loss": 0.0, + "step": 14210 + }, + { + "epoch": 80.74, + "eval_accuracy": 1.0, + "eval_loss": 2.047216412393027e-06, + "eval_runtime": 124.8437, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 14210 + }, + { + "epoch": 80.8, + "learning_rate": 1.9295454545454547e-05, + "loss": 0.0, + "step": 14220 + }, + { + "epoch": 80.8, + "eval_accuracy": 1.0, + "eval_loss": 2.0387496988405474e-06, + "eval_runtime": 125.1375, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14220 + }, + { + "epoch": 80.85, + "learning_rate": 1.9238636363636365e-05, + "loss": 0.0, + "step": 14230 + }, + { + "epoch": 80.85, + "eval_accuracy": 1.0, + "eval_loss": 2.03366994355747e-06, + "eval_runtime": 125.0108, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14230 + }, + { + "epoch": 80.91, + "learning_rate": 1.9181818181818183e-05, + "loss": 0.0, + "step": 14240 + }, + { + "epoch": 80.91, + "eval_accuracy": 1.0, + "eval_loss": 2.028251174124307e-06, + "eval_runtime": 125.2745, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 14240 + }, + { + "epoch": 80.97, + "learning_rate": 1.9125e-05, + "loss": 0.0, + "step": 14250 + }, + { + "epoch": 80.97, + "eval_accuracy": 1.0, + "eval_loss": 2.018091436184477e-06, + "eval_runtime": 125.1517, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14250 + }, + { + "epoch": 81.02, + "learning_rate": 1.906818181818182e-05, + "loss": 0.0, + "step": 14260 + }, + { + "epoch": 81.02, + "eval_accuracy": 1.0, + "eval_loss": 2.0113182017666986e-06, + "eval_runtime": 125.1354, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14260 + }, + { + "epoch": 81.08, + "learning_rate": 1.9011363636363637e-05, + "loss": 0.0, + "step": 14270 + }, + { + "epoch": 81.08, + "eval_accuracy": 1.0, + "eval_loss": 2.0075929114682367e-06, + "eval_runtime": 125.2377, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 14270 + }, + { + "epoch": 81.14, + "learning_rate": 1.8954545454545455e-05, + "loss": 0.0, + "step": 14280 + }, + { + "epoch": 81.14, + "eval_accuracy": 1.0, + "eval_loss": 2.0014967958559282e-06, + "eval_runtime": 124.7734, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 14280 + }, + { + "epoch": 81.19, + "learning_rate": 1.8897727272727273e-05, + "loss": 0.0, + "step": 14290 + }, + { + "epoch": 81.19, + "eval_accuracy": 1.0, + "eval_loss": 1.9957396943937056e-06, + "eval_runtime": 125.473, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 14290 + }, + { + "epoch": 81.25, + "learning_rate": 1.884090909090909e-05, + "loss": 0.0, + "step": 14300 + }, + { + "epoch": 81.25, + "eval_accuracy": 1.0, + "eval_loss": 1.9903209249605425e-06, + "eval_runtime": 125.4765, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 14300 + }, + { + "epoch": 81.31, + "learning_rate": 1.878409090909091e-05, + "loss": 0.0, + "step": 14310 + }, + { + "epoch": 81.31, + "eval_accuracy": 1.0, + "eval_loss": 1.983547690542764e-06, + "eval_runtime": 125.0952, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 14310 + }, + { + "epoch": 81.36, + "learning_rate": 1.872727272727273e-05, + "loss": 0.0, + "step": 14320 + }, + { + "epoch": 81.36, + "eval_accuracy": 1.0, + "eval_loss": 1.9767744561249856e-06, + "eval_runtime": 125.4013, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 14320 + }, + { + "epoch": 81.42, + "learning_rate": 1.8670454545454545e-05, + "loss": 0.0, + "step": 14330 + }, + { + "epoch": 81.42, + "eval_accuracy": 1.0, + "eval_loss": 1.971694473468233e-06, + "eval_runtime": 125.0947, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 14330 + }, + { + "epoch": 81.48, + "learning_rate": 1.8613636363636363e-05, + "loss": 0.0, + "step": 14340 + }, + { + "epoch": 81.48, + "eval_accuracy": 1.0, + "eval_loss": 1.9649212390504545e-06, + "eval_runtime": 124.7733, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 14340 + }, + { + "epoch": 81.53, + "learning_rate": 1.8556818181818184e-05, + "loss": 0.0, + "step": 14350 + }, + { + "epoch": 81.53, + "eval_accuracy": 1.0, + "eval_loss": 1.9608573893492576e-06, + "eval_runtime": 125.2058, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 14350 + }, + { + "epoch": 81.59, + "learning_rate": 1.85e-05, + "loss": 0.0, + "step": 14360 + }, + { + "epoch": 81.59, + "eval_accuracy": 1.0, + "eval_loss": 1.954422714334214e-06, + "eval_runtime": 125.0701, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 14360 + }, + { + "epoch": 81.65, + "learning_rate": 1.8443181818181817e-05, + "loss": 0.0, + "step": 14370 + }, + { + "epoch": 81.65, + "eval_accuracy": 1.0, + "eval_loss": 1.950697424035752e-06, + "eval_runtime": 124.9485, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 14370 + }, + { + "epoch": 81.7, + "learning_rate": 1.838636363636364e-05, + "loss": 0.0, + "step": 14380 + }, + { + "epoch": 81.7, + "eval_accuracy": 1.0, + "eval_loss": 1.9452788819762645e-06, + "eval_runtime": 125.1396, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14380 + }, + { + "epoch": 81.76, + "learning_rate": 1.8329545454545453e-05, + "loss": 0.0, + "step": 14390 + }, + { + "epoch": 81.76, + "eval_accuracy": 1.0, + "eval_loss": 1.938844206961221e-06, + "eval_runtime": 125.2965, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 14390 + }, + { + "epoch": 81.82, + "learning_rate": 1.8272727272727275e-05, + "loss": 0.0, + "step": 14400 + }, + { + "epoch": 81.82, + "eval_accuracy": 1.0, + "eval_loss": 1.932748318722588e-06, + "eval_runtime": 125.0784, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 14400 + }, + { + "epoch": 81.88, + "learning_rate": 1.8215909090909093e-05, + "loss": 0.0, + "step": 14410 + }, + { + "epoch": 81.88, + "eval_accuracy": 1.0, + "eval_loss": 1.929023028424126e-06, + "eval_runtime": 125.3066, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 14410 + }, + { + "epoch": 81.93, + "learning_rate": 1.8159090909090907e-05, + "loss": 0.0, + "step": 14420 + }, + { + "epoch": 81.93, + "eval_accuracy": 1.0, + "eval_loss": 1.924959178722929e-06, + "eval_runtime": 124.9897, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14420 + }, + { + "epoch": 81.99, + "learning_rate": 1.810227272727273e-05, + "loss": 0.0, + "step": 14430 + }, + { + "epoch": 81.99, + "eval_accuracy": 1.0, + "eval_loss": 1.920895101648057e-06, + "eval_runtime": 124.9777, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 14430 + }, + { + "epoch": 82.05, + "learning_rate": 1.8045454545454547e-05, + "loss": 0.0, + "step": 14440 + }, + { + "epoch": 82.05, + "eval_accuracy": 1.0, + "eval_loss": 1.91683125194686e-06, + "eval_runtime": 125.0425, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 14440 + }, + { + "epoch": 82.1, + "learning_rate": 1.7988636363636365e-05, + "loss": 0.0, + "step": 14450 + }, + { + "epoch": 82.1, + "eval_accuracy": 1.0, + "eval_loss": 1.910735363708227e-06, + "eval_runtime": 125.9625, + "eval_samples_per_second": 2.794, + "eval_steps_per_second": 0.699, + "step": 14450 + }, + { + "epoch": 82.16, + "learning_rate": 1.7931818181818183e-05, + "loss": 0.0, + "step": 14460 + }, + { + "epoch": 82.16, + "eval_accuracy": 1.0, + "eval_loss": 1.9059939404542092e-06, + "eval_runtime": 125.624, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 14460 + }, + { + "epoch": 82.22, + "learning_rate": 1.7875e-05, + "loss": 0.0, + "step": 14470 + }, + { + "epoch": 82.22, + "eval_accuracy": 1.0, + "eval_loss": 1.9009140714842943e-06, + "eval_runtime": 125.4685, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 14470 + }, + { + "epoch": 82.27, + "learning_rate": 1.781818181818182e-05, + "loss": 0.0, + "step": 14480 + }, + { + "epoch": 82.27, + "eval_accuracy": 1.0, + "eval_loss": 1.8971887811858323e-06, + "eval_runtime": 125.5184, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 14480 + }, + { + "epoch": 82.33, + "learning_rate": 1.7761363636363637e-05, + "loss": 0.0, + "step": 14490 + }, + { + "epoch": 82.33, + "eval_accuracy": 1.0, + "eval_loss": 1.8910927792603616e-06, + "eval_runtime": 124.7588, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 14490 + }, + { + "epoch": 82.39, + "learning_rate": 1.7704545454545455e-05, + "loss": 0.0, + "step": 14500 + }, + { + "epoch": 82.39, + "eval_accuracy": 1.0, + "eval_loss": 1.8846582179321558e-06, + "eval_runtime": 124.9885, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14500 + }, + { + "epoch": 82.44, + "learning_rate": 1.7647727272727273e-05, + "loss": 0.0001, + "step": 14510 + }, + { + "epoch": 82.44, + "eval_accuracy": 1.0, + "eval_loss": 1.7532570382172707e-06, + "eval_runtime": 124.7884, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 14510 + }, + { + "epoch": 82.5, + "learning_rate": 1.759090909090909e-05, + "loss": 0.0, + "step": 14520 + }, + { + "epoch": 82.5, + "eval_accuracy": 1.0, + "eval_loss": 1.5737658713987912e-06, + "eval_runtime": 124.7437, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 14520 + }, + { + "epoch": 82.56, + "learning_rate": 1.7534090909090912e-05, + "loss": 0.0, + "step": 14530 + }, + { + "epoch": 82.56, + "eval_accuracy": 1.0, + "eval_loss": 1.5266916761902394e-06, + "eval_runtime": 125.0331, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 14530 + }, + { + "epoch": 82.61, + "learning_rate": 1.7477272727272727e-05, + "loss": 0.0, + "step": 14540 + }, + { + "epoch": 82.61, + "eval_accuracy": 1.0, + "eval_loss": 1.5111131688172463e-06, + "eval_runtime": 124.5294, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 14540 + }, + { + "epoch": 82.67, + "learning_rate": 1.7420454545454545e-05, + "loss": 0.0, + "step": 14550 + }, + { + "epoch": 82.67, + "eval_accuracy": 1.0, + "eval_loss": 1.5067106460264768e-06, + "eval_runtime": 124.9181, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 14550 + }, + { + "epoch": 82.73, + "learning_rate": 1.7363636363636366e-05, + "loss": 0.0, + "step": 14560 + }, + { + "epoch": 82.73, + "eval_accuracy": 1.0, + "eval_loss": 1.5046786074890406e-06, + "eval_runtime": 125.0188, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14560 + }, + { + "epoch": 82.78, + "learning_rate": 1.730681818181818e-05, + "loss": 0.0, + "step": 14570 + }, + { + "epoch": 82.78, + "eval_accuracy": 1.0, + "eval_loss": 1.5019693364592968e-06, + "eval_runtime": 125.5242, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 14570 + }, + { + "epoch": 82.84, + "learning_rate": 1.725e-05, + "loss": 0.0, + "step": 14580 + }, + { + "epoch": 82.84, + "eval_accuracy": 1.0, + "eval_loss": 1.499598624832288e-06, + "eval_runtime": 125.0706, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 14580 + }, + { + "epoch": 82.9, + "learning_rate": 1.719318181818182e-05, + "loss": 0.0, + "step": 14590 + }, + { + "epoch": 82.9, + "eval_accuracy": 1.0, + "eval_loss": 1.4965506807129714e-06, + "eval_runtime": 125.238, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 14590 + }, + { + "epoch": 82.95, + "learning_rate": 1.7136363636363635e-05, + "loss": 0.0, + "step": 14600 + }, + { + "epoch": 82.95, + "eval_accuracy": 1.0, + "eval_loss": 1.4941800827728002e-06, + "eval_runtime": 125.0623, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 14600 + }, + { + "epoch": 83.01, + "learning_rate": 1.7079545454545457e-05, + "loss": 0.0, + "step": 14610 + }, + { + "epoch": 83.01, + "eval_accuracy": 1.0, + "eval_loss": 1.4891001001160475e-06, + "eval_runtime": 125.1487, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14610 + }, + { + "epoch": 83.07, + "learning_rate": 1.7022727272727275e-05, + "loss": 0.0, + "step": 14620 + }, + { + "epoch": 83.07, + "eval_accuracy": 1.0, + "eval_loss": 1.4758923043700634e-06, + "eval_runtime": 125.0674, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 14620 + }, + { + "epoch": 83.12, + "learning_rate": 1.696590909090909e-05, + "loss": 0.0, + "step": 14630 + }, + { + "epoch": 83.12, + "eval_accuracy": 1.0, + "eval_loss": 1.4670870314148488e-06, + "eval_runtime": 125.1118, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14630 + }, + { + "epoch": 83.18, + "learning_rate": 1.690909090909091e-05, + "loss": 0.0, + "step": 14640 + }, + { + "epoch": 83.18, + "eval_accuracy": 1.0, + "eval_loss": 1.4599751239074976e-06, + "eval_runtime": 125.2047, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 14640 + }, + { + "epoch": 83.24, + "learning_rate": 1.685227272727273e-05, + "loss": 0.0, + "step": 14650 + }, + { + "epoch": 83.24, + "eval_accuracy": 1.0, + "eval_loss": 1.456927179788181e-06, + "eval_runtime": 124.8351, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 14650 + }, + { + "epoch": 83.3, + "learning_rate": 1.6795454545454547e-05, + "loss": 0.0, + "step": 14660 + }, + { + "epoch": 83.3, + "eval_accuracy": 1.0, + "eval_loss": 1.4552338143403176e-06, + "eval_runtime": 124.9138, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 14660 + }, + { + "epoch": 83.35, + "learning_rate": 1.6738636363636365e-05, + "loss": 0.0, + "step": 14670 + }, + { + "epoch": 83.35, + "eval_accuracy": 1.0, + "eval_loss": 1.452185870221001e-06, + "eval_runtime": 124.7765, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 14670 + }, + { + "epoch": 83.41, + "learning_rate": 1.6681818181818183e-05, + "loss": 0.0, + "step": 14680 + }, + { + "epoch": 83.41, + "eval_accuracy": 1.0, + "eval_loss": 1.44981527228083e-06, + "eval_runtime": 125.4013, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 14680 + }, + { + "epoch": 83.47, + "learning_rate": 1.6625e-05, + "loss": 0.0, + "step": 14690 + }, + { + "epoch": 83.47, + "eval_accuracy": 1.0, + "eval_loss": 1.4464286550719407e-06, + "eval_runtime": 124.7341, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 14690 + }, + { + "epoch": 83.52, + "learning_rate": 1.656818181818182e-05, + "loss": 0.0, + "step": 14700 + }, + { + "epoch": 83.52, + "eval_accuracy": 1.0, + "eval_loss": 1.4410099993256154e-06, + "eval_runtime": 124.8634, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 14700 + }, + { + "epoch": 83.58, + "learning_rate": 1.6511363636363637e-05, + "loss": 0.0, + "step": 14710 + }, + { + "epoch": 83.58, + "eval_accuracy": 1.0, + "eval_loss": 1.4386394013854442e-06, + "eval_runtime": 125.6714, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 0.7, + "step": 14710 + }, + { + "epoch": 83.64, + "learning_rate": 1.6454545454545455e-05, + "loss": 0.0, + "step": 14720 + }, + { + "epoch": 83.64, + "eval_accuracy": 1.0, + "eval_loss": 1.4261088381317677e-06, + "eval_runtime": 124.9774, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 14720 + }, + { + "epoch": 83.69, + "learning_rate": 1.6397727272727273e-05, + "loss": 0.0, + "step": 14730 + }, + { + "epoch": 83.69, + "eval_accuracy": 1.0, + "eval_loss": 1.4156103134155273e-06, + "eval_runtime": 124.8629, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 14730 + }, + { + "epoch": 83.75, + "learning_rate": 1.6340909090909094e-05, + "loss": 0.0, + "step": 14740 + }, + { + "epoch": 83.75, + "eval_accuracy": 1.0, + "eval_loss": 1.4122236962066381e-06, + "eval_runtime": 125.6089, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 14740 + }, + { + "epoch": 83.81, + "learning_rate": 1.628409090909091e-05, + "loss": 0.0, + "step": 14750 + }, + { + "epoch": 83.81, + "eval_accuracy": 1.0, + "eval_loss": 1.4071437135498854e-06, + "eval_runtime": 124.6562, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 14750 + }, + { + "epoch": 83.86, + "learning_rate": 1.6227272727272727e-05, + "loss": 0.0, + "step": 14760 + }, + { + "epoch": 83.86, + "eval_accuracy": 1.0, + "eval_loss": 1.4027410770722781e-06, + "eval_runtime": 124.7413, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 14760 + }, + { + "epoch": 83.92, + "learning_rate": 1.617045454545455e-05, + "loss": 0.0, + "step": 14770 + }, + { + "epoch": 83.92, + "eval_accuracy": 1.0, + "eval_loss": 1.3996931329529616e-06, + "eval_runtime": 124.9136, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 14770 + }, + { + "epoch": 83.98, + "learning_rate": 1.6113636363636363e-05, + "loss": 0.0, + "step": 14780 + }, + { + "epoch": 83.98, + "eval_accuracy": 1.0, + "eval_loss": 1.3963065157440724e-06, + "eval_runtime": 124.784, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 14780 + }, + { + "epoch": 84.03, + "learning_rate": 1.605681818181818e-05, + "loss": 0.0, + "step": 14790 + }, + { + "epoch": 84.03, + "eval_accuracy": 1.0, + "eval_loss": 1.3946132639830466e-06, + "eval_runtime": 124.678, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 14790 + }, + { + "epoch": 84.09, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0, + "step": 14800 + }, + { + "epoch": 84.09, + "eval_accuracy": 1.0, + "eval_loss": 1.389533281326294e-06, + "eval_runtime": 125.4577, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.701, + "step": 14800 + }, + { + "epoch": 84.15, + "learning_rate": 1.5943181818181817e-05, + "loss": 0.0, + "step": 14810 + }, + { + "epoch": 84.15, + "eval_accuracy": 1.0, + "eval_loss": 1.3878399158784305e-06, + "eval_runtime": 124.8956, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 14810 + }, + { + "epoch": 84.2, + "learning_rate": 1.588636363636364e-05, + "loss": 0.0, + "step": 14820 + }, + { + "epoch": 84.2, + "eval_accuracy": 1.0, + "eval_loss": 1.385807991027832e-06, + "eval_runtime": 125.2681, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 14820 + }, + { + "epoch": 84.26, + "learning_rate": 1.5829545454545457e-05, + "loss": 0.0, + "step": 14830 + }, + { + "epoch": 84.26, + "eval_accuracy": 1.0, + "eval_loss": 1.3820827007293701e-06, + "eval_runtime": 125.1208, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 14830 + }, + { + "epoch": 84.32, + "learning_rate": 1.577272727272727e-05, + "loss": 0.0, + "step": 14840 + }, + { + "epoch": 84.32, + "eval_accuracy": 1.0, + "eval_loss": 1.3780187373413355e-06, + "eval_runtime": 126.026, + "eval_samples_per_second": 2.793, + "eval_steps_per_second": 0.698, + "step": 14840 + }, + { + "epoch": 84.38, + "learning_rate": 1.5715909090909093e-05, + "loss": 0.0, + "step": 14850 + }, + { + "epoch": 84.38, + "eval_accuracy": 1.0, + "eval_loss": 1.3736161008637282e-06, + "eval_runtime": 124.8872, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 14850 + }, + { + "epoch": 84.43, + "learning_rate": 1.565909090909091e-05, + "loss": 0.0, + "step": 14860 + }, + { + "epoch": 84.43, + "eval_accuracy": 1.0, + "eval_loss": 1.370229483654839e-06, + "eval_runtime": 124.7362, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 14860 + }, + { + "epoch": 84.49, + "learning_rate": 1.560227272727273e-05, + "loss": 0.0, + "step": 14870 + }, + { + "epoch": 84.49, + "eval_accuracy": 1.0, + "eval_loss": 1.3671815395355225e-06, + "eval_runtime": 124.4409, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 14870 + }, + { + "epoch": 84.55, + "learning_rate": 1.5545454545454547e-05, + "loss": 0.0, + "step": 14880 + }, + { + "epoch": 84.55, + "eval_accuracy": 1.0, + "eval_loss": 1.3634562492370605e-06, + "eval_runtime": 125.5056, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 14880 + }, + { + "epoch": 84.6, + "learning_rate": 1.5488636363636365e-05, + "loss": 0.0, + "step": 14890 + }, + { + "epoch": 84.6, + "eval_accuracy": 1.0, + "eval_loss": 1.3600696320281713e-06, + "eval_runtime": 124.5535, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 14890 + }, + { + "epoch": 84.66, + "learning_rate": 1.5431818181818183e-05, + "loss": 0.0, + "step": 14900 + }, + { + "epoch": 84.66, + "eval_accuracy": 1.0, + "eval_loss": 1.3549896493714186e-06, + "eval_runtime": 124.8863, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 14900 + }, + { + "epoch": 84.72, + "learning_rate": 1.5375e-05, + "loss": 0.0, + "step": 14910 + }, + { + "epoch": 84.72, + "eval_accuracy": 1.0, + "eval_loss": 1.3526190514312475e-06, + "eval_runtime": 124.9519, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 14910 + }, + { + "epoch": 84.77, + "learning_rate": 1.531818181818182e-05, + "loss": 0.0, + "step": 14920 + }, + { + "epoch": 84.77, + "eval_accuracy": 1.0, + "eval_loss": 1.349571107311931e-06, + "eval_runtime": 124.8593, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 14920 + }, + { + "epoch": 84.83, + "learning_rate": 1.5261363636363637e-05, + "loss": 0.0, + "step": 14930 + }, + { + "epoch": 84.83, + "eval_accuracy": 1.0, + "eval_loss": 1.3451684708343237e-06, + "eval_runtime": 125.3129, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 14930 + }, + { + "epoch": 84.89, + "learning_rate": 1.5204545454545455e-05, + "loss": 0.0, + "step": 14940 + }, + { + "epoch": 84.89, + "eval_accuracy": 1.0, + "eval_loss": 1.3424591998045798e-06, + "eval_runtime": 125.005, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14940 + }, + { + "epoch": 84.94, + "learning_rate": 1.5147727272727275e-05, + "loss": 0.0, + "step": 14950 + }, + { + "epoch": 84.94, + "eval_accuracy": 1.0, + "eval_loss": 1.3383952364165452e-06, + "eval_runtime": 124.7757, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 14950 + }, + { + "epoch": 85.0, + "learning_rate": 1.5090909090909091e-05, + "loss": 0.0, + "step": 14960 + }, + { + "epoch": 85.0, + "eval_accuracy": 1.0, + "eval_loss": 1.3346699461180833e-06, + "eval_runtime": 125.0977, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 14960 + }, + { + "epoch": 85.06, + "learning_rate": 1.503409090909091e-05, + "loss": 0.0, + "step": 14970 + }, + { + "epoch": 85.06, + "eval_accuracy": 1.0, + "eval_loss": 1.3316220019987668e-06, + "eval_runtime": 124.594, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 14970 + }, + { + "epoch": 85.11, + "learning_rate": 1.4977272727272729e-05, + "loss": 0.0, + "step": 14980 + }, + { + "epoch": 85.11, + "eval_accuracy": 1.0, + "eval_loss": 1.3285739441926125e-06, + "eval_runtime": 124.9842, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 14980 + }, + { + "epoch": 85.17, + "learning_rate": 1.4920454545454545e-05, + "loss": 0.0, + "step": 14990 + }, + { + "epoch": 85.17, + "eval_accuracy": 1.0, + "eval_loss": 1.3251873269837233e-06, + "eval_runtime": 125.9986, + "eval_samples_per_second": 2.794, + "eval_steps_per_second": 0.698, + "step": 14990 + }, + { + "epoch": 85.23, + "learning_rate": 1.4863636363636365e-05, + "loss": 0.0, + "step": 15000 + }, + { + "epoch": 85.23, + "eval_accuracy": 1.0, + "eval_loss": 1.3201074580138084e-06, + "eval_runtime": 124.9174, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 15000 + }, + { + "epoch": 85.28, + "learning_rate": 1.4806818181818183e-05, + "loss": 0.0, + "step": 15010 + }, + { + "epoch": 85.28, + "eval_accuracy": 1.0, + "eval_loss": 1.3173980732972268e-06, + "eval_runtime": 125.2451, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 15010 + }, + { + "epoch": 85.34, + "learning_rate": 1.475e-05, + "loss": 0.0, + "step": 15020 + }, + { + "epoch": 85.34, + "eval_accuracy": 1.0, + "eval_loss": 1.3150274753570557e-06, + "eval_runtime": 125.3448, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 15020 + }, + { + "epoch": 85.4, + "learning_rate": 1.4693181818181819e-05, + "loss": 0.0, + "step": 15030 + }, + { + "epoch": 85.4, + "eval_accuracy": 1.0, + "eval_loss": 1.3129955505064572e-06, + "eval_runtime": 125.2921, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 15030 + }, + { + "epoch": 85.45, + "learning_rate": 1.4636363636363637e-05, + "loss": 0.0, + "step": 15040 + }, + { + "epoch": 85.45, + "eval_accuracy": 1.0, + "eval_loss": 1.3089315871184226e-06, + "eval_runtime": 125.183, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 15040 + }, + { + "epoch": 85.51, + "learning_rate": 1.4579545454545453e-05, + "loss": 0.0, + "step": 15050 + }, + { + "epoch": 85.51, + "eval_accuracy": 1.0, + "eval_loss": 1.3052062968199607e-06, + "eval_runtime": 125.3443, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 15050 + }, + { + "epoch": 85.57, + "learning_rate": 1.4522727272727273e-05, + "loss": 0.0, + "step": 15060 + }, + { + "epoch": 85.57, + "eval_accuracy": 1.0, + "eval_loss": 1.3018196796110715e-06, + "eval_runtime": 125.8753, + "eval_samples_per_second": 2.796, + "eval_steps_per_second": 0.699, + "step": 15060 + }, + { + "epoch": 85.62, + "learning_rate": 1.4465909090909091e-05, + "loss": 0.0, + "step": 15070 + }, + { + "epoch": 85.62, + "eval_accuracy": 1.0, + "eval_loss": 1.2987716218049172e-06, + "eval_runtime": 125.4889, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 15070 + }, + { + "epoch": 85.68, + "learning_rate": 1.4409090909090911e-05, + "loss": 0.0, + "step": 15080 + }, + { + "epoch": 85.68, + "eval_accuracy": 1.0, + "eval_loss": 1.296401023864746e-06, + "eval_runtime": 124.8217, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 15080 + }, + { + "epoch": 85.74, + "learning_rate": 1.4352272727272727e-05, + "loss": 0.0, + "step": 15090 + }, + { + "epoch": 85.74, + "eval_accuracy": 1.0, + "eval_loss": 1.2947076584168826e-06, + "eval_runtime": 124.8471, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 15090 + }, + { + "epoch": 85.8, + "learning_rate": 1.4295454545454545e-05, + "loss": 0.0, + "step": 15100 + }, + { + "epoch": 85.8, + "eval_accuracy": 1.0, + "eval_loss": 1.2919983873871388e-06, + "eval_runtime": 124.9329, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 15100 + }, + { + "epoch": 85.85, + "learning_rate": 1.4238636363636365e-05, + "loss": 0.0, + "step": 15110 + }, + { + "epoch": 85.85, + "eval_accuracy": 1.0, + "eval_loss": 1.2899664625365403e-06, + "eval_runtime": 124.8377, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 15110 + }, + { + "epoch": 85.91, + "learning_rate": 1.4181818181818181e-05, + "loss": 0.0, + "step": 15120 + }, + { + "epoch": 85.91, + "eval_accuracy": 1.0, + "eval_loss": 1.2862411722380784e-06, + "eval_runtime": 124.9721, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 15120 + }, + { + "epoch": 85.97, + "learning_rate": 1.4125e-05, + "loss": 0.0, + "step": 15130 + }, + { + "epoch": 85.97, + "eval_accuracy": 1.0, + "eval_loss": 1.2825158819396165e-06, + "eval_runtime": 125.0498, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 15130 + }, + { + "epoch": 86.02, + "learning_rate": 1.406818181818182e-05, + "loss": 0.0, + "step": 15140 + }, + { + "epoch": 86.02, + "eval_accuracy": 1.0, + "eval_loss": 1.2794679378203e-06, + "eval_runtime": 124.6807, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 15140 + }, + { + "epoch": 86.08, + "learning_rate": 1.4011363636363637e-05, + "loss": 0.0, + "step": 15150 + }, + { + "epoch": 86.08, + "eval_accuracy": 1.0, + "eval_loss": 1.2764198800141457e-06, + "eval_runtime": 124.8695, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 15150 + }, + { + "epoch": 86.14, + "learning_rate": 1.3954545454545457e-05, + "loss": 0.0, + "step": 15160 + }, + { + "epoch": 86.14, + "eval_accuracy": 1.0, + "eval_loss": 1.2733719358948292e-06, + "eval_runtime": 125.1107, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 15160 + }, + { + "epoch": 86.19, + "learning_rate": 1.3897727272727273e-05, + "loss": 0.0, + "step": 15170 + }, + { + "epoch": 86.19, + "eval_accuracy": 1.0, + "eval_loss": 1.2720173572233762e-06, + "eval_runtime": 124.4272, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 15170 + }, + { + "epoch": 86.25, + "learning_rate": 1.3840909090909091e-05, + "loss": 0.0, + "step": 15180 + }, + { + "epoch": 86.25, + "eval_accuracy": 1.0, + "eval_loss": 1.2682920669249143e-06, + "eval_runtime": 124.4145, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 15180 + }, + { + "epoch": 86.31, + "learning_rate": 1.3784090909090911e-05, + "loss": 0.0, + "step": 15190 + }, + { + "epoch": 86.31, + "eval_accuracy": 1.0, + "eval_loss": 1.2665987014770508e-06, + "eval_runtime": 125.1069, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 15190 + }, + { + "epoch": 86.36, + "learning_rate": 1.3727272727272727e-05, + "loss": 0.0, + "step": 15200 + }, + { + "epoch": 86.36, + "eval_accuracy": 1.0, + "eval_loss": 1.2645667766264523e-06, + "eval_runtime": 125.4894, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 15200 + }, + { + "epoch": 86.42, + "learning_rate": 1.3670454545454547e-05, + "loss": 0.0, + "step": 15210 + }, + { + "epoch": 86.42, + "eval_accuracy": 1.0, + "eval_loss": 1.2625347380890162e-06, + "eval_runtime": 124.9677, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 15210 + }, + { + "epoch": 86.48, + "learning_rate": 1.3613636363636365e-05, + "loss": 0.0, + "step": 15220 + }, + { + "epoch": 86.48, + "eval_accuracy": 1.0, + "eval_loss": 1.2598254670592723e-06, + "eval_runtime": 125.0841, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 15220 + }, + { + "epoch": 86.53, + "learning_rate": 1.3556818181818181e-05, + "loss": 0.0, + "step": 15230 + }, + { + "epoch": 86.53, + "eval_accuracy": 1.0, + "eval_loss": 1.2567775229399558e-06, + "eval_runtime": 125.1204, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 15230 + }, + { + "epoch": 86.59, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0, + "step": 15240 + }, + { + "epoch": 86.59, + "eval_accuracy": 1.0, + "eval_loss": 1.255422830581665e-06, + "eval_runtime": 125.2094, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 15240 + }, + { + "epoch": 86.65, + "learning_rate": 1.344318181818182e-05, + "loss": 0.0, + "step": 15250 + }, + { + "epoch": 86.65, + "eval_accuracy": 1.0, + "eval_loss": 1.253052232641494e-06, + "eval_runtime": 125.1302, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 15250 + }, + { + "epoch": 86.7, + "learning_rate": 1.3386363636363636e-05, + "loss": 0.0, + "step": 15260 + }, + { + "epoch": 86.7, + "eval_accuracy": 1.0, + "eval_loss": 1.2513588671936304e-06, + "eval_runtime": 125.2976, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 15260 + }, + { + "epoch": 86.76, + "learning_rate": 1.3329545454545455e-05, + "loss": 0.0, + "step": 15270 + }, + { + "epoch": 86.76, + "eval_accuracy": 1.0, + "eval_loss": 1.2486495961638866e-06, + "eval_runtime": 125.537, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 15270 + }, + { + "epoch": 86.82, + "learning_rate": 1.3272727272727273e-05, + "loss": 0.0, + "step": 15280 + }, + { + "epoch": 86.82, + "eval_accuracy": 1.0, + "eval_loss": 1.2472949038055958e-06, + "eval_runtime": 124.7571, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 15280 + }, + { + "epoch": 86.88, + "learning_rate": 1.3215909090909093e-05, + "loss": 0.0, + "step": 15290 + }, + { + "epoch": 86.88, + "eval_accuracy": 1.0, + "eval_loss": 1.2452629789549974e-06, + "eval_runtime": 125.958, + "eval_samples_per_second": 2.795, + "eval_steps_per_second": 0.699, + "step": 15290 + }, + { + "epoch": 86.93, + "learning_rate": 1.315909090909091e-05, + "loss": 0.0, + "step": 15300 + }, + { + "epoch": 86.93, + "eval_accuracy": 1.0, + "eval_loss": 1.2432309404175612e-06, + "eval_runtime": 125.1642, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 15300 + }, + { + "epoch": 86.99, + "learning_rate": 1.3102272727272727e-05, + "loss": 0.0, + "step": 15310 + }, + { + "epoch": 86.99, + "eval_accuracy": 1.0, + "eval_loss": 1.2415376886565355e-06, + "eval_runtime": 125.3045, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 15310 + }, + { + "epoch": 87.05, + "learning_rate": 1.3045454545454547e-05, + "loss": 0.0, + "step": 15320 + }, + { + "epoch": 87.05, + "eval_accuracy": 1.0, + "eval_loss": 1.2378123983580736e-06, + "eval_runtime": 124.8551, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 15320 + }, + { + "epoch": 87.1, + "learning_rate": 1.2988636363636363e-05, + "loss": 0.0, + "step": 15330 + }, + { + "epoch": 87.1, + "eval_accuracy": 1.0, + "eval_loss": 1.234764454238757e-06, + "eval_runtime": 124.9871, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 15330 + }, + { + "epoch": 87.16, + "learning_rate": 1.2931818181818182e-05, + "loss": 0.0, + "step": 15340 + }, + { + "epoch": 87.16, + "eval_accuracy": 1.0, + "eval_loss": 1.2296844715820043e-06, + "eval_runtime": 125.6265, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 15340 + }, + { + "epoch": 87.22, + "learning_rate": 1.2875000000000001e-05, + "loss": 0.0, + "step": 15350 + }, + { + "epoch": 87.22, + "eval_accuracy": 1.0, + "eval_loss": 1.2283297792237136e-06, + "eval_runtime": 124.7918, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 15350 + }, + { + "epoch": 87.27, + "learning_rate": 1.2818181818181818e-05, + "loss": 0.0, + "step": 15360 + }, + { + "epoch": 87.27, + "eval_accuracy": 1.0, + "eval_loss": 1.2259591812835424e-06, + "eval_runtime": 125.4316, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 15360 + }, + { + "epoch": 87.33, + "learning_rate": 1.2761363636363637e-05, + "loss": 0.0, + "step": 15370 + }, + { + "epoch": 87.33, + "eval_accuracy": 1.0, + "eval_loss": 1.224265815835679e-06, + "eval_runtime": 125.4041, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 15370 + }, + { + "epoch": 87.39, + "learning_rate": 1.2704545454545455e-05, + "loss": 0.0, + "step": 15380 + }, + { + "epoch": 87.39, + "eval_accuracy": 1.0, + "eval_loss": 1.2232499102537986e-06, + "eval_runtime": 124.5194, + "eval_samples_per_second": 2.827, + "eval_steps_per_second": 0.707, + "step": 15380 + }, + { + "epoch": 87.44, + "learning_rate": 1.2647727272727272e-05, + "loss": 0.0, + "step": 15390 + }, + { + "epoch": 87.44, + "eval_accuracy": 1.0, + "eval_loss": 1.2212178717163624e-06, + "eval_runtime": 125.0038, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 15390 + }, + { + "epoch": 87.5, + "learning_rate": 1.2590909090909091e-05, + "loss": 0.2124, + "step": 15400 + }, + { + "epoch": 87.5, + "eval_accuracy": 1.0, + "eval_loss": 1.5869736671447754e-06, + "eval_runtime": 124.9006, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 15400 + }, + { + "epoch": 87.56, + "learning_rate": 1.253409090909091e-05, + "loss": 0.0, + "step": 15410 + }, + { + "epoch": 87.56, + "eval_accuracy": 1.0, + "eval_loss": 2.0133500129304593e-06, + "eval_runtime": 124.9435, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 15410 + }, + { + "epoch": 87.61, + "learning_rate": 1.2477272727272727e-05, + "loss": 0.0, + "step": 15420 + }, + { + "epoch": 87.61, + "eval_accuracy": 1.0, + "eval_loss": 2.2233209620026173e-06, + "eval_runtime": 124.8405, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 15420 + }, + { + "epoch": 87.67, + "learning_rate": 1.2420454545454546e-05, + "loss": 0.0, + "step": 15430 + }, + { + "epoch": 87.67, + "eval_accuracy": 1.0, + "eval_loss": 2.2937629182706587e-06, + "eval_runtime": 124.6803, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 15430 + }, + { + "epoch": 87.73, + "learning_rate": 1.2363636363636365e-05, + "loss": 0.0, + "step": 15440 + }, + { + "epoch": 87.73, + "eval_accuracy": 1.0, + "eval_loss": 2.3090026388672413e-06, + "eval_runtime": 124.6335, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 15440 + }, + { + "epoch": 87.78, + "learning_rate": 1.2306818181818182e-05, + "loss": 0.0, + "step": 15450 + }, + { + "epoch": 87.78, + "eval_accuracy": 1.0, + "eval_loss": 2.310695890628267e-06, + "eval_runtime": 124.7072, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 15450 + }, + { + "epoch": 87.84, + "learning_rate": 1.225e-05, + "loss": 0.0, + "step": 15460 + }, + { + "epoch": 87.84, + "eval_accuracy": 1.0, + "eval_loss": 2.304261442986899e-06, + "eval_runtime": 125.0047, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 15460 + }, + { + "epoch": 87.9, + "learning_rate": 1.219318181818182e-05, + "loss": 0.0, + "step": 15470 + }, + { + "epoch": 87.9, + "eval_accuracy": 1.0, + "eval_loss": 2.298504114151001e-06, + "eval_runtime": 125.2431, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 15470 + }, + { + "epoch": 87.95, + "learning_rate": 1.2136363636363637e-05, + "loss": 0.0, + "step": 15480 + }, + { + "epoch": 87.95, + "eval_accuracy": 1.0, + "eval_loss": 2.2917308797332225e-06, + "eval_runtime": 125.8264, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 15480 + }, + { + "epoch": 88.01, + "learning_rate": 1.2079545454545454e-05, + "loss": 0.0, + "step": 15490 + }, + { + "epoch": 88.01, + "eval_accuracy": 1.0, + "eval_loss": 2.2859735508973245e-06, + "eval_runtime": 125.6997, + "eval_samples_per_second": 2.8, + "eval_steps_per_second": 0.7, + "step": 15490 + }, + { + "epoch": 88.07, + "learning_rate": 1.2022727272727273e-05, + "loss": 0.0, + "step": 15500 + }, + { + "epoch": 88.07, + "eval_accuracy": 1.0, + "eval_loss": 2.2785229703004006e-06, + "eval_runtime": 124.7482, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 15500 + }, + { + "epoch": 88.12, + "learning_rate": 1.1965909090909091e-05, + "loss": 0.0, + "step": 15510 + }, + { + "epoch": 88.12, + "eval_accuracy": 1.0, + "eval_loss": 2.2690403511660406e-06, + "eval_runtime": 125.1998, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 15510 + }, + { + "epoch": 88.18, + "learning_rate": 1.190909090909091e-05, + "loss": 0.0, + "step": 15520 + }, + { + "epoch": 88.18, + "eval_accuracy": 1.0, + "eval_loss": 2.261251211166382e-06, + "eval_runtime": 124.9199, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 15520 + }, + { + "epoch": 88.24, + "learning_rate": 1.185227272727273e-05, + "loss": 0.0, + "step": 15530 + }, + { + "epoch": 88.24, + "eval_accuracy": 1.0, + "eval_loss": 2.2554938823304838e-06, + "eval_runtime": 125.0763, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 15530 + }, + { + "epoch": 88.3, + "learning_rate": 1.1795454545454546e-05, + "loss": 0.0, + "step": 15540 + }, + { + "epoch": 88.3, + "eval_accuracy": 1.0, + "eval_loss": 2.2429633190768072e-06, + "eval_runtime": 125.1122, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 15540 + }, + { + "epoch": 88.35, + "learning_rate": 1.1738636363636364e-05, + "loss": 0.0, + "step": 15550 + }, + { + "epoch": 88.35, + "eval_accuracy": 1.0, + "eval_loss": 2.230432983196806e-06, + "eval_runtime": 125.1385, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 15550 + }, + { + "epoch": 88.41, + "learning_rate": 1.1681818181818183e-05, + "loss": 0.0, + "step": 15560 + }, + { + "epoch": 88.41, + "eval_accuracy": 1.0, + "eval_loss": 2.2195956717041554e-06, + "eval_runtime": 124.6898, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 15560 + }, + { + "epoch": 88.47, + "learning_rate": 1.1625000000000001e-05, + "loss": 0.0, + "step": 15570 + }, + { + "epoch": 88.47, + "eval_accuracy": 1.0, + "eval_loss": 2.2134997834655223e-06, + "eval_runtime": 125.1522, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 15570 + }, + { + "epoch": 88.52, + "learning_rate": 1.1568181818181818e-05, + "loss": 0.0, + "step": 15580 + }, + { + "epoch": 88.52, + "eval_accuracy": 1.0, + "eval_loss": 2.205710416092188e-06, + "eval_runtime": 125.2579, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 15580 + }, + { + "epoch": 88.58, + "learning_rate": 1.1511363636363637e-05, + "loss": 0.0, + "step": 15590 + }, + { + "epoch": 88.58, + "eval_accuracy": 1.0, + "eval_loss": 2.200630660809111e-06, + "eval_runtime": 125.3451, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 15590 + }, + { + "epoch": 88.64, + "learning_rate": 1.1454545454545455e-05, + "loss": 0.0, + "step": 15600 + }, + { + "epoch": 88.64, + "eval_accuracy": 1.0, + "eval_loss": 2.1921639472566312e-06, + "eval_runtime": 125.2593, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 15600 + }, + { + "epoch": 88.69, + "learning_rate": 1.1397727272727273e-05, + "loss": 0.0, + "step": 15610 + }, + { + "epoch": 88.69, + "eval_accuracy": 1.0, + "eval_loss": 2.1881000975554343e-06, + "eval_runtime": 125.2043, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 15610 + }, + { + "epoch": 88.75, + "learning_rate": 1.1340909090909092e-05, + "loss": 0.0, + "step": 15620 + }, + { + "epoch": 88.75, + "eval_accuracy": 1.0, + "eval_loss": 2.1826815554959467e-06, + "eval_runtime": 124.9574, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 15620 + }, + { + "epoch": 88.81, + "learning_rate": 1.128409090909091e-05, + "loss": 0.0, + "step": 15630 + }, + { + "epoch": 88.81, + "eval_accuracy": 1.0, + "eval_loss": 2.1782789190183394e-06, + "eval_runtime": 125.0728, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 15630 + }, + { + "epoch": 88.86, + "learning_rate": 1.1227272727272728e-05, + "loss": 0.0, + "step": 15640 + }, + { + "epoch": 88.86, + "eval_accuracy": 1.0, + "eval_loss": 2.1735374957643216e-06, + "eval_runtime": 125.0335, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 15640 + }, + { + "epoch": 88.92, + "learning_rate": 1.1170454545454546e-05, + "loss": 0.0, + "step": 15650 + }, + { + "epoch": 88.92, + "eval_accuracy": 1.0, + "eval_loss": 2.1674416075256886e-06, + "eval_runtime": 125.0613, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 15650 + }, + { + "epoch": 88.98, + "learning_rate": 1.1113636363636364e-05, + "loss": 0.0, + "step": 15660 + }, + { + "epoch": 88.98, + "eval_accuracy": 1.0, + "eval_loss": 2.1589751213468844e-06, + "eval_runtime": 125.2351, + "eval_samples_per_second": 2.811, + "eval_steps_per_second": 0.703, + "step": 15660 + }, + { + "epoch": 89.03, + "learning_rate": 1.1056818181818182e-05, + "loss": 0.0, + "step": 15670 + }, + { + "epoch": 89.03, + "eval_accuracy": 1.0, + "eval_loss": 2.142719267794746e-06, + "eval_runtime": 124.9193, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 15670 + }, + { + "epoch": 89.09, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0, + "step": 15680 + }, + { + "epoch": 89.09, + "eval_accuracy": 1.0, + "eval_loss": 2.1179969280638034e-06, + "eval_runtime": 124.729, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 15680 + }, + { + "epoch": 89.15, + "learning_rate": 1.094318181818182e-05, + "loss": 0.0, + "step": 15690 + }, + { + "epoch": 89.15, + "eval_accuracy": 1.0, + "eval_loss": 2.1034343262726907e-06, + "eval_runtime": 125.416, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 0.702, + "step": 15690 + }, + { + "epoch": 89.2, + "learning_rate": 1.0886363636363636e-05, + "loss": 0.0, + "step": 15700 + }, + { + "epoch": 89.2, + "eval_accuracy": 1.0, + "eval_loss": 2.0932745883328607e-06, + "eval_runtime": 124.8044, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 15700 + }, + { + "epoch": 89.26, + "learning_rate": 1.0829545454545456e-05, + "loss": 0.0, + "step": 15710 + }, + { + "epoch": 89.26, + "eval_accuracy": 1.0, + "eval_loss": 2.0868399133178173e-06, + "eval_runtime": 124.8059, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 15710 + }, + { + "epoch": 89.32, + "learning_rate": 1.0772727272727274e-05, + "loss": 0.0, + "step": 15720 + }, + { + "epoch": 89.32, + "eval_accuracy": 1.0, + "eval_loss": 2.0797278921236284e-06, + "eval_runtime": 125.1852, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 15720 + }, + { + "epoch": 89.38, + "learning_rate": 1.0715909090909092e-05, + "loss": 0.0, + "step": 15730 + }, + { + "epoch": 89.38, + "eval_accuracy": 1.0, + "eval_loss": 2.0709228465420892e-06, + "eval_runtime": 124.8689, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 15730 + }, + { + "epoch": 89.43, + "learning_rate": 1.065909090909091e-05, + "loss": 0.0, + "step": 15740 + }, + { + "epoch": 89.43, + "eval_accuracy": 1.0, + "eval_loss": 2.051280262094224e-06, + "eval_runtime": 124.6976, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 15740 + }, + { + "epoch": 89.49, + "learning_rate": 1.0602272727272728e-05, + "loss": 0.0, + "step": 15750 + }, + { + "epoch": 89.49, + "eval_accuracy": 1.0, + "eval_loss": 2.0265579223632812e-06, + "eval_runtime": 124.746, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 15750 + }, + { + "epoch": 89.55, + "learning_rate": 1.0545454545454546e-05, + "loss": 0.0, + "step": 15760 + }, + { + "epoch": 89.55, + "eval_accuracy": 1.0, + "eval_loss": 2.0170753032289213e-06, + "eval_runtime": 124.7068, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 15760 + }, + { + "epoch": 89.6, + "learning_rate": 1.0488636363636364e-05, + "loss": 0.0, + "step": 15770 + }, + { + "epoch": 89.6, + "eval_accuracy": 1.0, + "eval_loss": 2.009963509408408e-06, + "eval_runtime": 124.7518, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 15770 + }, + { + "epoch": 89.66, + "learning_rate": 1.0431818181818183e-05, + "loss": 0.0, + "step": 15780 + }, + { + "epoch": 89.66, + "eval_accuracy": 1.0, + "eval_loss": 2.0035288343933644e-06, + "eval_runtime": 125.3359, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 15780 + }, + { + "epoch": 89.72, + "learning_rate": 1.0375e-05, + "loss": 0.0, + "step": 15790 + }, + { + "epoch": 89.72, + "eval_accuracy": 1.0, + "eval_loss": 1.9984488517366117e-06, + "eval_runtime": 124.8878, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 15790 + }, + { + "epoch": 89.77, + "learning_rate": 1.0318181818181818e-05, + "loss": 0.0, + "step": 15800 + }, + { + "epoch": 89.77, + "eval_accuracy": 1.0, + "eval_loss": 1.992691750274389e-06, + "eval_runtime": 124.5747, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.706, + "step": 15800 + }, + { + "epoch": 89.83, + "learning_rate": 1.0261363636363638e-05, + "loss": 0.0, + "step": 15810 + }, + { + "epoch": 89.83, + "eval_accuracy": 1.0, + "eval_loss": 1.9842250367219094e-06, + "eval_runtime": 124.4575, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 15810 + }, + { + "epoch": 89.89, + "learning_rate": 1.0204545454545456e-05, + "loss": 0.0, + "step": 15820 + }, + { + "epoch": 89.89, + "eval_accuracy": 1.0, + "eval_loss": 1.9777905890805414e-06, + "eval_runtime": 124.9208, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 15820 + }, + { + "epoch": 89.94, + "learning_rate": 1.0147727272727272e-05, + "loss": 0.0, + "step": 15830 + }, + { + "epoch": 89.94, + "eval_accuracy": 1.0, + "eval_loss": 1.971694473468233e-06, + "eval_runtime": 125.4997, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 15830 + }, + { + "epoch": 90.0, + "learning_rate": 1.0090909090909092e-05, + "loss": 0.0, + "step": 15840 + }, + { + "epoch": 90.0, + "eval_accuracy": 1.0, + "eval_loss": 1.9625506411102833e-06, + "eval_runtime": 124.6118, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 15840 + }, + { + "epoch": 90.06, + "learning_rate": 1.003409090909091e-05, + "loss": 0.0, + "step": 15850 + }, + { + "epoch": 90.06, + "eval_accuracy": 1.0, + "eval_loss": 1.952390675796778e-06, + "eval_runtime": 124.7905, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 15850 + }, + { + "epoch": 90.11, + "learning_rate": 9.977272727272728e-06, + "loss": 0.0, + "step": 15860 + }, + { + "epoch": 90.11, + "eval_accuracy": 1.0, + "eval_loss": 1.944940095199854e-06, + "eval_runtime": 124.7938, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 15860 + }, + { + "epoch": 90.17, + "learning_rate": 9.920454545454546e-06, + "loss": 0.0, + "step": 15870 + }, + { + "epoch": 90.17, + "eval_accuracy": 1.0, + "eval_loss": 1.936135049618315e-06, + "eval_runtime": 125.6242, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.701, + "step": 15870 + }, + { + "epoch": 90.23, + "learning_rate": 9.863636363636364e-06, + "loss": 0.0, + "step": 15880 + }, + { + "epoch": 90.23, + "eval_accuracy": 1.0, + "eval_loss": 1.930716280185152e-06, + "eval_runtime": 125.1713, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 15880 + }, + { + "epoch": 90.28, + "learning_rate": 9.806818181818182e-06, + "loss": 0.0, + "step": 15890 + }, + { + "epoch": 90.28, + "eval_accuracy": 1.0, + "eval_loss": 1.924620391946519e-06, + "eval_runtime": 125.4359, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 15890 + }, + { + "epoch": 90.34, + "learning_rate": 9.750000000000002e-06, + "loss": 0.0, + "step": 15900 + }, + { + "epoch": 90.34, + "eval_accuracy": 1.0, + "eval_loss": 1.920895101648057e-06, + "eval_runtime": 124.6934, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 15900 + }, + { + "epoch": 90.4, + "learning_rate": 9.69318181818182e-06, + "loss": 0.0, + "step": 15910 + }, + { + "epoch": 90.4, + "eval_accuracy": 1.0, + "eval_loss": 1.9175085981260054e-06, + "eval_runtime": 125.029, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 15910 + }, + { + "epoch": 90.45, + "learning_rate": 9.636363636363636e-06, + "loss": 0.0, + "step": 15920 + }, + { + "epoch": 90.45, + "eval_accuracy": 1.0, + "eval_loss": 1.9154765595885692e-06, + "eval_runtime": 124.8252, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 15920 + }, + { + "epoch": 90.51, + "learning_rate": 9.579545454545456e-06, + "loss": 0.0, + "step": 15930 + }, + { + "epoch": 90.51, + "eval_accuracy": 1.0, + "eval_loss": 1.9117512692901073e-06, + "eval_runtime": 125.3259, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 15930 + }, + { + "epoch": 90.57, + "learning_rate": 9.522727272727274e-06, + "loss": 0.0, + "step": 15940 + }, + { + "epoch": 90.57, + "eval_accuracy": 1.0, + "eval_loss": 1.9059939404542092e-06, + "eval_runtime": 125.2785, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 15940 + }, + { + "epoch": 90.62, + "learning_rate": 9.465909090909092e-06, + "loss": 0.0, + "step": 15950 + }, + { + "epoch": 90.62, + "eval_accuracy": 1.0, + "eval_loss": 1.9015914176634396e-06, + "eval_runtime": 124.5553, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 15950 + }, + { + "epoch": 90.68, + "learning_rate": 9.40909090909091e-06, + "loss": 0.0, + "step": 15960 + }, + { + "epoch": 90.68, + "eval_accuracy": 1.0, + "eval_loss": 1.8951567426483962e-06, + "eval_runtime": 124.4632, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 15960 + }, + { + "epoch": 90.74, + "learning_rate": 9.352272727272728e-06, + "loss": 0.0, + "step": 15970 + }, + { + "epoch": 90.74, + "eval_accuracy": 1.0, + "eval_loss": 1.8887221813201904e-06, + "eval_runtime": 124.4594, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 15970 + }, + { + "epoch": 90.8, + "learning_rate": 9.295454545454546e-06, + "loss": 0.0, + "step": 15980 + }, + { + "epoch": 90.8, + "eval_accuracy": 1.0, + "eval_loss": 1.8822876199919847e-06, + "eval_runtime": 124.3179, + "eval_samples_per_second": 2.831, + "eval_steps_per_second": 0.708, + "step": 15980 + }, + { + "epoch": 90.85, + "learning_rate": 9.238636363636364e-06, + "loss": 0.0, + "step": 15990 + }, + { + "epoch": 90.85, + "eval_accuracy": 1.0, + "eval_loss": 1.8755143855742062e-06, + "eval_runtime": 125.1432, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 15990 + }, + { + "epoch": 90.91, + "learning_rate": 9.181818181818182e-06, + "loss": 0.0, + "step": 16000 + }, + { + "epoch": 90.91, + "eval_accuracy": 1.0, + "eval_loss": 1.8667091126189916e-06, + "eval_runtime": 124.9345, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 16000 + }, + { + "epoch": 90.97, + "learning_rate": 9.125e-06, + "loss": 0.0, + "step": 16010 + }, + { + "epoch": 90.97, + "eval_accuracy": 1.0, + "eval_loss": 1.8595972051116405e-06, + "eval_runtime": 124.7242, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 16010 + }, + { + "epoch": 91.02, + "learning_rate": 9.068181818181818e-06, + "loss": 0.0, + "step": 16020 + }, + { + "epoch": 91.02, + "eval_accuracy": 1.0, + "eval_loss": 1.8551945686340332e-06, + "eval_runtime": 124.612, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 16020 + }, + { + "epoch": 91.08, + "learning_rate": 9.011363636363638e-06, + "loss": 0.0, + "step": 16030 + }, + { + "epoch": 91.08, + "eval_accuracy": 1.0, + "eval_loss": 1.8521466245147167e-06, + "eval_runtime": 123.563, + "eval_samples_per_second": 2.849, + "eval_steps_per_second": 0.712, + "step": 16030 + }, + { + "epoch": 91.14, + "learning_rate": 8.954545454545454e-06, + "loss": 0.0, + "step": 16040 + }, + { + "epoch": 91.14, + "eval_accuracy": 1.0, + "eval_loss": 1.8474053149475367e-06, + "eval_runtime": 124.9749, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 16040 + }, + { + "epoch": 91.19, + "learning_rate": 8.897727272727272e-06, + "loss": 0.0, + "step": 16050 + }, + { + "epoch": 91.19, + "eval_accuracy": 1.0, + "eval_loss": 1.8406320805297582e-06, + "eval_runtime": 124.7905, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 16050 + }, + { + "epoch": 91.25, + "learning_rate": 8.840909090909092e-06, + "loss": 0.0, + "step": 16060 + }, + { + "epoch": 91.25, + "eval_accuracy": 1.0, + "eval_loss": 1.8338588461119798e-06, + "eval_runtime": 124.55, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.707, + "step": 16060 + }, + { + "epoch": 91.31, + "learning_rate": 8.78409090909091e-06, + "loss": 0.0, + "step": 16070 + }, + { + "epoch": 91.31, + "eval_accuracy": 1.0, + "eval_loss": 1.8287788634552271e-06, + "eval_runtime": 125.1451, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 16070 + }, + { + "epoch": 91.36, + "learning_rate": 8.727272727272728e-06, + "loss": 0.0, + "step": 16080 + }, + { + "epoch": 91.36, + "eval_accuracy": 1.0, + "eval_loss": 1.8243762269776198e-06, + "eval_runtime": 124.5874, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 16080 + }, + { + "epoch": 91.42, + "learning_rate": 8.670454545454546e-06, + "loss": 0.0, + "step": 16090 + }, + { + "epoch": 91.42, + "eval_accuracy": 1.0, + "eval_loss": 1.8199735905000125e-06, + "eval_runtime": 124.3801, + "eval_samples_per_second": 2.83, + "eval_steps_per_second": 0.708, + "step": 16090 + }, + { + "epoch": 91.48, + "learning_rate": 8.613636363636364e-06, + "loss": 0.0, + "step": 16100 + }, + { + "epoch": 91.48, + "eval_accuracy": 1.0, + "eval_loss": 1.8128616829926614e-06, + "eval_runtime": 124.5612, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.706, + "step": 16100 + }, + { + "epoch": 91.53, + "learning_rate": 8.556818181818182e-06, + "loss": 0.0, + "step": 16110 + }, + { + "epoch": 91.53, + "eval_accuracy": 1.0, + "eval_loss": 1.8084591602018918e-06, + "eval_runtime": 124.569, + "eval_samples_per_second": 2.826, + "eval_steps_per_second": 0.706, + "step": 16110 + }, + { + "epoch": 91.59, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0, + "step": 16120 + }, + { + "epoch": 91.59, + "eval_accuracy": 1.0, + "eval_loss": 1.8057497754853102e-06, + "eval_runtime": 124.6419, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 16120 + }, + { + "epoch": 91.65, + "learning_rate": 8.443181818181818e-06, + "loss": 0.0, + "step": 16130 + }, + { + "epoch": 91.65, + "eval_accuracy": 1.0, + "eval_loss": 1.8037178506347118e-06, + "eval_runtime": 124.9266, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 16130 + }, + { + "epoch": 91.7, + "learning_rate": 8.386363636363636e-06, + "loss": 0.0, + "step": 16140 + }, + { + "epoch": 91.7, + "eval_accuracy": 1.0, + "eval_loss": 1.8003312334258226e-06, + "eval_runtime": 125.1785, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 16140 + }, + { + "epoch": 91.76, + "learning_rate": 8.329545454545456e-06, + "loss": 0.0, + "step": 16150 + }, + { + "epoch": 91.76, + "eval_accuracy": 1.0, + "eval_loss": 1.7955899238586426e-06, + "eval_runtime": 124.3071, + "eval_samples_per_second": 2.832, + "eval_steps_per_second": 0.708, + "step": 16150 + }, + { + "epoch": 91.82, + "learning_rate": 8.272727272727274e-06, + "loss": 0.0, + "step": 16160 + }, + { + "epoch": 91.82, + "eval_accuracy": 1.0, + "eval_loss": 1.791525960470608e-06, + "eval_runtime": 124.3313, + "eval_samples_per_second": 2.831, + "eval_steps_per_second": 0.708, + "step": 16160 + }, + { + "epoch": 91.88, + "learning_rate": 8.21590909090909e-06, + "loss": 0.0, + "step": 16170 + }, + { + "epoch": 91.88, + "eval_accuracy": 1.0, + "eval_loss": 1.786784650903428e-06, + "eval_runtime": 124.5949, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 16170 + }, + { + "epoch": 91.93, + "learning_rate": 8.15909090909091e-06, + "loss": 0.0, + "step": 16180 + }, + { + "epoch": 91.93, + "eval_accuracy": 1.0, + "eval_loss": 1.7813661088439403e-06, + "eval_runtime": 124.6624, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 16180 + }, + { + "epoch": 91.99, + "learning_rate": 8.102272727272728e-06, + "loss": 0.0, + "step": 16190 + }, + { + "epoch": 91.99, + "eval_accuracy": 1.0, + "eval_loss": 1.7786568378141965e-06, + "eval_runtime": 125.1833, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 16190 + }, + { + "epoch": 92.05, + "learning_rate": 8.045454545454546e-06, + "loss": 0.0, + "step": 16200 + }, + { + "epoch": 92.05, + "eval_accuracy": 1.0, + "eval_loss": 1.7725608358887257e-06, + "eval_runtime": 124.6293, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 16200 + }, + { + "epoch": 92.1, + "learning_rate": 7.988636363636364e-06, + "loss": 0.0, + "step": 16210 + }, + { + "epoch": 92.1, + "eval_accuracy": 1.0, + "eval_loss": 1.7668036207396653e-06, + "eval_runtime": 125.3156, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 16210 + }, + { + "epoch": 92.16, + "learning_rate": 7.931818181818182e-06, + "loss": 0.0, + "step": 16220 + }, + { + "epoch": 92.16, + "eval_accuracy": 1.0, + "eval_loss": 1.7610462919037673e-06, + "eval_runtime": 125.1332, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 16220 + }, + { + "epoch": 92.22, + "learning_rate": 7.875e-06, + "loss": 0.0, + "step": 16230 + }, + { + "epoch": 92.22, + "eval_accuracy": 1.0, + "eval_loss": 1.75664365542616e-06, + "eval_runtime": 124.9029, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 16230 + }, + { + "epoch": 92.27, + "learning_rate": 7.81818181818182e-06, + "loss": 0.0, + "step": 16240 + }, + { + "epoch": 92.27, + "eval_accuracy": 1.0, + "eval_loss": 1.752918365127698e-06, + "eval_runtime": 124.4367, + "eval_samples_per_second": 2.829, + "eval_steps_per_second": 0.707, + "step": 16240 + }, + { + "epoch": 92.33, + "learning_rate": 7.761363636363636e-06, + "loss": 0.0, + "step": 16250 + }, + { + "epoch": 92.33, + "eval_accuracy": 1.0, + "eval_loss": 1.7491930748292361e-06, + "eval_runtime": 124.6139, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 16250 + }, + { + "epoch": 92.39, + "learning_rate": 7.704545454545454e-06, + "loss": 0.0, + "step": 16260 + }, + { + "epoch": 92.39, + "eval_accuracy": 1.0, + "eval_loss": 1.7461451307099196e-06, + "eval_runtime": 125.0187, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 16260 + }, + { + "epoch": 92.44, + "learning_rate": 7.647727272727274e-06, + "loss": 0.0, + "step": 16270 + }, + { + "epoch": 92.44, + "eval_accuracy": 1.0, + "eval_loss": 1.7427585135010304e-06, + "eval_runtime": 124.8789, + "eval_samples_per_second": 2.819, + "eval_steps_per_second": 0.705, + "step": 16270 + }, + { + "epoch": 92.5, + "learning_rate": 7.590909090909092e-06, + "loss": 0.0, + "step": 16280 + }, + { + "epoch": 92.5, + "eval_accuracy": 1.0, + "eval_loss": 1.7390332232025685e-06, + "eval_runtime": 124.6171, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 16280 + }, + { + "epoch": 92.56, + "learning_rate": 7.53409090909091e-06, + "loss": 0.0, + "step": 16290 + }, + { + "epoch": 92.56, + "eval_accuracy": 1.0, + "eval_loss": 1.7376786445311154e-06, + "eval_runtime": 125.0342, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 16290 + }, + { + "epoch": 92.61, + "learning_rate": 7.477272727272727e-06, + "loss": 0.0, + "step": 16300 + }, + { + "epoch": 92.61, + "eval_accuracy": 1.0, + "eval_loss": 1.7353079329041066e-06, + "eval_runtime": 125.1094, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 16300 + }, + { + "epoch": 92.67, + "learning_rate": 7.420454545454546e-06, + "loss": 0.0, + "step": 16310 + }, + { + "epoch": 92.67, + "eval_accuracy": 1.0, + "eval_loss": 1.7302280639341916e-06, + "eval_runtime": 124.6426, + "eval_samples_per_second": 2.824, + "eval_steps_per_second": 0.706, + "step": 16310 + }, + { + "epoch": 92.73, + "learning_rate": 7.363636363636364e-06, + "loss": 0.0, + "step": 16320 + }, + { + "epoch": 92.73, + "eval_accuracy": 1.0, + "eval_loss": 1.7268413330384647e-06, + "eval_runtime": 124.4799, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 0.707, + "step": 16320 + }, + { + "epoch": 92.78, + "learning_rate": 7.306818181818183e-06, + "loss": 0.0, + "step": 16330 + }, + { + "epoch": 92.78, + "eval_accuracy": 1.0, + "eval_loss": 1.7248094081878662e-06, + "eval_runtime": 124.7413, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.705, + "step": 16330 + }, + { + "epoch": 92.84, + "learning_rate": 7.25e-06, + "loss": 0.0, + "step": 16340 + }, + { + "epoch": 92.84, + "eval_accuracy": 1.0, + "eval_loss": 1.7231160427400027e-06, + "eval_runtime": 124.7041, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 16340 + }, + { + "epoch": 92.9, + "learning_rate": 7.193181818181818e-06, + "loss": 0.0, + "step": 16350 + }, + { + "epoch": 92.9, + "eval_accuracy": 1.0, + "eval_loss": 1.7187135199492332e-06, + "eval_runtime": 124.5799, + "eval_samples_per_second": 2.825, + "eval_steps_per_second": 0.706, + "step": 16350 + }, + { + "epoch": 92.95, + "learning_rate": 7.136363636363637e-06, + "loss": 0.0, + "step": 16360 + }, + { + "epoch": 92.95, + "eval_accuracy": 1.0, + "eval_loss": 1.7149882296507712e-06, + "eval_runtime": 124.8277, + "eval_samples_per_second": 2.82, + "eval_steps_per_second": 0.705, + "step": 16360 + }, + { + "epoch": 93.01, + "learning_rate": 7.079545454545455e-06, + "loss": 0.0, + "step": 16370 + }, + { + "epoch": 93.01, + "eval_accuracy": 1.0, + "eval_loss": 1.7099082469940186e-06, + "eval_runtime": 124.7946, + "eval_samples_per_second": 2.821, + "eval_steps_per_second": 0.705, + "step": 16370 + }, + { + "epoch": 93.07, + "learning_rate": 7.022727272727272e-06, + "loss": 0.0, + "step": 16380 + }, + { + "epoch": 93.07, + "eval_accuracy": 1.0, + "eval_loss": 1.7061829566955566e-06, + "eval_runtime": 125.138, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.703, + "step": 16380 + }, + { + "epoch": 93.12, + "learning_rate": 6.965909090909091e-06, + "loss": 0.0, + "step": 16390 + }, + { + "epoch": 93.12, + "eval_accuracy": 1.0, + "eval_loss": 1.7017803202179493e-06, + "eval_runtime": 124.9324, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.704, + "step": 16390 + }, + { + "epoch": 93.18, + "learning_rate": 6.909090909090909e-06, + "loss": 0.0, + "step": 16400 + }, + { + "epoch": 93.18, + "eval_accuracy": 1.0, + "eval_loss": 1.696023105068889e-06, + "eval_runtime": 126.0846, + "eval_samples_per_second": 2.792, + "eval_steps_per_second": 0.698, + "step": 16400 + }, + { + "epoch": 93.24, + "learning_rate": 6.852272727272728e-06, + "loss": 0.0, + "step": 16410 + }, + { + "epoch": 93.24, + "eval_accuracy": 1.0, + "eval_loss": 1.6933137203523074e-06, + "eval_runtime": 125.8224, + "eval_samples_per_second": 2.798, + "eval_steps_per_second": 0.699, + "step": 16410 + }, + { + "epoch": 93.3, + "learning_rate": 6.795454545454545e-06, + "loss": 0.0, + "step": 16420 + }, + { + "epoch": 93.3, + "eval_accuracy": 1.0, + "eval_loss": 1.6892498706511105e-06, + "eval_runtime": 125.3441, + "eval_samples_per_second": 2.808, + "eval_steps_per_second": 0.702, + "step": 16420 + }, + { + "epoch": 93.35, + "learning_rate": 6.738636363636364e-06, + "loss": 0.0, + "step": 16430 + }, + { + "epoch": 93.35, + "eval_accuracy": 1.0, + "eval_loss": 1.6858631397553836e-06, + "eval_runtime": 125.0234, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 16430 + }, + { + "epoch": 93.41, + "learning_rate": 6.681818181818182e-06, + "loss": 0.0, + "step": 16440 + }, + { + "epoch": 93.41, + "eval_accuracy": 1.0, + "eval_loss": 1.680444597695896e-06, + "eval_runtime": 125.0985, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.703, + "step": 16440 + }, + { + "epoch": 93.47, + "learning_rate": 6.625000000000001e-06, + "loss": 0.0, + "step": 16450 + }, + { + "epoch": 93.47, + "eval_accuracy": 1.0, + "eval_loss": 1.6726553440093994e-06, + "eval_runtime": 124.9086, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 16450 + }, + { + "epoch": 93.52, + "learning_rate": 6.568181818181819e-06, + "loss": 0.0, + "step": 16460 + }, + { + "epoch": 93.52, + "eval_accuracy": 1.0, + "eval_loss": 1.666898128860339e-06, + "eval_runtime": 124.9045, + "eval_samples_per_second": 2.818, + "eval_steps_per_second": 0.705, + "step": 16460 + }, + { + "epoch": 93.58, + "learning_rate": 6.511363636363636e-06, + "loss": 0.0, + "step": 16470 + }, + { + "epoch": 93.58, + "eval_accuracy": 1.0, + "eval_loss": 1.662156819293159e-06, + "eval_runtime": 125.0814, + "eval_samples_per_second": 2.814, + "eval_steps_per_second": 0.704, + "step": 16470 + }, + { + "epoch": 93.64, + "learning_rate": 6.454545454545455e-06, + "loss": 0.0, + "step": 16480 + }, + { + "epoch": 93.64, + "eval_accuracy": 1.0, + "eval_loss": 1.657415509725979e-06, + "eval_runtime": 124.9704, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 16480 + }, + { + "epoch": 93.69, + "learning_rate": 6.397727272727273e-06, + "loss": 0.0, + "step": 16490 + }, + { + "epoch": 93.69, + "eval_accuracy": 1.0, + "eval_loss": 1.6553835848753806e-06, + "eval_runtime": 125.0611, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 16490 + }, + { + "epoch": 93.75, + "learning_rate": 6.340909090909092e-06, + "loss": 0.0, + "step": 16500 + }, + { + "epoch": 93.75, + "eval_accuracy": 1.0, + "eval_loss": 1.650303602218628e-06, + "eval_runtime": 125.0589, + "eval_samples_per_second": 2.815, + "eval_steps_per_second": 0.704, + "step": 16500 + }, + { + "epoch": 93.81, + "learning_rate": 6.284090909090909e-06, + "loss": 0.0, + "step": 16510 + }, + { + "epoch": 93.81, + "eval_accuracy": 1.0, + "eval_loss": 1.6442077139799949e-06, + "eval_runtime": 125.1607, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 16510 + }, + { + "epoch": 93.86, + "learning_rate": 6.227272727272727e-06, + "loss": 0.0, + "step": 16520 + }, + { + "epoch": 93.86, + "eval_accuracy": 1.0, + "eval_loss": 1.6391277313232422e-06, + "eval_runtime": 124.7058, + "eval_samples_per_second": 2.823, + "eval_steps_per_second": 0.706, + "step": 16520 + }, + { + "epoch": 93.92, + "learning_rate": 6.170454545454546e-06, + "loss": 0.0, + "step": 16530 + }, + { + "epoch": 93.92, + "eval_accuracy": 1.0, + "eval_loss": 1.6354024410247803e-06, + "eval_runtime": 124.9599, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 16530 + }, + { + "epoch": 93.98, + "learning_rate": 6.113636363636364e-06, + "loss": 0.0, + "step": 16540 + }, + { + "epoch": 93.98, + "eval_accuracy": 1.0, + "eval_loss": 1.632015823815891e-06, + "eval_runtime": 125.5039, + "eval_samples_per_second": 2.805, + "eval_steps_per_second": 0.701, + "step": 16540 + }, + { + "epoch": 94.03, + "learning_rate": 6.056818181818182e-06, + "loss": 0.0, + "step": 16550 + }, + { + "epoch": 94.03, + "eval_accuracy": 1.0, + "eval_loss": 1.6282905335174291e-06, + "eval_runtime": 125.2538, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.703, + "step": 16550 + }, + { + "epoch": 94.09, + "learning_rate": 6e-06, + "loss": 0.0, + "step": 16560 + }, + { + "epoch": 94.09, + "eval_accuracy": 1.0, + "eval_loss": 1.6232105508606764e-06, + "eval_runtime": 125.3285, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 0.702, + "step": 16560 + }, + { + "epoch": 94.15, + "learning_rate": 5.943181818181818e-06, + "loss": 0.0, + "step": 16570 + }, + { + "epoch": 94.15, + "eval_accuracy": 1.0, + "eval_loss": 1.6208399529205053e-06, + "eval_runtime": 124.9652, + "eval_samples_per_second": 2.817, + "eval_steps_per_second": 0.704, + "step": 16570 + }, + { + "epoch": 94.2, + "learning_rate": 5.886363636363636e-06, + "loss": 0.0, + "step": 16580 + }, + { + "epoch": 94.2, + "eval_accuracy": 1.0, + "eval_loss": 1.6191465874726418e-06, + "eval_runtime": 124.9832, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 16580 + }, + { + "epoch": 94.26, + "learning_rate": 5.829545454545455e-06, + "loss": 0.0, + "step": 16590 + }, + { + "epoch": 94.26, + "eval_accuracy": 1.0, + "eval_loss": 1.6150826240846072e-06, + "eval_runtime": 125.2831, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 16590 + }, + { + "epoch": 94.32, + "learning_rate": 5.772727272727272e-06, + "loss": 0.0, + "step": 16600 + }, + { + "epoch": 94.32, + "eval_accuracy": 1.0, + "eval_loss": 1.610341428204265e-06, + "eval_runtime": 125.4449, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 16600 + }, + { + "epoch": 94.38, + "learning_rate": 5.715909090909091e-06, + "loss": 0.0, + "step": 16610 + }, + { + "epoch": 94.38, + "eval_accuracy": 1.0, + "eval_loss": 1.6076320434876834e-06, + "eval_runtime": 124.999, + "eval_samples_per_second": 2.816, + "eval_steps_per_second": 0.704, + "step": 16610 + }, + { + "epoch": 94.43, + "learning_rate": 5.659090909090909e-06, + "loss": 0.0, + "step": 16620 + }, + { + "epoch": 94.43, + "eval_accuracy": 1.0, + "eval_loss": 1.606616137905803e-06, + "eval_runtime": 128.335, + "eval_samples_per_second": 2.743, + "eval_steps_per_second": 0.686, + "step": 16620 + }, + { + "epoch": 94.49, + "learning_rate": 5.602272727272727e-06, + "loss": 0.0, + "step": 16630 + }, + { + "epoch": 94.49, + "eval_accuracy": 1.0, + "eval_loss": 1.6049227724579396e-06, + "eval_runtime": 128.5341, + "eval_samples_per_second": 2.739, + "eval_steps_per_second": 0.685, + "step": 16630 + }, + { + "epoch": 94.55, + "learning_rate": 5.545454545454546e-06, + "loss": 0.0, + "step": 16640 + }, + { + "epoch": 94.55, + "eval_accuracy": 1.0, + "eval_loss": 1.601874828338623e-06, + "eval_runtime": 128.7037, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 16640 + }, + { + "epoch": 94.6, + "learning_rate": 5.488636363636364e-06, + "loss": 0.0, + "step": 16650 + }, + { + "epoch": 94.6, + "eval_accuracy": 1.0, + "eval_loss": 1.599504230398452e-06, + "eval_runtime": 129.0269, + "eval_samples_per_second": 2.728, + "eval_steps_per_second": 0.682, + "step": 16650 + }, + { + "epoch": 94.66, + "learning_rate": 5.431818181818182e-06, + "loss": 0.0, + "step": 16660 + }, + { + "epoch": 94.66, + "eval_accuracy": 1.0, + "eval_loss": 1.5954402670104173e-06, + "eval_runtime": 128.7892, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 16660 + }, + { + "epoch": 94.72, + "learning_rate": 5.375e-06, + "loss": 0.0, + "step": 16670 + }, + { + "epoch": 94.72, + "eval_accuracy": 1.0, + "eval_loss": 1.5927308822938357e-06, + "eval_runtime": 128.8098, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 16670 + }, + { + "epoch": 94.77, + "learning_rate": 5.318181818181819e-06, + "loss": 0.0, + "step": 16680 + }, + { + "epoch": 94.77, + "eval_accuracy": 1.0, + "eval_loss": 1.5890055919953738e-06, + "eval_runtime": 129.0885, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 16680 + }, + { + "epoch": 94.83, + "learning_rate": 5.261363636363636e-06, + "loss": 0.0, + "step": 16690 + }, + { + "epoch": 94.83, + "eval_accuracy": 1.0, + "eval_loss": 1.584941742294177e-06, + "eval_runtime": 128.2033, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.686, + "step": 16690 + }, + { + "epoch": 94.89, + "learning_rate": 5.204545454545455e-06, + "loss": 0.0, + "step": 16700 + }, + { + "epoch": 94.89, + "eval_accuracy": 1.0, + "eval_loss": 1.5808777789061423e-06, + "eval_runtime": 129.0862, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 16700 + }, + { + "epoch": 94.94, + "learning_rate": 5.147727272727273e-06, + "loss": 0.0, + "step": 16710 + }, + { + "epoch": 94.94, + "eval_accuracy": 1.0, + "eval_loss": 1.5785070672791335e-06, + "eval_runtime": 128.7084, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 16710 + }, + { + "epoch": 95.0, + "learning_rate": 5.090909090909091e-06, + "loss": 0.0, + "step": 16720 + }, + { + "epoch": 95.0, + "eval_accuracy": 1.0, + "eval_loss": 1.5751204500702443e-06, + "eval_runtime": 128.9023, + "eval_samples_per_second": 2.731, + "eval_steps_per_second": 0.683, + "step": 16720 + }, + { + "epoch": 95.06, + "learning_rate": 5.034090909090909e-06, + "loss": 0.0, + "step": 16730 + }, + { + "epoch": 95.06, + "eval_accuracy": 1.0, + "eval_loss": 1.571733832861355e-06, + "eval_runtime": 128.597, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 16730 + }, + { + "epoch": 95.11, + "learning_rate": 4.977272727272727e-06, + "loss": 0.0, + "step": 16740 + }, + { + "epoch": 95.11, + "eval_accuracy": 1.0, + "eval_loss": 1.5686858887420385e-06, + "eval_runtime": 128.8221, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 16740 + }, + { + "epoch": 95.17, + "learning_rate": 4.920454545454545e-06, + "loss": 0.0, + "step": 16750 + }, + { + "epoch": 95.17, + "eval_accuracy": 1.0, + "eval_loss": 1.5642832522644312e-06, + "eval_runtime": 128.6752, + "eval_samples_per_second": 2.736, + "eval_steps_per_second": 0.684, + "step": 16750 + }, + { + "epoch": 95.23, + "learning_rate": 4.863636363636364e-06, + "loss": 0.0, + "step": 16760 + }, + { + "epoch": 95.23, + "eval_accuracy": 1.0, + "eval_loss": 1.5602192888763966e-06, + "eval_runtime": 128.3234, + "eval_samples_per_second": 2.743, + "eval_steps_per_second": 0.686, + "step": 16760 + }, + { + "epoch": 95.28, + "learning_rate": 4.806818181818181e-06, + "loss": 0.0, + "step": 16770 + }, + { + "epoch": 95.28, + "eval_accuracy": 1.0, + "eval_loss": 1.55717134475708e-06, + "eval_runtime": 129.4927, + "eval_samples_per_second": 2.718, + "eval_steps_per_second": 0.68, + "step": 16770 + }, + { + "epoch": 95.34, + "learning_rate": 4.75e-06, + "loss": 0.0, + "step": 16780 + }, + { + "epoch": 95.34, + "eval_accuracy": 1.0, + "eval_loss": 1.5514141296080197e-06, + "eval_runtime": 125.4396, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 0.702, + "step": 16780 + }, + { + "epoch": 95.4, + "learning_rate": 4.693181818181818e-06, + "loss": 0.0, + "step": 16790 + }, + { + "epoch": 95.4, + "eval_accuracy": 1.0, + "eval_loss": 1.5483660718018655e-06, + "eval_runtime": 124.7147, + "eval_samples_per_second": 2.822, + "eval_steps_per_second": 0.706, + "step": 16790 + }, + { + "epoch": 95.45, + "learning_rate": 4.636363636363636e-06, + "loss": 0.0, + "step": 16800 + }, + { + "epoch": 95.45, + "eval_accuracy": 1.0, + "eval_loss": 1.5459954738616943e-06, + "eval_runtime": 125.2819, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 0.702, + "step": 16800 + }, + { + "epoch": 95.51, + "learning_rate": 4.579545454545455e-06, + "loss": 0.0, + "step": 16810 + }, + { + "epoch": 95.51, + "eval_accuracy": 1.0, + "eval_loss": 1.5429475297423778e-06, + "eval_runtime": 126.2148, + "eval_samples_per_second": 2.789, + "eval_steps_per_second": 0.697, + "step": 16810 + }, + { + "epoch": 95.57, + "learning_rate": 4.522727272727273e-06, + "loss": 0.0, + "step": 16820 + }, + { + "epoch": 95.57, + "eval_accuracy": 1.0, + "eval_loss": 1.5398995856230613e-06, + "eval_runtime": 128.0346, + "eval_samples_per_second": 2.749, + "eval_steps_per_second": 0.687, + "step": 16820 + }, + { + "epoch": 95.62, + "learning_rate": 4.465909090909091e-06, + "loss": 0.0, + "step": 16830 + }, + { + "epoch": 95.62, + "eval_accuracy": 1.0, + "eval_loss": 1.5382062201751978e-06, + "eval_runtime": 129.0119, + "eval_samples_per_second": 2.728, + "eval_steps_per_second": 0.682, + "step": 16830 + }, + { + "epoch": 95.68, + "learning_rate": 4.409090909090909e-06, + "loss": 0.0, + "step": 16840 + }, + { + "epoch": 95.68, + "eval_accuracy": 1.0, + "eval_loss": 1.535496949145454e-06, + "eval_runtime": 125.531, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 16840 + }, + { + "epoch": 95.74, + "learning_rate": 4.352272727272727e-06, + "loss": 0.0, + "step": 16850 + }, + { + "epoch": 95.74, + "eval_accuracy": 1.0, + "eval_loss": 1.5334649106080178e-06, + "eval_runtime": 125.6357, + "eval_samples_per_second": 2.802, + "eval_steps_per_second": 0.7, + "step": 16850 + }, + { + "epoch": 95.8, + "learning_rate": 4.295454545454545e-06, + "loss": 0.0, + "step": 16860 + }, + { + "epoch": 95.8, + "eval_accuracy": 1.0, + "eval_loss": 1.530755639578274e-06, + "eval_runtime": 125.1608, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 0.703, + "step": 16860 + }, + { + "epoch": 95.85, + "learning_rate": 4.238636363636364e-06, + "loss": 0.0, + "step": 16870 + }, + { + "epoch": 95.85, + "eval_accuracy": 1.0, + "eval_loss": 1.5287237147276755e-06, + "eval_runtime": 125.5548, + "eval_samples_per_second": 2.804, + "eval_steps_per_second": 0.701, + "step": 16870 + }, + { + "epoch": 95.91, + "learning_rate": 4.181818181818182e-06, + "loss": 0.0, + "step": 16880 + }, + { + "epoch": 95.91, + "eval_accuracy": 1.0, + "eval_loss": 1.5273690223693848e-06, + "eval_runtime": 128.1603, + "eval_samples_per_second": 2.747, + "eval_steps_per_second": 0.687, + "step": 16880 + }, + { + "epoch": 95.97, + "learning_rate": 4.125e-06, + "loss": 0.0, + "step": 16890 + }, + { + "epoch": 95.97, + "eval_accuracy": 1.0, + "eval_loss": 1.526014330011094e-06, + "eval_runtime": 128.3605, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.686, + "step": 16890 + }, + { + "epoch": 96.02, + "learning_rate": 4.068181818181818e-06, + "loss": 0.0, + "step": 16900 + }, + { + "epoch": 96.02, + "eval_accuracy": 1.0, + "eval_loss": 1.5249984244292136e-06, + "eval_runtime": 128.5769, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.684, + "step": 16900 + }, + { + "epoch": 96.08, + "learning_rate": 4.011363636363636e-06, + "loss": 0.0, + "step": 16910 + }, + { + "epoch": 96.08, + "eval_accuracy": 1.0, + "eval_loss": 1.5233050589813502e-06, + "eval_runtime": 128.8804, + "eval_samples_per_second": 2.731, + "eval_steps_per_second": 0.683, + "step": 16910 + }, + { + "epoch": 96.14, + "learning_rate": 3.954545454545454e-06, + "loss": 0.0, + "step": 16920 + }, + { + "epoch": 96.14, + "eval_accuracy": 1.0, + "eval_loss": 1.522289039712632e-06, + "eval_runtime": 128.5949, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 16920 + }, + { + "epoch": 96.19, + "learning_rate": 3.897727272727273e-06, + "loss": 0.0, + "step": 16930 + }, + { + "epoch": 96.19, + "eval_accuracy": 1.0, + "eval_loss": 1.5212731341307517e-06, + "eval_runtime": 129.4507, + "eval_samples_per_second": 2.719, + "eval_steps_per_second": 0.68, + "step": 16930 + }, + { + "epoch": 96.25, + "learning_rate": 3.8409090909090905e-06, + "loss": 0.0, + "step": 16940 + }, + { + "epoch": 96.25, + "eval_accuracy": 1.0, + "eval_loss": 1.519918441772461e-06, + "eval_runtime": 128.8318, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 16940 + }, + { + "epoch": 96.31, + "learning_rate": 3.7840909090909094e-06, + "loss": 0.0159, + "step": 16950 + }, + { + "epoch": 96.31, + "eval_accuracy": 1.0, + "eval_loss": 1.5263530031006667e-06, + "eval_runtime": 128.5007, + "eval_samples_per_second": 2.739, + "eval_steps_per_second": 0.685, + "step": 16950 + }, + { + "epoch": 96.36, + "learning_rate": 3.727272727272727e-06, + "loss": 0.0, + "step": 16960 + }, + { + "epoch": 96.36, + "eval_accuracy": 1.0, + "eval_loss": 1.545318127682549e-06, + "eval_runtime": 128.9438, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.682, + "step": 16960 + }, + { + "epoch": 96.42, + "learning_rate": 3.670454545454546e-06, + "loss": 0.0, + "step": 16970 + }, + { + "epoch": 96.42, + "eval_accuracy": 1.0, + "eval_loss": 1.600858809069905e-06, + "eval_runtime": 128.5326, + "eval_samples_per_second": 2.739, + "eval_steps_per_second": 0.685, + "step": 16970 + }, + { + "epoch": 96.48, + "learning_rate": 3.6136363636363635e-06, + "loss": 0.0, + "step": 16980 + }, + { + "epoch": 96.48, + "eval_accuracy": 1.0, + "eval_loss": 1.6472556580993114e-06, + "eval_runtime": 128.3588, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.686, + "step": 16980 + }, + { + "epoch": 96.53, + "learning_rate": 3.556818181818182e-06, + "loss": 0.0, + "step": 16990 + }, + { + "epoch": 96.53, + "eval_accuracy": 1.0, + "eval_loss": 1.6675753613526467e-06, + "eval_runtime": 128.455, + "eval_samples_per_second": 2.74, + "eval_steps_per_second": 0.685, + "step": 16990 + }, + { + "epoch": 96.59, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0, + "step": 17000 + }, + { + "epoch": 96.59, + "eval_accuracy": 1.0, + "eval_loss": 1.6740100363676902e-06, + "eval_runtime": 128.4533, + "eval_samples_per_second": 2.74, + "eval_steps_per_second": 0.685, + "step": 17000 + }, + { + "epoch": 96.65, + "learning_rate": 3.4431818181818184e-06, + "loss": 0.0, + "step": 17010 + }, + { + "epoch": 96.65, + "eval_accuracy": 1.0, + "eval_loss": 1.6743487094572629e-06, + "eval_runtime": 128.7996, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 17010 + }, + { + "epoch": 96.7, + "learning_rate": 3.386363636363637e-06, + "loss": 0.0, + "step": 17020 + }, + { + "epoch": 96.7, + "eval_accuracy": 1.0, + "eval_loss": 1.6726553440093994e-06, + "eval_runtime": 128.9587, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.682, + "step": 17020 + }, + { + "epoch": 96.76, + "learning_rate": 3.3295454545454545e-06, + "loss": 0.0, + "step": 17030 + }, + { + "epoch": 96.76, + "eval_accuracy": 1.0, + "eval_loss": 1.6692687268005102e-06, + "eval_runtime": 128.9136, + "eval_samples_per_second": 2.731, + "eval_steps_per_second": 0.683, + "step": 17030 + }, + { + "epoch": 96.82, + "learning_rate": 3.2727272727272733e-06, + "loss": 0.0, + "step": 17040 + }, + { + "epoch": 96.82, + "eval_accuracy": 1.0, + "eval_loss": 1.6675753613526467e-06, + "eval_runtime": 129.384, + "eval_samples_per_second": 2.721, + "eval_steps_per_second": 0.68, + "step": 17040 + }, + { + "epoch": 96.88, + "learning_rate": 3.215909090909091e-06, + "loss": 0.0, + "step": 17050 + }, + { + "epoch": 96.88, + "eval_accuracy": 1.0, + "eval_loss": 1.6662207826811937e-06, + "eval_runtime": 127.9549, + "eval_samples_per_second": 2.751, + "eval_steps_per_second": 0.688, + "step": 17050 + }, + { + "epoch": 96.93, + "learning_rate": 3.1590909090909094e-06, + "loss": 0.0, + "step": 17060 + }, + { + "epoch": 96.93, + "eval_accuracy": 1.0, + "eval_loss": 1.6645274172333302e-06, + "eval_runtime": 128.2882, + "eval_samples_per_second": 2.744, + "eval_steps_per_second": 0.686, + "step": 17060 + }, + { + "epoch": 96.99, + "learning_rate": 3.1022727272727274e-06, + "loss": 0.0, + "step": 17070 + }, + { + "epoch": 96.99, + "eval_accuracy": 1.0, + "eval_loss": 1.6631728385618771e-06, + "eval_runtime": 128.9062, + "eval_samples_per_second": 2.731, + "eval_steps_per_second": 0.683, + "step": 17070 + }, + { + "epoch": 97.05, + "learning_rate": 3.0454545454545455e-06, + "loss": 0.0, + "step": 17080 + }, + { + "epoch": 97.05, + "eval_accuracy": 1.0, + "eval_loss": 1.6635113979646121e-06, + "eval_runtime": 129.017, + "eval_samples_per_second": 2.728, + "eval_steps_per_second": 0.682, + "step": 17080 + }, + { + "epoch": 97.1, + "learning_rate": 2.988636363636364e-06, + "loss": 0.0, + "step": 17090 + }, + { + "epoch": 97.1, + "eval_accuracy": 1.0, + "eval_loss": 1.662156819293159e-06, + "eval_runtime": 128.2745, + "eval_samples_per_second": 2.744, + "eval_steps_per_second": 0.686, + "step": 17090 + }, + { + "epoch": 97.16, + "learning_rate": 2.931818181818182e-06, + "loss": 0.0, + "step": 17100 + }, + { + "epoch": 97.16, + "eval_accuracy": 1.0, + "eval_loss": 1.660124780755723e-06, + "eval_runtime": 128.2129, + "eval_samples_per_second": 2.745, + "eval_steps_per_second": 0.686, + "step": 17100 + }, + { + "epoch": 97.22, + "learning_rate": 2.8750000000000004e-06, + "loss": 0.0, + "step": 17110 + }, + { + "epoch": 97.22, + "eval_accuracy": 1.0, + "eval_loss": 1.6587702020842698e-06, + "eval_runtime": 128.1639, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.687, + "step": 17110 + }, + { + "epoch": 97.27, + "learning_rate": 2.8181818181818185e-06, + "loss": 0.0, + "step": 17120 + }, + { + "epoch": 97.27, + "eval_accuracy": 1.0, + "eval_loss": 1.6577541828155518e-06, + "eval_runtime": 128.2074, + "eval_samples_per_second": 2.746, + "eval_steps_per_second": 0.686, + "step": 17120 + }, + { + "epoch": 97.33, + "learning_rate": 2.7613636363636365e-06, + "loss": 0.0, + "step": 17130 + }, + { + "epoch": 97.33, + "eval_accuracy": 1.0, + "eval_loss": 1.656399490457261e-06, + "eval_runtime": 128.4531, + "eval_samples_per_second": 2.74, + "eval_steps_per_second": 0.685, + "step": 17130 + }, + { + "epoch": 97.39, + "learning_rate": 2.704545454545455e-06, + "loss": 0.0, + "step": 17140 + }, + { + "epoch": 97.39, + "eval_accuracy": 1.0, + "eval_loss": 1.6547062386962352e-06, + "eval_runtime": 128.7424, + "eval_samples_per_second": 2.734, + "eval_steps_per_second": 0.684, + "step": 17140 + }, + { + "epoch": 97.44, + "learning_rate": 2.647727272727273e-06, + "loss": 0.0, + "step": 17150 + }, + { + "epoch": 97.44, + "eval_accuracy": 1.0, + "eval_loss": 1.6533515463379445e-06, + "eval_runtime": 129.331, + "eval_samples_per_second": 2.722, + "eval_steps_per_second": 0.68, + "step": 17150 + }, + { + "epoch": 97.5, + "learning_rate": 2.590909090909091e-06, + "loss": 0.0, + "step": 17160 + }, + { + "epoch": 97.5, + "eval_accuracy": 1.0, + "eval_loss": 1.6523355270692264e-06, + "eval_runtime": 128.549, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 17160 + }, + { + "epoch": 97.56, + "learning_rate": 2.534090909090909e-06, + "loss": 0.0, + "step": 17170 + }, + { + "epoch": 97.56, + "eval_accuracy": 1.0, + "eval_loss": 1.6516582945769187e-06, + "eval_runtime": 128.82, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 17170 + }, + { + "epoch": 97.61, + "learning_rate": 2.4772727272727275e-06, + "loss": 0.0, + "step": 17180 + }, + { + "epoch": 97.61, + "eval_accuracy": 1.0, + "eval_loss": 1.6506422753082006e-06, + "eval_runtime": 128.9887, + "eval_samples_per_second": 2.729, + "eval_steps_per_second": 0.682, + "step": 17180 + }, + { + "epoch": 97.67, + "learning_rate": 2.4204545454545455e-06, + "loss": 0.0, + "step": 17190 + }, + { + "epoch": 97.67, + "eval_accuracy": 1.0, + "eval_loss": 1.6496262560394825e-06, + "eval_runtime": 128.9481, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.682, + "step": 17190 + }, + { + "epoch": 97.73, + "learning_rate": 2.3636363636363636e-06, + "loss": 0.0, + "step": 17200 + }, + { + "epoch": 97.73, + "eval_accuracy": 1.0, + "eval_loss": 1.6482716773680295e-06, + "eval_runtime": 129.3128, + "eval_samples_per_second": 2.722, + "eval_steps_per_second": 0.681, + "step": 17200 + }, + { + "epoch": 97.78, + "learning_rate": 2.306818181818182e-06, + "loss": 0.0, + "step": 17210 + }, + { + "epoch": 97.78, + "eval_accuracy": 1.0, + "eval_loss": 1.647594331188884e-06, + "eval_runtime": 129.5367, + "eval_samples_per_second": 2.717, + "eval_steps_per_second": 0.679, + "step": 17210 + }, + { + "epoch": 97.84, + "learning_rate": 2.25e-06, + "loss": 0.0, + "step": 17220 + }, + { + "epoch": 97.84, + "eval_accuracy": 1.0, + "eval_loss": 1.6462396388305933e-06, + "eval_runtime": 129.3697, + "eval_samples_per_second": 2.721, + "eval_steps_per_second": 0.68, + "step": 17220 + }, + { + "epoch": 97.9, + "learning_rate": 2.193181818181818e-06, + "loss": 0.0, + "step": 17230 + }, + { + "epoch": 97.9, + "eval_accuracy": 1.0, + "eval_loss": 1.645562292651448e-06, + "eval_runtime": 129.1418, + "eval_samples_per_second": 2.726, + "eval_steps_per_second": 0.681, + "step": 17230 + }, + { + "epoch": 97.95, + "learning_rate": 2.1363636363636365e-06, + "loss": 0.0, + "step": 17240 + }, + { + "epoch": 97.95, + "eval_accuracy": 1.0, + "eval_loss": 1.6448849464723025e-06, + "eval_runtime": 129.2715, + "eval_samples_per_second": 2.723, + "eval_steps_per_second": 0.681, + "step": 17240 + }, + { + "epoch": 98.01, + "learning_rate": 2.0795454545454546e-06, + "loss": 0.0, + "step": 17250 + }, + { + "epoch": 98.01, + "eval_accuracy": 1.0, + "eval_loss": 1.6435303678008495e-06, + "eval_runtime": 128.8965, + "eval_samples_per_second": 2.731, + "eval_steps_per_second": 0.683, + "step": 17250 + }, + { + "epoch": 98.07, + "learning_rate": 2.0227272727272726e-06, + "loss": 0.0, + "step": 17260 + }, + { + "epoch": 98.07, + "eval_accuracy": 1.0, + "eval_loss": 1.6421756754425587e-06, + "eval_runtime": 128.9972, + "eval_samples_per_second": 2.729, + "eval_steps_per_second": 0.682, + "step": 17260 + }, + { + "epoch": 98.12, + "learning_rate": 1.965909090909091e-06, + "loss": 0.0, + "step": 17270 + }, + { + "epoch": 98.12, + "eval_accuracy": 1.0, + "eval_loss": 1.6414983292634133e-06, + "eval_runtime": 128.8559, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 17270 + }, + { + "epoch": 98.18, + "learning_rate": 1.9090909090909095e-06, + "loss": 0.0, + "step": 17280 + }, + { + "epoch": 98.18, + "eval_accuracy": 1.0, + "eval_loss": 1.6408210967711057e-06, + "eval_runtime": 128.8529, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 17280 + }, + { + "epoch": 98.24, + "learning_rate": 1.8522727272727276e-06, + "loss": 0.0, + "step": 17290 + }, + { + "epoch": 98.24, + "eval_accuracy": 1.0, + "eval_loss": 1.640482423681533e-06, + "eval_runtime": 128.893, + "eval_samples_per_second": 2.731, + "eval_steps_per_second": 0.683, + "step": 17290 + }, + { + "epoch": 98.3, + "learning_rate": 1.7954545454545456e-06, + "loss": 0.0, + "step": 17300 + }, + { + "epoch": 98.3, + "eval_accuracy": 1.0, + "eval_loss": 1.6401437505919603e-06, + "eval_runtime": 129.1611, + "eval_samples_per_second": 2.725, + "eval_steps_per_second": 0.681, + "step": 17300 + }, + { + "epoch": 98.35, + "learning_rate": 1.7386363636363638e-06, + "loss": 0.0, + "step": 17310 + }, + { + "epoch": 98.35, + "eval_accuracy": 1.0, + "eval_loss": 1.6374343658753787e-06, + "eval_runtime": 128.9357, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.683, + "step": 17310 + }, + { + "epoch": 98.41, + "learning_rate": 1.681818181818182e-06, + "loss": 0.0, + "step": 17320 + }, + { + "epoch": 98.41, + "eval_accuracy": 1.0, + "eval_loss": 1.6370958064726437e-06, + "eval_runtime": 128.8644, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 17320 + }, + { + "epoch": 98.47, + "learning_rate": 1.6250000000000001e-06, + "loss": 0.0, + "step": 17330 + }, + { + "epoch": 98.47, + "eval_accuracy": 1.0, + "eval_loss": 1.6360797872039257e-06, + "eval_runtime": 128.3943, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.685, + "step": 17330 + }, + { + "epoch": 98.52, + "learning_rate": 1.5681818181818184e-06, + "loss": 0.0, + "step": 17340 + }, + { + "epoch": 98.52, + "eval_accuracy": 1.0, + "eval_loss": 1.6343864217560622e-06, + "eval_runtime": 128.8096, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 17340 + }, + { + "epoch": 98.58, + "learning_rate": 1.5113636363636364e-06, + "loss": 0.0, + "step": 17350 + }, + { + "epoch": 98.58, + "eval_accuracy": 1.0, + "eval_loss": 1.6337090755769168e-06, + "eval_runtime": 128.3927, + "eval_samples_per_second": 2.742, + "eval_steps_per_second": 0.685, + "step": 17350 + }, + { + "epoch": 98.64, + "learning_rate": 1.4545454545454546e-06, + "loss": 0.0, + "step": 17360 + }, + { + "epoch": 98.64, + "eval_accuracy": 1.0, + "eval_loss": 1.6333705161741818e-06, + "eval_runtime": 128.5863, + "eval_samples_per_second": 2.737, + "eval_steps_per_second": 0.684, + "step": 17360 + }, + { + "epoch": 98.69, + "learning_rate": 1.3977272727272729e-06, + "loss": 0.0, + "step": 17370 + }, + { + "epoch": 98.69, + "eval_accuracy": 1.0, + "eval_loss": 1.6326931699950364e-06, + "eval_runtime": 128.7255, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 17370 + }, + { + "epoch": 98.75, + "learning_rate": 1.340909090909091e-06, + "loss": 0.0, + "step": 17380 + }, + { + "epoch": 98.75, + "eval_accuracy": 1.0, + "eval_loss": 1.632015823815891e-06, + "eval_runtime": 128.5571, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 17380 + }, + { + "epoch": 98.81, + "learning_rate": 1.2840909090909092e-06, + "loss": 0.0, + "step": 17390 + }, + { + "epoch": 98.81, + "eval_accuracy": 1.0, + "eval_loss": 1.6313384776367457e-06, + "eval_runtime": 128.7796, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 17390 + }, + { + "epoch": 98.86, + "learning_rate": 1.2272727272727272e-06, + "loss": 0.0, + "step": 17400 + }, + { + "epoch": 98.86, + "eval_accuracy": 1.0, + "eval_loss": 1.6303224583680276e-06, + "eval_runtime": 128.7505, + "eval_samples_per_second": 2.734, + "eval_steps_per_second": 0.683, + "step": 17400 + }, + { + "epoch": 98.92, + "learning_rate": 1.1704545454545456e-06, + "loss": 0.0, + "step": 17410 + }, + { + "epoch": 98.92, + "eval_accuracy": 1.0, + "eval_loss": 1.6299837852784549e-06, + "eval_runtime": 128.397, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 0.685, + "step": 17410 + }, + { + "epoch": 98.98, + "learning_rate": 1.1136363636363637e-06, + "loss": 0.0, + "step": 17420 + }, + { + "epoch": 98.98, + "eval_accuracy": 1.0, + "eval_loss": 1.62964522587572e-06, + "eval_runtime": 128.8268, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 17420 + }, + { + "epoch": 99.03, + "learning_rate": 1.056818181818182e-06, + "loss": 0.0, + "step": 17430 + }, + { + "epoch": 99.03, + "eval_accuracy": 1.0, + "eval_loss": 1.6289678796965745e-06, + "eval_runtime": 128.9544, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.682, + "step": 17430 + }, + { + "epoch": 99.09, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0, + "step": 17440 + }, + { + "epoch": 99.09, + "eval_accuracy": 1.0, + "eval_loss": 1.6286292066070018e-06, + "eval_runtime": 128.7111, + "eval_samples_per_second": 2.735, + "eval_steps_per_second": 0.684, + "step": 17440 + }, + { + "epoch": 99.15, + "learning_rate": 9.431818181818182e-07, + "loss": 0.0, + "step": 17450 + }, + { + "epoch": 99.15, + "eval_accuracy": 1.0, + "eval_loss": 1.6276131873382838e-06, + "eval_runtime": 128.8381, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 17450 + }, + { + "epoch": 99.2, + "learning_rate": 8.863636363636363e-07, + "loss": 0.0, + "step": 17460 + }, + { + "epoch": 99.2, + "eval_accuracy": 1.0, + "eval_loss": 1.627274514248711e-06, + "eval_runtime": 128.4904, + "eval_samples_per_second": 2.74, + "eval_steps_per_second": 0.685, + "step": 17460 + }, + { + "epoch": 99.26, + "learning_rate": 8.295454545454546e-07, + "loss": 0.0, + "step": 17470 + }, + { + "epoch": 99.26, + "eval_accuracy": 1.0, + "eval_loss": 1.625919935577258e-06, + "eval_runtime": 128.0767, + "eval_samples_per_second": 2.748, + "eval_steps_per_second": 0.687, + "step": 17470 + }, + { + "epoch": 99.32, + "learning_rate": 7.727272727272728e-07, + "loss": 0.0, + "step": 17480 + }, + { + "epoch": 99.32, + "eval_accuracy": 1.0, + "eval_loss": 1.6252425893981126e-06, + "eval_runtime": 128.7382, + "eval_samples_per_second": 2.734, + "eval_steps_per_second": 0.684, + "step": 17480 + }, + { + "epoch": 99.38, + "learning_rate": 7.15909090909091e-07, + "loss": 0.0, + "step": 17490 + }, + { + "epoch": 99.38, + "eval_accuracy": 1.0, + "eval_loss": 1.6252425893981126e-06, + "eval_runtime": 129.1882, + "eval_samples_per_second": 2.725, + "eval_steps_per_second": 0.681, + "step": 17490 + }, + { + "epoch": 99.43, + "learning_rate": 6.590909090909091e-07, + "loss": 0.0, + "step": 17500 + }, + { + "epoch": 99.43, + "eval_accuracy": 1.0, + "eval_loss": 1.62490391630854e-06, + "eval_runtime": 128.5537, + "eval_samples_per_second": 2.738, + "eval_steps_per_second": 0.685, + "step": 17500 + }, + { + "epoch": 99.49, + "learning_rate": 6.022727272727272e-07, + "loss": 0.0, + "step": 17510 + }, + { + "epoch": 99.49, + "eval_accuracy": 1.0, + "eval_loss": 1.62490391630854e-06, + "eval_runtime": 128.643, + "eval_samples_per_second": 2.736, + "eval_steps_per_second": 0.684, + "step": 17510 + }, + { + "epoch": 99.55, + "learning_rate": 5.454545454545455e-07, + "loss": 0.0, + "step": 17520 + }, + { + "epoch": 99.55, + "eval_accuracy": 1.0, + "eval_loss": 1.6245652432189672e-06, + "eval_runtime": 128.8198, + "eval_samples_per_second": 2.733, + "eval_steps_per_second": 0.683, + "step": 17520 + }, + { + "epoch": 99.6, + "learning_rate": 4.886363636363637e-07, + "loss": 0.0, + "step": 17530 + }, + { + "epoch": 99.6, + "eval_accuracy": 1.0, + "eval_loss": 1.6242265701293945e-06, + "eval_runtime": 128.9596, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.682, + "step": 17530 + }, + { + "epoch": 99.66, + "learning_rate": 4.318181818181818e-07, + "loss": 0.0, + "step": 17540 + }, + { + "epoch": 99.66, + "eval_accuracy": 1.0, + "eval_loss": 1.6242265701293945e-06, + "eval_runtime": 129.0605, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 0.682, + "step": 17540 + }, + { + "epoch": 99.72, + "learning_rate": 3.75e-07, + "loss": 0.0, + "step": 17550 + }, + { + "epoch": 99.72, + "eval_accuracy": 1.0, + "eval_loss": 1.6232105508606764e-06, + "eval_runtime": 129.1811, + "eval_samples_per_second": 2.725, + "eval_steps_per_second": 0.681, + "step": 17550 + }, + { + "epoch": 99.77, + "learning_rate": 3.181818181818182e-07, + "loss": 0.0, + "step": 17560 + }, + { + "epoch": 99.77, + "eval_accuracy": 1.0, + "eval_loss": 1.6232105508606764e-06, + "eval_runtime": 128.824, + "eval_samples_per_second": 2.732, + "eval_steps_per_second": 0.683, + "step": 17560 + }, + { + "epoch": 99.83, + "learning_rate": 2.613636363636364e-07, + "loss": 0.0, + "step": 17570 + }, + { + "epoch": 99.83, + "eval_accuracy": 1.0, + "eval_loss": 1.6232105508606764e-06, + "eval_runtime": 129.5639, + "eval_samples_per_second": 2.717, + "eval_steps_per_second": 0.679, + "step": 17570 + }, + { + "epoch": 99.89, + "learning_rate": 2.0454545454545458e-07, + "loss": 0.0, + "step": 17580 + }, + { + "epoch": 99.89, + "eval_accuracy": 1.0, + "eval_loss": 1.6232105508606764e-06, + "eval_runtime": 129.3846, + "eval_samples_per_second": 2.721, + "eval_steps_per_second": 0.68, + "step": 17580 + }, + { + "epoch": 99.94, + "learning_rate": 1.4772727272727272e-07, + "loss": 0.0, + "step": 17590 + }, + { + "epoch": 99.94, + "eval_accuracy": 1.0, + "eval_loss": 1.622533204681531e-06, + "eval_runtime": 129.588, + "eval_samples_per_second": 2.716, + "eval_steps_per_second": 0.679, + "step": 17590 + }, + { + "epoch": 100.0, + "learning_rate": 9.090909090909091e-08, + "loss": 0.0, + "step": 17600 + }, + { + "epoch": 100.0, + "eval_accuracy": 1.0, + "eval_loss": 1.622194645278796e-06, + "eval_runtime": 128.9413, + "eval_samples_per_second": 2.73, + "eval_steps_per_second": 0.682, + "step": 17600 + } + ], + "max_steps": 17600, + "num_train_epochs": 100, + "total_flos": 2.6811975506874753e+19, + "trial_name": null, + "trial_params": null +}