{ "best_metric": 0.4159727096557617, "best_model_checkpoint": "vit-msn-small-lateral_flow_ivalidation_train_test_7/checkpoint-3", "epoch": 92.3076923076923, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.8791208791208791, "eval_loss": 0.4159727096557617, "eval_runtime": 1.1776, "eval_samples_per_second": 231.83, "eval_steps_per_second": 4.246, "step": 3 }, { "epoch": 1.8461538461538463, "eval_accuracy": 0.8388278388278388, "eval_loss": 0.4668194353580475, "eval_runtime": 1.1816, "eval_samples_per_second": 231.05, "eval_steps_per_second": 4.232, "step": 6 }, { "epoch": 2.769230769230769, "eval_accuracy": 0.8021978021978022, "eval_loss": 0.543300986289978, "eval_runtime": 1.172, "eval_samples_per_second": 232.936, "eval_steps_per_second": 4.266, "step": 9 }, { "epoch": 3.076923076923077, "grad_norm": 27.540616989135742, "learning_rate": 1.6666666666666667e-06, "loss": 0.3869, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.8168498168498168, "eval_loss": 0.5052347779273987, "eval_runtime": 1.166, "eval_samples_per_second": 234.13, "eval_steps_per_second": 4.288, "step": 13 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.45910125970840454, "eval_runtime": 1.1375, "eval_samples_per_second": 239.995, "eval_steps_per_second": 4.396, "step": 16 }, { "epoch": 5.846153846153846, "eval_accuracy": 0.8278388278388278, "eval_loss": 0.4820004999637604, "eval_runtime": 1.1463, "eval_samples_per_second": 238.165, "eval_steps_per_second": 4.362, "step": 19 }, { "epoch": 6.153846153846154, "grad_norm": 4.4243292808532715, "learning_rate": 3.3333333333333333e-06, "loss": 0.3658, "step": 20 }, { "epoch": 6.769230769230769, "eval_accuracy": 0.8095238095238095, "eval_loss": 0.4953090250492096, "eval_runtime": 1.1575, "eval_samples_per_second": 235.858, "eval_steps_per_second": 4.32, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.8608058608058609, "eval_loss": 0.4496568441390991, "eval_runtime": 1.2191, "eval_samples_per_second": 223.934, "eval_steps_per_second": 4.101, "step": 26 }, { "epoch": 8.923076923076923, "eval_accuracy": 0.8315018315018315, "eval_loss": 0.46859756112098694, "eval_runtime": 1.1496, "eval_samples_per_second": 237.484, "eval_steps_per_second": 4.35, "step": 29 }, { "epoch": 9.23076923076923, "grad_norm": 9.303510665893555, "learning_rate": 5e-06, "loss": 0.3439, "step": 30 }, { "epoch": 9.846153846153847, "eval_accuracy": 0.8608058608058609, "eval_loss": 0.45060521364212036, "eval_runtime": 1.205, "eval_samples_per_second": 226.549, "eval_steps_per_second": 4.149, "step": 32 }, { "epoch": 10.76923076923077, "eval_accuracy": 0.8168498168498168, "eval_loss": 0.485858678817749, "eval_runtime": 1.1431, "eval_samples_per_second": 238.818, "eval_steps_per_second": 4.374, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.8168498168498168, "eval_loss": 0.49286317825317383, "eval_runtime": 1.1624, "eval_samples_per_second": 234.854, "eval_steps_per_second": 4.301, "step": 39 }, { "epoch": 12.307692307692308, "grad_norm": 5.916250228881836, "learning_rate": 6.666666666666667e-06, "loss": 0.3416, "step": 40 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.8058608058608059, "eval_loss": 0.49565577507019043, "eval_runtime": 1.2294, "eval_samples_per_second": 222.062, "eval_steps_per_second": 4.067, "step": 42 }, { "epoch": 13.846153846153847, "eval_accuracy": 0.7875457875457875, "eval_loss": 0.5228819251060486, "eval_runtime": 1.2179, "eval_samples_per_second": 224.159, "eval_steps_per_second": 4.105, "step": 45 }, { "epoch": 14.76923076923077, "eval_accuracy": 0.8534798534798534, "eval_loss": 0.44727277755737305, "eval_runtime": 1.1724, "eval_samples_per_second": 232.862, "eval_steps_per_second": 4.265, "step": 48 }, { "epoch": 15.384615384615385, "grad_norm": 8.750184059143066, "learning_rate": 8.333333333333334e-06, "loss": 0.324, "step": 50 }, { "epoch": 16.0, "eval_accuracy": 0.8058608058608059, "eval_loss": 0.5260118842124939, "eval_runtime": 1.1888, "eval_samples_per_second": 229.645, "eval_steps_per_second": 4.206, "step": 52 }, { "epoch": 16.923076923076923, "eval_accuracy": 0.8461538461538461, "eval_loss": 0.4581928849220276, "eval_runtime": 1.1456, "eval_samples_per_second": 238.297, "eval_steps_per_second": 4.364, "step": 55 }, { "epoch": 17.846153846153847, "eval_accuracy": 0.7838827838827839, "eval_loss": 0.5298556089401245, "eval_runtime": 1.1808, "eval_samples_per_second": 231.203, "eval_steps_per_second": 4.234, "step": 58 }, { "epoch": 18.46153846153846, "grad_norm": 11.985779762268066, "learning_rate": 1e-05, "loss": 0.3273, "step": 60 }, { "epoch": 18.76923076923077, "eval_accuracy": 0.8205128205128205, "eval_loss": 0.49467265605926514, "eval_runtime": 1.2593, "eval_samples_per_second": 216.781, "eval_steps_per_second": 3.97, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.7692307692307693, "eval_loss": 0.5393444895744324, "eval_runtime": 1.2277, "eval_samples_per_second": 222.374, "eval_steps_per_second": 4.073, "step": 65 }, { "epoch": 20.923076923076923, "eval_accuracy": 0.8278388278388278, "eval_loss": 0.49159082770347595, "eval_runtime": 1.1597, "eval_samples_per_second": 235.406, "eval_steps_per_second": 4.311, "step": 68 }, { "epoch": 21.53846153846154, "grad_norm": 5.8529887199401855, "learning_rate": 9.583333333333335e-06, "loss": 0.3397, "step": 70 }, { "epoch": 21.846153846153847, "eval_accuracy": 0.7802197802197802, "eval_loss": 0.5359878540039062, "eval_runtime": 1.1883, "eval_samples_per_second": 229.735, "eval_steps_per_second": 4.208, "step": 71 }, { "epoch": 22.76923076923077, "eval_accuracy": 0.7655677655677655, "eval_loss": 0.5660694241523743, "eval_runtime": 1.174, "eval_samples_per_second": 232.531, "eval_steps_per_second": 4.259, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.7216117216117216, "eval_loss": 0.6353771090507507, "eval_runtime": 1.231, "eval_samples_per_second": 221.763, "eval_steps_per_second": 4.062, "step": 78 }, { "epoch": 24.615384615384617, "grad_norm": 13.490166664123535, "learning_rate": 9.166666666666666e-06, "loss": 0.3344, "step": 80 }, { "epoch": 24.923076923076923, "eval_accuracy": 0.7032967032967034, "eval_loss": 0.6782160401344299, "eval_runtime": 1.1828, "eval_samples_per_second": 230.817, "eval_steps_per_second": 4.227, "step": 81 }, { "epoch": 25.846153846153847, "eval_accuracy": 0.7582417582417582, "eval_loss": 0.5704138875007629, "eval_runtime": 1.1635, "eval_samples_per_second": 234.632, "eval_steps_per_second": 4.297, "step": 84 }, { "epoch": 26.76923076923077, "eval_accuracy": 0.6776556776556777, "eval_loss": 0.653683066368103, "eval_runtime": 1.149, "eval_samples_per_second": 237.606, "eval_steps_per_second": 4.352, "step": 87 }, { "epoch": 27.692307692307693, "grad_norm": 11.456792831420898, "learning_rate": 8.750000000000001e-06, "loss": 0.3325, "step": 90 }, { "epoch": 28.0, "eval_accuracy": 0.8424908424908425, "eval_loss": 0.47976481914520264, "eval_runtime": 1.2235, "eval_samples_per_second": 223.128, "eval_steps_per_second": 4.087, "step": 91 }, { "epoch": 28.923076923076923, "eval_accuracy": 0.8058608058608059, "eval_loss": 0.5158453583717346, "eval_runtime": 1.1703, "eval_samples_per_second": 233.277, "eval_steps_per_second": 4.272, "step": 94 }, { "epoch": 29.846153846153847, "eval_accuracy": 0.7912087912087912, "eval_loss": 0.5408114790916443, "eval_runtime": 1.2604, "eval_samples_per_second": 216.59, "eval_steps_per_second": 3.967, "step": 97 }, { "epoch": 30.76923076923077, "grad_norm": 13.521626472473145, "learning_rate": 8.333333333333334e-06, "loss": 0.3283, "step": 100 }, { "epoch": 30.76923076923077, "eval_accuracy": 0.73992673992674, "eval_loss": 0.5963976383209229, "eval_runtime": 1.2378, "eval_samples_per_second": 220.561, "eval_steps_per_second": 4.04, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.8205128205128205, "eval_loss": 0.5069018602371216, "eval_runtime": 1.2566, "eval_samples_per_second": 217.258, "eval_steps_per_second": 3.979, "step": 104 }, { "epoch": 32.92307692307692, "eval_accuracy": 0.7875457875457875, "eval_loss": 0.5396149754524231, "eval_runtime": 1.1548, "eval_samples_per_second": 236.412, "eval_steps_per_second": 4.33, "step": 107 }, { "epoch": 33.84615384615385, "grad_norm": 7.220104694366455, "learning_rate": 7.916666666666667e-06, "loss": 0.3229, "step": 110 }, { "epoch": 33.84615384615385, "eval_accuracy": 0.7985347985347986, "eval_loss": 0.52034592628479, "eval_runtime": 1.2979, "eval_samples_per_second": 210.337, "eval_steps_per_second": 3.852, "step": 110 }, { "epoch": 34.76923076923077, "eval_accuracy": 0.7875457875457875, "eval_loss": 0.5464060306549072, "eval_runtime": 1.1496, "eval_samples_per_second": 237.467, "eval_steps_per_second": 4.349, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5890459418296814, "eval_runtime": 1.1555, "eval_samples_per_second": 236.272, "eval_steps_per_second": 4.327, "step": 117 }, { "epoch": 36.92307692307692, "grad_norm": 7.553558826446533, "learning_rate": 7.500000000000001e-06, "loss": 0.3207, "step": 120 }, { "epoch": 36.92307692307692, "eval_accuracy": 0.8131868131868132, "eval_loss": 0.5079773664474487, "eval_runtime": 1.1865, "eval_samples_per_second": 230.094, "eval_steps_per_second": 4.214, "step": 120 }, { "epoch": 37.84615384615385, "eval_accuracy": 0.8168498168498168, "eval_loss": 0.49443885684013367, "eval_runtime": 1.2087, "eval_samples_per_second": 225.87, "eval_steps_per_second": 4.137, "step": 123 }, { "epoch": 38.76923076923077, "eval_accuracy": 0.8095238095238095, "eval_loss": 0.49684473872184753, "eval_runtime": 1.2281, "eval_samples_per_second": 222.303, "eval_steps_per_second": 4.071, "step": 126 }, { "epoch": 40.0, "grad_norm": 7.015711307525635, "learning_rate": 7.083333333333335e-06, "loss": 0.3286, "step": 130 }, { "epoch": 40.0, "eval_accuracy": 0.8131868131868132, "eval_loss": 0.4874400496482849, "eval_runtime": 1.1583, "eval_samples_per_second": 235.685, "eval_steps_per_second": 4.317, "step": 130 }, { "epoch": 40.92307692307692, "eval_accuracy": 0.8058608058608059, "eval_loss": 0.5012958645820618, "eval_runtime": 1.1701, "eval_samples_per_second": 233.312, "eval_steps_per_second": 4.273, "step": 133 }, { "epoch": 41.84615384615385, "eval_accuracy": 0.7655677655677655, "eval_loss": 0.5328759551048279, "eval_runtime": 1.2067, "eval_samples_per_second": 226.236, "eval_steps_per_second": 4.144, "step": 136 }, { "epoch": 42.76923076923077, "eval_accuracy": 0.6996336996336996, "eval_loss": 0.6198887228965759, "eval_runtime": 1.1849, "eval_samples_per_second": 230.395, "eval_steps_per_second": 4.22, "step": 139 }, { "epoch": 43.07692307692308, "grad_norm": 12.217916488647461, "learning_rate": 6.666666666666667e-06, "loss": 0.3154, "step": 140 }, { "epoch": 44.0, "eval_accuracy": 0.8058608058608059, "eval_loss": 0.4853915870189667, "eval_runtime": 1.2003, "eval_samples_per_second": 227.44, "eval_steps_per_second": 4.166, "step": 143 }, { "epoch": 44.92307692307692, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5544903874397278, "eval_runtime": 1.2613, "eval_samples_per_second": 216.445, "eval_steps_per_second": 3.964, "step": 146 }, { "epoch": 45.84615384615385, "eval_accuracy": 0.7728937728937729, "eval_loss": 0.5267203450202942, "eval_runtime": 1.1694, "eval_samples_per_second": 233.448, "eval_steps_per_second": 4.276, "step": 149 }, { "epoch": 46.15384615384615, "grad_norm": 5.860217571258545, "learning_rate": 6.25e-06, "loss": 0.3119, "step": 150 }, { "epoch": 46.76923076923077, "eval_accuracy": 0.7802197802197802, "eval_loss": 0.5214048027992249, "eval_runtime": 1.1505, "eval_samples_per_second": 237.28, "eval_steps_per_second": 4.346, "step": 152 }, { "epoch": 48.0, "eval_accuracy": 0.7838827838827839, "eval_loss": 0.5265443325042725, "eval_runtime": 1.2352, "eval_samples_per_second": 221.013, "eval_steps_per_second": 4.048, "step": 156 }, { "epoch": 48.92307692307692, "eval_accuracy": 0.7985347985347986, "eval_loss": 0.5136817693710327, "eval_runtime": 1.1863, "eval_samples_per_second": 230.125, "eval_steps_per_second": 4.215, "step": 159 }, { "epoch": 49.23076923076923, "grad_norm": 10.198068618774414, "learning_rate": 5.833333333333334e-06, "loss": 0.3036, "step": 160 }, { "epoch": 49.84615384615385, "eval_accuracy": 0.7838827838827839, "eval_loss": 0.5353964567184448, "eval_runtime": 1.2084, "eval_samples_per_second": 225.92, "eval_steps_per_second": 4.138, "step": 162 }, { "epoch": 50.76923076923077, "eval_accuracy": 0.7875457875457875, "eval_loss": 0.5269169807434082, "eval_runtime": 1.1557, "eval_samples_per_second": 236.211, "eval_steps_per_second": 4.326, "step": 165 }, { "epoch": 52.0, "eval_accuracy": 0.73992673992674, "eval_loss": 0.579745888710022, "eval_runtime": 1.1971, "eval_samples_per_second": 228.046, "eval_steps_per_second": 4.177, "step": 169 }, { "epoch": 52.30769230769231, "grad_norm": 4.502013206481934, "learning_rate": 5.416666666666667e-06, "loss": 0.2995, "step": 170 }, { "epoch": 52.92307692307692, "eval_accuracy": 0.717948717948718, "eval_loss": 0.6257714629173279, "eval_runtime": 1.2141, "eval_samples_per_second": 224.863, "eval_steps_per_second": 4.118, "step": 172 }, { "epoch": 53.84615384615385, "eval_accuracy": 0.7692307692307693, "eval_loss": 0.5511525869369507, "eval_runtime": 1.2158, "eval_samples_per_second": 224.549, "eval_steps_per_second": 4.113, "step": 175 }, { "epoch": 54.76923076923077, "eval_accuracy": 0.7619047619047619, "eval_loss": 0.5516501069068909, "eval_runtime": 1.1426, "eval_samples_per_second": 238.92, "eval_steps_per_second": 4.376, "step": 178 }, { "epoch": 55.38461538461539, "grad_norm": 7.7625651359558105, "learning_rate": 5e-06, "loss": 0.306, "step": 180 }, { "epoch": 56.0, "eval_accuracy": 0.7545787545787546, "eval_loss": 0.558984637260437, "eval_runtime": 1.2053, "eval_samples_per_second": 226.503, "eval_steps_per_second": 4.148, "step": 182 }, { "epoch": 56.92307692307692, "eval_accuracy": 0.7619047619047619, "eval_loss": 0.5514388084411621, "eval_runtime": 1.1913, "eval_samples_per_second": 229.152, "eval_steps_per_second": 4.197, "step": 185 }, { "epoch": 57.84615384615385, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5596668124198914, "eval_runtime": 1.2098, "eval_samples_per_second": 225.659, "eval_steps_per_second": 4.133, "step": 188 }, { "epoch": 58.46153846153846, "grad_norm": 8.888143539428711, "learning_rate": 4.583333333333333e-06, "loss": 0.2989, "step": 190 }, { "epoch": 58.76923076923077, "eval_accuracy": 0.7326007326007326, "eval_loss": 0.5956733822822571, "eval_runtime": 1.1927, "eval_samples_per_second": 228.893, "eval_steps_per_second": 4.192, "step": 191 }, { "epoch": 60.0, "eval_accuracy": 0.7765567765567766, "eval_loss": 0.5365986824035645, "eval_runtime": 1.2153, "eval_samples_per_second": 224.642, "eval_steps_per_second": 4.114, "step": 195 }, { "epoch": 60.92307692307692, "eval_accuracy": 0.7728937728937729, "eval_loss": 0.5464850664138794, "eval_runtime": 1.23, "eval_samples_per_second": 221.956, "eval_steps_per_second": 4.065, "step": 198 }, { "epoch": 61.53846153846154, "grad_norm": 10.457231521606445, "learning_rate": 4.166666666666667e-06, "loss": 0.2931, "step": 200 }, { "epoch": 61.84615384615385, "eval_accuracy": 0.7252747252747253, "eval_loss": 0.6171274781227112, "eval_runtime": 1.2391, "eval_samples_per_second": 220.329, "eval_steps_per_second": 4.035, "step": 201 }, { "epoch": 62.76923076923077, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5768119692802429, "eval_runtime": 1.1827, "eval_samples_per_second": 230.828, "eval_steps_per_second": 4.228, "step": 204 }, { "epoch": 64.0, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5706220865249634, "eval_runtime": 1.1854, "eval_samples_per_second": 230.307, "eval_steps_per_second": 4.218, "step": 208 }, { "epoch": 64.61538461538461, "grad_norm": 6.158288478851318, "learning_rate": 3.7500000000000005e-06, "loss": 0.299, "step": 210 }, { "epoch": 64.92307692307692, "eval_accuracy": 0.7362637362637363, "eval_loss": 0.5962250232696533, "eval_runtime": 1.1514, "eval_samples_per_second": 237.094, "eval_steps_per_second": 4.342, "step": 211 }, { "epoch": 65.84615384615384, "eval_accuracy": 0.7216117216117216, "eval_loss": 0.6219912767410278, "eval_runtime": 1.2105, "eval_samples_per_second": 225.52, "eval_steps_per_second": 4.13, "step": 214 }, { "epoch": 66.76923076923077, "eval_accuracy": 0.7362637362637363, "eval_loss": 0.5929316282272339, "eval_runtime": 1.1491, "eval_samples_per_second": 237.574, "eval_steps_per_second": 4.351, "step": 217 }, { "epoch": 67.6923076923077, "grad_norm": 5.717153549194336, "learning_rate": 3.3333333333333333e-06, "loss": 0.2969, "step": 220 }, { "epoch": 68.0, "eval_accuracy": 0.7252747252747253, "eval_loss": 0.6135749816894531, "eval_runtime": 1.2314, "eval_samples_per_second": 221.695, "eval_steps_per_second": 4.06, "step": 221 }, { "epoch": 68.92307692307692, "eval_accuracy": 0.7289377289377289, "eval_loss": 0.6092182993888855, "eval_runtime": 1.165, "eval_samples_per_second": 234.334, "eval_steps_per_second": 4.292, "step": 224 }, { "epoch": 69.84615384615384, "eval_accuracy": 0.7252747252747253, "eval_loss": 0.6028769612312317, "eval_runtime": 1.1827, "eval_samples_per_second": 230.837, "eval_steps_per_second": 4.228, "step": 227 }, { "epoch": 70.76923076923077, "grad_norm": 4.415896415710449, "learning_rate": 2.916666666666667e-06, "loss": 0.3015, "step": 230 }, { "epoch": 70.76923076923077, "eval_accuracy": 0.7765567765567766, "eval_loss": 0.5355977416038513, "eval_runtime": 1.2214, "eval_samples_per_second": 223.506, "eval_steps_per_second": 4.094, "step": 230 }, { "epoch": 72.0, "eval_accuracy": 0.7692307692307693, "eval_loss": 0.5375617146492004, "eval_runtime": 1.2823, "eval_samples_per_second": 212.899, "eval_steps_per_second": 3.899, "step": 234 }, { "epoch": 72.92307692307692, "eval_accuracy": 0.7435897435897436, "eval_loss": 0.5885952115058899, "eval_runtime": 1.158, "eval_samples_per_second": 235.755, "eval_steps_per_second": 4.318, "step": 237 }, { "epoch": 73.84615384615384, "grad_norm": 9.932133674621582, "learning_rate": 2.5e-06, "loss": 0.2919, "step": 240 }, { "epoch": 73.84615384615384, "eval_accuracy": 0.7435897435897436, "eval_loss": 0.5869150161743164, "eval_runtime": 1.2325, "eval_samples_per_second": 221.501, "eval_steps_per_second": 4.057, "step": 240 }, { "epoch": 74.76923076923077, "eval_accuracy": 0.7472527472527473, "eval_loss": 0.584571897983551, "eval_runtime": 1.1539, "eval_samples_per_second": 236.584, "eval_steps_per_second": 4.333, "step": 243 }, { "epoch": 76.0, "eval_accuracy": 0.7655677655677655, "eval_loss": 0.5507005453109741, "eval_runtime": 1.172, "eval_samples_per_second": 232.937, "eval_steps_per_second": 4.266, "step": 247 }, { "epoch": 76.92307692307692, "grad_norm": 8.504337310791016, "learning_rate": 2.0833333333333334e-06, "loss": 0.288, "step": 250 }, { "epoch": 76.92307692307692, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5800787806510925, "eval_runtime": 1.1706, "eval_samples_per_second": 233.207, "eval_steps_per_second": 4.271, "step": 250 }, { "epoch": 77.84615384615384, "eval_accuracy": 0.73992673992674, "eval_loss": 0.6076557040214539, "eval_runtime": 1.1434, "eval_samples_per_second": 238.759, "eval_steps_per_second": 4.373, "step": 253 }, { "epoch": 78.76923076923077, "eval_accuracy": 0.7435897435897436, "eval_loss": 0.5848133563995361, "eval_runtime": 1.2128, "eval_samples_per_second": 225.096, "eval_steps_per_second": 4.123, "step": 256 }, { "epoch": 80.0, "grad_norm": 5.167693614959717, "learning_rate": 1.6666666666666667e-06, "loss": 0.2951, "step": 260 }, { "epoch": 80.0, "eval_accuracy": 0.7692307692307693, "eval_loss": 0.5435333847999573, "eval_runtime": 1.1913, "eval_samples_per_second": 229.17, "eval_steps_per_second": 4.197, "step": 260 }, { "epoch": 80.92307692307692, "eval_accuracy": 0.7655677655677655, "eval_loss": 0.56382155418396, "eval_runtime": 1.1629, "eval_samples_per_second": 234.755, "eval_steps_per_second": 4.3, "step": 263 }, { "epoch": 81.84615384615384, "eval_accuracy": 0.73992673992674, "eval_loss": 0.5795217156410217, "eval_runtime": 1.1592, "eval_samples_per_second": 235.498, "eval_steps_per_second": 4.313, "step": 266 }, { "epoch": 82.76923076923077, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5773537755012512, "eval_runtime": 1.1515, "eval_samples_per_second": 237.079, "eval_steps_per_second": 4.342, "step": 269 }, { "epoch": 83.07692307692308, "grad_norm": 5.818239688873291, "learning_rate": 1.25e-06, "loss": 0.2875, "step": 270 }, { "epoch": 84.0, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5702620148658752, "eval_runtime": 1.1961, "eval_samples_per_second": 228.243, "eval_steps_per_second": 4.18, "step": 273 }, { "epoch": 84.92307692307692, "eval_accuracy": 0.7509157509157509, "eval_loss": 0.5712842345237732, "eval_runtime": 1.1634, "eval_samples_per_second": 234.664, "eval_steps_per_second": 4.298, "step": 276 }, { "epoch": 85.84615384615384, "eval_accuracy": 0.7472527472527473, "eval_loss": 0.5783692598342896, "eval_runtime": 1.204, "eval_samples_per_second": 226.74, "eval_steps_per_second": 4.153, "step": 279 }, { "epoch": 86.15384615384616, "grad_norm": 6.248855113983154, "learning_rate": 8.333333333333333e-07, "loss": 0.2855, "step": 280 }, { "epoch": 86.76923076923077, "eval_accuracy": 0.7435897435897436, "eval_loss": 0.5903602838516235, "eval_runtime": 1.1602, "eval_samples_per_second": 235.3, "eval_steps_per_second": 4.31, "step": 282 }, { "epoch": 88.0, "eval_accuracy": 0.7326007326007326, "eval_loss": 0.5916617512702942, "eval_runtime": 1.2119, "eval_samples_per_second": 225.264, "eval_steps_per_second": 4.126, "step": 286 }, { "epoch": 88.92307692307692, "eval_accuracy": 0.7472527472527473, "eval_loss": 0.5859794020652771, "eval_runtime": 1.1576, "eval_samples_per_second": 235.841, "eval_steps_per_second": 4.319, "step": 289 }, { "epoch": 89.23076923076923, "grad_norm": 5.813708305358887, "learning_rate": 4.1666666666666667e-07, "loss": 0.2964, "step": 290 }, { "epoch": 89.84615384615384, "eval_accuracy": 0.7472527472527473, "eval_loss": 0.585824728012085, "eval_runtime": 1.2365, "eval_samples_per_second": 220.783, "eval_steps_per_second": 4.044, "step": 292 }, { "epoch": 90.76923076923077, "eval_accuracy": 0.7435897435897436, "eval_loss": 0.5823063850402832, "eval_runtime": 1.1583, "eval_samples_per_second": 235.693, "eval_steps_per_second": 4.317, "step": 295 }, { "epoch": 92.0, "eval_accuracy": 0.7435897435897436, "eval_loss": 0.5816611647605896, "eval_runtime": 1.2737, "eval_samples_per_second": 214.329, "eval_steps_per_second": 3.925, "step": 299 }, { "epoch": 92.3076923076923, "grad_norm": 5.621781826019287, "learning_rate": 0.0, "loss": 0.291, "step": 300 }, { "epoch": 92.3076923076923, "eval_accuracy": 0.7435897435897436, "eval_loss": 0.5815560221672058, "eval_runtime": 1.2025, "eval_samples_per_second": 227.027, "eval_steps_per_second": 4.158, "step": 300 }, { "epoch": 92.3076923076923, "step": 300, "total_flos": 1.4722503891660472e+18, "train_loss": 0.31525614658991497, "train_runtime": 772.9292, "train_samples_per_second": 105.443, "train_steps_per_second": 0.388 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4722503891660472e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }