{ "best_metric": 0.8754578754578755, "best_model_checkpoint": "vit-msn-small-lateral_flow_ivalidation_train_test_7/checkpoint-1", "epoch": 61.53846153846154, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6153846153846154, "eval_accuracy": 0.8754578754578755, "eval_loss": 0.4368458390235901, "eval_runtime": 1.1438, "eval_samples_per_second": 238.681, "eval_steps_per_second": 4.371, "step": 1 }, { "epoch": 1.8461538461538463, "eval_accuracy": 0.8681318681318682, "eval_loss": 0.4439781904220581, "eval_runtime": 1.1174, "eval_samples_per_second": 244.315, "eval_steps_per_second": 4.475, "step": 3 }, { "epoch": 2.4615384615384617, "eval_accuracy": 0.8644688644688645, "eval_loss": 0.44702231884002686, "eval_runtime": 1.1791, "eval_samples_per_second": 231.541, "eval_steps_per_second": 4.241, "step": 4 }, { "epoch": 3.6923076923076925, "eval_accuracy": 0.8644688644688645, "eval_loss": 0.4443347454071045, "eval_runtime": 1.151, "eval_samples_per_second": 237.194, "eval_steps_per_second": 4.344, "step": 6 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.8644688644688645, "eval_loss": 0.43930718302726746, "eval_runtime": 1.1712, "eval_samples_per_second": 233.092, "eval_steps_per_second": 4.269, "step": 8 }, { "epoch": 5.538461538461538, "eval_accuracy": 0.8681318681318682, "eval_loss": 0.4371500015258789, "eval_runtime": 1.1797, "eval_samples_per_second": 231.411, "eval_steps_per_second": 4.238, "step": 9 }, { "epoch": 6.153846153846154, "grad_norm": 3.9270777702331543, "learning_rate": 5e-07, "loss": 0.3118, "step": 10 }, { "epoch": 6.769230769230769, "eval_accuracy": 0.8644688644688645, "eval_loss": 0.4339592754840851, "eval_runtime": 1.1459, "eval_samples_per_second": 238.235, "eval_steps_per_second": 4.363, "step": 11 }, { "epoch": 8.0, "eval_accuracy": 0.8608058608058609, "eval_loss": 0.4319237768650055, "eval_runtime": 1.1339, "eval_samples_per_second": 240.757, "eval_steps_per_second": 4.409, "step": 13 }, { "epoch": 8.615384615384615, "eval_accuracy": 0.8608058608058609, "eval_loss": 0.4313403069972992, "eval_runtime": 1.14, "eval_samples_per_second": 239.473, "eval_steps_per_second": 4.386, "step": 14 }, { "epoch": 9.846153846153847, "eval_accuracy": 0.8681318681318682, "eval_loss": 0.431209534406662, "eval_runtime": 1.1734, "eval_samples_per_second": 232.658, "eval_steps_per_second": 4.261, "step": 16 }, { "epoch": 10.461538461538462, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4313659369945526, "eval_runtime": 1.1216, "eval_samples_per_second": 243.4, "eval_steps_per_second": 4.458, "step": 17 }, { "epoch": 11.692307692307692, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.430632084608078, "eval_runtime": 1.1723, "eval_samples_per_second": 232.869, "eval_steps_per_second": 4.265, "step": 19 }, { "epoch": 12.307692307692308, "grad_norm": 10.91845703125, "learning_rate": 5e-07, "loss": 0.3019, "step": 20 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.42944204807281494, "eval_runtime": 1.1484, "eval_samples_per_second": 237.712, "eval_steps_per_second": 4.354, "step": 21 }, { "epoch": 13.538461538461538, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.42895710468292236, "eval_runtime": 1.2053, "eval_samples_per_second": 226.508, "eval_steps_per_second": 4.148, "step": 22 }, { "epoch": 14.76923076923077, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.42622682452201843, "eval_runtime": 1.1326, "eval_samples_per_second": 241.034, "eval_steps_per_second": 4.415, "step": 24 }, { "epoch": 16.0, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4222809672355652, "eval_runtime": 1.1723, "eval_samples_per_second": 232.871, "eval_steps_per_second": 4.265, "step": 26 }, { "epoch": 16.615384615384617, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4204210340976715, "eval_runtime": 1.2541, "eval_samples_per_second": 217.688, "eval_steps_per_second": 3.987, "step": 27 }, { "epoch": 17.846153846153847, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.417039155960083, "eval_runtime": 1.2268, "eval_samples_per_second": 222.524, "eval_steps_per_second": 4.076, "step": 29 }, { "epoch": 18.46153846153846, "grad_norm": 3.629610061645508, "learning_rate": 5e-07, "loss": 0.2922, "step": 30 }, { "epoch": 18.46153846153846, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4160122871398926, "eval_runtime": 1.135, "eval_samples_per_second": 240.526, "eval_steps_per_second": 4.405, "step": 30 }, { "epoch": 19.692307692307693, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4161117672920227, "eval_runtime": 1.185, "eval_samples_per_second": 230.389, "eval_steps_per_second": 4.22, "step": 32 }, { "epoch": 20.923076923076923, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4161428213119507, "eval_runtime": 1.1027, "eval_samples_per_second": 247.572, "eval_steps_per_second": 4.534, "step": 34 }, { "epoch": 21.53846153846154, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4161922335624695, "eval_runtime": 1.1087, "eval_samples_per_second": 246.243, "eval_steps_per_second": 4.51, "step": 35 }, { "epoch": 22.76923076923077, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41644683480262756, "eval_runtime": 1.1189, "eval_samples_per_second": 243.997, "eval_steps_per_second": 4.469, "step": 37 }, { "epoch": 24.0, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4166106879711151, "eval_runtime": 1.1168, "eval_samples_per_second": 244.453, "eval_steps_per_second": 4.477, "step": 39 }, { "epoch": 24.615384615384617, "grad_norm": 6.736999988555908, "learning_rate": 5e-08, "loss": 0.2993, "step": 40 }, { "epoch": 24.615384615384617, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.416763037443161, "eval_runtime": 1.1431, "eval_samples_per_second": 238.831, "eval_steps_per_second": 4.374, "step": 40 }, { "epoch": 25.846153846153847, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4170037508010864, "eval_runtime": 1.1382, "eval_samples_per_second": 239.862, "eval_steps_per_second": 4.393, "step": 42 }, { "epoch": 26.46153846153846, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41714850068092346, "eval_runtime": 1.1121, "eval_samples_per_second": 245.472, "eval_steps_per_second": 4.496, "step": 43 }, { "epoch": 27.692307692307693, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4176494777202606, "eval_runtime": 1.1025, "eval_samples_per_second": 247.61, "eval_steps_per_second": 4.535, "step": 45 }, { "epoch": 28.923076923076923, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4178927540779114, "eval_runtime": 1.1332, "eval_samples_per_second": 240.909, "eval_steps_per_second": 4.412, "step": 47 }, { "epoch": 29.53846153846154, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4178699254989624, "eval_runtime": 1.1034, "eval_samples_per_second": 247.418, "eval_steps_per_second": 4.531, "step": 48 }, { "epoch": 30.76923076923077, "grad_norm": 5.94409704208374, "learning_rate": 5e-09, "loss": 0.298, "step": 50 }, { "epoch": 30.76923076923077, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4178834855556488, "eval_runtime": 1.1121, "eval_samples_per_second": 245.481, "eval_steps_per_second": 4.496, "step": 50 }, { "epoch": 32.0, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4178943634033203, "eval_runtime": 1.1216, "eval_samples_per_second": 243.412, "eval_steps_per_second": 4.458, "step": 52 }, { "epoch": 32.61538461538461, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41789236664772034, "eval_runtime": 1.1684, "eval_samples_per_second": 233.651, "eval_steps_per_second": 4.279, "step": 53 }, { "epoch": 33.84615384615385, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41788020730018616, "eval_runtime": 1.134, "eval_samples_per_second": 240.742, "eval_steps_per_second": 4.409, "step": 55 }, { "epoch": 34.46153846153846, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41788193583488464, "eval_runtime": 1.1034, "eval_samples_per_second": 247.42, "eval_steps_per_second": 4.531, "step": 56 }, { "epoch": 35.69230769230769, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41786301136016846, "eval_runtime": 1.142, "eval_samples_per_second": 239.055, "eval_steps_per_second": 4.378, "step": 58 }, { "epoch": 36.92307692307692, "grad_norm": 3.7487382888793945, "learning_rate": 5e-09, "loss": 0.2936, "step": 60 }, { "epoch": 36.92307692307692, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41784799098968506, "eval_runtime": 1.1379, "eval_samples_per_second": 239.919, "eval_steps_per_second": 4.394, "step": 60 }, { "epoch": 37.53846153846154, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41783633828163147, "eval_runtime": 1.1594, "eval_samples_per_second": 235.472, "eval_steps_per_second": 4.313, "step": 61 }, { "epoch": 38.76923076923077, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41781386733055115, "eval_runtime": 1.1968, "eval_samples_per_second": 228.113, "eval_steps_per_second": 4.178, "step": 63 }, { "epoch": 40.0, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41779184341430664, "eval_runtime": 1.2641, "eval_samples_per_second": 215.964, "eval_steps_per_second": 3.955, "step": 65 }, { "epoch": 40.61538461538461, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41778844594955444, "eval_runtime": 1.1208, "eval_samples_per_second": 243.572, "eval_steps_per_second": 4.461, "step": 66 }, { "epoch": 41.84615384615385, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41776061058044434, "eval_runtime": 1.1953, "eval_samples_per_second": 228.397, "eval_steps_per_second": 4.183, "step": 68 }, { "epoch": 42.46153846153846, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41773903369903564, "eval_runtime": 1.1431, "eval_samples_per_second": 238.833, "eval_steps_per_second": 4.374, "step": 69 }, { "epoch": 43.07692307692308, "grad_norm": 11.417155265808105, "learning_rate": 5e-09, "loss": 0.2948, "step": 70 }, { "epoch": 43.69230769230769, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.417697936296463, "eval_runtime": 1.1458, "eval_samples_per_second": 238.262, "eval_steps_per_second": 4.364, "step": 71 }, { "epoch": 44.92307692307692, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4176548719406128, "eval_runtime": 1.1363, "eval_samples_per_second": 240.248, "eval_steps_per_second": 4.4, "step": 73 }, { "epoch": 45.53846153846154, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.417643278837204, "eval_runtime": 1.182, "eval_samples_per_second": 230.968, "eval_steps_per_second": 4.23, "step": 74 }, { "epoch": 46.76923076923077, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4176342189311981, "eval_runtime": 1.1232, "eval_samples_per_second": 243.052, "eval_steps_per_second": 4.452, "step": 76 }, { "epoch": 48.0, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.417605459690094, "eval_runtime": 1.188, "eval_samples_per_second": 229.804, "eval_steps_per_second": 4.209, "step": 78 }, { "epoch": 48.61538461538461, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4175802767276764, "eval_runtime": 1.1516, "eval_samples_per_second": 237.064, "eval_steps_per_second": 4.342, "step": 79 }, { "epoch": 49.23076923076923, "grad_norm": 6.5994462966918945, "learning_rate": 5e-09, "loss": 0.2965, "step": 80 }, { "epoch": 49.84615384615385, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41755160689353943, "eval_runtime": 1.1584, "eval_samples_per_second": 235.666, "eval_steps_per_second": 4.316, "step": 81 }, { "epoch": 50.46153846153846, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4175373315811157, "eval_runtime": 1.1263, "eval_samples_per_second": 242.377, "eval_steps_per_second": 4.439, "step": 82 }, { "epoch": 51.69230769230769, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4175109267234802, "eval_runtime": 1.1695, "eval_samples_per_second": 233.442, "eval_steps_per_second": 4.276, "step": 84 }, { "epoch": 52.92307692307692, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41747742891311646, "eval_runtime": 1.1314, "eval_samples_per_second": 241.296, "eval_steps_per_second": 4.419, "step": 86 }, { "epoch": 53.53846153846154, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41746145486831665, "eval_runtime": 1.1321, "eval_samples_per_second": 241.15, "eval_steps_per_second": 4.417, "step": 87 }, { "epoch": 54.76923076923077, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4174296259880066, "eval_runtime": 1.1236, "eval_samples_per_second": 242.969, "eval_steps_per_second": 4.45, "step": 89 }, { "epoch": 55.38461538461539, "grad_norm": 3.603271484375, "learning_rate": 5e-09, "loss": 0.292, "step": 90 }, { "epoch": 56.0, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41741496324539185, "eval_runtime": 1.2009, "eval_samples_per_second": 227.339, "eval_steps_per_second": 4.164, "step": 91 }, { "epoch": 56.61538461538461, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41740182042121887, "eval_runtime": 1.1185, "eval_samples_per_second": 244.079, "eval_steps_per_second": 4.47, "step": 92 }, { "epoch": 57.84615384615385, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4173685312271118, "eval_runtime": 1.1623, "eval_samples_per_second": 234.881, "eval_steps_per_second": 4.302, "step": 94 }, { "epoch": 58.46153846153846, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4173491299152374, "eval_runtime": 1.1895, "eval_samples_per_second": 229.514, "eval_steps_per_second": 4.204, "step": 95 }, { "epoch": 59.69230769230769, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4173341393470764, "eval_runtime": 1.1361, "eval_samples_per_second": 240.291, "eval_steps_per_second": 4.401, "step": 97 }, { "epoch": 60.92307692307692, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.41732990741729736, "eval_runtime": 1.1233, "eval_samples_per_second": 243.039, "eval_steps_per_second": 4.451, "step": 99 }, { "epoch": 61.53846153846154, "grad_norm": 4.0016703605651855, "learning_rate": 5e-09, "loss": 0.2962, "step": 100 }, { "epoch": 61.53846153846154, "eval_accuracy": 0.8717948717948718, "eval_loss": 0.4173365831375122, "eval_runtime": 1.1583, "eval_samples_per_second": 235.687, "eval_steps_per_second": 4.317, "step": 100 }, { "epoch": 61.53846153846154, "step": 100, "total_flos": 9.816111472132547e+17, "train_loss": 0.2976283621788025, "train_runtime": 498.5234, "train_samples_per_second": 163.483, "train_steps_per_second": 0.201 } ], "logging_steps": 10, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.816111472132547e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }