{ "best_metric": 0.5122641324996948, "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_3/checkpoint-1", "epoch": 28.571428571428573, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5714285714285714, "eval_accuracy": 0.8873239436619719, "eval_loss": 0.5122641324996948, "eval_runtime": 0.5861, "eval_samples_per_second": 242.264, "eval_steps_per_second": 5.118, "step": 1 }, { "epoch": 1.7142857142857144, "eval_accuracy": 0.8661971830985915, "eval_loss": 0.5494502186775208, "eval_runtime": 0.5821, "eval_samples_per_second": 243.962, "eval_steps_per_second": 5.154, "step": 3 }, { "epoch": 2.857142857142857, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.6079620122909546, "eval_runtime": 0.5831, "eval_samples_per_second": 243.534, "eval_steps_per_second": 5.145, "step": 5 }, { "epoch": 4.0, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5590734481811523, "eval_runtime": 0.6248, "eval_samples_per_second": 227.269, "eval_steps_per_second": 4.801, "step": 7 }, { "epoch": 4.571428571428571, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5464029312133789, "eval_runtime": 0.6237, "eval_samples_per_second": 227.68, "eval_steps_per_second": 4.81, "step": 8 }, { "epoch": 5.714285714285714, "grad_norm": 4.793102264404297, "learning_rate": 9.523809523809525e-06, "loss": 0.4241, "step": 10 }, { "epoch": 5.714285714285714, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.5981650352478027, "eval_runtime": 0.6008, "eval_samples_per_second": 236.347, "eval_steps_per_second": 4.993, "step": 10 }, { "epoch": 6.857142857142857, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.6497244238853455, "eval_runtime": 0.6066, "eval_samples_per_second": 234.109, "eval_steps_per_second": 4.946, "step": 12 }, { "epoch": 8.0, "eval_accuracy": 0.852112676056338, "eval_loss": 0.5927726030349731, "eval_runtime": 0.627, "eval_samples_per_second": 226.478, "eval_steps_per_second": 4.785, "step": 14 }, { "epoch": 8.571428571428571, "eval_accuracy": 0.852112676056338, "eval_loss": 0.5711137056350708, "eval_runtime": 0.6061, "eval_samples_per_second": 234.301, "eval_steps_per_second": 4.95, "step": 15 }, { "epoch": 9.714285714285714, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5468315482139587, "eval_runtime": 0.5742, "eval_samples_per_second": 247.304, "eval_steps_per_second": 5.225, "step": 17 }, { "epoch": 10.857142857142858, "eval_accuracy": 0.852112676056338, "eval_loss": 0.5482771992683411, "eval_runtime": 0.5841, "eval_samples_per_second": 243.112, "eval_steps_per_second": 5.136, "step": 19 }, { "epoch": 11.428571428571429, "grad_norm": 5.585962772369385, "learning_rate": 7.1428571428571436e-06, "loss": 0.4152, "step": 20 }, { "epoch": 12.0, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.5783097147941589, "eval_runtime": 0.5721, "eval_samples_per_second": 248.198, "eval_steps_per_second": 5.244, "step": 21 }, { "epoch": 12.571428571428571, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.5835375785827637, "eval_runtime": 0.6296, "eval_samples_per_second": 225.543, "eval_steps_per_second": 4.765, "step": 22 }, { "epoch": 13.714285714285714, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.5668258666992188, "eval_runtime": 0.5869, "eval_samples_per_second": 241.934, "eval_steps_per_second": 5.111, "step": 24 }, { "epoch": 14.857142857142858, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.555620014667511, "eval_runtime": 0.5858, "eval_samples_per_second": 242.413, "eval_steps_per_second": 5.121, "step": 26 }, { "epoch": 16.0, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.5564189553260803, "eval_runtime": 0.5987, "eval_samples_per_second": 237.192, "eval_steps_per_second": 5.011, "step": 28 }, { "epoch": 16.571428571428573, "eval_accuracy": 0.8450704225352113, "eval_loss": 0.5590547919273376, "eval_runtime": 0.6483, "eval_samples_per_second": 219.036, "eval_steps_per_second": 4.628, "step": 29 }, { "epoch": 17.142857142857142, "grad_norm": 5.258753299713135, "learning_rate": 4.761904761904762e-06, "loss": 0.4367, "step": 30 }, { "epoch": 17.714285714285715, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.5619198679924011, "eval_runtime": 0.6281, "eval_samples_per_second": 226.062, "eval_steps_per_second": 4.776, "step": 31 }, { "epoch": 18.857142857142858, "eval_accuracy": 0.8591549295774648, "eval_loss": 0.5809253454208374, "eval_runtime": 0.5953, "eval_samples_per_second": 238.516, "eval_steps_per_second": 5.039, "step": 33 }, { "epoch": 20.0, "eval_accuracy": 0.8661971830985915, "eval_loss": 0.5810067057609558, "eval_runtime": 0.626, "eval_samples_per_second": 226.835, "eval_steps_per_second": 4.792, "step": 35 }, { "epoch": 20.571428571428573, "eval_accuracy": 0.8661971830985915, "eval_loss": 0.5768489837646484, "eval_runtime": 0.682, "eval_samples_per_second": 208.199, "eval_steps_per_second": 4.399, "step": 36 }, { "epoch": 21.714285714285715, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5590782761573792, "eval_runtime": 0.665, "eval_samples_per_second": 213.54, "eval_steps_per_second": 4.511, "step": 38 }, { "epoch": 22.857142857142858, "grad_norm": 4.620666027069092, "learning_rate": 2.380952380952381e-06, "loss": 0.4241, "step": 40 }, { "epoch": 22.857142857142858, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5452097654342651, "eval_runtime": 0.5858, "eval_samples_per_second": 242.391, "eval_steps_per_second": 5.121, "step": 40 }, { "epoch": 24.0, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5387392640113831, "eval_runtime": 0.5833, "eval_samples_per_second": 243.447, "eval_steps_per_second": 5.143, "step": 42 }, { "epoch": 24.571428571428573, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5397770404815674, "eval_runtime": 0.6666, "eval_samples_per_second": 213.023, "eval_steps_per_second": 4.5, "step": 43 }, { "epoch": 25.714285714285715, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5457538962364197, "eval_runtime": 0.5797, "eval_samples_per_second": 244.962, "eval_steps_per_second": 5.175, "step": 45 }, { "epoch": 26.857142857142858, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5509300827980042, "eval_runtime": 0.6395, "eval_samples_per_second": 222.035, "eval_steps_per_second": 4.691, "step": 47 }, { "epoch": 28.0, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5549753904342651, "eval_runtime": 0.5878, "eval_samples_per_second": 241.566, "eval_steps_per_second": 5.103, "step": 49 }, { "epoch": 28.571428571428573, "grad_norm": 4.701329708099365, "learning_rate": 0.0, "loss": 0.4171, "step": 50 }, { "epoch": 28.571428571428573, "eval_accuracy": 0.8732394366197183, "eval_loss": 0.5557973980903625, "eval_runtime": 0.5942, "eval_samples_per_second": 238.984, "eval_steps_per_second": 5.049, "step": 50 }, { "epoch": 28.571428571428573, "step": 50, "total_flos": 2.3842598606630093e+17, "train_loss": 0.4234132957458496, "train_runtime": 127.7102, "train_samples_per_second": 166.784, "train_steps_per_second": 0.392 } ], "logging_steps": 10, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3842598606630093e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }