|
{ |
|
"best_metric": 0.5122641324996948, |
|
"best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_3/checkpoint-15", |
|
"epoch": 28.571428571428573, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5166566371917725, |
|
"eval_runtime": 0.6288, |
|
"eval_samples_per_second": 225.827, |
|
"eval_steps_per_second": 4.771, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.519730806350708, |
|
"eval_runtime": 0.6203, |
|
"eval_samples_per_second": 228.939, |
|
"eval_steps_per_second": 4.837, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5266488194465637, |
|
"eval_runtime": 0.6213, |
|
"eval_samples_per_second": 228.546, |
|
"eval_steps_per_second": 4.828, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5390564203262329, |
|
"eval_runtime": 0.6545, |
|
"eval_samples_per_second": 216.969, |
|
"eval_steps_per_second": 4.584, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5425485968589783, |
|
"eval_runtime": 0.6176, |
|
"eval_samples_per_second": 229.935, |
|
"eval_steps_per_second": 4.858, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 5.448883056640625, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.4435, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5403485298156738, |
|
"eval_runtime": 0.6202, |
|
"eval_samples_per_second": 228.973, |
|
"eval_steps_per_second": 4.837, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5251158475875854, |
|
"eval_runtime": 0.6735, |
|
"eval_samples_per_second": 210.854, |
|
"eval_steps_per_second": 4.455, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5160439610481262, |
|
"eval_runtime": 0.6272, |
|
"eval_samples_per_second": 226.418, |
|
"eval_steps_per_second": 4.783, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"eval_accuracy": 0.8873239436619719, |
|
"eval_loss": 0.5122641324996948, |
|
"eval_runtime": 0.6632, |
|
"eval_samples_per_second": 214.109, |
|
"eval_steps_per_second": 4.523, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 9.714285714285714, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5291638374328613, |
|
"eval_runtime": 0.7041, |
|
"eval_samples_per_second": 201.678, |
|
"eval_steps_per_second": 4.261, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 10.857142857142858, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.568649172782898, |
|
"eval_runtime": 0.6802, |
|
"eval_samples_per_second": 208.773, |
|
"eval_steps_per_second": 4.411, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 5.323070049285889, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.4418, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5459948182106018, |
|
"eval_runtime": 0.6906, |
|
"eval_samples_per_second": 205.619, |
|
"eval_steps_per_second": 4.344, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 12.571428571428571, |
|
"eval_accuracy": 0.8873239436619719, |
|
"eval_loss": 0.5333032608032227, |
|
"eval_runtime": 0.6327, |
|
"eval_samples_per_second": 224.427, |
|
"eval_steps_per_second": 4.741, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 13.714285714285714, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5152425169944763, |
|
"eval_runtime": 0.6626, |
|
"eval_samples_per_second": 214.318, |
|
"eval_steps_per_second": 4.528, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 14.857142857142858, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5236279964447021, |
|
"eval_runtime": 0.6606, |
|
"eval_samples_per_second": 214.94, |
|
"eval_steps_per_second": 4.541, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5372341275215149, |
|
"eval_runtime": 0.6123, |
|
"eval_samples_per_second": 231.928, |
|
"eval_steps_per_second": 4.9, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 16.571428571428573, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5472158193588257, |
|
"eval_runtime": 0.6625, |
|
"eval_samples_per_second": 214.333, |
|
"eval_steps_per_second": 4.528, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 5.04396390914917, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 0.4363, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 17.714285714285715, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5421658754348755, |
|
"eval_runtime": 0.6149, |
|
"eval_samples_per_second": 230.945, |
|
"eval_steps_per_second": 4.879, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 18.857142857142858, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5293453335762024, |
|
"eval_runtime": 0.6909, |
|
"eval_samples_per_second": 205.543, |
|
"eval_steps_per_second": 4.342, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.523467481136322, |
|
"eval_runtime": 0.6321, |
|
"eval_samples_per_second": 224.663, |
|
"eval_steps_per_second": 4.746, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 20.571428571428573, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.523999810218811, |
|
"eval_runtime": 0.6902, |
|
"eval_samples_per_second": 205.737, |
|
"eval_steps_per_second": 4.347, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 21.714285714285715, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5302459001541138, |
|
"eval_runtime": 0.6908, |
|
"eval_samples_per_second": 205.559, |
|
"eval_steps_per_second": 4.343, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 5.993457794189453, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"loss": 0.4371, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5324126482009888, |
|
"eval_runtime": 0.7047, |
|
"eval_samples_per_second": 201.5, |
|
"eval_steps_per_second": 4.257, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8802816901408451, |
|
"eval_loss": 0.5349227786064148, |
|
"eval_runtime": 0.6311, |
|
"eval_samples_per_second": 225.0, |
|
"eval_steps_per_second": 4.754, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 24.571428571428573, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5362741351127625, |
|
"eval_runtime": 0.6225, |
|
"eval_samples_per_second": 228.126, |
|
"eval_steps_per_second": 4.82, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5341742634773254, |
|
"eval_runtime": 0.6561, |
|
"eval_samples_per_second": 216.426, |
|
"eval_steps_per_second": 4.572, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5314902067184448, |
|
"eval_runtime": 0.6626, |
|
"eval_samples_per_second": 214.302, |
|
"eval_steps_per_second": 4.528, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5318764448165894, |
|
"eval_runtime": 0.6393, |
|
"eval_samples_per_second": 222.13, |
|
"eval_steps_per_second": 4.693, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 4.919415473937988, |
|
"learning_rate": 0.0, |
|
"loss": 0.4298, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"eval_accuracy": 0.8661971830985915, |
|
"eval_loss": 0.5321409106254578, |
|
"eval_runtime": 0.6159, |
|
"eval_samples_per_second": 230.575, |
|
"eval_steps_per_second": 4.871, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"step": 50, |
|
"total_flos": 2.3842598606630093e+17, |
|
"train_loss": 0.43769028663635257, |
|
"train_runtime": 134.3919, |
|
"train_samples_per_second": 158.492, |
|
"train_steps_per_second": 0.372 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3842598606630093e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|