|
{ |
|
"best_metric": 0.5012136101722717, |
|
"best_model_checkpoint": "./kd_results/microsoft/beit-base-patch16-384_alpha0.7_temp5.0/checkpoint-1440", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 1800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 5.184907913208008, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 1.1858, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 5.849033355712891, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 1.1366, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 5.295121669769287, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.098, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.30632411067193677, |
|
"eval_loss": 1.3922169208526611, |
|
"eval_runtime": 38.7879, |
|
"eval_samples_per_second": 26.091, |
|
"eval_steps_per_second": 0.412, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 7.918435573577881, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.0812, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 7.588355541229248, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.9704, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 8.122336387634277, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.9292, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 5.83294677734375, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.8736, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6136363636363636, |
|
"eval_loss": 0.9673107266426086, |
|
"eval_runtime": 38.6388, |
|
"eval_samples_per_second": 26.191, |
|
"eval_steps_per_second": 0.414, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 5.593318462371826, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.6777, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 3.0493323802948, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.6155, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 6.693108081817627, |
|
"learning_rate": 4.783950617283951e-05, |
|
"loss": 0.5416, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7598814229249012, |
|
"eval_loss": 0.6812384724617004, |
|
"eval_runtime": 38.0797, |
|
"eval_samples_per_second": 26.576, |
|
"eval_steps_per_second": 0.42, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"grad_norm": 3.002751588821411, |
|
"learning_rate": 4.70679012345679e-05, |
|
"loss": 0.4754, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 4.859847068786621, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.4004, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"grad_norm": 5.904755115509033, |
|
"learning_rate": 4.5524691358024696e-05, |
|
"loss": 0.3873, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 3.493598461151123, |
|
"learning_rate": 4.4753086419753084e-05, |
|
"loss": 0.3877, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7984189723320159, |
|
"eval_loss": 0.5915946960449219, |
|
"eval_runtime": 38.2782, |
|
"eval_samples_per_second": 26.438, |
|
"eval_steps_per_second": 0.418, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 2.4440574645996094, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.3319, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 4.575101375579834, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.2766, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.722222222222222, |
|
"grad_norm": 4.615421772003174, |
|
"learning_rate": 4.243827160493827e-05, |
|
"loss": 0.3046, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.199356555938721, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.2839, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8063241106719368, |
|
"eval_loss": 0.5837989449501038, |
|
"eval_runtime": 37.2653, |
|
"eval_samples_per_second": 27.157, |
|
"eval_steps_per_second": 0.429, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.277777777777778, |
|
"grad_norm": 3.1010215282440186, |
|
"learning_rate": 4.089506172839506e-05, |
|
"loss": 0.237, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 2.9845540523529053, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 0.2372, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.833333333333333, |
|
"grad_norm": 2.53747820854187, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.2368, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.549915611743927, |
|
"eval_runtime": 37.6349, |
|
"eval_samples_per_second": 26.89, |
|
"eval_steps_per_second": 0.425, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"grad_norm": 2.4945929050445557, |
|
"learning_rate": 3.8580246913580246e-05, |
|
"loss": 0.2242, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.388888888888889, |
|
"grad_norm": 3.1071951389312744, |
|
"learning_rate": 3.780864197530865e-05, |
|
"loss": 0.2067, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 2.733633518218994, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2155, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 4.4593987464904785, |
|
"learning_rate": 3.626543209876543e-05, |
|
"loss": 0.2149, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7984189723320159, |
|
"eval_loss": 0.601948618888855, |
|
"eval_runtime": 39.5799, |
|
"eval_samples_per_second": 25.569, |
|
"eval_steps_per_second": 0.404, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"grad_norm": 3.548896074295044, |
|
"learning_rate": 3.5493827160493834e-05, |
|
"loss": 0.1999, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 1.9559125900268555, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.1843, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 2.4996557235717773, |
|
"learning_rate": 3.395061728395062e-05, |
|
"loss": 0.1883, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8053359683794467, |
|
"eval_loss": 0.5796774625778198, |
|
"eval_runtime": 38.3082, |
|
"eval_samples_per_second": 26.417, |
|
"eval_steps_per_second": 0.418, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 8.055555555555555, |
|
"grad_norm": 2.0347297191619873, |
|
"learning_rate": 3.317901234567901e-05, |
|
"loss": 0.1838, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 1.6269855499267578, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.1786, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.61111111111111, |
|
"grad_norm": 1.7134425640106201, |
|
"learning_rate": 3.16358024691358e-05, |
|
"loss": 0.1739, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 2.123713254928589, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.1724, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.825098814229249, |
|
"eval_loss": 0.5546149015426636, |
|
"eval_runtime": 40.3516, |
|
"eval_samples_per_second": 25.08, |
|
"eval_steps_per_second": 0.397, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 9.166666666666666, |
|
"grad_norm": 1.7199671268463135, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.1581, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 9.444444444444445, |
|
"grad_norm": 1.6997764110565186, |
|
"learning_rate": 2.9320987654320992e-05, |
|
"loss": 0.1658, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.722222222222221, |
|
"grad_norm": 1.5390872955322266, |
|
"learning_rate": 2.8549382716049384e-05, |
|
"loss": 0.1639, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.5496571063995361, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1587, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8231225296442688, |
|
"eval_loss": 0.5657897591590881, |
|
"eval_runtime": 39.7046, |
|
"eval_samples_per_second": 25.488, |
|
"eval_steps_per_second": 0.403, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.277777777777779, |
|
"grad_norm": 1.098414659500122, |
|
"learning_rate": 2.700617283950617e-05, |
|
"loss": 0.155, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 10.555555555555555, |
|
"grad_norm": 0.9889047145843506, |
|
"learning_rate": 2.623456790123457e-05, |
|
"loss": 0.1584, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.833333333333334, |
|
"grad_norm": 2.0119264125823975, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.1592, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8231225296442688, |
|
"eval_loss": 0.516467809677124, |
|
"eval_runtime": 38.3777, |
|
"eval_samples_per_second": 26.369, |
|
"eval_steps_per_second": 0.417, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 2.5024330615997314, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.1541, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.38888888888889, |
|
"grad_norm": 1.625465989112854, |
|
"learning_rate": 2.3919753086419755e-05, |
|
"loss": 0.1485, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 11.666666666666666, |
|
"grad_norm": 1.1128909587860107, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.1401, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 11.944444444444445, |
|
"grad_norm": 1.7594417333602905, |
|
"learning_rate": 2.2376543209876542e-05, |
|
"loss": 0.1455, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8300395256916996, |
|
"eval_loss": 0.5143478512763977, |
|
"eval_runtime": 39.6911, |
|
"eval_samples_per_second": 25.497, |
|
"eval_steps_per_second": 0.403, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 12.222222222222221, |
|
"grad_norm": 1.4825284481048584, |
|
"learning_rate": 2.1604938271604937e-05, |
|
"loss": 0.1392, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 1.117546796798706, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.1425, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 12.777777777777779, |
|
"grad_norm": 1.0373642444610596, |
|
"learning_rate": 2.006172839506173e-05, |
|
"loss": 0.1438, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8330039525691699, |
|
"eval_loss": 0.5300087332725525, |
|
"eval_runtime": 38.4193, |
|
"eval_samples_per_second": 26.341, |
|
"eval_steps_per_second": 0.416, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 13.055555555555555, |
|
"grad_norm": 1.4194252490997314, |
|
"learning_rate": 1.9290123456790123e-05, |
|
"loss": 0.132, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 1.4579600095748901, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1353, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.61111111111111, |
|
"grad_norm": 1.5753164291381836, |
|
"learning_rate": 1.7746913580246917e-05, |
|
"loss": 0.1379, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 13.88888888888889, |
|
"grad_norm": 1.0776703357696533, |
|
"learning_rate": 1.697530864197531e-05, |
|
"loss": 0.1303, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8270750988142292, |
|
"eval_loss": 0.5375888347625732, |
|
"eval_runtime": 39.1838, |
|
"eval_samples_per_second": 25.827, |
|
"eval_steps_per_second": 0.408, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 14.166666666666666, |
|
"grad_norm": 1.2338591814041138, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.1285, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 14.444444444444445, |
|
"grad_norm": 1.0960110425949097, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 0.1312, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.722222222222221, |
|
"grad_norm": 1.6634916067123413, |
|
"learning_rate": 1.4660493827160496e-05, |
|
"loss": 0.1337, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1385433673858643, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.1306, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8280632411067194, |
|
"eval_loss": 0.5235399007797241, |
|
"eval_runtime": 40.6053, |
|
"eval_samples_per_second": 24.923, |
|
"eval_steps_per_second": 0.394, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 15.277777777777779, |
|
"grad_norm": 0.9714440703392029, |
|
"learning_rate": 1.3117283950617285e-05, |
|
"loss": 0.1249, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 15.555555555555555, |
|
"grad_norm": 0.6641523838043213, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.1234, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 15.833333333333334, |
|
"grad_norm": 0.7077529430389404, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.1274, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8399209486166008, |
|
"eval_loss": 0.5012136101722717, |
|
"eval_runtime": 39.893, |
|
"eval_samples_per_second": 25.368, |
|
"eval_steps_per_second": 0.401, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 16.11111111111111, |
|
"grad_norm": 0.6755304336547852, |
|
"learning_rate": 1.0802469135802469e-05, |
|
"loss": 0.1278, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 16.38888888888889, |
|
"grad_norm": 0.9831761121749878, |
|
"learning_rate": 1.0030864197530866e-05, |
|
"loss": 0.1242, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.6216269135475159, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1194, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.944444444444443, |
|
"grad_norm": 0.6754944324493408, |
|
"learning_rate": 8.487654320987654e-06, |
|
"loss": 0.121, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8369565217391305, |
|
"eval_loss": 0.5206666588783264, |
|
"eval_runtime": 38.6688, |
|
"eval_samples_per_second": 26.171, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 17.22222222222222, |
|
"grad_norm": 1.1738343238830566, |
|
"learning_rate": 7.71604938271605e-06, |
|
"loss": 0.1257, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 0.6815004348754883, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1134, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 17.77777777777778, |
|
"grad_norm": 0.6849784255027771, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.1197, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8369565217391305, |
|
"eval_loss": 0.5180361270904541, |
|
"eval_runtime": 40.2512, |
|
"eval_samples_per_second": 25.142, |
|
"eval_steps_per_second": 0.398, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 18.055555555555557, |
|
"grad_norm": 0.7509155869483948, |
|
"learning_rate": 5.401234567901234e-06, |
|
"loss": 0.1252, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 18.333333333333332, |
|
"grad_norm": 0.6722740530967712, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.1158, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 18.61111111111111, |
|
"grad_norm": 0.5532152652740479, |
|
"learning_rate": 3.858024691358025e-06, |
|
"loss": 0.119, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 18.88888888888889, |
|
"grad_norm": 0.6120570302009583, |
|
"learning_rate": 3.0864197530864196e-06, |
|
"loss": 0.1189, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8409090909090909, |
|
"eval_loss": 0.5124243497848511, |
|
"eval_runtime": 39.1972, |
|
"eval_samples_per_second": 25.818, |
|
"eval_steps_per_second": 0.408, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 19.166666666666668, |
|
"grad_norm": 0.6396480202674866, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.1204, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 19.444444444444443, |
|
"grad_norm": 0.5467541217803955, |
|
"learning_rate": 1.5432098765432098e-06, |
|
"loss": 0.1175, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 19.72222222222222, |
|
"grad_norm": 0.7086827754974365, |
|
"learning_rate": 7.716049382716049e-07, |
|
"loss": 0.1155, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.0691556930541992, |
|
"learning_rate": 0.0, |
|
"loss": 0.1157, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.841897233201581, |
|
"eval_loss": 0.5116459131240845, |
|
"eval_runtime": 38.7759, |
|
"eval_samples_per_second": 26.099, |
|
"eval_steps_per_second": 0.413, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 1800, |
|
"total_flos": 0.0, |
|
"train_loss": 0.28024863150384693, |
|
"train_runtime": 12279.1623, |
|
"train_samples_per_second": 9.333, |
|
"train_steps_per_second": 0.147 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|