|
{ |
|
"best_metric": 0.3434308171272278, |
|
"best_model_checkpoint": "./vit-base-beans/checkpoint-3760", |
|
"epoch": 1.9810326659641728, |
|
"global_step": 3760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019947312961011594, |
|
"loss": 2.0892, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019894625922023182, |
|
"loss": 1.6471, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019841938883034775, |
|
"loss": 1.407, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019789251844046365, |
|
"loss": 1.2934, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.554004214963119, |
|
"eval_loss": 1.2484745979309082, |
|
"eval_runtime": 40.9693, |
|
"eval_samples_per_second": 92.655, |
|
"eval_steps_per_second": 11.594, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019736564805057958, |
|
"loss": 1.3421, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019683877766069548, |
|
"loss": 1.1489, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001963119072708114, |
|
"loss": 1.1359, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019578503688092732, |
|
"loss": 1.0804, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.5076396206533192, |
|
"eval_loss": 1.2709373235702515, |
|
"eval_runtime": 41.0324, |
|
"eval_samples_per_second": 92.512, |
|
"eval_steps_per_second": 11.576, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001952581664910432, |
|
"loss": 1.281, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019473129610115912, |
|
"loss": 1.1561, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019420442571127503, |
|
"loss": 0.9883, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019367755532139096, |
|
"loss": 1.0225, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.5835089567966281, |
|
"eval_loss": 1.1064503192901611, |
|
"eval_runtime": 41.5232, |
|
"eval_samples_per_second": 91.419, |
|
"eval_steps_per_second": 11.439, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019315068493150686, |
|
"loss": 1.0751, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019262381454162277, |
|
"loss": 1.2585, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001920969441517387, |
|
"loss": 1.0325, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019157007376185457, |
|
"loss": 1.1999, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.6240779768177028, |
|
"eval_loss": 0.998985767364502, |
|
"eval_runtime": 41.8244, |
|
"eval_samples_per_second": 90.76, |
|
"eval_steps_per_second": 11.357, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001910432033719705, |
|
"loss": 1.0211, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019051633298208643, |
|
"loss": 1.0205, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001899894625922023, |
|
"loss": 1.0047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018946259220231824, |
|
"loss": 0.9969, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.6538461538461539, |
|
"eval_loss": 0.9371430277824402, |
|
"eval_runtime": 41.6306, |
|
"eval_samples_per_second": 91.183, |
|
"eval_steps_per_second": 11.41, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018893572181243414, |
|
"loss": 1.025, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018840885142255007, |
|
"loss": 1.0511, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018788198103266598, |
|
"loss": 0.8609, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018735511064278188, |
|
"loss": 0.8846, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.6620126448893572, |
|
"eval_loss": 0.9217966794967651, |
|
"eval_runtime": 41.7497, |
|
"eval_samples_per_second": 90.923, |
|
"eval_steps_per_second": 11.377, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001868282402528978, |
|
"loss": 1.0542, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001863013698630137, |
|
"loss": 0.8937, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018577449947312962, |
|
"loss": 0.9493, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018524762908324552, |
|
"loss": 0.9374, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.6704425711275026, |
|
"eval_loss": 0.8844485282897949, |
|
"eval_runtime": 41.7229, |
|
"eval_samples_per_second": 90.981, |
|
"eval_steps_per_second": 11.385, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018472075869336145, |
|
"loss": 0.93, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018419388830347736, |
|
"loss": 0.9324, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018366701791359326, |
|
"loss": 0.9806, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001831401475237092, |
|
"loss": 0.8979, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.6862486828240253, |
|
"eval_loss": 0.8492920994758606, |
|
"eval_runtime": 42.0264, |
|
"eval_samples_per_second": 90.324, |
|
"eval_steps_per_second": 11.302, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018261327713382507, |
|
"loss": 0.7995, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000182086406743941, |
|
"loss": 0.8829, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018155953635405693, |
|
"loss": 0.8794, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018103266596417283, |
|
"loss": 1.0743, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.6680716543730242, |
|
"eval_loss": 0.910214900970459, |
|
"eval_runtime": 41.8529, |
|
"eval_samples_per_second": 90.699, |
|
"eval_steps_per_second": 11.349, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018050579557428873, |
|
"loss": 0.9903, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017997892518440464, |
|
"loss": 0.9447, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017945205479452057, |
|
"loss": 0.9659, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017892518440463647, |
|
"loss": 0.8767, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.6290832455216017, |
|
"eval_loss": 0.9642680287361145, |
|
"eval_runtime": 41.7862, |
|
"eval_samples_per_second": 90.843, |
|
"eval_steps_per_second": 11.367, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017839831401475237, |
|
"loss": 1.0593, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001778714436248683, |
|
"loss": 0.8566, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001773445732349842, |
|
"loss": 0.8882, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001768177028451001, |
|
"loss": 0.9381, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.6383034773445733, |
|
"eval_loss": 0.9608025550842285, |
|
"eval_runtime": 41.7191, |
|
"eval_samples_per_second": 90.99, |
|
"eval_steps_per_second": 11.386, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017629083245521601, |
|
"loss": 0.9577, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017576396206533195, |
|
"loss": 0.8785, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017523709167544785, |
|
"loss": 0.7606, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017471022128556375, |
|
"loss": 1.1457, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.6654373024236038, |
|
"eval_loss": 0.8898976445198059, |
|
"eval_runtime": 41.808, |
|
"eval_samples_per_second": 90.796, |
|
"eval_steps_per_second": 11.361, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017418335089567968, |
|
"loss": 0.9, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017365648050579559, |
|
"loss": 0.7624, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001731296101159115, |
|
"loss": 0.819, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00017260273972602742, |
|
"loss": 0.8516, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.6791359325605901, |
|
"eval_loss": 0.8746893405914307, |
|
"eval_runtime": 41.7582, |
|
"eval_samples_per_second": 90.904, |
|
"eval_steps_per_second": 11.375, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00017207586933614332, |
|
"loss": 0.9307, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00017154899894625923, |
|
"loss": 0.8867, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00017102212855637513, |
|
"loss": 0.8658, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00017049525816649106, |
|
"loss": 0.8935, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.6699157007376185, |
|
"eval_loss": 0.8650425672531128, |
|
"eval_runtime": 42.0967, |
|
"eval_samples_per_second": 90.173, |
|
"eval_steps_per_second": 11.284, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00016996838777660696, |
|
"loss": 0.958, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00016944151738672287, |
|
"loss": 0.8805, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001689146469968388, |
|
"loss": 0.9066, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001683877766069547, |
|
"loss": 0.8468, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.7268177028451, |
|
"eval_loss": 0.7727631330490112, |
|
"eval_runtime": 41.8106, |
|
"eval_samples_per_second": 90.79, |
|
"eval_steps_per_second": 11.361, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001678609062170706, |
|
"loss": 0.797, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001673340358271865, |
|
"loss": 0.8564, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00016680716543730244, |
|
"loss": 0.7341, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016628029504741834, |
|
"loss": 0.829, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.6978398314014752, |
|
"eval_loss": 0.8096127510070801, |
|
"eval_runtime": 42.0874, |
|
"eval_samples_per_second": 90.193, |
|
"eval_steps_per_second": 11.286, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016575342465753425, |
|
"loss": 0.7701, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016522655426765018, |
|
"loss": 0.763, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016469968387776608, |
|
"loss": 0.7975, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016417281348788198, |
|
"loss": 0.7611, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.7168071654373024, |
|
"eval_loss": 0.774563193321228, |
|
"eval_runtime": 41.9532, |
|
"eval_samples_per_second": 90.482, |
|
"eval_steps_per_second": 11.322, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001636459430979979, |
|
"loss": 0.8112, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016311907270811382, |
|
"loss": 0.7897, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016259220231822972, |
|
"loss": 0.8332, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016206533192834562, |
|
"loss": 0.8072, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.7067966280295047, |
|
"eval_loss": 0.802047848701477, |
|
"eval_runtime": 41.8602, |
|
"eval_samples_per_second": 90.683, |
|
"eval_steps_per_second": 11.347, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016153846153846155, |
|
"loss": 0.7557, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016101159114857746, |
|
"loss": 0.8949, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016048472075869336, |
|
"loss": 0.7729, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001599578503688093, |
|
"loss": 0.8402, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.6983667017913593, |
|
"eval_loss": 0.8181519508361816, |
|
"eval_runtime": 42.0502, |
|
"eval_samples_per_second": 90.273, |
|
"eval_steps_per_second": 11.296, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001594309799789252, |
|
"loss": 0.869, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001589041095890411, |
|
"loss": 0.8356, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000158377239199157, |
|
"loss": 0.9184, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015785036880927293, |
|
"loss": 0.6367, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 0.7236610054969788, |
|
"eval_runtime": 41.5948, |
|
"eval_samples_per_second": 91.261, |
|
"eval_steps_per_second": 11.42, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015732349841938883, |
|
"loss": 0.7656, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015679662802950474, |
|
"loss": 0.8507, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015626975763962067, |
|
"loss": 0.7213, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015574288724973657, |
|
"loss": 0.7094, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.7336670179135932, |
|
"eval_loss": 0.75114905834198, |
|
"eval_runtime": 41.1659, |
|
"eval_samples_per_second": 92.212, |
|
"eval_steps_per_second": 11.539, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00015521601685985248, |
|
"loss": 0.7631, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001546891464699684, |
|
"loss": 0.7045, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001541622760800843, |
|
"loss": 0.7582, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001536354056902002, |
|
"loss": 0.8905, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.7489462592202318, |
|
"eval_loss": 0.6889460682868958, |
|
"eval_runtime": 41.1832, |
|
"eval_samples_per_second": 92.173, |
|
"eval_steps_per_second": 11.534, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015310853530031612, |
|
"loss": 0.7429, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015258166491043205, |
|
"loss": 0.7355, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015205479452054795, |
|
"loss": 0.7525, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015152792413066385, |
|
"loss": 0.8386, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.7178609062170705, |
|
"eval_loss": 0.7636825442314148, |
|
"eval_runtime": 41.2051, |
|
"eval_samples_per_second": 92.124, |
|
"eval_steps_per_second": 11.528, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015100105374077978, |
|
"loss": 0.7451, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001504741833508957, |
|
"loss": 0.6861, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001499473129610116, |
|
"loss": 0.7938, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001494204425711275, |
|
"loss": 0.7564, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.7365648050579557, |
|
"eval_loss": 0.7086778283119202, |
|
"eval_runtime": 41.133, |
|
"eval_samples_per_second": 92.286, |
|
"eval_steps_per_second": 11.548, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014889357218124342, |
|
"loss": 0.7699, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014836670179135933, |
|
"loss": 0.754, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014783983140147523, |
|
"loss": 0.7126, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014731296101159116, |
|
"loss": 0.7927, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 0.7288175225257874, |
|
"eval_runtime": 40.9893, |
|
"eval_samples_per_second": 92.609, |
|
"eval_steps_per_second": 11.588, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014678609062170707, |
|
"loss": 0.8823, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00014625922023182297, |
|
"loss": 0.6232, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001457323498419389, |
|
"loss": 0.7342, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001452054794520548, |
|
"loss": 0.7456, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.7494731296101159, |
|
"eval_loss": 0.7018606662750244, |
|
"eval_runtime": 41.1744, |
|
"eval_samples_per_second": 92.193, |
|
"eval_steps_per_second": 11.536, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00014467860906217073, |
|
"loss": 0.7618, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001441517386722866, |
|
"loss": 0.6762, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00014362486828240254, |
|
"loss": 0.7244, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00014309799789251844, |
|
"loss": 0.6604, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.7386722866174921, |
|
"eval_loss": 0.737483561038971, |
|
"eval_runtime": 41.1001, |
|
"eval_samples_per_second": 92.36, |
|
"eval_steps_per_second": 11.557, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00014257112750263435, |
|
"loss": 0.8146, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00014204425711275028, |
|
"loss": 0.7946, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00014151738672286618, |
|
"loss": 0.8021, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001409905163329821, |
|
"loss": 0.6272, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.755795574288725, |
|
"eval_loss": 0.6697063446044922, |
|
"eval_runtime": 41.0445, |
|
"eval_samples_per_second": 92.485, |
|
"eval_steps_per_second": 11.573, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.000140463645943098, |
|
"loss": 0.7822, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00013993677555321392, |
|
"loss": 0.5635, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013940990516332982, |
|
"loss": 0.8475, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013888303477344572, |
|
"loss": 0.6899, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7510537407797682, |
|
"eval_loss": 0.6927923560142517, |
|
"eval_runtime": 41.0153, |
|
"eval_samples_per_second": 92.551, |
|
"eval_steps_per_second": 11.581, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013835616438356166, |
|
"loss": 0.6789, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013782929399367756, |
|
"loss": 0.7409, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001373024236037935, |
|
"loss": 0.6629, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001367755532139094, |
|
"loss": 0.7612, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.7726554267650158, |
|
"eval_loss": 0.6404809355735779, |
|
"eval_runtime": 40.9053, |
|
"eval_samples_per_second": 92.8, |
|
"eval_steps_per_second": 11.612, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001362486828240253, |
|
"loss": 0.727, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00013572181243414123, |
|
"loss": 0.6921, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001351949420442571, |
|
"loss": 0.5298, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00013466807165437303, |
|
"loss": 0.5398, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.7673867228661749, |
|
"eval_loss": 0.6329030394554138, |
|
"eval_runtime": 41.1422, |
|
"eval_samples_per_second": 92.265, |
|
"eval_steps_per_second": 11.545, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00013414120126448894, |
|
"loss": 0.6673, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00013361433087460484, |
|
"loss": 0.682, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013308746048472077, |
|
"loss": 0.6457, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013256059009483667, |
|
"loss": 0.5942, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.7481559536354057, |
|
"eval_loss": 0.6839689016342163, |
|
"eval_runtime": 41.0621, |
|
"eval_samples_per_second": 92.445, |
|
"eval_steps_per_second": 11.568, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001320337197049526, |
|
"loss": 0.5086, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00013150684931506848, |
|
"loss": 0.6658, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001309799789251844, |
|
"loss": 0.7157, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00013045310853530031, |
|
"loss": 0.5924, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.7795047418335089, |
|
"eval_loss": 0.6245933175086975, |
|
"eval_runtime": 40.9868, |
|
"eval_samples_per_second": 92.615, |
|
"eval_steps_per_second": 11.589, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012992623814541622, |
|
"loss": 0.6697, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012939936775553215, |
|
"loss": 0.8167, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012887249736564805, |
|
"loss": 0.641, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00012834562697576398, |
|
"loss": 0.7035, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.7710748155953635, |
|
"eval_loss": 0.6422901153564453, |
|
"eval_runtime": 41.0116, |
|
"eval_samples_per_second": 92.559, |
|
"eval_steps_per_second": 11.582, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00012781875658587989, |
|
"loss": 0.6054, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001272918861959958, |
|
"loss": 0.5799, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00012676501580611172, |
|
"loss": 0.6575, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001262381454162276, |
|
"loss": 0.7114, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.7755532139093783, |
|
"eval_loss": 0.6289725303649902, |
|
"eval_runtime": 41.0218, |
|
"eval_samples_per_second": 92.536, |
|
"eval_steps_per_second": 11.579, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00012571127502634353, |
|
"loss": 0.6659, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00012518440463645943, |
|
"loss": 0.5519, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00012465753424657536, |
|
"loss": 0.7071, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00012413066385669126, |
|
"loss": 0.477, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.7895152792413066, |
|
"eval_loss": 0.5919615030288696, |
|
"eval_runtime": 41.0316, |
|
"eval_samples_per_second": 92.514, |
|
"eval_steps_per_second": 11.576, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00012360379346680717, |
|
"loss": 0.5786, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001230769230769231, |
|
"loss": 0.624, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012255005268703897, |
|
"loss": 0.5879, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001220231822971549, |
|
"loss": 0.6203, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.7881981032665965, |
|
"eval_loss": 0.6011049747467041, |
|
"eval_runtime": 41.3051, |
|
"eval_samples_per_second": 91.902, |
|
"eval_steps_per_second": 11.5, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012149631190727082, |
|
"loss": 0.6142, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012096944151738674, |
|
"loss": 0.7426, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00012044257112750264, |
|
"loss": 0.5851, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00011991570073761856, |
|
"loss": 0.4557, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.773972602739726, |
|
"eval_loss": 0.6244290471076965, |
|
"eval_runtime": 41.0912, |
|
"eval_samples_per_second": 92.38, |
|
"eval_steps_per_second": 11.56, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00011938883034773448, |
|
"loss": 0.7113, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00011886195995785037, |
|
"loss": 0.4499, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00011833508956796628, |
|
"loss": 0.7979, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001178082191780822, |
|
"loss": 0.6389, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.7968914646996839, |
|
"eval_loss": 0.5629897117614746, |
|
"eval_runtime": 41.1832, |
|
"eval_samples_per_second": 92.174, |
|
"eval_steps_per_second": 11.534, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011728134878819812, |
|
"loss": 0.5088, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011675447839831402, |
|
"loss": 0.5226, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00011622760800842994, |
|
"loss": 0.7143, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00011570073761854585, |
|
"loss": 0.6855, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.7966280295047419, |
|
"eval_loss": 0.570618748664856, |
|
"eval_runtime": 41.1893, |
|
"eval_samples_per_second": 92.16, |
|
"eval_steps_per_second": 11.532, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00011517386722866174, |
|
"loss": 0.5177, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00011464699683877766, |
|
"loss": 0.6428, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011412012644889358, |
|
"loss": 0.6207, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011359325605900948, |
|
"loss": 0.5935, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.8071654373024236, |
|
"eval_loss": 0.553897500038147, |
|
"eval_runtime": 41.1107, |
|
"eval_samples_per_second": 92.336, |
|
"eval_steps_per_second": 11.554, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001130663856691254, |
|
"loss": 0.5959, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00011253951527924131, |
|
"loss": 0.6435, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00011201264488935723, |
|
"loss": 0.6994, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011148577449947313, |
|
"loss": 0.6779, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.7829293993677555, |
|
"eval_loss": 0.5908519625663757, |
|
"eval_runtime": 40.8752, |
|
"eval_samples_per_second": 92.868, |
|
"eval_steps_per_second": 11.621, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011095890410958905, |
|
"loss": 0.6209, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00011043203371970497, |
|
"loss": 0.4842, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00010990516332982086, |
|
"loss": 0.6179, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00010937829293993678, |
|
"loss": 0.5032, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.8174394099051633, |
|
"eval_loss": 0.5368214249610901, |
|
"eval_runtime": 40.9272, |
|
"eval_samples_per_second": 92.75, |
|
"eval_steps_per_second": 11.606, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00010885142255005269, |
|
"loss": 0.4929, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00010832455216016861, |
|
"loss": 0.5158, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00010779768177028451, |
|
"loss": 0.5903, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00010727081138040043, |
|
"loss": 0.5604, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.773709167544784, |
|
"eval_loss": 0.6411083936691284, |
|
"eval_runtime": 40.9594, |
|
"eval_samples_per_second": 92.677, |
|
"eval_steps_per_second": 11.597, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00010674394099051635, |
|
"loss": 0.7767, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010621707060063224, |
|
"loss": 0.6115, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010569020021074815, |
|
"loss": 0.49, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010516332982086407, |
|
"loss": 0.5398, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.7642255005268704, |
|
"eval_loss": 0.6474949717521667, |
|
"eval_runtime": 41.1724, |
|
"eval_samples_per_second": 92.198, |
|
"eval_steps_per_second": 11.537, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010463645943097999, |
|
"loss": 0.7346, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010410958904109589, |
|
"loss": 0.5711, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010358271865121181, |
|
"loss": 0.4447, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00010305584826132772, |
|
"loss": 0.5243, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.7829293993677555, |
|
"eval_loss": 0.5976884365081787, |
|
"eval_runtime": 41.1462, |
|
"eval_samples_per_second": 92.256, |
|
"eval_steps_per_second": 11.544, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00010252897787144363, |
|
"loss": 0.6316, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00010200210748155954, |
|
"loss": 0.4889, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010147523709167546, |
|
"loss": 0.5548, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010094836670179138, |
|
"loss": 0.555, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 0.5374019742012024, |
|
"eval_runtime": 41.2791, |
|
"eval_samples_per_second": 91.959, |
|
"eval_steps_per_second": 11.507, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00010042149631190727, |
|
"loss": 0.5253, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.989462592202319e-05, |
|
"loss": 0.4858, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.936775553213909e-05, |
|
"loss": 0.4391, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.884088514225502e-05, |
|
"loss": 0.3991, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.8079557428872497, |
|
"eval_loss": 0.5598599910736084, |
|
"eval_runtime": 41.1706, |
|
"eval_samples_per_second": 92.202, |
|
"eval_steps_per_second": 11.537, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.831401475237092e-05, |
|
"loss": 0.4727, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.778714436248684e-05, |
|
"loss": 0.4029, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.726027397260274e-05, |
|
"loss": 0.4803, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.673340358271865e-05, |
|
"loss": 0.418, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_accuracy": 0.8308746048472075, |
|
"eval_loss": 0.5048983693122864, |
|
"eval_runtime": 41.4141, |
|
"eval_samples_per_second": 91.66, |
|
"eval_steps_per_second": 11.47, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.620653319283456e-05, |
|
"loss": 0.3603, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.567966280295048e-05, |
|
"loss": 0.3883, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.51527924130664e-05, |
|
"loss": 0.3467, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.46259220231823e-05, |
|
"loss": 0.4145, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.8116438356164384, |
|
"eval_loss": 0.5375648140907288, |
|
"eval_runtime": 41.1926, |
|
"eval_samples_per_second": 92.152, |
|
"eval_steps_per_second": 11.531, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.409905163329822e-05, |
|
"loss": 0.4242, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.357218124341412e-05, |
|
"loss": 0.3996, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.304531085353004e-05, |
|
"loss": 0.2548, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.251844046364595e-05, |
|
"loss": 0.4141, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.8229715489989463, |
|
"eval_loss": 0.5164880752563477, |
|
"eval_runtime": 41.0208, |
|
"eval_samples_per_second": 92.538, |
|
"eval_steps_per_second": 11.579, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.199157007376186e-05, |
|
"loss": 0.3818, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.146469968387778e-05, |
|
"loss": 0.3252, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.093782929399368e-05, |
|
"loss": 0.3749, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.041095890410958e-05, |
|
"loss": 0.3729, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.8303477344573235, |
|
"eval_loss": 0.5023428201675415, |
|
"eval_runtime": 41.3017, |
|
"eval_samples_per_second": 91.909, |
|
"eval_steps_per_second": 11.501, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.988408851422551e-05, |
|
"loss": 0.4618, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.935721812434142e-05, |
|
"loss": 0.3952, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.883034773445733e-05, |
|
"loss": 0.3239, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.830347734457324e-05, |
|
"loss": 0.4594, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.8374604847207587, |
|
"eval_loss": 0.4935886263847351, |
|
"eval_runtime": 42.0509, |
|
"eval_samples_per_second": 90.271, |
|
"eval_steps_per_second": 11.296, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.777660695468915e-05, |
|
"loss": 0.4036, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.724973656480506e-05, |
|
"loss": 0.2812, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.672286617492097e-05, |
|
"loss": 0.3308, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.619599578503689e-05, |
|
"loss": 0.3766, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.833245521601686, |
|
"eval_loss": 0.5025116801261902, |
|
"eval_runtime": 41.7352, |
|
"eval_samples_per_second": 90.954, |
|
"eval_steps_per_second": 11.381, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.56691253951528e-05, |
|
"loss": 0.2861, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.514225500526871e-05, |
|
"loss": 0.3525, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.461538461538461e-05, |
|
"loss": 0.4213, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.408851422550053e-05, |
|
"loss": 0.3628, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.8285036880927292, |
|
"eval_loss": 0.5210418105125427, |
|
"eval_runtime": 41.7139, |
|
"eval_samples_per_second": 91.001, |
|
"eval_steps_per_second": 11.387, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.356164383561645e-05, |
|
"loss": 0.5404, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.303477344573235e-05, |
|
"loss": 0.383, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.250790305584827e-05, |
|
"loss": 0.3332, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.198103266596417e-05, |
|
"loss": 0.4392, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.8387776606954689, |
|
"eval_loss": 0.49416255950927734, |
|
"eval_runtime": 41.5117, |
|
"eval_samples_per_second": 91.444, |
|
"eval_steps_per_second": 11.443, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.145416227608009e-05, |
|
"loss": 0.3811, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.0927291886196e-05, |
|
"loss": 0.3708, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.040042149631191e-05, |
|
"loss": 0.5215, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.987355110642783e-05, |
|
"loss": 0.4257, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.8300842992623815, |
|
"eval_loss": 0.49942925572395325, |
|
"eval_runtime": 41.6212, |
|
"eval_samples_per_second": 91.203, |
|
"eval_steps_per_second": 11.412, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.934668071654373e-05, |
|
"loss": 0.4234, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.881981032665965e-05, |
|
"loss": 0.3984, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.829293993677555e-05, |
|
"loss": 0.4514, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.776606954689147e-05, |
|
"loss": 0.3442, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.8403582718651211, |
|
"eval_loss": 0.4866703748703003, |
|
"eval_runtime": 41.5299, |
|
"eval_samples_per_second": 91.404, |
|
"eval_steps_per_second": 11.438, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.723919915700738e-05, |
|
"loss": 0.3327, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.671232876712329e-05, |
|
"loss": 0.4527, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.61854583772392e-05, |
|
"loss": 0.3862, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.565858798735511e-05, |
|
"loss": 0.4008, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.8163856691253951, |
|
"eval_loss": 0.5492302775382996, |
|
"eval_runtime": 41.4705, |
|
"eval_samples_per_second": 91.535, |
|
"eval_steps_per_second": 11.454, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.513171759747102e-05, |
|
"loss": 0.4259, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.460484720758694e-05, |
|
"loss": 0.3364, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.407797681770284e-05, |
|
"loss": 0.3355, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.355110642781876e-05, |
|
"loss": 0.3541, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.8335089567966281, |
|
"eval_loss": 0.5003746151924133, |
|
"eval_runtime": 41.6532, |
|
"eval_samples_per_second": 91.134, |
|
"eval_steps_per_second": 11.404, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.302423603793467e-05, |
|
"loss": 0.3951, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.249736564805058e-05, |
|
"loss": 0.3318, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.19704952581665e-05, |
|
"loss": 0.3423, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.144362486828242e-05, |
|
"loss": 0.3842, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.8435194942044257, |
|
"eval_loss": 0.46031510829925537, |
|
"eval_runtime": 41.7276, |
|
"eval_samples_per_second": 90.971, |
|
"eval_steps_per_second": 11.383, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.091675447839832e-05, |
|
"loss": 0.4708, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 7.038988408851422e-05, |
|
"loss": 0.2436, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.986301369863014e-05, |
|
"loss": 0.5203, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.933614330874604e-05, |
|
"loss": 0.3398, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.8519494204425712, |
|
"eval_loss": 0.44302287697792053, |
|
"eval_runtime": 41.6802, |
|
"eval_samples_per_second": 91.074, |
|
"eval_steps_per_second": 11.396, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.880927291886196e-05, |
|
"loss": 0.2589, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.828240252897788e-05, |
|
"loss": 0.3447, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.77555321390938e-05, |
|
"loss": 0.2995, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.72286617492097e-05, |
|
"loss": 0.3823, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.8466807165437302, |
|
"eval_loss": 0.45392054319381714, |
|
"eval_runtime": 41.6585, |
|
"eval_samples_per_second": 91.122, |
|
"eval_steps_per_second": 11.402, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.67017913593256e-05, |
|
"loss": 0.3361, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.617492096944152e-05, |
|
"loss": 0.354, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.564805057955743e-05, |
|
"loss": 0.3558, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.512118018967335e-05, |
|
"loss": 0.452, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.8482613277133825, |
|
"eval_loss": 0.4561702609062195, |
|
"eval_runtime": 41.5457, |
|
"eval_samples_per_second": 91.369, |
|
"eval_steps_per_second": 11.433, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.459430979978925e-05, |
|
"loss": 0.3694, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.406743940990516e-05, |
|
"loss": 0.3642, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.354056902002108e-05, |
|
"loss": 0.3627, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.301369863013699e-05, |
|
"loss": 0.3121, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.8548472075869337, |
|
"eval_loss": 0.4360343813896179, |
|
"eval_runtime": 41.6262, |
|
"eval_samples_per_second": 91.193, |
|
"eval_steps_per_second": 11.411, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.248682824025291e-05, |
|
"loss": 0.3276, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.195995785036881e-05, |
|
"loss": 0.2538, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.143308746048473e-05, |
|
"loss": 0.2862, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.090621707060063e-05, |
|
"loss": 0.3032, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.8535300316122234, |
|
"eval_loss": 0.4355548620223999, |
|
"eval_runtime": 41.6726, |
|
"eval_samples_per_second": 91.091, |
|
"eval_steps_per_second": 11.398, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.037934668071654e-05, |
|
"loss": 0.3316, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.985247629083246e-05, |
|
"loss": 0.3128, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.932560590094837e-05, |
|
"loss": 0.438, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.879873551106429e-05, |
|
"loss": 0.361, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_accuracy": 0.8530031612223393, |
|
"eval_loss": 0.4372209906578064, |
|
"eval_runtime": 41.6096, |
|
"eval_samples_per_second": 91.229, |
|
"eval_steps_per_second": 11.416, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.827186512118019e-05, |
|
"loss": 0.4102, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.774499473129611e-05, |
|
"loss": 0.3263, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.721812434141202e-05, |
|
"loss": 0.2175, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.669125395152792e-05, |
|
"loss": 0.5349, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.8532665964172813, |
|
"eval_loss": 0.4406832754611969, |
|
"eval_runtime": 41.5463, |
|
"eval_samples_per_second": 91.368, |
|
"eval_steps_per_second": 11.433, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.616438356164384e-05, |
|
"loss": 0.3344, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.563751317175975e-05, |
|
"loss": 0.4089, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.5110642781875665e-05, |
|
"loss": 0.4002, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.4583772391991575e-05, |
|
"loss": 0.2898, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.8606427818756586, |
|
"eval_loss": 0.41055828332901, |
|
"eval_runtime": 41.7124, |
|
"eval_samples_per_second": 91.004, |
|
"eval_steps_per_second": 11.388, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.405690200210748e-05, |
|
"loss": 0.4475, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.3530031612223395e-05, |
|
"loss": 0.3652, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.3003161222339306e-05, |
|
"loss": 0.3678, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.247629083245522e-05, |
|
"loss": 0.2751, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.8648577449947313, |
|
"eval_loss": 0.4102562963962555, |
|
"eval_runtime": 41.6322, |
|
"eval_samples_per_second": 91.179, |
|
"eval_steps_per_second": 11.409, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.1949420442571126e-05, |
|
"loss": 0.4149, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.142255005268704e-05, |
|
"loss": 0.2141, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.089567966280295e-05, |
|
"loss": 0.331, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.036880927291886e-05, |
|
"loss": 0.2966, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.8635405690200211, |
|
"eval_loss": 0.4106617271900177, |
|
"eval_runtime": 41.6805, |
|
"eval_samples_per_second": 91.074, |
|
"eval_steps_per_second": 11.396, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.984193888303478e-05, |
|
"loss": 0.3219, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.9315068493150684e-05, |
|
"loss": 0.3621, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.8788198103266594e-05, |
|
"loss": 0.3827, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.826132771338251e-05, |
|
"loss": 0.2146, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.8619599578503688, |
|
"eval_loss": 0.42587053775787354, |
|
"eval_runtime": 41.6661, |
|
"eval_samples_per_second": 91.105, |
|
"eval_steps_per_second": 11.4, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.773445732349842e-05, |
|
"loss": 0.2426, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.720758693361433e-05, |
|
"loss": 0.2925, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.668071654373025e-05, |
|
"loss": 0.308, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 0.4042, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.8714436248682824, |
|
"eval_loss": 0.39998331665992737, |
|
"eval_runtime": 41.7183, |
|
"eval_samples_per_second": 90.991, |
|
"eval_steps_per_second": 11.386, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.562697576396207e-05, |
|
"loss": 0.4615, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.510010537407798e-05, |
|
"loss": 0.3321, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.457323498419389e-05, |
|
"loss": 0.2821, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.4046364594309806e-05, |
|
"loss": 0.2715, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.8695995785036881, |
|
"eval_loss": 0.40408244729042053, |
|
"eval_runtime": 41.2952, |
|
"eval_samples_per_second": 91.923, |
|
"eval_steps_per_second": 11.503, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.3519494204425716e-05, |
|
"loss": 0.3723, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.2992623814541626e-05, |
|
"loss": 0.2885, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.2465753424657536e-05, |
|
"loss": 0.253, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.1938883034773446e-05, |
|
"loss": 0.2795, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.869072708113804, |
|
"eval_loss": 0.4010108709335327, |
|
"eval_runtime": 41.3963, |
|
"eval_samples_per_second": 91.699, |
|
"eval_steps_per_second": 11.474, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.1412012644889356e-05, |
|
"loss": 0.2582, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.088514225500527e-05, |
|
"loss": 0.3652, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.0358271865121184e-05, |
|
"loss": 0.404, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.9831401475237094e-05, |
|
"loss": 0.2104, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.8756585879873551, |
|
"eval_loss": 0.38390201330184937, |
|
"eval_runtime": 41.4547, |
|
"eval_samples_per_second": 91.57, |
|
"eval_steps_per_second": 11.458, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.9304531085353004e-05, |
|
"loss": 0.2561, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.8777660695468914e-05, |
|
"loss": 0.328, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.8250790305584824e-05, |
|
"loss": 0.2671, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.772391991570074e-05, |
|
"loss": 0.2486, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_accuracy": 0.8759220231822972, |
|
"eval_loss": 0.38206353783607483, |
|
"eval_runtime": 41.5068, |
|
"eval_samples_per_second": 91.455, |
|
"eval_steps_per_second": 11.444, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.719704952581665e-05, |
|
"loss": 0.3262, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.667017913593256e-05, |
|
"loss": 0.3114, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.614330874604848e-05, |
|
"loss": 0.3031, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.561643835616438e-05, |
|
"loss": 0.3005, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.8695995785036881, |
|
"eval_loss": 0.3960082530975342, |
|
"eval_runtime": 41.7318, |
|
"eval_samples_per_second": 90.962, |
|
"eval_steps_per_second": 11.382, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.50895679662803e-05, |
|
"loss": 0.3876, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.456269757639621e-05, |
|
"loss": 0.3107, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.403582718651212e-05, |
|
"loss": 0.2807, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.350895679662803e-05, |
|
"loss": 0.2839, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.8746048472075869, |
|
"eval_loss": 0.38452914357185364, |
|
"eval_runtime": 41.6884, |
|
"eval_samples_per_second": 91.056, |
|
"eval_steps_per_second": 11.394, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.2982086406743946e-05, |
|
"loss": 0.3845, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.245521601685985e-05, |
|
"loss": 0.2933, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.1928345626975767e-05, |
|
"loss": 0.3032, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.140147523709168e-05, |
|
"loss": 0.3101, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.8777660695468915, |
|
"eval_loss": 0.3834909200668335, |
|
"eval_runtime": 41.5606, |
|
"eval_samples_per_second": 91.337, |
|
"eval_steps_per_second": 11.429, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.087460484720759e-05, |
|
"loss": 0.3474, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.03477344573235e-05, |
|
"loss": 0.3775, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.9820864067439414e-05, |
|
"loss": 0.3458, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.929399367755532e-05, |
|
"loss": 0.1596, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.8822444678609063, |
|
"eval_loss": 0.3663616478443146, |
|
"eval_runtime": 41.6501, |
|
"eval_samples_per_second": 91.14, |
|
"eval_steps_per_second": 11.405, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.8767123287671234e-05, |
|
"loss": 0.2665, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.8240252897787145e-05, |
|
"loss": 0.3029, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.7713382507903058e-05, |
|
"loss": 0.3974, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7186512118018968e-05, |
|
"loss": 0.3662, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.8819810326659642, |
|
"eval_loss": 0.3698595464229584, |
|
"eval_runtime": 41.5938, |
|
"eval_samples_per_second": 91.264, |
|
"eval_steps_per_second": 11.42, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.6659641728134882e-05, |
|
"loss": 0.304, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.613277133825079e-05, |
|
"loss": 0.2496, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.5605900948366702e-05, |
|
"loss": 0.3154, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.5079030558482612e-05, |
|
"loss": 0.406, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.8788198103266597, |
|
"eval_loss": 0.3647627532482147, |
|
"eval_runtime": 41.6136, |
|
"eval_samples_per_second": 91.22, |
|
"eval_steps_per_second": 11.415, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.4552160168598526e-05, |
|
"loss": 0.2383, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.402528977871444e-05, |
|
"loss": 0.2272, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.3498419388830346e-05, |
|
"loss": 0.3772, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.297154899894626e-05, |
|
"loss": 0.3417, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.8806638566912539, |
|
"eval_loss": 0.37239569425582886, |
|
"eval_runtime": 41.7989, |
|
"eval_samples_per_second": 90.816, |
|
"eval_steps_per_second": 11.364, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2444678609062173e-05, |
|
"loss": 0.2708, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1917808219178083e-05, |
|
"loss": 0.2455, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.1390937829293994e-05, |
|
"loss": 0.3778, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0864067439409907e-05, |
|
"loss": 0.2489, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.8825079030558483, |
|
"eval_loss": 0.361176073551178, |
|
"eval_runtime": 41.7201, |
|
"eval_samples_per_second": 90.987, |
|
"eval_steps_per_second": 11.385, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0337197049525817e-05, |
|
"loss": 0.3192, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9810326659641728e-05, |
|
"loss": 0.2914, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.928345626975764e-05, |
|
"loss": 0.2141, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.8756585879873555e-05, |
|
"loss": 0.2464, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.8825079030558483, |
|
"eval_loss": 0.36530667543411255, |
|
"eval_runtime": 41.819, |
|
"eval_samples_per_second": 90.772, |
|
"eval_steps_per_second": 11.358, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.822971548998946e-05, |
|
"loss": 0.1882, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7702845100105375e-05, |
|
"loss": 0.252, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.717597471022129e-05, |
|
"loss": 0.3904, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.66491043203372e-05, |
|
"loss": 0.2391, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.881190727081138, |
|
"eval_loss": 0.3640024662017822, |
|
"eval_runtime": 41.67, |
|
"eval_samples_per_second": 91.097, |
|
"eval_steps_per_second": 11.399, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.612223393045311e-05, |
|
"loss": 0.3707, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.5595363540569022e-05, |
|
"loss": 0.3237, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.5068493150684931e-05, |
|
"loss": 0.2514, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.4541622760800844e-05, |
|
"loss": 0.4291, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.8861959957850368, |
|
"eval_loss": 0.3543952703475952, |
|
"eval_runtime": 41.457, |
|
"eval_samples_per_second": 91.565, |
|
"eval_steps_per_second": 11.458, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.4014752370916756e-05, |
|
"loss": 0.2714, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.3487881981032666e-05, |
|
"loss": 0.4164, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2961011591148578e-05, |
|
"loss": 0.2214, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2434141201264489e-05, |
|
"loss": 0.259, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.8896206533192834, |
|
"eval_loss": 0.3500049412250519, |
|
"eval_runtime": 41.6053, |
|
"eval_samples_per_second": 91.238, |
|
"eval_steps_per_second": 11.417, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1907270811380402e-05, |
|
"loss": 0.252, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1380400421496312e-05, |
|
"loss": 0.2847, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.0853530031612224e-05, |
|
"loss": 0.2444, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.0326659641728136e-05, |
|
"loss": 0.1871, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.8864594309799789, |
|
"eval_loss": 0.35397401452064514, |
|
"eval_runtime": 41.3976, |
|
"eval_samples_per_second": 91.696, |
|
"eval_steps_per_second": 11.474, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.799789251844046e-06, |
|
"loss": 0.1985, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.272918861959958e-06, |
|
"loss": 0.2478, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.74604847207587e-06, |
|
"loss": 0.2614, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.21917808219178e-06, |
|
"loss": 0.2337, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_accuracy": 0.8872497365648051, |
|
"eval_loss": 0.3516286015510559, |
|
"eval_runtime": 41.3744, |
|
"eval_samples_per_second": 91.748, |
|
"eval_steps_per_second": 11.481, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 0.2885, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.165437302423604e-06, |
|
"loss": 0.2682, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.638566912539515e-06, |
|
"loss": 0.1558, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.1116965226554275e-06, |
|
"loss": 0.2336, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.8890937829293993, |
|
"eval_loss": 0.3470406234264374, |
|
"eval_runtime": 41.6183, |
|
"eval_samples_per_second": 91.21, |
|
"eval_steps_per_second": 11.413, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.5848261327713385e-06, |
|
"loss": 0.2797, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.05795574288725e-06, |
|
"loss": 0.2202, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.531085353003161e-06, |
|
"loss": 0.2526, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.004214963119072e-06, |
|
"loss": 0.2401, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.8901475237091675, |
|
"eval_loss": 0.3446514308452606, |
|
"eval_runtime": 41.3853, |
|
"eval_samples_per_second": 91.723, |
|
"eval_steps_per_second": 11.477, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.4773445732349843e-06, |
|
"loss": 0.1877, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.9504741833508957e-06, |
|
"loss": 0.163, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.423603793466807e-06, |
|
"loss": 0.2202, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8967334035827188e-06, |
|
"loss": 0.2327, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_accuracy": 0.8904109589041096, |
|
"eval_loss": 0.3434308171272278, |
|
"eval_runtime": 41.3779, |
|
"eval_samples_per_second": 91.74, |
|
"eval_steps_per_second": 11.48, |
|
"step": 3760 |
|
} |
|
], |
|
"max_steps": 3796, |
|
"num_train_epochs": 2, |
|
"total_flos": 4.662174864046436e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|