|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.88e-05, |
|
"loss": 0.3202, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.8813009961455675, |
|
"eval_f1": 0.16261325703385787, |
|
"eval_loss": 0.24328218400478363, |
|
"eval_precision": 0.2519394163280384, |
|
"eval_recall": 0.12004928709734201, |
|
"eval_runtime": 4.054, |
|
"eval_samples_per_second": 123.334, |
|
"eval_steps_per_second": 30.833, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.7600000000000003e-05, |
|
"loss": 0.23, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.9034639835811182, |
|
"eval_f1": 0.49651100375738055, |
|
"eval_loss": 0.21028906106948853, |
|
"eval_precision": 0.5048208113516464, |
|
"eval_recall": 0.48847033972892095, |
|
"eval_runtime": 4.2572, |
|
"eval_samples_per_second": 117.449, |
|
"eval_steps_per_second": 29.362, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.64e-05, |
|
"loss": 0.2013, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.8792611503228713, |
|
"eval_f1": 0.5627305035874741, |
|
"eval_loss": 0.2621181607246399, |
|
"eval_precision": 0.4545060658578856, |
|
"eval_recall": 0.7386023587396585, |
|
"eval_runtime": 4.1357, |
|
"eval_samples_per_second": 120.897, |
|
"eval_steps_per_second": 30.224, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.52e-05, |
|
"loss": 0.1874, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.893452470340892, |
|
"eval_f1": 0.5619149696320114, |
|
"eval_loss": 0.2326020449399948, |
|
"eval_precision": 0.47293721433726243, |
|
"eval_recall": 0.6921316669600422, |
|
"eval_runtime": 4.241, |
|
"eval_samples_per_second": 117.897, |
|
"eval_steps_per_second": 29.474, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.1847, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9060920058066777, |
|
"eval_f1": 0.5619039721369932, |
|
"eval_loss": 0.20794397592544556, |
|
"eval_precision": 0.5312010034493572, |
|
"eval_recall": 0.5963738778384088, |
|
"eval_runtime": 4.124, |
|
"eval_samples_per_second": 121.24, |
|
"eval_steps_per_second": 30.31, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 0.1567, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.9071932722631025, |
|
"eval_f1": 0.503008186211658, |
|
"eval_loss": 0.23015139997005463, |
|
"eval_precision": 0.5720053835800808, |
|
"eval_recall": 0.4488646365076571, |
|
"eval_runtime": 4.1201, |
|
"eval_samples_per_second": 121.355, |
|
"eval_steps_per_second": 30.339, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.16e-05, |
|
"loss": 0.1484, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.9038269009360765, |
|
"eval_f1": 0.5580642412882338, |
|
"eval_loss": 0.22997109591960907, |
|
"eval_precision": 0.540785997357992, |
|
"eval_recall": 0.5764830135539518, |
|
"eval_runtime": 4.134, |
|
"eval_samples_per_second": 120.948, |
|
"eval_steps_per_second": 30.237, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.04e-05, |
|
"loss": 0.1388, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.9075561896180607, |
|
"eval_f1": 0.5364304509572634, |
|
"eval_loss": 0.2365296632051468, |
|
"eval_precision": 0.5535580524344569, |
|
"eval_recall": 0.5203309276535821, |
|
"eval_runtime": 4.1587, |
|
"eval_samples_per_second": 120.229, |
|
"eval_steps_per_second": 30.057, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 0.1191, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.9067302397757421, |
|
"eval_f1": 0.574726200505476, |
|
"eval_loss": 0.26086461544036865, |
|
"eval_precision": 0.5511391177896268, |
|
"eval_recall": 0.6004224608343601, |
|
"eval_runtime": 4.1544, |
|
"eval_samples_per_second": 120.354, |
|
"eval_steps_per_second": 30.088, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.1193, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9059543474996246, |
|
"eval_f1": 0.5809305373525557, |
|
"eval_loss": 0.25283825397491455, |
|
"eval_precision": 0.543281752719473, |
|
"eval_recall": 0.6241858827671185, |
|
"eval_runtime": 4.1474, |
|
"eval_samples_per_second": 120.557, |
|
"eval_steps_per_second": 30.139, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 0.088, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.9037142714121239, |
|
"eval_f1": 0.5845009103142563, |
|
"eval_loss": 0.2839806079864502, |
|
"eval_precision": 0.5310701956271576, |
|
"eval_recall": 0.6498855835240275, |
|
"eval_runtime": 4.1556, |
|
"eval_samples_per_second": 120.318, |
|
"eval_steps_per_second": 30.08, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.56e-05, |
|
"loss": 0.0924, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.9085197977674325, |
|
"eval_f1": 0.5776627856834843, |
|
"eval_loss": 0.27629220485687256, |
|
"eval_precision": 0.5662833953331079, |
|
"eval_recall": 0.5895088892800563, |
|
"eval_runtime": 4.1675, |
|
"eval_samples_per_second": 119.975, |
|
"eval_steps_per_second": 29.994, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 1.44e-05, |
|
"loss": 0.0834, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_accuracy": 0.9037267858036743, |
|
"eval_f1": 0.5866475003992974, |
|
"eval_loss": 0.332010954618454, |
|
"eval_precision": 0.5369098085075281, |
|
"eval_recall": 0.6465411019186763, |
|
"eval_runtime": 4.1738, |
|
"eval_samples_per_second": 119.795, |
|
"eval_steps_per_second": 29.949, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 1.32e-05, |
|
"loss": 0.0654, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_accuracy": 0.9057416028432698, |
|
"eval_f1": 0.574710687542546, |
|
"eval_loss": 0.32423922419548035, |
|
"eval_precision": 0.5562510294844342, |
|
"eval_recall": 0.5944375990142581, |
|
"eval_runtime": 4.1539, |
|
"eval_samples_per_second": 120.368, |
|
"eval_steps_per_second": 30.092, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0689, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9046403363868448, |
|
"eval_f1": 0.5581112750629285, |
|
"eval_loss": 0.31789475679397583, |
|
"eval_precision": 0.550513698630137, |
|
"eval_recall": 0.5659214926949481, |
|
"eval_runtime": 4.1716, |
|
"eval_samples_per_second": 119.859, |
|
"eval_steps_per_second": 29.965, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.08e-05, |
|
"loss": 0.0498, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_accuracy": 0.9053661710967613, |
|
"eval_f1": 0.5820808768579258, |
|
"eval_loss": 0.38915345072746277, |
|
"eval_precision": 0.5509273813266269, |
|
"eval_recall": 0.6169688435134659, |
|
"eval_runtime": 4.1814, |
|
"eval_samples_per_second": 119.577, |
|
"eval_steps_per_second": 29.894, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.0528, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.9048155378685488, |
|
"eval_f1": 0.5776866283839212, |
|
"eval_loss": 0.3601633608341217, |
|
"eval_precision": 0.5409433092640958, |
|
"eval_recall": 0.619785249075867, |
|
"eval_runtime": 4.1473, |
|
"eval_samples_per_second": 120.56, |
|
"eval_steps_per_second": 30.14, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.0474, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_accuracy": 0.9040396455924313, |
|
"eval_f1": 0.5793253173012691, |
|
"eval_loss": 0.39758625626564026, |
|
"eval_precision": 0.5510722795869738, |
|
"eval_recall": 0.6106319309980637, |
|
"eval_runtime": 4.1737, |
|
"eval_samples_per_second": 119.798, |
|
"eval_steps_per_second": 29.949, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 7.2e-06, |
|
"loss": 0.039, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_accuracy": 0.9035766131050709, |
|
"eval_f1": 0.5778368499750789, |
|
"eval_loss": 0.4138449728488922, |
|
"eval_precision": 0.5471134182790625, |
|
"eval_recall": 0.6122161591269143, |
|
"eval_runtime": 4.1525, |
|
"eval_samples_per_second": 120.408, |
|
"eval_steps_per_second": 30.102, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0446, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9039520448515793, |
|
"eval_f1": 0.5882447535579319, |
|
"eval_loss": 0.408151775598526, |
|
"eval_precision": 0.5414446417998816, |
|
"eval_recall": 0.6439007217039253, |
|
"eval_runtime": 4.1562, |
|
"eval_samples_per_second": 120.303, |
|
"eval_steps_per_second": 30.076, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.0333, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_accuracy": 0.9046528507783952, |
|
"eval_f1": 0.5720617062984743, |
|
"eval_loss": 0.4318484365940094, |
|
"eval_precision": 0.5545274289491078, |
|
"eval_recall": 0.5907410667136067, |
|
"eval_runtime": 4.1724, |
|
"eval_samples_per_second": 119.834, |
|
"eval_steps_per_second": 29.959, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 3.6e-06, |
|
"loss": 0.0327, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.9054913150122641, |
|
"eval_f1": 0.5734657499363381, |
|
"eval_loss": 0.4232546091079712, |
|
"eval_precision": 0.5537704918032786, |
|
"eval_recall": 0.5946136243619081, |
|
"eval_runtime": 4.1536, |
|
"eval_samples_per_second": 120.378, |
|
"eval_steps_per_second": 30.095, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.03, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_accuracy": 0.9049782249587025, |
|
"eval_f1": 0.5769523005487548, |
|
"eval_loss": 0.44003215432167053, |
|
"eval_precision": 0.5543478260869565, |
|
"eval_recall": 0.6014786129202605, |
|
"eval_runtime": 4.2605, |
|
"eval_samples_per_second": 117.358, |
|
"eval_steps_per_second": 29.339, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.0286, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_accuracy": 0.9048280522600991, |
|
"eval_f1": 0.5807528586929305, |
|
"eval_loss": 0.4442707598209381, |
|
"eval_precision": 0.5522222222222222, |
|
"eval_recall": 0.6123921844745643, |
|
"eval_runtime": 4.3652, |
|
"eval_samples_per_second": 114.542, |
|
"eval_steps_per_second": 28.636, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0261, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9050407969164539, |
|
"eval_f1": 0.5811535881958416, |
|
"eval_loss": 0.4490407407283783, |
|
"eval_precision": 0.5548263166319833, |
|
"eval_recall": 0.6101038549551135, |
|
"eval_runtime": 4.2364, |
|
"eval_samples_per_second": 118.025, |
|
"eval_steps_per_second": 29.506, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2500, |
|
"total_flos": 2612991191040000.0, |
|
"train_loss": 0.10352115373611451, |
|
"train_runtime": 364.2147, |
|
"train_samples_per_second": 27.456, |
|
"train_steps_per_second": 6.864 |
|
} |
|
], |
|
"max_steps": 2500, |
|
"num_train_epochs": 10, |
|
"total_flos": 2612991191040000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|