|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.062061786651611, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.566, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_f1": 0.6402562480227776, |
|
"eval_loss": 0.5205540657043457, |
|
"eval_precision": 0.6483653398896937, |
|
"eval_recall": 0.6353427895981087, |
|
"eval_runtime": 5.1327, |
|
"eval_samples_per_second": 77.737, |
|
"eval_steps_per_second": 9.741, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.104482173919678, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5117, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7343358395989975, |
|
"eval_f1": 0.69391211208893, |
|
"eval_loss": 0.5062463879585266, |
|
"eval_precision": 0.6880119239984399, |
|
"eval_recall": 0.7045371885797418, |
|
"eval_runtime": 5.055, |
|
"eval_samples_per_second": 78.931, |
|
"eval_steps_per_second": 9.891, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.243982791900635, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4804, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7669172932330827, |
|
"eval_f1": 0.7152209115816456, |
|
"eval_loss": 0.46674054861068726, |
|
"eval_precision": 0.7182055749128919, |
|
"eval_recall": 0.7125841062011276, |
|
"eval_runtime": 5.066, |
|
"eval_samples_per_second": 78.76, |
|
"eval_steps_per_second": 9.87, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.68826961517334, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4345, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7919799498746867, |
|
"eval_f1": 0.744501107107864, |
|
"eval_loss": 0.43496260046958923, |
|
"eval_precision": 0.7494180559924504, |
|
"eval_recall": 0.7403164211674851, |
|
"eval_runtime": 5.0881, |
|
"eval_samples_per_second": 78.418, |
|
"eval_steps_per_second": 9.827, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.9247703552246094, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4081, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7944862155388471, |
|
"eval_f1": 0.7660995138690305, |
|
"eval_loss": 0.43371620774269104, |
|
"eval_precision": 0.7565013111888113, |
|
"eval_recall": 0.7845971994908165, |
|
"eval_runtime": 5.0664, |
|
"eval_samples_per_second": 78.754, |
|
"eval_steps_per_second": 9.869, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.43408203125, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3793, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.7753378378378378, |
|
"eval_loss": 0.39230969548225403, |
|
"eval_precision": 0.7857142857142857, |
|
"eval_recall": 0.7673213311511184, |
|
"eval_runtime": 5.0623, |
|
"eval_samples_per_second": 78.818, |
|
"eval_steps_per_second": 9.877, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.72346031665802, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3665, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7933776044839771, |
|
"eval_loss": 0.3765198886394501, |
|
"eval_precision": 0.7949020208205757, |
|
"eval_recall": 0.7919167121294781, |
|
"eval_runtime": 5.0479, |
|
"eval_samples_per_second": 79.042, |
|
"eval_steps_per_second": 9.905, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.3123555183410645, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3471, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.796615353247018, |
|
"eval_loss": 0.368134468793869, |
|
"eval_precision": 0.8088983050847458, |
|
"eval_recall": 0.7872340425531914, |
|
"eval_runtime": 5.0574, |
|
"eval_samples_per_second": 78.894, |
|
"eval_steps_per_second": 9.886, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.656528949737549, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3498, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.790357364116157, |
|
"eval_loss": 0.3676688075065613, |
|
"eval_precision": 0.8023956975228161, |
|
"eval_recall": 0.7811874886342971, |
|
"eval_runtime": 5.0449, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 9.911, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.076303482055664, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3282, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.7917273014868713, |
|
"eval_loss": 0.363395094871521, |
|
"eval_precision": 0.8074456774536514, |
|
"eval_recall": 0.780460083651573, |
|
"eval_runtime": 5.0444, |
|
"eval_samples_per_second": 79.097, |
|
"eval_steps_per_second": 9.912, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.567991256713867, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3149, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8065409159159159, |
|
"eval_loss": 0.3537313938140869, |
|
"eval_precision": 0.8180272108843537, |
|
"eval_recall": 0.7975541007455902, |
|
"eval_runtime": 5.0808, |
|
"eval_samples_per_second": 78.53, |
|
"eval_steps_per_second": 9.841, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 11.14825439453125, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3092, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8167483159828537, |
|
"eval_loss": 0.3528764247894287, |
|
"eval_precision": 0.8201621387462095, |
|
"eval_recall": 0.8136024731769412, |
|
"eval_runtime": 5.0935, |
|
"eval_samples_per_second": 78.336, |
|
"eval_steps_per_second": 9.816, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.867825031280518, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.3135, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8121903546212454, |
|
"eval_loss": 0.3471047580242157, |
|
"eval_precision": 0.8331751305173232, |
|
"eval_recall": 0.7978723404255319, |
|
"eval_runtime": 5.0603, |
|
"eval_samples_per_second": 78.849, |
|
"eval_steps_per_second": 9.881, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 12.051921844482422, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3103, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8269335415335841, |
|
"eval_loss": 0.3426941931247711, |
|
"eval_precision": 0.8430382253911666, |
|
"eval_recall": 0.8149663575195489, |
|
"eval_runtime": 5.0889, |
|
"eval_samples_per_second": 78.406, |
|
"eval_steps_per_second": 9.825, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.2898627519607544, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2974, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8297847585805701, |
|
"eval_loss": 0.33716997504234314, |
|
"eval_precision": 0.8385357006491028, |
|
"eval_recall": 0.8224677214038916, |
|
"eval_runtime": 5.0653, |
|
"eval_samples_per_second": 78.772, |
|
"eval_steps_per_second": 9.871, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.9146409034729, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2905, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8386324041811847, |
|
"eval_loss": 0.3345378339290619, |
|
"eval_precision": 0.8487869670976828, |
|
"eval_recall": 0.830287324968176, |
|
"eval_runtime": 5.0631, |
|
"eval_samples_per_second": 78.806, |
|
"eval_steps_per_second": 9.875, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 4.737354278564453, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2895, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8269335415335841, |
|
"eval_loss": 0.3339170217514038, |
|
"eval_precision": 0.8430382253911666, |
|
"eval_recall": 0.8149663575195489, |
|
"eval_runtime": 5.0593, |
|
"eval_samples_per_second": 78.864, |
|
"eval_steps_per_second": 9.883, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.6233842372894287, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2922, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8386324041811847, |
|
"eval_loss": 0.3318663537502289, |
|
"eval_precision": 0.8487869670976828, |
|
"eval_recall": 0.830287324968176, |
|
"eval_runtime": 5.0649, |
|
"eval_samples_per_second": 78.778, |
|
"eval_steps_per_second": 9.872, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.824616432189941, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2843, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8269335415335841, |
|
"eval_loss": 0.3319249749183655, |
|
"eval_precision": 0.8430382253911666, |
|
"eval_recall": 0.8149663575195489, |
|
"eval_runtime": 5.072, |
|
"eval_samples_per_second": 78.668, |
|
"eval_steps_per_second": 9.858, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.412130832672119, |
|
"learning_rate": 0.0, |
|
"loss": 0.287, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8279052989013229, |
|
"eval_loss": 0.33124828338623047, |
|
"eval_precision": 0.8414113428943938, |
|
"eval_recall": 0.8174668121476631, |
|
"eval_runtime": 5.1086, |
|
"eval_samples_per_second": 78.104, |
|
"eval_steps_per_second": 9.788, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7597037114448000.0, |
|
"train_loss": 0.35801424432973394, |
|
"train_runtime": 1952.1205, |
|
"train_samples_per_second": 37.272, |
|
"train_steps_per_second": 1.25 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7597037114448000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|