|
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 738/738 [5:40:03<00:00, 26.33s/it] Could not locate the best model at ./results/checkpoint-700/pytorch_model.bin, if you are running a distributed training on multiple nodes, you should activate `--save_on_each_node`. |
|
{'loss': 0.3768, 'grad_norm': 38.466278076171875, 'learning_rate': 2.8867132867132867e-05, 'epoch': 0.14} |
|
{'loss': 0.0756, 'grad_norm': 14.140481948852539, 'learning_rate': 2.6769230769230772e-05, 'epoch': 0.27} |
|
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 738/738 [5:40:05<00:00, 27.65s/it] |
|
{'eval_loss': 0.05197741463780403, 'eval_accuracy': 0.9826039519941812, 'eval_precision': 0.9828336140409687, 'eval_recall': 0.9826039519941812, 'eval_f1': 0.9826056829662054, 'eval_runtime': 29.6162, 'eval_samples_per_second': 557.059, 'eval_steps_per_second': 1.114, 'epoch': 0.27} |
|
{'loss': 0.0357, 'grad_norm': 10.71005630493164, 'learning_rate': 2.467132867132867e-05, 'epoch': 0.41} |
|
{'loss': 0.0224, 'grad_norm': 6.4140753746032715, 'learning_rate': 2.2573426573426575e-05, 'epoch': 0.54} |
|
{'eval_loss': 0.022187748923897743, 'eval_accuracy': 0.992847617893078, 'eval_precision': 0.992880317157068, 'eval_recall': 0.992847617893078, 'eval_f1': 0.9928480758737946, 'eval_runtime': 28.8171, 'eval_samples_per_second': 572.507, 'eval_steps_per_second': 1.145, 'epoch': 0.54} |
|
{'loss': 0.0599, 'grad_norm': 64.76811218261719, 'learning_rate': 2.0475524475524476e-05, 'epoch': 0.68} |
|
{'loss': 0.0305, 'grad_norm': 8.04214859008789, 'learning_rate': 1.8377622377622377e-05, 'epoch': 0.81} |
|
{'eval_loss': 0.012443681247532368, 'eval_accuracy': 0.9959389016850527, 'eval_precision': 0.9959631467970647, 'eval_recall': 0.9959389016850527, 'eval_f1': 0.9959391339252232, 'eval_runtime': 29.5842, 'eval_samples_per_second': 557.663, 'eval_steps_per_second': 1.115, 'epoch': 0.81} |
|
{'loss': 0.0225, 'grad_norm': 4.762246131896973, 'learning_rate': 1.6279720279720282e-05, 'epoch': 0.95} |
|
{'loss': 0.01, 'grad_norm': 3.604496479034424, 'learning_rate': 1.4181818181818181e-05, 'epoch': 1.08} |
|
{'eval_loss': 0.005496619734913111, 'eval_accuracy': 0.998363437992484, 'eval_precision': 0.9983647277618173, 'eval_recall': 0.998363437992484, 'eval_f1': 0.9983634627799908, 'eval_runtime': 29.3845, 'eval_samples_per_second': 561.453, 'eval_steps_per_second': 1.123, 'epoch': 1.08} |
|
{'loss': 0.0176, 'grad_norm': 0.5998724102973938, 'learning_rate': 1.2083916083916084e-05, 'epoch': 1.22} |
|
{'loss': 0.0045, 'grad_norm': 0.35552045702934265, 'learning_rate': 9.986013986013986e-06, 'epoch': 1.36} |
|
{'eval_loss': 0.011412929743528366, 'eval_accuracy': 0.9964844223542247, 'eval_precision': 0.9965061626506839, 'eval_recall': 0.9964844223542247, 'eval_f1': 0.9964846149108655, 'eval_runtime': 29.1533, 'eval_samples_per_second': 565.904, 'eval_steps_per_second': 1.132, 'epoch': 1.36} |
|
{'loss': 0.0036, 'grad_norm': 1.270171046257019, 'learning_rate': 7.888111888111889e-06, 'epoch': 1.49} |
|
{'loss': 0.0034, 'grad_norm': 1.043688178062439, 'learning_rate': 5.79020979020979e-06, 'epoch': 1.63} |
|
{'eval_loss': 0.004131026100367308, 'eval_accuracy': 0.9987877318462844, 'eval_precision': 0.9987892102056513, 'eval_recall': 0.9987877318462844, 'eval_f1': 0.9987877515573715, 'eval_runtime': 29.5609, 'eval_samples_per_second': 558.102, 'eval_steps_per_second': 1.116, 'epoch': 1.63} |
|
{'loss': 0.0027, 'grad_norm': 0.4854286313056946, 'learning_rate': 3.6923076923076925e-06, 'epoch': 1.76} |
|
{'loss': 0.0022, 'grad_norm': 0.3757590651512146, 'learning_rate': 1.5944055944055944e-06, 'epoch': 1.9} |
|
{'eval_loss': 0.0027497005648911, 'eval_accuracy': 0.9991514122923991, 'eval_precision': 0.9991514122923991, 'eval_recall': 0.9991514122923991, 'eval_f1': 0.9991514122923991, 'eval_runtime': 29.2755, 'eval_samples_per_second': 563.543, 'eval_steps_per_second': 1.127, 'epoch': 1.9} |
|
{'train_runtime': 20406.1913, 'train_samples_per_second': 101.809, 'train_steps_per_second': 0.036, 'train_loss': 0.04531788620037761, 'epoch': 2.0} |
|
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:27<00:00, 1.18it/s] |
|
{'eval_loss': 0.003189795184880495, 'eval_accuracy': 0.9991514122923991, 'eval_precision': 0.9991516888192084, 'eval_recall': 0.9991514122923991, 'eval_f1': 0.9991514183555437, 'eval_runtime': 29.3015, 'eval_samples_per_second': 563.043, 'eval_steps_per_second': 1.126, 'epoch': 2.0} |
|
|