|
{ |
|
"best_metric": 0.922854387656702, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2395", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 4790, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.998338094001558, |
|
"eval_f1": 0.8886836027713626, |
|
"eval_loss": 0.005215654149651527, |
|
"eval_precision": 0.8498233215547704, |
|
"eval_recall": 0.9312681510164569, |
|
"eval_runtime": 14.0241, |
|
"eval_samples_per_second": 484.736, |
|
"eval_steps_per_second": 60.61, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.0438413361169103, |
|
"grad_norm": 0.063787080347538, |
|
"learning_rate": 4.478079331941545e-05, |
|
"loss": 0.0127, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9983965203843158, |
|
"eval_f1": 0.90715667311412, |
|
"eval_loss": 0.0056168013252317905, |
|
"eval_precision": 0.9062801932367149, |
|
"eval_recall": 0.9080348499515973, |
|
"eval_runtime": 13.9845, |
|
"eval_samples_per_second": 486.11, |
|
"eval_steps_per_second": 60.782, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.0876826722338206, |
|
"grad_norm": 0.30002298951148987, |
|
"learning_rate": 3.95615866388309e-05, |
|
"loss": 0.0035, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9985393404310569, |
|
"eval_f1": 0.9184466019417475, |
|
"eval_loss": 0.0046853721141815186, |
|
"eval_precision": 0.9211295034079844, |
|
"eval_recall": 0.9157792836398838, |
|
"eval_runtime": 14.0593, |
|
"eval_samples_per_second": 483.522, |
|
"eval_steps_per_second": 60.458, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 3.1315240083507305, |
|
"grad_norm": 0.03608839586377144, |
|
"learning_rate": 3.434237995824635e-05, |
|
"loss": 0.002, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9984030122046221, |
|
"eval_f1": 0.9054054054054055, |
|
"eval_loss": 0.006538054905831814, |
|
"eval_precision": 0.9027911453320501, |
|
"eval_recall": 0.9080348499515973, |
|
"eval_runtime": 14.21, |
|
"eval_samples_per_second": 478.395, |
|
"eval_steps_per_second": 59.817, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 4.175365344467641, |
|
"grad_norm": 0.002257826505228877, |
|
"learning_rate": 2.9123173277661797e-05, |
|
"loss": 0.0014, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9985847831732018, |
|
"eval_f1": 0.922854387656702, |
|
"eval_loss": 0.006093208212405443, |
|
"eval_precision": 0.9193083573487032, |
|
"eval_recall": 0.9264278799612778, |
|
"eval_runtime": 13.9969, |
|
"eval_samples_per_second": 485.68, |
|
"eval_steps_per_second": 60.728, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 5.219206680584551, |
|
"grad_norm": 0.0006600466440431774, |
|
"learning_rate": 2.3903966597077245e-05, |
|
"loss": 0.0007, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9984289794858479, |
|
"eval_f1": 0.9072978303747534, |
|
"eval_loss": 0.006890583783388138, |
|
"eval_precision": 0.9246231155778895, |
|
"eval_recall": 0.8906098741529526, |
|
"eval_runtime": 14.017, |
|
"eval_samples_per_second": 484.982, |
|
"eval_steps_per_second": 60.641, |
|
"step": 2874 |
|
}, |
|
{ |
|
"epoch": 6.263048016701461, |
|
"grad_norm": 0.06641975790262222, |
|
"learning_rate": 1.8684759916492694e-05, |
|
"loss": 0.0004, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9985003895092184, |
|
"eval_f1": 0.910133843212237, |
|
"eval_loss": 0.007136390544474125, |
|
"eval_precision": 0.898961284230406, |
|
"eval_recall": 0.9215876089060987, |
|
"eval_runtime": 14.0087, |
|
"eval_samples_per_second": 485.271, |
|
"eval_steps_per_second": 60.677, |
|
"step": 3353 |
|
}, |
|
{ |
|
"epoch": 7.306889352818372, |
|
"grad_norm": 0.0022020984906703234, |
|
"learning_rate": 1.3465553235908144e-05, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9986172422747338, |
|
"eval_f1": 0.9218225419664269, |
|
"eval_loss": 0.007608836982399225, |
|
"eval_precision": 0.9134980988593155, |
|
"eval_recall": 0.9303000968054211, |
|
"eval_runtime": 13.9459, |
|
"eval_samples_per_second": 487.454, |
|
"eval_steps_per_second": 60.95, |
|
"step": 3832 |
|
}, |
|
{ |
|
"epoch": 8.350730688935283, |
|
"grad_norm": 0.0032535246573388577, |
|
"learning_rate": 8.246346555323591e-06, |
|
"loss": 0.0001, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9985653077122826, |
|
"eval_f1": 0.9187109187109187, |
|
"eval_loss": 0.007967105135321617, |
|
"eval_precision": 0.9130019120458891, |
|
"eval_recall": 0.9244917715392061, |
|
"eval_runtime": 14.2735, |
|
"eval_samples_per_second": 476.269, |
|
"eval_steps_per_second": 59.551, |
|
"step": 4311 |
|
}, |
|
{ |
|
"epoch": 9.394572025052192, |
|
"grad_norm": 0.00014514605572912842, |
|
"learning_rate": 3.02713987473904e-06, |
|
"loss": 0.0001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9986107504544274, |
|
"eval_f1": 0.919463087248322, |
|
"eval_loss": 0.008040270768105984, |
|
"eval_precision": 0.9107312440645774, |
|
"eval_recall": 0.9283639883833494, |
|
"eval_runtime": 14.9407, |
|
"eval_samples_per_second": 455.0, |
|
"eval_steps_per_second": 56.892, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 4790, |
|
"total_flos": 1.7383912956387396e+16, |
|
"train_loss": 0.002211083533992847, |
|
"train_runtime": 1477.0977, |
|
"train_samples_per_second": 207.447, |
|
"train_steps_per_second": 3.243 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4790, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7383912956387396e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|