DeBERTaV3-small-GeneralSentenceTransformer-v2-1-checkpoints-tmp
/
last-checkpoint
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 1.0, | |
"eval_steps": 1883, | |
"global_step": 18824, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.10003187420314492, | |
"grad_norm": 39.029380798339844, | |
"learning_rate": 9.976625584360391e-07, | |
"loss": 3.6326, | |
"step": 1883 | |
}, | |
{ | |
"epoch": 0.10003187420314492, | |
"eval_nli-pairs_loss": 2.6952593326568604, | |
"eval_nli-pairs_runtime": 25.731, | |
"eval_nli-pairs_samples_per_second": 264.584, | |
"eval_nli-pairs_steps_per_second": 16.556, | |
"step": 1883 | |
}, | |
{ | |
"epoch": 0.10003187420314492, | |
"eval_scitail-pairs-pos_loss": 2.172569990158081, | |
"eval_scitail-pairs-pos_runtime": 6.2772, | |
"eval_scitail-pairs-pos_samples_per_second": 207.736, | |
"eval_scitail-pairs-pos_steps_per_second": 13.063, | |
"step": 1883 | |
}, | |
{ | |
"epoch": 0.10003187420314492, | |
"eval_qnli-contrastive_loss": 2.702913999557495, | |
"eval_qnli-contrastive_runtime": 16.475, | |
"eval_qnli-contrastive_samples_per_second": 331.593, | |
"eval_qnli-contrastive_steps_per_second": 20.759, | |
"step": 1883 | |
}, | |
{ | |
"epoch": 0.20006374840628985, | |
"grad_norm": 25.459535598754883, | |
"learning_rate": 1.9974500637484067e-06, | |
"loss": 1.7665, | |
"step": 3766 | |
}, | |
{ | |
"epoch": 0.20006374840628985, | |
"eval_nli-pairs_loss": 1.2885302305221558, | |
"eval_nli-pairs_runtime": 25.4564, | |
"eval_nli-pairs_samples_per_second": 267.438, | |
"eval_nli-pairs_steps_per_second": 16.734, | |
"step": 3766 | |
}, | |
{ | |
"epoch": 0.20006374840628985, | |
"eval_scitail-pairs-pos_loss": 0.9637606143951416, | |
"eval_scitail-pairs-pos_runtime": 6.1565, | |
"eval_scitail-pairs-pos_samples_per_second": 211.809, | |
"eval_scitail-pairs-pos_steps_per_second": 13.319, | |
"step": 3766 | |
}, | |
{ | |
"epoch": 0.20006374840628985, | |
"eval_qnli-contrastive_loss": 1.713547945022583, | |
"eval_qnli-contrastive_runtime": 16.4307, | |
"eval_qnli-contrastive_samples_per_second": 332.487, | |
"eval_qnli-contrastive_steps_per_second": 20.815, | |
"step": 3766 | |
}, | |
{ | |
"epoch": 0.3000956226094348, | |
"grad_norm": 0.8201059103012085, | |
"learning_rate": 2.9977688057798558e-06, | |
"loss": 1.1522, | |
"step": 5649 | |
}, | |
{ | |
"epoch": 0.3000956226094348, | |
"eval_nli-pairs_loss": 0.9093547463417053, | |
"eval_nli-pairs_runtime": 25.1271, | |
"eval_nli-pairs_samples_per_second": 270.943, | |
"eval_nli-pairs_steps_per_second": 16.954, | |
"step": 5649 | |
}, | |
{ | |
"epoch": 0.3000956226094348, | |
"eval_scitail-pairs-pos_loss": 0.7571232914924622, | |
"eval_scitail-pairs-pos_runtime": 5.9021, | |
"eval_scitail-pairs-pos_samples_per_second": 220.937, | |
"eval_scitail-pairs-pos_steps_per_second": 13.893, | |
"step": 5649 | |
}, | |
{ | |
"epoch": 0.3000956226094348, | |
"eval_qnli-contrastive_loss": 0.91651451587677, | |
"eval_qnli-contrastive_runtime": 16.2309, | |
"eval_qnli-contrastive_samples_per_second": 336.579, | |
"eval_qnli-contrastive_steps_per_second": 21.071, | |
"step": 5649 | |
}, | |
{ | |
"epoch": 0.4001274968125797, | |
"grad_norm": 12.970890045166016, | |
"learning_rate": 3.9975563110922225e-06, | |
"loss": 0.9533, | |
"step": 7532 | |
}, | |
{ | |
"epoch": 0.4001274968125797, | |
"eval_nli-pairs_loss": 0.7290090322494507, | |
"eval_nli-pairs_runtime": 25.3154, | |
"eval_nli-pairs_samples_per_second": 268.928, | |
"eval_nli-pairs_steps_per_second": 16.828, | |
"step": 7532 | |
}, | |
{ | |
"epoch": 0.4001274968125797, | |
"eval_scitail-pairs-pos_loss": 0.6498324275016785, | |
"eval_scitail-pairs-pos_runtime": 6.0764, | |
"eval_scitail-pairs-pos_samples_per_second": 214.6, | |
"eval_scitail-pairs-pos_steps_per_second": 13.495, | |
"step": 7532 | |
}, | |
{ | |
"epoch": 0.4001274968125797, | |
"eval_qnli-contrastive_loss": 0.4303818643093109, | |
"eval_qnli-contrastive_runtime": 16.4463, | |
"eval_qnli-contrastive_samples_per_second": 332.172, | |
"eval_qnli-contrastive_steps_per_second": 20.795, | |
"step": 7532 | |
}, | |
{ | |
"epoch": 0.5001593710157246, | |
"grad_norm": 10.865135192871094, | |
"learning_rate": 4.9973438164045905e-06, | |
"loss": 0.8013, | |
"step": 9415 | |
}, | |
{ | |
"epoch": 0.5001593710157246, | |
"eval_nli-pairs_loss": 0.6431913375854492, | |
"eval_nli-pairs_runtime": 25.4337, | |
"eval_nli-pairs_samples_per_second": 267.676, | |
"eval_nli-pairs_steps_per_second": 16.749, | |
"step": 9415 | |
}, | |
{ | |
"epoch": 0.5001593710157246, | |
"eval_scitail-pairs-pos_loss": 0.6006649732589722, | |
"eval_scitail-pairs-pos_runtime": 6.199, | |
"eval_scitail-pairs-pos_samples_per_second": 210.355, | |
"eval_scitail-pairs-pos_steps_per_second": 13.228, | |
"step": 9415 | |
}, | |
{ | |
"epoch": 0.5001593710157246, | |
"eval_qnli-contrastive_loss": 0.25907495617866516, | |
"eval_qnli-contrastive_runtime": 16.4896, | |
"eval_qnli-contrastive_samples_per_second": 331.299, | |
"eval_qnli-contrastive_steps_per_second": 20.74, | |
"step": 9415 | |
}, | |
{ | |
"epoch": 0.6001912452188696, | |
"grad_norm": 2.3549954891204834, | |
"learning_rate": 5.997662558436039e-06, | |
"loss": 0.6568, | |
"step": 11298 | |
}, | |
{ | |
"epoch": 0.6001912452188696, | |
"eval_nli-pairs_loss": 0.5626155734062195, | |
"eval_nli-pairs_runtime": 25.1226, | |
"eval_nli-pairs_samples_per_second": 270.991, | |
"eval_nli-pairs_steps_per_second": 16.957, | |
"step": 11298 | |
}, | |
{ | |
"epoch": 0.6001912452188696, | |
"eval_scitail-pairs-pos_loss": 0.5481033325195312, | |
"eval_scitail-pairs-pos_runtime": 6.0513, | |
"eval_scitail-pairs-pos_samples_per_second": 215.492, | |
"eval_scitail-pairs-pos_steps_per_second": 13.551, | |
"step": 11298 | |
}, | |
{ | |
"epoch": 0.6001912452188696, | |
"eval_qnli-contrastive_loss": 0.13647136092185974, | |
"eval_qnli-contrastive_runtime": 16.3856, | |
"eval_qnli-contrastive_samples_per_second": 333.402, | |
"eval_qnli-contrastive_steps_per_second": 20.872, | |
"step": 11298 | |
}, | |
{ | |
"epoch": 0.7002231194220144, | |
"grad_norm": 10.994942665100098, | |
"learning_rate": 6.997450063748406e-06, | |
"loss": 0.6095, | |
"step": 13181 | |
}, | |
{ | |
"epoch": 0.7002231194220144, | |
"eval_nli-pairs_loss": 0.5226004719734192, | |
"eval_nli-pairs_runtime": 25.203, | |
"eval_nli-pairs_samples_per_second": 270.127, | |
"eval_nli-pairs_steps_per_second": 16.903, | |
"step": 13181 | |
}, | |
{ | |
"epoch": 0.7002231194220144, | |
"eval_scitail-pairs-pos_loss": 0.5108869075775146, | |
"eval_scitail-pairs-pos_runtime": 6.1126, | |
"eval_scitail-pairs-pos_samples_per_second": 213.331, | |
"eval_scitail-pairs-pos_steps_per_second": 13.415, | |
"step": 13181 | |
}, | |
{ | |
"epoch": 0.7002231194220144, | |
"eval_qnli-contrastive_loss": 0.16431590914726257, | |
"eval_qnli-contrastive_runtime": 16.4372, | |
"eval_qnli-contrastive_samples_per_second": 332.355, | |
"eval_qnli-contrastive_steps_per_second": 20.806, | |
"step": 13181 | |
}, | |
{ | |
"epoch": 0.8002549936251594, | |
"grad_norm": 8.826902389526367, | |
"learning_rate": 7.997768805779857e-06, | |
"loss": 0.5694, | |
"step": 15064 | |
}, | |
{ | |
"epoch": 0.8002549936251594, | |
"eval_nli-pairs_loss": 0.49213743209838867, | |
"eval_nli-pairs_runtime": 25.0892, | |
"eval_nli-pairs_samples_per_second": 271.352, | |
"eval_nli-pairs_steps_per_second": 16.979, | |
"step": 15064 | |
}, | |
{ | |
"epoch": 0.8002549936251594, | |
"eval_scitail-pairs-pos_loss": 0.5194270610809326, | |
"eval_scitail-pairs-pos_runtime": 6.261, | |
"eval_scitail-pairs-pos_samples_per_second": 208.273, | |
"eval_scitail-pairs-pos_steps_per_second": 13.097, | |
"step": 15064 | |
}, | |
{ | |
"epoch": 0.8002549936251594, | |
"eval_qnli-contrastive_loss": 0.05173656344413757, | |
"eval_qnli-contrastive_runtime": 16.3578, | |
"eval_qnli-contrastive_samples_per_second": 333.97, | |
"eval_qnli-contrastive_steps_per_second": 20.908, | |
"step": 15064 | |
}, | |
{ | |
"epoch": 0.9002868678283042, | |
"grad_norm": 0.4369502067565918, | |
"learning_rate": 8.997556311092223e-06, | |
"loss": 0.5375, | |
"step": 16947 | |
}, | |
{ | |
"epoch": 0.9002868678283042, | |
"eval_nli-pairs_loss": 0.5060996413230896, | |
"eval_nli-pairs_runtime": 25.3561, | |
"eval_nli-pairs_samples_per_second": 268.496, | |
"eval_nli-pairs_steps_per_second": 16.801, | |
"step": 16947 | |
}, | |
{ | |
"epoch": 0.9002868678283042, | |
"eval_scitail-pairs-pos_loss": 0.5642966628074646, | |
"eval_scitail-pairs-pos_runtime": 6.1557, | |
"eval_scitail-pairs-pos_samples_per_second": 211.837, | |
"eval_scitail-pairs-pos_steps_per_second": 13.321, | |
"step": 16947 | |
}, | |
{ | |
"epoch": 0.9002868678283042, | |
"eval_qnli-contrastive_loss": 0.046243228018283844, | |
"eval_qnli-contrastive_runtime": 16.4399, | |
"eval_qnli-contrastive_samples_per_second": 332.302, | |
"eval_qnli-contrastive_steps_per_second": 20.803, | |
"step": 16947 | |
} | |
], | |
"logging_steps": 1883, | |
"max_steps": 37648, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 18824, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 0.0, | |
"train_batch_size": 28, | |
"trial_name": null, | |
"trial_params": null | |
} | |