DeBERTaV3-small-GeneralSentenceTransformer-v2-1-checkpoints-tmp
/
last-checkpoint
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 1.0, | |
"eval_steps": 471, | |
"global_step": 4710, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.1, | |
"grad_norm": 17.846229553222656, | |
"learning_rate": 3.004181408813123e-06, | |
"loss": 4.4848, | |
"step": 471 | |
}, | |
{ | |
"epoch": 0.1, | |
"eval_nli-pairs_loss": 3.227689504623413, | |
"eval_nli-pairs_runtime": 23.5758, | |
"eval_nli-pairs_samples_per_second": 288.77, | |
"eval_nli-pairs_steps_per_second": 18.069, | |
"step": 471 | |
}, | |
{ | |
"epoch": 0.1, | |
"eval_scitail-pairs-pos_loss": 2.469686508178711, | |
"eval_scitail-pairs-pos_runtime": 5.4679, | |
"eval_scitail-pairs-pos_samples_per_second": 238.485, | |
"eval_scitail-pairs-pos_steps_per_second": 14.997, | |
"step": 471 | |
}, | |
{ | |
"epoch": 0.1, | |
"eval_qnli-contrastive_loss": 3.3142430782318115, | |
"eval_qnli-contrastive_runtime": 15.7426, | |
"eval_qnli-contrastive_samples_per_second": 347.019, | |
"eval_qnli-contrastive_steps_per_second": 21.724, | |
"step": 471 | |
}, | |
{ | |
"epoch": 0.2, | |
"grad_norm": 29.59261703491211, | |
"learning_rate": 6.027661627532969e-06, | |
"loss": 2.6358, | |
"step": 942 | |
}, | |
{ | |
"epoch": 0.2, | |
"eval_nli-pairs_loss": 1.5920209884643555, | |
"eval_nli-pairs_runtime": 23.3765, | |
"eval_nli-pairs_samples_per_second": 291.232, | |
"eval_nli-pairs_steps_per_second": 18.223, | |
"step": 942 | |
}, | |
{ | |
"epoch": 0.2, | |
"eval_scitail-pairs-pos_loss": 0.9157330989837646, | |
"eval_scitail-pairs-pos_runtime": 5.4478, | |
"eval_scitail-pairs-pos_samples_per_second": 239.363, | |
"eval_scitail-pairs-pos_steps_per_second": 15.052, | |
"step": 942 | |
}, | |
{ | |
"epoch": 0.2, | |
"eval_qnli-contrastive_loss": 2.663238763809204, | |
"eval_qnli-contrastive_runtime": 15.751, | |
"eval_qnli-contrastive_samples_per_second": 346.836, | |
"eval_qnli-contrastive_steps_per_second": 21.713, | |
"step": 942 | |
}, | |
{ | |
"epoch": 0.3, | |
"grad_norm": 24.539047241210938, | |
"learning_rate": 9.057574782888389e-06, | |
"loss": 1.7183, | |
"step": 1413 | |
}, | |
{ | |
"epoch": 0.3, | |
"eval_nli-pairs_loss": 1.1536647081375122, | |
"eval_nli-pairs_runtime": 23.6115, | |
"eval_nli-pairs_samples_per_second": 288.335, | |
"eval_nli-pairs_steps_per_second": 18.042, | |
"step": 1413 | |
}, | |
{ | |
"epoch": 0.3, | |
"eval_scitail-pairs-pos_loss": 0.7445429563522339, | |
"eval_scitail-pairs-pos_runtime": 5.3966, | |
"eval_scitail-pairs-pos_samples_per_second": 241.635, | |
"eval_scitail-pairs-pos_steps_per_second": 15.195, | |
"step": 1413 | |
}, | |
{ | |
"epoch": 0.3, | |
"eval_qnli-contrastive_loss": 2.130812406539917, | |
"eval_qnli-contrastive_runtime": 15.7293, | |
"eval_qnli-contrastive_samples_per_second": 347.313, | |
"eval_qnli-contrastive_steps_per_second": 21.743, | |
"step": 1413 | |
}, | |
{ | |
"epoch": 0.4, | |
"grad_norm": 139.8046875, | |
"learning_rate": 1.208748793824381e-05, | |
"loss": 1.6114, | |
"step": 1884 | |
}, | |
{ | |
"epoch": 0.4, | |
"eval_nli-pairs_loss": 0.8992123007774353, | |
"eval_nli-pairs_runtime": 23.6196, | |
"eval_nli-pairs_samples_per_second": 288.236, | |
"eval_nli-pairs_steps_per_second": 18.036, | |
"step": 1884 | |
}, | |
{ | |
"epoch": 0.4, | |
"eval_scitail-pairs-pos_loss": 0.6193641424179077, | |
"eval_scitail-pairs-pos_runtime": 5.4024, | |
"eval_scitail-pairs-pos_samples_per_second": 241.376, | |
"eval_scitail-pairs-pos_steps_per_second": 15.179, | |
"step": 1884 | |
}, | |
{ | |
"epoch": 0.4, | |
"eval_qnli-contrastive_loss": 1.6952241659164429, | |
"eval_qnli-contrastive_runtime": 15.7392, | |
"eval_qnli-contrastive_samples_per_second": 347.095, | |
"eval_qnli-contrastive_steps_per_second": 21.729, | |
"step": 1884 | |
}, | |
{ | |
"epoch": 0.5, | |
"grad_norm": 2.1193487644195557, | |
"learning_rate": 1.511740109359923e-05, | |
"loss": 1.5367, | |
"step": 2355 | |
}, | |
{ | |
"epoch": 0.5, | |
"eval_nli-pairs_loss": 0.8112400770187378, | |
"eval_nli-pairs_runtime": 23.4573, | |
"eval_nli-pairs_samples_per_second": 290.23, | |
"eval_nli-pairs_steps_per_second": 18.161, | |
"step": 2355 | |
}, | |
{ | |
"epoch": 0.5, | |
"eval_scitail-pairs-pos_loss": 0.6661093831062317, | |
"eval_scitail-pairs-pos_runtime": 5.3621, | |
"eval_scitail-pairs-pos_samples_per_second": 243.189, | |
"eval_scitail-pairs-pos_steps_per_second": 15.293, | |
"step": 2355 | |
}, | |
{ | |
"epoch": 0.5, | |
"eval_qnli-contrastive_loss": 0.8697724938392639, | |
"eval_qnli-contrastive_runtime": 15.7092, | |
"eval_qnli-contrastive_samples_per_second": 347.759, | |
"eval_qnli-contrastive_steps_per_second": 21.771, | |
"step": 2355 | |
}, | |
{ | |
"epoch": 0.6, | |
"grad_norm": 8.693464279174805, | |
"learning_rate": 1.814731424895465e-05, | |
"loss": 1.1657, | |
"step": 2826 | |
}, | |
{ | |
"epoch": 0.6, | |
"eval_nli-pairs_loss": 0.7330080270767212, | |
"eval_nli-pairs_runtime": 23.359, | |
"eval_nli-pairs_samples_per_second": 291.451, | |
"eval_nli-pairs_steps_per_second": 18.237, | |
"step": 2826 | |
}, | |
{ | |
"epoch": 0.6, | |
"eval_scitail-pairs-pos_loss": 0.558278501033783, | |
"eval_scitail-pairs-pos_runtime": 5.3162, | |
"eval_scitail-pairs-pos_samples_per_second": 245.289, | |
"eval_scitail-pairs-pos_steps_per_second": 15.425, | |
"step": 2826 | |
}, | |
{ | |
"epoch": 0.6, | |
"eval_qnli-contrastive_loss": 0.8414629101753235, | |
"eval_qnli-contrastive_runtime": 15.5773, | |
"eval_qnli-contrastive_samples_per_second": 350.703, | |
"eval_qnli-contrastive_steps_per_second": 21.955, | |
"step": 2826 | |
}, | |
{ | |
"epoch": 0.7, | |
"grad_norm": 20.00510025024414, | |
"learning_rate": 1.995853561663268e-05, | |
"loss": 1.2926, | |
"step": 3297 | |
}, | |
{ | |
"epoch": 0.7, | |
"eval_nli-pairs_loss": 0.688292384147644, | |
"eval_nli-pairs_runtime": 23.1585, | |
"eval_nli-pairs_samples_per_second": 293.974, | |
"eval_nli-pairs_steps_per_second": 18.395, | |
"step": 3297 | |
}, | |
{ | |
"epoch": 0.7, | |
"eval_scitail-pairs-pos_loss": 0.5283708572387695, | |
"eval_scitail-pairs-pos_runtime": 5.3322, | |
"eval_scitail-pairs-pos_samples_per_second": 244.552, | |
"eval_scitail-pairs-pos_steps_per_second": 15.378, | |
"step": 3297 | |
}, | |
{ | |
"epoch": 0.7, | |
"eval_qnli-contrastive_loss": 0.5239661335945129, | |
"eval_qnli-contrastive_runtime": 15.5222, | |
"eval_qnli-contrastive_samples_per_second": 351.947, | |
"eval_qnli-contrastive_steps_per_second": 22.033, | |
"step": 3297 | |
}, | |
{ | |
"epoch": 0.8, | |
"grad_norm": 20.681690216064453, | |
"learning_rate": 1.9476312452068522e-05, | |
"loss": 1.1523, | |
"step": 3768 | |
}, | |
{ | |
"epoch": 0.8, | |
"eval_nli-pairs_loss": 0.6775749325752258, | |
"eval_nli-pairs_runtime": 23.2425, | |
"eval_nli-pairs_samples_per_second": 292.912, | |
"eval_nli-pairs_steps_per_second": 18.328, | |
"step": 3768 | |
}, | |
{ | |
"epoch": 0.8, | |
"eval_scitail-pairs-pos_loss": 0.4816366732120514, | |
"eval_scitail-pairs-pos_runtime": 5.2694, | |
"eval_scitail-pairs-pos_samples_per_second": 247.467, | |
"eval_scitail-pairs-pos_steps_per_second": 15.562, | |
"step": 3768 | |
}, | |
{ | |
"epoch": 0.8, | |
"eval_qnli-contrastive_loss": 0.4342482388019562, | |
"eval_qnli-contrastive_runtime": 15.5335, | |
"eval_qnli-contrastive_samples_per_second": 351.691, | |
"eval_qnli-contrastive_steps_per_second": 22.017, | |
"step": 3768 | |
}, | |
{ | |
"epoch": 0.9, | |
"grad_norm": 12.640650749206543, | |
"learning_rate": 1.8475083492522773e-05, | |
"loss": 1.0387, | |
"step": 4239 | |
}, | |
{ | |
"epoch": 0.9, | |
"eval_nli-pairs_loss": 0.6213383674621582, | |
"eval_nli-pairs_runtime": 23.1579, | |
"eval_nli-pairs_samples_per_second": 293.981, | |
"eval_nli-pairs_steps_per_second": 18.395, | |
"step": 4239 | |
}, | |
{ | |
"epoch": 0.9, | |
"eval_scitail-pairs-pos_loss": 0.4603377878665924, | |
"eval_scitail-pairs-pos_runtime": 5.3009, | |
"eval_scitail-pairs-pos_samples_per_second": 245.997, | |
"eval_scitail-pairs-pos_steps_per_second": 15.469, | |
"step": 4239 | |
}, | |
{ | |
"epoch": 0.9, | |
"eval_qnli-contrastive_loss": 0.3022189736366272, | |
"eval_qnli-contrastive_runtime": 15.5459, | |
"eval_qnli-contrastive_samples_per_second": 351.411, | |
"eval_qnli-contrastive_steps_per_second": 21.999, | |
"step": 4239 | |
}, | |
{ | |
"epoch": 1.0, | |
"grad_norm": 20.227073669433594, | |
"learning_rate": 1.701008869684049e-05, | |
"loss": 1.0356, | |
"step": 4710 | |
}, | |
{ | |
"epoch": 1.0, | |
"eval_nli-pairs_loss": 0.6488831043243408, | |
"eval_nli-pairs_runtime": 23.1759, | |
"eval_nli-pairs_samples_per_second": 293.753, | |
"eval_nli-pairs_steps_per_second": 18.381, | |
"step": 4710 | |
}, | |
{ | |
"epoch": 1.0, | |
"eval_scitail-pairs-pos_loss": 0.5449082255363464, | |
"eval_scitail-pairs-pos_runtime": 5.3602, | |
"eval_scitail-pairs-pos_samples_per_second": 243.276, | |
"eval_scitail-pairs-pos_steps_per_second": 15.298, | |
"step": 4710 | |
}, | |
{ | |
"epoch": 1.0, | |
"eval_qnli-contrastive_loss": 0.1294127106666565, | |
"eval_qnli-contrastive_runtime": 15.5044, | |
"eval_qnli-contrastive_samples_per_second": 352.352, | |
"eval_qnli-contrastive_steps_per_second": 22.058, | |
"step": 4710 | |
} | |
], | |
"logging_steps": 471, | |
"max_steps": 9420, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 4710, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 0.0, | |
"train_batch_size": 28, | |
"trial_name": null, | |
"trial_params": null | |
} | |