{
  "best_metric": 3.3321995735168457,
  "best_model_checkpoint": "./tts-model-finetune\\checkpoint-50",
  "epoch": 0.3448275862068966,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006896551724137931,
      "grad_norm": 11.392743110656738,
      "learning_rate": 2e-05,
      "loss": 13.6657,
      "step": 1
    },
    {
      "epoch": 0.013793103448275862,
      "grad_norm": 11.40077018737793,
      "learning_rate": 4e-05,
      "loss": 13.856,
      "step": 2
    },
    {
      "epoch": 0.020689655172413793,
      "grad_norm": 11.972955703735352,
      "learning_rate": 6e-05,
      "loss": 13.6725,
      "step": 3
    },
    {
      "epoch": 0.027586206896551724,
      "grad_norm": 13.583998680114746,
      "learning_rate": 8e-05,
      "loss": 12.6134,
      "step": 4
    },
    {
      "epoch": 0.034482758620689655,
      "grad_norm": 17.623764038085938,
      "learning_rate": 0.0001,
      "loss": 11.721,
      "step": 5
    },
    {
      "epoch": 0.041379310344827586,
      "grad_norm": 19.997751235961914,
      "learning_rate": 9.999952403603673e-05,
      "loss": 9.4947,
      "step": 6
    },
    {
      "epoch": 0.04827586206896552,
      "grad_norm": 15.626948356628418,
      "learning_rate": 9.999809615320856e-05,
      "loss": 6.9766,
      "step": 7
    },
    {
      "epoch": 0.05517241379310345,
      "grad_norm": 8.9348726272583,
      "learning_rate": 9.999571637870036e-05,
      "loss": 5.6984,
      "step": 8
    },
    {
      "epoch": 0.06206896551724138,
      "grad_norm": 4.136483669281006,
      "learning_rate": 9.999238475781957e-05,
      "loss": 4.8507,
      "step": 9
    },
    {
      "epoch": 0.06896551724137931,
      "grad_norm": 2.6459639072418213,
      "learning_rate": 9.998810135399546e-05,
      "loss": 4.5585,
      "step": 10
    },
    {
      "epoch": 0.07586206896551724,
      "grad_norm": 2.484941005706787,
      "learning_rate": 9.998286624877786e-05,
      "loss": 4.6016,
      "step": 11
    },
    {
      "epoch": 0.08275862068965517,
      "grad_norm": 2.2766242027282715,
      "learning_rate": 9.997667954183565e-05,
      "loss": 4.4363,
      "step": 12
    },
    {
      "epoch": 0.0896551724137931,
      "grad_norm": 2.1203527450561523,
      "learning_rate": 9.99695413509548e-05,
      "loss": 4.3823,
      "step": 13
    },
    {
      "epoch": 0.09655172413793103,
      "grad_norm": 1.9280288219451904,
      "learning_rate": 9.996145181203615e-05,
      "loss": 4.2706,
      "step": 14
    },
    {
      "epoch": 0.10344827586206896,
      "grad_norm": 1.7148234844207764,
      "learning_rate": 9.99524110790929e-05,
      "loss": 4.1715,
      "step": 15
    },
    {
      "epoch": 0.1103448275862069,
      "grad_norm": 1.6018073558807373,
      "learning_rate": 9.994241932424754e-05,
      "loss": 4.1337,
      "step": 16
    },
    {
      "epoch": 0.11724137931034483,
      "grad_norm": 1.4049166440963745,
      "learning_rate": 9.99314767377287e-05,
      "loss": 3.9714,
      "step": 17
    },
    {
      "epoch": 0.12413793103448276,
      "grad_norm": 1.3031293153762817,
      "learning_rate": 9.991958352786744e-05,
      "loss": 3.9484,
      "step": 18
    },
    {
      "epoch": 0.1310344827586207,
      "grad_norm": 1.0187429189682007,
      "learning_rate": 9.990673992109335e-05,
      "loss": 3.8798,
      "step": 19
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 0.9277167916297913,
      "learning_rate": 9.989294616193017e-05,
      "loss": 3.8025,
      "step": 20
    },
    {
      "epoch": 0.14482758620689656,
      "grad_norm": 0.9591456651687622,
      "learning_rate": 9.987820251299122e-05,
      "loss": 3.7636,
      "step": 21
    },
    {
      "epoch": 0.15172413793103448,
      "grad_norm": 1.0362803936004639,
      "learning_rate": 9.986250925497429e-05,
      "loss": 3.7407,
      "step": 22
    },
    {
      "epoch": 0.15862068965517243,
      "grad_norm": 1.0504515171051025,
      "learning_rate": 9.98458666866564e-05,
      "loss": 3.6976,
      "step": 23
    },
    {
      "epoch": 0.16551724137931034,
      "grad_norm": 0.9324204325675964,
      "learning_rate": 9.982827512488809e-05,
      "loss": 3.6664,
      "step": 24
    },
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 0.8606096506118774,
      "learning_rate": 9.980973490458728e-05,
      "loss": 3.6558,
      "step": 25
    },
    {
      "epoch": 0.1793103448275862,
      "grad_norm": 0.7672788500785828,
      "learning_rate": 9.97902463787331e-05,
      "loss": 3.6692,
      "step": 26
    },
    {
      "epoch": 0.18620689655172415,
      "grad_norm": 0.6850264668464661,
      "learning_rate": 9.976980991835894e-05,
      "loss": 3.6091,
      "step": 27
    },
    {
      "epoch": 0.19310344827586207,
      "grad_norm": 0.6251705884933472,
      "learning_rate": 9.974842591254558e-05,
      "loss": 3.5454,
      "step": 28
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6199111342430115,
      "learning_rate": 9.972609476841367e-05,
      "loss": 3.5376,
      "step": 29
    },
    {
      "epoch": 0.20689655172413793,
      "grad_norm": 0.6018431782722473,
      "learning_rate": 9.970281691111598e-05,
      "loss": 3.5433,
      "step": 30
    },
    {
      "epoch": 0.21379310344827587,
      "grad_norm": 0.5882090330123901,
      "learning_rate": 9.967859278382938e-05,
      "loss": 3.5594,
      "step": 31
    },
    {
      "epoch": 0.2206896551724138,
      "grad_norm": 0.5856074690818787,
      "learning_rate": 9.965342284774632e-05,
      "loss": 3.5153,
      "step": 32
    },
    {
      "epoch": 0.22758620689655173,
      "grad_norm": 0.5841050148010254,
      "learning_rate": 9.962730758206611e-05,
      "loss": 3.5399,
      "step": 33
    },
    {
      "epoch": 0.23448275862068965,
      "grad_norm": 0.5656192302703857,
      "learning_rate": 9.960024748398576e-05,
      "loss": 3.6101,
      "step": 34
    },
    {
      "epoch": 0.2413793103448276,
      "grad_norm": 0.5472608208656311,
      "learning_rate": 9.957224306869053e-05,
      "loss": 3.5334,
      "step": 35
    },
    {
      "epoch": 0.2482758620689655,
      "grad_norm": 0.5338900089263916,
      "learning_rate": 9.954329486934412e-05,
      "loss": 3.5119,
      "step": 36
    },
    {
      "epoch": 0.25517241379310346,
      "grad_norm": 0.5114866495132446,
      "learning_rate": 9.951340343707852e-05,
      "loss": 3.4878,
      "step": 37
    },
    {
      "epoch": 0.2620689655172414,
      "grad_norm": 0.5140212178230286,
      "learning_rate": 9.948256934098352e-05,
      "loss": 3.4561,
      "step": 38
    },
    {
      "epoch": 0.2689655172413793,
      "grad_norm": 0.5367317199707031,
      "learning_rate": 9.945079316809585e-05,
      "loss": 3.4769,
      "step": 39
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 0.4917609989643097,
      "learning_rate": 9.941807552338804e-05,
      "loss": 3.4157,
      "step": 40
    },
    {
      "epoch": 0.2827586206896552,
      "grad_norm": 0.5060731172561646,
      "learning_rate": 9.938441702975689e-05,
      "loss": 3.4559,
      "step": 41
    },
    {
      "epoch": 0.2896551724137931,
      "grad_norm": 0.497332364320755,
      "learning_rate": 9.93498183280116e-05,
      "loss": 3.4068,
      "step": 42
    },
    {
      "epoch": 0.296551724137931,
      "grad_norm": 0.5172521471977234,
      "learning_rate": 9.931428007686158e-05,
      "loss": 3.4059,
      "step": 43
    },
    {
      "epoch": 0.30344827586206896,
      "grad_norm": 0.5016644597053528,
      "learning_rate": 9.927780295290389e-05,
      "loss": 3.4224,
      "step": 44
    },
    {
      "epoch": 0.3103448275862069,
      "grad_norm": 0.5105836391448975,
      "learning_rate": 9.924038765061042e-05,
      "loss": 3.4223,
      "step": 45
    },
    {
      "epoch": 0.31724137931034485,
      "grad_norm": 0.49239876866340637,
      "learning_rate": 9.920203488231454e-05,
      "loss": 3.3637,
      "step": 46
    },
    {
      "epoch": 0.32413793103448274,
      "grad_norm": 0.44286057353019714,
      "learning_rate": 9.916274537819775e-05,
      "loss": 3.3308,
      "step": 47
    },
    {
      "epoch": 0.3310344827586207,
      "grad_norm": 0.5211081504821777,
      "learning_rate": 9.912251988627549e-05,
      "loss": 3.3835,
      "step": 48
    },
    {
      "epoch": 0.33793103448275863,
      "grad_norm": 0.47642946243286133,
      "learning_rate": 9.908135917238321e-05,
      "loss": 3.3144,
      "step": 49
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.45889148116111755,
      "learning_rate": 9.903926402016153e-05,
      "loss": 3.3593,
      "step": 50
    },
    {
      "epoch": 0.3448275862068966,
      "eval_loss": 3.3321995735168457,
      "eval_runtime": 17.8977,
      "eval_samples_per_second": 1.676,
      "eval_steps_per_second": 0.447,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 725,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 966060303974400.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}