xls-r-300m-hi-prod / trainer_state.json
kapilkd13's picture
End of training
8caf192
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.11864406779661,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.36,
"eval_loss": 1.9129917621612549,
"eval_runtime": 85.8103,
"eval_samples_per_second": 24.414,
"eval_steps_per_second": 3.053,
"eval_wer": 0.9244121629939678,
"step": 400
},
{
"epoch": 1.69,
"learning_rate": 0.0002982,
"loss": 5.0013,
"step": 500
},
{
"epoch": 2.71,
"eval_loss": 0.7788760662078857,
"eval_runtime": 85.5378,
"eval_samples_per_second": 24.492,
"eval_steps_per_second": 3.063,
"eval_wer": 0.5943616890311462,
"step": 800
},
{
"epoch": 3.39,
"learning_rate": 0.00028011999999999997,
"loss": 0.6544,
"step": 1000
},
{
"epoch": 4.07,
"eval_loss": 0.7297950983047485,
"eval_runtime": 84.8704,
"eval_samples_per_second": 24.685,
"eval_steps_per_second": 3.087,
"eval_wer": 0.585190200664779,
"step": 1200
},
{
"epoch": 5.08,
"learning_rate": 0.00026011999999999997,
"loss": 0.4021,
"step": 1500
},
{
"epoch": 5.42,
"eval_loss": 0.697790265083313,
"eval_runtime": 84.1609,
"eval_samples_per_second": 24.893,
"eval_steps_per_second": 3.113,
"eval_wer": 0.5667241167056506,
"step": 1600
},
{
"epoch": 6.78,
"learning_rate": 0.00024011999999999997,
"loss": 0.3003,
"step": 2000
},
{
"epoch": 6.78,
"eval_loss": 0.6764048337936401,
"eval_runtime": 85.0271,
"eval_samples_per_second": 24.639,
"eval_steps_per_second": 3.081,
"eval_wer": 0.5382247937953958,
"step": 2000
},
{
"epoch": 8.14,
"eval_loss": 0.72489994764328,
"eval_runtime": 83.797,
"eval_samples_per_second": 25.001,
"eval_steps_per_second": 3.127,
"eval_wer": 0.5462883171242152,
"step": 2400
},
{
"epoch": 8.47,
"learning_rate": 0.00022011999999999997,
"loss": 0.2345,
"step": 2500
},
{
"epoch": 9.49,
"eval_loss": 0.7279900908470154,
"eval_runtime": 81.6862,
"eval_samples_per_second": 25.647,
"eval_steps_per_second": 3.207,
"eval_wer": 0.5124338298658131,
"step": 2800
},
{
"epoch": 10.17,
"learning_rate": 0.00020012,
"loss": 0.1993,
"step": 3000
},
{
"epoch": 10.85,
"eval_loss": 0.7288674712181091,
"eval_runtime": 81.4638,
"eval_samples_per_second": 25.717,
"eval_steps_per_second": 3.216,
"eval_wer": 0.4690385325618614,
"step": 3200
},
{
"epoch": 11.86,
"learning_rate": 0.00018012,
"loss": 0.1617,
"step": 3500
},
{
"epoch": 12.2,
"eval_loss": 0.7430842518806458,
"eval_runtime": 81.3254,
"eval_samples_per_second": 25.761,
"eval_steps_per_second": 3.222,
"eval_wer": 0.4732857318724609,
"step": 3600
},
{
"epoch": 13.56,
"learning_rate": 0.00016011999999999998,
"loss": 0.1432,
"step": 4000
},
{
"epoch": 13.56,
"eval_loss": 0.7448425889015198,
"eval_runtime": 83.7392,
"eval_samples_per_second": 25.018,
"eval_steps_per_second": 3.129,
"eval_wer": 0.4732857318724609,
"step": 4000
},
{
"epoch": 14.92,
"eval_loss": 0.7745729088783264,
"eval_runtime": 83.3791,
"eval_samples_per_second": 25.126,
"eval_steps_per_second": 3.142,
"eval_wer": 0.4484796257540318,
"step": 4400
},
{
"epoch": 15.25,
"learning_rate": 0.00014012,
"loss": 0.1172,
"step": 4500
},
{
"epoch": 16.27,
"eval_loss": 0.7588675022125244,
"eval_runtime": 83.1746,
"eval_samples_per_second": 25.188,
"eval_steps_per_second": 3.15,
"eval_wer": 0.4742090360704173,
"step": 4800
},
{
"epoch": 16.95,
"learning_rate": 0.00012011999999999998,
"loss": 0.1035,
"step": 5000
},
{
"epoch": 17.63,
"eval_loss": 0.7539412975311279,
"eval_runtime": 83.8808,
"eval_samples_per_second": 24.976,
"eval_steps_per_second": 3.123,
"eval_wer": 0.4353071525298535,
"step": 5200
},
{
"epoch": 18.64,
"learning_rate": 0.00010011999999999998,
"loss": 0.0956,
"step": 5500
},
{
"epoch": 18.98,
"eval_loss": 0.7648215293884277,
"eval_runtime": 83.2596,
"eval_samples_per_second": 25.162,
"eval_steps_per_second": 3.147,
"eval_wer": 0.44946448356518526,
"step": 5600
},
{
"epoch": 20.34,
"learning_rate": 8.012e-05,
"loss": 0.0845,
"step": 6000
},
{
"epoch": 20.34,
"eval_loss": 0.7876783013343811,
"eval_runtime": 84.6623,
"eval_samples_per_second": 24.745,
"eval_steps_per_second": 3.095,
"eval_wer": 0.4718699987689277,
"step": 6000
},
{
"epoch": 21.69,
"eval_loss": 0.7883597016334534,
"eval_runtime": 84.2577,
"eval_samples_per_second": 24.864,
"eval_steps_per_second": 3.11,
"eval_wer": 0.44343222947187,
"step": 6400
},
{
"epoch": 22.03,
"learning_rate": 6.0119999999999994e-05,
"loss": 0.0761,
"step": 6500
},
{
"epoch": 23.05,
"eval_loss": 0.7796189188957214,
"eval_runtime": 81.0587,
"eval_samples_per_second": 25.845,
"eval_steps_per_second": 3.232,
"eval_wer": 0.4385694940292995,
"step": 6800
},
{
"epoch": 23.73,
"learning_rate": 4.012e-05,
"loss": 0.0634,
"step": 7000
},
{
"epoch": 24.41,
"eval_loss": 0.7729123830795288,
"eval_runtime": 81.4812,
"eval_samples_per_second": 25.711,
"eval_steps_per_second": 3.215,
"eval_wer": 0.4306290779268743,
"step": 7200
},
{
"epoch": 25.42,
"learning_rate": 2.0159999999999997e-05,
"loss": 0.0571,
"step": 7500
},
{
"epoch": 25.76,
"eval_loss": 0.7826283574104309,
"eval_runtime": 81.9341,
"eval_samples_per_second": 25.569,
"eval_steps_per_second": 3.198,
"eval_wer": 0.4298288809553121,
"step": 7600
},
{
"epoch": 27.12,
"learning_rate": 1.6e-07,
"loss": 0.0508,
"step": 8000
},
{
"epoch": 27.12,
"eval_loss": 0.7805310487747192,
"eval_runtime": 81.3643,
"eval_samples_per_second": 25.748,
"eval_steps_per_second": 3.22,
"eval_wer": 0.4340145266527145,
"step": 8000
},
{
"epoch": 27.12,
"step": 8000,
"total_flos": 1.5928886672052732e+19,
"train_loss": 0.48406150817871096,
"train_runtime": 11109.9321,
"train_samples_per_second": 11.521,
"train_steps_per_second": 0.72
}
],
"max_steps": 8000,
"num_train_epochs": 28,
"total_flos": 1.5928886672052732e+19,
"trial_name": null,
"trial_params": null
}