|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 27.11864406779661, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 1.9129917621612549, |
|
"eval_runtime": 85.8103, |
|
"eval_samples_per_second": 24.414, |
|
"eval_steps_per_second": 3.053, |
|
"eval_wer": 0.9244121629939678, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0002982, |
|
"loss": 5.0013, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 0.7788760662078857, |
|
"eval_runtime": 85.5378, |
|
"eval_samples_per_second": 24.492, |
|
"eval_steps_per_second": 3.063, |
|
"eval_wer": 0.5943616890311462, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00028011999999999997, |
|
"loss": 0.6544, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_loss": 0.7297950983047485, |
|
"eval_runtime": 84.8704, |
|
"eval_samples_per_second": 24.685, |
|
"eval_steps_per_second": 3.087, |
|
"eval_wer": 0.585190200664779, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00026011999999999997, |
|
"loss": 0.4021, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 0.697790265083313, |
|
"eval_runtime": 84.1609, |
|
"eval_samples_per_second": 24.893, |
|
"eval_steps_per_second": 3.113, |
|
"eval_wer": 0.5667241167056506, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00024011999999999997, |
|
"loss": 0.3003, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_loss": 0.6764048337936401, |
|
"eval_runtime": 85.0271, |
|
"eval_samples_per_second": 24.639, |
|
"eval_steps_per_second": 3.081, |
|
"eval_wer": 0.5382247937953958, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"eval_loss": 0.72489994764328, |
|
"eval_runtime": 83.797, |
|
"eval_samples_per_second": 25.001, |
|
"eval_steps_per_second": 3.127, |
|
"eval_wer": 0.5462883171242152, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.00022011999999999997, |
|
"loss": 0.2345, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_loss": 0.7279900908470154, |
|
"eval_runtime": 81.6862, |
|
"eval_samples_per_second": 25.647, |
|
"eval_steps_per_second": 3.207, |
|
"eval_wer": 0.5124338298658131, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00020012, |
|
"loss": 0.1993, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_loss": 0.7288674712181091, |
|
"eval_runtime": 81.4638, |
|
"eval_samples_per_second": 25.717, |
|
"eval_steps_per_second": 3.216, |
|
"eval_wer": 0.4690385325618614, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00018012, |
|
"loss": 0.1617, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_loss": 0.7430842518806458, |
|
"eval_runtime": 81.3254, |
|
"eval_samples_per_second": 25.761, |
|
"eval_steps_per_second": 3.222, |
|
"eval_wer": 0.4732857318724609, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.00016011999999999998, |
|
"loss": 0.1432, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"eval_loss": 0.7448425889015198, |
|
"eval_runtime": 83.7392, |
|
"eval_samples_per_second": 25.018, |
|
"eval_steps_per_second": 3.129, |
|
"eval_wer": 0.4732857318724609, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_loss": 0.7745729088783264, |
|
"eval_runtime": 83.3791, |
|
"eval_samples_per_second": 25.126, |
|
"eval_steps_per_second": 3.142, |
|
"eval_wer": 0.4484796257540318, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 0.00014012, |
|
"loss": 0.1172, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"eval_loss": 0.7588675022125244, |
|
"eval_runtime": 83.1746, |
|
"eval_samples_per_second": 25.188, |
|
"eval_steps_per_second": 3.15, |
|
"eval_wer": 0.4742090360704173, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.00012011999999999998, |
|
"loss": 0.1035, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"eval_loss": 0.7539412975311279, |
|
"eval_runtime": 83.8808, |
|
"eval_samples_per_second": 24.976, |
|
"eval_steps_per_second": 3.123, |
|
"eval_wer": 0.4353071525298535, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.00010011999999999998, |
|
"loss": 0.0956, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_loss": 0.7648215293884277, |
|
"eval_runtime": 83.2596, |
|
"eval_samples_per_second": 25.162, |
|
"eval_steps_per_second": 3.147, |
|
"eval_wer": 0.44946448356518526, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 8.012e-05, |
|
"loss": 0.0845, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"eval_loss": 0.7876783013343811, |
|
"eval_runtime": 84.6623, |
|
"eval_samples_per_second": 24.745, |
|
"eval_steps_per_second": 3.095, |
|
"eval_wer": 0.4718699987689277, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"eval_loss": 0.7883597016334534, |
|
"eval_runtime": 84.2577, |
|
"eval_samples_per_second": 24.864, |
|
"eval_steps_per_second": 3.11, |
|
"eval_wer": 0.44343222947187, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 6.0119999999999994e-05, |
|
"loss": 0.0761, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 23.05, |
|
"eval_loss": 0.7796189188957214, |
|
"eval_runtime": 81.0587, |
|
"eval_samples_per_second": 25.845, |
|
"eval_steps_per_second": 3.232, |
|
"eval_wer": 0.4385694940292995, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 4.012e-05, |
|
"loss": 0.0634, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"eval_loss": 0.7729123830795288, |
|
"eval_runtime": 81.4812, |
|
"eval_samples_per_second": 25.711, |
|
"eval_steps_per_second": 3.215, |
|
"eval_wer": 0.4306290779268743, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 2.0159999999999997e-05, |
|
"loss": 0.0571, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"eval_loss": 0.7826283574104309, |
|
"eval_runtime": 81.9341, |
|
"eval_samples_per_second": 25.569, |
|
"eval_steps_per_second": 3.198, |
|
"eval_wer": 0.4298288809553121, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 1.6e-07, |
|
"loss": 0.0508, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"eval_loss": 0.7805310487747192, |
|
"eval_runtime": 81.3643, |
|
"eval_samples_per_second": 25.748, |
|
"eval_steps_per_second": 3.22, |
|
"eval_wer": 0.4340145266527145, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"step": 8000, |
|
"total_flos": 1.5928886672052732e+19, |
|
"train_loss": 0.48406150817871096, |
|
"train_runtime": 11109.9321, |
|
"train_samples_per_second": 11.521, |
|
"train_steps_per_second": 0.72 |
|
} |
|
], |
|
"max_steps": 8000, |
|
"num_train_epochs": 28, |
|
"total_flos": 1.5928886672052732e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|