|
{ |
|
"best_metric": 0.9180520176887512, |
|
"best_model_checkpoint": "ckpt/origin/pedes_attention_v2/checkpoint-91", |
|
"epoch": 4.882629107981221, |
|
"eval_steps": 7, |
|
"global_step": 130, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0282, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9767, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.998494093481022e-06, |
|
"loss": 1.0075, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.993977281025862e-06, |
|
"loss": 0.9679, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.986452283393452e-06, |
|
"loss": 0.971, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.975923633360985e-06, |
|
"loss": 1.0506, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.962397672993552e-06, |
|
"loss": 1.0311, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 0.9760443568229675, |
|
"eval_runtime": 37.1633, |
|
"eval_samples_per_second": 1.91, |
|
"eval_steps_per_second": 1.91, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.945882549823906e-06, |
|
"loss": 1.0691, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.926388211944707e-06, |
|
"loss": 0.9659, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.903926402016153e-06, |
|
"loss": 1.0062, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.878510650192644e-06, |
|
"loss": 1.0148, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.850156265972722e-06, |
|
"loss": 0.9223, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.8188803289772e-06, |
|
"loss": 1.0146, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.784701678661045e-06, |
|
"loss": 0.97, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 0.9596567153930664, |
|
"eval_runtime": 36.9759, |
|
"eval_samples_per_second": 1.92, |
|
"eval_steps_per_second": 1.92, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.747640902965185e-06, |
|
"loss": 1.0113, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.707720325915105e-06, |
|
"loss": 0.9954, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.664963994173695e-06, |
|
"loss": 1.0285, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.619397662556434e-06, |
|
"loss": 1.042, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.571048778517655e-06, |
|
"loss": 0.9092, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.519946465617217e-06, |
|
"loss": 0.932, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.466121505977577e-06, |
|
"loss": 1.014, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.9468032121658325, |
|
"eval_runtime": 36.927, |
|
"eval_samples_per_second": 1.923, |
|
"eval_steps_per_second": 1.923, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.409606321741776e-06, |
|
"loss": 1.0309, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.350434955543557e-06, |
|
"loss": 0.9213, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.288643050001362e-06, |
|
"loss": 0.9605, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.224267826248536e-06, |
|
"loss": 0.9896, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.157348061512728e-06, |
|
"loss": 0.9815, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.08792406575792e-06, |
|
"loss": 0.9748, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.016037657403225e-06, |
|
"loss": 0.993, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 0.9380103945732117, |
|
"eval_runtime": 36.9631, |
|
"eval_samples_per_second": 1.921, |
|
"eval_steps_per_second": 1.921, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.941732138133032e-06, |
|
"loss": 0.9235, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.865052266813686e-06, |
|
"loss": 0.9419, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.786044232532423e-06, |
|
"loss": 0.9539, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.704755626774796e-06, |
|
"loss": 1.016, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.621235414757337e-06, |
|
"loss": 0.9607, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.535533905932739e-06, |
|
"loss": 0.9749, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.447702723685335e-06, |
|
"loss": 0.9689, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.933106005191803, |
|
"eval_runtime": 37.0437, |
|
"eval_samples_per_second": 1.917, |
|
"eval_steps_per_second": 1.917, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.357794774235094e-06, |
|
"loss": 0.9367, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.265864214768883e-06, |
|
"loss": 0.8935, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.171966420818227e-06, |
|
"loss": 0.9468, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.076157952903134e-06, |
|
"loss": 0.9914, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.978496522462167e-06, |
|
"loss": 0.9485, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.879040957089229e-06, |
|
"loss": 0.9789, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 7.777851165098012e-06, |
|
"loss": 0.9744, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.9296567440032959, |
|
"eval_runtime": 37.085, |
|
"eval_samples_per_second": 1.915, |
|
"eval_steps_per_second": 1.915, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 7.674988099435487e-06, |
|
"loss": 1.0782, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 7.570513720966108e-06, |
|
"loss": 0.997, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.464490961148921e-06, |
|
"loss": 0.9033, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.3569836841299905e-06, |
|
"loss": 0.8924, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.248056648273034e-06, |
|
"loss": 0.9623, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.137775467151411e-06, |
|
"loss": 0.922, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.026206570024949e-06, |
|
"loss": 0.9452, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 0.9257412552833557, |
|
"eval_runtime": 37.1517, |
|
"eval_samples_per_second": 1.911, |
|
"eval_steps_per_second": 1.911, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.913417161825449e-06, |
|
"loss": 0.9623, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.799475182674942e-06, |
|
"loss": 0.8907, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.684449266961101e-06, |
|
"loss": 0.8867, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.568408701994459e-06, |
|
"loss": 0.9887, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.451423386272312e-06, |
|
"loss": 0.9524, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.333563787374493e-06, |
|
"loss": 0.8912, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.21490089951632e-06, |
|
"loss": 0.9499, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 0.9232458472251892, |
|
"eval_runtime": 37.0638, |
|
"eval_samples_per_second": 1.916, |
|
"eval_steps_per_second": 1.916, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 6.095506200784349e-06, |
|
"loss": 0.8558, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.975451610080643e-06, |
|
"loss": 0.968, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.8548094438015065e-06, |
|
"loss": 0.9154, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.733652372276809e-06, |
|
"loss": 0.9611, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 5.612053375996082e-06, |
|
"loss": 0.9836, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5.490085701647805e-06, |
|
"loss": 0.9307, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.367822817998338e-06, |
|
"loss": 1.0362, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.9205808043479919, |
|
"eval_runtime": 37.0451, |
|
"eval_samples_per_second": 1.917, |
|
"eval_steps_per_second": 1.917, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.245338371637091e-06, |
|
"loss": 0.9823, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.122706142614562e-06, |
|
"loss": 0.9406, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9989, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.87729385738544e-06, |
|
"loss": 0.8766, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.75466162836291e-06, |
|
"loss": 0.9258, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.6321771820016635e-06, |
|
"loss": 0.9525, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 4.509914298352197e-06, |
|
"loss": 0.9574, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.9198555946350098, |
|
"eval_runtime": 36.934, |
|
"eval_samples_per_second": 1.922, |
|
"eval_steps_per_second": 1.922, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 4.38794662400392e-06, |
|
"loss": 0.9648, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.266347627723192e-06, |
|
"loss": 0.9437, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.145190556198494e-06, |
|
"loss": 0.8811, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.02454838991936e-06, |
|
"loss": 0.9659, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.904493799215652e-06, |
|
"loss": 0.9659, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.7850991004836813e-06, |
|
"loss": 0.8528, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.6664362126255087e-06, |
|
"loss": 0.9295, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.9187451004981995, |
|
"eval_runtime": 37.0034, |
|
"eval_samples_per_second": 1.919, |
|
"eval_steps_per_second": 1.919, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.5485766137276894e-06, |
|
"loss": 0.9386, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.4315912980055433e-06, |
|
"loss": 0.9061, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.3155507330389004e-06, |
|
"loss": 0.9602, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.2005248173250593e-06, |
|
"loss": 0.9184, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.0865828381745515e-06, |
|
"loss": 1.0052, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.9737934299750514e-06, |
|
"loss": 0.9061, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.862224532848591e-06, |
|
"loss": 0.9485, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 0.9179951548576355, |
|
"eval_runtime": 36.9583, |
|
"eval_samples_per_second": 1.921, |
|
"eval_steps_per_second": 1.921, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.7519433517269665e-06, |
|
"loss": 0.9743, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.6430163158700116e-06, |
|
"loss": 0.9124, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 2.5355090388510806e-06, |
|
"loss": 0.9506, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 2.429486279033892e-06, |
|
"loss": 0.9314, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 2.325011900564515e-06, |
|
"loss": 0.9229, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 2.2221488349019903e-06, |
|
"loss": 0.9828, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 2.1209590429107734e-06, |
|
"loss": 0.9736, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 0.9180520176887512, |
|
"eval_runtime": 36.9028, |
|
"eval_samples_per_second": 1.924, |
|
"eval_steps_per_second": 1.924, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.0215034775378336e-06, |
|
"loss": 1.0039, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.9238420470968665e-06, |
|
"loss": 0.9363, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.8280335791817733e-06, |
|
"loss": 0.9026, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.7341357852311175e-06, |
|
"loss": 1.0065, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.642205225764908e-06, |
|
"loss": 0.9446, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.5522972763146653e-06, |
|
"loss": 0.9325, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.904, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_loss": 0.9174049496650696, |
|
"eval_runtime": 36.9039, |
|
"eval_samples_per_second": 1.924, |
|
"eval_steps_per_second": 1.924, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.3787645852426663e-06, |
|
"loss": 0.8823, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.2952443732252058e-06, |
|
"loss": 0.9824, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.2139557674675773e-06, |
|
"loss": 0.884, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.134947733186315e-06, |
|
"loss": 0.8807, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.058267861866969e-06, |
|
"loss": 0.9713, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 9.83962342596776e-07, |
|
"loss": 0.9151, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 9.120759342420821e-07, |
|
"loss": 0.9174, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 0.917622447013855, |
|
"eval_runtime": 36.8966, |
|
"eval_samples_per_second": 1.924, |
|
"eval_steps_per_second": 1.924, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 8.426519384872733e-07, |
|
"loss": 0.8545, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 7.757321737514645e-07, |
|
"loss": 0.9481, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 7.113569499986401e-07, |
|
"loss": 0.9625, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 6.495650444564433e-07, |
|
"loss": 0.9481, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 5.903936782582253e-07, |
|
"loss": 0.9439, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 5.338784940224239e-07, |
|
"loss": 0.902, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 4.800535343827834e-07, |
|
"loss": 0.9468, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 0.9171994924545288, |
|
"eval_runtime": 37.0036, |
|
"eval_samples_per_second": 1.919, |
|
"eval_steps_per_second": 1.919, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 4.289512214823466e-07, |
|
"loss": 0.9181, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 3.8060233744356634e-07, |
|
"loss": 0.9299, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 3.350360058263058e-07, |
|
"loss": 0.8488, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.9227967408489653e-07, |
|
"loss": 0.894, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.523590970348166e-07, |
|
"loss": 0.9252, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.152983213389559e-07, |
|
"loss": 0.9887, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.8111967102280082e-07, |
|
"loss": 0.9491, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_loss": 0.9177583456039429, |
|
"eval_runtime": 36.857, |
|
"eval_samples_per_second": 1.926, |
|
"eval_steps_per_second": 1.926, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.4984373402728014e-07, |
|
"loss": 0.9271, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.2148934980735772e-07, |
|
"loss": 0.9753, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 9.607359798384785e-08, |
|
"loss": 0.9216, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.36117880552939e-08, |
|
"loss": 0.8974, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.411745017609493e-08, |
|
"loss": 0.9348, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3.7602327006450166e-08, |
|
"loss": 0.9237, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.4076366639015914e-08, |
|
"loss": 0.9956, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_loss": 0.9172103404998779, |
|
"eval_runtime": 36.9828, |
|
"eval_samples_per_second": 1.92, |
|
"eval_steps_per_second": 1.92, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 1.3547716606548967e-08, |
|
"loss": 0.8845, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 6.022718974137976e-09, |
|
"loss": 0.8938, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.5059065189787502e-09, |
|
"loss": 0.931, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.0, |
|
"loss": 0.9201, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"step": 130, |
|
"total_flos": 3.5067525347672064e+17, |
|
"train_loss": 0.9512384671431321, |
|
"train_runtime": 7720.1122, |
|
"train_samples_per_second": 0.828, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 130, |
|
"num_train_epochs": 5, |
|
"save_steps": 13, |
|
"total_flos": 3.5067525347672064e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|