{ "best_metric": 0.9180520176887512, "best_model_checkpoint": "ckpt/origin/pedes_attention_v2/checkpoint-91", "epoch": 4.882629107981221, "eval_steps": 7, "global_step": 130, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 5e-06, "loss": 1.0282, "step": 1 }, { "epoch": 0.08, "learning_rate": 1e-05, "loss": 0.9767, "step": 2 }, { "epoch": 0.11, "learning_rate": 9.998494093481022e-06, "loss": 1.0075, "step": 3 }, { "epoch": 0.15, "learning_rate": 9.993977281025862e-06, "loss": 0.9679, "step": 4 }, { "epoch": 0.19, "learning_rate": 9.986452283393452e-06, "loss": 0.971, "step": 5 }, { "epoch": 0.23, "learning_rate": 9.975923633360985e-06, "loss": 1.0506, "step": 6 }, { "epoch": 0.26, "learning_rate": 9.962397672993552e-06, "loss": 1.0311, "step": 7 }, { "epoch": 0.26, "eval_loss": 0.9760443568229675, "eval_runtime": 37.1633, "eval_samples_per_second": 1.91, "eval_steps_per_second": 1.91, "step": 7 }, { "epoch": 0.3, "learning_rate": 9.945882549823906e-06, "loss": 1.0691, "step": 8 }, { "epoch": 0.34, "learning_rate": 9.926388211944707e-06, "loss": 0.9659, "step": 9 }, { "epoch": 0.38, "learning_rate": 9.903926402016153e-06, "loss": 1.0062, "step": 10 }, { "epoch": 0.41, "learning_rate": 9.878510650192644e-06, "loss": 1.0148, "step": 11 }, { "epoch": 0.45, "learning_rate": 9.850156265972722e-06, "loss": 0.9223, "step": 12 }, { "epoch": 0.49, "learning_rate": 9.8188803289772e-06, "loss": 1.0146, "step": 13 }, { "epoch": 0.53, "learning_rate": 9.784701678661045e-06, "loss": 0.97, "step": 14 }, { "epoch": 0.53, "eval_loss": 0.9596567153930664, "eval_runtime": 36.9759, "eval_samples_per_second": 1.92, "eval_steps_per_second": 1.92, "step": 14 }, { "epoch": 0.56, "learning_rate": 9.747640902965185e-06, "loss": 1.0113, "step": 15 }, { "epoch": 0.6, "learning_rate": 9.707720325915105e-06, "loss": 0.9954, "step": 16 }, { "epoch": 0.64, "learning_rate": 9.664963994173695e-06, "loss": 1.0285, "step": 17 }, { "epoch": 0.68, "learning_rate": 9.619397662556434e-06, "loss": 1.042, "step": 18 }, { "epoch": 0.71, "learning_rate": 9.571048778517655e-06, "loss": 0.9092, "step": 19 }, { "epoch": 0.75, "learning_rate": 9.519946465617217e-06, "loss": 0.932, "step": 20 }, { "epoch": 0.79, "learning_rate": 9.466121505977577e-06, "loss": 1.014, "step": 21 }, { "epoch": 0.79, "eval_loss": 0.9468032121658325, "eval_runtime": 36.927, "eval_samples_per_second": 1.923, "eval_steps_per_second": 1.923, "step": 21 }, { "epoch": 0.83, "learning_rate": 9.409606321741776e-06, "loss": 1.0309, "step": 22 }, { "epoch": 0.86, "learning_rate": 9.350434955543557e-06, "loss": 0.9213, "step": 23 }, { "epoch": 0.9, "learning_rate": 9.288643050001362e-06, "loss": 0.9605, "step": 24 }, { "epoch": 0.94, "learning_rate": 9.224267826248536e-06, "loss": 0.9896, "step": 25 }, { "epoch": 0.98, "learning_rate": 9.157348061512728e-06, "loss": 0.9815, "step": 26 }, { "epoch": 1.01, "learning_rate": 9.08792406575792e-06, "loss": 0.9748, "step": 27 }, { "epoch": 1.05, "learning_rate": 9.016037657403225e-06, "loss": 0.993, "step": 28 }, { "epoch": 1.05, "eval_loss": 0.9380103945732117, "eval_runtime": 36.9631, "eval_samples_per_second": 1.921, "eval_steps_per_second": 1.921, "step": 28 }, { "epoch": 1.09, "learning_rate": 8.941732138133032e-06, "loss": 0.9235, "step": 29 }, { "epoch": 1.13, "learning_rate": 8.865052266813686e-06, "loss": 0.9419, "step": 30 }, { "epoch": 1.16, "learning_rate": 8.786044232532423e-06, "loss": 0.9539, "step": 31 }, { "epoch": 1.2, "learning_rate": 8.704755626774796e-06, "loss": 1.016, "step": 32 }, { "epoch": 1.24, "learning_rate": 8.621235414757337e-06, "loss": 0.9607, "step": 33 }, { "epoch": 1.28, "learning_rate": 8.535533905932739e-06, "loss": 0.9749, "step": 34 }, { "epoch": 1.31, "learning_rate": 8.447702723685335e-06, "loss": 0.9689, "step": 35 }, { "epoch": 1.31, "eval_loss": 0.933106005191803, "eval_runtime": 37.0437, "eval_samples_per_second": 1.917, "eval_steps_per_second": 1.917, "step": 35 }, { "epoch": 1.35, "learning_rate": 8.357794774235094e-06, "loss": 0.9367, "step": 36 }, { "epoch": 1.39, "learning_rate": 8.265864214768883e-06, "loss": 0.8935, "step": 37 }, { "epoch": 1.43, "learning_rate": 8.171966420818227e-06, "loss": 0.9468, "step": 38 }, { "epoch": 1.46, "learning_rate": 8.076157952903134e-06, "loss": 0.9914, "step": 39 }, { "epoch": 1.5, "learning_rate": 7.978496522462167e-06, "loss": 0.9485, "step": 40 }, { "epoch": 1.54, "learning_rate": 7.879040957089229e-06, "loss": 0.9789, "step": 41 }, { "epoch": 1.58, "learning_rate": 7.777851165098012e-06, "loss": 0.9744, "step": 42 }, { "epoch": 1.58, "eval_loss": 0.9296567440032959, "eval_runtime": 37.085, "eval_samples_per_second": 1.915, "eval_steps_per_second": 1.915, "step": 42 }, { "epoch": 1.62, "learning_rate": 7.674988099435487e-06, "loss": 1.0782, "step": 43 }, { "epoch": 1.65, "learning_rate": 7.570513720966108e-06, "loss": 0.997, "step": 44 }, { "epoch": 1.69, "learning_rate": 7.464490961148921e-06, "loss": 0.9033, "step": 45 }, { "epoch": 1.73, "learning_rate": 7.3569836841299905e-06, "loss": 0.8924, "step": 46 }, { "epoch": 1.77, "learning_rate": 7.248056648273034e-06, "loss": 0.9623, "step": 47 }, { "epoch": 1.8, "learning_rate": 7.137775467151411e-06, "loss": 0.922, "step": 48 }, { "epoch": 1.84, "learning_rate": 7.026206570024949e-06, "loss": 0.9452, "step": 49 }, { "epoch": 1.84, "eval_loss": 0.9257412552833557, "eval_runtime": 37.1517, "eval_samples_per_second": 1.911, "eval_steps_per_second": 1.911, "step": 49 }, { "epoch": 1.88, "learning_rate": 6.913417161825449e-06, "loss": 0.9623, "step": 50 }, { "epoch": 1.92, "learning_rate": 6.799475182674942e-06, "loss": 0.8907, "step": 51 }, { "epoch": 1.95, "learning_rate": 6.684449266961101e-06, "loss": 0.8867, "step": 52 }, { "epoch": 1.99, "learning_rate": 6.568408701994459e-06, "loss": 0.9887, "step": 53 }, { "epoch": 2.03, "learning_rate": 6.451423386272312e-06, "loss": 0.9524, "step": 54 }, { "epoch": 2.07, "learning_rate": 6.333563787374493e-06, "loss": 0.8912, "step": 55 }, { "epoch": 2.1, "learning_rate": 6.21490089951632e-06, "loss": 0.9499, "step": 56 }, { "epoch": 2.1, "eval_loss": 0.9232458472251892, "eval_runtime": 37.0638, "eval_samples_per_second": 1.916, "eval_steps_per_second": 1.916, "step": 56 }, { "epoch": 2.14, "learning_rate": 6.095506200784349e-06, "loss": 0.8558, "step": 57 }, { "epoch": 2.18, "learning_rate": 5.975451610080643e-06, "loss": 0.968, "step": 58 }, { "epoch": 2.22, "learning_rate": 5.8548094438015065e-06, "loss": 0.9154, "step": 59 }, { "epoch": 2.25, "learning_rate": 5.733652372276809e-06, "loss": 0.9611, "step": 60 }, { "epoch": 2.29, "learning_rate": 5.612053375996082e-06, "loss": 0.9836, "step": 61 }, { "epoch": 2.33, "learning_rate": 5.490085701647805e-06, "loss": 0.9307, "step": 62 }, { "epoch": 2.37, "learning_rate": 5.367822817998338e-06, "loss": 1.0362, "step": 63 }, { "epoch": 2.37, "eval_loss": 0.9205808043479919, "eval_runtime": 37.0451, "eval_samples_per_second": 1.917, "eval_steps_per_second": 1.917, "step": 63 }, { "epoch": 2.4, "learning_rate": 5.245338371637091e-06, "loss": 0.9823, "step": 64 }, { "epoch": 2.44, "learning_rate": 5.122706142614562e-06, "loss": 0.9406, "step": 65 }, { "epoch": 2.48, "learning_rate": 5e-06, "loss": 0.9989, "step": 66 }, { "epoch": 2.52, "learning_rate": 4.87729385738544e-06, "loss": 0.8766, "step": 67 }, { "epoch": 2.55, "learning_rate": 4.75466162836291e-06, "loss": 0.9258, "step": 68 }, { "epoch": 2.59, "learning_rate": 4.6321771820016635e-06, "loss": 0.9525, "step": 69 }, { "epoch": 2.63, "learning_rate": 4.509914298352197e-06, "loss": 0.9574, "step": 70 }, { "epoch": 2.63, "eval_loss": 0.9198555946350098, "eval_runtime": 36.934, "eval_samples_per_second": 1.922, "eval_steps_per_second": 1.922, "step": 70 }, { "epoch": 2.67, "learning_rate": 4.38794662400392e-06, "loss": 0.9648, "step": 71 }, { "epoch": 2.7, "learning_rate": 4.266347627723192e-06, "loss": 0.9437, "step": 72 }, { "epoch": 2.74, "learning_rate": 4.145190556198494e-06, "loss": 0.8811, "step": 73 }, { "epoch": 2.78, "learning_rate": 4.02454838991936e-06, "loss": 0.9659, "step": 74 }, { "epoch": 2.82, "learning_rate": 3.904493799215652e-06, "loss": 0.9659, "step": 75 }, { "epoch": 2.85, "learning_rate": 3.7850991004836813e-06, "loss": 0.8528, "step": 76 }, { "epoch": 2.89, "learning_rate": 3.6664362126255087e-06, "loss": 0.9295, "step": 77 }, { "epoch": 2.89, "eval_loss": 0.9187451004981995, "eval_runtime": 37.0034, "eval_samples_per_second": 1.919, "eval_steps_per_second": 1.919, "step": 77 }, { "epoch": 2.93, "learning_rate": 3.5485766137276894e-06, "loss": 0.9386, "step": 78 }, { "epoch": 2.97, "learning_rate": 3.4315912980055433e-06, "loss": 0.9061, "step": 79 }, { "epoch": 3.0, "learning_rate": 3.3155507330389004e-06, "loss": 0.9602, "step": 80 }, { "epoch": 3.04, "learning_rate": 3.2005248173250593e-06, "loss": 0.9184, "step": 81 }, { "epoch": 3.08, "learning_rate": 3.0865828381745515e-06, "loss": 1.0052, "step": 82 }, { "epoch": 3.12, "learning_rate": 2.9737934299750514e-06, "loss": 0.9061, "step": 83 }, { "epoch": 3.15, "learning_rate": 2.862224532848591e-06, "loss": 0.9485, "step": 84 }, { "epoch": 3.15, "eval_loss": 0.9179951548576355, "eval_runtime": 36.9583, "eval_samples_per_second": 1.921, "eval_steps_per_second": 1.921, "step": 84 }, { "epoch": 3.19, "learning_rate": 2.7519433517269665e-06, "loss": 0.9743, "step": 85 }, { "epoch": 3.23, "learning_rate": 2.6430163158700116e-06, "loss": 0.9124, "step": 86 }, { "epoch": 3.27, "learning_rate": 2.5355090388510806e-06, "loss": 0.9506, "step": 87 }, { "epoch": 3.31, "learning_rate": 2.429486279033892e-06, "loss": 0.9314, "step": 88 }, { "epoch": 3.34, "learning_rate": 2.325011900564515e-06, "loss": 0.9229, "step": 89 }, { "epoch": 3.38, "learning_rate": 2.2221488349019903e-06, "loss": 0.9828, "step": 90 }, { "epoch": 3.42, "learning_rate": 2.1209590429107734e-06, "loss": 0.9736, "step": 91 }, { "epoch": 3.42, "eval_loss": 0.9180520176887512, "eval_runtime": 36.9028, "eval_samples_per_second": 1.924, "eval_steps_per_second": 1.924, "step": 91 }, { "epoch": 3.46, "learning_rate": 2.0215034775378336e-06, "loss": 1.0039, "step": 92 }, { "epoch": 3.49, "learning_rate": 1.9238420470968665e-06, "loss": 0.9363, "step": 93 }, { "epoch": 3.53, "learning_rate": 1.8280335791817733e-06, "loss": 0.9026, "step": 94 }, { "epoch": 3.57, "learning_rate": 1.7341357852311175e-06, "loss": 1.0065, "step": 95 }, { "epoch": 3.61, "learning_rate": 1.642205225764908e-06, "loss": 0.9446, "step": 96 }, { "epoch": 3.64, "learning_rate": 1.5522972763146653e-06, "loss": 0.9325, "step": 97 }, { "epoch": 3.68, "learning_rate": 1.4644660940672628e-06, "loss": 0.904, "step": 98 }, { "epoch": 3.68, "eval_loss": 0.9174049496650696, "eval_runtime": 36.9039, "eval_samples_per_second": 1.924, "eval_steps_per_second": 1.924, "step": 98 }, { "epoch": 3.72, "learning_rate": 1.3787645852426663e-06, "loss": 0.8823, "step": 99 }, { "epoch": 3.76, "learning_rate": 1.2952443732252058e-06, "loss": 0.9824, "step": 100 }, { "epoch": 3.79, "learning_rate": 1.2139557674675773e-06, "loss": 0.884, "step": 101 }, { "epoch": 3.83, "learning_rate": 1.134947733186315e-06, "loss": 0.8807, "step": 102 }, { "epoch": 3.87, "learning_rate": 1.058267861866969e-06, "loss": 0.9713, "step": 103 }, { "epoch": 3.91, "learning_rate": 9.83962342596776e-07, "loss": 0.9151, "step": 104 }, { "epoch": 3.94, "learning_rate": 9.120759342420821e-07, "loss": 0.9174, "step": 105 }, { "epoch": 3.94, "eval_loss": 0.917622447013855, "eval_runtime": 36.8966, "eval_samples_per_second": 1.924, "eval_steps_per_second": 1.924, "step": 105 }, { "epoch": 3.98, "learning_rate": 8.426519384872733e-07, "loss": 0.8545, "step": 106 }, { "epoch": 4.02, "learning_rate": 7.757321737514645e-07, "loss": 0.9481, "step": 107 }, { "epoch": 4.06, "learning_rate": 7.113569499986401e-07, "loss": 0.9625, "step": 108 }, { "epoch": 4.09, "learning_rate": 6.495650444564433e-07, "loss": 0.9481, "step": 109 }, { "epoch": 4.13, "learning_rate": 5.903936782582253e-07, "loss": 0.9439, "step": 110 }, { "epoch": 4.17, "learning_rate": 5.338784940224239e-07, "loss": 0.902, "step": 111 }, { "epoch": 4.21, "learning_rate": 4.800535343827834e-07, "loss": 0.9468, "step": 112 }, { "epoch": 4.21, "eval_loss": 0.9171994924545288, "eval_runtime": 37.0036, "eval_samples_per_second": 1.919, "eval_steps_per_second": 1.919, "step": 112 }, { "epoch": 4.24, "learning_rate": 4.289512214823466e-07, "loss": 0.9181, "step": 113 }, { "epoch": 4.28, "learning_rate": 3.8060233744356634e-07, "loss": 0.9299, "step": 114 }, { "epoch": 4.32, "learning_rate": 3.350360058263058e-07, "loss": 0.8488, "step": 115 }, { "epoch": 4.36, "learning_rate": 2.9227967408489653e-07, "loss": 0.894, "step": 116 }, { "epoch": 4.39, "learning_rate": 2.523590970348166e-07, "loss": 0.9252, "step": 117 }, { "epoch": 4.43, "learning_rate": 2.152983213389559e-07, "loss": 0.9887, "step": 118 }, { "epoch": 4.47, "learning_rate": 1.8111967102280082e-07, "loss": 0.9491, "step": 119 }, { "epoch": 4.47, "eval_loss": 0.9177583456039429, "eval_runtime": 36.857, "eval_samples_per_second": 1.926, "eval_steps_per_second": 1.926, "step": 119 }, { "epoch": 4.51, "learning_rate": 1.4984373402728014e-07, "loss": 0.9271, "step": 120 }, { "epoch": 4.54, "learning_rate": 1.2148934980735772e-07, "loss": 0.9753, "step": 121 }, { "epoch": 4.58, "learning_rate": 9.607359798384785e-08, "loss": 0.9216, "step": 122 }, { "epoch": 4.62, "learning_rate": 7.36117880552939e-08, "loss": 0.8974, "step": 123 }, { "epoch": 4.66, "learning_rate": 5.411745017609493e-08, "loss": 0.9348, "step": 124 }, { "epoch": 4.69, "learning_rate": 3.7602327006450166e-08, "loss": 0.9237, "step": 125 }, { "epoch": 4.73, "learning_rate": 2.4076366639015914e-08, "loss": 0.9956, "step": 126 }, { "epoch": 4.73, "eval_loss": 0.9172103404998779, "eval_runtime": 36.9828, "eval_samples_per_second": 1.92, "eval_steps_per_second": 1.92, "step": 126 }, { "epoch": 4.77, "learning_rate": 1.3547716606548967e-08, "loss": 0.8845, "step": 127 }, { "epoch": 4.81, "learning_rate": 6.022718974137976e-09, "loss": 0.8938, "step": 128 }, { "epoch": 4.85, "learning_rate": 1.5059065189787502e-09, "loss": 0.931, "step": 129 }, { "epoch": 4.88, "learning_rate": 0.0, "loss": 0.9201, "step": 130 }, { "epoch": 4.88, "step": 130, "total_flos": 3.5067525347672064e+17, "train_loss": 0.9512384671431321, "train_runtime": 7720.1122, "train_samples_per_second": 0.828, "train_steps_per_second": 0.017 } ], "logging_steps": 1.0, "max_steps": 130, "num_train_epochs": 5, "save_steps": 13, "total_flos": 3.5067525347672064e+17, "trial_name": null, "trial_params": null }