adapters-opt-gptq-QLORA-super_glue-multirc / trainer_state-opt-gptq-QLORA-super_glue-multirc-sequence_classification.json
RMHalak's picture
Task: SequenceClassification
b93f7fb verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.984,
"eval_steps": 1,
"global_step": 124,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 26.392181396484375,
"learning_rate": 2.5e-05,
"loss": 1.2683,
"step": 1
},
{
"epoch": 0.016,
"eval_exact_match": 0.14285714285714285,
"eval_f1_a": 0.544,
"eval_f1_m": 0.5331374974232117,
"eval_loss": 0.9992128610610962,
"eval_runtime": 12.4039,
"eval_samples_per_second": 20.155,
"eval_steps_per_second": 2.58,
"step": 1
},
{
"epoch": 0.032,
"grad_norm": 19.146682739257812,
"learning_rate": 5e-05,
"loss": 1.0276,
"step": 2
},
{
"epoch": 0.032,
"eval_exact_match": 0.12244897959183673,
"eval_f1_a": 0.5344129554655871,
"eval_f1_m": 0.5208925994640281,
"eval_loss": 0.9813457131385803,
"eval_runtime": 12.2968,
"eval_samples_per_second": 20.331,
"eval_steps_per_second": 2.602,
"step": 2
},
{
"epoch": 0.048,
"grad_norm": 25.024534225463867,
"learning_rate": 4.959016393442623e-05,
"loss": 1.1935,
"step": 3
},
{
"epoch": 0.048,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4979253112033195,
"eval_f1_m": 0.4637011514562535,
"eval_loss": 0.9183418154716492,
"eval_runtime": 12.5058,
"eval_samples_per_second": 19.991,
"eval_steps_per_second": 2.559,
"step": 3
},
{
"epoch": 0.064,
"grad_norm": 18.022754669189453,
"learning_rate": 4.918032786885246e-05,
"loss": 0.9304,
"step": 4
},
{
"epoch": 0.064,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.43243243243243246,
"eval_f1_m": 0.42084400859911064,
"eval_loss": 0.8643535375595093,
"eval_runtime": 12.5036,
"eval_samples_per_second": 19.994,
"eval_steps_per_second": 2.559,
"step": 4
},
{
"epoch": 0.08,
"grad_norm": 17.011953353881836,
"learning_rate": 4.8770491803278687e-05,
"loss": 1.0187,
"step": 5
},
{
"epoch": 0.08,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.44131455399061037,
"eval_f1_m": 0.435791589363018,
"eval_loss": 0.8290175795555115,
"eval_runtime": 12.501,
"eval_samples_per_second": 19.998,
"eval_steps_per_second": 2.56,
"step": 5
},
{
"epoch": 0.096,
"grad_norm": 9.931246757507324,
"learning_rate": 4.836065573770492e-05,
"loss": 0.781,
"step": 6
},
{
"epoch": 0.096,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.44761904761904764,
"eval_f1_m": 0.44164457990988604,
"eval_loss": 0.803396463394165,
"eval_runtime": 12.603,
"eval_samples_per_second": 19.837,
"eval_steps_per_second": 2.539,
"step": 6
},
{
"epoch": 0.112,
"grad_norm": 34.31171417236328,
"learning_rate": 4.795081967213115e-05,
"loss": 1.2274,
"step": 7
},
{
"epoch": 0.112,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.416243654822335,
"eval_f1_m": 0.42698010606173864,
"eval_loss": 0.7892948985099792,
"eval_runtime": 12.5071,
"eval_samples_per_second": 19.989,
"eval_steps_per_second": 2.559,
"step": 7
},
{
"epoch": 0.128,
"grad_norm": 19.665807723999023,
"learning_rate": 4.754098360655738e-05,
"loss": 1.0631,
"step": 8
},
{
"epoch": 0.128,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.3734939759036145,
"eval_f1_m": 0.44151661996800046,
"eval_loss": 0.779171884059906,
"eval_runtime": 12.401,
"eval_samples_per_second": 20.16,
"eval_steps_per_second": 2.58,
"step": 8
},
{
"epoch": 0.144,
"grad_norm": 6.531087875366211,
"learning_rate": 4.713114754098361e-05,
"loss": 0.6823,
"step": 9
},
{
"epoch": 0.144,
"eval_exact_match": 0.0,
"eval_f1_a": 0.24657534246575338,
"eval_f1_m": 0.38393596199718655,
"eval_loss": 0.7732793092727661,
"eval_runtime": 12.3963,
"eval_samples_per_second": 20.167,
"eval_steps_per_second": 2.581,
"step": 9
},
{
"epoch": 0.16,
"grad_norm": 9.769821166992188,
"learning_rate": 4.672131147540984e-05,
"loss": 0.6971,
"step": 10
},
{
"epoch": 0.16,
"eval_exact_match": 0.0,
"eval_f1_a": 0.24827586206896554,
"eval_f1_m": 0.38636550524305635,
"eval_loss": 0.7666972875595093,
"eval_runtime": 12.3966,
"eval_samples_per_second": 20.167,
"eval_steps_per_second": 2.581,
"step": 10
},
{
"epoch": 0.176,
"grad_norm": 18.875186920166016,
"learning_rate": 4.631147540983607e-05,
"loss": 0.8232,
"step": 11
},
{
"epoch": 0.176,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2028985507246377,
"eval_f1_m": 0.37477658396025737,
"eval_loss": 0.7616797089576721,
"eval_runtime": 12.293,
"eval_samples_per_second": 20.337,
"eval_steps_per_second": 2.603,
"step": 11
},
{
"epoch": 0.192,
"grad_norm": 12.508312225341797,
"learning_rate": 4.59016393442623e-05,
"loss": 0.4358,
"step": 12
},
{
"epoch": 0.192,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2142857142857143,
"eval_f1_m": 0.37786431255819003,
"eval_loss": 0.7548164129257202,
"eval_runtime": 12.2924,
"eval_samples_per_second": 20.338,
"eval_steps_per_second": 2.603,
"step": 12
},
{
"epoch": 0.208,
"grad_norm": 13.73599624633789,
"learning_rate": 4.549180327868853e-05,
"loss": 0.7875,
"step": 13
},
{
"epoch": 0.208,
"eval_exact_match": 0.0,
"eval_f1_a": 0.21582733812949637,
"eval_f1_m": 0.37958707958707955,
"eval_loss": 0.7478398680686951,
"eval_runtime": 12.293,
"eval_samples_per_second": 20.337,
"eval_steps_per_second": 2.603,
"step": 13
},
{
"epoch": 0.224,
"grad_norm": 17.087984085083008,
"learning_rate": 4.508196721311476e-05,
"loss": 0.9012,
"step": 14
},
{
"epoch": 0.224,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2377622377622378,
"eval_f1_m": 0.38485918843061695,
"eval_loss": 0.7398672103881836,
"eval_runtime": 12.294,
"eval_samples_per_second": 20.335,
"eval_steps_per_second": 2.603,
"step": 14
},
{
"epoch": 0.24,
"grad_norm": 17.19772720336914,
"learning_rate": 4.467213114754098e-05,
"loss": 0.8344,
"step": 15
},
{
"epoch": 0.24,
"eval_exact_match": 0.0,
"eval_f1_a": 0.24657534246575338,
"eval_f1_m": 0.38393596199718655,
"eval_loss": 0.7301445603370667,
"eval_runtime": 12.3925,
"eval_samples_per_second": 20.173,
"eval_steps_per_second": 2.582,
"step": 15
},
{
"epoch": 0.256,
"grad_norm": 9.059144973754883,
"learning_rate": 4.426229508196721e-05,
"loss": 0.6957,
"step": 16
},
{
"epoch": 0.256,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.3184713375796179,
"eval_f1_m": 0.4192099850263115,
"eval_loss": 0.7225546836853027,
"eval_runtime": 12.2951,
"eval_samples_per_second": 20.333,
"eval_steps_per_second": 2.603,
"step": 16
},
{
"epoch": 0.272,
"grad_norm": 4.891639709472656,
"learning_rate": 4.3852459016393444e-05,
"loss": 0.6085,
"step": 17
},
{
"epoch": 0.272,
"eval_exact_match": 0.061224489795918366,
"eval_f1_a": 0.3878787878787879,
"eval_f1_m": 0.4543832318142042,
"eval_loss": 0.7196054458618164,
"eval_runtime": 12.2946,
"eval_samples_per_second": 20.334,
"eval_steps_per_second": 2.603,
"step": 17
},
{
"epoch": 0.288,
"grad_norm": 11.897939682006836,
"learning_rate": 4.3442622950819674e-05,
"loss": 0.7777,
"step": 18
},
{
"epoch": 0.288,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.3976608187134503,
"eval_f1_m": 0.4537870592792561,
"eval_loss": 0.7144648432731628,
"eval_runtime": 12.3977,
"eval_samples_per_second": 20.165,
"eval_steps_per_second": 2.581,
"step": 18
},
{
"epoch": 0.304,
"grad_norm": 11.019088745117188,
"learning_rate": 4.3032786885245904e-05,
"loss": 0.6732,
"step": 19
},
{
"epoch": 0.304,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.39285714285714285,
"eval_f1_m": 0.4570021548412905,
"eval_loss": 0.7114218473434448,
"eval_runtime": 12.3988,
"eval_samples_per_second": 20.163,
"eval_steps_per_second": 2.581,
"step": 19
},
{
"epoch": 0.32,
"grad_norm": 9.549395561218262,
"learning_rate": 4.262295081967213e-05,
"loss": 0.7477,
"step": 20
},
{
"epoch": 0.32,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4046242774566474,
"eval_f1_m": 0.4577005477515681,
"eval_loss": 0.709179699420929,
"eval_runtime": 12.4018,
"eval_samples_per_second": 20.158,
"eval_steps_per_second": 2.58,
"step": 20
},
{
"epoch": 0.336,
"grad_norm": 6.315933704376221,
"learning_rate": 4.2213114754098365e-05,
"loss": 0.6196,
"step": 21
},
{
"epoch": 0.336,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4069767441860465,
"eval_f1_m": 0.4574949710153791,
"eval_loss": 0.7092265486717224,
"eval_runtime": 12.2969,
"eval_samples_per_second": 20.33,
"eval_steps_per_second": 2.602,
"step": 21
},
{
"epoch": 0.352,
"grad_norm": 13.465363502502441,
"learning_rate": 4.1803278688524595e-05,
"loss": 0.8407,
"step": 22
},
{
"epoch": 0.352,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4333333333333333,
"eval_f1_m": 0.4608648720893619,
"eval_loss": 0.7104062438011169,
"eval_runtime": 12.3998,
"eval_samples_per_second": 20.162,
"eval_steps_per_second": 2.581,
"step": 22
},
{
"epoch": 0.368,
"grad_norm": 5.6240763664245605,
"learning_rate": 4.1393442622950826e-05,
"loss": 0.6945,
"step": 23
},
{
"epoch": 0.368,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4175824175824176,
"eval_f1_m": 0.4523820850351463,
"eval_loss": 0.7099843621253967,
"eval_runtime": 12.5009,
"eval_samples_per_second": 19.999,
"eval_steps_per_second": 2.56,
"step": 23
},
{
"epoch": 0.384,
"grad_norm": 11.571455001831055,
"learning_rate": 4.098360655737705e-05,
"loss": 0.7534,
"step": 24
},
{
"epoch": 0.384,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.42774566473988435,
"eval_f1_m": 0.4625007645415809,
"eval_loss": 0.7112500071525574,
"eval_runtime": 12.505,
"eval_samples_per_second": 19.992,
"eval_steps_per_second": 2.559,
"step": 24
},
{
"epoch": 0.4,
"grad_norm": 9.693904876708984,
"learning_rate": 4.057377049180328e-05,
"loss": 0.6428,
"step": 25
},
{
"epoch": 0.4,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.32258064516129026,
"eval_f1_m": 0.42786660784860053,
"eval_loss": 0.7106679677963257,
"eval_runtime": 12.5005,
"eval_samples_per_second": 19.999,
"eval_steps_per_second": 2.56,
"step": 25
},
{
"epoch": 0.416,
"grad_norm": 5.713263034820557,
"learning_rate": 4.016393442622951e-05,
"loss": 0.649,
"step": 26
},
{
"epoch": 0.416,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2638888888888889,
"eval_f1_m": 0.4046327255510929,
"eval_loss": 0.7118203043937683,
"eval_runtime": 12.4034,
"eval_samples_per_second": 20.156,
"eval_steps_per_second": 2.58,
"step": 26
},
{
"epoch": 0.432,
"grad_norm": 7.6466965675354,
"learning_rate": 3.975409836065574e-05,
"loss": 0.7344,
"step": 27
},
{
"epoch": 0.432,
"eval_exact_match": 0.0,
"eval_f1_a": 0.18045112781954886,
"eval_f1_m": 0.3727819799248371,
"eval_loss": 0.7153554558753967,
"eval_runtime": 12.4059,
"eval_samples_per_second": 20.152,
"eval_steps_per_second": 2.579,
"step": 27
},
{
"epoch": 0.448,
"grad_norm": 12.54140567779541,
"learning_rate": 3.934426229508197e-05,
"loss": 0.5762,
"step": 28
},
{
"epoch": 0.448,
"eval_exact_match": 0.0,
"eval_f1_a": 0.13846153846153844,
"eval_f1_m": 0.3607012601910561,
"eval_loss": 0.7164726853370667,
"eval_runtime": 12.4987,
"eval_samples_per_second": 20.002,
"eval_steps_per_second": 2.56,
"step": 28
},
{
"epoch": 0.464,
"grad_norm": 7.025119781494141,
"learning_rate": 3.89344262295082e-05,
"loss": 0.6606,
"step": 29
},
{
"epoch": 0.464,
"eval_exact_match": 0.0,
"eval_f1_a": 0.11111111111111112,
"eval_f1_m": 0.35377706194032715,
"eval_loss": 0.719851553440094,
"eval_runtime": 12.599,
"eval_samples_per_second": 19.843,
"eval_steps_per_second": 2.54,
"step": 29
},
{
"epoch": 0.48,
"grad_norm": 16.650728225708008,
"learning_rate": 3.8524590163934424e-05,
"loss": 0.6886,
"step": 30
},
{
"epoch": 0.48,
"eval_exact_match": 0.0,
"eval_f1_a": 0.05454545454545454,
"eval_f1_m": 0.3549857455469701,
"eval_loss": 0.7235468626022339,
"eval_runtime": 12.5049,
"eval_samples_per_second": 19.992,
"eval_steps_per_second": 2.559,
"step": 30
},
{
"epoch": 0.496,
"grad_norm": 9.604567527770996,
"learning_rate": 3.8114754098360655e-05,
"loss": 0.6693,
"step": 31
},
{
"epoch": 0.496,
"eval_exact_match": 0.0,
"eval_f1_a": 0.03773584905660377,
"eval_f1_m": 0.3570682452135034,
"eval_loss": 0.730707049369812,
"eval_runtime": 12.6051,
"eval_samples_per_second": 19.833,
"eval_steps_per_second": 2.539,
"step": 31
},
{
"epoch": 0.512,
"grad_norm": 10.662591934204102,
"learning_rate": 3.7704918032786885e-05,
"loss": 0.8203,
"step": 32
},
{
"epoch": 0.512,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7325780987739563,
"eval_runtime": 12.5102,
"eval_samples_per_second": 19.984,
"eval_steps_per_second": 2.558,
"step": 32
},
{
"epoch": 0.528,
"grad_norm": 5.810702323913574,
"learning_rate": 3.729508196721312e-05,
"loss": 0.6831,
"step": 33
},
{
"epoch": 0.528,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7296562790870667,
"eval_runtime": 12.5052,
"eval_samples_per_second": 19.992,
"eval_steps_per_second": 2.559,
"step": 33
},
{
"epoch": 0.544,
"grad_norm": 6.527573585510254,
"learning_rate": 3.6885245901639346e-05,
"loss": 0.6942,
"step": 34
},
{
"epoch": 0.544,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7321445345878601,
"eval_runtime": 12.6062,
"eval_samples_per_second": 19.831,
"eval_steps_per_second": 2.538,
"step": 34
},
{
"epoch": 0.56,
"grad_norm": 15.23520565032959,
"learning_rate": 3.6475409836065576e-05,
"loss": 0.7819,
"step": 35
},
{
"epoch": 0.56,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019417475728155338,
"eval_f1_m": 0.3550274288869728,
"eval_loss": 0.7335781455039978,
"eval_runtime": 12.6066,
"eval_samples_per_second": 19.831,
"eval_steps_per_second": 2.538,
"step": 35
},
{
"epoch": 0.576,
"grad_norm": 12.970118522644043,
"learning_rate": 3.6065573770491806e-05,
"loss": 0.5959,
"step": 36
},
{
"epoch": 0.576,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.3489535707722984,
"eval_loss": 0.7367480397224426,
"eval_runtime": 12.5014,
"eval_samples_per_second": 19.998,
"eval_steps_per_second": 2.56,
"step": 36
},
{
"epoch": 0.592,
"grad_norm": 9.984509468078613,
"learning_rate": 3.5655737704918037e-05,
"loss": 0.5692,
"step": 37
},
{
"epoch": 0.592,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.3489535707722984,
"eval_loss": 0.7453652620315552,
"eval_runtime": 12.4005,
"eval_samples_per_second": 20.16,
"eval_steps_per_second": 2.581,
"step": 37
},
{
"epoch": 0.608,
"grad_norm": 9.310087203979492,
"learning_rate": 3.524590163934427e-05,
"loss": 0.8538,
"step": 38
},
{
"epoch": 0.608,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7506445050239563,
"eval_runtime": 12.5041,
"eval_samples_per_second": 19.993,
"eval_steps_per_second": 2.559,
"step": 38
},
{
"epoch": 0.624,
"grad_norm": 17.32353401184082,
"learning_rate": 3.483606557377049e-05,
"loss": 0.7333,
"step": 39
},
{
"epoch": 0.624,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7485976815223694,
"eval_runtime": 12.605,
"eval_samples_per_second": 19.833,
"eval_steps_per_second": 2.539,
"step": 39
},
{
"epoch": 0.64,
"grad_norm": 9.058735847473145,
"learning_rate": 3.442622950819672e-05,
"loss": 0.4784,
"step": 40
},
{
"epoch": 0.64,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7543515563011169,
"eval_runtime": 12.4985,
"eval_samples_per_second": 20.002,
"eval_steps_per_second": 2.56,
"step": 40
},
{
"epoch": 0.656,
"grad_norm": 15.80337142944336,
"learning_rate": 3.401639344262295e-05,
"loss": 0.8348,
"step": 41
},
{
"epoch": 0.656,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7519648671150208,
"eval_runtime": 12.4002,
"eval_samples_per_second": 20.161,
"eval_steps_per_second": 2.581,
"step": 41
},
{
"epoch": 0.672,
"grad_norm": 18.595117568969727,
"learning_rate": 3.360655737704918e-05,
"loss": 0.8212,
"step": 42
},
{
"epoch": 0.672,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7456523180007935,
"eval_runtime": 12.5047,
"eval_samples_per_second": 19.992,
"eval_steps_per_second": 2.559,
"step": 42
},
{
"epoch": 0.688,
"grad_norm": 9.790151596069336,
"learning_rate": 3.319672131147541e-05,
"loss": 0.9515,
"step": 43
},
{
"epoch": 0.688,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.3489535707722984,
"eval_loss": 0.7356074452400208,
"eval_runtime": 12.5029,
"eval_samples_per_second": 19.995,
"eval_steps_per_second": 2.559,
"step": 43
},
{
"epoch": 0.704,
"grad_norm": 19.172956466674805,
"learning_rate": 3.2786885245901635e-05,
"loss": 0.8488,
"step": 44
},
{
"epoch": 0.704,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7283183336257935,
"eval_runtime": 12.6008,
"eval_samples_per_second": 19.84,
"eval_steps_per_second": 2.54,
"step": 44
},
{
"epoch": 0.72,
"grad_norm": 10.353903770446777,
"learning_rate": 3.237704918032787e-05,
"loss": 0.6007,
"step": 45
},
{
"epoch": 0.72,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7258906364440918,
"eval_runtime": 12.4066,
"eval_samples_per_second": 20.15,
"eval_steps_per_second": 2.579,
"step": 45
},
{
"epoch": 0.736,
"grad_norm": 10.669143676757812,
"learning_rate": 3.19672131147541e-05,
"loss": 0.8167,
"step": 46
},
{
"epoch": 0.736,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7255585789680481,
"eval_runtime": 12.6073,
"eval_samples_per_second": 19.83,
"eval_steps_per_second": 2.538,
"step": 46
},
{
"epoch": 0.752,
"grad_norm": 12.651082992553711,
"learning_rate": 3.155737704918033e-05,
"loss": 0.7909,
"step": 47
},
{
"epoch": 0.752,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019047619047619046,
"eval_f1_m": 0.35264647650602043,
"eval_loss": 0.7224531173706055,
"eval_runtime": 12.5046,
"eval_samples_per_second": 19.993,
"eval_steps_per_second": 2.559,
"step": 47
},
{
"epoch": 0.768,
"grad_norm": 16.77728843688965,
"learning_rate": 3.114754098360656e-05,
"loss": 0.7277,
"step": 48
},
{
"epoch": 0.768,
"eval_exact_match": 0.0,
"eval_f1_a": 0.037383177570093455,
"eval_f1_m": 0.3553675649413946,
"eval_loss": 0.7222617268562317,
"eval_runtime": 12.5048,
"eval_samples_per_second": 19.992,
"eval_steps_per_second": 2.559,
"step": 48
},
{
"epoch": 0.784,
"grad_norm": 9.990218162536621,
"learning_rate": 3.073770491803279e-05,
"loss": 0.6484,
"step": 49
},
{
"epoch": 0.784,
"eval_exact_match": 0.0,
"eval_f1_a": 0.018867924528301886,
"eval_f1_m": 0.35094579623391153,
"eval_loss": 0.7233593463897705,
"eval_runtime": 12.3979,
"eval_samples_per_second": 20.165,
"eval_steps_per_second": 2.581,
"step": 49
},
{
"epoch": 0.8,
"grad_norm": 19.805315017700195,
"learning_rate": 3.0327868852459017e-05,
"loss": 0.7366,
"step": 50
},
{
"epoch": 0.8,
"eval_exact_match": 0.0,
"eval_f1_a": 0.05454545454545454,
"eval_f1_m": 0.35851045213290117,
"eval_loss": 0.7192890644073486,
"eval_runtime": 12.394,
"eval_samples_per_second": 20.171,
"eval_steps_per_second": 2.582,
"step": 50
},
{
"epoch": 0.816,
"grad_norm": 6.260676860809326,
"learning_rate": 2.9918032786885248e-05,
"loss": 0.6544,
"step": 51
},
{
"epoch": 0.816,
"eval_exact_match": 0.0,
"eval_f1_a": 0.07207207207207207,
"eval_f1_m": 0.3631490108530924,
"eval_loss": 0.7178593873977661,
"eval_runtime": 12.399,
"eval_samples_per_second": 20.163,
"eval_steps_per_second": 2.581,
"step": 51
},
{
"epoch": 0.832,
"grad_norm": 6.211192607879639,
"learning_rate": 2.9508196721311478e-05,
"loss": 0.5797,
"step": 52
},
{
"epoch": 0.832,
"eval_exact_match": 0.0,
"eval_f1_a": 0.07142857142857144,
"eval_f1_m": 0.36225194873154054,
"eval_loss": 0.7176406383514404,
"eval_runtime": 12.2942,
"eval_samples_per_second": 20.335,
"eval_steps_per_second": 2.603,
"step": 52
},
{
"epoch": 0.848,
"grad_norm": 19.603347778320312,
"learning_rate": 2.9098360655737705e-05,
"loss": 0.613,
"step": 53
},
{
"epoch": 0.848,
"eval_exact_match": 0.0,
"eval_f1_a": 0.08928571428571429,
"eval_f1_m": 0.36680547343812653,
"eval_loss": 0.719406247138977,
"eval_runtime": 12.2965,
"eval_samples_per_second": 20.331,
"eval_steps_per_second": 2.602,
"step": 53
},
{
"epoch": 0.864,
"grad_norm": 7.0200042724609375,
"learning_rate": 2.8688524590163935e-05,
"loss": 0.7246,
"step": 54
},
{
"epoch": 0.864,
"eval_exact_match": 0.0,
"eval_f1_a": 0.03773584905660377,
"eval_f1_m": 0.35666626624009584,
"eval_loss": 0.7230820059776306,
"eval_runtime": 12.3942,
"eval_samples_per_second": 20.171,
"eval_steps_per_second": 2.582,
"step": 54
},
{
"epoch": 0.88,
"grad_norm": 7.6928791999816895,
"learning_rate": 2.8278688524590162e-05,
"loss": 0.723,
"step": 55
},
{
"epoch": 0.88,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7283515334129333,
"eval_runtime": 12.4051,
"eval_samples_per_second": 20.153,
"eval_steps_per_second": 2.58,
"step": 55
},
{
"epoch": 0.896,
"grad_norm": 9.09825611114502,
"learning_rate": 2.7868852459016392e-05,
"loss": 0.526,
"step": 56
},
{
"epoch": 0.896,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.736648440361023,
"eval_runtime": 12.5063,
"eval_samples_per_second": 19.99,
"eval_steps_per_second": 2.559,
"step": 56
},
{
"epoch": 0.912,
"grad_norm": 12.663421630859375,
"learning_rate": 2.7459016393442626e-05,
"loss": 0.7038,
"step": 57
},
{
"epoch": 0.912,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.3489535707722984,
"eval_loss": 0.7481816411018372,
"eval_runtime": 12.4036,
"eval_samples_per_second": 20.155,
"eval_steps_per_second": 2.58,
"step": 57
},
{
"epoch": 0.928,
"grad_norm": 15.922131538391113,
"learning_rate": 2.7049180327868856e-05,
"loss": 0.7805,
"step": 58
},
{
"epoch": 0.928,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7556718587875366,
"eval_runtime": 12.4053,
"eval_samples_per_second": 20.153,
"eval_steps_per_second": 2.58,
"step": 58
},
{
"epoch": 0.944,
"grad_norm": 15.116573333740234,
"learning_rate": 2.6639344262295087e-05,
"loss": 0.7645,
"step": 59
},
{
"epoch": 0.944,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7615898251533508,
"eval_runtime": 12.301,
"eval_samples_per_second": 20.324,
"eval_steps_per_second": 2.601,
"step": 59
},
{
"epoch": 0.96,
"grad_norm": 16.11196517944336,
"learning_rate": 2.6229508196721314e-05,
"loss": 0.802,
"step": 60
},
{
"epoch": 0.96,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.762087881565094,
"eval_runtime": 12.4045,
"eval_samples_per_second": 20.154,
"eval_steps_per_second": 2.58,
"step": 60
},
{
"epoch": 0.976,
"grad_norm": 11.784616470336914,
"learning_rate": 2.5819672131147544e-05,
"loss": 0.7266,
"step": 61
},
{
"epoch": 0.976,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7605761885643005,
"eval_runtime": 12.5067,
"eval_samples_per_second": 19.989,
"eval_steps_per_second": 2.559,
"step": 61
},
{
"epoch": 0.992,
"grad_norm": 5.855625152587891,
"learning_rate": 2.540983606557377e-05,
"loss": 0.6895,
"step": 62
},
{
"epoch": 0.992,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7616230249404907,
"eval_runtime": 12.4021,
"eval_samples_per_second": 20.158,
"eval_steps_per_second": 2.58,
"step": 62
},
{
"epoch": 1.008,
"grad_norm": 12.561187744140625,
"learning_rate": 2.5e-05,
"loss": 0.8057,
"step": 63
},
{
"epoch": 1.008,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.35077572820670067,
"eval_loss": 0.7573671936988831,
"eval_runtime": 12.5096,
"eval_samples_per_second": 19.985,
"eval_steps_per_second": 2.558,
"step": 63
},
{
"epoch": 1.024,
"grad_norm": 10.385027885437012,
"learning_rate": 2.459016393442623e-05,
"loss": 0.7454,
"step": 64
},
{
"epoch": 1.024,
"eval_exact_match": 0.0,
"eval_f1_a": 0.0,
"eval_f1_m": 0.3489535707722984,
"eval_loss": 0.7491288781166077,
"eval_runtime": 12.4029,
"eval_samples_per_second": 20.157,
"eval_steps_per_second": 2.58,
"step": 64
},
{
"epoch": 1.04,
"grad_norm": 15.67496395111084,
"learning_rate": 2.418032786885246e-05,
"loss": 0.877,
"step": 65
},
{
"epoch": 1.04,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019417475728155338,
"eval_f1_m": 0.3550274288869728,
"eval_loss": 0.7429726719856262,
"eval_runtime": 12.4019,
"eval_samples_per_second": 20.158,
"eval_steps_per_second": 2.58,
"step": 65
},
{
"epoch": 1.056,
"grad_norm": 14.67371654510498,
"learning_rate": 2.377049180327869e-05,
"loss": 0.7684,
"step": 66
},
{
"epoch": 1.056,
"eval_exact_match": 0.0,
"eval_f1_a": 0.019230769230769232,
"eval_f1_m": 0.35394517780472173,
"eval_loss": 0.7341777086257935,
"eval_runtime": 12.3064,
"eval_samples_per_second": 20.315,
"eval_steps_per_second": 2.6,
"step": 66
},
{
"epoch": 1.072,
"grad_norm": 11.17091178894043,
"learning_rate": 2.336065573770492e-05,
"loss": 0.6712,
"step": 67
},
{
"epoch": 1.072,
"eval_exact_match": 0.0,
"eval_f1_a": 0.03669724770642201,
"eval_f1_m": 0.34935849464660995,
"eval_loss": 0.724734365940094,
"eval_runtime": 12.4001,
"eval_samples_per_second": 20.161,
"eval_steps_per_second": 2.581,
"step": 67
},
{
"epoch": 1.088,
"grad_norm": 7.933443546295166,
"learning_rate": 2.295081967213115e-05,
"loss": 0.545,
"step": 68
},
{
"epoch": 1.088,
"eval_exact_match": 0.0,
"eval_f1_a": 0.1,
"eval_f1_m": 0.35936518130395684,
"eval_loss": 0.7180312275886536,
"eval_runtime": 12.6064,
"eval_samples_per_second": 19.831,
"eval_steps_per_second": 2.538,
"step": 68
},
{
"epoch": 1.104,
"grad_norm": 6.20879602432251,
"learning_rate": 2.254098360655738e-05,
"loss": 0.5563,
"step": 69
},
{
"epoch": 1.104,
"eval_exact_match": 0.0,
"eval_f1_a": 0.15503875968992248,
"eval_f1_m": 0.3728975672853224,
"eval_loss": 0.7145351767539978,
"eval_runtime": 12.3015,
"eval_samples_per_second": 20.323,
"eval_steps_per_second": 2.601,
"step": 69
},
{
"epoch": 1.12,
"grad_norm": 5.216189861297607,
"learning_rate": 2.2131147540983607e-05,
"loss": 0.5797,
"step": 70
},
{
"epoch": 1.12,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2222222222222222,
"eval_f1_m": 0.3975035168912719,
"eval_loss": 0.7122148275375366,
"eval_runtime": 12.2962,
"eval_samples_per_second": 20.331,
"eval_steps_per_second": 2.602,
"step": 70
},
{
"epoch": 1.1360000000000001,
"grad_norm": 4.51043701171875,
"learning_rate": 2.1721311475409837e-05,
"loss": 0.7785,
"step": 71
},
{
"epoch": 1.1360000000000001,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2571428571428571,
"eval_f1_m": 0.4030671709243137,
"eval_loss": 0.7087773680686951,
"eval_runtime": 12.2926,
"eval_samples_per_second": 20.337,
"eval_steps_per_second": 2.603,
"step": 71
},
{
"epoch": 1.152,
"grad_norm": 9.325611114501953,
"learning_rate": 2.1311475409836064e-05,
"loss": 0.757,
"step": 72
},
{
"epoch": 1.152,
"eval_exact_match": 0.0,
"eval_f1_a": 0.3013698630136986,
"eval_f1_m": 0.4158932903830863,
"eval_loss": 0.7072968482971191,
"eval_runtime": 12.3042,
"eval_samples_per_second": 20.318,
"eval_steps_per_second": 2.601,
"step": 72
},
{
"epoch": 1.168,
"grad_norm": 15.549028396606445,
"learning_rate": 2.0901639344262298e-05,
"loss": 0.85,
"step": 73
},
{
"epoch": 1.168,
"eval_exact_match": 0.0,
"eval_f1_a": 0.3013698630136986,
"eval_f1_m": 0.4126730412444698,
"eval_loss": 0.7068906426429749,
"eval_runtime": 12.3976,
"eval_samples_per_second": 20.165,
"eval_steps_per_second": 2.581,
"step": 73
},
{
"epoch": 1.184,
"grad_norm": 7.413804531097412,
"learning_rate": 2.0491803278688525e-05,
"loss": 0.6694,
"step": 74
},
{
"epoch": 1.184,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.32258064516129026,
"eval_f1_m": 0.41783029895274804,
"eval_loss": 0.7051249742507935,
"eval_runtime": 12.3988,
"eval_samples_per_second": 20.163,
"eval_steps_per_second": 2.581,
"step": 74
},
{
"epoch": 1.2,
"grad_norm": 13.059216499328613,
"learning_rate": 2.0081967213114755e-05,
"loss": 0.7654,
"step": 75
},
{
"epoch": 1.2,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.3658536585365854,
"eval_f1_m": 0.4367546285913633,
"eval_loss": 0.7052109241485596,
"eval_runtime": 12.3021,
"eval_samples_per_second": 20.322,
"eval_steps_per_second": 2.601,
"step": 75
},
{
"epoch": 1.216,
"grad_norm": 18.173431396484375,
"learning_rate": 1.9672131147540985e-05,
"loss": 0.7874,
"step": 76
},
{
"epoch": 1.216,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.393063583815029,
"eval_f1_m": 0.4441113308460247,
"eval_loss": 0.7044414281845093,
"eval_runtime": 12.3005,
"eval_samples_per_second": 20.324,
"eval_steps_per_second": 2.602,
"step": 76
},
{
"epoch": 1.232,
"grad_norm": 5.066444396972656,
"learning_rate": 1.9262295081967212e-05,
"loss": 0.585,
"step": 77
},
{
"epoch": 1.232,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.4067796610169491,
"eval_f1_m": 0.44523701921661113,
"eval_loss": 0.7045234441757202,
"eval_runtime": 12.4024,
"eval_samples_per_second": 20.157,
"eval_steps_per_second": 2.58,
"step": 77
},
{
"epoch": 1.248,
"grad_norm": 15.81064224243164,
"learning_rate": 1.8852459016393442e-05,
"loss": 0.7125,
"step": 78
},
{
"epoch": 1.248,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.4347826086956522,
"eval_f1_m": 0.45324236421175196,
"eval_loss": 0.7049570083618164,
"eval_runtime": 12.2997,
"eval_samples_per_second": 20.326,
"eval_steps_per_second": 2.602,
"step": 78
},
{
"epoch": 1.264,
"grad_norm": 15.786576271057129,
"learning_rate": 1.8442622950819673e-05,
"loss": 0.674,
"step": 79
},
{
"epoch": 1.264,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.45263157894736844,
"eval_f1_m": 0.45977191742497864,
"eval_loss": 0.7056093811988831,
"eval_runtime": 12.297,
"eval_samples_per_second": 20.33,
"eval_steps_per_second": 2.602,
"step": 79
},
{
"epoch": 1.28,
"grad_norm": 20.205602645874023,
"learning_rate": 1.8032786885245903e-05,
"loss": 0.7886,
"step": 80
},
{
"epoch": 1.28,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.450261780104712,
"eval_f1_m": 0.45734237417910883,
"eval_loss": 0.706125020980835,
"eval_runtime": 12.2957,
"eval_samples_per_second": 20.332,
"eval_steps_per_second": 2.603,
"step": 80
},
{
"epoch": 1.296,
"grad_norm": 7.352344036102295,
"learning_rate": 1.7622950819672133e-05,
"loss": 0.521,
"step": 81
},
{
"epoch": 1.296,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.44329896907216493,
"eval_f1_m": 0.44956970467174556,
"eval_loss": 0.7077500224113464,
"eval_runtime": 12.4027,
"eval_samples_per_second": 20.157,
"eval_steps_per_second": 2.58,
"step": 81
},
{
"epoch": 1.312,
"grad_norm": 9.654190063476562,
"learning_rate": 1.721311475409836e-05,
"loss": 0.7754,
"step": 82
},
{
"epoch": 1.312,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.45226130653266333,
"eval_f1_m": 0.45042921930677027,
"eval_loss": 0.7080312371253967,
"eval_runtime": 12.2984,
"eval_samples_per_second": 20.328,
"eval_steps_per_second": 2.602,
"step": 82
},
{
"epoch": 1.328,
"grad_norm": 19.156660079956055,
"learning_rate": 1.680327868852459e-05,
"loss": 0.7493,
"step": 83
},
{
"epoch": 1.328,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4577114427860697,
"eval_f1_m": 0.4507745089377742,
"eval_loss": 0.7095780968666077,
"eval_runtime": 12.2981,
"eval_samples_per_second": 20.328,
"eval_steps_per_second": 2.602,
"step": 83
},
{
"epoch": 1.3439999999999999,
"grad_norm": 4.014058589935303,
"learning_rate": 1.6393442622950818e-05,
"loss": 0.7076,
"step": 84
},
{
"epoch": 1.3439999999999999,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.45999999999999996,
"eval_f1_m": 0.45553641369967895,
"eval_loss": 0.7096328139305115,
"eval_runtime": 12.298,
"eval_samples_per_second": 20.329,
"eval_steps_per_second": 2.602,
"step": 84
},
{
"epoch": 1.3599999999999999,
"grad_norm": 12.296324729919434,
"learning_rate": 1.598360655737705e-05,
"loss": 0.6649,
"step": 85
},
{
"epoch": 1.3599999999999999,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4577114427860697,
"eval_f1_m": 0.4507745089377742,
"eval_loss": 0.7094843983650208,
"eval_runtime": 12.2932,
"eval_samples_per_second": 20.337,
"eval_steps_per_second": 2.603,
"step": 85
},
{
"epoch": 1.376,
"grad_norm": 8.357748985290527,
"learning_rate": 1.557377049180328e-05,
"loss": 0.6199,
"step": 86
},
{
"epoch": 1.376,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.45544554455445546,
"eval_f1_m": 0.44890376063845455,
"eval_loss": 0.7101484537124634,
"eval_runtime": 12.2925,
"eval_samples_per_second": 20.338,
"eval_steps_per_second": 2.603,
"step": 86
},
{
"epoch": 1.392,
"grad_norm": 5.569490909576416,
"learning_rate": 1.5163934426229509e-05,
"loss": 0.7666,
"step": 87
},
{
"epoch": 1.392,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4577114427860697,
"eval_f1_m": 0.4507745089377742,
"eval_loss": 0.7110859155654907,
"eval_runtime": 12.3978,
"eval_samples_per_second": 20.165,
"eval_steps_per_second": 2.581,
"step": 87
},
{
"epoch": 1.408,
"grad_norm": 20.458703994750977,
"learning_rate": 1.4754098360655739e-05,
"loss": 0.7505,
"step": 88
},
{
"epoch": 1.408,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4577114427860697,
"eval_f1_m": 0.4507745089377742,
"eval_loss": 0.7112030982971191,
"eval_runtime": 12.4088,
"eval_samples_per_second": 20.147,
"eval_steps_per_second": 2.579,
"step": 88
},
{
"epoch": 1.424,
"grad_norm": 17.410215377807617,
"learning_rate": 1.4344262295081968e-05,
"loss": 0.7426,
"step": 89
},
{
"epoch": 1.424,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.45,
"eval_f1_m": 0.4473113054745707,
"eval_loss": 0.7114999890327454,
"eval_runtime": 12.4068,
"eval_samples_per_second": 20.15,
"eval_steps_per_second": 2.579,
"step": 89
},
{
"epoch": 1.44,
"grad_norm": 12.000615119934082,
"learning_rate": 1.3934426229508196e-05,
"loss": 0.8439,
"step": 90
},
{
"epoch": 1.44,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.45,
"eval_f1_m": 0.4473113054745707,
"eval_loss": 0.7099843621253967,
"eval_runtime": 12.3004,
"eval_samples_per_second": 20.325,
"eval_steps_per_second": 2.602,
"step": 90
},
{
"epoch": 1.456,
"grad_norm": 9.254725456237793,
"learning_rate": 1.3524590163934428e-05,
"loss": 0.7011,
"step": 91
},
{
"epoch": 1.456,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.44102564102564107,
"eval_f1_m": 0.44650038170446327,
"eval_loss": 0.7092187404632568,
"eval_runtime": 12.6142,
"eval_samples_per_second": 19.819,
"eval_steps_per_second": 2.537,
"step": 91
},
{
"epoch": 1.472,
"grad_norm": 9.848048210144043,
"learning_rate": 1.3114754098360657e-05,
"loss": 0.689,
"step": 92
},
{
"epoch": 1.472,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.45263157894736844,
"eval_f1_m": 0.4581459923296658,
"eval_loss": 0.7082812786102295,
"eval_runtime": 12.4011,
"eval_samples_per_second": 20.159,
"eval_steps_per_second": 2.58,
"step": 92
},
{
"epoch": 1.488,
"grad_norm": 17.459680557250977,
"learning_rate": 1.2704918032786885e-05,
"loss": 0.8424,
"step": 93
},
{
"epoch": 1.488,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.450261780104712,
"eval_f1_m": 0.45620235773296997,
"eval_loss": 0.7073437571525574,
"eval_runtime": 12.4025,
"eval_samples_per_second": 20.157,
"eval_steps_per_second": 2.58,
"step": 93
},
{
"epoch": 1.504,
"grad_norm": 7.91733980178833,
"learning_rate": 1.2295081967213116e-05,
"loss": 0.6224,
"step": 94
},
{
"epoch": 1.504,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4468085106382979,
"eval_f1_m": 0.4566639709496852,
"eval_loss": 0.7057031393051147,
"eval_runtime": 12.4036,
"eval_samples_per_second": 20.155,
"eval_steps_per_second": 2.58,
"step": 94
},
{
"epoch": 1.52,
"grad_norm": 17.14729881286621,
"learning_rate": 1.1885245901639344e-05,
"loss": 0.5743,
"step": 95
},
{
"epoch": 1.52,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.4491978609625668,
"eval_f1_m": 0.4579678258249688,
"eval_loss": 0.7053046822547913,
"eval_runtime": 12.4028,
"eval_samples_per_second": 20.157,
"eval_steps_per_second": 2.58,
"step": 95
},
{
"epoch": 1.536,
"grad_norm": 10.893065452575684,
"learning_rate": 1.1475409836065575e-05,
"loss": 0.7041,
"step": 96
},
{
"epoch": 1.536,
"eval_exact_match": 0.04081632653061224,
"eval_f1_a": 0.44324324324324327,
"eval_f1_m": 0.45810145070349156,
"eval_loss": 0.7051210999488831,
"eval_runtime": 12.3907,
"eval_samples_per_second": 20.176,
"eval_steps_per_second": 2.583,
"step": 96
},
{
"epoch": 1.552,
"grad_norm": 9.716265678405762,
"learning_rate": 1.1065573770491803e-05,
"loss": 0.6443,
"step": 97
},
{
"epoch": 1.552,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.4262295081967213,
"eval_f1_m": 0.4496709356403233,
"eval_loss": 0.7061210870742798,
"eval_runtime": 12.2983,
"eval_samples_per_second": 20.328,
"eval_steps_per_second": 2.602,
"step": 97
},
{
"epoch": 1.568,
"grad_norm": 17.185867309570312,
"learning_rate": 1.0655737704918032e-05,
"loss": 0.6588,
"step": 98
},
{
"epoch": 1.568,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.430939226519337,
"eval_f1_m": 0.4559391572146673,
"eval_loss": 0.704464852809906,
"eval_runtime": 12.4036,
"eval_samples_per_second": 20.155,
"eval_steps_per_second": 2.58,
"step": 98
},
{
"epoch": 1.584,
"grad_norm": 9.352198600769043,
"learning_rate": 1.0245901639344262e-05,
"loss": 0.6669,
"step": 99
},
{
"epoch": 1.584,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.41860465116279066,
"eval_f1_m": 0.45876048721186774,
"eval_loss": 0.7052500247955322,
"eval_runtime": 12.2869,
"eval_samples_per_second": 20.347,
"eval_steps_per_second": 2.604,
"step": 99
},
{
"epoch": 1.6,
"grad_norm": 10.983183860778809,
"learning_rate": 9.836065573770493e-06,
"loss": 0.667,
"step": 100
},
{
"epoch": 1.6,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.3976608187134503,
"eval_f1_m": 0.44378116214850916,
"eval_loss": 0.7053242325782776,
"eval_runtime": 12.292,
"eval_samples_per_second": 20.338,
"eval_steps_per_second": 2.603,
"step": 100
},
{
"epoch": 1.616,
"grad_norm": 8.352677345275879,
"learning_rate": 9.426229508196721e-06,
"loss": 0.6028,
"step": 101
},
{
"epoch": 1.616,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.3780487804878049,
"eval_f1_m": 0.440360093421318,
"eval_loss": 0.7049765586853027,
"eval_runtime": 12.3969,
"eval_samples_per_second": 20.166,
"eval_steps_per_second": 2.581,
"step": 101
},
{
"epoch": 1.6320000000000001,
"grad_norm": 18.418481826782227,
"learning_rate": 9.016393442622952e-06,
"loss": 0.7084,
"step": 102
},
{
"epoch": 1.6320000000000001,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.3827160493827161,
"eval_f1_m": 0.4446715982430268,
"eval_loss": 0.7034218907356262,
"eval_runtime": 12.3991,
"eval_samples_per_second": 20.163,
"eval_steps_per_second": 2.581,
"step": 102
},
{
"epoch": 1.6480000000000001,
"grad_norm": 10.633659362792969,
"learning_rate": 8.60655737704918e-06,
"loss": 0.6221,
"step": 103
},
{
"epoch": 1.6480000000000001,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.33766233766233766,
"eval_f1_m": 0.42304736080246286,
"eval_loss": 0.7042617201805115,
"eval_runtime": 12.2954,
"eval_samples_per_second": 20.333,
"eval_steps_per_second": 2.603,
"step": 103
},
{
"epoch": 1.6640000000000001,
"grad_norm": 12.377849578857422,
"learning_rate": 8.196721311475409e-06,
"loss": 0.5413,
"step": 104
},
{
"epoch": 1.6640000000000001,
"eval_exact_match": 0.02040816326530612,
"eval_f1_a": 0.3289473684210526,
"eval_f1_m": 0.4253797223184978,
"eval_loss": 0.7033711075782776,
"eval_runtime": 12.3003,
"eval_samples_per_second": 20.325,
"eval_steps_per_second": 2.602,
"step": 104
},
{
"epoch": 1.6800000000000002,
"grad_norm": 12.551422119140625,
"learning_rate": 7.78688524590164e-06,
"loss": 0.682,
"step": 105
},
{
"epoch": 1.6800000000000002,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2953020134228188,
"eval_f1_m": 0.4078515022392573,
"eval_loss": 0.7049804925918579,
"eval_runtime": 12.495,
"eval_samples_per_second": 20.008,
"eval_steps_per_second": 2.561,
"step": 105
},
{
"epoch": 1.696,
"grad_norm": 8.237449645996094,
"learning_rate": 7.3770491803278695e-06,
"loss": 0.7448,
"step": 106
},
{
"epoch": 1.696,
"eval_exact_match": 0.0,
"eval_f1_a": 0.3013698630136986,
"eval_f1_m": 0.41549131140967865,
"eval_loss": 0.7063398361206055,
"eval_runtime": 12.4967,
"eval_samples_per_second": 20.005,
"eval_steps_per_second": 2.561,
"step": 106
},
{
"epoch": 1.712,
"grad_norm": 7.336461067199707,
"learning_rate": 6.967213114754098e-06,
"loss": 0.4493,
"step": 107
},
{
"epoch": 1.712,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2553191489361702,
"eval_f1_m": 0.4051889720257066,
"eval_loss": 0.7075039148330688,
"eval_runtime": 12.403,
"eval_samples_per_second": 20.156,
"eval_steps_per_second": 2.58,
"step": 107
},
{
"epoch": 1.728,
"grad_norm": 8.348127365112305,
"learning_rate": 6.557377049180328e-06,
"loss": 0.8977,
"step": 108
},
{
"epoch": 1.728,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2608695652173913,
"eval_f1_m": 0.4141215926930211,
"eval_loss": 0.7066875100135803,
"eval_runtime": 12.3958,
"eval_samples_per_second": 20.168,
"eval_steps_per_second": 2.582,
"step": 108
},
{
"epoch": 1.744,
"grad_norm": 6.762801170349121,
"learning_rate": 6.147540983606558e-06,
"loss": 0.6888,
"step": 109
},
{
"epoch": 1.744,
"eval_exact_match": 0.0,
"eval_f1_a": 0.23529411764705882,
"eval_f1_m": 0.4052051689806791,
"eval_loss": 0.7079609632492065,
"eval_runtime": 12.2957,
"eval_samples_per_second": 20.332,
"eval_steps_per_second": 2.603,
"step": 109
},
{
"epoch": 1.76,
"grad_norm": 7.54478645324707,
"learning_rate": 5.737704918032787e-06,
"loss": 0.7079,
"step": 110
},
{
"epoch": 1.76,
"eval_exact_match": 0.0,
"eval_f1_a": 0.196969696969697,
"eval_f1_m": 0.3930353659945496,
"eval_loss": 0.7083203196525574,
"eval_runtime": 12.4001,
"eval_samples_per_second": 20.161,
"eval_steps_per_second": 2.581,
"step": 110
},
{
"epoch": 1.776,
"grad_norm": 6.84658145904541,
"learning_rate": 5.327868852459016e-06,
"loss": 0.6766,
"step": 111
},
{
"epoch": 1.776,
"eval_exact_match": 0.0,
"eval_f1_a": 0.2105263157894737,
"eval_f1_m": 0.39676157176157173,
"eval_loss": 0.7091640830039978,
"eval_runtime": 12.4008,
"eval_samples_per_second": 20.16,
"eval_steps_per_second": 2.58,
"step": 111
},
{
"epoch": 1.792,
"grad_norm": 16.06954002380371,
"learning_rate": 4.918032786885246e-06,
"loss": 0.6298,
"step": 112
},
{
"epoch": 1.792,
"eval_exact_match": 0.0,
"eval_f1_a": 0.15748031496062992,
"eval_f1_m": 0.37861616841208684,
"eval_loss": 0.7095195055007935,
"eval_runtime": 12.4951,
"eval_samples_per_second": 20.008,
"eval_steps_per_second": 2.561,
"step": 112
},
{
"epoch": 1.808,
"grad_norm": 5.902178764343262,
"learning_rate": 4.508196721311476e-06,
"loss": 0.5864,
"step": 113
},
{
"epoch": 1.808,
"eval_exact_match": 0.0,
"eval_f1_a": 0.15873015873015875,
"eval_f1_m": 0.38075416646845217,
"eval_loss": 0.7111679911613464,
"eval_runtime": 12.4019,
"eval_samples_per_second": 20.158,
"eval_steps_per_second": 2.58,
"step": 113
},
{
"epoch": 1.8239999999999998,
"grad_norm": 17.344051361083984,
"learning_rate": 4.098360655737704e-06,
"loss": 0.7265,
"step": 114
},
{
"epoch": 1.8239999999999998,
"eval_exact_match": 0.0,
"eval_f1_a": 0.16,
"eval_f1_m": 0.38211471068613934,
"eval_loss": 0.7119726538658142,
"eval_runtime": 12.3958,
"eval_samples_per_second": 20.168,
"eval_steps_per_second": 2.582,
"step": 114
},
{
"epoch": 1.8399999999999999,
"grad_norm": 13.185153007507324,
"learning_rate": 3.6885245901639347e-06,
"loss": 0.5856,
"step": 115
},
{
"epoch": 1.8399999999999999,
"eval_exact_match": 0.0,
"eval_f1_a": 0.15873015873015875,
"eval_f1_m": 0.37997671262977384,
"eval_loss": 0.7116367220878601,
"eval_runtime": 12.3902,
"eval_samples_per_second": 20.177,
"eval_steps_per_second": 2.583,
"step": 115
},
{
"epoch": 1.8559999999999999,
"grad_norm": 6.701231956481934,
"learning_rate": 3.278688524590164e-06,
"loss": 0.718,
"step": 116
},
{
"epoch": 1.8559999999999999,
"eval_exact_match": 0.0,
"eval_f1_a": 0.09836065573770492,
"eval_f1_m": 0.361842380975034,
"eval_loss": 0.7136015892028809,
"eval_runtime": 12.294,
"eval_samples_per_second": 20.335,
"eval_steps_per_second": 2.603,
"step": 116
},
{
"epoch": 1.8719999999999999,
"grad_norm": 11.730875015258789,
"learning_rate": 2.8688524590163937e-06,
"loss": 0.7096,
"step": 117
},
{
"epoch": 1.8719999999999999,
"eval_exact_match": 0.0,
"eval_f1_a": 0.08333333333333334,
"eval_f1_m": 0.35354549079038877,
"eval_loss": 0.7125195264816284,
"eval_runtime": 12.3992,
"eval_samples_per_second": 20.163,
"eval_steps_per_second": 2.581,
"step": 117
},
{
"epoch": 1.888,
"grad_norm": 11.764548301696777,
"learning_rate": 2.459016393442623e-06,
"loss": 0.7648,
"step": 118
},
{
"epoch": 1.888,
"eval_exact_match": 0.0,
"eval_f1_a": 0.10169491525423728,
"eval_f1_m": 0.36885033900340025,
"eval_loss": 0.7145312428474426,
"eval_runtime": 12.2937,
"eval_samples_per_second": 20.336,
"eval_steps_per_second": 2.603,
"step": 118
},
{
"epoch": 1.904,
"grad_norm": 7.357093334197998,
"learning_rate": 2.049180327868852e-06,
"loss": 0.66,
"step": 119
},
{
"epoch": 1.904,
"eval_exact_match": 0.0,
"eval_f1_a": 0.08403361344537816,
"eval_f1_m": 0.3581008843763946,
"eval_loss": 0.7142617106437683,
"eval_runtime": 12.2932,
"eval_samples_per_second": 20.336,
"eval_steps_per_second": 2.603,
"step": 119
},
{
"epoch": 1.92,
"grad_norm": 12.222306251525879,
"learning_rate": 1.639344262295082e-06,
"loss": 0.7134,
"step": 120
},
{
"epoch": 1.92,
"eval_exact_match": 0.0,
"eval_f1_a": 0.08620689655172413,
"eval_f1_m": 0.3664693866224478,
"eval_loss": 0.7143398523330688,
"eval_runtime": 12.4037,
"eval_samples_per_second": 20.155,
"eval_steps_per_second": 2.58,
"step": 120
},
{
"epoch": 1.936,
"grad_norm": 11.94378662109375,
"learning_rate": 1.2295081967213116e-06,
"loss": 0.6269,
"step": 121
},
{
"epoch": 1.936,
"eval_exact_match": 0.0,
"eval_f1_a": 0.06837606837606838,
"eval_f1_m": 0.35452724712928796,
"eval_loss": 0.7161992192268372,
"eval_runtime": 12.2894,
"eval_samples_per_second": 20.343,
"eval_steps_per_second": 2.604,
"step": 121
},
{
"epoch": 1.952,
"grad_norm": 10.898783683776855,
"learning_rate": 8.19672131147541e-07,
"loss": 0.6692,
"step": 122
},
{
"epoch": 1.952,
"eval_exact_match": 0.0,
"eval_f1_a": 0.06779661016949154,
"eval_f1_m": 0.35377437355228475,
"eval_loss": 0.7155507802963257,
"eval_runtime": 12.2897,
"eval_samples_per_second": 20.342,
"eval_steps_per_second": 2.604,
"step": 122
},
{
"epoch": 1.968,
"grad_norm": 11.34163761138916,
"learning_rate": 4.098360655737705e-07,
"loss": 0.7837,
"step": 123
},
{
"epoch": 1.968,
"eval_exact_match": 0.0,
"eval_f1_a": 0.06837606837606838,
"eval_f1_m": 0.3542995836363184,
"eval_loss": 0.7164883017539978,
"eval_runtime": 12.3974,
"eval_samples_per_second": 20.166,
"eval_steps_per_second": 2.581,
"step": 123
},
{
"epoch": 1.984,
"grad_norm": 9.480073928833008,
"learning_rate": 0.0,
"loss": 0.5815,
"step": 124
},
{
"epoch": 1.984,
"eval_exact_match": 0.0,
"eval_f1_a": 0.053097345132743355,
"eval_f1_m": 0.35416089179594584,
"eval_loss": 0.7167656421661377,
"eval_runtime": 12.2934,
"eval_samples_per_second": 20.336,
"eval_steps_per_second": 2.603,
"step": 124
},
{
"epoch": 1.984,
"step": 124,
"total_flos": 107566092058624.0,
"train_loss": 0.7248697588520665,
"train_runtime": 2001.1585,
"train_samples_per_second": 0.999,
"train_steps_per_second": 0.062
}
],
"logging_steps": 1,
"max_steps": 124,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 107566092058624.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}