|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 86.0, |
|
"global_step": 2752, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6175584724649537, |
|
"eval_f1": 0.0769764216366158, |
|
"eval_loss": 1.250605583190918, |
|
"eval_precision": 0.06442251886244922, |
|
"eval_recall": 0.09560723514211886, |
|
"eval_runtime": 3.5402, |
|
"eval_samples_per_second": 26.553, |
|
"eval_steps_per_second": 3.39, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6520655932770609, |
|
"eval_f1": 0.18170731707317075, |
|
"eval_loss": 1.0688396692276, |
|
"eval_precision": 0.14063237376120813, |
|
"eval_recall": 0.2566752799310939, |
|
"eval_runtime": 3.2469, |
|
"eval_samples_per_second": 28.95, |
|
"eval_steps_per_second": 3.696, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.733239132859852, |
|
"eval_f1": 0.26462128475551294, |
|
"eval_loss": 0.8916715383529663, |
|
"eval_precision": 0.21036585365853658, |
|
"eval_recall": 0.35658914728682173, |
|
"eval_runtime": 3.5217, |
|
"eval_samples_per_second": 26.692, |
|
"eval_steps_per_second": 3.407, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7502695868813446, |
|
"eval_f1": 0.31227821149751595, |
|
"eval_loss": 0.8893136382102966, |
|
"eval_precision": 0.2655401327700664, |
|
"eval_recall": 0.3789836347975883, |
|
"eval_runtime": 3.1549, |
|
"eval_samples_per_second": 29.795, |
|
"eval_steps_per_second": 3.804, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7515338563938572, |
|
"eval_f1": 0.3259361997226075, |
|
"eval_loss": 0.8177661299705505, |
|
"eval_precision": 0.2727800348229832, |
|
"eval_recall": 0.40482342807924204, |
|
"eval_runtime": 3.1577, |
|
"eval_samples_per_second": 29.768, |
|
"eval_steps_per_second": 3.8, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7685271260179228, |
|
"eval_f1": 0.3471241170534813, |
|
"eval_loss": 0.818898618221283, |
|
"eval_precision": 0.2847682119205298, |
|
"eval_recall": 0.4444444444444444, |
|
"eval_runtime": 3.5275, |
|
"eval_samples_per_second": 26.648, |
|
"eval_steps_per_second": 3.402, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7693451827613134, |
|
"eval_f1": 0.39416058394160586, |
|
"eval_loss": 0.8939387798309326, |
|
"eval_precision": 0.34198860037998735, |
|
"eval_recall": 0.46511627906976744, |
|
"eval_runtime": 4.3875, |
|
"eval_samples_per_second": 21.424, |
|
"eval_steps_per_second": 2.735, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7565537500464805, |
|
"eval_f1": 0.3823943661971831, |
|
"eval_loss": 0.8892739415168762, |
|
"eval_precision": 0.323406789755807, |
|
"eval_recall": 0.46770025839793283, |
|
"eval_runtime": 3.5968, |
|
"eval_samples_per_second": 26.135, |
|
"eval_steps_per_second": 3.336, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7693451827613134, |
|
"eval_f1": 0.41654247391952315, |
|
"eval_loss": 0.9633013010025024, |
|
"eval_precision": 0.3670387393302692, |
|
"eval_recall": 0.48148148148148145, |
|
"eval_runtime": 3.1245, |
|
"eval_samples_per_second": 30.085, |
|
"eval_steps_per_second": 3.841, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.765478005428922, |
|
"eval_f1": 0.3988900450919181, |
|
"eval_loss": 0.9256911873817444, |
|
"eval_precision": 0.3339140534262485, |
|
"eval_recall": 0.49526270456503013, |
|
"eval_runtime": 3.0684, |
|
"eval_samples_per_second": 30.635, |
|
"eval_steps_per_second": 3.911, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7651061614546536, |
|
"eval_f1": 0.4374537379718727, |
|
"eval_loss": 1.0472618341445923, |
|
"eval_precision": 0.3835171966255678, |
|
"eval_recall": 0.5090439276485789, |
|
"eval_runtime": 15.8515, |
|
"eval_samples_per_second": 5.93, |
|
"eval_steps_per_second": 0.757, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7513107500092961, |
|
"eval_f1": 0.4116819141449683, |
|
"eval_loss": 1.0901930332183838, |
|
"eval_precision": 0.34800713860797144, |
|
"eval_recall": 0.5038759689922481, |
|
"eval_runtime": 3.3377, |
|
"eval_samples_per_second": 28.163, |
|
"eval_steps_per_second": 3.595, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7735842040679731, |
|
"eval_f1": 0.4424198250728863, |
|
"eval_loss": 1.0164769887924194, |
|
"eval_precision": 0.38344914718888184, |
|
"eval_recall": 0.5228251507321274, |
|
"eval_runtime": 3.1559, |
|
"eval_samples_per_second": 29.785, |
|
"eval_steps_per_second": 3.802, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7737329416576805, |
|
"eval_f1": 0.46205860255447034, |
|
"eval_loss": 1.0768243074417114, |
|
"eval_precision": 0.4097268487674883, |
|
"eval_recall": 0.5297157622739018, |
|
"eval_runtime": 3.0915, |
|
"eval_samples_per_second": 30.406, |
|
"eval_steps_per_second": 3.882, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7817647715018778, |
|
"eval_f1": 0.4762264150943396, |
|
"eval_loss": 1.1149170398712158, |
|
"eval_precision": 0.4237743451981195, |
|
"eval_recall": 0.5434969853574505, |
|
"eval_runtime": 3.2505, |
|
"eval_samples_per_second": 28.919, |
|
"eval_steps_per_second": 3.692, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 1.6875e-05, |
|
"loss": 0.4087, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7796452608485479, |
|
"eval_f1": 0.4698749526335733, |
|
"eval_loss": 1.1298694610595703, |
|
"eval_precision": 0.41948579161028415, |
|
"eval_recall": 0.5340223944875108, |
|
"eval_runtime": 3.0471, |
|
"eval_samples_per_second": 30.849, |
|
"eval_steps_per_second": 3.938, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7883464098464285, |
|
"eval_f1": 0.4673040152963671, |
|
"eval_loss": 1.1149996519088745, |
|
"eval_precision": 0.4202200825309491, |
|
"eval_recall": 0.5262704565030146, |
|
"eval_runtime": 3.1689, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.787, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7801286580150969, |
|
"eval_f1": 0.45127436281859074, |
|
"eval_loss": 1.1859441995620728, |
|
"eval_precision": 0.39946914399469147, |
|
"eval_recall": 0.5185185185185185, |
|
"eval_runtime": 7.3417, |
|
"eval_samples_per_second": 12.803, |
|
"eval_steps_per_second": 1.634, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7609043245454208, |
|
"eval_f1": 0.4414210128495843, |
|
"eval_loss": 1.2266000509262085, |
|
"eval_precision": 0.39326599326599326, |
|
"eval_recall": 0.5030146425495263, |
|
"eval_runtime": 3.4668, |
|
"eval_samples_per_second": 27.114, |
|
"eval_steps_per_second": 3.461, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7837355445655003, |
|
"eval_f1": 0.4707198806415516, |
|
"eval_loss": 1.1946580410003662, |
|
"eval_precision": 0.4151315789473684, |
|
"eval_recall": 0.5434969853574505, |
|
"eval_runtime": 3.1596, |
|
"eval_samples_per_second": 29.75, |
|
"eval_steps_per_second": 3.798, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7890900977949652, |
|
"eval_f1": 0.4974698326196964, |
|
"eval_loss": 1.2461358308792114, |
|
"eval_precision": 0.4538352272727273, |
|
"eval_recall": 0.5503875968992248, |
|
"eval_runtime": 3.1747, |
|
"eval_samples_per_second": 29.609, |
|
"eval_steps_per_second": 3.78, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7804261331945116, |
|
"eval_f1": 0.4870506378044067, |
|
"eval_loss": 1.2504911422729492, |
|
"eval_precision": 0.4417952314165498, |
|
"eval_recall": 0.5426356589147286, |
|
"eval_runtime": 3.1491, |
|
"eval_samples_per_second": 29.849, |
|
"eval_steps_per_second": 3.811, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7844420481166102, |
|
"eval_f1": 0.4932249322493225, |
|
"eval_loss": 1.287372350692749, |
|
"eval_precision": 0.4479606188466948, |
|
"eval_recall": 0.5486649440137812, |
|
"eval_runtime": 3.7236, |
|
"eval_samples_per_second": 25.244, |
|
"eval_steps_per_second": 3.223, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7877886438850259, |
|
"eval_f1": 0.49941107184923444, |
|
"eval_loss": 1.277133584022522, |
|
"eval_precision": 0.4588744588744589, |
|
"eval_recall": 0.5478036175710594, |
|
"eval_runtime": 3.2052, |
|
"eval_samples_per_second": 29.328, |
|
"eval_steps_per_second": 3.744, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7892760197820994, |
|
"eval_f1": 0.4747201852566576, |
|
"eval_loss": 1.2719131708145142, |
|
"eval_precision": 0.43006993006993005, |
|
"eval_recall": 0.5297157622739018, |
|
"eval_runtime": 3.0524, |
|
"eval_samples_per_second": 30.795, |
|
"eval_steps_per_second": 3.931, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7896478637563679, |
|
"eval_f1": 0.49407265774378584, |
|
"eval_loss": 1.2584308385849, |
|
"eval_precision": 0.4442916093535076, |
|
"eval_recall": 0.5564168819982773, |
|
"eval_runtime": 3.7701, |
|
"eval_samples_per_second": 24.933, |
|
"eval_steps_per_second": 3.183, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7868590339493549, |
|
"eval_f1": 0.4920077972709551, |
|
"eval_loss": 1.3040637969970703, |
|
"eval_precision": 0.4494301994301994, |
|
"eval_recall": 0.5434969853574505, |
|
"eval_runtime": 3.4169, |
|
"eval_samples_per_second": 27.51, |
|
"eval_steps_per_second": 3.512, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7880861190644406, |
|
"eval_f1": 0.4916158536585366, |
|
"eval_loss": 1.3579275608062744, |
|
"eval_precision": 0.4408749145591251, |
|
"eval_recall": 0.5555555555555556, |
|
"eval_runtime": 3.4487, |
|
"eval_samples_per_second": 27.256, |
|
"eval_steps_per_second": 3.48, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7839958353474882, |
|
"eval_f1": 0.501557632398754, |
|
"eval_loss": 1.3749990463256836, |
|
"eval_precision": 0.4577114427860697, |
|
"eval_recall": 0.5546942291128337, |
|
"eval_runtime": 3.131, |
|
"eval_samples_per_second": 30.022, |
|
"eval_steps_per_second": 3.833, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7871936935261964, |
|
"eval_f1": 0.4903883876029816, |
|
"eval_loss": 1.3733103275299072, |
|
"eval_precision": 0.45028818443804036, |
|
"eval_recall": 0.5383290267011197, |
|
"eval_runtime": 3.4968, |
|
"eval_samples_per_second": 26.882, |
|
"eval_steps_per_second": 3.432, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7819135090915852, |
|
"eval_f1": 0.5099260412611912, |
|
"eval_loss": 1.395755410194397, |
|
"eval_precision": 0.46519886363636365, |
|
"eval_recall": 0.5641688199827735, |
|
"eval_runtime": 3.2027, |
|
"eval_samples_per_second": 29.351, |
|
"eval_steps_per_second": 3.747, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 1.375e-05, |
|
"loss": 0.0138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7822481686684267, |
|
"eval_f1": 0.49709639953542384, |
|
"eval_loss": 1.4415804147720337, |
|
"eval_precision": 0.45147679324894513, |
|
"eval_recall": 0.5529715762273901, |
|
"eval_runtime": 3.081, |
|
"eval_samples_per_second": 30.51, |
|
"eval_steps_per_second": 3.895, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7877886438850259, |
|
"eval_f1": 0.49035187287173665, |
|
"eval_loss": 1.384263277053833, |
|
"eval_precision": 0.43724696356275305, |
|
"eval_recall": 0.5581395348837209, |
|
"eval_runtime": 3.7036, |
|
"eval_samples_per_second": 25.381, |
|
"eval_steps_per_second": 3.24, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7796080764511211, |
|
"eval_f1": 0.490521327014218, |
|
"eval_loss": 1.4162341356277466, |
|
"eval_precision": 0.45295404814004375, |
|
"eval_recall": 0.5348837209302325, |
|
"eval_runtime": 3.1016, |
|
"eval_samples_per_second": 30.307, |
|
"eval_steps_per_second": 3.869, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7906146580894656, |
|
"eval_f1": 0.5060240963855421, |
|
"eval_loss": 1.380549669265747, |
|
"eval_precision": 0.46104815864022664, |
|
"eval_recall": 0.5607235142118863, |
|
"eval_runtime": 3.5552, |
|
"eval_samples_per_second": 26.44, |
|
"eval_steps_per_second": 3.375, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7868590339493549, |
|
"eval_f1": 0.4811210587777345, |
|
"eval_loss": 1.4221726655960083, |
|
"eval_precision": 0.43892045454545453, |
|
"eval_recall": 0.5322997416020672, |
|
"eval_runtime": 3.1728, |
|
"eval_samples_per_second": 29.627, |
|
"eval_steps_per_second": 3.782, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7899453389357826, |
|
"eval_f1": 0.502147598594299, |
|
"eval_loss": 1.407759189605713, |
|
"eval_precision": 0.4592857142857143, |
|
"eval_recall": 0.553832902670112, |
|
"eval_runtime": 3.1234, |
|
"eval_samples_per_second": 30.096, |
|
"eval_steps_per_second": 3.842, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7908377644740266, |
|
"eval_f1": 0.4872389791183295, |
|
"eval_loss": 1.4327783584594727, |
|
"eval_precision": 0.4421052631578947, |
|
"eval_recall": 0.5426356589147286, |
|
"eval_runtime": 3.409, |
|
"eval_samples_per_second": 27.574, |
|
"eval_steps_per_second": 3.52, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7851113672702934, |
|
"eval_f1": 0.4913227921326649, |
|
"eval_loss": 1.4441440105438232, |
|
"eval_precision": 0.4448324022346369, |
|
"eval_recall": 0.5486649440137812, |
|
"eval_runtime": 3.2678, |
|
"eval_samples_per_second": 28.766, |
|
"eval_steps_per_second": 3.672, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7912839772431488, |
|
"eval_f1": 0.5036864571206829, |
|
"eval_loss": 1.4089561700820923, |
|
"eval_precision": 0.4583333333333333, |
|
"eval_recall": 0.5590008613264428, |
|
"eval_runtime": 3.1449, |
|
"eval_samples_per_second": 29.89, |
|
"eval_steps_per_second": 3.816, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7902428141151973, |
|
"eval_f1": 0.503858024691358, |
|
"eval_loss": 1.4232546091079712, |
|
"eval_precision": 0.45632424877707894, |
|
"eval_recall": 0.5624461670973299, |
|
"eval_runtime": 8.2173, |
|
"eval_samples_per_second": 11.439, |
|
"eval_steps_per_second": 1.46, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7909493176663073, |
|
"eval_f1": 0.49484536082474234, |
|
"eval_loss": 1.4232605695724487, |
|
"eval_precision": 0.4444444444444444, |
|
"eval_recall": 0.5581395348837209, |
|
"eval_runtime": 3.0349, |
|
"eval_samples_per_second": 30.973, |
|
"eval_steps_per_second": 3.954, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7923995091659539, |
|
"eval_f1": 0.5162324649298597, |
|
"eval_loss": 1.4146913290023804, |
|
"eval_precision": 0.4827586206896552, |
|
"eval_recall": 0.5546942291128337, |
|
"eval_runtime": 3.288, |
|
"eval_samples_per_second": 28.589, |
|
"eval_steps_per_second": 3.65, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7880117502695869, |
|
"eval_f1": 0.4955684007707129, |
|
"eval_loss": 1.4241943359375, |
|
"eval_precision": 0.44839609483960946, |
|
"eval_recall": 0.553832902670112, |
|
"eval_runtime": 6.1717, |
|
"eval_samples_per_second": 15.231, |
|
"eval_steps_per_second": 1.944, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7880861190644406, |
|
"eval_f1": 0.49018853405155827, |
|
"eval_loss": 1.421156644821167, |
|
"eval_precision": 0.4429763560500695, |
|
"eval_recall": 0.5486649440137812, |
|
"eval_runtime": 3.1226, |
|
"eval_samples_per_second": 30.103, |
|
"eval_steps_per_second": 3.843, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7936637786784665, |
|
"eval_f1": 0.5089463220675944, |
|
"eval_loss": 1.424131989479065, |
|
"eval_precision": 0.4726735598227474, |
|
"eval_recall": 0.5512489233419466, |
|
"eval_runtime": 3.3347, |
|
"eval_samples_per_second": 28.189, |
|
"eval_steps_per_second": 3.599, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 1.0625e-05, |
|
"loss": 0.0034, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7858550552188301, |
|
"eval_f1": 0.5065398335315101, |
|
"eval_loss": 1.443221926689148, |
|
"eval_precision": 0.46916299559471364, |
|
"eval_recall": 0.5503875968992248, |
|
"eval_runtime": 3.0632, |
|
"eval_samples_per_second": 30.687, |
|
"eval_steps_per_second": 3.917, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.784739523296025, |
|
"eval_f1": 0.49553398058252424, |
|
"eval_loss": 1.4684823751449585, |
|
"eval_precision": 0.4512022630834512, |
|
"eval_recall": 0.549526270456503, |
|
"eval_runtime": 3.2701, |
|
"eval_samples_per_second": 28.745, |
|
"eval_steps_per_second": 3.67, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7949280481909791, |
|
"eval_f1": 0.5152113789016199, |
|
"eval_loss": 1.4661859273910522, |
|
"eval_precision": 0.4759124087591241, |
|
"eval_recall": 0.5615848406546081, |
|
"eval_runtime": 3.3592, |
|
"eval_samples_per_second": 27.983, |
|
"eval_steps_per_second": 3.572, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7894619417692337, |
|
"eval_f1": 0.506033476060724, |
|
"eval_loss": 1.5200412273406982, |
|
"eval_precision": 0.4616477272727273, |
|
"eval_recall": 0.5598621877691645, |
|
"eval_runtime": 3.1539, |
|
"eval_samples_per_second": 29.805, |
|
"eval_steps_per_second": 3.805, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7907262112817461, |
|
"eval_f1": 0.5102848101265823, |
|
"eval_loss": 1.49446702003479, |
|
"eval_precision": 0.4718361375274323, |
|
"eval_recall": 0.5555555555555556, |
|
"eval_runtime": 6.9973, |
|
"eval_samples_per_second": 13.434, |
|
"eval_steps_per_second": 1.715, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7916558212174172, |
|
"eval_f1": 0.5182511030886483, |
|
"eval_loss": 1.5040817260742188, |
|
"eval_precision": 0.484984984984985, |
|
"eval_recall": 0.5564168819982773, |
|
"eval_runtime": 8.0818, |
|
"eval_samples_per_second": 11.631, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7928829063325029, |
|
"eval_f1": 0.5159489633173845, |
|
"eval_loss": 1.5629594326019287, |
|
"eval_precision": 0.4803266518188567, |
|
"eval_recall": 0.5572782084409992, |
|
"eval_runtime": 3.423, |
|
"eval_samples_per_second": 27.462, |
|
"eval_steps_per_second": 3.506, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7840702041423419, |
|
"eval_f1": 0.5010117361392149, |
|
"eval_loss": 1.5059171915054321, |
|
"eval_precision": 0.4725190839694656, |
|
"eval_recall": 0.533161068044789, |
|
"eval_runtime": 3.1466, |
|
"eval_samples_per_second": 29.874, |
|
"eval_steps_per_second": 3.814, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.790763395679173, |
|
"eval_f1": 0.5078616352201258, |
|
"eval_loss": 1.5826448202133179, |
|
"eval_precision": 0.4671005061460593, |
|
"eval_recall": 0.5564168819982773, |
|
"eval_runtime": 3.1003, |
|
"eval_samples_per_second": 30.319, |
|
"eval_steps_per_second": 3.871, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.791246792845722, |
|
"eval_f1": 0.520757758968158, |
|
"eval_loss": 1.5211557149887085, |
|
"eval_precision": 0.4893939393939394, |
|
"eval_recall": 0.5564168819982773, |
|
"eval_runtime": 3.5151, |
|
"eval_samples_per_second": 26.742, |
|
"eval_steps_per_second": 3.414, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7894619417692337, |
|
"eval_f1": 0.5172550575168584, |
|
"eval_loss": 1.5194625854492188, |
|
"eval_precision": 0.47941176470588237, |
|
"eval_recall": 0.5615848406546081, |
|
"eval_runtime": 3.1721, |
|
"eval_samples_per_second": 29.634, |
|
"eval_steps_per_second": 3.783, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7864871899750865, |
|
"eval_f1": 0.5015600624024961, |
|
"eval_loss": 1.5568981170654297, |
|
"eval_precision": 0.45830363506771205, |
|
"eval_recall": 0.553832902670112, |
|
"eval_runtime": 3.0367, |
|
"eval_samples_per_second": 30.954, |
|
"eval_steps_per_second": 3.952, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.789610679358941, |
|
"eval_f1": 0.5186953062848051, |
|
"eval_loss": 1.5378462076187134, |
|
"eval_precision": 0.4818920916481892, |
|
"eval_recall": 0.5615848406546081, |
|
"eval_runtime": 3.0368, |
|
"eval_samples_per_second": 30.954, |
|
"eval_steps_per_second": 3.952, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7838842821552077, |
|
"eval_f1": 0.490257209664848, |
|
"eval_loss": 1.5430467128753662, |
|
"eval_precision": 0.44768683274021354, |
|
"eval_recall": 0.5417743324720069, |
|
"eval_runtime": 3.2599, |
|
"eval_samples_per_second": 28.835, |
|
"eval_steps_per_second": 3.681, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.7890529133975384, |
|
"eval_f1": 0.5021816739389131, |
|
"eval_loss": 1.5567139387130737, |
|
"eval_precision": 0.46544117647058825, |
|
"eval_recall": 0.5452196382428941, |
|
"eval_runtime": 3.4648, |
|
"eval_samples_per_second": 27.13, |
|
"eval_steps_per_second": 3.463, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.7909865020637341, |
|
"eval_f1": 0.5128824476650564, |
|
"eval_loss": 1.5516618490219116, |
|
"eval_precision": 0.48148148148148145, |
|
"eval_recall": 0.5486649440137812, |
|
"eval_runtime": 3.3346, |
|
"eval_samples_per_second": 28.19, |
|
"eval_steps_per_second": 3.599, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.7902056297177704, |
|
"eval_f1": 0.508203281312525, |
|
"eval_loss": 1.5522751808166504, |
|
"eval_precision": 0.47458893871449925, |
|
"eval_recall": 0.5469422911283376, |
|
"eval_runtime": 3.095, |
|
"eval_samples_per_second": 30.372, |
|
"eval_steps_per_second": 3.877, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.787044955936489, |
|
"eval_f1": 0.4970691676436107, |
|
"eval_loss": 1.5659950971603394, |
|
"eval_precision": 0.45493562231759654, |
|
"eval_recall": 0.5478036175710594, |
|
"eval_runtime": 3.7257, |
|
"eval_samples_per_second": 25.23, |
|
"eval_steps_per_second": 3.221, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.791246792845722, |
|
"eval_f1": 0.5023219814241486, |
|
"eval_loss": 1.5194514989852905, |
|
"eval_precision": 0.4560787069571328, |
|
"eval_recall": 0.5590008613264428, |
|
"eval_runtime": 3.1193, |
|
"eval_samples_per_second": 30.135, |
|
"eval_steps_per_second": 3.847, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.7909493176663073, |
|
"eval_f1": 0.504884720593982, |
|
"eval_loss": 1.5329976081848145, |
|
"eval_precision": 0.46208869814020026, |
|
"eval_recall": 0.5564168819982773, |
|
"eval_runtime": 3.2791, |
|
"eval_samples_per_second": 28.666, |
|
"eval_steps_per_second": 3.66, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.7897222325512215, |
|
"eval_f1": 0.5118296529968455, |
|
"eval_loss": 1.5393718481063843, |
|
"eval_precision": 0.472, |
|
"eval_recall": 0.5590008613264428, |
|
"eval_runtime": 3.3565, |
|
"eval_samples_per_second": 28.006, |
|
"eval_steps_per_second": 3.575, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7929572751273566, |
|
"eval_f1": 0.5202220459952419, |
|
"eval_loss": 1.5860378742218018, |
|
"eval_precision": 0.4819985304922851, |
|
"eval_recall": 0.5650301464254953, |
|
"eval_runtime": 3.0372, |
|
"eval_samples_per_second": 30.949, |
|
"eval_steps_per_second": 3.951, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.7916186368199903, |
|
"eval_f1": 0.5208667736757624, |
|
"eval_loss": 1.622610092163086, |
|
"eval_precision": 0.48760330578512395, |
|
"eval_recall": 0.5590008613264428, |
|
"eval_runtime": 3.1363, |
|
"eval_samples_per_second": 29.972, |
|
"eval_steps_per_second": 3.826, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7935150410887591, |
|
"eval_f1": 0.5205158264947246, |
|
"eval_loss": 1.613077163696289, |
|
"eval_precision": 0.47639484978540775, |
|
"eval_recall": 0.5736434108527132, |
|
"eval_runtime": 3.4181, |
|
"eval_samples_per_second": 27.501, |
|
"eval_steps_per_second": 3.511, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.7962666864983453, |
|
"eval_f1": 0.5341365461847388, |
|
"eval_loss": 1.617754340171814, |
|
"eval_precision": 0.5003762227238525, |
|
"eval_recall": 0.5727820844099913, |
|
"eval_runtime": 3.9965, |
|
"eval_samples_per_second": 23.521, |
|
"eval_steps_per_second": 3.003, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7960063957163575, |
|
"eval_f1": 0.5309876049580169, |
|
"eval_loss": 1.6110599040985107, |
|
"eval_precision": 0.4955223880597015, |
|
"eval_recall": 0.5719207579672696, |
|
"eval_runtime": 3.0526, |
|
"eval_samples_per_second": 30.794, |
|
"eval_steps_per_second": 3.931, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.7905774736920388, |
|
"eval_f1": 0.5087025316455697, |
|
"eval_loss": 1.5602927207946777, |
|
"eval_precision": 0.4703730797366496, |
|
"eval_recall": 0.553832902670112, |
|
"eval_runtime": 3.378, |
|
"eval_samples_per_second": 27.827, |
|
"eval_steps_per_second": 3.552, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7928085375376492, |
|
"eval_f1": 0.5181962025316456, |
|
"eval_loss": 1.555016040802002, |
|
"eval_precision": 0.479151426481346, |
|
"eval_recall": 0.5641688199827735, |
|
"eval_runtime": 3.0403, |
|
"eval_samples_per_second": 30.918, |
|
"eval_steps_per_second": 3.947, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7963782396906258, |
|
"eval_f1": 0.5212933753943217, |
|
"eval_loss": 1.5542960166931152, |
|
"eval_precision": 0.4807272727272727, |
|
"eval_recall": 0.5693367786391043, |
|
"eval_runtime": 3.1539, |
|
"eval_samples_per_second": 29.805, |
|
"eval_steps_per_second": 3.805, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7952998921652474, |
|
"eval_f1": 0.5285996055226825, |
|
"eval_loss": 1.560102939605713, |
|
"eval_precision": 0.487627365356623, |
|
"eval_recall": 0.5770887166236004, |
|
"eval_runtime": 3.3272, |
|
"eval_samples_per_second": 28.252, |
|
"eval_steps_per_second": 3.607, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.7926597999479419, |
|
"eval_f1": 0.5247999999999999, |
|
"eval_loss": 1.559518575668335, |
|
"eval_precision": 0.4899178491411501, |
|
"eval_recall": 0.5650301464254953, |
|
"eval_runtime": 3.1588, |
|
"eval_samples_per_second": 29.758, |
|
"eval_steps_per_second": 3.799, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.7940356226527349, |
|
"eval_f1": 0.533066132264529, |
|
"eval_loss": 1.5632045269012451, |
|
"eval_precision": 0.4985007496251874, |
|
"eval_recall": 0.5727820844099913, |
|
"eval_runtime": 3.5803, |
|
"eval_samples_per_second": 26.255, |
|
"eval_steps_per_second": 3.352, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 78.12, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.0011, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7931431971144908, |
|
"eval_f1": 0.5266084193804608, |
|
"eval_loss": 1.5693398714065552, |
|
"eval_precision": 0.4885777450257922, |
|
"eval_recall": 0.5710594315245479, |
|
"eval_runtime": 3.2284, |
|
"eval_samples_per_second": 29.117, |
|
"eval_steps_per_second": 3.717, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7927341687427956, |
|
"eval_f1": 0.5263575108997226, |
|
"eval_loss": 1.5728816986083984, |
|
"eval_precision": 0.48751835535976507, |
|
"eval_recall": 0.5719207579672696, |
|
"eval_runtime": 3.0942, |
|
"eval_samples_per_second": 30.38, |
|
"eval_steps_per_second": 3.878, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.7929200907299297, |
|
"eval_f1": 0.5305313243457573, |
|
"eval_loss": 1.5773258209228516, |
|
"eval_precision": 0.49155033063923587, |
|
"eval_recall": 0.5762273901808785, |
|
"eval_runtime": 3.5248, |
|
"eval_samples_per_second": 26.668, |
|
"eval_steps_per_second": 3.404, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.7932919347041981, |
|
"eval_f1": 0.5318382058470164, |
|
"eval_loss": 1.579202651977539, |
|
"eval_precision": 0.49700598802395207, |
|
"eval_recall": 0.5719207579672696, |
|
"eval_runtime": 3.0984, |
|
"eval_samples_per_second": 30.338, |
|
"eval_steps_per_second": 3.873, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.793329119101625, |
|
"eval_f1": 0.5278780585639792, |
|
"eval_loss": 1.58578622341156, |
|
"eval_precision": 0.493993993993994, |
|
"eval_recall": 0.5667527993109388, |
|
"eval_runtime": 3.0353, |
|
"eval_samples_per_second": 30.969, |
|
"eval_steps_per_second": 3.954, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7925110623582344, |
|
"eval_f1": 0.528965241709948, |
|
"eval_loss": 1.584820032119751, |
|
"eval_precision": 0.4932935916542474, |
|
"eval_recall": 0.570198105081826, |
|
"eval_runtime": 3.3844, |
|
"eval_samples_per_second": 27.774, |
|
"eval_steps_per_second": 3.546, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.7922135871788197, |
|
"eval_f1": 0.5333866453418633, |
|
"eval_loss": 1.5820199251174927, |
|
"eval_precision": 0.49776119402985075, |
|
"eval_recall": 0.5745047372954349, |
|
"eval_runtime": 3.2337, |
|
"eval_samples_per_second": 29.069, |
|
"eval_steps_per_second": 3.711, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.7920648495891124, |
|
"eval_f1": 0.5324830609804703, |
|
"eval_loss": 1.5860569477081299, |
|
"eval_precision": 0.49554896142433236, |
|
"eval_recall": 0.5753660637381568, |
|
"eval_runtime": 3.1113, |
|
"eval_samples_per_second": 30.212, |
|
"eval_steps_per_second": 3.857, |
|
"step": 2752 |
|
} |
|
], |
|
"max_steps": 3200, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.8890572813041664e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|