|
{ |
|
"best_metric": 0.11773921549320221, |
|
"best_model_checkpoint": "model_training/deepseek_prover_base_no_err/checkpoints-random-09-07-09-02/checkpoint-450", |
|
"epoch": 3.655837563451777, |
|
"eval_steps": 50, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04060913705583756, |
|
"grad_norm": 117.39540100097656, |
|
"learning_rate": 1e-05, |
|
"loss": 15.7951, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08121827411167512, |
|
"grad_norm": 105.14662170410156, |
|
"learning_rate": 2e-05, |
|
"loss": 11.8462, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1218274111675127, |
|
"grad_norm": 43.610801696777344, |
|
"learning_rate": 3e-05, |
|
"loss": 3.3642, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.16243654822335024, |
|
"grad_norm": 0.4269501864910126, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1821, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 0.41744017601013184, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4639, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2436548223350254, |
|
"grad_norm": 0.45930907130241394, |
|
"learning_rate": 6e-05, |
|
"loss": 0.1685, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.28426395939086296, |
|
"grad_norm": 0.32693225145339966, |
|
"learning_rate": 7e-05, |
|
"loss": 0.1772, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3248730964467005, |
|
"grad_norm": 0.2242601364850998, |
|
"learning_rate": 8e-05, |
|
"loss": 0.1677, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.36548223350253806, |
|
"grad_norm": 2.6285548210144043, |
|
"learning_rate": 9e-05, |
|
"loss": 0.1544, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 0.1525067687034607, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1655, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"eval_loss": 0.15786193311214447, |
|
"eval_runtime": 564.1924, |
|
"eval_samples_per_second": 1.772, |
|
"eval_steps_per_second": 0.222, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4467005076142132, |
|
"grad_norm": 0.10299257934093475, |
|
"learning_rate": 9.996842891446092e-05, |
|
"loss": 0.1514, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4873096446700508, |
|
"grad_norm": 0.11552461981773376, |
|
"learning_rate": 9.987375552718133e-05, |
|
"loss": 0.1505, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5279187817258884, |
|
"grad_norm": 0.07015743851661682, |
|
"learning_rate": 9.971609939582557e-05, |
|
"loss": 0.1556, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5685279187817259, |
|
"grad_norm": 0.0795639380812645, |
|
"learning_rate": 9.9495659615402e-05, |
|
"loss": 0.1515, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 0.07625389844179153, |
|
"learning_rate": 9.921271456683715e-05, |
|
"loss": 0.1394, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.649746192893401, |
|
"grad_norm": 0.10126403719186783, |
|
"learning_rate": 9.886762156542428e-05, |
|
"loss": 0.1657, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6903553299492385, |
|
"grad_norm": 0.06794468313455582, |
|
"learning_rate": 9.846081640959007e-05, |
|
"loss": 0.1475, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7309644670050761, |
|
"grad_norm": 0.043415650725364685, |
|
"learning_rate": 9.79928128305494e-05, |
|
"loss": 0.1474, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7715736040609137, |
|
"grad_norm": 0.04360484331846237, |
|
"learning_rate": 9.746420184354334e-05, |
|
"loss": 0.1495, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 0.044484805315732956, |
|
"learning_rate": 9.687565100147939e-05, |
|
"loss": 0.1466, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"eval_loss": 0.14690107107162476, |
|
"eval_runtime": 564.1055, |
|
"eval_samples_per_second": 1.773, |
|
"eval_steps_per_second": 0.222, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8527918781725888, |
|
"grad_norm": 0.0379624105989933, |
|
"learning_rate": 9.622790355191672e-05, |
|
"loss": 0.1393, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.8934010152284264, |
|
"grad_norm": 0.046673040837049484, |
|
"learning_rate": 9.552177749846083e-05, |
|
"loss": 0.1408, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.934010152284264, |
|
"grad_norm": 0.04505982622504234, |
|
"learning_rate": 9.475816456775313e-05, |
|
"loss": 0.1482, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.9746192893401016, |
|
"grad_norm": 0.04212146997451782, |
|
"learning_rate": 9.393802908335977e-05, |
|
"loss": 0.141, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.015228426395939, |
|
"grad_norm": 0.044183164834976196, |
|
"learning_rate": 9.306240674798203e-05, |
|
"loss": 0.144, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0558375634517767, |
|
"grad_norm": 0.050254255533218384, |
|
"learning_rate": 9.213240333552589e-05, |
|
"loss": 0.1376, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0964467005076142, |
|
"grad_norm": 0.050791483372449875, |
|
"learning_rate": 9.114919329468282e-05, |
|
"loss": 0.1583, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.1370558375634519, |
|
"grad_norm": 0.05157487466931343, |
|
"learning_rate": 9.011401826578492e-05, |
|
"loss": 0.1307, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1776649746192893, |
|
"grad_norm": 0.05917412042617798, |
|
"learning_rate": 8.902818551280758e-05, |
|
"loss": 0.134, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.218274111675127, |
|
"grad_norm": 0.047791987657547, |
|
"learning_rate": 8.789306627249985e-05, |
|
"loss": 0.1252, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.218274111675127, |
|
"eval_loss": 0.1394234001636505, |
|
"eval_runtime": 564.1635, |
|
"eval_samples_per_second": 1.773, |
|
"eval_steps_per_second": 0.222, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.2588832487309645, |
|
"grad_norm": 0.06823377311229706, |
|
"learning_rate": 8.6710094022727e-05, |
|
"loss": 0.1289, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.299492385786802, |
|
"grad_norm": 0.10027821362018585, |
|
"learning_rate": 8.548076267221256e-05, |
|
"loss": 0.1345, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.3401015228426396, |
|
"grad_norm": 0.08134859055280685, |
|
"learning_rate": 8.420662467396547e-05, |
|
"loss": 0.147, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.380710659898477, |
|
"grad_norm": 0.0758972093462944, |
|
"learning_rate": 8.288928906477496e-05, |
|
"loss": 0.1328, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.4213197969543148, |
|
"grad_norm": 0.07111922651529312, |
|
"learning_rate": 8.15304194332491e-05, |
|
"loss": 0.1311, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.4619289340101522, |
|
"grad_norm": 0.05676901340484619, |
|
"learning_rate": 8.013173181896283e-05, |
|
"loss": 0.126, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.50253807106599, |
|
"grad_norm": 0.058128587901592255, |
|
"learning_rate": 7.869499254536865e-05, |
|
"loss": 0.1265, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.5431472081218274, |
|
"grad_norm": 0.07095002382993698, |
|
"learning_rate": 7.722201598920673e-05, |
|
"loss": 0.1194, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.5837563451776648, |
|
"grad_norm": 0.06738153845071793, |
|
"learning_rate": 7.571466228923115e-05, |
|
"loss": 0.1274, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.6243654822335025, |
|
"grad_norm": 0.08010146021842957, |
|
"learning_rate": 7.417483499714589e-05, |
|
"loss": 0.1396, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6243654822335025, |
|
"eval_loss": 0.1322861909866333, |
|
"eval_runtime": 564.0964, |
|
"eval_samples_per_second": 1.773, |
|
"eval_steps_per_second": 0.222, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.665989847715736, |
|
"grad_norm": 0.1655184030532837, |
|
"learning_rate": 7.260447867371709e-05, |
|
"loss": 0.1362, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.7065989847715737, |
|
"grad_norm": 0.07665544748306274, |
|
"learning_rate": 7.100557643309732e-05, |
|
"loss": 0.1194, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.7472081218274111, |
|
"grad_norm": 0.07500068098306656, |
|
"learning_rate": 6.938014743846285e-05, |
|
"loss": 0.1319, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.7878172588832486, |
|
"grad_norm": 0.0936647355556488, |
|
"learning_rate": 6.773024435212678e-05, |
|
"loss": 0.1164, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.8284263959390863, |
|
"grad_norm": 0.08089397102594376, |
|
"learning_rate": 6.605795074334794e-05, |
|
"loss": 0.1242, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.869035532994924, |
|
"grad_norm": 0.06260140240192413, |
|
"learning_rate": 6.436537845710903e-05, |
|
"loss": 0.1223, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.9096446700507614, |
|
"grad_norm": 0.08003013581037521, |
|
"learning_rate": 6.265466494718732e-05, |
|
"loss": 0.1251, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.950253807106599, |
|
"grad_norm": 0.09465450048446655, |
|
"learning_rate": 6.092797057688495e-05, |
|
"loss": 0.1254, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.9908629441624366, |
|
"grad_norm": 0.1107378676533699, |
|
"learning_rate": 5.918747589082853e-05, |
|
"loss": 0.1198, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.0314720812182743, |
|
"grad_norm": 0.08334667980670929, |
|
"learning_rate": 5.7435378861282585e-05, |
|
"loss": 0.1283, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0314720812182743, |
|
"eval_loss": 0.1270112842321396, |
|
"eval_runtime": 566.0408, |
|
"eval_samples_per_second": 1.767, |
|
"eval_steps_per_second": 0.221, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0720812182741115, |
|
"grad_norm": 0.07709399610757828, |
|
"learning_rate": 5.567389211245485e-05, |
|
"loss": 0.1204, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.112690355329949, |
|
"grad_norm": 0.11390144377946854, |
|
"learning_rate": 5.390524012629824e-05, |
|
"loss": 0.1165, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.153299492385787, |
|
"grad_norm": 0.09440046548843384, |
|
"learning_rate": 5.2131656433338506e-05, |
|
"loss": 0.1209, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.1939086294416246, |
|
"grad_norm": 0.2260516881942749, |
|
"learning_rate": 5.035538079207488e-05, |
|
"loss": 0.1265, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.234517766497462, |
|
"grad_norm": 0.08039233833551407, |
|
"learning_rate": 4.857865636051585e-05, |
|
"loss": 0.1231, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.2751269035532995, |
|
"grad_norm": 0.13909466564655304, |
|
"learning_rate": 4.6803726863421725e-05, |
|
"loss": 0.1135, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.315736040609137, |
|
"grad_norm": 0.12441807985305786, |
|
"learning_rate": 4.503283375883165e-05, |
|
"loss": 0.103, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.3563451776649744, |
|
"grad_norm": 0.09395081549882889, |
|
"learning_rate": 4.326821340745304e-05, |
|
"loss": 0.1136, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.396954314720812, |
|
"grad_norm": 0.09730109572410583, |
|
"learning_rate": 4.151209424848819e-05, |
|
"loss": 0.1133, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.43756345177665, |
|
"grad_norm": 0.09149850159883499, |
|
"learning_rate": 3.976669398546451e-05, |
|
"loss": 0.1177, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.43756345177665, |
|
"eval_loss": 0.12281998991966248, |
|
"eval_runtime": 566.7313, |
|
"eval_samples_per_second": 1.765, |
|
"eval_steps_per_second": 0.221, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.4781725888324875, |
|
"grad_norm": 0.09771734476089478, |
|
"learning_rate": 3.803421678562213e-05, |
|
"loss": 0.117, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.5187817258883247, |
|
"grad_norm": 0.11150355637073517, |
|
"learning_rate": 3.631685049639586e-05, |
|
"loss": 0.1121, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.5593908629441624, |
|
"grad_norm": 0.09728321433067322, |
|
"learning_rate": 3.461676388250651e-05, |
|
"loss": 0.1123, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.1387196183204651, |
|
"learning_rate": 3.293610388715048e-05, |
|
"loss": 0.1091, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.6406091370558373, |
|
"grad_norm": 0.08297745138406754, |
|
"learning_rate": 3.127699292074683e-05, |
|
"loss": 0.1123, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.681218274111675, |
|
"grad_norm": 0.07975372672080994, |
|
"learning_rate": 2.964152618066508e-05, |
|
"loss": 0.117, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.7218274111675127, |
|
"grad_norm": 0.09079435467720032, |
|
"learning_rate": 2.8031769005319147e-05, |
|
"loss": 0.1167, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.7624365482233504, |
|
"grad_norm": 0.11697285622358322, |
|
"learning_rate": 2.6449754265968264e-05, |
|
"loss": 0.1149, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.803045685279188, |
|
"grad_norm": 0.14380836486816406, |
|
"learning_rate": 2.4897479799518796e-05, |
|
"loss": 0.1078, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.8436548223350253, |
|
"grad_norm": 0.08891697227954865, |
|
"learning_rate": 2.3376905885569182e-05, |
|
"loss": 0.1041, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.8436548223350253, |
|
"eval_loss": 0.11983277648687363, |
|
"eval_runtime": 566.6646, |
|
"eval_samples_per_second": 1.765, |
|
"eval_steps_per_second": 0.221, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.884263959390863, |
|
"grad_norm": 0.09689132869243622, |
|
"learning_rate": 2.1889952770883643e-05, |
|
"loss": 0.1212, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.9248730964467002, |
|
"grad_norm": 0.08049995452165604, |
|
"learning_rate": 2.043849824442124e-05, |
|
"loss": 0.105, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.965482233502538, |
|
"grad_norm": 0.0926508679986, |
|
"learning_rate": 1.9024375265982384e-05, |
|
"loss": 0.11, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 3.0060913705583756, |
|
"grad_norm": 0.09762662649154663, |
|
"learning_rate": 1.764936965146773e-05, |
|
"loss": 0.1067, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.0467005076142133, |
|
"grad_norm": 0.11719583719968796, |
|
"learning_rate": 1.631521781767214e-05, |
|
"loss": 0.0963, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.087309644670051, |
|
"grad_norm": 0.09774573147296906, |
|
"learning_rate": 1.502360458946232e-05, |
|
"loss": 0.1081, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.127918781725888, |
|
"grad_norm": 0.10405320674180984, |
|
"learning_rate": 1.3776161072106702e-05, |
|
"loss": 0.107, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 3.168527918781726, |
|
"grad_norm": 0.10721355676651001, |
|
"learning_rate": 1.257446259144494e-05, |
|
"loss": 0.1101, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.2091370558375636, |
|
"grad_norm": 0.12284258753061295, |
|
"learning_rate": 1.1420026704498077e-05, |
|
"loss": 0.1096, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 3.249746192893401, |
|
"grad_norm": 0.09421534836292267, |
|
"learning_rate": 1.031431128303153e-05, |
|
"loss": 0.1059, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.249746192893401, |
|
"eval_loss": 0.11832693219184875, |
|
"eval_runtime": 566.7299, |
|
"eval_samples_per_second": 1.765, |
|
"eval_steps_per_second": 0.221, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.2903553299492385, |
|
"grad_norm": 0.09948204457759857, |
|
"learning_rate": 9.258712672491415e-06, |
|
"loss": 0.1129, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 3.330964467005076, |
|
"grad_norm": 0.10511817038059235, |
|
"learning_rate": 8.254563928638893e-06, |
|
"loss": 0.1101, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.371573604060914, |
|
"grad_norm": 0.11750485748052597, |
|
"learning_rate": 7.3031331341093915e-06, |
|
"loss": 0.1121, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 3.412182741116751, |
|
"grad_norm": 0.09111586213111877, |
|
"learning_rate": 6.405621797022848e-06, |
|
"loss": 0.1102, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.452791878172589, |
|
"grad_norm": 0.08644920587539673, |
|
"learning_rate": 5.563163333667099e-06, |
|
"loss": 0.0995, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 3.4934010152284265, |
|
"grad_norm": 0.10021346062421799, |
|
"learning_rate": 4.776821637170526e-06, |
|
"loss": 0.0984, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.5340101522842637, |
|
"grad_norm": 0.09591208398342133, |
|
"learning_rate": 4.047589733971646e-06, |
|
"loss": 0.1102, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 3.5746192893401014, |
|
"grad_norm": 0.11921706050634384, |
|
"learning_rate": 3.376388529782215e-06, |
|
"loss": 0.1085, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.615228426395939, |
|
"grad_norm": 0.08839091658592224, |
|
"learning_rate": 2.7640656466274782e-06, |
|
"loss": 0.1082, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 3.655837563451777, |
|
"grad_norm": 0.09160158783197403, |
|
"learning_rate": 2.2113943524323167e-06, |
|
"loss": 0.1041, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.655837563451777, |
|
"eval_loss": 0.11773921549320221, |
|
"eval_runtime": 566.7284, |
|
"eval_samples_per_second": 1.765, |
|
"eval_steps_per_second": 0.221, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 492, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8705510622704435e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|