|
{ |
|
"best_metric": 2.037623643875122, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-400", |
|
"epoch": 0.028237619568670363, |
|
"eval_steps": 50, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 7.05940489216759e-05, |
|
"eval_loss": 2.8033833503723145, |
|
"eval_runtime": 108.3643, |
|
"eval_samples_per_second": 55.046, |
|
"eval_steps_per_second": 13.768, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0007059404892167591, |
|
"grad_norm": 1.7475390434265137, |
|
"learning_rate": 4.1400000000000003e-05, |
|
"loss": 2.648, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0014118809784335181, |
|
"grad_norm": 1.68655526638031, |
|
"learning_rate": 8.280000000000001e-05, |
|
"loss": 2.4858, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002117821467650277, |
|
"grad_norm": 1.582150936126709, |
|
"learning_rate": 0.00012419999999999998, |
|
"loss": 2.4498, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0028237619568670363, |
|
"grad_norm": 1.5243428945541382, |
|
"learning_rate": 0.00016560000000000001, |
|
"loss": 2.4085, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0035297024460837953, |
|
"grad_norm": 1.5775226354599, |
|
"learning_rate": 0.000207, |
|
"loss": 2.3538, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0035297024460837953, |
|
"eval_loss": 2.3329827785491943, |
|
"eval_runtime": 108.5486, |
|
"eval_samples_per_second": 54.952, |
|
"eval_steps_per_second": 13.745, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004235642935300554, |
|
"grad_norm": 1.0230377912521362, |
|
"learning_rate": 0.00020674787920189178, |
|
"loss": 2.4597, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004941583424517313, |
|
"grad_norm": 1.1582502126693726, |
|
"learning_rate": 0.00020599274511475253, |
|
"loss": 2.2224, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0056475239137340726, |
|
"grad_norm": 1.2100154161453247, |
|
"learning_rate": 0.00020473827667594888, |
|
"loss": 2.1416, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.006353464402950831, |
|
"grad_norm": 1.165224552154541, |
|
"learning_rate": 0.00020299058552961598, |
|
"loss": 2.2391, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.007059404892167591, |
|
"grad_norm": 1.492423176765442, |
|
"learning_rate": 0.00020075818625134152, |
|
"loss": 2.308, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007059404892167591, |
|
"eval_loss": 2.240849018096924, |
|
"eval_runtime": 108.306, |
|
"eval_samples_per_second": 55.075, |
|
"eval_steps_per_second": 13.776, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007765345381384349, |
|
"grad_norm": 0.9571946263313293, |
|
"learning_rate": 0.00019805195486600916, |
|
"loss": 2.3412, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.008471285870601109, |
|
"grad_norm": 1.0861552953720093, |
|
"learning_rate": 0.00019488507586089894, |
|
"loss": 2.1932, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.009177226359817867, |
|
"grad_norm": 1.318565011024475, |
|
"learning_rate": 0.00019127297795219008, |
|
"loss": 2.199, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.009883166849034626, |
|
"grad_norm": 1.283436894416809, |
|
"learning_rate": 0.00018723325891780706, |
|
"loss": 2.2015, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.010589107338251386, |
|
"grad_norm": 1.3785208463668823, |
|
"learning_rate": 0.0001827855998628142, |
|
"loss": 2.1604, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.010589107338251386, |
|
"eval_loss": 2.1907973289489746, |
|
"eval_runtime": 108.2635, |
|
"eval_samples_per_second": 55.097, |
|
"eval_steps_per_second": 13.781, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.011295047827468145, |
|
"grad_norm": 0.9574385285377502, |
|
"learning_rate": 0.0001779516693350504, |
|
"loss": 2.1949, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.012000988316684903, |
|
"grad_norm": 1.022146463394165, |
|
"learning_rate": 0.00017275501775814182, |
|
"loss": 2.0991, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.012706928805901662, |
|
"grad_norm": 1.1318084001541138, |
|
"learning_rate": 0.00016722096269620562, |
|
"loss": 2.1822, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.013412869295118422, |
|
"grad_norm": 1.1545848846435547, |
|
"learning_rate": 0.00016137646550922228, |
|
"loss": 2.1998, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.014118809784335181, |
|
"grad_norm": 1.444872260093689, |
|
"learning_rate": 0.00015525, |
|
"loss": 2.163, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.014118809784335181, |
|
"eval_loss": 2.1361401081085205, |
|
"eval_runtime": 115.7055, |
|
"eval_samples_per_second": 51.553, |
|
"eval_steps_per_second": 12.895, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01482475027355194, |
|
"grad_norm": 0.986111581325531, |
|
"learning_rate": 0.0001488714136926695, |
|
"loss": 2.2117, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.015530690762768699, |
|
"grad_norm": 1.0047094821929932, |
|
"learning_rate": 0.0001422717824185469, |
|
"loss": 2.1779, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.016236631251985458, |
|
"grad_norm": 1.1812105178833008, |
|
"learning_rate": 0.00013548325891780705, |
|
"loss": 2.1505, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.016942571741202218, |
|
"grad_norm": 1.5376032590866089, |
|
"learning_rate": 0.0001285389161945656, |
|
"loss": 2.0143, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.017648512230418977, |
|
"grad_norm": 1.3409324884414673, |
|
"learning_rate": 0.0001214725863885273, |
|
"loss": 2.1464, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.017648512230418977, |
|
"eval_loss": 2.1107239723205566, |
|
"eval_runtime": 108.6879, |
|
"eval_samples_per_second": 54.882, |
|
"eval_steps_per_second": 13.727, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.018354452719635733, |
|
"grad_norm": 0.8746324777603149, |
|
"learning_rate": 0.00011431869594820213, |
|
"loss": 2.2634, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.019060393208852493, |
|
"grad_norm": 1.0597100257873535, |
|
"learning_rate": 0.00010711209790870886, |
|
"loss": 2.1859, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.019766333698069252, |
|
"grad_norm": 0.9971674084663391, |
|
"learning_rate": 9.988790209129117e-05, |
|
"loss": 2.0446, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02047227418728601, |
|
"grad_norm": 1.1347280740737915, |
|
"learning_rate": 9.268130405179787e-05, |
|
"loss": 2.0384, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02117821467650277, |
|
"grad_norm": 1.377494215965271, |
|
"learning_rate": 8.55274136114727e-05, |
|
"loss": 2.0872, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02117821467650277, |
|
"eval_loss": 2.081465721130371, |
|
"eval_runtime": 115.6558, |
|
"eval_samples_per_second": 51.575, |
|
"eval_steps_per_second": 12.9, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02188415516571953, |
|
"grad_norm": 0.9409071207046509, |
|
"learning_rate": 7.84610838054344e-05, |
|
"loss": 2.1326, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02259009565493629, |
|
"grad_norm": 0.9766196608543396, |
|
"learning_rate": 7.151674108219295e-05, |
|
"loss": 2.1009, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.023296036144153046, |
|
"grad_norm": 1.045487880706787, |
|
"learning_rate": 6.472821758145309e-05, |
|
"loss": 1.9352, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.024001976633369806, |
|
"grad_norm": 1.2392162084579468, |
|
"learning_rate": 5.8128586307330475e-05, |
|
"loss": 2.0905, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.024707917122586565, |
|
"grad_norm": 1.3943520784378052, |
|
"learning_rate": 5.175000000000002e-05, |
|
"loss": 2.1226, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.024707917122586565, |
|
"eval_loss": 2.057299852371216, |
|
"eval_runtime": 109.4826, |
|
"eval_samples_per_second": 54.484, |
|
"eval_steps_per_second": 13.628, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.025413857611803325, |
|
"grad_norm": 0.9228034615516663, |
|
"learning_rate": 4.5623534490777714e-05, |
|
"loss": 2.1183, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.026119798101020084, |
|
"grad_norm": 0.9374071955680847, |
|
"learning_rate": 3.9779037303794365e-05, |
|
"loss": 2.0988, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.026825738590236844, |
|
"grad_norm": 1.051098108291626, |
|
"learning_rate": 3.42449822418582e-05, |
|
"loss": 1.9813, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.027531679079453603, |
|
"grad_norm": 1.083287000656128, |
|
"learning_rate": 2.9048330664949622e-05, |
|
"loss": 2.1039, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.028237619568670363, |
|
"grad_norm": 1.2849839925765991, |
|
"learning_rate": 2.4214400137185785e-05, |
|
"loss": 1.9843, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.028237619568670363, |
|
"eval_loss": 2.037623643875122, |
|
"eval_runtime": 107.9784, |
|
"eval_samples_per_second": 55.243, |
|
"eval_steps_per_second": 13.818, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1105701006934016e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|