|
{ |
|
"best_metric": 0.22088730335235596, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.30656039239730226, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006131207847946045, |
|
"eval_loss": 1.6972112655639648, |
|
"eval_runtime": 42.3642, |
|
"eval_samples_per_second": 16.217, |
|
"eval_steps_per_second": 4.06, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0061312078479460455, |
|
"grad_norm": 2.5399515628814697, |
|
"learning_rate": 4.22e-05, |
|
"loss": 1.0729, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.012262415695892091, |
|
"grad_norm": 1.911725640296936, |
|
"learning_rate": 8.44e-05, |
|
"loss": 0.624, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.018393623543838136, |
|
"grad_norm": 1.583366870880127, |
|
"learning_rate": 0.0001266, |
|
"loss": 0.5414, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.024524831391784182, |
|
"grad_norm": 2.1019012928009033, |
|
"learning_rate": 0.0001688, |
|
"loss": 0.489, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.030656039239730228, |
|
"grad_norm": 3.7989816665649414, |
|
"learning_rate": 0.000211, |
|
"loss": 0.6301, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.030656039239730228, |
|
"eval_loss": 0.7841286063194275, |
|
"eval_runtime": 42.3765, |
|
"eval_samples_per_second": 16.212, |
|
"eval_steps_per_second": 4.059, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03678724708767627, |
|
"grad_norm": 2.0683157444000244, |
|
"learning_rate": 0.00021074300730241147, |
|
"loss": 0.5761, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04291845493562232, |
|
"grad_norm": 1.5029504299163818, |
|
"learning_rate": 0.00020997328125223568, |
|
"loss": 0.4392, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.049049662783568364, |
|
"grad_norm": 1.8249471187591553, |
|
"learning_rate": 0.0002086945718774165, |
|
"loss": 0.4728, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05518087063151441, |
|
"grad_norm": 2.3122222423553467, |
|
"learning_rate": 0.00020691310892149265, |
|
"loss": 0.4777, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.061312078479460456, |
|
"grad_norm": 3.182551145553589, |
|
"learning_rate": 0.00020463757149291335, |
|
"loss": 0.5207, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.061312078479460456, |
|
"eval_loss": 0.784916877746582, |
|
"eval_runtime": 42.3748, |
|
"eval_samples_per_second": 16.212, |
|
"eval_steps_per_second": 4.059, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0674432863274065, |
|
"grad_norm": 1.1367835998535156, |
|
"learning_rate": 0.0002018790457812944, |
|
"loss": 0.4991, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07357449417535254, |
|
"grad_norm": 2.4769582748413086, |
|
"learning_rate": 0.0001986509710466168, |
|
"loss": 0.396, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07970570202329859, |
|
"grad_norm": 1.6102774143218994, |
|
"learning_rate": 0.00019496907414450293, |
|
"loss": 0.3531, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08583690987124463, |
|
"grad_norm": 1.7436530590057373, |
|
"learning_rate": 0.00019085129290655697, |
|
"loss": 0.4021, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09196811771919068, |
|
"grad_norm": 4.168282985687256, |
|
"learning_rate": 0.00018631768874905217, |
|
"loss": 0.4508, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09196811771919068, |
|
"eval_loss": 0.5247650146484375, |
|
"eval_runtime": 42.4743, |
|
"eval_samples_per_second": 16.175, |
|
"eval_steps_per_second": 4.05, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09809932556713673, |
|
"grad_norm": 1.1784135103225708, |
|
"learning_rate": 0.0001813903489357277, |
|
"loss": 0.4399, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10423053341508277, |
|
"grad_norm": 1.4040613174438477, |
|
"learning_rate": 0.00017609327897085954, |
|
"loss": 0.3591, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11036174126302882, |
|
"grad_norm": 1.1797558069229126, |
|
"learning_rate": 0.00017045228564685694, |
|
"loss": 0.3523, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11649294911097487, |
|
"grad_norm": 1.590161681175232, |
|
"learning_rate": 0.0001644948513161638, |
|
"loss": 0.3406, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12262415695892091, |
|
"grad_norm": 3.65169358253479, |
|
"learning_rate": 0.00015825, |
|
"loss": 0.3927, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12262415695892091, |
|
"eval_loss": 0.5356494784355164, |
|
"eval_runtime": 42.3757, |
|
"eval_samples_per_second": 16.212, |
|
"eval_steps_per_second": 4.059, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12875536480686695, |
|
"grad_norm": 1.1879467964172363, |
|
"learning_rate": 0.00015174815598624768, |
|
"loss": 0.4153, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.134886572654813, |
|
"grad_norm": 2.3014867305755615, |
|
"learning_rate": 0.00014502099560537873, |
|
"loss": 0.3318, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14101778050275904, |
|
"grad_norm": 1.5435618162155151, |
|
"learning_rate": 0.00013810129290655696, |
|
"loss": 0.3028, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14714898835070508, |
|
"grad_norm": 2.062577486038208, |
|
"learning_rate": 0.00013102275998576495, |
|
"loss": 0.3095, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15328019619865113, |
|
"grad_norm": 3.058638095855713, |
|
"learning_rate": 0.00012381988274386116, |
|
"loss": 0.3716, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15328019619865113, |
|
"eval_loss": 0.38261598348617554, |
|
"eval_runtime": 42.4189, |
|
"eval_samples_per_second": 16.196, |
|
"eval_steps_per_second": 4.055, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15941140404659718, |
|
"grad_norm": 1.1857730150222778, |
|
"learning_rate": 0.00011652775287473745, |
|
"loss": 0.3379, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16554261189454322, |
|
"grad_norm": 1.465416431427002, |
|
"learning_rate": 0.00010918189690211387, |
|
"loss": 0.2494, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.17167381974248927, |
|
"grad_norm": 1.1710807085037231, |
|
"learning_rate": 0.00010181810309788618, |
|
"loss": 0.2669, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17780502759043532, |
|
"grad_norm": 1.733910083770752, |
|
"learning_rate": 9.447224712526258e-05, |
|
"loss": 0.2858, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.18393623543838136, |
|
"grad_norm": 1.8814456462860107, |
|
"learning_rate": 8.718011725613886e-05, |
|
"loss": 0.3506, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18393623543838136, |
|
"eval_loss": 0.35164520144462585, |
|
"eval_runtime": 42.4421, |
|
"eval_samples_per_second": 16.187, |
|
"eval_steps_per_second": 4.053, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1900674432863274, |
|
"grad_norm": 0.981971800327301, |
|
"learning_rate": 7.997724001423507e-05, |
|
"loss": 0.3302, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19619865113427346, |
|
"grad_norm": 1.1474406719207764, |
|
"learning_rate": 7.289870709344306e-05, |
|
"loss": 0.2733, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2023298589822195, |
|
"grad_norm": 1.0042413473129272, |
|
"learning_rate": 6.597900439462128e-05, |
|
"loss": 0.2392, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.20846106683016555, |
|
"grad_norm": 1.0399328470230103, |
|
"learning_rate": 5.9251844013752326e-05, |
|
"loss": 0.2247, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2145922746781116, |
|
"grad_norm": 2.1786627769470215, |
|
"learning_rate": 5.275000000000002e-05, |
|
"loss": 0.326, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2145922746781116, |
|
"eval_loss": 0.2690972089767456, |
|
"eval_runtime": 42.4522, |
|
"eval_samples_per_second": 16.183, |
|
"eval_steps_per_second": 4.052, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22072348252605764, |
|
"grad_norm": 0.7868878245353699, |
|
"learning_rate": 4.650514868383623e-05, |
|
"loss": 0.2916, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2268546903740037, |
|
"grad_norm": 0.9699639081954956, |
|
"learning_rate": 4.054771435314305e-05, |
|
"loss": 0.2132, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.23298589822194973, |
|
"grad_norm": 0.8491773009300232, |
|
"learning_rate": 3.4906721029140495e-05, |
|
"loss": 0.1864, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23911710606989578, |
|
"grad_norm": 0.9103133678436279, |
|
"learning_rate": 2.9609651064272323e-05, |
|
"loss": 0.2154, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.24524831391784183, |
|
"grad_norm": 2.587815999984741, |
|
"learning_rate": 2.468231125094783e-05, |
|
"loss": 0.2517, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24524831391784183, |
|
"eval_loss": 0.2357824444770813, |
|
"eval_runtime": 42.4802, |
|
"eval_samples_per_second": 16.172, |
|
"eval_steps_per_second": 4.049, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25137952176578787, |
|
"grad_norm": 0.9035682082176208, |
|
"learning_rate": 2.0148707093443057e-05, |
|
"loss": 0.2775, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2575107296137339, |
|
"grad_norm": 0.6796780228614807, |
|
"learning_rate": 1.603092585549706e-05, |
|
"loss": 0.2234, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26364193746167996, |
|
"grad_norm": 1.1364048719406128, |
|
"learning_rate": 1.2349028953383204e-05, |
|
"loss": 0.1956, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.269773145309626, |
|
"grad_norm": 0.9247281551361084, |
|
"learning_rate": 9.120954218705596e-06, |
|
"loss": 0.1868, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.27590435315757206, |
|
"grad_norm": 1.8796603679656982, |
|
"learning_rate": 6.362428507086673e-06, |
|
"loss": 0.2535, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27590435315757206, |
|
"eval_loss": 0.22399407625198364, |
|
"eval_runtime": 42.4648, |
|
"eval_samples_per_second": 16.178, |
|
"eval_steps_per_second": 4.05, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2820355610055181, |
|
"grad_norm": 0.8298888802528381, |
|
"learning_rate": 4.0868910785073565e-06, |
|
"loss": 0.2218, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.28816676885346415, |
|
"grad_norm": 0.9988222122192383, |
|
"learning_rate": 2.3054281225835e-06, |
|
"loss": 0.2328, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.29429797670141017, |
|
"grad_norm": 0.8386346101760864, |
|
"learning_rate": 1.026718747764327e-06, |
|
"loss": 0.217, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.30042918454935624, |
|
"grad_norm": 0.7417736053466797, |
|
"learning_rate": 2.5699269758854715e-07, |
|
"loss": 0.1874, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.30656039239730226, |
|
"grad_norm": 2.6463584899902344, |
|
"learning_rate": 0.0, |
|
"loss": 0.3353, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.30656039239730226, |
|
"eval_loss": 0.22088730335235596, |
|
"eval_runtime": 42.3398, |
|
"eval_samples_per_second": 16.226, |
|
"eval_steps_per_second": 4.062, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.437638077028762e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|