|
{ |
|
"best_metric": 1.3169949054718018, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-350", |
|
"epoch": 1.0012453300124533, |
|
"eval_steps": 50, |
|
"global_step": 402, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0024906600249066002, |
|
"eval_loss": 2.3575375080108643, |
|
"eval_runtime": 9.0845, |
|
"eval_samples_per_second": 18.603, |
|
"eval_steps_per_second": 4.733, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.024906600249066, |
|
"grad_norm": 1.6407170295715332, |
|
"learning_rate": 4.36e-05, |
|
"loss": 1.6376, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.049813200498132, |
|
"grad_norm": 2.079932451248169, |
|
"learning_rate": 8.72e-05, |
|
"loss": 1.8074, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.074719800747198, |
|
"grad_norm": 1.752514123916626, |
|
"learning_rate": 0.0001308, |
|
"loss": 1.5148, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.099626400996264, |
|
"grad_norm": 2.3281145095825195, |
|
"learning_rate": 0.0001744, |
|
"loss": 1.618, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12453300124533001, |
|
"grad_norm": 2.0914270877838135, |
|
"learning_rate": 0.000218, |
|
"loss": 1.3761, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12453300124533001, |
|
"eval_loss": 1.7705373764038086, |
|
"eval_runtime": 9.1773, |
|
"eval_samples_per_second": 18.415, |
|
"eval_steps_per_second": 4.685, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.149439601494396, |
|
"grad_norm": 1.1187117099761963, |
|
"learning_rate": 0.00021756616696447703, |
|
"loss": 1.6001, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17434620174346202, |
|
"grad_norm": 1.864639401435852, |
|
"learning_rate": 0.00021626812127263666, |
|
"loss": 1.5317, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.199252801992528, |
|
"grad_norm": 1.9063708782196045, |
|
"learning_rate": 0.00021411619567865767, |
|
"loss": 1.4666, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22415940224159403, |
|
"grad_norm": 1.9959717988967896, |
|
"learning_rate": 0.00021112752002497406, |
|
"loss": 1.3854, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.24906600249066002, |
|
"grad_norm": 1.7696688175201416, |
|
"learning_rate": 0.00020732588488463224, |
|
"loss": 1.0668, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24906600249066002, |
|
"eval_loss": 1.671539306640625, |
|
"eval_runtime": 9.1879, |
|
"eval_samples_per_second": 18.394, |
|
"eval_steps_per_second": 4.68, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 1.1212961673736572, |
|
"learning_rate": 0.00020274155218263936, |
|
"loss": 1.7123, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.298879202988792, |
|
"grad_norm": 1.5101659297943115, |
|
"learning_rate": 0.00019741101430380186, |
|
"loss": 1.5006, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.32378580323785805, |
|
"grad_norm": 1.5983831882476807, |
|
"learning_rate": 0.00019137670360461418, |
|
"loss": 1.4108, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.34869240348692404, |
|
"grad_norm": 1.7551649808883667, |
|
"learning_rate": 0.00018468665464155184, |
|
"loss": 1.363, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.37359900373599003, |
|
"grad_norm": 1.9306334257125854, |
|
"learning_rate": 0.0001773941218045129, |
|
"loss": 1.0146, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.37359900373599003, |
|
"eval_loss": 1.5669289827346802, |
|
"eval_runtime": 9.1037, |
|
"eval_samples_per_second": 18.564, |
|
"eval_steps_per_second": 4.723, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.398505603985056, |
|
"grad_norm": 1.0432745218276978, |
|
"learning_rate": 0.00016955715539913665, |
|
"loss": 1.4577, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.42341220423412207, |
|
"grad_norm": 1.7187490463256836, |
|
"learning_rate": 0.0001612381395524862, |
|
"loss": 1.4403, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.44831880448318806, |
|
"grad_norm": 1.626038908958435, |
|
"learning_rate": 0.00015250329562047557, |
|
"loss": 1.2982, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.47322540473225405, |
|
"grad_norm": 1.8136370182037354, |
|
"learning_rate": 0.00014342215505003773, |
|
"loss": 1.4308, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.49813200498132004, |
|
"grad_norm": 1.1612805128097534, |
|
"learning_rate": 0.000134067005892176, |
|
"loss": 0.9796, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.49813200498132004, |
|
"eval_loss": 1.5291515588760376, |
|
"eval_runtime": 9.1903, |
|
"eval_samples_per_second": 18.389, |
|
"eval_steps_per_second": 4.679, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.523038605230386, |
|
"grad_norm": 1.3198999166488647, |
|
"learning_rate": 0.0001245123173717881, |
|
"loss": 1.8174, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 1.404754638671875, |
|
"learning_rate": 0.00011483414709482405, |
|
"loss": 1.1952, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.572851805728518, |
|
"grad_norm": 1.841070532798767, |
|
"learning_rate": 0.00010510953561155114, |
|
"loss": 1.2676, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.597758405977584, |
|
"grad_norm": 1.8930388689041138, |
|
"learning_rate": 9.541589315534674e-05, |
|
"loss": 1.3132, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6226650062266501, |
|
"grad_norm": 1.8174850940704346, |
|
"learning_rate": 8.583038343872554e-05, |
|
"loss": 0.9781, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6226650062266501, |
|
"eval_loss": 1.4715014696121216, |
|
"eval_runtime": 9.1661, |
|
"eval_samples_per_second": 18.437, |
|
"eval_steps_per_second": 4.691, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6475716064757161, |
|
"grad_norm": 1.1012831926345825, |
|
"learning_rate": 7.642930941173154e-05, |
|
"loss": 1.4417, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6724782067247821, |
|
"grad_norm": 1.208770990371704, |
|
"learning_rate": 6.728750587220522e-05, |
|
"loss": 1.4184, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6973848069738481, |
|
"grad_norm": 1.6374764442443848, |
|
"learning_rate": 5.847774376289351e-05, |
|
"loss": 1.2979, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7222914072229141, |
|
"grad_norm": 1.8867082595825195, |
|
"learning_rate": 5.0070150897339905e-05, |
|
"loss": 1.3951, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7471980074719801, |
|
"grad_norm": 1.2463188171386719, |
|
"learning_rate": 4.213165372571504e-05, |
|
"loss": 0.8543, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7471980074719801, |
|
"eval_loss": 1.4179314374923706, |
|
"eval_runtime": 9.0764, |
|
"eval_samples_per_second": 18.62, |
|
"eval_steps_per_second": 4.738, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.772104607721046, |
|
"grad_norm": 1.0654844045639038, |
|
"learning_rate": 3.472544458426586e-05, |
|
"loss": 1.4909, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.797011207970112, |
|
"grad_norm": 1.1939270496368408, |
|
"learning_rate": 2.7910478669204397e-05, |
|
"loss": 1.3609, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 1.3185659646987915, |
|
"learning_rate": 2.174100473924473e-05, |
|
"loss": 1.4049, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8468244084682441, |
|
"grad_norm": 1.7657341957092285, |
|
"learning_rate": 1.626613328250815e-05, |
|
"loss": 1.2549, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8717310087173101, |
|
"grad_norm": 1.3518775701522827, |
|
"learning_rate": 1.152944558529147e-05, |
|
"loss": 1.0009, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8717310087173101, |
|
"eval_loss": 1.3169949054718018, |
|
"eval_runtime": 9.1662, |
|
"eval_samples_per_second": 18.437, |
|
"eval_steps_per_second": 4.691, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8966376089663761, |
|
"grad_norm": 0.9178294539451599, |
|
"learning_rate": 7.568646814604021e-06, |
|
"loss": 1.3807, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9215442092154421, |
|
"grad_norm": 1.0393375158309937, |
|
"learning_rate": 4.4152658760198e-06, |
|
"loss": 1.2499, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9464508094645081, |
|
"grad_norm": 1.3953897953033447, |
|
"learning_rate": 2.094404436047883e-06, |
|
"loss": 1.0791, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9713574097135741, |
|
"grad_norm": 1.0463335514068604, |
|
"learning_rate": 6.245371068631892e-07, |
|
"loss": 1.1939, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9962640099626401, |
|
"grad_norm": 1.1366596221923828, |
|
"learning_rate": 1.736438397464224e-08, |
|
"loss": 0.9256, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9962640099626401, |
|
"eval_loss": 1.3189666271209717, |
|
"eval_runtime": 9.1794, |
|
"eval_samples_per_second": 18.411, |
|
"eval_steps_per_second": 4.684, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 402, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.46090111891669e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|