|
{
  "best_metric": 1.584151268005371,
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
  "epoch": 0.12789540997584198,
  "eval_steps": 50,
  "global_step": 450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002842120221685377,
      "eval_loss": 2.4652628898620605,
      "eval_runtime": 82.1891,
      "eval_samples_per_second": 9.016,
      "eval_steps_per_second": 2.263,
      "step": 1
    },
    {
      "epoch": 0.002842120221685377,
      "grad_norm": 2.0982413291931152,
      "learning_rate": 4.12e-05,
      "loss": 1.8261,
      "step": 10
    },
    {
      "epoch": 0.005684240443370754,
      "grad_norm": 4.10166597366333,
      "learning_rate": 8.24e-05,
      "loss": 1.4635,
      "step": 20
    },
    {
      "epoch": 0.008526360665056132,
      "grad_norm": 1.6615333557128906,
      "learning_rate": 0.0001236,
      "loss": 1.667,
      "step": 30
    },
    {
      "epoch": 0.011368480886741509,
      "grad_norm": 1.931045651435852,
      "learning_rate": 0.0001648,
      "loss": 1.5695,
      "step": 40
    },
    {
      "epoch": 0.014210601108426887,
      "grad_norm": 1.6880329847335815,
      "learning_rate": 0.000206,
      "loss": 1.8497,
      "step": 50
    },
    {
      "epoch": 0.014210601108426887,
      "eval_loss": 1.8087369203567505,
      "eval_runtime": 81.6706,
      "eval_samples_per_second": 9.073,
      "eval_steps_per_second": 2.277,
      "step": 50
    },
    {
      "epoch": 0.017052721330112264,
      "grad_norm": 3.674773931503296,
      "learning_rate": 0.0002057490971767619,
      "loss": 1.7821,
      "step": 60
    },
    {
      "epoch": 0.019894841551797642,
      "grad_norm": 3.0321569442749023,
      "learning_rate": 0.00020499761108038175,
      "loss": 1.6055,
      "step": 70
    },
    {
      "epoch": 0.022736961773483017,
      "grad_norm": 2.507596254348755,
      "learning_rate": 0.00020374920287558198,
      "loss": 1.3591,
      "step": 80
    },
    {
      "epoch": 0.025579081995168396,
      "grad_norm": 8.490031242370605,
      "learning_rate": 0.00020200995468164684,
      "loss": 1.2845,
      "step": 90
    },
    {
      "epoch": 0.028421202216853774,
      "grad_norm": 42.760581970214844,
      "learning_rate": 0.00019978833994094855,
      "loss": 1.8626,
      "step": 100
    },
    {
      "epoch": 0.028421202216853774,
      "eval_loss": 2.087491750717163,
      "eval_runtime": 81.5271,
      "eval_samples_per_second": 9.089,
      "eval_steps_per_second": 2.281,
      "step": 100
    },
    {
      "epoch": 0.03126332243853915,
      "grad_norm": 2.0331203937530518,
      "learning_rate": 0.00019709518213718787,
      "loss": 1.708,
      "step": 110
    },
    {
      "epoch": 0.03410544266022453,
      "grad_norm": 4.635890007019043,
      "learning_rate": 0.00019394360206446948,
      "loss": 1.6543,
      "step": 120
    },
    {
      "epoch": 0.0369475628819099,
      "grad_norm": 2.990798234939575,
      "learning_rate": 0.00019034895390411186,
      "loss": 1.8625,
      "step": 130
    },
    {
      "epoch": 0.039789683103595284,
      "grad_norm": 3.2386255264282227,
      "learning_rate": 0.0001863287504206196,
      "loss": 1.7414,
      "step": 140
    },
    {
      "epoch": 0.04263180332528066,
      "grad_norm": 1.6220500469207764,
      "learning_rate": 0.00018190257764125471,
      "loss": 1.7585,
      "step": 150
    },
    {
      "epoch": 0.04263180332528066,
      "eval_loss": 1.7171608209609985,
      "eval_runtime": 82.0601,
      "eval_samples_per_second": 9.03,
      "eval_steps_per_second": 2.267,
      "step": 150
    },
    {
      "epoch": 0.045473923546966034,
      "grad_norm": 1.6705387830734253,
      "learning_rate": 0.00017709199943488106,
      "loss": 1.806,
      "step": 160
    },
    {
      "epoch": 0.048316043768651416,
      "grad_norm": 2.3406643867492676,
      "learning_rate": 0.00017192045245496238,
      "loss": 1.5212,
      "step": 170
    },
    {
      "epoch": 0.05115816399033679,
      "grad_norm": 3.6276838779449463,
      "learning_rate": 0.00016641313195854277,
      "loss": 1.5071,
      "step": 180
    },
    {
      "epoch": 0.054000284212022166,
      "grad_norm": 7.468491077423096,
      "learning_rate": 0.0001605968690574869,
      "loss": 1.0868,
      "step": 190
    },
    {
      "epoch": 0.05684240443370755,
      "grad_norm": 19.19157600402832,
      "learning_rate": 0.0001545,
      "loss": 1.503,
      "step": 200
    },
    {
      "epoch": 0.05684240443370755,
      "eval_loss": 2.506401300430298,
      "eval_runtime": 81.7213,
      "eval_samples_per_second": 9.067,
      "eval_steps_per_second": 2.276,
      "step": 200
    },
    {
      "epoch": 0.05968452465539292,
      "grad_norm": 1.718658208847046,
      "learning_rate": 0.00014815222811927496,
      "loss": 1.7286,
      "step": 210
    },
    {
      "epoch": 0.0625266448770783,
      "grad_norm": 2.2495739459991455,
      "learning_rate": 0.00014158447912183896,
      "loss": 1.5474,
      "step": 220
    },
    {
      "epoch": 0.06536876509876367,
      "grad_norm": 2.8208131790161133,
      "learning_rate": 0.00013482875042061958,
      "loss": 1.3549,
      "step": 230
    },
    {
      "epoch": 0.06821088532044906,
      "grad_norm": 1.70619535446167,
      "learning_rate": 0.00012791795524676576,
      "loss": 1.8743,
      "step": 240
    },
    {
      "epoch": 0.07105300554213444,
      "grad_norm": 2.3366291522979736,
      "learning_rate": 0.00012088576229969385,
      "loss": 1.9315,
      "step": 250
    },
    {
      "epoch": 0.07105300554213444,
      "eval_loss": 1.7082891464233398,
      "eval_runtime": 81.785,
      "eval_samples_per_second": 9.06,
      "eval_steps_per_second": 2.274,
      "step": 250
    },
    {
      "epoch": 0.0738951257638198,
      "grad_norm": 2.337188959121704,
      "learning_rate": 0.0001137664317165683,
      "loss": 1.7002,
      "step": 260
    },
    {
      "epoch": 0.07673724598550519,
      "grad_norm": 3.3565144538879395,
      "learning_rate": 0.00010659464816035761,
      "loss": 1.4179,
      "step": 270
    },
    {
      "epoch": 0.07957936620719057,
      "grad_norm": 3.4117555618286133,
      "learning_rate": 9.940535183964242e-05,
      "loss": 1.667,
      "step": 280
    },
    {
      "epoch": 0.08242148642887594,
      "grad_norm": 4.477943420410156,
      "learning_rate": 9.22335682834317e-05,
      "loss": 1.3015,
      "step": 290
    },
    {
      "epoch": 0.08526360665056132,
      "grad_norm": 9.481433868408203,
      "learning_rate": 8.511423770030617e-05,
      "loss": 1.6444,
      "step": 300
    },
    {
      "epoch": 0.08526360665056132,
      "eval_loss": 2.061939239501953,
      "eval_runtime": 82.2643,
      "eval_samples_per_second": 9.008,
      "eval_steps_per_second": 2.261,
      "step": 300
    },
    {
      "epoch": 0.0881057268722467,
      "grad_norm": 1.6801893711090088,
      "learning_rate": 7.808204475323423e-05,
      "loss": 1.097,
      "step": 310
    },
    {
      "epoch": 0.09094784709393207,
      "grad_norm": 1.468928575515747,
      "learning_rate": 7.117124957938042e-05,
      "loss": 1.6221,
      "step": 320
    },
    {
      "epoch": 0.09378996731561745,
      "grad_norm": 2.429023027420044,
      "learning_rate": 6.441552087816105e-05,
      "loss": 1.7242,
      "step": 330
    },
    {
      "epoch": 0.09663208753730283,
      "grad_norm": 2.3508596420288086,
      "learning_rate": 5.784777188072502e-05,
      "loss": 1.5539,
      "step": 340
    },
    {
      "epoch": 0.0994742077589882,
      "grad_norm": 1.75046968460083,
      "learning_rate": 5.150000000000002e-05,
      "loss": 1.8748,
      "step": 350
    },
    {
      "epoch": 0.0994742077589882,
      "eval_loss": 1.625133752822876,
      "eval_runtime": 82.1926,
      "eval_samples_per_second": 9.015,
      "eval_steps_per_second": 2.263,
      "step": 350
    },
    {
      "epoch": 0.10231632798067358,
      "grad_norm": 3.755326986312866,
      "learning_rate": 4.540313094251309e-05,
      "loss": 1.605,
      "step": 360
    },
    {
      "epoch": 0.10515844820235896,
      "grad_norm": 3.1094276905059814,
      "learning_rate": 3.958686804145719e-05,
      "loss": 1.6938,
      "step": 370
    },
    {
      "epoch": 0.10800056842404433,
      "grad_norm": 3.3867297172546387,
      "learning_rate": 3.4079547545037634e-05,
      "loss": 1.6012,
      "step": 380
    },
    {
      "epoch": 0.11084268864572971,
      "grad_norm": 4.2931599617004395,
      "learning_rate": 2.8908000565118947e-05,
      "loss": 1.1067,
      "step": 390
    },
    {
      "epoch": 0.1136848088674151,
      "grad_norm": 37.14210510253906,
      "learning_rate": 2.4097422358745275e-05,
      "loss": 1.8836,
      "step": 400
    },
    {
      "epoch": 0.1136848088674151,
      "eval_loss": 1.6405808925628662,
      "eval_runtime": 82.1006,
      "eval_samples_per_second": 9.026,
      "eval_steps_per_second": 2.266,
      "step": 400
    },
    {
      "epoch": 0.11652692908910046,
      "grad_norm": 1.849120855331421,
      "learning_rate": 1.9671249579380422e-05,
      "loss": 1.5864,
      "step": 410
    },
    {
      "epoch": 0.11936904931078585,
      "grad_norm": 1.457759976387024,
      "learning_rate": 1.5651046095888127e-05,
      "loss": 1.543,
      "step": 420
    },
    {
      "epoch": 0.12221116953247123,
      "grad_norm": 2.084972858428955,
      "learning_rate": 1.205639793553052e-05,
      "loss": 1.5948,
      "step": 430
    },
    {
      "epoch": 0.1250532897541566,
      "grad_norm": 2.2695581912994385,
      "learning_rate": 8.904817862812098e-06,
      "loss": 1.8299,
      "step": 440
    },
    {
      "epoch": 0.12789540997584198,
      "grad_norm": 2.3242664337158203,
      "learning_rate": 6.211660059051443e-06,
      "loss": 1.9295,
      "step": 450
    },
    {
      "epoch": 0.12789540997584198,
      "eval_loss": 1.584151268005371,
      "eval_runtime": 81.6227,
      "eval_samples_per_second": 9.078,
      "eval_steps_per_second": 2.279,
      "step": 450
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.83983944138752e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}