{ "best_metric": 10.287571907043457, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.016645859342488557, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003329171868497711, "grad_norm": 1.2723332643508911, "learning_rate": 7e-06, "loss": 41.5141, "step": 1 }, { "epoch": 0.0003329171868497711, "eval_loss": 10.374454498291016, "eval_runtime": 10.7185, "eval_samples_per_second": 471.989, "eval_steps_per_second": 118.02, "step": 1 }, { "epoch": 0.0006658343736995423, "grad_norm": 1.4696481227874756, "learning_rate": 1.4e-05, "loss": 41.5006, "step": 2 }, { "epoch": 0.0009987515605493133, "grad_norm": 1.355368733406067, "learning_rate": 2.1e-05, "loss": 41.4914, "step": 3 }, { "epoch": 0.0013316687473990845, "grad_norm": 1.3979071378707886, "learning_rate": 2.8e-05, "loss": 41.4982, "step": 4 }, { "epoch": 0.0016645859342488557, "grad_norm": 1.4343304634094238, "learning_rate": 3.5e-05, "loss": 41.4858, "step": 5 }, { "epoch": 0.0019975031210986267, "grad_norm": 1.582345724105835, "learning_rate": 4.2e-05, "loss": 41.5017, "step": 6 }, { "epoch": 0.002330420307948398, "grad_norm": 1.5597692728042603, "learning_rate": 4.899999999999999e-05, "loss": 41.4897, "step": 7 }, { "epoch": 0.002663337494798169, "grad_norm": 1.6229273080825806, "learning_rate": 5.6e-05, "loss": 41.4884, "step": 8 }, { "epoch": 0.00299625468164794, "grad_norm": 1.507421612739563, "learning_rate": 6.3e-05, "loss": 41.4902, "step": 9 }, { "epoch": 0.0033291718684977114, "grad_norm": 1.4420781135559082, "learning_rate": 7e-05, "loss": 41.4668, "step": 10 }, { "epoch": 0.003662089055347482, "grad_norm": 1.6588679552078247, "learning_rate": 6.999521567473641e-05, "loss": 41.466, "step": 11 }, { "epoch": 0.003995006242197253, "grad_norm": 1.7542781829833984, "learning_rate": 6.998086400693241e-05, "loss": 41.4676, "step": 12 }, { "epoch": 0.0043279234290470245, "grad_norm": 1.6211879253387451, "learning_rate": 6.995694892019065e-05, "loss": 41.4736, "step": 13 }, { "epoch": 0.004660840615896796, "grad_norm": 1.7433085441589355, "learning_rate": 6.99234769526571e-05, "loss": 41.4453, "step": 14 }, { "epoch": 0.004993757802746567, "grad_norm": 1.9194446802139282, "learning_rate": 6.988045725523343e-05, "loss": 41.4438, "step": 15 }, { "epoch": 0.005326674989596338, "grad_norm": 1.809540033340454, "learning_rate": 6.982790158907539e-05, "loss": 41.4351, "step": 16 }, { "epoch": 0.005659592176446109, "grad_norm": 1.8582813739776611, "learning_rate": 6.976582432237733e-05, "loss": 41.4342, "step": 17 }, { "epoch": 0.00599250936329588, "grad_norm": 2.042267322540283, "learning_rate": 6.969424242644413e-05, "loss": 41.3984, "step": 18 }, { "epoch": 0.006325426550145652, "grad_norm": 2.0619945526123047, "learning_rate": 6.961317547105138e-05, "loss": 41.4161, "step": 19 }, { "epoch": 0.006658343736995423, "grad_norm": 2.238077402114868, "learning_rate": 6.952264561909527e-05, "loss": 41.3963, "step": 20 }, { "epoch": 0.006991260923845194, "grad_norm": 2.1106245517730713, "learning_rate": 6.942267762053337e-05, "loss": 41.4235, "step": 21 }, { "epoch": 0.007324178110694964, "grad_norm": 2.1262357234954834, "learning_rate": 6.931329880561832e-05, "loss": 41.3771, "step": 22 }, { "epoch": 0.007657095297544735, "grad_norm": 2.1973509788513184, "learning_rate": 6.919453907742597e-05, "loss": 41.3827, "step": 23 }, { "epoch": 0.007990012484394507, "grad_norm": 2.2817211151123047, "learning_rate": 6.90664309036802e-05, "loss": 41.3684, "step": 24 }, { "epoch": 0.008322929671244279, "grad_norm": 2.1451680660247803, "learning_rate": 6.892900930787656e-05, "loss": 41.3657, "step": 25 }, { "epoch": 0.008655846858094049, "grad_norm": 2.2792739868164062, "learning_rate": 6.87823118597072e-05, "loss": 41.3401, "step": 26 }, { "epoch": 0.008988764044943821, "grad_norm": 2.215949058532715, "learning_rate": 6.862637866478969e-05, "loss": 41.3516, "step": 27 }, { "epoch": 0.009321681231793591, "grad_norm": 2.3315978050231934, "learning_rate": 6.846125235370252e-05, "loss": 41.3321, "step": 28 }, { "epoch": 0.009654598418643362, "grad_norm": 2.4486584663391113, "learning_rate": 6.828697807033038e-05, "loss": 41.3163, "step": 29 }, { "epoch": 0.009987515605493134, "grad_norm": 2.5066885948181152, "learning_rate": 6.81036034595222e-05, "loss": 41.3124, "step": 30 }, { "epoch": 0.010320432792342904, "grad_norm": 2.506309747695923, "learning_rate": 6.791117865406564e-05, "loss": 41.2818, "step": 31 }, { "epoch": 0.010653349979192676, "grad_norm": 2.4515650272369385, "learning_rate": 6.770975626098112e-05, "loss": 41.2798, "step": 32 }, { "epoch": 0.010986267166042446, "grad_norm": 2.3991332054138184, "learning_rate": 6.749939134713974e-05, "loss": 41.283, "step": 33 }, { "epoch": 0.011319184352892218, "grad_norm": 2.327986240386963, "learning_rate": 6.728014142420846e-05, "loss": 41.2918, "step": 34 }, { "epoch": 0.011652101539741989, "grad_norm": 2.2945783138275146, "learning_rate": 6.7052066432927e-05, "loss": 41.257, "step": 35 }, { "epoch": 0.01198501872659176, "grad_norm": 2.445061445236206, "learning_rate": 6.681522872672069e-05, "loss": 41.2448, "step": 36 }, { "epoch": 0.012317935913441531, "grad_norm": 2.3847179412841797, "learning_rate": 6.656969305465356e-05, "loss": 41.2389, "step": 37 }, { "epoch": 0.012650853100291303, "grad_norm": 2.1190834045410156, "learning_rate": 6.631552654372672e-05, "loss": 41.236, "step": 38 }, { "epoch": 0.012983770287141073, "grad_norm": 2.0182554721832275, "learning_rate": 6.60527986805264e-05, "loss": 41.2218, "step": 39 }, { "epoch": 0.013316687473990845, "grad_norm": 2.2660741806030273, "learning_rate": 6.578158129222711e-05, "loss": 41.2069, "step": 40 }, { "epoch": 0.013649604660840616, "grad_norm": 2.036485433578491, "learning_rate": 6.550194852695469e-05, "loss": 41.2141, "step": 41 }, { "epoch": 0.013982521847690388, "grad_norm": 2.152125597000122, "learning_rate": 6.521397683351509e-05, "loss": 41.1895, "step": 42 }, { "epoch": 0.014315439034540158, "grad_norm": 1.9263800382614136, "learning_rate": 6.491774494049386e-05, "loss": 41.1751, "step": 43 }, { "epoch": 0.014648356221389928, "grad_norm": 1.9366573095321655, "learning_rate": 6.461333383473272e-05, "loss": 41.1583, "step": 44 }, { "epoch": 0.0149812734082397, "grad_norm": 1.9731613397598267, "learning_rate": 6.430082673918849e-05, "loss": 41.142, "step": 45 }, { "epoch": 0.01531419059508947, "grad_norm": 1.8909395933151245, "learning_rate": 6.398030909018069e-05, "loss": 41.1476, "step": 46 }, { "epoch": 0.015647107781939243, "grad_norm": 1.7645783424377441, "learning_rate": 6.365186851403423e-05, "loss": 41.1339, "step": 47 }, { "epoch": 0.015980024968789013, "grad_norm": 1.76289963722229, "learning_rate": 6.331559480312315e-05, "loss": 41.1443, "step": 48 }, { "epoch": 0.016312942155638784, "grad_norm": 1.5803886651992798, "learning_rate": 6.297157989132236e-05, "loss": 41.1676, "step": 49 }, { "epoch": 0.016645859342488557, "grad_norm": 1.5209460258483887, "learning_rate": 6.261991782887377e-05, "loss": 41.1441, "step": 50 }, { "epoch": 0.016645859342488557, "eval_loss": 10.287571907043457, "eval_runtime": 10.7091, "eval_samples_per_second": 472.401, "eval_steps_per_second": 118.124, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 10901565997056.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }