{
"best_metric": 10.287571907043457,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.016645859342488557,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003329171868497711,
"grad_norm": 1.2723332643508911,
"learning_rate": 7e-06,
"loss": 41.5141,
"step": 1
},
{
"epoch": 0.0003329171868497711,
"eval_loss": 10.374454498291016,
"eval_runtime": 10.7185,
"eval_samples_per_second": 471.989,
"eval_steps_per_second": 118.02,
"step": 1
},
{
"epoch": 0.0006658343736995423,
"grad_norm": 1.4696481227874756,
"learning_rate": 1.4e-05,
"loss": 41.5006,
"step": 2
},
{
"epoch": 0.0009987515605493133,
"grad_norm": 1.355368733406067,
"learning_rate": 2.1e-05,
"loss": 41.4914,
"step": 3
},
{
"epoch": 0.0013316687473990845,
"grad_norm": 1.3979071378707886,
"learning_rate": 2.8e-05,
"loss": 41.4982,
"step": 4
},
{
"epoch": 0.0016645859342488557,
"grad_norm": 1.4343304634094238,
"learning_rate": 3.5e-05,
"loss": 41.4858,
"step": 5
},
{
"epoch": 0.0019975031210986267,
"grad_norm": 1.582345724105835,
"learning_rate": 4.2e-05,
"loss": 41.5017,
"step": 6
},
{
"epoch": 0.002330420307948398,
"grad_norm": 1.5597692728042603,
"learning_rate": 4.899999999999999e-05,
"loss": 41.4897,
"step": 7
},
{
"epoch": 0.002663337494798169,
"grad_norm": 1.6229273080825806,
"learning_rate": 5.6e-05,
"loss": 41.4884,
"step": 8
},
{
"epoch": 0.00299625468164794,
"grad_norm": 1.507421612739563,
"learning_rate": 6.3e-05,
"loss": 41.4902,
"step": 9
},
{
"epoch": 0.0033291718684977114,
"grad_norm": 1.4420781135559082,
"learning_rate": 7e-05,
"loss": 41.4668,
"step": 10
},
{
"epoch": 0.003662089055347482,
"grad_norm": 1.6588679552078247,
"learning_rate": 6.999521567473641e-05,
"loss": 41.466,
"step": 11
},
{
"epoch": 0.003995006242197253,
"grad_norm": 1.7542781829833984,
"learning_rate": 6.998086400693241e-05,
"loss": 41.4676,
"step": 12
},
{
"epoch": 0.0043279234290470245,
"grad_norm": 1.6211879253387451,
"learning_rate": 6.995694892019065e-05,
"loss": 41.4736,
"step": 13
},
{
"epoch": 0.004660840615896796,
"grad_norm": 1.7433085441589355,
"learning_rate": 6.99234769526571e-05,
"loss": 41.4453,
"step": 14
},
{
"epoch": 0.004993757802746567,
"grad_norm": 1.9194446802139282,
"learning_rate": 6.988045725523343e-05,
"loss": 41.4438,
"step": 15
},
{
"epoch": 0.005326674989596338,
"grad_norm": 1.809540033340454,
"learning_rate": 6.982790158907539e-05,
"loss": 41.4351,
"step": 16
},
{
"epoch": 0.005659592176446109,
"grad_norm": 1.8582813739776611,
"learning_rate": 6.976582432237733e-05,
"loss": 41.4342,
"step": 17
},
{
"epoch": 0.00599250936329588,
"grad_norm": 2.042267322540283,
"learning_rate": 6.969424242644413e-05,
"loss": 41.3984,
"step": 18
},
{
"epoch": 0.006325426550145652,
"grad_norm": 2.0619945526123047,
"learning_rate": 6.961317547105138e-05,
"loss": 41.4161,
"step": 19
},
{
"epoch": 0.006658343736995423,
"grad_norm": 2.238077402114868,
"learning_rate": 6.952264561909527e-05,
"loss": 41.3963,
"step": 20
},
{
"epoch": 0.006991260923845194,
"grad_norm": 2.1106245517730713,
"learning_rate": 6.942267762053337e-05,
"loss": 41.4235,
"step": 21
},
{
"epoch": 0.007324178110694964,
"grad_norm": 2.1262357234954834,
"learning_rate": 6.931329880561832e-05,
"loss": 41.3771,
"step": 22
},
{
"epoch": 0.007657095297544735,
"grad_norm": 2.1973509788513184,
"learning_rate": 6.919453907742597e-05,
"loss": 41.3827,
"step": 23
},
{
"epoch": 0.007990012484394507,
"grad_norm": 2.2817211151123047,
"learning_rate": 6.90664309036802e-05,
"loss": 41.3684,
"step": 24
},
{
"epoch": 0.008322929671244279,
"grad_norm": 2.1451680660247803,
"learning_rate": 6.892900930787656e-05,
"loss": 41.3657,
"step": 25
},
{
"epoch": 0.008655846858094049,
"grad_norm": 2.2792739868164062,
"learning_rate": 6.87823118597072e-05,
"loss": 41.3401,
"step": 26
},
{
"epoch": 0.008988764044943821,
"grad_norm": 2.215949058532715,
"learning_rate": 6.862637866478969e-05,
"loss": 41.3516,
"step": 27
},
{
"epoch": 0.009321681231793591,
"grad_norm": 2.3315978050231934,
"learning_rate": 6.846125235370252e-05,
"loss": 41.3321,
"step": 28
},
{
"epoch": 0.009654598418643362,
"grad_norm": 2.4486584663391113,
"learning_rate": 6.828697807033038e-05,
"loss": 41.3163,
"step": 29
},
{
"epoch": 0.009987515605493134,
"grad_norm": 2.5066885948181152,
"learning_rate": 6.81036034595222e-05,
"loss": 41.3124,
"step": 30
},
{
"epoch": 0.010320432792342904,
"grad_norm": 2.506309747695923,
"learning_rate": 6.791117865406564e-05,
"loss": 41.2818,
"step": 31
},
{
"epoch": 0.010653349979192676,
"grad_norm": 2.4515650272369385,
"learning_rate": 6.770975626098112e-05,
"loss": 41.2798,
"step": 32
},
{
"epoch": 0.010986267166042446,
"grad_norm": 2.3991332054138184,
"learning_rate": 6.749939134713974e-05,
"loss": 41.283,
"step": 33
},
{
"epoch": 0.011319184352892218,
"grad_norm": 2.327986240386963,
"learning_rate": 6.728014142420846e-05,
"loss": 41.2918,
"step": 34
},
{
"epoch": 0.011652101539741989,
"grad_norm": 2.2945783138275146,
"learning_rate": 6.7052066432927e-05,
"loss": 41.257,
"step": 35
},
{
"epoch": 0.01198501872659176,
"grad_norm": 2.445061445236206,
"learning_rate": 6.681522872672069e-05,
"loss": 41.2448,
"step": 36
},
{
"epoch": 0.012317935913441531,
"grad_norm": 2.3847179412841797,
"learning_rate": 6.656969305465356e-05,
"loss": 41.2389,
"step": 37
},
{
"epoch": 0.012650853100291303,
"grad_norm": 2.1190834045410156,
"learning_rate": 6.631552654372672e-05,
"loss": 41.236,
"step": 38
},
{
"epoch": 0.012983770287141073,
"grad_norm": 2.0182554721832275,
"learning_rate": 6.60527986805264e-05,
"loss": 41.2218,
"step": 39
},
{
"epoch": 0.013316687473990845,
"grad_norm": 2.2660741806030273,
"learning_rate": 6.578158129222711e-05,
"loss": 41.2069,
"step": 40
},
{
"epoch": 0.013649604660840616,
"grad_norm": 2.036485433578491,
"learning_rate": 6.550194852695469e-05,
"loss": 41.2141,
"step": 41
},
{
"epoch": 0.013982521847690388,
"grad_norm": 2.152125597000122,
"learning_rate": 6.521397683351509e-05,
"loss": 41.1895,
"step": 42
},
{
"epoch": 0.014315439034540158,
"grad_norm": 1.9263800382614136,
"learning_rate": 6.491774494049386e-05,
"loss": 41.1751,
"step": 43
},
{
"epoch": 0.014648356221389928,
"grad_norm": 1.9366573095321655,
"learning_rate": 6.461333383473272e-05,
"loss": 41.1583,
"step": 44
},
{
"epoch": 0.0149812734082397,
"grad_norm": 1.9731613397598267,
"learning_rate": 6.430082673918849e-05,
"loss": 41.142,
"step": 45
},
{
"epoch": 0.01531419059508947,
"grad_norm": 1.8909395933151245,
"learning_rate": 6.398030909018069e-05,
"loss": 41.1476,
"step": 46
},
{
"epoch": 0.015647107781939243,
"grad_norm": 1.7645783424377441,
"learning_rate": 6.365186851403423e-05,
"loss": 41.1339,
"step": 47
},
{
"epoch": 0.015980024968789013,
"grad_norm": 1.76289963722229,
"learning_rate": 6.331559480312315e-05,
"loss": 41.1443,
"step": 48
},
{
"epoch": 0.016312942155638784,
"grad_norm": 1.5803886651992798,
"learning_rate": 6.297157989132236e-05,
"loss": 41.1676,
"step": 49
},
{
"epoch": 0.016645859342488557,
"grad_norm": 1.5209460258483887,
"learning_rate": 6.261991782887377e-05,
"loss": 41.1441,
"step": 50
},
{
"epoch": 0.016645859342488557,
"eval_loss": 10.287571907043457,
"eval_runtime": 10.7091,
"eval_samples_per_second": 472.401,
"eval_steps_per_second": 118.124,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 10901565997056.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}