{
"best_metric": 0.6652013063430786,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.05042864346949067,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010085728693898135,
"grad_norm": 1.3238667249679565,
"learning_rate": 1.013e-05,
"loss": 0.928,
"step": 1
},
{
"epoch": 0.0010085728693898135,
"eval_loss": 1.1040126085281372,
"eval_runtime": 101.7212,
"eval_samples_per_second": 4.109,
"eval_steps_per_second": 1.032,
"step": 1
},
{
"epoch": 0.002017145738779627,
"grad_norm": 1.4647407531738281,
"learning_rate": 2.026e-05,
"loss": 1.1291,
"step": 2
},
{
"epoch": 0.0030257186081694403,
"grad_norm": 1.7250173091888428,
"learning_rate": 3.039e-05,
"loss": 1.0937,
"step": 3
},
{
"epoch": 0.004034291477559254,
"grad_norm": 1.4756126403808594,
"learning_rate": 4.052e-05,
"loss": 1.0637,
"step": 4
},
{
"epoch": 0.005042864346949067,
"grad_norm": 1.4971727132797241,
"learning_rate": 5.065e-05,
"loss": 1.1541,
"step": 5
},
{
"epoch": 0.006051437216338881,
"grad_norm": 1.1345500946044922,
"learning_rate": 6.078e-05,
"loss": 0.8789,
"step": 6
},
{
"epoch": 0.0070600100857286935,
"grad_norm": 1.1694258451461792,
"learning_rate": 7.091e-05,
"loss": 0.7942,
"step": 7
},
{
"epoch": 0.008068582955118508,
"grad_norm": 0.9738374948501587,
"learning_rate": 8.104e-05,
"loss": 0.7897,
"step": 8
},
{
"epoch": 0.009077155824508321,
"grad_norm": 1.2070674896240234,
"learning_rate": 9.117e-05,
"loss": 0.8907,
"step": 9
},
{
"epoch": 0.010085728693898134,
"grad_norm": 0.9936267733573914,
"learning_rate": 0.0001013,
"loss": 0.7356,
"step": 10
},
{
"epoch": 0.011094301563287948,
"grad_norm": 0.9822749495506287,
"learning_rate": 0.00010076684210526316,
"loss": 0.787,
"step": 11
},
{
"epoch": 0.012102874432677761,
"grad_norm": 0.9206593632698059,
"learning_rate": 0.0001002336842105263,
"loss": 0.7268,
"step": 12
},
{
"epoch": 0.013111447302067574,
"grad_norm": 0.8007137179374695,
"learning_rate": 9.970052631578946e-05,
"loss": 0.6785,
"step": 13
},
{
"epoch": 0.014120020171457387,
"grad_norm": 0.7873572707176208,
"learning_rate": 9.916736842105263e-05,
"loss": 0.65,
"step": 14
},
{
"epoch": 0.015128593040847202,
"grad_norm": 0.8010468482971191,
"learning_rate": 9.863421052631579e-05,
"loss": 0.6508,
"step": 15
},
{
"epoch": 0.016137165910237016,
"grad_norm": 0.7741969227790833,
"learning_rate": 9.810105263157895e-05,
"loss": 0.5805,
"step": 16
},
{
"epoch": 0.01714573877962683,
"grad_norm": 0.7493349313735962,
"learning_rate": 9.756789473684211e-05,
"loss": 0.564,
"step": 17
},
{
"epoch": 0.018154311649016642,
"grad_norm": 0.9663587212562561,
"learning_rate": 9.703473684210525e-05,
"loss": 0.808,
"step": 18
},
{
"epoch": 0.019162884518406455,
"grad_norm": 0.7793949246406555,
"learning_rate": 9.650157894736842e-05,
"loss": 0.5491,
"step": 19
},
{
"epoch": 0.020171457387796268,
"grad_norm": 0.7455626726150513,
"learning_rate": 9.596842105263158e-05,
"loss": 0.6404,
"step": 20
},
{
"epoch": 0.02118003025718608,
"grad_norm": 0.7438361048698425,
"learning_rate": 9.543526315789474e-05,
"loss": 0.6354,
"step": 21
},
{
"epoch": 0.022188603126575897,
"grad_norm": 0.740470826625824,
"learning_rate": 9.49021052631579e-05,
"loss": 0.5225,
"step": 22
},
{
"epoch": 0.02319717599596571,
"grad_norm": 0.8656465411186218,
"learning_rate": 9.436894736842105e-05,
"loss": 0.7408,
"step": 23
},
{
"epoch": 0.024205748865355523,
"grad_norm": 0.9166726469993591,
"learning_rate": 9.38357894736842e-05,
"loss": 0.6548,
"step": 24
},
{
"epoch": 0.025214321734745335,
"grad_norm": 0.8170962333679199,
"learning_rate": 9.330263157894737e-05,
"loss": 0.654,
"step": 25
},
{
"epoch": 0.026222894604135148,
"grad_norm": 0.7673491835594177,
"learning_rate": 9.276947368421051e-05,
"loss": 0.6428,
"step": 26
},
{
"epoch": 0.02723146747352496,
"grad_norm": 0.8028613924980164,
"learning_rate": 9.223631578947369e-05,
"loss": 0.5204,
"step": 27
},
{
"epoch": 0.028240040342914774,
"grad_norm": 0.8541864156723022,
"learning_rate": 9.170315789473684e-05,
"loss": 0.6858,
"step": 28
},
{
"epoch": 0.02924861321230459,
"grad_norm": 0.8092413544654846,
"learning_rate": 9.117e-05,
"loss": 0.6384,
"step": 29
},
{
"epoch": 0.030257186081694403,
"grad_norm": 0.9186341762542725,
"learning_rate": 9.063684210526316e-05,
"loss": 0.7882,
"step": 30
},
{
"epoch": 0.031265758951084216,
"grad_norm": 0.9697185158729553,
"learning_rate": 9.010368421052632e-05,
"loss": 0.799,
"step": 31
},
{
"epoch": 0.03227433182047403,
"grad_norm": 0.7349818348884583,
"learning_rate": 8.957052631578946e-05,
"loss": 0.6457,
"step": 32
},
{
"epoch": 0.03328290468986384,
"grad_norm": 0.843620240688324,
"learning_rate": 8.903736842105263e-05,
"loss": 0.7368,
"step": 33
},
{
"epoch": 0.03429147755925366,
"grad_norm": 0.8225664496421814,
"learning_rate": 8.850421052631579e-05,
"loss": 0.7159,
"step": 34
},
{
"epoch": 0.03530005042864347,
"grad_norm": 0.9680421352386475,
"learning_rate": 8.797105263157895e-05,
"loss": 0.8112,
"step": 35
},
{
"epoch": 0.036308623298033284,
"grad_norm": 0.9144193530082703,
"learning_rate": 8.743789473684211e-05,
"loss": 0.7829,
"step": 36
},
{
"epoch": 0.03731719616742309,
"grad_norm": 0.8283833265304565,
"learning_rate": 8.690473684210526e-05,
"loss": 0.7398,
"step": 37
},
{
"epoch": 0.03832576903681291,
"grad_norm": 0.7781999707221985,
"learning_rate": 8.637157894736842e-05,
"loss": 0.7255,
"step": 38
},
{
"epoch": 0.039334341906202726,
"grad_norm": 0.7448036074638367,
"learning_rate": 8.583842105263158e-05,
"loss": 0.6482,
"step": 39
},
{
"epoch": 0.040342914775592535,
"grad_norm": 0.7694168090820312,
"learning_rate": 8.530526315789472e-05,
"loss": 0.6282,
"step": 40
},
{
"epoch": 0.04135148764498235,
"grad_norm": 0.9006367325782776,
"learning_rate": 8.47721052631579e-05,
"loss": 0.7293,
"step": 41
},
{
"epoch": 0.04236006051437216,
"grad_norm": 0.9051785469055176,
"learning_rate": 8.423894736842105e-05,
"loss": 0.8256,
"step": 42
},
{
"epoch": 0.04336863338376198,
"grad_norm": 0.9707128405570984,
"learning_rate": 8.37057894736842e-05,
"loss": 0.7974,
"step": 43
},
{
"epoch": 0.044377206253151794,
"grad_norm": 0.885073721408844,
"learning_rate": 8.317263157894737e-05,
"loss": 0.7655,
"step": 44
},
{
"epoch": 0.0453857791225416,
"grad_norm": 0.9013693928718567,
"learning_rate": 8.263947368421053e-05,
"loss": 0.7205,
"step": 45
},
{
"epoch": 0.04639435199193142,
"grad_norm": 1.1316449642181396,
"learning_rate": 8.210631578947368e-05,
"loss": 0.7543,
"step": 46
},
{
"epoch": 0.04740292486132123,
"grad_norm": 0.8760470151901245,
"learning_rate": 8.157315789473684e-05,
"loss": 0.7771,
"step": 47
},
{
"epoch": 0.048411497730711045,
"grad_norm": 1.0497716665267944,
"learning_rate": 8.104e-05,
"loss": 0.8032,
"step": 48
},
{
"epoch": 0.049420070600100854,
"grad_norm": 0.9932529926300049,
"learning_rate": 8.050684210526316e-05,
"loss": 0.7941,
"step": 49
},
{
"epoch": 0.05042864346949067,
"grad_norm": 1.1281291246414185,
"learning_rate": 7.997368421052632e-05,
"loss": 0.845,
"step": 50
},
{
"epoch": 0.05042864346949067,
"eval_loss": 0.6652013063430786,
"eval_runtime": 101.679,
"eval_samples_per_second": 4.111,
"eval_steps_per_second": 1.033,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.357300236897485e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}