{ "best_metric": 0.6652013063430786, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.05042864346949067, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010085728693898135, "grad_norm": 1.3238667249679565, "learning_rate": 1.013e-05, "loss": 0.928, "step": 1 }, { "epoch": 0.0010085728693898135, "eval_loss": 1.1040126085281372, "eval_runtime": 101.7212, "eval_samples_per_second": 4.109, "eval_steps_per_second": 1.032, "step": 1 }, { "epoch": 0.002017145738779627, "grad_norm": 1.4647407531738281, "learning_rate": 2.026e-05, "loss": 1.1291, "step": 2 }, { "epoch": 0.0030257186081694403, "grad_norm": 1.7250173091888428, "learning_rate": 3.039e-05, "loss": 1.0937, "step": 3 }, { "epoch": 0.004034291477559254, "grad_norm": 1.4756126403808594, "learning_rate": 4.052e-05, "loss": 1.0637, "step": 4 }, { "epoch": 0.005042864346949067, "grad_norm": 1.4971727132797241, "learning_rate": 5.065e-05, "loss": 1.1541, "step": 5 }, { "epoch": 0.006051437216338881, "grad_norm": 1.1345500946044922, "learning_rate": 6.078e-05, "loss": 0.8789, "step": 6 }, { "epoch": 0.0070600100857286935, "grad_norm": 1.1694258451461792, "learning_rate": 7.091e-05, "loss": 0.7942, "step": 7 }, { "epoch": 0.008068582955118508, "grad_norm": 0.9738374948501587, "learning_rate": 8.104e-05, "loss": 0.7897, "step": 8 }, { "epoch": 0.009077155824508321, "grad_norm": 1.2070674896240234, "learning_rate": 9.117e-05, "loss": 0.8907, "step": 9 }, { "epoch": 0.010085728693898134, "grad_norm": 0.9936267733573914, "learning_rate": 0.0001013, "loss": 0.7356, "step": 10 }, { "epoch": 0.011094301563287948, "grad_norm": 0.9822749495506287, "learning_rate": 0.00010076684210526316, "loss": 0.787, "step": 11 }, { "epoch": 0.012102874432677761, "grad_norm": 0.9206593632698059, "learning_rate": 0.0001002336842105263, "loss": 0.7268, "step": 12 }, { "epoch": 0.013111447302067574, "grad_norm": 0.8007137179374695, "learning_rate": 9.970052631578946e-05, "loss": 0.6785, "step": 13 }, { "epoch": 0.014120020171457387, "grad_norm": 0.7873572707176208, "learning_rate": 9.916736842105263e-05, "loss": 0.65, "step": 14 }, { "epoch": 0.015128593040847202, "grad_norm": 0.8010468482971191, "learning_rate": 9.863421052631579e-05, "loss": 0.6508, "step": 15 }, { "epoch": 0.016137165910237016, "grad_norm": 0.7741969227790833, "learning_rate": 9.810105263157895e-05, "loss": 0.5805, "step": 16 }, { "epoch": 0.01714573877962683, "grad_norm": 0.7493349313735962, "learning_rate": 9.756789473684211e-05, "loss": 0.564, "step": 17 }, { "epoch": 0.018154311649016642, "grad_norm": 0.9663587212562561, "learning_rate": 9.703473684210525e-05, "loss": 0.808, "step": 18 }, { "epoch": 0.019162884518406455, "grad_norm": 0.7793949246406555, "learning_rate": 9.650157894736842e-05, "loss": 0.5491, "step": 19 }, { "epoch": 0.020171457387796268, "grad_norm": 0.7455626726150513, "learning_rate": 9.596842105263158e-05, "loss": 0.6404, "step": 20 }, { "epoch": 0.02118003025718608, "grad_norm": 0.7438361048698425, "learning_rate": 9.543526315789474e-05, "loss": 0.6354, "step": 21 }, { "epoch": 0.022188603126575897, "grad_norm": 0.740470826625824, "learning_rate": 9.49021052631579e-05, "loss": 0.5225, "step": 22 }, { "epoch": 0.02319717599596571, "grad_norm": 0.8656465411186218, "learning_rate": 9.436894736842105e-05, "loss": 0.7408, "step": 23 }, { "epoch": 0.024205748865355523, "grad_norm": 0.9166726469993591, "learning_rate": 9.38357894736842e-05, "loss": 0.6548, 
"step": 24 }, { "epoch": 0.025214321734745335, "grad_norm": 0.8170962333679199, "learning_rate": 9.330263157894737e-05, "loss": 0.654, "step": 25 }, { "epoch": 0.026222894604135148, "grad_norm": 0.7673491835594177, "learning_rate": 9.276947368421051e-05, "loss": 0.6428, "step": 26 }, { "epoch": 0.02723146747352496, "grad_norm": 0.8028613924980164, "learning_rate": 9.223631578947369e-05, "loss": 0.5204, "step": 27 }, { "epoch": 0.028240040342914774, "grad_norm": 0.8541864156723022, "learning_rate": 9.170315789473684e-05, "loss": 0.6858, "step": 28 }, { "epoch": 0.02924861321230459, "grad_norm": 0.8092413544654846, "learning_rate": 9.117e-05, "loss": 0.6384, "step": 29 }, { "epoch": 0.030257186081694403, "grad_norm": 0.9186341762542725, "learning_rate": 9.063684210526316e-05, "loss": 0.7882, "step": 30 }, { "epoch": 0.031265758951084216, "grad_norm": 0.9697185158729553, "learning_rate": 9.010368421052632e-05, "loss": 0.799, "step": 31 }, { "epoch": 0.03227433182047403, "grad_norm": 0.7349818348884583, "learning_rate": 8.957052631578946e-05, "loss": 0.6457, "step": 32 }, { "epoch": 0.03328290468986384, "grad_norm": 0.843620240688324, "learning_rate": 8.903736842105263e-05, "loss": 0.7368, "step": 33 }, { "epoch": 0.03429147755925366, "grad_norm": 0.8225664496421814, "learning_rate": 8.850421052631579e-05, "loss": 0.7159, "step": 34 }, { "epoch": 0.03530005042864347, "grad_norm": 0.9680421352386475, "learning_rate": 8.797105263157895e-05, "loss": 0.8112, "step": 35 }, { "epoch": 0.036308623298033284, "grad_norm": 0.9144193530082703, "learning_rate": 8.743789473684211e-05, "loss": 0.7829, "step": 36 }, { "epoch": 0.03731719616742309, "grad_norm": 0.8283833265304565, "learning_rate": 8.690473684210526e-05, "loss": 0.7398, "step": 37 }, { "epoch": 0.03832576903681291, "grad_norm": 0.7781999707221985, "learning_rate": 8.637157894736842e-05, "loss": 0.7255, "step": 38 }, { "epoch": 0.039334341906202726, "grad_norm": 0.7448036074638367, "learning_rate": 8.583842105263158e-05, "loss": 0.6482, "step": 39 }, { "epoch": 0.040342914775592535, "grad_norm": 0.7694168090820312, "learning_rate": 8.530526315789472e-05, "loss": 0.6282, "step": 40 }, { "epoch": 0.04135148764498235, "grad_norm": 0.9006367325782776, "learning_rate": 8.47721052631579e-05, "loss": 0.7293, "step": 41 }, { "epoch": 0.04236006051437216, "grad_norm": 0.9051785469055176, "learning_rate": 8.423894736842105e-05, "loss": 0.8256, "step": 42 }, { "epoch": 0.04336863338376198, "grad_norm": 0.9707128405570984, "learning_rate": 8.37057894736842e-05, "loss": 0.7974, "step": 43 }, { "epoch": 0.044377206253151794, "grad_norm": 0.885073721408844, "learning_rate": 8.317263157894737e-05, "loss": 0.7655, "step": 44 }, { "epoch": 0.0453857791225416, "grad_norm": 0.9013693928718567, "learning_rate": 8.263947368421053e-05, "loss": 0.7205, "step": 45 }, { "epoch": 0.04639435199193142, "grad_norm": 1.1316449642181396, "learning_rate": 8.210631578947368e-05, "loss": 0.7543, "step": 46 }, { "epoch": 0.04740292486132123, "grad_norm": 0.8760470151901245, "learning_rate": 8.157315789473684e-05, "loss": 0.7771, "step": 47 }, { "epoch": 0.048411497730711045, "grad_norm": 1.0497716665267944, "learning_rate": 8.104e-05, "loss": 0.8032, "step": 48 }, { "epoch": 0.049420070600100854, "grad_norm": 0.9932529926300049, "learning_rate": 8.050684210526316e-05, "loss": 0.7941, "step": 49 }, { "epoch": 0.05042864346949067, "grad_norm": 1.1281291246414185, "learning_rate": 7.997368421052632e-05, "loss": 0.845, "step": 50 }, { "epoch": 0.05042864346949067, "eval_loss": 
0.6652013063430786, "eval_runtime": 101.679, "eval_samples_per_second": 4.111, "eval_steps_per_second": 1.033, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.357300236897485e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }