{ "best_metric": 12.254945755004883, "best_model_checkpoint": "miner_id_24/checkpoint-250", "epoch": 1.0, "eval_steps": 50, "global_step": 263, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0038022813688212928, "eval_loss": 12.464061737060547, "eval_runtime": 0.7332, "eval_samples_per_second": 151.386, "eval_steps_per_second": 38.187, "step": 1 }, { "epoch": 0.03802281368821293, "grad_norm": 0.04448171332478523, "learning_rate": 4.2000000000000004e-05, "loss": 12.4533, "step": 10 }, { "epoch": 0.07604562737642585, "grad_norm": 0.0721018984913826, "learning_rate": 8.400000000000001e-05, "loss": 12.4702, "step": 20 }, { "epoch": 0.11406844106463879, "grad_norm": 0.10813530534505844, "learning_rate": 0.000126, "loss": 12.4604, "step": 30 }, { "epoch": 0.1520912547528517, "grad_norm": 0.22024933993816376, "learning_rate": 0.00016800000000000002, "loss": 12.4574, "step": 40 }, { "epoch": 0.19011406844106463, "grad_norm": 0.695553183555603, "learning_rate": 0.00021, "loss": 12.4603, "step": 50 }, { "epoch": 0.19011406844106463, "eval_loss": 12.422882080078125, "eval_runtime": 0.7503, "eval_samples_per_second": 147.933, "eval_steps_per_second": 37.316, "step": 50 }, { "epoch": 0.22813688212927757, "grad_norm": 0.4002653956413269, "learning_rate": 0.00020885998020319223, "loss": 12.4096, "step": 60 }, { "epoch": 0.2661596958174905, "grad_norm": 0.4154040813446045, "learning_rate": 0.00020546467595823764, "loss": 12.3615, "step": 70 }, { "epoch": 0.3041825095057034, "grad_norm": 0.2567123472690582, "learning_rate": 0.00019988781515190642, "loss": 12.3251, "step": 80 }, { "epoch": 0.34220532319391633, "grad_norm": 0.34832563996315, "learning_rate": 0.0001922504974360702, "loss": 12.3082, "step": 90 }, { "epoch": 0.38022813688212925, "grad_norm": 0.854489266872406, "learning_rate": 0.00018271856458959687, "loss": 12.2889, "step": 100 }, { "epoch": 0.38022813688212925, "eval_loss": 12.286394119262695, "eval_runtime": 0.6546, "eval_samples_per_second": 169.571, "eval_steps_per_second": 42.775, "step": 100 }, { "epoch": 0.41825095057034223, "grad_norm": 0.27274471521377563, "learning_rate": 0.00017149899932004494, "loss": 12.3175, "step": 110 }, { "epoch": 0.45627376425855515, "grad_norm": 0.2955954670906067, "learning_rate": 0.00015883543070396424, "loss": 12.2942, "step": 120 }, { "epoch": 0.49429657794676807, "grad_norm": 0.3152942359447479, "learning_rate": 0.00014500284386365127, "loss": 12.2781, "step": 130 }, { "epoch": 0.532319391634981, "grad_norm": 0.4206034541130066, "learning_rate": 0.0001303016087579445, "loss": 12.2801, "step": 140 }, { "epoch": 0.5703422053231939, "grad_norm": 2.542823076248169, "learning_rate": 0.00011505095774985408, "loss": 12.2484, "step": 150 }, { "epoch": 0.5703422053231939, "eval_loss": 12.266809463500977, "eval_runtime": 0.788, "eval_samples_per_second": 140.857, "eval_steps_per_second": 35.531, "step": 150 }, { "epoch": 0.6083650190114068, "grad_norm": 0.258990079164505, "learning_rate": 9.958205358344533e-05, "loss": 12.2947, "step": 160 }, { "epoch": 0.6463878326996197, "grad_norm": 0.3683970272541046, "learning_rate": 8.423079829652068e-05, "loss": 12.2761, "step": 170 }, { "epoch": 0.6844106463878327, "grad_norm": 0.24230748414993286, "learning_rate": 6.933053922113626e-05, "loss": 12.2694, "step": 180 }, { "epoch": 0.7224334600760456, "grad_norm": 0.4169347286224365, "learning_rate": 5.520483045869212e-05, "loss": 12.272, "step": 190 }, { "epoch": 0.7604562737642585, "grad_norm": 0.7109359502792358, "learning_rate": 4.216040701172398e-05, "loss": 12.2407, "step": 200 }, { "epoch": 0.7604562737642585, "eval_loss": 12.255280494689941, "eval_runtime": 0.725, "eval_samples_per_second": 153.099, "eval_steps_per_second": 38.62, "step": 200 }, { "epoch": 0.7984790874524715, "grad_norm": 0.39837583899497986, "learning_rate": 3.048052413675547e-05, "loss": 12.2795, "step": 210 }, { "epoch": 0.8365019011406845, "grad_norm": 0.2841992676258087, "learning_rate": 2.0418806551917254e-05, "loss": 12.2673, "step": 220 }, { "epoch": 0.8745247148288974, "grad_norm": 0.42232921719551086, "learning_rate": 1.2193741061716083e-05, "loss": 12.2493, "step": 230 }, { "epoch": 0.9125475285171103, "grad_norm": 0.4095086455345154, "learning_rate": 5.983932189749497e-06, "loss": 12.2626, "step": 240 }, { "epoch": 0.9505703422053232, "grad_norm": 0.941171407699585, "learning_rate": 1.924223841701947e-06, "loss": 12.2484, "step": 250 }, { "epoch": 0.9505703422053232, "eval_loss": 12.254945755004883, "eval_runtime": 0.7324, "eval_samples_per_second": 151.566, "eval_steps_per_second": 38.233, "step": 250 }, { "epoch": 0.9885931558935361, "grad_norm": 0.38638147711753845, "learning_rate": 1.0277121540277156e-07, "loss": 12.2669, "step": 260 } ], "logging_steps": 10, "max_steps": 263, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 126787780608.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }