{ "best_metric": 0.30705726146698, "best_model_checkpoint": "miner_id_24/checkpoint-450", "epoch": 0.016433853738701727, "eval_steps": 50, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.6519674974892724e-05, "eval_loss": 0.507515549659729, "eval_runtime": 988.3539, "eval_samples_per_second": 11.666, "eval_steps_per_second": 2.917, "step": 1 }, { "epoch": 0.00036519674974892725, "grad_norm": 0.44497770071029663, "learning_rate": 4.36e-05, "loss": 0.1747, "step": 10 }, { "epoch": 0.0007303934994978545, "grad_norm": 0.17814281582832336, "learning_rate": 8.72e-05, "loss": 0.1732, "step": 20 }, { "epoch": 0.0010955902492467817, "grad_norm": 0.42693984508514404, "learning_rate": 0.0001308, "loss": 0.4011, "step": 30 }, { "epoch": 0.001460786998995709, "grad_norm": 0.6188461184501648, "learning_rate": 0.0001744, "loss": 0.4073, "step": 40 }, { "epoch": 0.0018259837487446362, "grad_norm": 1.0257658958435059, "learning_rate": 0.000218, "loss": 0.5956, "step": 50 }, { "epoch": 0.0018259837487446362, "eval_loss": 0.3329148590564728, "eval_runtime": 989.8009, "eval_samples_per_second": 11.649, "eval_steps_per_second": 2.913, "step": 50 }, { "epoch": 0.0021911804984935633, "grad_norm": 0.2114683836698532, "learning_rate": 0.00021773448147832086, "loss": 0.1106, "step": 60 }, { "epoch": 0.0025563772482424907, "grad_norm": 0.24322663247585297, "learning_rate": 0.0002169392194928312, "loss": 0.1584, "step": 70 }, { "epoch": 0.002921573997991418, "grad_norm": 0.2985611855983734, "learning_rate": 0.00021561808847998484, "loss": 0.4166, "step": 80 }, { "epoch": 0.003286770747740345, "grad_norm": 0.4192894399166107, "learning_rate": 0.00021377752485727676, "loss": 0.4593, "step": 90 }, { "epoch": 0.0036519674974892723, "grad_norm": 0.9379873871803284, "learning_rate": 0.00021142649566566402, "loss": 0.5248, "step": 100 }, { "epoch": 0.0036519674974892723, "eval_loss": 0.3307770788669586, "eval_runtime": 989.3733, "eval_samples_per_second": 11.654, "eval_steps_per_second": 2.914, "step": 100 }, { "epoch": 0.0040171642472382, "grad_norm": 0.11100369691848755, "learning_rate": 0.0002085764548830435, "loss": 0.1029, "step": 110 }, { "epoch": 0.004382360996987127, "grad_norm": 0.2935497462749481, "learning_rate": 0.00020524128762162305, "loss": 0.1613, "step": 120 }, { "epoch": 0.004747557746736054, "grad_norm": 0.3621135652065277, "learning_rate": 0.00020143724248105043, "loss": 0.4263, "step": 130 }, { "epoch": 0.005112754496484981, "grad_norm": 0.45805230736732483, "learning_rate": 0.0001971828523868693, "loss": 0.4188, "step": 140 }, { "epoch": 0.005477951246233908, "grad_norm": 0.9291290640830994, "learning_rate": 0.0001924988442999686, "loss": 0.621, "step": 150 }, { "epoch": 0.005477951246233908, "eval_loss": 0.3238024115562439, "eval_runtime": 990.2129, "eval_samples_per_second": 11.644, "eval_steps_per_second": 2.911, "step": 150 }, { "epoch": 0.005843147995982836, "grad_norm": 0.27926528453826904, "learning_rate": 0.00018740803823691298, "loss": 0.133, "step": 160 }, { "epoch": 0.006208344745731763, "grad_norm": 0.34424373507499695, "learning_rate": 0.00018193523609311556, "loss": 0.2335, "step": 170 }, { "epoch": 0.00657354149548069, "grad_norm": 0.18848469853401184, "learning_rate": 0.00017610710081049675, "loss": 0.4478, "step": 180 }, { "epoch": 0.006938738245229618, "grad_norm": 0.5517479181289673, "learning_rate": 0.00016995202647831142, "loss": 0.4419, "step": 190 }, { "epoch": 0.007303934994978545, "grad_norm": 0.9645527601242065, "learning_rate": 0.00016350000000000002, "loss": 0.5241, "step": 200 }, { "epoch": 0.007303934994978545, "eval_loss": 0.3247826099395752, "eval_runtime": 988.6594, "eval_samples_per_second": 11.662, "eval_steps_per_second": 2.916, "step": 200 }, { "epoch": 0.007669131744727472, "grad_norm": 0.17922812700271606, "learning_rate": 0.00015678245500000943, "loss": 0.1349, "step": 210 }, { "epoch": 0.0080343284944764, "grad_norm": 0.4304835796356201, "learning_rate": 0.00014983211868233444, "loss": 0.3056, "step": 220 }, { "epoch": 0.008399525244225327, "grad_norm": 0.41957446932792664, "learning_rate": 0.00014268285238686927, "loss": 0.3908, "step": 230 }, { "epoch": 0.008764721993974253, "grad_norm": 0.5630193948745728, "learning_rate": 0.00013536948662036378, "loss": 0.3997, "step": 240 }, { "epoch": 0.009129918743723181, "grad_norm": 1.16728675365448, "learning_rate": 0.00012792765136569544, "loss": 0.5128, "step": 250 }, { "epoch": 0.009129918743723181, "eval_loss": 0.3218456208705902, "eval_runtime": 989.5873, "eval_samples_per_second": 11.651, "eval_steps_per_second": 2.913, "step": 250 }, { "epoch": 0.009495115493472109, "grad_norm": 0.18547005951404572, "learning_rate": 0.00012039360249617425, "loss": 0.1189, "step": 260 }, { "epoch": 0.009860312243221035, "grad_norm": 0.49629077315330505, "learning_rate": 0.00011280404514057264, "loss": 0.1783, "step": 270 }, { "epoch": 0.010225508992969963, "grad_norm": 0.3257622718811035, "learning_rate": 0.00010519595485942743, "loss": 0.4507, "step": 280 }, { "epoch": 0.01059070574271889, "grad_norm": 0.42767468094825745, "learning_rate": 9.76063975038258e-05, "loss": 0.4914, "step": 290 }, { "epoch": 0.010955902492467817, "grad_norm": 0.7811486124992371, "learning_rate": 9.00723486343046e-05, "loss": 0.4755, "step": 300 }, { "epoch": 0.010955902492467817, "eval_loss": 0.3131124973297119, "eval_runtime": 989.1878, "eval_samples_per_second": 11.656, "eval_steps_per_second": 2.915, "step": 300 }, { "epoch": 0.011321099242216744, "grad_norm": 0.25758835673332214, "learning_rate": 8.263051337963623e-05, "loss": 0.135, "step": 310 }, { "epoch": 0.011686295991965672, "grad_norm": 0.21303024888038635, "learning_rate": 7.531714761313074e-05, "loss": 0.1445, "step": 320 }, { "epoch": 0.012051492741714598, "grad_norm": 0.24839721620082855, "learning_rate": 6.816788131766559e-05, "loss": 0.3938, "step": 330 }, { "epoch": 0.012416689491463526, "grad_norm": 0.49439969658851624, "learning_rate": 6.121754499999055e-05, "loss": 0.4355, "step": 340 }, { "epoch": 0.012781886241212454, "grad_norm": 0.8836405873298645, "learning_rate": 5.450000000000003e-05, "loss": 0.5323, "step": 350 }, { "epoch": 0.012781886241212454, "eval_loss": 0.31057512760162354, "eval_runtime": 989.8141, "eval_samples_per_second": 11.649, "eval_steps_per_second": 2.913, "step": 350 }, { "epoch": 0.01314708299096138, "grad_norm": 0.17097894847393036, "learning_rate": 4.804797352168861e-05, "loss": 0.1068, "step": 360 }, { "epoch": 0.013512279740710308, "grad_norm": 0.1399489939212799, "learning_rate": 4.189289918950325e-05, "loss": 0.1655, "step": 370 }, { "epoch": 0.013877476490459235, "grad_norm": 0.36636313796043396, "learning_rate": 3.606476390688449e-05, "loss": 0.4092, "step": 380 }, { "epoch": 0.014242673240208161, "grad_norm": 0.462443470954895, "learning_rate": 3.0591961763087043e-05, "loss": 0.3231, "step": 390 }, { "epoch": 0.01460786998995709, "grad_norm": 0.8259087204933167, "learning_rate": 2.550115570003141e-05, "loss": 0.5062, "step": 400 }, { "epoch": 0.01460786998995709, "eval_loss": 0.30848589539527893, "eval_runtime": 990.5943, "eval_samples_per_second": 11.639, "eval_steps_per_second": 2.91, "step": 400 }, { "epoch": 0.014973066739706017, "grad_norm": 0.14570221304893494, "learning_rate": 2.081714761313074e-05, "loss": 0.1041, "step": 410 }, { "epoch": 0.015338263489454943, "grad_norm": 0.24520431458950043, "learning_rate": 1.656275751894957e-05, "loss": 0.1525, "step": 420 }, { "epoch": 0.01570346023920387, "grad_norm": 0.35061249136924744, "learning_rate": 1.275871237837696e-05, "loss": 0.5399, "step": 430 }, { "epoch": 0.0160686569889528, "grad_norm": 0.5372488498687744, "learning_rate": 9.423545116956494e-06, "loss": 0.424, "step": 440 }, { "epoch": 0.016433853738701727, "grad_norm": 0.8911713361740112, "learning_rate": 6.573504334335994e-06, "loss": 0.5111, "step": 450 }, { "epoch": 0.016433853738701727, "eval_loss": 0.30705726146698, "eval_runtime": 991.6107, "eval_samples_per_second": 11.628, "eval_steps_per_second": 2.907, "step": 450 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.17832103327957e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }