{ "best_metric": 3.116821765899658, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.12903225806451613, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025806451612903226, "grad_norm": 2.02890944480896, "learning_rate": 1.007e-05, "loss": 3.2543, "step": 1 }, { "epoch": 0.0025806451612903226, "eval_loss": 3.4940507411956787, "eval_runtime": 39.1123, "eval_samples_per_second": 4.167, "eval_steps_per_second": 1.048, "step": 1 }, { "epoch": 0.005161290322580645, "grad_norm": 2.1640987396240234, "learning_rate": 2.014e-05, "loss": 3.1859, "step": 2 }, { "epoch": 0.007741935483870968, "grad_norm": 2.289116859436035, "learning_rate": 3.0209999999999997e-05, "loss": 3.1102, "step": 3 }, { "epoch": 0.01032258064516129, "grad_norm": 1.9029440879821777, "learning_rate": 4.028e-05, "loss": 3.1912, "step": 4 }, { "epoch": 0.012903225806451613, "grad_norm": 1.5423721075057983, "learning_rate": 5.035e-05, "loss": 3.3264, "step": 5 }, { "epoch": 0.015483870967741935, "grad_norm": 1.1242539882659912, "learning_rate": 6.0419999999999994e-05, "loss": 2.9698, "step": 6 }, { "epoch": 0.01806451612903226, "grad_norm": 1.6809178590774536, "learning_rate": 7.049e-05, "loss": 2.8629, "step": 7 }, { "epoch": 0.02064516129032258, "grad_norm": 1.3089873790740967, "learning_rate": 8.056e-05, "loss": 2.9564, "step": 8 }, { "epoch": 0.023225806451612905, "grad_norm": 0.9860974550247192, "learning_rate": 9.062999999999999e-05, "loss": 3.0032, "step": 9 }, { "epoch": 0.025806451612903226, "grad_norm": 0.9577404856681824, "learning_rate": 0.0001007, "loss": 2.7733, "step": 10 }, { "epoch": 0.02838709677419355, "grad_norm": 1.073362946510315, "learning_rate": 0.00010017, "loss": 3.0293, "step": 11 }, { "epoch": 0.03096774193548387, "grad_norm": 0.9577970504760742, "learning_rate": 9.963999999999999e-05, "loss": 3.0549, "step": 12 }, { "epoch": 0.03354838709677419, "grad_norm": 0.8062331676483154, "learning_rate": 9.910999999999999e-05, "loss": 2.9548, "step": 13 }, { "epoch": 0.03612903225806452, "grad_norm": 0.8570857644081116, "learning_rate": 9.858e-05, "loss": 3.1505, "step": 14 }, { "epoch": 0.03870967741935484, "grad_norm": 0.9095686674118042, "learning_rate": 9.805e-05, "loss": 2.8758, "step": 15 }, { "epoch": 0.04129032258064516, "grad_norm": 0.8106794953346252, "learning_rate": 9.752e-05, "loss": 2.9163, "step": 16 }, { "epoch": 0.04387096774193548, "grad_norm": 0.8135427832603455, "learning_rate": 9.698999999999999e-05, "loss": 3.1537, "step": 17 }, { "epoch": 0.04645161290322581, "grad_norm": 0.773794412612915, "learning_rate": 9.646e-05, "loss": 2.9923, "step": 18 }, { "epoch": 0.04903225806451613, "grad_norm": 0.776435911655426, "learning_rate": 9.593e-05, "loss": 2.994, "step": 19 }, { "epoch": 0.05161290322580645, "grad_norm": 0.702139139175415, "learning_rate": 9.539999999999999e-05, "loss": 2.8807, "step": 20 }, { "epoch": 0.05419354838709677, "grad_norm": 0.6850553750991821, "learning_rate": 9.487e-05, "loss": 3.0033, "step": 21 }, { "epoch": 0.0567741935483871, "grad_norm": 0.6869837045669556, "learning_rate": 9.434e-05, "loss": 2.7906, "step": 22 }, { "epoch": 0.05935483870967742, "grad_norm": 0.7767460942268372, "learning_rate": 9.381e-05, "loss": 2.8578, "step": 23 }, { "epoch": 0.06193548387096774, "grad_norm": 0.747832179069519, "learning_rate": 9.327999999999999e-05, "loss": 2.9695, "step": 24 }, { "epoch": 0.06451612903225806, "grad_norm": 0.7731716632843018, 
"learning_rate": 9.274999999999999e-05, "loss": 2.9062, "step": 25 }, { "epoch": 0.06709677419354838, "grad_norm": 0.8283132910728455, "learning_rate": 9.222e-05, "loss": 3.1388, "step": 26 }, { "epoch": 0.0696774193548387, "grad_norm": 0.8168233036994934, "learning_rate": 9.169e-05, "loss": 3.0032, "step": 27 }, { "epoch": 0.07225806451612904, "grad_norm": 0.7814300060272217, "learning_rate": 9.116e-05, "loss": 3.0617, "step": 28 }, { "epoch": 0.07483870967741936, "grad_norm": 0.8907764554023743, "learning_rate": 9.062999999999999e-05, "loss": 2.8873, "step": 29 }, { "epoch": 0.07741935483870968, "grad_norm": 0.897400975227356, "learning_rate": 9.01e-05, "loss": 3.0865, "step": 30 }, { "epoch": 0.08, "grad_norm": 0.916833758354187, "learning_rate": 8.957e-05, "loss": 2.9955, "step": 31 }, { "epoch": 0.08258064516129032, "grad_norm": 0.9079581499099731, "learning_rate": 8.903999999999999e-05, "loss": 2.9475, "step": 32 }, { "epoch": 0.08516129032258064, "grad_norm": 1.2848162651062012, "learning_rate": 8.850999999999999e-05, "loss": 3.0854, "step": 33 }, { "epoch": 0.08774193548387096, "grad_norm": 1.0301451683044434, "learning_rate": 8.798e-05, "loss": 3.2001, "step": 34 }, { "epoch": 0.09032258064516129, "grad_norm": 0.9421987533569336, "learning_rate": 8.745e-05, "loss": 2.7962, "step": 35 }, { "epoch": 0.09290322580645162, "grad_norm": 1.2306110858917236, "learning_rate": 8.692e-05, "loss": 3.1929, "step": 36 }, { "epoch": 0.09548387096774194, "grad_norm": 1.1693624258041382, "learning_rate": 8.638999999999999e-05, "loss": 3.1043, "step": 37 }, { "epoch": 0.09806451612903226, "grad_norm": 1.169491171836853, "learning_rate": 8.586e-05, "loss": 2.7704, "step": 38 }, { "epoch": 0.10064516129032258, "grad_norm": 1.1204756498336792, "learning_rate": 8.533e-05, "loss": 3.2268, "step": 39 }, { "epoch": 0.1032258064516129, "grad_norm": 1.1709730625152588, "learning_rate": 8.479999999999999e-05, "loss": 2.9685, "step": 40 }, { "epoch": 0.10580645161290322, "grad_norm": 1.2603025436401367, "learning_rate": 8.427e-05, "loss": 2.9147, "step": 41 }, { "epoch": 0.10838709677419354, "grad_norm": 1.5371952056884766, "learning_rate": 8.374e-05, "loss": 2.9618, "step": 42 }, { "epoch": 0.11096774193548387, "grad_norm": 1.4978915452957153, "learning_rate": 8.321e-05, "loss": 3.0561, "step": 43 }, { "epoch": 0.1135483870967742, "grad_norm": 1.8759700059890747, "learning_rate": 8.268e-05, "loss": 3.5141, "step": 44 }, { "epoch": 0.11612903225806452, "grad_norm": 1.6922487020492554, "learning_rate": 8.214999999999999e-05, "loss": 3.229, "step": 45 }, { "epoch": 0.11870967741935484, "grad_norm": 1.9749841690063477, "learning_rate": 8.162e-05, "loss": 3.2539, "step": 46 }, { "epoch": 0.12129032258064516, "grad_norm": 2.2926204204559326, "learning_rate": 8.108999999999998e-05, "loss": 3.1085, "step": 47 }, { "epoch": 0.12387096774193548, "grad_norm": 4.02115535736084, "learning_rate": 8.056e-05, "loss": 3.1481, "step": 48 }, { "epoch": 0.12645161290322582, "grad_norm": 4.62841272354126, "learning_rate": 8.003e-05, "loss": 3.7727, "step": 49 }, { "epoch": 0.12903225806451613, "grad_norm": 6.652851581573486, "learning_rate": 7.95e-05, "loss": 3.9126, "step": 50 }, { "epoch": 0.12903225806451613, "eval_loss": 3.116821765899658, "eval_runtime": 38.3432, "eval_samples_per_second": 4.251, "eval_steps_per_second": 1.069, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { 
"early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.324290386617958e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }