{ "best_metric": 0.0934281200170517, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.004221635883905013, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.443271767810026e-05, "grad_norm": 36.238685607910156, "learning_rate": 0.0001, "loss": 8.6372, "step": 1 }, { "epoch": 8.443271767810026e-05, "eval_loss": 7.14997673034668, "eval_runtime": 1747.2511, "eval_samples_per_second": 2.854, "eval_steps_per_second": 1.427, "step": 1 }, { "epoch": 0.00016886543535620053, "grad_norm": 22.111175537109375, "learning_rate": 0.0002, "loss": 9.0008, "step": 2 }, { "epoch": 0.0002532981530343008, "grad_norm": 18.092870712280273, "learning_rate": 0.00019978589232386035, "loss": 5.7067, "step": 3 }, { "epoch": 0.00033773087071240106, "grad_norm": 13.40740966796875, "learning_rate": 0.00019914448613738106, "loss": 2.9529, "step": 4 }, { "epoch": 0.00042216358839050134, "grad_norm": 15.289527893066406, "learning_rate": 0.00019807852804032305, "loss": 2.0192, "step": 5 }, { "epoch": 0.0005065963060686016, "grad_norm": 8.689441680908203, "learning_rate": 0.00019659258262890683, "loss": 0.9383, "step": 6 }, { "epoch": 0.0005910290237467018, "grad_norm": 5.27055549621582, "learning_rate": 0.0001946930129495106, "loss": 0.2762, "step": 7 }, { "epoch": 0.0006754617414248021, "grad_norm": 8.563719749450684, "learning_rate": 0.0001923879532511287, "loss": 0.3639, "step": 8 }, { "epoch": 0.0007598944591029024, "grad_norm": 13.948263168334961, "learning_rate": 0.00018968727415326884, "loss": 0.4229, "step": 9 }, { "epoch": 0.0008443271767810027, "grad_norm": 9.877755165100098, "learning_rate": 0.00018660254037844388, "loss": 0.5385, "step": 10 }, { "epoch": 0.000928759894459103, "grad_norm": 4.546455383300781, "learning_rate": 0.00018314696123025454, "loss": 0.152, "step": 11 }, { "epoch": 0.0010131926121372031, "grad_norm": 5.590847015380859, "learning_rate": 0.00017933533402912354, "loss": 0.3444, "step": 12 }, { "epoch": 0.0010976253298153034, "grad_norm": 4.4191508293151855, "learning_rate": 0.00017518398074789775, "loss": 0.2935, "step": 13 }, { "epoch": 0.0011820580474934037, "grad_norm": 10.509808540344238, "learning_rate": 0.00017071067811865476, "loss": 0.6362, "step": 14 }, { "epoch": 0.001266490765171504, "grad_norm": 2.2120509147644043, "learning_rate": 0.00016593458151000688, "loss": 0.1195, "step": 15 }, { "epoch": 0.0013509234828496042, "grad_norm": 8.435502052307129, "learning_rate": 0.00016087614290087208, "loss": 0.3736, "step": 16 }, { "epoch": 0.0014353562005277045, "grad_norm": 2.322937250137329, "learning_rate": 0.00015555702330196023, "loss": 0.043, "step": 17 }, { "epoch": 0.0015197889182058048, "grad_norm": 3.382920742034912, "learning_rate": 0.00015000000000000001, "loss": 0.1894, "step": 18 }, { "epoch": 0.001604221635883905, "grad_norm": 2.115218162536621, "learning_rate": 0.00014422886902190014, "loss": 0.0963, "step": 19 }, { "epoch": 0.0016886543535620053, "grad_norm": 3.9554922580718994, "learning_rate": 0.000138268343236509, "loss": 0.1499, "step": 20 }, { "epoch": 0.0017730870712401056, "grad_norm": 2.485090732574463, "learning_rate": 0.00013214394653031616, "loss": 0.0795, "step": 21 }, { "epoch": 0.001857519788918206, "grad_norm": 1.8215144872665405, "learning_rate": 0.00012588190451025207, "loss": 0.0863, "step": 22 }, { "epoch": 0.001941952506596306, "grad_norm": 1.6672977209091187, "learning_rate": 0.00011950903220161285, "loss": 0.0254, "step": 23 }, { "epoch": 0.0020263852242744062, "grad_norm": 1.2854676246643066, "learning_rate": 0.00011305261922200519, "loss": 0.0466, "step": 24 }, { "epoch": 0.0021108179419525065, "grad_norm": 3.5440475940704346, "learning_rate": 0.00010654031292301432, "loss": 0.2324, "step": 25 }, { "epoch": 0.0021108179419525065, "eval_loss": 0.23284952342510223, "eval_runtime": 1757.7264, "eval_samples_per_second": 2.837, "eval_steps_per_second": 1.419, "step": 25 }, { "epoch": 0.002195250659630607, "grad_norm": 4.415905952453613, "learning_rate": 0.0001, "loss": 0.2306, "step": 26 }, { "epoch": 0.002279683377308707, "grad_norm": 2.082292318344116, "learning_rate": 9.345968707698569e-05, "loss": 0.0689, "step": 27 }, { "epoch": 0.0023641160949868074, "grad_norm": 0.1635982245206833, "learning_rate": 8.694738077799488e-05, "loss": 0.0034, "step": 28 }, { "epoch": 0.0024485488126649076, "grad_norm": 5.064727306365967, "learning_rate": 8.049096779838719e-05, "loss": 0.1768, "step": 29 }, { "epoch": 0.002532981530343008, "grad_norm": 1.381342887878418, "learning_rate": 7.411809548974792e-05, "loss": 0.0235, "step": 30 }, { "epoch": 0.002617414248021108, "grad_norm": 2.433394193649292, "learning_rate": 6.785605346968386e-05, "loss": 0.0458, "step": 31 }, { "epoch": 0.0027018469656992085, "grad_norm": 0.16443267464637756, "learning_rate": 6.173165676349103e-05, "loss": 0.0031, "step": 32 }, { "epoch": 0.0027862796833773087, "grad_norm": 5.821410179138184, "learning_rate": 5.577113097809989e-05, "loss": 0.1666, "step": 33 }, { "epoch": 0.002870712401055409, "grad_norm": 0.13388852775096893, "learning_rate": 5.000000000000002e-05, "loss": 0.0039, "step": 34 }, { "epoch": 0.0029551451187335093, "grad_norm": 3.2496159076690674, "learning_rate": 4.444297669803981e-05, "loss": 0.2075, "step": 35 }, { "epoch": 0.0030395778364116096, "grad_norm": 0.4470334053039551, "learning_rate": 3.9123857099127936e-05, "loss": 0.0083, "step": 36 }, { "epoch": 0.00312401055408971, "grad_norm": 2.798367977142334, "learning_rate": 3.406541848999312e-05, "loss": 0.0599, "step": 37 }, { "epoch": 0.00320844327176781, "grad_norm": 0.4125634431838989, "learning_rate": 2.9289321881345254e-05, "loss": 0.0067, "step": 38 }, { "epoch": 0.0032928759894459104, "grad_norm": 5.9901323318481445, "learning_rate": 2.4816019252102273e-05, "loss": 0.1536, "step": 39 }, { "epoch": 0.0033773087071240107, "grad_norm": 1.085100769996643, "learning_rate": 2.0664665970876496e-05, "loss": 0.0135, "step": 40 }, { "epoch": 0.003461741424802111, "grad_norm": 8.561752319335938, "learning_rate": 1.6853038769745467e-05, "loss": 0.2638, "step": 41 }, { "epoch": 0.0035461741424802113, "grad_norm": 5.0624847412109375, "learning_rate": 1.339745962155613e-05, "loss": 0.2491, "step": 42 }, { "epoch": 0.0036306068601583115, "grad_norm": 3.0588884353637695, "learning_rate": 1.0312725846731175e-05, "loss": 0.0416, "step": 43 }, { "epoch": 0.003715039577836412, "grad_norm": 0.11724475026130676, "learning_rate": 7.612046748871327e-06, "loss": 0.0022, "step": 44 }, { "epoch": 0.003799472295514512, "grad_norm": 0.7609636783599854, "learning_rate": 5.306987050489442e-06, "loss": 0.0195, "step": 45 }, { "epoch": 0.003883905013192612, "grad_norm": 0.0424778014421463, "learning_rate": 3.40741737109318e-06, "loss": 0.0007, "step": 46 }, { "epoch": 0.003968337730870713, "grad_norm": 0.03648586571216583, "learning_rate": 1.921471959676957e-06, "loss": 0.0007, "step": 47 }, { "epoch": 0.0040527704485488125, "grad_norm": 3.957385301589966, "learning_rate": 8.555138626189618e-07, "loss": 0.1444, "step": 48 }, { "epoch": 0.004137203166226913, "grad_norm": 1.900801420211792, "learning_rate": 2.141076761396521e-07, "loss": 0.0386, "step": 49 }, { "epoch": 0.004221635883905013, "grad_norm": 11.870166778564453, "learning_rate": 0.0, "loss": 0.8645, "step": 50 }, { "epoch": 0.004221635883905013, "eval_loss": 0.0934281200170517, "eval_runtime": 1757.9204, "eval_samples_per_second": 2.837, "eval_steps_per_second": 1.419, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.41887283560448e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }