|
{ |
|
"best_metric": 0.0934281200170517, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.004221635883905013, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.443271767810026e-05, |
|
"grad_norm": 36.238685607910156, |
|
"learning_rate": 0.0001, |
|
"loss": 8.6372, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 8.443271767810026e-05, |
|
"eval_loss": 7.14997673034668, |
|
"eval_runtime": 1747.2511, |
|
"eval_samples_per_second": 2.854, |
|
"eval_steps_per_second": 1.427, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00016886543535620053, |
|
"grad_norm": 22.111175537109375, |
|
"learning_rate": 0.0002, |
|
"loss": 9.0008, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0002532981530343008, |
|
"grad_norm": 18.092870712280273, |
|
"learning_rate": 0.00019978589232386035, |
|
"loss": 5.7067, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00033773087071240106, |
|
"grad_norm": 13.40740966796875, |
|
"learning_rate": 0.00019914448613738106, |
|
"loss": 2.9529, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00042216358839050134, |
|
"grad_norm": 15.289527893066406, |
|
"learning_rate": 0.00019807852804032305, |
|
"loss": 2.0192, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0005065963060686016, |
|
"grad_norm": 8.689441680908203, |
|
"learning_rate": 0.00019659258262890683, |
|
"loss": 0.9383, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005910290237467018, |
|
"grad_norm": 5.27055549621582, |
|
"learning_rate": 0.0001946930129495106, |
|
"loss": 0.2762, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0006754617414248021, |
|
"grad_norm": 8.563719749450684, |
|
"learning_rate": 0.0001923879532511287, |
|
"loss": 0.3639, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0007598944591029024, |
|
"grad_norm": 13.948263168334961, |
|
"learning_rate": 0.00018968727415326884, |
|
"loss": 0.4229, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008443271767810027, |
|
"grad_norm": 9.877755165100098, |
|
"learning_rate": 0.00018660254037844388, |
|
"loss": 0.5385, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.000928759894459103, |
|
"grad_norm": 4.546455383300781, |
|
"learning_rate": 0.00018314696123025454, |
|
"loss": 0.152, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0010131926121372031, |
|
"grad_norm": 5.590847015380859, |
|
"learning_rate": 0.00017933533402912354, |
|
"loss": 0.3444, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0010976253298153034, |
|
"grad_norm": 4.4191508293151855, |
|
"learning_rate": 0.00017518398074789775, |
|
"loss": 0.2935, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0011820580474934037, |
|
"grad_norm": 10.509808540344238, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 0.6362, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.001266490765171504, |
|
"grad_norm": 2.2120509147644043, |
|
"learning_rate": 0.00016593458151000688, |
|
"loss": 0.1195, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0013509234828496042, |
|
"grad_norm": 8.435502052307129, |
|
"learning_rate": 0.00016087614290087208, |
|
"loss": 0.3736, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0014353562005277045, |
|
"grad_norm": 2.322937250137329, |
|
"learning_rate": 0.00015555702330196023, |
|
"loss": 0.043, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0015197889182058048, |
|
"grad_norm": 3.382920742034912, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.1894, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.001604221635883905, |
|
"grad_norm": 2.115218162536621, |
|
"learning_rate": 0.00014422886902190014, |
|
"loss": 0.0963, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0016886543535620053, |
|
"grad_norm": 3.9554922580718994, |
|
"learning_rate": 0.000138268343236509, |
|
"loss": 0.1499, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0017730870712401056, |
|
"grad_norm": 2.485090732574463, |
|
"learning_rate": 0.00013214394653031616, |
|
"loss": 0.0795, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.001857519788918206, |
|
"grad_norm": 1.8215144872665405, |
|
"learning_rate": 0.00012588190451025207, |
|
"loss": 0.0863, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.001941952506596306, |
|
"grad_norm": 1.6672977209091187, |
|
"learning_rate": 0.00011950903220161285, |
|
"loss": 0.0254, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0020263852242744062, |
|
"grad_norm": 1.2854676246643066, |
|
"learning_rate": 0.00011305261922200519, |
|
"loss": 0.0466, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0021108179419525065, |
|
"grad_norm": 3.5440475940704346, |
|
"learning_rate": 0.00010654031292301432, |
|
"loss": 0.2324, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0021108179419525065, |
|
"eval_loss": 0.23284952342510223, |
|
"eval_runtime": 1757.7264, |
|
"eval_samples_per_second": 2.837, |
|
"eval_steps_per_second": 1.419, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.002195250659630607, |
|
"grad_norm": 4.415905952453613, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2306, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.002279683377308707, |
|
"grad_norm": 2.082292318344116, |
|
"learning_rate": 9.345968707698569e-05, |
|
"loss": 0.0689, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0023641160949868074, |
|
"grad_norm": 0.1635982245206833, |
|
"learning_rate": 8.694738077799488e-05, |
|
"loss": 0.0034, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0024485488126649076, |
|
"grad_norm": 5.064727306365967, |
|
"learning_rate": 8.049096779838719e-05, |
|
"loss": 0.1768, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.002532981530343008, |
|
"grad_norm": 1.381342887878418, |
|
"learning_rate": 7.411809548974792e-05, |
|
"loss": 0.0235, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.002617414248021108, |
|
"grad_norm": 2.433394193649292, |
|
"learning_rate": 6.785605346968386e-05, |
|
"loss": 0.0458, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0027018469656992085, |
|
"grad_norm": 0.16443267464637756, |
|
"learning_rate": 6.173165676349103e-05, |
|
"loss": 0.0031, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0027862796833773087, |
|
"grad_norm": 5.821410179138184, |
|
"learning_rate": 5.577113097809989e-05, |
|
"loss": 0.1666, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.002870712401055409, |
|
"grad_norm": 0.13388852775096893, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0039, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0029551451187335093, |
|
"grad_norm": 3.2496159076690674, |
|
"learning_rate": 4.444297669803981e-05, |
|
"loss": 0.2075, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0030395778364116096, |
|
"grad_norm": 0.4470334053039551, |
|
"learning_rate": 3.9123857099127936e-05, |
|
"loss": 0.0083, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.00312401055408971, |
|
"grad_norm": 2.798367977142334, |
|
"learning_rate": 3.406541848999312e-05, |
|
"loss": 0.0599, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.00320844327176781, |
|
"grad_norm": 0.4125634431838989, |
|
"learning_rate": 2.9289321881345254e-05, |
|
"loss": 0.0067, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0032928759894459104, |
|
"grad_norm": 5.9901323318481445, |
|
"learning_rate": 2.4816019252102273e-05, |
|
"loss": 0.1536, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0033773087071240107, |
|
"grad_norm": 1.085100769996643, |
|
"learning_rate": 2.0664665970876496e-05, |
|
"loss": 0.0135, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.003461741424802111, |
|
"grad_norm": 8.561752319335938, |
|
"learning_rate": 1.6853038769745467e-05, |
|
"loss": 0.2638, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0035461741424802113, |
|
"grad_norm": 5.0624847412109375, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 0.2491, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0036306068601583115, |
|
"grad_norm": 3.0588884353637695, |
|
"learning_rate": 1.0312725846731175e-05, |
|
"loss": 0.0416, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.003715039577836412, |
|
"grad_norm": 0.11724475026130676, |
|
"learning_rate": 7.612046748871327e-06, |
|
"loss": 0.0022, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.003799472295514512, |
|
"grad_norm": 0.7609636783599854, |
|
"learning_rate": 5.306987050489442e-06, |
|
"loss": 0.0195, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.003883905013192612, |
|
"grad_norm": 0.0424778014421463, |
|
"learning_rate": 3.40741737109318e-06, |
|
"loss": 0.0007, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.003968337730870713, |
|
"grad_norm": 0.03648586571216583, |
|
"learning_rate": 1.921471959676957e-06, |
|
"loss": 0.0007, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0040527704485488125, |
|
"grad_norm": 3.957385301589966, |
|
"learning_rate": 8.555138626189618e-07, |
|
"loss": 0.1444, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.004137203166226913, |
|
"grad_norm": 1.900801420211792, |
|
"learning_rate": 2.141076761396521e-07, |
|
"loss": 0.0386, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.004221635883905013, |
|
"grad_norm": 11.870166778564453, |
|
"learning_rate": 0.0, |
|
"loss": 0.8645, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004221635883905013, |
|
"eval_loss": 0.0934281200170517, |
|
"eval_runtime": 1757.9204, |
|
"eval_samples_per_second": 2.837, |
|
"eval_steps_per_second": 1.419, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.41887283560448e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|