|
{ |
|
"best_metric": 3.116821765899658, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.12903225806451613, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0025806451612903226, |
|
"grad_norm": 2.02890944480896, |
|
"learning_rate": 1.007e-05, |
|
"loss": 3.2543, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0025806451612903226, |
|
"eval_loss": 3.4940507411956787, |
|
"eval_runtime": 39.1123, |
|
"eval_samples_per_second": 4.167, |
|
"eval_steps_per_second": 1.048, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005161290322580645, |
|
"grad_norm": 2.1640987396240234, |
|
"learning_rate": 2.014e-05, |
|
"loss": 3.1859, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007741935483870968, |
|
"grad_norm": 2.289116859436035, |
|
"learning_rate": 3.0209999999999997e-05, |
|
"loss": 3.1102, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01032258064516129, |
|
"grad_norm": 1.9029440879821777, |
|
"learning_rate": 4.028e-05, |
|
"loss": 3.1912, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.012903225806451613, |
|
"grad_norm": 1.5423721075057983, |
|
"learning_rate": 5.035e-05, |
|
"loss": 3.3264, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.015483870967741935, |
|
"grad_norm": 1.1242539882659912, |
|
"learning_rate": 6.0419999999999994e-05, |
|
"loss": 2.9698, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01806451612903226, |
|
"grad_norm": 1.6809178590774536, |
|
"learning_rate": 7.049e-05, |
|
"loss": 2.8629, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02064516129032258, |
|
"grad_norm": 1.3089873790740967, |
|
"learning_rate": 8.056e-05, |
|
"loss": 2.9564, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.023225806451612905, |
|
"grad_norm": 0.9860974550247192, |
|
"learning_rate": 9.062999999999999e-05, |
|
"loss": 3.0032, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.025806451612903226, |
|
"grad_norm": 0.9577404856681824, |
|
"learning_rate": 0.0001007, |
|
"loss": 2.7733, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02838709677419355, |
|
"grad_norm": 1.073362946510315, |
|
"learning_rate": 0.00010017, |
|
"loss": 3.0293, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03096774193548387, |
|
"grad_norm": 0.9577970504760742, |
|
"learning_rate": 9.963999999999999e-05, |
|
"loss": 3.0549, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03354838709677419, |
|
"grad_norm": 0.8062331676483154, |
|
"learning_rate": 9.910999999999999e-05, |
|
"loss": 2.9548, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03612903225806452, |
|
"grad_norm": 0.8570857644081116, |
|
"learning_rate": 9.858e-05, |
|
"loss": 3.1505, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03870967741935484, |
|
"grad_norm": 0.9095686674118042, |
|
"learning_rate": 9.805e-05, |
|
"loss": 2.8758, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04129032258064516, |
|
"grad_norm": 0.8106794953346252, |
|
"learning_rate": 9.752e-05, |
|
"loss": 2.9163, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04387096774193548, |
|
"grad_norm": 0.8135427832603455, |
|
"learning_rate": 9.698999999999999e-05, |
|
"loss": 3.1537, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04645161290322581, |
|
"grad_norm": 0.773794412612915, |
|
"learning_rate": 9.646e-05, |
|
"loss": 2.9923, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04903225806451613, |
|
"grad_norm": 0.776435911655426, |
|
"learning_rate": 9.593e-05, |
|
"loss": 2.994, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05161290322580645, |
|
"grad_norm": 0.702139139175415, |
|
"learning_rate": 9.539999999999999e-05, |
|
"loss": 2.8807, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05419354838709677, |
|
"grad_norm": 0.6850553750991821, |
|
"learning_rate": 9.487e-05, |
|
"loss": 3.0033, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0567741935483871, |
|
"grad_norm": 0.6869837045669556, |
|
"learning_rate": 9.434e-05, |
|
"loss": 2.7906, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05935483870967742, |
|
"grad_norm": 0.7767460942268372, |
|
"learning_rate": 9.381e-05, |
|
"loss": 2.8578, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06193548387096774, |
|
"grad_norm": 0.747832179069519, |
|
"learning_rate": 9.327999999999999e-05, |
|
"loss": 2.9695, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 0.7731716632843018, |
|
"learning_rate": 9.274999999999999e-05, |
|
"loss": 2.9062, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06709677419354838, |
|
"grad_norm": 0.8283132910728455, |
|
"learning_rate": 9.222e-05, |
|
"loss": 3.1388, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0696774193548387, |
|
"grad_norm": 0.8168233036994934, |
|
"learning_rate": 9.169e-05, |
|
"loss": 3.0032, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07225806451612904, |
|
"grad_norm": 0.7814300060272217, |
|
"learning_rate": 9.116e-05, |
|
"loss": 3.0617, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07483870967741936, |
|
"grad_norm": 0.8907764554023743, |
|
"learning_rate": 9.062999999999999e-05, |
|
"loss": 2.8873, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07741935483870968, |
|
"grad_norm": 0.897400975227356, |
|
"learning_rate": 9.01e-05, |
|
"loss": 3.0865, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.916833758354187, |
|
"learning_rate": 8.957e-05, |
|
"loss": 2.9955, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08258064516129032, |
|
"grad_norm": 0.9079581499099731, |
|
"learning_rate": 8.903999999999999e-05, |
|
"loss": 2.9475, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08516129032258064, |
|
"grad_norm": 1.2848162651062012, |
|
"learning_rate": 8.850999999999999e-05, |
|
"loss": 3.0854, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08774193548387096, |
|
"grad_norm": 1.0301451683044434, |
|
"learning_rate": 8.798e-05, |
|
"loss": 3.2001, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09032258064516129, |
|
"grad_norm": 0.9421987533569336, |
|
"learning_rate": 8.745e-05, |
|
"loss": 2.7962, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09290322580645162, |
|
"grad_norm": 1.2306110858917236, |
|
"learning_rate": 8.692e-05, |
|
"loss": 3.1929, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09548387096774194, |
|
"grad_norm": 1.1693624258041382, |
|
"learning_rate": 8.638999999999999e-05, |
|
"loss": 3.1043, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09806451612903226, |
|
"grad_norm": 1.169491171836853, |
|
"learning_rate": 8.586e-05, |
|
"loss": 2.7704, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10064516129032258, |
|
"grad_norm": 1.1204756498336792, |
|
"learning_rate": 8.533e-05, |
|
"loss": 3.2268, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1032258064516129, |
|
"grad_norm": 1.1709730625152588, |
|
"learning_rate": 8.479999999999999e-05, |
|
"loss": 2.9685, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10580645161290322, |
|
"grad_norm": 1.2603025436401367, |
|
"learning_rate": 8.427e-05, |
|
"loss": 2.9147, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10838709677419354, |
|
"grad_norm": 1.5371952056884766, |
|
"learning_rate": 8.374e-05, |
|
"loss": 2.9618, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11096774193548387, |
|
"grad_norm": 1.4978915452957153, |
|
"learning_rate": 8.321e-05, |
|
"loss": 3.0561, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1135483870967742, |
|
"grad_norm": 1.8759700059890747, |
|
"learning_rate": 8.268e-05, |
|
"loss": 3.5141, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11612903225806452, |
|
"grad_norm": 1.6922487020492554, |
|
"learning_rate": 8.214999999999999e-05, |
|
"loss": 3.229, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11870967741935484, |
|
"grad_norm": 1.9749841690063477, |
|
"learning_rate": 8.162e-05, |
|
"loss": 3.2539, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12129032258064516, |
|
"grad_norm": 2.2926204204559326, |
|
"learning_rate": 8.108999999999998e-05, |
|
"loss": 3.1085, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12387096774193548, |
|
"grad_norm": 4.02115535736084, |
|
"learning_rate": 8.056e-05, |
|
"loss": 3.1481, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12645161290322582, |
|
"grad_norm": 4.62841272354126, |
|
"learning_rate": 8.003e-05, |
|
"loss": 3.7727, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 6.652851581573486, |
|
"learning_rate": 7.95e-05, |
|
"loss": 3.9126, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"eval_loss": 3.116821765899658, |
|
"eval_runtime": 38.3432, |
|
"eval_samples_per_second": 4.251, |
|
"eval_steps_per_second": 1.069, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.324290386617958e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|