|
{ |
|
"best_metric": 1.8225200176239014, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-450", |
|
"epoch": 0.2558271745309835, |
|
"eval_steps": 50, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005685048322910744, |
|
"eval_loss": 2.708378791809082, |
|
"eval_runtime": 47.2835, |
|
"eval_samples_per_second": 15.671, |
|
"eval_steps_per_second": 3.934, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005685048322910745, |
|
"grad_norm": 2.072335958480835, |
|
"learning_rate": 4.08e-05, |
|
"loss": 2.3259, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01137009664582149, |
|
"grad_norm": 2.634218215942383, |
|
"learning_rate": 8.16e-05, |
|
"loss": 2.141, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.017055144968732235, |
|
"grad_norm": 2.5346333980560303, |
|
"learning_rate": 0.0001224, |
|
"loss": 1.9537, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02274019329164298, |
|
"grad_norm": 5.2234578132629395, |
|
"learning_rate": 0.0001632, |
|
"loss": 1.9308, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.028425241614553724, |
|
"grad_norm": 28.278820037841797, |
|
"learning_rate": 0.000204, |
|
"loss": 1.9311, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.028425241614553724, |
|
"eval_loss": 2.325153350830078, |
|
"eval_runtime": 47.0492, |
|
"eval_samples_per_second": 15.749, |
|
"eval_steps_per_second": 3.953, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03411028993746447, |
|
"grad_norm": 1.899110198020935, |
|
"learning_rate": 0.00020375153312650207, |
|
"loss": 2.1988, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.039795338260375214, |
|
"grad_norm": 2.418156385421753, |
|
"learning_rate": 0.00020300734301164017, |
|
"loss": 1.939, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04548038658328596, |
|
"grad_norm": 3.028864860534668, |
|
"learning_rate": 0.00020177105527484818, |
|
"loss": 1.9992, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.051165434906196704, |
|
"grad_norm": 4.271291255950928, |
|
"learning_rate": 0.00020004869298570854, |
|
"loss": 1.8298, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05685048322910745, |
|
"grad_norm": 12.00307846069336, |
|
"learning_rate": 0.00019784864732016265, |
|
"loss": 2.0342, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05685048322910745, |
|
"eval_loss": 2.5892810821533203, |
|
"eval_runtime": 47.1934, |
|
"eval_samples_per_second": 15.701, |
|
"eval_steps_per_second": 3.941, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06253553155201819, |
|
"grad_norm": 1.9848482608795166, |
|
"learning_rate": 0.00019518163667954527, |
|
"loss": 2.3178, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06822057987492894, |
|
"grad_norm": 2.8821191787719727, |
|
"learning_rate": 0.00019206065447161056, |
|
"loss": 2.0378, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07390562819783968, |
|
"grad_norm": 2.8982577323913574, |
|
"learning_rate": 0.00018850090580795544, |
|
"loss": 1.9132, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07959067652075043, |
|
"grad_norm": 4.434988021850586, |
|
"learning_rate": 0.00018451973342624464, |
|
"loss": 2.0179, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08527572484366117, |
|
"grad_norm": 11.088447570800781, |
|
"learning_rate": 0.00018013653319813575, |
|
"loss": 2.1179, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08527572484366117, |
|
"eval_loss": 2.5300779342651367, |
|
"eval_runtime": 47.2175, |
|
"eval_samples_per_second": 15.693, |
|
"eval_steps_per_second": 3.939, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09096077316657192, |
|
"grad_norm": 2.0930769443511963, |
|
"learning_rate": 0.0001753726596345424, |
|
"loss": 2.3273, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09664582148948266, |
|
"grad_norm": 2.696667194366455, |
|
"learning_rate": 0.00017025132184860355, |
|
"loss": 1.9949, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10233086981239341, |
|
"grad_norm": 3.0587522983551025, |
|
"learning_rate": 0.00016479747048321714, |
|
"loss": 1.9261, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10801591813530415, |
|
"grad_norm": 5.556695938110352, |
|
"learning_rate": 0.00015903767615401616, |
|
"loss": 1.7835, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1137009664582149, |
|
"grad_norm": 11.964973449707031, |
|
"learning_rate": 0.000153, |
|
"loss": 1.8545, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1137009664582149, |
|
"eval_loss": 2.2151694297790527, |
|
"eval_runtime": 47.2341, |
|
"eval_samples_per_second": 15.688, |
|
"eval_steps_per_second": 3.938, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11938601478112564, |
|
"grad_norm": 1.8511581420898438, |
|
"learning_rate": 0.0001467138569724859, |
|
"loss": 2.2388, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12507106310403637, |
|
"grad_norm": 2.840690851211548, |
|
"learning_rate": 0.00014020987252842305, |
|
"loss": 2.0782, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13075611142694712, |
|
"grad_norm": 3.4517576694488525, |
|
"learning_rate": 0.00013351973342624464, |
|
"loss": 1.8506, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13644115974985788, |
|
"grad_norm": 3.9618589878082275, |
|
"learning_rate": 0.00012667603335116609, |
|
"loss": 1.7965, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14212620807276863, |
|
"grad_norm": 14.236225128173828, |
|
"learning_rate": 0.00011971211412202691, |
|
"loss": 2.114, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14212620807276863, |
|
"eval_loss": 2.101684093475342, |
|
"eval_runtime": 47.3607, |
|
"eval_samples_per_second": 15.646, |
|
"eval_steps_per_second": 3.927, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14781125639567935, |
|
"grad_norm": 1.728967308998108, |
|
"learning_rate": 0.00011266190325330066, |
|
"loss": 2.2158, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1534963047185901, |
|
"grad_norm": 2.7104427814483643, |
|
"learning_rate": 0.00010555974866365511, |
|
"loss": 1.9438, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15918135304150086, |
|
"grad_norm": 2.974550247192383, |
|
"learning_rate": 9.844025133634492e-05, |
|
"loss": 1.8889, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1648664013644116, |
|
"grad_norm": 4.237817287445068, |
|
"learning_rate": 9.133809674669937e-05, |
|
"loss": 1.8694, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17055144968732233, |
|
"grad_norm": 8.692949295043945, |
|
"learning_rate": 8.428788587797311e-05, |
|
"loss": 1.8763, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17055144968732233, |
|
"eval_loss": 2.04364275932312, |
|
"eval_runtime": 47.3289, |
|
"eval_samples_per_second": 15.656, |
|
"eval_steps_per_second": 3.93, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17623649801023308, |
|
"grad_norm": 2.5650382041931152, |
|
"learning_rate": 7.73239666488339e-05, |
|
"loss": 2.1507, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18192154633314384, |
|
"grad_norm": 2.478137493133545, |
|
"learning_rate": 7.048026657375537e-05, |
|
"loss": 1.975, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1876065946560546, |
|
"grad_norm": 5.501384735107422, |
|
"learning_rate": 6.379012747157697e-05, |
|
"loss": 1.7551, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1932916429789653, |
|
"grad_norm": 4.856377124786377, |
|
"learning_rate": 5.7286143027514095e-05, |
|
"loss": 1.7361, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19897669130187606, |
|
"grad_norm": 6.624533176422119, |
|
"learning_rate": 5.100000000000002e-05, |
|
"loss": 1.8018, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19897669130187606, |
|
"eval_loss": 1.9168442487716675, |
|
"eval_runtime": 47.4834, |
|
"eval_samples_per_second": 15.605, |
|
"eval_steps_per_second": 3.917, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.20466173962478681, |
|
"grad_norm": 1.6058367490768433, |
|
"learning_rate": 4.496232384598384e-05, |
|
"loss": 2.2083, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21034678794769757, |
|
"grad_norm": 2.221337080001831, |
|
"learning_rate": 3.9202529516782854e-05, |
|
"loss": 1.9719, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2160318362706083, |
|
"grad_norm": 3.105900764465332, |
|
"learning_rate": 3.374867815139649e-05, |
|
"loss": 1.7793, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22171688459351904, |
|
"grad_norm": 3.6795544624328613, |
|
"learning_rate": 2.8627340365457602e-05, |
|
"loss": 1.72, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2274019329164298, |
|
"grad_norm": 11.990936279296875, |
|
"learning_rate": 2.3863466801864254e-05, |
|
"loss": 1.8091, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2274019329164298, |
|
"eval_loss": 1.850091576576233, |
|
"eval_runtime": 47.2393, |
|
"eval_samples_per_second": 15.686, |
|
"eval_steps_per_second": 3.937, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23308698123934055, |
|
"grad_norm": 1.6314740180969238, |
|
"learning_rate": 1.9480266573755372e-05, |
|
"loss": 2.1536, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.23877202956225127, |
|
"grad_norm": 2.274401903152466, |
|
"learning_rate": 1.5499094192044554e-05, |
|
"loss": 2.0182, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24445707788516202, |
|
"grad_norm": 2.7430739402770996, |
|
"learning_rate": 1.1939345528389446e-05, |
|
"loss": 1.6865, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.25014212620807275, |
|
"grad_norm": 3.536646842956543, |
|
"learning_rate": 8.818363320454701e-06, |
|
"loss": 1.8244, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2558271745309835, |
|
"grad_norm": 26.51901626586914, |
|
"learning_rate": 6.1513526798373514e-06, |
|
"loss": 2.0217, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2558271745309835, |
|
"eval_loss": 1.8225200176239014, |
|
"eval_runtime": 47.5365, |
|
"eval_samples_per_second": 15.588, |
|
"eval_steps_per_second": 3.913, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.711665917257318e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|