{ "best_metric": 1.2806880474090576, "best_model_checkpoint": "miner_id_24/checkpoint-450", "epoch": 0.021174477696216828, "eval_steps": 50, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.7054394880481836e-05, "eval_loss": 2.291715383529663, "eval_runtime": 1053.328, "eval_samples_per_second": 8.496, "eval_steps_per_second": 2.125, "step": 1 }, { "epoch": 0.0004705439488048184, "grad_norm": 0.5230432152748108, "learning_rate": 4.24e-05, "loss": 2.7622, "step": 10 }, { "epoch": 0.0009410878976096367, "grad_norm": 0.7896382212638855, "learning_rate": 8.48e-05, "loss": 3.1207, "step": 20 }, { "epoch": 0.001411631846414455, "grad_norm": 0.8223176002502441, "learning_rate": 0.0001272, "loss": 3.3159, "step": 30 }, { "epoch": 0.0018821757952192735, "grad_norm": 1.2859474420547485, "learning_rate": 0.0001696, "loss": 2.6552, "step": 40 }, { "epoch": 0.002352719744024092, "grad_norm": 1.6215729713439941, "learning_rate": 0.000212, "loss": 3.2582, "step": 50 }, { "epoch": 0.002352719744024092, "eval_loss": 1.5773438215255737, "eval_runtime": 1052.3497, "eval_samples_per_second": 8.504, "eval_steps_per_second": 2.127, "step": 50 }, { "epoch": 0.00282326369282891, "grad_norm": 0.46075549721717834, "learning_rate": 0.00021174178932754136, "loss": 2.1986, "step": 60 }, { "epoch": 0.0032938076416337285, "grad_norm": 0.6075234413146973, "learning_rate": 0.00021096841528660647, "loss": 2.8469, "step": 70 }, { "epoch": 0.003764351590438547, "grad_norm": 0.7672594785690308, "learning_rate": 0.0002096836456777834, "loss": 3.25, "step": 80 }, { "epoch": 0.004234895539243365, "grad_norm": 0.9351806640625, "learning_rate": 0.00020789373976946182, "loss": 2.4935, "step": 90 }, { "epoch": 0.004705439488048184, "grad_norm": 1.1365292072296143, "learning_rate": 0.0002056074178033063, "loss": 3.0859, "step": 100 }, { "epoch": 0.004705439488048184, "eval_loss": 1.4719538688659668, "eval_runtime": 1052.0449, "eval_samples_per_second": 8.506, "eval_steps_per_second": 2.127, "step": 100 }, { "epoch": 0.005175983436853002, "grad_norm": 0.4256722927093506, "learning_rate": 0.00020283581851011567, "loss": 2.0301, "step": 110 }, { "epoch": 0.00564652738565782, "grad_norm": 0.5135687589645386, "learning_rate": 0.00019959244484304625, "loss": 2.9534, "step": 120 }, { "epoch": 0.006117071334462639, "grad_norm": 0.9111499786376953, "learning_rate": 0.00019589309819258114, "loss": 2.9244, "step": 130 }, { "epoch": 0.006587615283267457, "grad_norm": 0.9945178031921387, "learning_rate": 0.00019175580140374444, "loss": 2.7275, "step": 140 }, { "epoch": 0.007058159232072276, "grad_norm": 1.4719926118850708, "learning_rate": 0.00018720071097061167, "loss": 3.1501, "step": 150 }, { "epoch": 0.007058159232072276, "eval_loss": 1.5417993068695068, "eval_runtime": 1051.4913, "eval_samples_per_second": 8.511, "eval_steps_per_second": 2.128, "step": 150 }, { "epoch": 0.007528703180877094, "grad_norm": 0.5069376230239868, "learning_rate": 0.00018225001883589702, "loss": 2.131, "step": 160 }, { "epoch": 0.007999247129681912, "grad_norm": 0.6495153307914734, "learning_rate": 0.00017692784427403898, "loss": 2.7595, "step": 170 }, { "epoch": 0.00846979107848673, "grad_norm": 0.8190892338752747, "learning_rate": 0.00017126011638451976, "loss": 3.0714, "step": 180 }, { "epoch": 0.00894033502729155, "grad_norm": 1.1729865074157715, "learning_rate": 0.00016527444776789915, "loss": 2.568, "step": 190 }, { "epoch": 0.009410878976096368, "grad_norm": 1.1141877174377441, "learning_rate": 0.00015900000000000002, "loss": 3.0678, "step": 200 }, { "epoch": 0.009410878976096368, "eval_loss": 1.5443755388259888, "eval_runtime": 1050.9688, "eval_samples_per_second": 8.515, "eval_steps_per_second": 2.129, "step": 200 }, { "epoch": 0.009881422924901186, "grad_norm": 0.42763814330101013, "learning_rate": 0.0001524673415596422, "loss": 2.0902, "step": 210 }, { "epoch": 0.010351966873706004, "grad_norm": 0.5467658638954163, "learning_rate": 0.00014570829890208668, "loss": 3.0582, "step": 220 }, { "epoch": 0.010822510822510822, "grad_norm": 0.8129895329475403, "learning_rate": 0.00013875580140374443, "loss": 2.9275, "step": 230 }, { "epoch": 0.01129305477131564, "grad_norm": 1.2014703750610352, "learning_rate": 0.00013164372093356477, "loss": 2.5216, "step": 240 }, { "epoch": 0.01176359872012046, "grad_norm": 1.4856128692626953, "learning_rate": 0.00012440670683269464, "loss": 2.8509, "step": 250 }, { "epoch": 0.01176359872012046, "eval_loss": 1.4362162351608276, "eval_runtime": 1052.317, "eval_samples_per_second": 8.504, "eval_steps_per_second": 2.127, "step": 250 }, { "epoch": 0.012234142668925278, "grad_norm": 0.44174057245254517, "learning_rate": 0.00011708001710637128, "loss": 2.0934, "step": 260 }, { "epoch": 0.012704686617730096, "grad_norm": 0.7299515008926392, "learning_rate": 0.00010969934665046512, "loss": 2.5203, "step": 270 }, { "epoch": 0.013175230566534914, "grad_norm": 0.7253561615943909, "learning_rate": 0.00010230065334953492, "loss": 3.0399, "step": 280 }, { "epoch": 0.013645774515339732, "grad_norm": 1.0471400022506714, "learning_rate": 9.491998289362875e-05, "loss": 2.7184, "step": 290 }, { "epoch": 0.014116318464144552, "grad_norm": 1.5433595180511475, "learning_rate": 8.759329316730539e-05, "loss": 2.949, "step": 300 }, { "epoch": 0.014116318464144552, "eval_loss": 1.3197296857833862, "eval_runtime": 1055.1737, "eval_samples_per_second": 8.481, "eval_steps_per_second": 2.121, "step": 300 }, { "epoch": 0.01458686241294937, "grad_norm": 0.5331817269325256, "learning_rate": 8.035627906643523e-05, "loss": 1.9766, "step": 310 }, { "epoch": 0.015057406361754188, "grad_norm": 0.42347341775894165, "learning_rate": 7.324419859625559e-05, "loss": 2.3662, "step": 320 }, { "epoch": 0.015527950310559006, "grad_norm": 0.7347255349159241, "learning_rate": 6.629170109791332e-05, "loss": 2.7505, "step": 330 }, { "epoch": 0.015998494259363824, "grad_norm": 0.8511791229248047, "learning_rate": 5.9532658440357784e-05, "loss": 2.3891, "step": 340 }, { "epoch": 0.016469038208168644, "grad_norm": 1.1082268953323364, "learning_rate": 5.300000000000002e-05, "loss": 2.7131, "step": 350 }, { "epoch": 0.016469038208168644, "eval_loss": 1.2930166721343994, "eval_runtime": 1050.6728, "eval_samples_per_second": 8.517, "eval_steps_per_second": 2.13, "step": 350 }, { "epoch": 0.01693958215697346, "grad_norm": 0.4141283929347992, "learning_rate": 4.672555223210085e-05, "loss": 1.9025, "step": 360 }, { "epoch": 0.01741012610577828, "grad_norm": 0.4382825195789337, "learning_rate": 4.073988361548022e-05, "loss": 2.5644, "step": 370 }, { "epoch": 0.0178806700545831, "grad_norm": 0.7108925580978394, "learning_rate": 3.507215572596106e-05, "loss": 2.9241, "step": 380 }, { "epoch": 0.018351214003387916, "grad_norm": 0.8783855438232422, "learning_rate": 2.9749981164102997e-05, "loss": 2.4924, "step": 390 }, { "epoch": 0.018821757952192736, "grad_norm": 1.2412166595458984, "learning_rate": 2.479928902938834e-05, "loss": 2.8697, "step": 400 }, { "epoch": 0.018821757952192736, "eval_loss": 1.2847579717636108, "eval_runtime": 1052.7564, "eval_samples_per_second": 8.501, "eval_steps_per_second": 2.126, "step": 400 }, { "epoch": 0.019292301900997552, "grad_norm": 0.3955267369747162, "learning_rate": 2.024419859625558e-05, "loss": 1.9078, "step": 410 }, { "epoch": 0.019762845849802372, "grad_norm": 0.4896032512187958, "learning_rate": 1.610690180741885e-05, "loss": 2.5107, "step": 420 }, { "epoch": 0.020233389798607188, "grad_norm": 0.688621461391449, "learning_rate": 1.240755515695374e-05, "loss": 3.0001, "step": 430 }, { "epoch": 0.020703933747412008, "grad_norm": 0.7938734292984009, "learning_rate": 9.164181489884296e-06, "loss": 2.5139, "step": 440 }, { "epoch": 0.021174477696216828, "grad_norm": 1.0540862083435059, "learning_rate": 6.392582196693718e-06, "loss": 2.7921, "step": 450 }, { "epoch": 0.021174477696216828, "eval_loss": 1.2806880474090576, "eval_runtime": 1054.9988, "eval_samples_per_second": 8.482, "eval_steps_per_second": 2.121, "step": 450 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9223752445263872e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }