{ "best_metric": 1.2906723022460938, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.01271304278733463, "eval_steps": 100, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001271304278733463, "grad_norm": 0.5326213836669922, "learning_rate": 2e-05, "loss": 1.7856, "step": 1 }, { "epoch": 0.0001271304278733463, "eval_loss": 1.7346340417861938, "eval_runtime": 1249.3361, "eval_samples_per_second": 4.002, "eval_steps_per_second": 1.001, "step": 1 }, { "epoch": 0.0002542608557466926, "grad_norm": 0.5624520778656006, "learning_rate": 4e-05, "loss": 1.7626, "step": 2 }, { "epoch": 0.00038139128362003893, "grad_norm": 0.5890633463859558, "learning_rate": 6e-05, "loss": 1.739, "step": 3 }, { "epoch": 0.0005085217114933852, "grad_norm": 0.5437400937080383, "learning_rate": 8e-05, "loss": 1.6671, "step": 4 }, { "epoch": 0.0006356521393667316, "grad_norm": 0.6639446020126343, "learning_rate": 0.0001, "loss": 1.7684, "step": 5 }, { "epoch": 0.0007627825672400779, "grad_norm": 0.7031175494194031, "learning_rate": 0.00012, "loss": 1.7247, "step": 6 }, { "epoch": 0.0008899129951134241, "grad_norm": 0.5311002731323242, "learning_rate": 0.00014, "loss": 1.6195, "step": 7 }, { "epoch": 0.0010170434229867704, "grad_norm": 0.25101518630981445, "learning_rate": 0.00016, "loss": 1.5182, "step": 8 }, { "epoch": 0.0011441738508601168, "grad_norm": 0.8389205932617188, "learning_rate": 0.00018, "loss": 1.6646, "step": 9 }, { "epoch": 0.0012713042787334632, "grad_norm": 0.9317983388900757, "learning_rate": 0.0002, "loss": 1.6395, "step": 10 }, { "epoch": 0.0013984347066068094, "grad_norm": 0.48066481947898865, "learning_rate": 0.00019999832015210023, "loss": 1.5921, "step": 11 }, { "epoch": 0.0015255651344801557, "grad_norm": 0.2073744535446167, "learning_rate": 0.00019999328066483865, "loss": 1.4335, "step": 12 }, { "epoch": 0.0016526955623535021, "grad_norm": 0.22661754488945007, "learning_rate": 0.0001999848817075267, "loss": 1.477, "step": 13 }, { "epoch": 0.0017798259902268483, "grad_norm": 0.3149760663509369, "learning_rate": 0.00019997312356234386, "loss": 1.5713, "step": 14 }, { "epoch": 0.0019069564181001947, "grad_norm": 0.31392771005630493, "learning_rate": 0.00019995800662432798, "loss": 1.5414, "step": 15 }, { "epoch": 0.002034086845973541, "grad_norm": 0.2748984396457672, "learning_rate": 0.0001999395314013622, "loss": 1.5452, "step": 16 }, { "epoch": 0.0021612172738468874, "grad_norm": 0.19064395129680634, "learning_rate": 0.00019991769851415781, "loss": 1.5742, "step": 17 }, { "epoch": 0.0022883477017202336, "grad_norm": 0.1578415036201477, "learning_rate": 0.00019989250869623343, "loss": 1.5214, "step": 18 }, { "epoch": 0.0024154781295935798, "grad_norm": 0.20229928195476532, "learning_rate": 0.0001998639627938903, "loss": 1.3921, "step": 19 }, { "epoch": 0.0025426085574669264, "grad_norm": 0.2705669403076172, "learning_rate": 0.00019983206176618388, "loss": 1.4712, "step": 20 }, { "epoch": 0.0026697389853402725, "grad_norm": 0.27215054631233215, "learning_rate": 0.00019979680668489165, "loss": 1.4969, "step": 21 }, { "epoch": 0.0027968694132136187, "grad_norm": 0.20431619882583618, "learning_rate": 0.00019975819873447717, "loss": 1.431, "step": 22 }, { "epoch": 0.0029239998410869653, "grad_norm": 0.14068549871444702, "learning_rate": 0.00019971623921205005, "loss": 1.4543, "step": 23 }, { "epoch": 0.0030511302689603115, "grad_norm": 0.1594925820827484, "learning_rate": 0.00019967092952732264, "loss": 1.364, "step": 24 }, { "epoch": 0.0031782606968336576, "grad_norm": 0.17738410830497742, "learning_rate": 0.00019962227120256252, "loss": 1.4377, "step": 25 }, { "epoch": 0.0033053911247070042, "grad_norm": 0.1752498745918274, "learning_rate": 0.00019957026587254134, "loss": 1.3827, "step": 26 }, { "epoch": 0.0034325215525803504, "grad_norm": 0.18004052340984344, "learning_rate": 0.00019951491528448004, "loss": 1.3867, "step": 27 }, { "epoch": 0.0035596519804536966, "grad_norm": 0.1525774598121643, "learning_rate": 0.00019945622129799, "loss": 1.4164, "step": 28 }, { "epoch": 0.003686782408327043, "grad_norm": 0.1710219830274582, "learning_rate": 0.00019939418588501057, "loss": 1.4155, "step": 29 }, { "epoch": 0.0038139128362003893, "grad_norm": 0.20903100073337555, "learning_rate": 0.000199328811129743, "loss": 1.5239, "step": 30 }, { "epoch": 0.003941043264073736, "grad_norm": 0.18399456143379211, "learning_rate": 0.00019926009922858006, "loss": 1.3889, "step": 31 }, { "epoch": 0.004068173691947082, "grad_norm": 0.13330113887786865, "learning_rate": 0.0001991880524900327, "loss": 1.3587, "step": 32 }, { "epoch": 0.004195304119820428, "grad_norm": 0.13807597756385803, "learning_rate": 0.00019911267333465218, "loss": 1.4211, "step": 33 }, { "epoch": 0.004322434547693775, "grad_norm": 0.1550034135580063, "learning_rate": 0.0001990339642949488, "loss": 1.4317, "step": 34 }, { "epoch": 0.004449564975567121, "grad_norm": 0.1827971190214157, "learning_rate": 0.00019895192801530685, "loss": 1.4176, "step": 35 }, { "epoch": 0.004576695403440467, "grad_norm": 0.16028065979480743, "learning_rate": 0.00019886656725189575, "loss": 1.4122, "step": 36 }, { "epoch": 0.004703825831313814, "grad_norm": 0.14322146773338318, "learning_rate": 0.00019877788487257753, "loss": 1.423, "step": 37 }, { "epoch": 0.0048309562591871595, "grad_norm": 0.1685505211353302, "learning_rate": 0.00019868588385681032, "loss": 1.3702, "step": 38 }, { "epoch": 0.004958086687060506, "grad_norm": 0.1632552444934845, "learning_rate": 0.00019859056729554844, "loss": 1.3164, "step": 39 }, { "epoch": 0.005085217114933853, "grad_norm": 0.17149530351161957, "learning_rate": 0.00019849193839113833, "loss": 1.2799, "step": 40 }, { "epoch": 0.0052123475428071985, "grad_norm": 0.14993086457252502, "learning_rate": 0.00019839000045721118, "loss": 1.3412, "step": 41 }, { "epoch": 0.005339477970680545, "grad_norm": 0.15981823205947876, "learning_rate": 0.00019828475691857145, "loss": 1.3698, "step": 42 }, { "epoch": 0.005466608398553892, "grad_norm": 0.15311439335346222, "learning_rate": 0.00019817621131108196, "loss": 1.3792, "step": 43 }, { "epoch": 0.005593738826427237, "grad_norm": 0.19757720828056335, "learning_rate": 0.00019806436728154485, "loss": 1.4082, "step": 44 }, { "epoch": 0.005720869254300584, "grad_norm": 0.15765132009983063, "learning_rate": 0.00019794922858757928, "loss": 1.282, "step": 45 }, { "epoch": 0.005847999682173931, "grad_norm": 0.15940701961517334, "learning_rate": 0.00019783079909749515, "loss": 1.4016, "step": 46 }, { "epoch": 0.005975130110047276, "grad_norm": 0.1622331291437149, "learning_rate": 0.00019770908279016309, "loss": 1.3624, "step": 47 }, { "epoch": 0.006102260537920623, "grad_norm": 0.14866457879543304, "learning_rate": 0.00019758408375488071, "loss": 1.2807, "step": 48 }, { "epoch": 0.0062293909657939696, "grad_norm": 0.16648173332214355, "learning_rate": 0.00019745580619123535, "loss": 1.3617, "step": 49 }, { "epoch": 0.006356521393667315, "grad_norm": 0.16083629429340363, "learning_rate": 0.00019732425440896297, "loss": 1.3903, "step": 50 }, { "epoch": 0.006483651821540662, "grad_norm": 0.18012581765651703, "learning_rate": 0.00019718943282780323, "loss": 1.3472, "step": 51 }, { "epoch": 0.0066107822494140085, "grad_norm": 0.16914351284503937, "learning_rate": 0.00019705134597735113, "loss": 1.3765, "step": 52 }, { "epoch": 0.006737912677287354, "grad_norm": 0.15967325866222382, "learning_rate": 0.00019690999849690484, "loss": 1.3312, "step": 53 }, { "epoch": 0.006865043105160701, "grad_norm": 0.15996071696281433, "learning_rate": 0.00019676539513530968, "loss": 1.4227, "step": 54 }, { "epoch": 0.006992173533034047, "grad_norm": 0.1715613752603531, "learning_rate": 0.0001966175407507987, "loss": 1.3634, "step": 55 }, { "epoch": 0.007119303960907393, "grad_norm": 0.16633041203022003, "learning_rate": 0.00019646644031082948, "loss": 1.3279, "step": 56 }, { "epoch": 0.00724643438878074, "grad_norm": 0.16247525811195374, "learning_rate": 0.00019631209889191712, "loss": 1.3721, "step": 57 }, { "epoch": 0.007373564816654086, "grad_norm": 0.1603326052427292, "learning_rate": 0.00019615452167946385, "loss": 1.3212, "step": 58 }, { "epoch": 0.007500695244527432, "grad_norm": 0.16569744050502777, "learning_rate": 0.00019599371396758456, "loss": 1.3224, "step": 59 }, { "epoch": 0.007627825672400779, "grad_norm": 0.16010916233062744, "learning_rate": 0.0001958296811589293, "loss": 1.3022, "step": 60 }, { "epoch": 0.007754956100274125, "grad_norm": 0.1680569052696228, "learning_rate": 0.00019566242876450137, "loss": 1.3197, "step": 61 }, { "epoch": 0.007882086528147472, "grad_norm": 0.16432645916938782, "learning_rate": 0.00019549196240347248, "loss": 1.3167, "step": 62 }, { "epoch": 0.008009216956020818, "grad_norm": 0.17412547767162323, "learning_rate": 0.00019531828780299383, "loss": 1.3196, "step": 63 }, { "epoch": 0.008136347383894163, "grad_norm": 0.1736118346452713, "learning_rate": 0.0001951414107980036, "loss": 1.2966, "step": 64 }, { "epoch": 0.00826347781176751, "grad_norm": 0.16254910826683044, "learning_rate": 0.00019496133733103112, "loss": 1.3416, "step": 65 }, { "epoch": 0.008390608239640857, "grad_norm": 0.16831637918949127, "learning_rate": 0.00019477807345199714, "loss": 1.3396, "step": 66 }, { "epoch": 0.008517738667514202, "grad_norm": 0.1759282648563385, "learning_rate": 0.00019459162531801046, "loss": 1.3101, "step": 67 }, { "epoch": 0.00864486909538755, "grad_norm": 0.17314572632312775, "learning_rate": 0.00019440199919316123, "loss": 1.4026, "step": 68 }, { "epoch": 0.008771999523260895, "grad_norm": 0.17074303328990936, "learning_rate": 0.00019420920144831044, "loss": 1.3088, "step": 69 }, { "epoch": 0.008899129951134241, "grad_norm": 0.17773644626140594, "learning_rate": 0.0001940132385608757, "loss": 1.32, "step": 70 }, { "epoch": 0.009026260379007589, "grad_norm": 0.1736891269683838, "learning_rate": 0.0001938141171146141, "loss": 1.2865, "step": 71 }, { "epoch": 0.009153390806880934, "grad_norm": 0.17593072354793549, "learning_rate": 0.0001936118437994003, "loss": 1.3276, "step": 72 }, { "epoch": 0.00928052123475428, "grad_norm": 0.16799978911876678, "learning_rate": 0.00019340642541100248, "loss": 1.2585, "step": 73 }, { "epoch": 0.009407651662627628, "grad_norm": 0.17271657288074493, "learning_rate": 0.00019319786885085364, "loss": 1.3838, "step": 74 }, { "epoch": 0.009534782090500973, "grad_norm": 0.18318656086921692, "learning_rate": 0.0001929861811258197, "loss": 1.3857, "step": 75 }, { "epoch": 0.009661912518374319, "grad_norm": 0.1850346177816391, "learning_rate": 0.0001927713693479643, "loss": 1.3884, "step": 76 }, { "epoch": 0.009789042946247667, "grad_norm": 0.1707659363746643, "learning_rate": 0.0001925534407343097, "loss": 1.2674, "step": 77 }, { "epoch": 0.009916173374121012, "grad_norm": 0.1803124099969864, "learning_rate": 0.0001923324026065944, "loss": 1.2899, "step": 78 }, { "epoch": 0.010043303801994358, "grad_norm": 0.18143856525421143, "learning_rate": 0.0001921082623910271, "loss": 1.2967, "step": 79 }, { "epoch": 0.010170434229867705, "grad_norm": 0.17903882265090942, "learning_rate": 0.00019188102761803717, "loss": 1.2913, "step": 80 }, { "epoch": 0.010297564657741051, "grad_norm": 0.181906595826149, "learning_rate": 0.00019165070592202173, "loss": 1.2568, "step": 81 }, { "epoch": 0.010424695085614397, "grad_norm": 0.1655733585357666, "learning_rate": 0.00019141730504108922, "loss": 1.2758, "step": 82 }, { "epoch": 0.010551825513487744, "grad_norm": 0.17457562685012817, "learning_rate": 0.00019118083281679913, "loss": 1.2506, "step": 83 }, { "epoch": 0.01067895594136109, "grad_norm": 0.18457446992397308, "learning_rate": 0.00019094129719389886, "loss": 1.3701, "step": 84 }, { "epoch": 0.010806086369234436, "grad_norm": 0.17702506482601166, "learning_rate": 0.0001906987062200567, "loss": 1.3071, "step": 85 }, { "epoch": 0.010933216797107783, "grad_norm": 0.1763213723897934, "learning_rate": 0.0001904530680455914, "loss": 1.2996, "step": 86 }, { "epoch": 0.011060347224981129, "grad_norm": 0.17649979889392853, "learning_rate": 0.0001902043909231984, "loss": 1.314, "step": 87 }, { "epoch": 0.011187477652854475, "grad_norm": 0.1817278414964676, "learning_rate": 0.00018995268320767252, "loss": 1.315, "step": 88 }, { "epoch": 0.011314608080727822, "grad_norm": 0.17668454349040985, "learning_rate": 0.0001896979533556273, "loss": 1.2914, "step": 89 }, { "epoch": 0.011441738508601168, "grad_norm": 0.17107272148132324, "learning_rate": 0.0001894402099252109, "loss": 1.2884, "step": 90 }, { "epoch": 0.011568868936474514, "grad_norm": 0.18352022767066956, "learning_rate": 0.0001891794615758185, "loss": 1.3404, "step": 91 }, { "epoch": 0.011695999364347861, "grad_norm": 0.17999856173992157, "learning_rate": 0.00018891571706780146, "loss": 1.3001, "step": 92 }, { "epoch": 0.011823129792221207, "grad_norm": 0.17374040186405182, "learning_rate": 0.00018864898526217293, "loss": 1.266, "step": 93 }, { "epoch": 0.011950260220094553, "grad_norm": 0.18675245344638824, "learning_rate": 0.0001883792751203102, "loss": 1.3347, "step": 94 }, { "epoch": 0.0120773906479679, "grad_norm": 0.18314674496650696, "learning_rate": 0.0001881065957036536, "loss": 1.3224, "step": 95 }, { "epoch": 0.012204521075841246, "grad_norm": 0.17483913898468018, "learning_rate": 0.00018783095617340193, "loss": 1.2926, "step": 96 }, { "epoch": 0.012331651503714592, "grad_norm": 0.19491428136825562, "learning_rate": 0.00018755236579020502, "loss": 1.2636, "step": 97 }, { "epoch": 0.012458781931587939, "grad_norm": 0.17128659784793854, "learning_rate": 0.0001872708339138522, "loss": 1.2653, "step": 98 }, { "epoch": 0.012585912359461285, "grad_norm": 0.172018900513649, "learning_rate": 0.00018698637000295816, "loss": 1.2686, "step": 99 }, { "epoch": 0.01271304278733463, "grad_norm": 0.18891726434230804, "learning_rate": 0.0001866989836146449, "loss": 1.4058, "step": 100 }, { "epoch": 0.01271304278733463, "eval_loss": 1.2906723022460938, "eval_runtime": 1258.4463, "eval_samples_per_second": 3.973, "eval_steps_per_second": 0.993, "step": 100 } ], "logging_steps": 1, "max_steps": 552, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0364000227295232e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }