|
{
  "best_metric": 1.2551084756851196,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.02542608557466926,
  "eval_steps": 100,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001271304278733463,
      "grad_norm": 0.5326213836669922,
      "learning_rate": 2e-05,
      "loss": 1.7856,
      "step": 1
    },
    {
      "epoch": 0.0001271304278733463,
      "eval_loss": 1.7346340417861938,
      "eval_runtime": 1249.3361,
      "eval_samples_per_second": 4.002,
      "eval_steps_per_second": 1.001,
      "step": 1
    },
    {
      "epoch": 0.0002542608557466926,
      "grad_norm": 0.5624520778656006,
      "learning_rate": 4e-05,
      "loss": 1.7626,
      "step": 2
    },
    {
      "epoch": 0.00038139128362003893,
      "grad_norm": 0.5890633463859558,
      "learning_rate": 6e-05,
      "loss": 1.739,
      "step": 3
    },
    {
      "epoch": 0.0005085217114933852,
      "grad_norm": 0.5437400937080383,
      "learning_rate": 8e-05,
      "loss": 1.6671,
      "step": 4
    },
    {
      "epoch": 0.0006356521393667316,
      "grad_norm": 0.6639446020126343,
      "learning_rate": 0.0001,
      "loss": 1.7684,
      "step": 5
    },
    {
      "epoch": 0.0007627825672400779,
      "grad_norm": 0.7031175494194031,
      "learning_rate": 0.00012,
      "loss": 1.7247,
      "step": 6
    },
    {
      "epoch": 0.0008899129951134241,
      "grad_norm": 0.5311002731323242,
      "learning_rate": 0.00014,
      "loss": 1.6195,
      "step": 7
    },
    {
      "epoch": 0.0010170434229867704,
      "grad_norm": 0.25101518630981445,
      "learning_rate": 0.00016,
      "loss": 1.5182,
      "step": 8
    },
    {
      "epoch": 0.0011441738508601168,
      "grad_norm": 0.8389205932617188,
      "learning_rate": 0.00018,
      "loss": 1.6646,
      "step": 9
    },
    {
      "epoch": 0.0012713042787334632,
      "grad_norm": 0.9317983388900757,
      "learning_rate": 0.0002,
      "loss": 1.6395,
      "step": 10
    },
    {
      "epoch": 0.0013984347066068094,
      "grad_norm": 0.48066481947898865,
      "learning_rate": 0.00019999832015210023,
      "loss": 1.5921,
      "step": 11
    },
    {
      "epoch": 0.0015255651344801557,
      "grad_norm": 0.2073744535446167,
      "learning_rate": 0.00019999328066483865,
      "loss": 1.4335,
      "step": 12
    },
    {
      "epoch": 0.0016526955623535021,
      "grad_norm": 0.22661754488945007,
      "learning_rate": 0.0001999848817075267,
      "loss": 1.477,
      "step": 13
    },
    {
      "epoch": 0.0017798259902268483,
      "grad_norm": 0.3149760663509369,
      "learning_rate": 0.00019997312356234386,
      "loss": 1.5713,
      "step": 14
    },
    {
      "epoch": 0.0019069564181001947,
      "grad_norm": 0.31392771005630493,
      "learning_rate": 0.00019995800662432798,
      "loss": 1.5414,
      "step": 15
    },
    {
      "epoch": 0.002034086845973541,
      "grad_norm": 0.2748984396457672,
      "learning_rate": 0.0001999395314013622,
      "loss": 1.5452,
      "step": 16
    },
    {
      "epoch": 0.0021612172738468874,
      "grad_norm": 0.19064395129680634,
      "learning_rate": 0.00019991769851415781,
      "loss": 1.5742,
      "step": 17
    },
    {
      "epoch": 0.0022883477017202336,
      "grad_norm": 0.1578415036201477,
      "learning_rate": 0.00019989250869623343,
      "loss": 1.5214,
      "step": 18
    },
    {
      "epoch": 0.0024154781295935798,
      "grad_norm": 0.20229928195476532,
      "learning_rate": 0.0001998639627938903,
      "loss": 1.3921,
      "step": 19
    },
    {
      "epoch": 0.0025426085574669264,
      "grad_norm": 0.2705669403076172,
      "learning_rate": 0.00019983206176618388,
      "loss": 1.4712,
      "step": 20
    },
    {
      "epoch": 0.0026697389853402725,
      "grad_norm": 0.27215054631233215,
      "learning_rate": 0.00019979680668489165,
      "loss": 1.4969,
      "step": 21
    },
    {
      "epoch": 0.0027968694132136187,
      "grad_norm": 0.20431619882583618,
      "learning_rate": 0.00019975819873447717,
      "loss": 1.431,
      "step": 22
    },
    {
      "epoch": 0.0029239998410869653,
      "grad_norm": 0.14068549871444702,
      "learning_rate": 0.00019971623921205005,
      "loss": 1.4543,
      "step": 23
    },
    {
      "epoch": 0.0030511302689603115,
      "grad_norm": 0.1594925820827484,
      "learning_rate": 0.00019967092952732264,
      "loss": 1.364,
      "step": 24
    },
    {
      "epoch": 0.0031782606968336576,
      "grad_norm": 0.17738410830497742,
      "learning_rate": 0.00019962227120256252,
      "loss": 1.4377,
      "step": 25
    },
    {
      "epoch": 0.0033053911247070042,
      "grad_norm": 0.1752498745918274,
      "learning_rate": 0.00019957026587254134,
      "loss": 1.3827,
      "step": 26
    },
    {
      "epoch": 0.0034325215525803504,
      "grad_norm": 0.18004052340984344,
      "learning_rate": 0.00019951491528448004,
      "loss": 1.3867,
      "step": 27
    },
    {
      "epoch": 0.0035596519804536966,
      "grad_norm": 0.1525774598121643,
      "learning_rate": 0.00019945622129799,
      "loss": 1.4164,
      "step": 28
    },
    {
      "epoch": 0.003686782408327043,
      "grad_norm": 0.1710219830274582,
      "learning_rate": 0.00019939418588501057,
      "loss": 1.4155,
      "step": 29
    },
    {
      "epoch": 0.0038139128362003893,
      "grad_norm": 0.20903100073337555,
      "learning_rate": 0.000199328811129743,
      "loss": 1.5239,
      "step": 30
    },
    {
      "epoch": 0.003941043264073736,
      "grad_norm": 0.18399456143379211,
      "learning_rate": 0.00019926009922858006,
      "loss": 1.3889,
      "step": 31
    },
    {
      "epoch": 0.004068173691947082,
      "grad_norm": 0.13330113887786865,
      "learning_rate": 0.0001991880524900327,
      "loss": 1.3587,
      "step": 32
    },
    {
      "epoch": 0.004195304119820428,
      "grad_norm": 0.13807597756385803,
      "learning_rate": 0.00019911267333465218,
      "loss": 1.4211,
      "step": 33
    },
    {
      "epoch": 0.004322434547693775,
      "grad_norm": 0.1550034135580063,
      "learning_rate": 0.0001990339642949488,
      "loss": 1.4317,
      "step": 34
    },
    {
      "epoch": 0.004449564975567121,
      "grad_norm": 0.1827971190214157,
      "learning_rate": 0.00019895192801530685,
      "loss": 1.4176,
      "step": 35
    },
    {
      "epoch": 0.004576695403440467,
      "grad_norm": 0.16028065979480743,
      "learning_rate": 0.00019886656725189575,
      "loss": 1.4122,
      "step": 36
    },
    {
      "epoch": 0.004703825831313814,
      "grad_norm": 0.14322146773338318,
      "learning_rate": 0.00019877788487257753,
      "loss": 1.423,
      "step": 37
    },
    {
      "epoch": 0.0048309562591871595,
      "grad_norm": 0.1685505211353302,
      "learning_rate": 0.00019868588385681032,
      "loss": 1.3702,
      "step": 38
    },
    {
      "epoch": 0.004958086687060506,
      "grad_norm": 0.1632552444934845,
      "learning_rate": 0.00019859056729554844,
      "loss": 1.3164,
      "step": 39
    },
    {
      "epoch": 0.005085217114933853,
      "grad_norm": 0.17149530351161957,
      "learning_rate": 0.00019849193839113833,
      "loss": 1.2799,
      "step": 40
    },
    {
      "epoch": 0.0052123475428071985,
      "grad_norm": 0.14993086457252502,
      "learning_rate": 0.00019839000045721118,
      "loss": 1.3412,
      "step": 41
    },
    {
      "epoch": 0.005339477970680545,
      "grad_norm": 0.15981823205947876,
      "learning_rate": 0.00019828475691857145,
      "loss": 1.3698,
      "step": 42
    },
    {
      "epoch": 0.005466608398553892,
      "grad_norm": 0.15311439335346222,
      "learning_rate": 0.00019817621131108196,
      "loss": 1.3792,
      "step": 43
    },
    {
      "epoch": 0.005593738826427237,
      "grad_norm": 0.19757720828056335,
      "learning_rate": 0.00019806436728154485,
      "loss": 1.4082,
      "step": 44
    },
    {
      "epoch": 0.005720869254300584,
      "grad_norm": 0.15765132009983063,
      "learning_rate": 0.00019794922858757928,
      "loss": 1.282,
      "step": 45
    },
    {
      "epoch": 0.005847999682173931,
      "grad_norm": 0.15940701961517334,
      "learning_rate": 0.00019783079909749515,
      "loss": 1.4016,
      "step": 46
    },
    {
      "epoch": 0.005975130110047276,
      "grad_norm": 0.1622331291437149,
      "learning_rate": 0.00019770908279016309,
      "loss": 1.3624,
      "step": 47
    },
    {
      "epoch": 0.006102260537920623,
      "grad_norm": 0.14866457879543304,
      "learning_rate": 0.00019758408375488071,
      "loss": 1.2807,
      "step": 48
    },
    {
      "epoch": 0.0062293909657939696,
      "grad_norm": 0.16648173332214355,
      "learning_rate": 0.00019745580619123535,
      "loss": 1.3617,
      "step": 49
    },
    {
      "epoch": 0.006356521393667315,
      "grad_norm": 0.16083629429340363,
      "learning_rate": 0.00019732425440896297,
      "loss": 1.3903,
      "step": 50
    },
    {
      "epoch": 0.006483651821540662,
      "grad_norm": 0.18012581765651703,
      "learning_rate": 0.00019718943282780323,
      "loss": 1.3472,
      "step": 51
    },
    {
      "epoch": 0.0066107822494140085,
      "grad_norm": 0.16914351284503937,
      "learning_rate": 0.00019705134597735113,
      "loss": 1.3765,
      "step": 52
    },
    {
      "epoch": 0.006737912677287354,
      "grad_norm": 0.15967325866222382,
      "learning_rate": 0.00019690999849690484,
      "loss": 1.3312,
      "step": 53
    },
    {
      "epoch": 0.006865043105160701,
      "grad_norm": 0.15996071696281433,
      "learning_rate": 0.00019676539513530968,
      "loss": 1.4227,
      "step": 54
    },
    {
      "epoch": 0.006992173533034047,
      "grad_norm": 0.1715613752603531,
      "learning_rate": 0.0001966175407507987,
      "loss": 1.3634,
      "step": 55
    },
    {
      "epoch": 0.007119303960907393,
      "grad_norm": 0.16633041203022003,
      "learning_rate": 0.00019646644031082948,
      "loss": 1.3279,
      "step": 56
    },
    {
      "epoch": 0.00724643438878074,
      "grad_norm": 0.16247525811195374,
      "learning_rate": 0.00019631209889191712,
      "loss": 1.3721,
      "step": 57
    },
    {
      "epoch": 0.007373564816654086,
      "grad_norm": 0.1603326052427292,
      "learning_rate": 0.00019615452167946385,
      "loss": 1.3212,
      "step": 58
    },
    {
      "epoch": 0.007500695244527432,
      "grad_norm": 0.16569744050502777,
      "learning_rate": 0.00019599371396758456,
      "loss": 1.3224,
      "step": 59
    },
    {
      "epoch": 0.007627825672400779,
      "grad_norm": 0.16010916233062744,
      "learning_rate": 0.0001958296811589293,
      "loss": 1.3022,
      "step": 60
    },
    {
      "epoch": 0.007754956100274125,
      "grad_norm": 0.1680569052696228,
      "learning_rate": 0.00019566242876450137,
      "loss": 1.3197,
      "step": 61
    },
    {
      "epoch": 0.007882086528147472,
      "grad_norm": 0.16432645916938782,
      "learning_rate": 0.00019549196240347248,
      "loss": 1.3167,
      "step": 62
    },
    {
      "epoch": 0.008009216956020818,
      "grad_norm": 0.17412547767162323,
      "learning_rate": 0.00019531828780299383,
      "loss": 1.3196,
      "step": 63
    },
    {
      "epoch": 0.008136347383894163,
      "grad_norm": 0.1736118346452713,
      "learning_rate": 0.0001951414107980036,
      "loss": 1.2966,
      "step": 64
    },
    {
      "epoch": 0.00826347781176751,
      "grad_norm": 0.16254910826683044,
      "learning_rate": 0.00019496133733103112,
      "loss": 1.3416,
      "step": 65
    },
    {
      "epoch": 0.008390608239640857,
      "grad_norm": 0.16831637918949127,
      "learning_rate": 0.00019477807345199714,
      "loss": 1.3396,
      "step": 66
    },
    {
      "epoch": 0.008517738667514202,
      "grad_norm": 0.1759282648563385,
      "learning_rate": 0.00019459162531801046,
      "loss": 1.3101,
      "step": 67
    },
    {
      "epoch": 0.00864486909538755,
      "grad_norm": 0.17314572632312775,
      "learning_rate": 0.00019440199919316123,
      "loss": 1.4026,
      "step": 68
    },
    {
      "epoch": 0.008771999523260895,
      "grad_norm": 0.17074303328990936,
      "learning_rate": 0.00019420920144831044,
      "loss": 1.3088,
      "step": 69
    },
    {
      "epoch": 0.008899129951134241,
      "grad_norm": 0.17773644626140594,
      "learning_rate": 0.0001940132385608757,
      "loss": 1.32,
      "step": 70
    },
    {
      "epoch": 0.009026260379007589,
      "grad_norm": 0.1736891269683838,
      "learning_rate": 0.0001938141171146141,
      "loss": 1.2865,
      "step": 71
    },
    {
      "epoch": 0.009153390806880934,
      "grad_norm": 0.17593072354793549,
      "learning_rate": 0.0001936118437994003,
      "loss": 1.3276,
      "step": 72
    },
    {
      "epoch": 0.00928052123475428,
      "grad_norm": 0.16799978911876678,
      "learning_rate": 0.00019340642541100248,
      "loss": 1.2585,
      "step": 73
    },
    {
      "epoch": 0.009407651662627628,
      "grad_norm": 0.17271657288074493,
      "learning_rate": 0.00019319786885085364,
      "loss": 1.3838,
      "step": 74
    },
    {
      "epoch": 0.009534782090500973,
      "grad_norm": 0.18318656086921692,
      "learning_rate": 0.0001929861811258197,
      "loss": 1.3857,
      "step": 75
    },
    {
      "epoch": 0.009661912518374319,
      "grad_norm": 0.1850346177816391,
      "learning_rate": 0.0001927713693479643,
      "loss": 1.3884,
      "step": 76
    },
    {
      "epoch": 0.009789042946247667,
      "grad_norm": 0.1707659363746643,
      "learning_rate": 0.0001925534407343097,
      "loss": 1.2674,
      "step": 77
    },
    {
      "epoch": 0.009916173374121012,
      "grad_norm": 0.1803124099969864,
      "learning_rate": 0.0001923324026065944,
      "loss": 1.2899,
      "step": 78
    },
    {
      "epoch": 0.010043303801994358,
      "grad_norm": 0.18143856525421143,
      "learning_rate": 0.0001921082623910271,
      "loss": 1.2967,
      "step": 79
    },
    {
      "epoch": 0.010170434229867705,
      "grad_norm": 0.17903882265090942,
      "learning_rate": 0.00019188102761803717,
      "loss": 1.2913,
      "step": 80
    },
    {
      "epoch": 0.010297564657741051,
      "grad_norm": 0.181906595826149,
      "learning_rate": 0.00019165070592202173,
      "loss": 1.2568,
      "step": 81
    },
    {
      "epoch": 0.010424695085614397,
      "grad_norm": 0.1655733585357666,
      "learning_rate": 0.00019141730504108922,
      "loss": 1.2758,
      "step": 82
    },
    {
      "epoch": 0.010551825513487744,
      "grad_norm": 0.17457562685012817,
      "learning_rate": 0.00019118083281679913,
      "loss": 1.2506,
      "step": 83
    },
    {
      "epoch": 0.01067895594136109,
      "grad_norm": 0.18457446992397308,
      "learning_rate": 0.00019094129719389886,
      "loss": 1.3701,
      "step": 84
    },
    {
      "epoch": 0.010806086369234436,
      "grad_norm": 0.17702506482601166,
      "learning_rate": 0.0001906987062200567,
      "loss": 1.3071,
      "step": 85
    },
    {
      "epoch": 0.010933216797107783,
      "grad_norm": 0.1763213723897934,
      "learning_rate": 0.0001904530680455914,
      "loss": 1.2996,
      "step": 86
    },
    {
      "epoch": 0.011060347224981129,
      "grad_norm": 0.17649979889392853,
      "learning_rate": 0.0001902043909231984,
      "loss": 1.314,
      "step": 87
    },
    {
      "epoch": 0.011187477652854475,
      "grad_norm": 0.1817278414964676,
      "learning_rate": 0.00018995268320767252,
      "loss": 1.315,
      "step": 88
    },
    {
      "epoch": 0.011314608080727822,
      "grad_norm": 0.17668454349040985,
      "learning_rate": 0.0001896979533556273,
      "loss": 1.2914,
      "step": 89
    },
    {
      "epoch": 0.011441738508601168,
      "grad_norm": 0.17107272148132324,
      "learning_rate": 0.0001894402099252109,
      "loss": 1.2884,
      "step": 90
    },
    {
      "epoch": 0.011568868936474514,
      "grad_norm": 0.18352022767066956,
      "learning_rate": 0.0001891794615758185,
      "loss": 1.3404,
      "step": 91
    },
    {
      "epoch": 0.011695999364347861,
      "grad_norm": 0.17999856173992157,
      "learning_rate": 0.00018891571706780146,
      "loss": 1.3001,
      "step": 92
    },
    {
      "epoch": 0.011823129792221207,
      "grad_norm": 0.17374040186405182,
      "learning_rate": 0.00018864898526217293,
      "loss": 1.266,
      "step": 93
    },
    {
      "epoch": 0.011950260220094553,
      "grad_norm": 0.18675245344638824,
      "learning_rate": 0.0001883792751203102,
      "loss": 1.3347,
      "step": 94
    },
    {
      "epoch": 0.0120773906479679,
      "grad_norm": 0.18314674496650696,
      "learning_rate": 0.0001881065957036536,
      "loss": 1.3224,
      "step": 95
    },
    {
      "epoch": 0.012204521075841246,
      "grad_norm": 0.17483913898468018,
      "learning_rate": 0.00018783095617340193,
      "loss": 1.2926,
      "step": 96
    },
    {
      "epoch": 0.012331651503714592,
      "grad_norm": 0.19491428136825562,
      "learning_rate": 0.00018755236579020502,
      "loss": 1.2636,
      "step": 97
    },
    {
      "epoch": 0.012458781931587939,
      "grad_norm": 0.17128659784793854,
      "learning_rate": 0.0001872708339138522,
      "loss": 1.2653,
      "step": 98
    },
    {
      "epoch": 0.012585912359461285,
      "grad_norm": 0.172018900513649,
      "learning_rate": 0.00018698637000295816,
      "loss": 1.2686,
      "step": 99
    },
    {
      "epoch": 0.01271304278733463,
      "grad_norm": 0.18891726434230804,
      "learning_rate": 0.0001866989836146449,
      "loss": 1.4058,
      "step": 100
    },
    {
      "epoch": 0.01271304278733463,
      "eval_loss": 1.2906723022460938,
      "eval_runtime": 1258.4463,
      "eval_samples_per_second": 3.973,
      "eval_steps_per_second": 0.993,
      "step": 100
    },
    {
      "epoch": 0.012840173215207978,
      "grad_norm": 0.1781390905380249,
      "learning_rate": 0.0001864086844042209,
      "loss": 1.3021,
      "step": 101
    },
    {
      "epoch": 0.012967303643081324,
      "grad_norm": 0.17100100219249725,
      "learning_rate": 0.00018611548212485647,
      "loss": 1.2574,
      "step": 102
    },
    {
      "epoch": 0.01309443407095467,
      "grad_norm": 0.18398095667362213,
      "learning_rate": 0.00018581938662725632,
      "loss": 1.2839,
      "step": 103
    },
    {
      "epoch": 0.013221564498828017,
      "grad_norm": 0.18981115520000458,
      "learning_rate": 0.00018552040785932845,
      "loss": 1.3149,
      "step": 104
    },
    {
      "epoch": 0.013348694926701363,
      "grad_norm": 0.18872378766536713,
      "learning_rate": 0.00018521855586584995,
      "loss": 1.279,
      "step": 105
    },
    {
      "epoch": 0.013475825354574708,
      "grad_norm": 0.1824631690979004,
      "learning_rate": 0.00018491384078812959,
      "loss": 1.2743,
      "step": 106
    },
    {
      "epoch": 0.013602955782448056,
      "grad_norm": 0.1971443146467209,
      "learning_rate": 0.000184606272863667,
      "loss": 1.3365,
      "step": 107
    },
    {
      "epoch": 0.013730086210321402,
      "grad_norm": 0.19964328408241272,
      "learning_rate": 0.00018429586242580884,
      "loss": 1.3184,
      "step": 108
    },
    {
      "epoch": 0.013857216638194747,
      "grad_norm": 0.17624543607234955,
      "learning_rate": 0.00018398261990340152,
      "loss": 1.2755,
      "step": 109
    },
    {
      "epoch": 0.013984347066068095,
      "grad_norm": 0.18599238991737366,
      "learning_rate": 0.00018366655582044094,
      "loss": 1.3025,
      "step": 110
    },
    {
      "epoch": 0.01411147749394144,
      "grad_norm": 0.19051305949687958,
      "learning_rate": 0.00018334768079571884,
      "loss": 1.351,
      "step": 111
    },
    {
      "epoch": 0.014238607921814786,
      "grad_norm": 0.1858106255531311,
      "learning_rate": 0.00018302600554246601,
      "loss": 1.2386,
      "step": 112
    },
    {
      "epoch": 0.014365738349688134,
      "grad_norm": 0.17598244547843933,
      "learning_rate": 0.00018270154086799239,
      "loss": 1.2687,
      "step": 113
    },
    {
      "epoch": 0.01449286877756148,
      "grad_norm": 0.18105947971343994,
      "learning_rate": 0.00018237429767332405,
      "loss": 1.2843,
      "step": 114
    },
    {
      "epoch": 0.014619999205434825,
      "grad_norm": 0.18796177208423615,
      "learning_rate": 0.00018204428695283687,
      "loss": 1.2999,
      "step": 115
    },
    {
      "epoch": 0.014747129633308173,
      "grad_norm": 0.18702763319015503,
      "learning_rate": 0.00018171151979388714,
      "loss": 1.2391,
      "step": 116
    },
    {
      "epoch": 0.014874260061181518,
      "grad_norm": 0.17469799518585205,
      "learning_rate": 0.00018137600737643913,
      "loss": 1.2915,
      "step": 117
    },
    {
      "epoch": 0.015001390489054864,
      "grad_norm": 0.1871766746044159,
      "learning_rate": 0.00018103776097268942,
      "loss": 1.2429,
      "step": 118
    },
    {
      "epoch": 0.015128520916928212,
      "grad_norm": 0.18426093459129333,
      "learning_rate": 0.00018069679194668826,
      "loss": 1.2678,
      "step": 119
    },
    {
      "epoch": 0.015255651344801557,
      "grad_norm": 0.1830713450908661,
      "learning_rate": 0.0001803531117539577,
      "loss": 1.3231,
      "step": 120
    },
    {
      "epoch": 0.015382781772674903,
      "grad_norm": 0.19156108796596527,
      "learning_rate": 0.00018000673194110668,
      "loss": 1.3426,
      "step": 121
    },
    {
      "epoch": 0.01550991220054825,
      "grad_norm": 0.18232569098472595,
      "learning_rate": 0.00017965766414544326,
      "loss": 1.2227,
      "step": 122
    },
    {
      "epoch": 0.015637042628421596,
      "grad_norm": 0.18696987628936768,
      "learning_rate": 0.00017930592009458352,
      "loss": 1.2933,
      "step": 123
    },
    {
      "epoch": 0.015764173056294944,
      "grad_norm": 0.18148070573806763,
      "learning_rate": 0.00017895151160605757,
      "loss": 1.3598,
      "step": 124
    },
    {
      "epoch": 0.015891303484168288,
      "grad_norm": 0.1859319657087326,
      "learning_rate": 0.00017859445058691247,
      "loss": 1.2688,
      "step": 125
    },
    {
      "epoch": 0.016018433912041635,
      "grad_norm": 0.18133966624736786,
      "learning_rate": 0.00017823474903331233,
      "loss": 1.2912,
      "step": 126
    },
    {
      "epoch": 0.016145564339914983,
      "grad_norm": 0.16695751249790192,
      "learning_rate": 0.0001778724190301351,
      "loss": 1.2772,
      "step": 127
    },
    {
      "epoch": 0.016272694767788327,
      "grad_norm": 0.17694084346294403,
      "learning_rate": 0.0001775074727505667,
      "loss": 1.2998,
      "step": 128
    },
    {
      "epoch": 0.016399825195661674,
      "grad_norm": 0.18545518815517426,
      "learning_rate": 0.0001771399224556919,
      "loss": 1.2996,
      "step": 129
    },
    {
      "epoch": 0.01652695562353502,
      "grad_norm": 0.1763446033000946,
      "learning_rate": 0.00017676978049408263,
      "loss": 1.2942,
      "step": 130
    },
    {
      "epoch": 0.016654086051408366,
      "grad_norm": 0.1751178801059723,
      "learning_rate": 0.00017639705930138272,
      "loss": 1.2491,
      "step": 131
    },
    {
      "epoch": 0.016781216479281713,
      "grad_norm": 0.17463481426239014,
      "learning_rate": 0.00017602177139989044,
      "loss": 1.3015,
      "step": 132
    },
    {
      "epoch": 0.01690834690715506,
      "grad_norm": 0.1884208619594574,
      "learning_rate": 0.0001756439293981377,
      "loss": 1.2555,
      "step": 133
    },
    {
      "epoch": 0.017035477335028405,
      "grad_norm": 0.1824871301651001,
      "learning_rate": 0.00017526354599046635,
      "loss": 1.3321,
      "step": 134
    },
    {
      "epoch": 0.017162607762901752,
      "grad_norm": 0.17852945625782013,
      "learning_rate": 0.00017488063395660177,
      "loss": 1.2134,
      "step": 135
    },
    {
      "epoch": 0.0172897381907751,
      "grad_norm": 0.17903351783752441,
      "learning_rate": 0.00017449520616122344,
      "loss": 1.202,
      "step": 136
    },
    {
      "epoch": 0.017416868618648444,
      "grad_norm": 0.19624289870262146,
      "learning_rate": 0.00017410727555353282,
      "loss": 1.2983,
      "step": 137
    },
    {
      "epoch": 0.01754399904652179,
      "grad_norm": 0.20271572470664978,
      "learning_rate": 0.00017371685516681825,
      "loss": 1.331,
      "step": 138
    },
    {
      "epoch": 0.01767112947439514,
      "grad_norm": 0.19160455465316772,
      "learning_rate": 0.00017332395811801707,
      "loss": 1.2325,
      "step": 139
    },
    {
      "epoch": 0.017798259902268482,
      "grad_norm": 0.19286282360553741,
      "learning_rate": 0.00017292859760727493,
      "loss": 1.3632,
      "step": 140
    },
    {
      "epoch": 0.01792539033014183,
      "grad_norm": 0.18525561690330505,
      "learning_rate": 0.00017253078691750227,
      "loss": 1.302,
      "step": 141
    },
    {
      "epoch": 0.018052520758015177,
      "grad_norm": 0.17999610304832458,
      "learning_rate": 0.00017213053941392818,
      "loss": 1.2617,
      "step": 142
    },
    {
      "epoch": 0.01817965118588852,
      "grad_norm": 0.1817435920238495,
      "learning_rate": 0.00017172786854365116,
      "loss": 1.285,
      "step": 143
    },
    {
      "epoch": 0.01830678161376187,
      "grad_norm": 0.18393941223621368,
      "learning_rate": 0.00017132278783518756,
      "loss": 1.2033,
      "step": 144
    },
    {
      "epoch": 0.018433912041635216,
      "grad_norm": 0.18280182778835297,
      "learning_rate": 0.00017091531089801694,
      "loss": 1.2454,
      "step": 145
    },
    {
      "epoch": 0.01856104246950856,
      "grad_norm": 0.17269238829612732,
      "learning_rate": 0.00017050545142212483,
      "loss": 1.2137,
      "step": 146
    },
    {
      "epoch": 0.018688172897381908,
      "grad_norm": 0.18515561521053314,
      "learning_rate": 0.00017009322317754278,
      "loss": 1.2876,
      "step": 147
    },
    {
      "epoch": 0.018815303325255255,
      "grad_norm": 0.18649280071258545,
      "learning_rate": 0.0001696786400138859,
      "loss": 1.3279,
      "step": 148
    },
    {
      "epoch": 0.0189424337531286,
      "grad_norm": 0.18008284270763397,
      "learning_rate": 0.00016926171585988727,
      "loss": 1.1943,
      "step": 149
    },
    {
      "epoch": 0.019069564181001947,
      "grad_norm": 0.18855896592140198,
      "learning_rate": 0.00016884246472293016,
      "loss": 1.3458,
      "step": 150
    },
    {
      "epoch": 0.019196694608875294,
      "grad_norm": 0.18721222877502441,
      "learning_rate": 0.00016842090068857742,
      "loss": 1.205,
      "step": 151
    },
    {
      "epoch": 0.019323825036748638,
      "grad_norm": 0.18609726428985596,
      "learning_rate": 0.00016799703792009827,
      "loss": 1.3147,
      "step": 152
    },
    {
      "epoch": 0.019450955464621986,
      "grad_norm": 0.18827542662620544,
      "learning_rate": 0.00016757089065799226,
      "loss": 1.2053,
      "step": 153
    },
    {
      "epoch": 0.019578085892495333,
      "grad_norm": 0.19211921095848083,
      "learning_rate": 0.00016714247321951106,
      "loss": 1.2881,
      "step": 154
    },
    {
      "epoch": 0.019705216320368677,
      "grad_norm": 0.1911146342754364,
      "learning_rate": 0.0001667117999981774,
      "loss": 1.2841,
      "step": 155
    },
    {
      "epoch": 0.019832346748242025,
      "grad_norm": 0.1876746416091919,
      "learning_rate": 0.00016627888546330138,
      "loss": 1.2795,
      "step": 156
    },
    {
      "epoch": 0.019959477176115372,
      "grad_norm": 0.18275220692157745,
      "learning_rate": 0.00016584374415949443,
      "loss": 1.2646,
      "step": 157
    },
    {
      "epoch": 0.020086607603988716,
      "grad_norm": 0.19240595400333405,
      "learning_rate": 0.0001654063907061807,
      "loss": 1.2286,
      "step": 158
    },
    {
      "epoch": 0.020213738031862064,
      "grad_norm": 0.17621144652366638,
      "learning_rate": 0.00016496683979710575,
      "loss": 1.2623,
      "step": 159
    },
    {
      "epoch": 0.02034086845973541,
      "grad_norm": 0.18566247820854187,
      "learning_rate": 0.000164525106199843,
      "loss": 1.2915,
      "step": 160
    },
    {
      "epoch": 0.020467998887608755,
      "grad_norm": 0.19843867421150208,
      "learning_rate": 0.00016408120475529763,
      "loss": 1.1703,
      "step": 161
    },
    {
      "epoch": 0.020595129315482102,
      "grad_norm": 0.20230089128017426,
      "learning_rate": 0.00016363515037720773,
      "loss": 1.274,
      "step": 162
    },
    {
      "epoch": 0.02072225974335545,
      "grad_norm": 0.1874382644891739,
      "learning_rate": 0.00016318695805164359,
      "loss": 1.267,
      "step": 163
    },
    {
      "epoch": 0.020849390171228794,
      "grad_norm": 0.19301468133926392,
      "learning_rate": 0.0001627366428365039,
      "loss": 1.3385,
      "step": 164
    },
    {
      "epoch": 0.02097652059910214,
      "grad_norm": 0.1960678994655609,
      "learning_rate": 0.00016228421986101005,
      "loss": 1.2469,
      "step": 165
    },
    {
      "epoch": 0.02110365102697549,
      "grad_norm": 0.2149035483598709,
      "learning_rate": 0.00016182970432519772,
      "loss": 1.2695,
      "step": 166
    },
    {
      "epoch": 0.021230781454848833,
      "grad_norm": 0.1928316354751587,
      "learning_rate": 0.00016137311149940633,
      "loss": 1.2581,
      "step": 167
    },
    {
      "epoch": 0.02135791188272218,
      "grad_norm": 0.18403369188308716,
      "learning_rate": 0.0001609144567237658,
      "loss": 1.2872,
      "step": 168
    },
    {
      "epoch": 0.021485042310595528,
      "grad_norm": 0.18688054382801056,
      "learning_rate": 0.00016045375540768136,
      "loss": 1.2762,
      "step": 169
    },
    {
      "epoch": 0.021612172738468872,
      "grad_norm": 0.19875864684581757,
      "learning_rate": 0.00015999102302931585,
      "loss": 1.2773,
      "step": 170
    },
    {
      "epoch": 0.02173930316634222,
      "grad_norm": 0.19474861025810242,
      "learning_rate": 0.0001595262751350695,
      "loss": 1.2329,
      "step": 171
    },
    {
      "epoch": 0.021866433594215567,
      "grad_norm": 0.1946505606174469,
      "learning_rate": 0.00015905952733905775,
      "loss": 1.1726,
      "step": 172
    },
    {
      "epoch": 0.02199356402208891,
      "grad_norm": 0.18479324877262115,
      "learning_rate": 0.00015859079532258677,
      "loss": 1.3177,
      "step": 173
    },
    {
      "epoch": 0.022120694449962258,
      "grad_norm": 0.19268646836280823,
      "learning_rate": 0.00015812009483362642,
      "loss": 1.2721,
      "step": 174
    },
    {
      "epoch": 0.022247824877835606,
      "grad_norm": 0.18371957540512085,
      "learning_rate": 0.0001576474416862812,
      "loss": 1.3083,
      "step": 175
    },
    {
      "epoch": 0.02237495530570895,
      "grad_norm": 0.1987624615430832,
      "learning_rate": 0.00015717285176025913,
      "loss": 1.2582,
      "step": 176
    },
    {
      "epoch": 0.022502085733582297,
      "grad_norm": 0.19360652565956116,
      "learning_rate": 0.00015669634100033797,
      "loss": 1.2597,
      "step": 177
    },
    {
      "epoch": 0.022629216161455645,
      "grad_norm": 0.1875244826078415,
      "learning_rate": 0.00015621792541582966,
      "loss": 1.2637,
      "step": 178
    },
    {
      "epoch": 0.02275634658932899,
      "grad_norm": 0.19594229757785797,
      "learning_rate": 0.00015573762108004262,
      "loss": 1.2907,
      "step": 179
    },
    {
      "epoch": 0.022883477017202336,
      "grad_norm": 0.1935066133737564,
      "learning_rate": 0.00015525544412974132,
      "loss": 1.2446,
      "step": 180
    },
    {
      "epoch": 0.023010607445075684,
      "grad_norm": 0.19178606569766998,
      "learning_rate": 0.0001547714107646046,
      "loss": 1.2644,
      "step": 181
    },
    {
      "epoch": 0.023137737872949028,
      "grad_norm": 0.18824580311775208,
      "learning_rate": 0.00015428553724668103,
      "loss": 1.2592,
      "step": 182
    },
    {
      "epoch": 0.023264868300822375,
      "grad_norm": 0.1857818067073822,
      "learning_rate": 0.00015379783989984277,
      "loss": 1.2547,
      "step": 183
    },
    {
      "epoch": 0.023391998728695722,
      "grad_norm": 0.18491147458553314,
      "learning_rate": 0.00015330833510923718,
      "loss": 1.3073,
      "step": 184
    },
    {
      "epoch": 0.023519129156569066,
      "grad_norm": 0.19134363532066345,
      "learning_rate": 0.00015281703932073612,
      "loss": 1.2456,
      "step": 185
    },
    {
      "epoch": 0.023646259584442414,
      "grad_norm": 0.18579505383968353,
      "learning_rate": 0.0001523239690403835,
      "loss": 1.2626,
      "step": 186
    },
    {
      "epoch": 0.02377339001231576,
      "grad_norm": 0.18687140941619873,
      "learning_rate": 0.0001518291408338409,
      "loss": 1.2795,
      "step": 187
    },
    {
      "epoch": 0.023900520440189105,
      "grad_norm": 0.1869836449623108,
      "learning_rate": 0.00015133257132583073,
      "loss": 1.2111,
      "step": 188
    },
    {
      "epoch": 0.024027650868062453,
      "grad_norm": 0.18433886766433716,
      "learning_rate": 0.00015083427719957793,
      "loss": 1.1969,
      "step": 189
    },
    {
      "epoch": 0.0241547812959358,
      "grad_norm": 0.19012001156806946,
      "learning_rate": 0.0001503342751962493,
      "loss": 1.2973,
      "step": 190
    },
    {
      "epoch": 0.024281911723809144,
      "grad_norm": 0.18975861370563507,
      "learning_rate": 0.00014983258211439117,
      "loss": 1.2964,
      "step": 191
    },
    {
      "epoch": 0.024409042151682492,
      "grad_norm": 0.17685554921627045,
      "learning_rate": 0.0001493292148093649,
      "loss": 1.2763,
      "step": 192
    },
    {
      "epoch": 0.02453617257955584,
      "grad_norm": 0.19333194196224213,
      "learning_rate": 0.00014882419019278075,
      "loss": 1.3203,
      "step": 193
    },
    {
      "epoch": 0.024663303007429183,
      "grad_norm": 0.19778768718242645,
      "learning_rate": 0.00014831752523192948,
      "loss": 1.3204,
      "step": 194
    },
    {
      "epoch": 0.02479043343530253,
      "grad_norm": 0.1869363635778427,
      "learning_rate": 0.00014780923694921255,
      "loss": 1.2258,
      "step": 195
    },
    {
      "epoch": 0.024917563863175878,
      "grad_norm": 0.17671674489974976,
      "learning_rate": 0.00014729934242157004,
      "loss": 1.1667,
      "step": 196
    },
    {
      "epoch": 0.025044694291049222,
      "grad_norm": 0.1893490105867386,
      "learning_rate": 0.00014678785877990697,
      "loss": 1.3572,
      "step": 197
    },
    {
      "epoch": 0.02517182471892257,
      "grad_norm": 0.19606593251228333,
      "learning_rate": 0.00014627480320851774,
      "loss": 1.2507,
      "step": 198
    },
    {
      "epoch": 0.025298955146795917,
      "grad_norm": 0.20087891817092896,
      "learning_rate": 0.00014576019294450888,
      "loss": 1.3149,
      "step": 199
    },
    {
      "epoch": 0.02542608557466926,
      "grad_norm": 0.1857730895280838,
      "learning_rate": 0.00014524404527721977,
      "loss": 1.2893,
      "step": 200
    },
    {
      "epoch": 0.02542608557466926,
      "eval_loss": 1.2551084756851196,
      "eval_runtime": 1258.1994,
      "eval_samples_per_second": 3.974,
      "eval_steps_per_second": 0.993,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 552,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0728000454590464e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|