|
{
  "epoch": 0.9991671471586905,
  "global_step": 1114,
  "max_steps": 1114,
  "logging_steps": 1,
  "eval_steps": 50,
  "save_steps": 50,
  "train_batch_size": 8,
  "num_train_epochs": 1,
  "num_input_tokens_seen": 0,
  "total_flos": 6.811715592467251e+17,
  "log_history": [
    {
      "loss": 440.6308,
      "grad_norm": 98.61355590820312,
      "learning_rate": 0.0004999990058793643,
      "epoch": 0.0008969184444871549,
      "step": 1
    },
    {
      "loss": 515.3978,
      "grad_norm": 1112.0234375,
      "learning_rate": 0.0004999960235253631,
      "epoch": 0.0017938368889743098,
      "step": 2
    },
    {
      "loss": 477.4767,
      "grad_norm": 392.8102722167969,
      "learning_rate": 0.0004999910529617153,
      "epoch": 0.0026907553334614646,
      "step": 3
    },
    {
      "loss": 457.2771,
      "grad_norm": 292.9400939941406,
      "learning_rate": 0.0004999840942279514,
      "epoch": 0.0035876737779486196,
      "step": 4
    },
    {
      "loss": 444.411,
      "grad_norm": 166.66598510742188,
      "learning_rate": 0.000499975147379414,
      "epoch": 0.004484592222435775,
      "step": 5
    },
    {
      "loss": 438.7729,
      "grad_norm": 132.8984375,
      "learning_rate": 0.000499964212487257,
      "epoch": 0.005381510666922929,
      "step": 6
    },
    {
      "loss": 434.4058,
      "grad_norm": 102.88407135009766,
      "learning_rate": 0.0004999512896384454,
      "epoch": 0.006278429111410084,
      "step": 7
    },
    {
      "loss": 431.3428,
      "grad_norm": 109.61495971679688,
      "learning_rate": 0.0004999363789357541,
      "epoch": 0.007175347555897239,
      "step": 8
    },
    {
      "loss": 430.6904,
      "grad_norm": 94.442626953125,
      "learning_rate": 0.0004999194804977674,
      "epoch": 0.008072266000384394,
      "step": 9
    },
    {
      "loss": 427.7128,
      "grad_norm": 79.10123443603516,
      "learning_rate": 0.0004999005944588778,
      "epoch": 0.00896918444487155,
      "step": 10
    },
    {
      "loss": 430.6295,
      "grad_norm": 81.77398681640625,
      "learning_rate": 0.0004998797209692856,
      "epoch": 0.009866102889358703,
      "step": 11
    },
    {
      "loss": 422.6068,
      "grad_norm": 67.85909271240234,
      "learning_rate": 0.0004998568601949967,
      "epoch": 0.010763021333845858,
      "step": 12
    },
    {
      "loss": 422.3798,
      "grad_norm": 81.51007843017578,
      "learning_rate": 0.0004998320123178223,
      "epoch": 0.011659939778333014,
      "step": 13
    },
    {
      "loss": 423.3609,
      "grad_norm": 70.7045669555664,
      "learning_rate": 0.0004998051775353763,
      "epoch": 0.012556858222820167,
      "step": 14
    },
    {
      "loss": 423.232,
      "grad_norm": 75.2995834350586,
      "learning_rate": 0.0004997763560610752,
      "epoch": 0.013453776667307323,
      "step": 15
    },
    {
      "loss": 414.7621,
      "grad_norm": 63.627197265625,
      "learning_rate": 0.000499745548124135,
      "epoch": 0.014350695111794478,
      "step": 16
    },
    {
      "loss": 419.0351,
      "grad_norm": 73.96087646484375,
      "learning_rate": 0.0004997127539695701,
      "epoch": 0.015247613556281632,
      "step": 17
    },
    {
      "loss": 418.1977,
      "grad_norm": 70.3633804321289,
      "learning_rate": 0.0004996779738581913,
      "epoch": 0.016144532000768787,
      "step": 18
    },
    {
      "loss": 416.1606,
      "grad_norm": 74.2279052734375,
      "learning_rate": 0.0004996412080666037,
      "epoch": 0.017041450445255943,
      "step": 19
    },
    {
      "loss": 417.2284,
      "grad_norm": 63.311676025390625,
      "learning_rate": 0.0004996024568872042,
      "epoch": 0.0179383688897431,
      "step": 20
    },
    {
      "loss": 409.5278,
      "grad_norm": 63.21588897705078,
      "learning_rate": 0.0004995617206281797,
      "epoch": 0.01883528733423025,
      "step": 21
    },
    {
      "loss": 414.1958,
      "grad_norm": 61.4863395690918,
      "learning_rate": 0.0004995189996135042,
      "epoch": 0.019732205778717406,
      "step": 22
    },
    {
      "loss": 419.7891,
      "grad_norm": 61.297481536865234,
      "learning_rate": 0.0004994742941829364,
      "epoch": 0.02062912422320456,
      "step": 23
    },
    {
      "loss": 414.3831,
      "grad_norm": 68.20845031738281,
      "learning_rate": 0.0004994276046920171,
      "epoch": 0.021526042667691717,
      "step": 24
    },
    {
      "loss": 415.8848,
      "grad_norm": 59.016239166259766,
      "learning_rate": 0.0004993789315120662,
      "epoch": 0.022422961112178872,
      "step": 25
    },
    {
      "loss": 417.4357,
      "grad_norm": 55.90328598022461,
      "learning_rate": 0.0004993282750301799,
      "epoch": 0.023319879556666027,
      "step": 26
    },
    {
      "loss": 411.6564,
      "grad_norm": 59.52859115600586,
      "learning_rate": 0.000499275635649227,
      "epoch": 0.02421679800115318,
      "step": 27
    },
    {
      "loss": 412.2451,
      "grad_norm": 59.61384963989258,
      "learning_rate": 0.0004992210137878472,
      "epoch": 0.025113716445640335,
      "step": 28
    },
    {
      "loss": 416.412,
      "grad_norm": 60.00177001953125,
      "learning_rate": 0.000499164409880446,
      "epoch": 0.02601063489012749,
      "step": 29
    },
    {
      "loss": 405.7923,
      "grad_norm": 59.08831024169922,
      "learning_rate": 0.0004991058243771922,
      "epoch": 0.026907553334614646,
      "step": 30
    },
    {
      "loss": 411.6278,
      "grad_norm": 58.00886154174805,
      "learning_rate": 0.0004990452577440143,
      "epoch": 0.0278044717791018,
      "step": 31
    },
    {
      "loss": 406.3222,
      "grad_norm": 57.3386116027832,
      "learning_rate": 0.0004989827104625969,
      "epoch": 0.028701390223588957,
      "step": 32
    },
    {
      "loss": 404.9872,
      "grad_norm": 56.013816833496094,
      "learning_rate": 0.000498918183030376,
      "epoch": 0.02959830866807611,
      "step": 33
    },
    {
      "loss": 406.4626,
      "grad_norm": 57.787132263183594,
      "learning_rate": 0.0004988516759605363,
      "epoch": 0.030495227112563264,
      "step": 34
    },
    {
      "loss": 405.2309,
      "grad_norm": 54.9903678894043,
      "learning_rate": 0.0004987831897820059,
      "epoch": 0.03139214555705042,
      "step": 35
    },
    {
      "loss": 415.0021,
      "grad_norm": 55.86436462402344,
      "learning_rate": 0.0004987127250394532,
      "epoch": 0.032289064001537575,
      "step": 36
    },
    {
      "loss": 402.1766,
      "grad_norm": 53.72284698486328,
      "learning_rate": 0.0004986402822932818,
      "epoch": 0.03318598244602473,
      "step": 37
    },
    {
      "loss": 409.7162,
      "grad_norm": 56.52421569824219,
      "learning_rate": 0.0004985658621196263,
      "epoch": 0.034082900890511886,
      "step": 38
    },
    {
      "loss": 406.8592,
      "grad_norm": 63.26171875,
      "learning_rate": 0.0004984894651103478,
      "epoch": 0.03497981933499904,
      "step": 39
    },
    {
      "loss": 401.9672,
      "grad_norm": 52.98197937011719,
      "learning_rate": 0.0004984110918730289,
      "epoch": 0.0358767377794862,
      "step": 40
    },
    {
      "loss": 402.0731,
      "grad_norm": 61.255733489990234,
      "learning_rate": 0.0004983307430309695,
      "epoch": 0.03677365622397335,
      "step": 41
    },
    {
      "loss": 405.9777,
      "grad_norm": 62.212188720703125,
      "learning_rate": 0.0004982484192231808,
      "epoch": 0.0376705746684605,
      "step": 42
    },
    {
      "loss": 409.4884,
      "grad_norm": 60.04124450683594,
      "learning_rate": 0.0004981641211043813,
      "epoch": 0.03856749311294766,
      "step": 43
    },
    {
      "loss": 402.7691,
      "grad_norm": 58.80691909790039,
      "learning_rate": 0.0004980778493449912,
      "epoch": 0.03946441155743481,
      "step": 44
    },
    {
      "loss": 406.07,
      "grad_norm": 58.074493408203125,
      "learning_rate": 0.0004979896046311265,
      "epoch": 0.04036133000192197,
      "step": 45
    },
    {
      "loss": 406.7423,
      "grad_norm": 62.749534606933594,
      "learning_rate": 0.0004978993876645944,
      "epoch": 0.04125824844640912,
      "step": 46
    },
    {
      "loss": 403.2931,
      "grad_norm": 58.47712707519531,
      "learning_rate": 0.0004978071991628874,
      "epoch": 0.04215516689089628,
      "step": 47
    },
    {
      "loss": 402.5574,
      "grad_norm": 64.82901000976562,
      "learning_rate": 0.0004977130398591775,
      "epoch": 0.04305208533538343,
      "step": 48
    },
    {
      "loss": 405.5097,
      "grad_norm": 56.95109939575195,
      "learning_rate": 0.00049761691050231,
      "epoch": 0.043949003779870585,
      "step": 49
    },
    {
      "loss": 408.4274,
      "grad_norm": 60.67522048950195,
      "learning_rate": 0.0004975188118567987,
      "epoch": 0.044845922224357744,
      "step": 50
    },
    {
      "eval_loss": 1.7932980060577393,
      "eval_runtime": 41.7475,
      "eval_samples_per_second": 49.057,
      "eval_steps_per_second": 3.066,
      "epoch": 0.044845922224357744,
      "step": 50
    },
    {
      "loss": 405.2191,
      "grad_norm": 61.441951751708984,
      "learning_rate": 0.0004974187447028184,
      "epoch": 0.045742840668844896,
      "step": 51
    },
    {
      "loss": 402.9874,
      "grad_norm": 56.64131546020508,
      "learning_rate": 0.0004973167098361999,
      "epoch": 0.046639759113332055,
      "step": 52
    },
    {
      "loss": 403.7462,
      "grad_norm": 58.905479431152344,
      "learning_rate": 0.0004972127080684228,
      "epoch": 0.04753667755781921,
      "step": 53
    },
    {
      "loss": 402.2606,
      "grad_norm": 60.9106559753418,
      "learning_rate": 0.0004971067402266096,
      "epoch": 0.04843359600230636,
      "step": 54
    },
    {
      "loss": 397.4493,
      "grad_norm": 55.347869873046875,
      "learning_rate": 0.0004969988071535188,
      "epoch": 0.04933051444679352,
      "step": 55
    },
    {
      "loss": 398.7716,
      "grad_norm": 56.816104888916016,
      "learning_rate": 0.0004968889097075385,
      "epoch": 0.05022743289128067,
      "step": 56
    },
    {
      "loss": 399.2036,
      "grad_norm": 63.388851165771484,
      "learning_rate": 0.0004967770487626791,
      "epoch": 0.05112435133576783,
      "step": 57
    },
    {
      "loss": 402.6399,
      "grad_norm": 58.803466796875,
      "learning_rate": 0.0004966632252085668,
      "epoch": 0.05202126978025498,
      "step": 58
    },
    {
      "loss": 401.2329,
      "grad_norm": 61.42218780517578,
      "learning_rate": 0.0004965474399504364,
      "epoch": 0.05291818822474213,
      "step": 59
    },
    {
      "loss": 394.491,
      "grad_norm": 54.581748962402344,
      "learning_rate": 0.000496429693909124,
      "epoch": 0.05381510666922929,
      "step": 60
    },
    {
      "loss": 402.2176,
      "grad_norm": 60.348812103271484,
      "learning_rate": 0.0004963099880210597,
      "epoch": 0.05471202511371644,
      "step": 61
    },
    {
      "loss": 401.5288,
      "grad_norm": 58.51568603515625,
      "learning_rate": 0.0004961883232382603,
      "epoch": 0.0556089435582036,
      "step": 62
    },
    {
      "loss": 402.1975,
      "grad_norm": 53.891822814941406,
      "learning_rate": 0.0004960647005283217,
      "epoch": 0.056505862002690754,
      "step": 63
    },
    {
      "loss": 402.8554,
      "grad_norm": 54.66781234741211,
      "learning_rate": 0.0004959391208744108,
      "epoch": 0.05740278044717791,
      "step": 64
    },
    {
      "loss": 397.2245,
      "grad_norm": 57.83986282348633,
      "learning_rate": 0.0004958115852752582,
      "epoch": 0.058299698891665065,
      "step": 65
    },
    {
      "loss": 398.295,
      "grad_norm": 56.6056022644043,
      "learning_rate": 0.0004956820947451502,
      "epoch": 0.05919661733615222,
      "step": 66
    },
    {
      "loss": 398.1401,
      "grad_norm": 58.830711364746094,
      "learning_rate": 0.0004955506503139204,
      "epoch": 0.060093535780639376,
      "step": 67
    },
    {
      "loss": 401.4149,
      "grad_norm": 54.770755767822266,
      "learning_rate": 0.0004954172530269418,
      "epoch": 0.06099045422512653,
      "step": 68
    },
    {
      "loss": 399.5218,
      "grad_norm": 59.45661926269531,
      "learning_rate": 0.0004952819039451183,
      "epoch": 0.06188737266961369,
      "step": 69
    },
    {
      "loss": 396.4537,
      "grad_norm": 53.4246826171875,
      "learning_rate": 0.0004951446041448765,
      "epoch": 0.06278429111410085,
      "step": 70
    },
    {
      "loss": 401.2764,
      "grad_norm": 55.125919342041016,
      "learning_rate": 0.0004950053547181568,
      "epoch": 0.063681209558588,
      "step": 71
    },
    {
      "loss": 400.9092,
      "grad_norm": 63.59549331665039,
      "learning_rate": 0.0004948641567724053,
      "epoch": 0.06457812800307515,
      "step": 72
    },
    {
      "loss": 397.1968,
      "grad_norm": 58.40228271484375,
      "learning_rate": 0.0004947210114305639,
      "epoch": 0.0654750464475623,
      "step": 73
    },
    {
      "loss": 398.0598,
      "grad_norm": 62.7151985168457,
      "learning_rate": 0.0004945759198310629,
      "epoch": 0.06637196489204945,
      "step": 74
    },
    {
      "loss": 398.7396,
      "grad_norm": 59.287742614746094,
      "learning_rate": 0.0004944288831278106,
      "epoch": 0.06726888333653662,
      "step": 75
    },
    {
      "loss": 391.3397,
      "grad_norm": 59.052059173583984,
      "learning_rate": 0.0004942799024901846,
      "epoch": 0.06816580178102377,
      "step": 76
    },
    {
      "loss": 394.1899,
      "grad_norm": 54.65058135986328,
      "learning_rate": 0.0004941289791030229,
      "epoch": 0.06906272022551092,
      "step": 77
    },
    {
      "loss": 393.8536,
      "grad_norm": 51.59941101074219,
      "learning_rate": 0.0004939761141666139,
      "epoch": 0.06995963866999808,
      "step": 78
    },
    {
      "loss": 396.7059,
      "grad_norm": 55.84555435180664,
      "learning_rate": 0.0004938213088966872,
      "epoch": 0.07085655711448523,
      "step": 79
    },
    {
      "loss": 392.0196,
      "grad_norm": 55.808250427246094,
      "learning_rate": 0.0004936645645244033,
      "epoch": 0.0717534755589724,
      "step": 80
    },
    {
      "loss": 395.5785,
      "grad_norm": 53.83452224731445,
      "learning_rate": 0.0004935058822963453,
      "epoch": 0.07265039400345955,
      "step": 81
    },
    {
      "loss": 398.3966,
      "grad_norm": 61.950626373291016,
      "learning_rate": 0.000493345263474507,
      "epoch": 0.0735473124479467,
      "step": 82
    },
    {
      "loss": 399.4866,
      "grad_norm": 65.6949462890625,
      "learning_rate": 0.0004931827093362844,
      "epoch": 0.07444423089243385,
      "step": 83
    },
    {
      "loss": 393.8017,
      "grad_norm": 54.928836822509766,
      "learning_rate": 0.0004930182211744649,
      "epoch": 0.075341149336921,
      "step": 84
    },
    {
      "loss": 398.1347,
      "grad_norm": 59.81849670410156,
      "learning_rate": 0.0004928518002972172,
      "epoch": 0.07623806778140817,
      "step": 85
    },
    {
      "loss": 392.8837,
      "grad_norm": 57.970462799072266,
      "learning_rate": 0.0004926834480280805,
      "epoch": 0.07713498622589532,
      "step": 86
    },
    {
      "loss": 394.3792,
      "grad_norm": 57.43026351928711,
      "learning_rate": 0.0004925131657059547,
      "epoch": 0.07803190467038247,
      "step": 87
    },
    {
      "loss": 395.7612,
      "grad_norm": 57.73651123046875,
      "learning_rate": 0.0004923409546850891,
      "epoch": 0.07892882311486962,
      "step": 88
    },
    {
      "loss": 396.5627,
      "grad_norm": 58.27775573730469,
      "learning_rate": 0.000492166816335072,
      "epoch": 0.07982574155935677,
      "step": 89
    },
    {
      "loss": 398.5615,
      "grad_norm": 53.49543762207031,
      "learning_rate": 0.0004919907520408196,
      "epoch": 0.08072266000384394,
      "step": 90
    },
    {
      "loss": 398.6497,
      "grad_norm": 57.175514221191406,
      "learning_rate": 0.000491812763202565,
      "epoch": 0.08161957844833109,
      "step": 91
    },
    {
      "loss": 392.5616,
      "grad_norm": 58.206119537353516,
      "learning_rate": 0.0004916328512358472,
      "epoch": 0.08251649689281824,
      "step": 92
    },
    {
      "loss": 390.17,
      "grad_norm": 56.978179931640625,
      "learning_rate": 0.0004914510175714999,
      "epoch": 0.0834134153373054,
      "step": 93
    },
    {
      "loss": 391.477,
      "grad_norm": 59.842369079589844,
      "learning_rate": 0.0004912672636556397,
      "epoch": 0.08431033378179256,
      "step": 94
    },
    {
      "loss": 394.4383,
      "grad_norm": 52.20112609863281,
      "learning_rate": 0.0004910815909496555,
      "epoch": 0.08520725222627971,
      "step": 95
    },
    {
      "loss": 390.8443,
      "grad_norm": 61.12334060668945,
      "learning_rate": 0.0004908940009301954,
      "epoch": 0.08610417067076687,
      "step": 96
    },
    {
      "loss": 395.9276,
      "grad_norm": 55.49872589111328,
      "learning_rate": 0.0004907044950891565,
      "epoch": 0.08700108911525402,
      "step": 97
    },
    {
      "loss": 394.7866,
      "grad_norm": 59.71890640258789,
      "learning_rate": 0.000490513074933672,
      "epoch": 0.08789800755974117,
      "step": 98
    },
    {
      "loss": 388.5464,
      "grad_norm": 55.72919845581055,
      "learning_rate": 0.0004903197419860999,
      "epoch": 0.08879492600422834,
      "step": 99
    },
    {
      "loss": 392.9969,
      "grad_norm": 61.6799430847168,
      "learning_rate": 0.0004901244977840103,
      "epoch": 0.08969184444871549,
      "step": 100
    },
    {
      "eval_loss": 1.7485355138778687,
      "eval_runtime": 49.5113,
      "eval_samples_per_second": 41.364,
      "eval_steps_per_second": 2.585,
      "epoch": 0.08969184444871549,
      "step": 100
    },
    {
      "loss": 393.0805,
      "grad_norm": 58.71113204956055,
      "learning_rate": 0.0004899273438801734,
      "epoch": 0.09058876289320264,
      "step": 101
    },
    {
      "loss": 391.5116,
      "grad_norm": 54.11758804321289,
      "learning_rate": 0.0004897282818425474,
      "epoch": 0.09148568133768979,
      "step": 102
    },
    {
      "loss": 394.4952,
      "grad_norm": 53.54176712036133,
      "learning_rate": 0.0004895273132542658,
      "epoch": 0.09238259978217694,
      "step": 103
    },
    {
      "loss": 392.5484,
      "grad_norm": 51.26163101196289,
      "learning_rate": 0.0004893244397136246,
      "epoch": 0.09327951822666411,
      "step": 104
    },
    {
      "loss": 392.7574,
      "grad_norm": 57.158973693847656,
      "learning_rate": 0.0004891196628340703,
      "epoch": 0.09417643667115126,
      "step": 105
    },
    {
      "loss": 392.1094,
      "grad_norm": 51.87057113647461,
      "learning_rate": 0.0004889129842441859,
      "epoch": 0.09507335511563841,
      "step": 106
    },
    {
      "loss": 391.9873,
      "grad_norm": 62.71110534667969,
      "learning_rate": 0.0004887044055876793,
      "epoch": 0.09597027356012557,
      "step": 107
    },
    {
      "loss": 393.0227,
      "grad_norm": 61.41956329345703,
      "learning_rate": 0.0004884939285233691,
      "epoch": 0.09686719200461272,
      "step": 108
    },
    {
      "loss": 389.2371,
      "grad_norm": 59.030765533447266,
      "learning_rate": 0.0004882815547251721,
      "epoch": 0.09776411044909988,
      "step": 109
    },
    {
      "loss": 394.932,
      "grad_norm": 60.926448822021484,
      "learning_rate": 0.00048806728588208966,
      "epoch": 0.09866102889358704,
      "step": 110
    },
    {
      "loss": 389.2965,
      "grad_norm": 59.546268463134766,
      "learning_rate": 0.0004878511236981945,
      "epoch": 0.09955794733807419,
      "step": 111
    },
    {
      "loss": 389.0897,
      "grad_norm": 56.25603103637695,
      "learning_rate": 0.0004876330698926169,
      "epoch": 0.10045486578256134,
      "step": 112
    },
    {
      "loss": 391.7546,
      "grad_norm": 63.1163444519043,
      "learning_rate": 0.00048741312619953104,
      "epoch": 0.10135178422704849,
      "step": 113
    },
    {
      "loss": 392.0137,
      "grad_norm": 70.23162078857422,
      "learning_rate": 0.00048719129436814156,
      "epoch": 0.10224870267153566,
      "step": 114
    },
    {
      "loss": 390.5738,
      "grad_norm": 60.9749755859375,
      "learning_rate": 0.00048696757616266927,
      "epoch": 0.10314562111602281,
      "step": 115
    },
    {
      "loss": 387.7592,
      "grad_norm": 60.2146110534668,
      "learning_rate": 0.0004867419733623372,
      "epoch": 0.10404253956050996,
      "step": 116
    },
    {
      "loss": 390.6403,
      "grad_norm": 59.26010513305664,
      "learning_rate": 0.00048651448776135654,
      "epoch": 0.10493945800499711,
      "step": 117
    },
    {
      "loss": 391.4545,
      "grad_norm": 55.02613067626953,
      "learning_rate": 0.00048628512116891234,
      "epoch": 0.10583637644948427,
      "step": 118
    },
    {
      "loss": 388.2937,
      "grad_norm": 56.28743362426758,
      "learning_rate": 0.00048605387540914916,
      "epoch": 0.10673329489397143,
      "step": 119
    },
    {
      "loss": 389.2755,
      "grad_norm": 55.22878646850586,
      "learning_rate": 0.0004858207523211563,
      "epoch": 0.10763021333845858,
      "step": 120
    },
    {
      "loss": 392.9062,
      "grad_norm": 55.45512771606445,
      "learning_rate": 0.00048558575375895377,
      "epoch": 0.10852713178294573,
      "step": 121
    },
    {
      "loss": 388.4548,
      "grad_norm": 58.8115119934082,
      "learning_rate": 0.0004853488815914767,
      "epoch": 0.10942405022743289,
      "step": 122
    },
    {
      "loss": 390.1011,
      "grad_norm": 55.49444580078125,
      "learning_rate": 0.00048511013770256134,
      "epoch": 0.11032096867192005,
      "step": 123
    },
    {
      "loss": 388.7439,
      "grad_norm": 54.36104202270508,
      "learning_rate": 0.00048486952399092945,
      "epoch": 0.1112178871164072,
      "step": 124
    },
    {
      "loss": 391.1307,
      "grad_norm": 52.75822067260742,
      "learning_rate": 0.0004846270423701734,
      "epoch": 0.11211480556089436,
      "step": 125
    },
    {
      "loss": 388.8095,
      "grad_norm": 55.67084884643555,
      "learning_rate": 0.0004843826947687411,
      "epoch": 0.11301172400538151,
      "step": 126
    },
    {
      "loss": 388.7104,
      "grad_norm": 58.483211517333984,
      "learning_rate": 0.0004841364831299206,
      "epoch": 0.11390864244986866,
      "step": 127
    },
    {
      "loss": 392.5351,
      "grad_norm": 54.69878387451172,
      "learning_rate": 0.00048388840941182435,
      "epoch": 0.11480556089435583,
      "step": 128
    },
    {
      "loss": 389.9329,
      "grad_norm": 56.85935974121094,
      "learning_rate": 0.00048363847558737395,
      "epoch": 0.11570247933884298,
      "step": 129
    },
    {
      "loss": 389.8976,
      "grad_norm": 55.818260192871094,
      "learning_rate": 0.0004833866836442844,
      "epoch": 0.11659939778333013,
      "step": 130
    },
    {
      "loss": 389.0714,
      "grad_norm": 69.33192443847656,
      "learning_rate": 0.0004831330355850483,
      "epoch": 0.11749631622781728,
      "step": 131
    },
    {
      "loss": 387.675,
      "grad_norm": 59.69966506958008,
      "learning_rate": 0.0004828775334269198,
      "epoch": 0.11839323467230443,
      "step": 132
    },
    {
      "loss": 389.1474,
      "grad_norm": 63.28241729736328,
      "learning_rate": 0.0004826201792018986,
      "epoch": 0.1192901531167916,
      "step": 133
    },
    {
      "loss": 386.0185,
      "grad_norm": 60.13338851928711,
      "learning_rate": 0.0004823609749567138,
      "epoch": 0.12018707156127875,
      "step": 134
    },
    {
      "loss": 393.0312,
      "grad_norm": 50.345890045166016,
      "learning_rate": 0.0004820999227528079,
      "epoch": 0.1210839900057659,
      "step": 135
    },
    {
      "loss": 388.9017,
      "grad_norm": 54.398582458496094,
      "learning_rate": 0.00048183702466631986,
      "epoch": 0.12198090845025306,
      "step": 136
    },
    {
      "loss": 390.3952,
      "grad_norm": 58.791343688964844,
      "learning_rate": 0.0004815722827880689,
      "epoch": 0.12287782689474021,
      "step": 137
    },
    {
      "loss": 391.5972,
      "grad_norm": 56.27891540527344,
      "learning_rate": 0.000481305699223538,
      "epoch": 0.12377474533922737,
      "step": 138
    },
    {
      "loss": 390.4619,
      "grad_norm": 57.29872512817383,
      "learning_rate": 0.000481037276092857,
      "epoch": 0.12467166378371453,
      "step": 139
    },
    {
      "loss": 386.5269,
      "grad_norm": 56.40953826904297,
      "learning_rate": 0.0004807670155307856,
      "epoch": 0.1255685822282017,
      "step": 140
    },
    {
      "loss": 386.9588,
      "grad_norm": 56.36626434326172,
      "learning_rate": 0.0004804949196866967,
      "epoch": 0.12646550067268883,
      "step": 141
    },
    {
      "loss": 390.6064,
      "grad_norm": 59.941890716552734,
      "learning_rate": 0.00048022099072455893,
      "epoch": 0.127362419117176,
      "step": 142
    },
    {
      "loss": 389.5639,
      "grad_norm": 55.42548370361328,
      "learning_rate": 0.0004799452308229199,
      "epoch": 0.12825933756166313,
      "step": 143
    },
    {
      "loss": 389.1144,
      "grad_norm": 59.46462631225586,
      "learning_rate": 0.0004796676421748883,
      "epoch": 0.1291562560061503,
      "step": 144
    },
    {
      "loss": 387.238,
      "grad_norm": 61.307960510253906,
      "learning_rate": 0.0004793882269881172,
      "epoch": 0.13005317445063747,
      "step": 145
    },
    {
      "loss": 385.9282,
      "grad_norm": 53.019859313964844,
      "learning_rate": 0.00047910698748478565,
      "epoch": 0.1309500928951246,
      "step": 146
    },
    {
      "loss": 388.6133,
      "grad_norm": 59.57033920288086,
      "learning_rate": 0.00047882392590158166,
      "epoch": 0.13184701133961177,
      "step": 147
    },
    {
      "loss": 385.2765,
      "grad_norm": 55.921993255615234,
      "learning_rate": 0.000478539044489684,
      "epoch": 0.1327439297840989,
      "step": 148
    },
    {
      "loss": 387.315,
      "grad_norm": 53.27146911621094,
      "learning_rate": 0.0004782523455147448,
      "epoch": 0.13364084822858607,
      "step": 149
    },
    {
      "loss": 384.9127,
      "grad_norm": 61.21531295776367,
      "learning_rate": 0.0004779638312568708,
      "epoch": 0.13453776667307324,
      "step": 150
    },
    {
      "eval_loss": 1.7258449792861938,
      "eval_runtime": 36.7008,
      "eval_samples_per_second": 55.803,
      "eval_steps_per_second": 3.488,
      "epoch": 0.13453776667307324,
      "step": 150
    },
    {
      "loss": 385.8539,
      "grad_norm": 60.04133605957031,
      "learning_rate": 0.00047767350401060606,
      "epoch": 0.13543468511756038,
      "step": 151
    },
    {
      "loss": 384.8003,
      "grad_norm": 59.11763000488281,
      "learning_rate": 0.0004773813660849128,
      "epoch": 0.13633160356204754,
      "step": 152
    },
    {
      "loss": 387.7485,
      "grad_norm": 56.51465606689453,
      "learning_rate": 0.0004770874198031538,
      "epoch": 0.13722852200653468,
      "step": 153
    },
    {
      "loss": 383.2278,
      "grad_norm": 56.18191146850586,
      "learning_rate": 0.0004767916675030736,
      "epoch": 0.13812544045102185,
      "step": 154
    },
    {
      "loss": 383.6736,
      "grad_norm": 57.308799743652344,
      "learning_rate": 0.00047649411153678,
      "epoch": 0.139022358895509,
      "step": 155
    },
    {
      "loss": 383.3135,
      "grad_norm": 56.1787109375,
      "learning_rate": 0.0004761947542707251,
      "epoch": 0.13991927733999615,
      "step": 156
    },
    {
      "loss": 380.7021,
      "grad_norm": 59.29663848876953,
      "learning_rate": 0.0004758935980856868,
      "epoch": 0.14081619578448332,
      "step": 157
    },
    {
      "loss": 388.3537,
      "grad_norm": 56.997901916503906,
      "learning_rate": 0.00047559064537674973,
      "epoch": 0.14171311422897045,
      "step": 158
    },
    {
      "loss": 382.6107,
      "grad_norm": 54.997398376464844,
      "learning_rate": 0.0004752858985532862,
      "epoch": 0.14261003267345762,
      "step": 159
    },
    {
      "loss": 390.4788,
      "grad_norm": 61.30497360229492,
      "learning_rate": 0.00047497936003893713,
      "epoch": 0.1435069511179448,
      "step": 160
    },
    {
      "loss": 383.9597,
      "grad_norm": 56.59492492675781,
      "learning_rate": 0.0004746710322715926,
      "epoch": 0.14440386956243192,
      "step": 161
    },
    {
      "loss": 392.4949,
      "grad_norm": 63.977073669433594,
      "learning_rate": 0.0004743609177033725,
      "epoch": 0.1453007880069191,
      "step": 162
    },
    {
      "loss": 385.7721,
      "grad_norm": 63.132537841796875,
      "learning_rate": 0.0004740490188006072,
      "epoch": 0.14619770645140623,
      "step": 163
    },
    {
      "loss": 385.057,
      "grad_norm": 61.54987716674805,
      "learning_rate": 0.0004737353380438178,
      "epoch": 0.1470946248958934,
      "step": 164
    },
    {
      "loss": 384.8288,
      "grad_norm": 64.65653228759766,
      "learning_rate": 0.00047341987792769635,
      "epoch": 0.14799154334038056,
      "step": 165
    },
    {
      "loss": 385.061,
      "grad_norm": 52.979087829589844,
      "learning_rate": 0.0004731026409610863,
      "epoch": 0.1488884617848677,
      "step": 166
    },
    {
      "loss": 385.9828,
      "grad_norm": 66.97553253173828,
      "learning_rate": 0.00047278362966696197,
      "epoch": 0.14978538022935486,
      "step": 167
    },
    {
      "loss": 381.6645,
      "grad_norm": 49.72977066040039,
      "learning_rate": 0.00047246284658240925,
      "epoch": 0.150682298673842,
      "step": 168
    },
    {
      "loss": 387.0713,
      "grad_norm": 59.0352668762207,
      "learning_rate": 0.0004721402942586046,
      "epoch": 0.15157921711832917,
      "step": 169
    },
    {
      "loss": 388.6861,
      "grad_norm": 56.49056625366211,
      "learning_rate": 0.0004718159752607955,
      "epoch": 0.15247613556281633,
      "step": 170
    },
    {
      "loss": 386.6622,
      "grad_norm": 61.9783935546875,
      "learning_rate": 0.00047148989216827964,
      "epoch": 0.15337305400730347,
      "step": 171
    },
    {
      "loss": 385.3264,
      "grad_norm": 60.84406280517578,
      "learning_rate": 0.0004711620475743844,
      "epoch": 0.15426997245179064,
      "step": 172
    },
    {
      "loss": 383.2025,
      "grad_norm": 55.59370803833008,
      "learning_rate": 0.00047083244408644646,
      "epoch": 0.15516689089627778,
      "step": 173
    },
    {
      "loss": 383.7802,
      "grad_norm": 59.102760314941406,
      "learning_rate": 0.0004705010843257908,
      "epoch": 0.15606380934076494,
      "step": 174
    },
    {
      "loss": 387.181,
      "grad_norm": 63.97918701171875,
      "learning_rate": 0.00047016797092771004,
      "epoch": 0.1569607277852521,
      "step": 175
    },
    {
      "loss": 382.4706,
      "grad_norm": 58.40498733520508,
      "learning_rate": 0.0004698331065414434,
      "epoch": 0.15785764622973925,
      "step": 176
    },
    {
      "loss": 374.7974,
      "grad_norm": 57.276405334472656,
      "learning_rate": 0.0004694964938301556,
      "epoch": 0.1587545646742264,
      "step": 177
    },
    {
      "loss": 383.6686,
      "grad_norm": 65.17239379882812,
      "learning_rate": 0.0004691581354709159,
      "epoch": 0.15965148311871355,
      "step": 178
    },
    {
      "loss": 382.2492,
      "grad_norm": 54.67914962768555,
      "learning_rate": 0.0004688180341546765,
      "epoch": 0.16054840156320072,
      "step": 179
    },
    {
      "loss": 379.0845,
      "grad_norm": 61.17100524902344,
      "learning_rate": 0.0004684761925862512,
      "epoch": 0.16144532000768788,
      "step": 180
    },
    {
      "loss": 380.5147,
      "grad_norm": 53.48952102661133,
      "learning_rate": 0.00046813261348429403,
      "epoch": 0.16234223845217502,
      "step": 181
    },
    {
      "loss": 388.3456,
      "grad_norm": 62.524898529052734,
      "learning_rate": 0.0004677872995812778,
      "epoch": 0.16323915689666219,
      "step": 182
    },
    {
      "loss": 384.9105,
      "grad_norm": 55.23896026611328,
      "learning_rate": 0.00046744025362347174,
      "epoch": 0.16413607534114932,
      "step": 183
    },
    {
      "loss": 388.0769,
      "grad_norm": 58.2794075012207,
      "learning_rate": 0.0004670914783709203,
      "epoch": 0.1650329937856365,
      "step": 184
    },
    {
      "loss": 375.4843,
      "grad_norm": 57.62440872192383,
      "learning_rate": 0.00046674097659742087,
      "epoch": 0.16592991223012366,
      "step": 185
    },
    {
      "loss": 388.4005,
      "grad_norm": 54.49860763549805,
      "learning_rate": 0.00046638875109050184,
      "epoch": 0.1668268306746108,
      "step": 186
    },
    {
      "loss": 379.2246,
      "grad_norm": 56.57727813720703,
      "learning_rate": 0.00046603480465140035,
      "epoch": 0.16772374911909796,
      "step": 187
    },
    {
      "loss": 390.5371,
      "grad_norm": 53.35488510131836,
      "learning_rate": 0.0004656791400950401,
      "epoch": 0.16862066756358512,
      "step": 188
    },
    {
      "loss": 376.5087,
      "grad_norm": 57.38853454589844,
      "learning_rate": 0.0004653217602500088,
      "epoch": 0.16951758600807226,
      "step": 189
    },
    {
      "loss": 383.3448,
      "grad_norm": 53.162269592285156,
      "learning_rate": 0.00046496266795853606,
      "epoch": 0.17041450445255943,
      "step": 190
    },
    {
      "loss": 385.954,
      "grad_norm": 56.76969528198242,
      "learning_rate": 0.0004646018660764701,
      "epoch": 0.17131142289704657,
      "step": 191
    },
    {
      "loss": 380.8749,
      "grad_norm": 55.99345016479492,
      "learning_rate": 0.0004642393574732559,
      "epoch": 0.17220834134153373,
      "step": 192
    },
    {
      "loss": 379.5312,
      "grad_norm": 49.73320770263672,
      "learning_rate": 0.0004638751450319116,
      "epoch": 0.1731052597860209,
      "step": 193
    },
    {
      "loss": 385.7988,
      "grad_norm": 56.80336380004883,
      "learning_rate": 0.00046350923164900604,
      "epoch": 0.17400217823050804,
      "step": 194
    },
    {
      "loss": 380.8796,
      "grad_norm": 57.32421875,
      "learning_rate": 0.0004631416202346357,
      "epoch": 0.1748990966749952,
      "step": 195
    },
    {
      "loss": 382.128,
      "grad_norm": 62.81551742553711,
      "learning_rate": 0.00046277231371240113,
      "epoch": 0.17579601511948234,
      "step": 196
    },
    {
      "loss": 383.9042,
      "grad_norm": 60.5498046875,
      "learning_rate": 0.00046240131501938436,
      "epoch": 0.1766929335639695,
      "step": 197
    },
    {
      "loss": 380.0457,
      "grad_norm": 54.78828811645508,
      "learning_rate": 0.000462028627106125,
      "epoch": 0.17758985200845667,
      "step": 198
    },
    {
      "loss": 383.6067,
      "grad_norm": 60.62177276611328,
      "learning_rate": 0.00046165425293659694,
      "epoch": 0.1784867704529438,
      "step": 199
    },
    {
      "loss": 385.004,
      "grad_norm": 53.65549850463867,
      "learning_rate": 0.00046127819548818507,
      "epoch": 0.17938368889743098,
      "step": 200
    },
    {
      "eval_loss": 1.6973483562469482,
      "eval_runtime": 57.4311,
      "eval_samples_per_second": 35.66,
      "eval_steps_per_second": 2.229,
      "epoch": 0.17938368889743098,
      "step": 200
    },
    {
      "loss": 381.3797,
      "grad_norm": 60.24985885620117,
      "learning_rate": 0.0004609004577516609,
      "epoch": 0.18028060734191811,
      "step": 201
    },
    {
      "loss": 384.8868,
      "grad_norm": 55.66313552856445,
      "learning_rate": 0.00046052104273115957,
      "epoch": 0.18117752578640528,
      "step": 202
    },
    {
      "loss": 381.8181,
      "grad_norm": 58.7210807800293,
      "learning_rate": 0.0004601399534441556,
      "epoch": 0.18207444423089245,
      "step": 203
    },
    {
      "loss": 381.6777,
      "grad_norm": 51.48910903930664,
      "learning_rate": 0.0004597571929214386,
      "epoch": 0.18297136267537958,
      "step": 204
    },
    {
      "loss": 389.5296,
      "grad_norm": 55.63520050048828,
      "learning_rate": 0.00045937276420708985,
      "epoch": 0.18386828111986675,
      "step": 205
    },
    {
      "loss": 379.7319,
      "grad_norm": 56.91200637817383,
      "learning_rate": 0.00045898667035845726,
      "epoch": 0.1847651995643539,
      "step": 206
    },
    {
      "loss": 383.4648,
      "grad_norm": 60.174800872802734,
      "learning_rate": 0.0004585989144461319,
      "epoch": 0.18566211800884105,
      "step": 207
    },
    {
      "loss": 381.6614,
      "grad_norm": 46.41486740112305,
      "learning_rate": 0.00045820949955392286,
      "epoch": 0.18655903645332822,
      "step": 208
    },
    {
      "loss": 388.843,
      "grad_norm": 66.20514678955078,
      "learning_rate": 0.0004578184287788333,
      "epoch": 0.18745595489781536,
      "step": 209
    },
    {
      "loss": 382.3195,
      "grad_norm": 52.08879470825195,
      "learning_rate": 0.0004574257052310355,
      "epoch": 0.18835287334230252,
      "step": 210
    },
    {
      "loss": 376.9011,
      "grad_norm": 59.04060363769531,
      "learning_rate": 0.00045703133203384594,
      "epoch": 0.18924979178678966,
      "step": 211
    },
    {
      "loss": 382.9858,
      "grad_norm": 57.139583587646484,
      "learning_rate": 0.000456635312323701,
      "epoch": 0.19014671023127683,
      "step": 212
    },
    {
      "loss": 386.4098,
      "grad_norm": 56.69694137573242,
      "learning_rate": 0.00045623764925013154,
      "epoch": 0.191043628675764,
      "step": 213
    },
    {
      "loss": 381.0145,
      "grad_norm": 54.969146728515625,
      "learning_rate": 0.00045583834597573826,
      "epoch": 0.19194054712025113,
      "step": 214
    },
    {
      "loss": 386.2006,
      "grad_norm": 55.187095642089844,
      "learning_rate": 0.000455437405676166,
      "epoch": 0.1928374655647383,
      "step": 215
    },
    {
      "loss": 385.4291,
      "grad_norm": 56.27381896972656,
      "learning_rate": 0.000455034831540079,
      "epoch": 0.19373438400922544,
      "step": 216
    },
    {
      "loss": 382.2878,
      "grad_norm": 55.81896209716797,
      "learning_rate": 0.00045463062676913527,
      "epoch": 0.1946313024537126,
      "step": 217
    },
    {
      "loss": 381.0126,
      "grad_norm": 60.54517364501953,
      "learning_rate": 0.0004542247945779613,
      "epoch": 0.19552822089819977,
      "step": 218
    },
    {
      "loss": 382.4228,
      "grad_norm": 51.44652557373047,
      "learning_rate": 0.0004538173381941264,
      "epoch": 0.1964251393426869,
      "step": 219
    },
    {
      "loss": 374.3478,
      "grad_norm": 57.77920150756836,
      "learning_rate": 0.0004534082608581168,
      "epoch": 0.19732205778717407,
      "step": 220
    },
    {
      "loss": 379.4279,
      "grad_norm": 52.3509635925293,
      "learning_rate": 0.0004529975658233104,
      "epoch": 0.1982189762316612,
      "step": 221
    },
    {
      "loss": 380.0542,
      "grad_norm": 53.75742721557617,
      "learning_rate": 0.0004525852563559505,
      "epoch": 0.19911589467614838,
      "step": 222
    },
    {
      "loss": 387.0319,
      "grad_norm": 59.18511199951172,
      "learning_rate": 0.0004521713357351198,
      "epoch": 0.20001281312063554,
      "step": 223
    },
    {
      "loss": 375.638,
      "grad_norm": 53.67622375488281,
      "learning_rate": 0.00045175580725271457,
      "epoch": 0.20090973156512268,
      "step": 224
    },
    {
      "loss": 383.951,
      "grad_norm": 67.28981018066406,
      "learning_rate": 0.00045133867421341835,
      "epoch": 0.20180665000960984,
      "step": 225
    },
    {
      "loss": 380.0722,
      "grad_norm": 62.926700592041016,
      "learning_rate": 0.00045091993993467554,
      "epoch": 0.20270356845409698,
      "step": 226
    },
    {
      "loss": 377.9981,
      "grad_norm": 53.50834274291992,
      "learning_rate": 0.0004504996077466654,
      "epoch": 0.20360048689858415,
      "step": 227
    },
    {
      "loss": 380.4308,
      "grad_norm": 61.55268096923828,
      "learning_rate": 0.0004500776809922751,
      "epoch": 0.20449740534307131,
      "step": 228
    },
    {
      "loss": 375.9146,
      "grad_norm": 55.11613845825195,
      "learning_rate": 0.0004496541630270733,
      "epoch": 0.20539432378755845,
      "step": 229
    },
    {
      "loss": 381.8729,
      "grad_norm": 61.67683410644531,
      "learning_rate": 0.00044922905721928366,
      "epoch": 0.20629124223204562,
      "step": 230
    },
    {
      "loss": 377.6188,
      "grad_norm": 55.07930374145508,
      "learning_rate": 0.00044880236694975773,
      "epoch": 0.20718816067653276,
      "step": 231
    },
    {
      "loss": 383.7285,
      "grad_norm": 56.17093276977539,
      "learning_rate": 0.0004483740956119485,
      "epoch": 0.20808507912101992,
      "step": 232
    },
    {
      "loss": 379.3219,
      "grad_norm": 57.20262908935547,
      "learning_rate": 0.0004479442466118828,
      "epoch": 0.2089819975655071,
      "step": 233
    },
    {
      "loss": 378.996,
      "grad_norm": 52.91606521606445,
      "learning_rate": 0.0004475128233681349,
      "epoch": 0.20987891600999423,
      "step": 234
    },
    {
      "loss": 376.5712,
      "grad_norm": 53.59124755859375,
      "learning_rate": 0.00044707982931179856,
      "epoch": 0.2107758344544814,
      "step": 235
    },
    {
      "loss": 385.7614,
      "grad_norm": 57.6840705871582,
      "learning_rate": 0.00044664526788646064,
      "epoch": 0.21167275289896853,
      "step": 236
    },
    {
      "loss": 381.0049,
      "grad_norm": 54.7835578918457,
      "learning_rate": 0.0004462091425481728,
      "epoch": 0.2125696713434557,
      "step": 237
    },
    {
      "loss": 380.4299,
      "grad_norm": 56.61455535888672,
      "learning_rate": 0.0004457714567654247,
      "epoch": 0.21346658978794286,
      "step": 238
    },
    {
      "loss": 377.3007,
      "grad_norm": 54.04520797729492,
      "learning_rate": 0.0004453322140191162,
      "epoch": 0.21436350823243,
      "step": 239
    },
    {
      "loss": 376.2494,
      "grad_norm": 61.18534469604492,
      "learning_rate": 0.0004448914178025293,
      "epoch": 0.21526042667691717,
      "step": 240
    },
    {
      "loss": 379.0678,
      "grad_norm": 58.791934967041016,
      "learning_rate": 0.000444449071621301,
      "epoch": 0.21615734512140433,
      "step": 241
    },
    {
      "loss": 383.8186,
      "grad_norm": 54.751407623291016,
      "learning_rate": 0.0004440051789933951,
      "epoch": 0.21705426356589147,
      "step": 242
    },
    {
      "loss": 374.9797,
      "grad_norm": 54.97734451293945,
      "learning_rate": 0.0004435597434490741,
      "epoch": 0.21795118201037864,
      "step": 243
    },
    {
      "loss": 381.2922,
      "grad_norm": 55.37065887451172,
      "learning_rate": 0.00044311276853087144,
      "epoch": 0.21884810045486577,
      "step": 244
    },
    {
      "loss": 378.8845,
      "grad_norm": 58.74147033691406,
      "learning_rate": 0.0004426642577935629,
      "epoch": 0.21974501889935294,
      "step": 245
    },
    {
      "loss": 386.1524,
      "grad_norm": 58.316097259521484,
      "learning_rate": 0.0004422142148041388,
      "epoch": 0.2206419373438401,
      "step": 246
    },
    {
      "loss": 378.2374,
      "grad_norm": 54.42732238769531,
      "learning_rate": 0.00044176264314177535,
      "epoch": 0.22153885578832724,
      "step": 247
    },
    {
      "loss": 378.246,
      "grad_norm": 56.714080810546875,
      "learning_rate": 0.00044130954639780615,
      "epoch": 0.2224357742328144,
      "step": 248
    },
    {
      "loss": 373.9691,
      "grad_norm": 51.52580642700195,
      "learning_rate": 0.0004408549281756937,
      "epoch": 0.22333269267730155,
      "step": 249
    },
    {
      "loss": 377.4944,
      "grad_norm": 61.44560241699219,
      "learning_rate": 0.0004403987920910011,
      "epoch": 0.2242296111217887,
      "step": 250
    },
    {
      "eval_loss": 1.6841200590133667,
      "eval_runtime": 35.8648,
      "eval_samples_per_second": 57.103,
      "eval_steps_per_second": 3.569,
      "epoch": 0.2242296111217887,
      "step": 250
    },
    {
      "loss": 372.7726,
      "grad_norm": 52.64440155029297,
      "learning_rate": 0.00043994114177136245,
      "epoch": 0.22512652956627588,
      "step": 251
    },
    {
      "loss": 374.3314,
      "grad_norm": 57.64458084106445,
      "learning_rate": 0.0004394819808564549,
      "epoch": 0.22602344801076302,
      "step": 252
    },
    {
      "loss": 380.1327,
      "grad_norm": 48.348487854003906,
      "learning_rate": 0.00043902131299796923,
      "epoch": 0.22692036645525018,
      "step": 253
    },
    {
      "loss": 376.8272,
      "grad_norm": 55.306766510009766,
      "learning_rate": 0.00043855914185958066,
      "epoch": 0.22781728489973732,
      "step": 254
    },
    {
      "loss": 373.5811,
      "grad_norm": 50.16413879394531,
      "learning_rate": 0.0004380954711169202,
      "epoch": 0.2287142033442245,
      "step": 255
    },
    {
      "loss": 380.8544,
      "grad_norm": 52.902305603027344,
      "learning_rate": 0.00043763030445754516,
      "epoch": 0.22961112178871165,
      "step": 256
    },
    {
      "loss": 380.7617,
      "grad_norm": 55.323490142822266,
      "learning_rate": 0.0004371636455809096,
      "epoch": 0.2305080402331988,
      "step": 257
    },
    {
      "loss": 378.9308,
      "grad_norm": 53.362361907958984,
      "learning_rate": 0.00043669549819833536,
      "epoch": 0.23140495867768596,
      "step": 258
    },
    {
      "loss": 378.0917,
      "grad_norm": 51.511932373046875,
      "learning_rate": 0.0004362258660329822,
      "epoch": 0.2323018771221731,
      "step": 259
    },
    {
      "loss": 374.3557,
      "grad_norm": 60.112728118896484,
      "learning_rate": 0.0004357547528198184,
      "epoch": 0.23319879556666026,
      "step": 260
    },
    {
      "loss": 382.0044,
      "grad_norm": 52.59751510620117,
      "learning_rate": 0.0004352821623055908,
      "epoch": 0.23409571401114743,
      "step": 261
    },
    {
      "loss": 379.4641,
      "grad_norm": 54.482444763183594,
      "learning_rate": 0.0004348080982487953,
      "epoch": 0.23499263245563456,
      "step": 262
    },
    {
      "loss": 376.0202,
      "grad_norm": 57.2796516418457,
      "learning_rate": 0.0004343325644196468,
      "epoch": 0.23588955090012173,
      "step": 263
    },
    {
      "loss": 380.4021,
      "grad_norm": 51.36527633666992,
      "learning_rate": 0.0004338555646000492,
      "epoch": 0.23678646934460887,
      "step": 264
    },
    {
      "loss": 382.1948,
      "grad_norm": 54.246639251708984,
      "learning_rate": 0.0004333771025835655,
      "epoch": 0.23768338778909603,
      "step": 265
    },
    {
      "loss": 376.0016,
      "grad_norm": 53.845367431640625,
      "learning_rate": 0.0004328971821753873,
      "epoch": 0.2385803062335832,
      "step": 266
    },
    {
      "loss": 378.0241,
      "grad_norm": 55.82734298706055,
      "learning_rate": 0.0004324158071923049,
      "epoch": 0.23947722467807034,
      "step": 267
    },
    {
      "loss": 376.6841,
      "grad_norm": 52.28315734863281,
      "learning_rate": 0.0004319329814626768,
      "epoch": 0.2403741431225575,
      "step": 268
    },
    {
      "loss": 376.4868,
      "grad_norm": 59.60106658935547,
      "learning_rate": 0.00043144870882639907,
      "epoch": 0.24127106156704464,
      "step": 269
    },
    {
      "loss": 376.3779,
      "grad_norm": 58.55453109741211,
      "learning_rate": 0.0004309629931348752,
      "epoch": 0.2421679800115318,
      "step": 270
    },
    {
      "loss": 379.1783,
      "grad_norm": 52.10798263549805,
      "learning_rate": 0.0004304758382509849,
      "epoch": 0.24306489845601897,
      "step": 271
    },
    {
      "loss": 379.3161,
      "grad_norm": 53.941673278808594,
      "learning_rate": 0.0004299872480490542,
      "epoch": 0.2439618169005061,
      "step": 272
    },
    {
      "loss": 379.5319,
      "grad_norm": 53.70753860473633,
      "learning_rate": 0.00042949722641482383,
      "epoch": 0.24485873534499328,
      "step": 273
    },
    {
      "loss": 379.6953,
      "grad_norm": 61.60326385498047,
      "learning_rate": 0.0004290057772454187,
      "epoch": 0.24575565378948042,
      "step": 274
    },
    {
      "loss": 379.7555,
      "grad_norm": 57.09893798828125,
      "learning_rate": 0.0004285129044493169,
      "epoch": 0.24665257223396758,
      "step": 275
    },
    {
      "loss": 381.1754,
      "grad_norm": 60.31880187988281,
      "learning_rate": 0.0004280186119463186,
      "epoch": 0.24754949067845475,
      "step": 276
    },
    {
      "loss": 379.8077,
      "grad_norm": 57.53593826293945,
      "learning_rate": 0.0004275229036675148,
      "epoch": 0.24844640912294189,
      "step": 277
    },
    {
      "loss": 381.0815,
      "grad_norm": 56.55409240722656,
      "learning_rate": 0.00042702578355525615,
      "epoch": 0.24934332756742905,
      "step": 278
    },
    {
      "loss": 378.2445,
      "grad_norm": 50.37730026245117,
      "learning_rate": 0.00042652725556312156,
      "epoch": 0.2502402460119162,
      "step": 279
    },
    {
      "loss": 376.4951,
      "grad_norm": 50.24005889892578,
      "learning_rate": 0.0004260273236558867,
      "epoch": 0.2511371644564034,
      "step": 280
    },
    {
      "loss": 379.3927,
      "grad_norm": 52.99737548828125,
      "learning_rate": 0.0004255259918094926,
      "epoch": 0.2520340829008905,
      "step": 281
    },
    {
      "loss": 379.7873,
      "grad_norm": 53.95462417602539,
      "learning_rate": 0.00042502326401101386,
      "epoch": 0.25293100134537766,
      "step": 282
    },
    {
      "loss": 370.9284,
      "grad_norm": 51.21118927001953,
      "learning_rate": 0.0004245191442586273,
      "epoch": 0.2538279197898648,
      "step": 283
    },
    {
      "loss": 374.7379,
      "grad_norm": 53.918975830078125,
      "learning_rate": 0.00042401363656157954,
      "epoch": 0.254724838234352,
      "step": 284
    },
    {
      "loss": 373.7905,
      "grad_norm": 51.7956428527832,
      "learning_rate": 0.00042350674494015566,
      "epoch": 0.25562175667883913,
      "step": 285
    },
    {
      "loss": 376.9342,
      "grad_norm": 51.80348205566406,
      "learning_rate": 0.0004229984734256471,
      "epoch": 0.25651867512332627,
      "step": 286
    },
    {
      "loss": 378.537,
      "grad_norm": 53.50684356689453,
      "learning_rate": 0.0004224888260603195,
      "epoch": 0.25741559356781346,
      "step": 287
    },
    {
      "loss": 374.9467,
      "grad_norm": 52.037200927734375,
      "learning_rate": 0.0004219778068973804,
      "epoch": 0.2583125120123006,
      "step": 288
    },
    {
      "loss": 382.1371,
      "grad_norm": 48.98027420043945,
      "learning_rate": 0.0004214654200009475,
      "epoch": 0.25920943045678774,
      "step": 289
    },
    {
      "loss": 378.7361,
      "grad_norm": 51.1038818359375,
      "learning_rate": 0.0004209516694460157,
      "epoch": 0.26010634890127493,
      "step": 290
    },
    {
      "loss": 379.9825,
      "grad_norm": 53.03129577636719,
      "learning_rate": 0.0004204365593184255,
      "epoch": 0.26100326734576207,
      "step": 291
    },
    {
      "loss": 376.35,
      "grad_norm": 54.52887725830078,
      "learning_rate": 0.0004199200937148297,
      "epoch": 0.2619001857902492,
      "step": 292
    },
    {
      "loss": 376.654,
      "grad_norm": 51.10536575317383,
      "learning_rate": 0.00041940227674266105,
      "epoch": 0.26279710423473635,
      "step": 293
    },
    {
      "loss": 372.8873,
      "grad_norm": 57.231117248535156,
      "learning_rate": 0.0004188831125201,
      "epoch": 0.26369402267922354,
      "step": 294
    },
    {
      "loss": 372.2591,
      "grad_norm": 54.170921325683594,
      "learning_rate": 0.0004183626051760415,
      "epoch": 0.2645909411237107,
      "step": 295
    },
    {
      "loss": 376.232,
      "grad_norm": 48.81595230102539,
      "learning_rate": 0.0004178407588500621,
      "epoch": 0.2654878595681978,
      "step": 296
    },
    {
      "loss": 377.493,
      "grad_norm": 51.22395324707031,
      "learning_rate": 0.00041731757769238764,
      "epoch": 0.266384778012685,
      "step": 297
    },
    {
      "loss": 373.4135,
      "grad_norm": 50.80076217651367,
      "learning_rate": 0.00041679306586385944,
      "epoch": 0.26728169645717215,
      "step": 298
    },
    {
      "loss": 373.3929,
      "grad_norm": 52.78483581542969,
      "learning_rate": 0.00041626722753590185,
      "epoch": 0.2681786149016593,
      "step": 299
    },
    {
      "loss": 374.4973,
      "grad_norm": 59.0179328918457,
      "learning_rate": 0.0004157400668904887,
      "epoch": 0.2690755333461465,
      "step": 300
    },
    {
      "eval_loss": 1.6736700534820557,
      "eval_runtime": 48.4303,
      "eval_samples_per_second": 42.288,
      "eval_steps_per_second": 2.643,
      "epoch": 0.2690755333461465,
      "step": 300
    },
    {
      "loss": 370.586,
      "grad_norm": 51.39365005493164,
      "learning_rate": 0.0004152115881201102,
      "epoch": 0.2699724517906336,
      "step": 301
    },
    {
      "loss": 371.1306,
      "grad_norm": 53.13943862915039,
      "learning_rate": 0.0004146817954277395,
      "epoch": 0.27086937023512075,
      "step": 302
    },
    {
      "loss": 375.8091,
      "grad_norm": 46.9393310546875,
      "learning_rate": 0.0004141506930267995,
      "epoch": 0.2717662886796079,
      "step": 303
    },
    {
      "loss": 378.5063,
      "grad_norm": 56.166954040527344,
      "learning_rate": 0.00041361828514112884,
      "epoch": 0.2726632071240951,
      "step": 304
    },
    {
      "loss": 372.5772,
      "grad_norm": 52.24879455566406,
      "learning_rate": 0.00041308457600494917,
      "epoch": 0.2735601255685822,
      "step": 305
    },
    {
      "loss": 371.29,
      "grad_norm": 53.966949462890625,
      "learning_rate": 0.00041254956986283044,
      "epoch": 0.27445704401306936,
      "step": 306
    },
    {
      "loss": 376.5358,
      "grad_norm": 51.999046325683594,
      "learning_rate": 0.0004120132709696578,
      "epoch": 0.27535396245755656,
      "step": 307
    },
    {
      "loss": 377.9629,
      "grad_norm": 53.83307647705078,
      "learning_rate": 0.0004114756835905976,
      "epoch": 0.2762508809020437,
      "step": 308
    },
    {
      "loss": 372.8809,
      "grad_norm": 55.104217529296875,
      "learning_rate": 0.0004109368120010636,
      "epoch": 0.27714779934653083,
      "step": 309
    },
    {
      "loss": 377.9377,
      "grad_norm": 51.1360969543457,
      "learning_rate": 0.00041039666048668265,
      "epoch": 0.278044717791018,
      "step": 310
    },
    {
      "loss": 377.1788,
      "grad_norm": 50.87997817993164,
      "learning_rate": 0.00040985523334326093,
      "epoch": 0.27894163623550516,
      "step": 311
    },
    {
      "loss": 375.3121,
      "grad_norm": 49.86625289916992,
      "learning_rate": 0.00040931253487674955,
      "epoch": 0.2798385546799923,
      "step": 312
    },
    {
      "loss": 373.2664,
      "grad_norm": 51.52640151977539,
      "learning_rate": 0.00040876856940321056,
      "epoch": 0.28073547312447944,
      "step": 313
    },
    {
      "loss": 373.2856,
      "grad_norm": 49.00104904174805,
      "learning_rate": 0.00040822334124878236,
      "epoch": 0.28163239156896663,
      "step": 314
    },
    {
      "loss": 377.6501,
      "grad_norm": 52.83418655395508,
      "learning_rate": 0.00040767685474964535,
      "epoch": 0.28252931001345377,
      "step": 315
    },
    {
      "loss": 370.6684,
      "grad_norm": 49.96600341796875,
      "learning_rate": 0.00040712911425198764,
      "epoch": 0.2834262284579409,
      "step": 316
    },
    {
      "loss": 376.3713,
      "grad_norm": 50.470123291015625,
      "learning_rate": 0.0004065801241119702,
      "epoch": 0.2843231469024281,
      "step": 317
    },
    {
      "loss": 374.6679,
      "grad_norm": 47.91783142089844,
      "learning_rate": 0.0004060298886956926,
      "epoch": 0.28522006534691524,
      "step": 318
    },
    {
      "loss": 376.8799,
      "grad_norm": 52.6668586730957,
      "learning_rate": 0.0004054784123791577,
      "epoch": 0.2861169837914024,
      "step": 319
    },
    {
      "loss": 371.9651,
      "grad_norm": 50.082279205322266,
      "learning_rate": 0.00040492569954823763,
      "epoch": 0.2870139022358896,
      "step": 320
    },
    {
      "loss": 373.8972,
      "grad_norm": 56.001190185546875,
      "learning_rate": 0.0004043717545986381,
      "epoch": 0.2879108206803767,
      "step": 321
    },
    {
      "loss": 370.1523,
      "grad_norm": 53.00112533569336,
      "learning_rate": 0.0004038165819358639,
      "epoch": 0.28880773912486385,
      "step": 322
    },
    {
      "loss": 377.1375,
      "grad_norm": 52.706729888916016,
      "learning_rate": 0.0004032601859751839,
      "epoch": 0.28970465756935104,
      "step": 323
    },
    {
      "loss": 375.1089,
      "grad_norm": 51.362571716308594,
      "learning_rate": 0.00040270257114159583,
      "epoch": 0.2906015760138382,
      "step": 324
    },
    {
      "loss": 370.7276,
      "grad_norm": 54.43815994262695,
      "learning_rate": 0.00040214374186979074,
      "epoch": 0.2914984944583253,
      "step": 325
    },
    {
      "loss": 375.119,
      "grad_norm": 51.00381851196289,
      "learning_rate": 0.0004015837026041186,
      "epoch": 0.29239541290281246,
      "step": 326
    },
    {
      "loss": 371.2367,
      "grad_norm": 57.776222229003906,
      "learning_rate": 0.000401022457798552,
      "epoch": 0.29329233134729965,
      "step": 327
    },
    {
      "loss": 380.1667,
      "grad_norm": 53.284149169921875,
      "learning_rate": 0.0004004600119166513,
      "epoch": 0.2941892497917868,
      "step": 328
    },
    {
      "loss": 369.6853,
      "grad_norm": 56.30731964111328,
      "learning_rate": 0.000399896369431529,
      "epoch": 0.2950861682362739,
      "step": 329
    },
    {
      "loss": 374.0436,
      "grad_norm": 54.28211975097656,
      "learning_rate": 0.00039933153482581406,
      "epoch": 0.2959830866807611,
      "step": 330
    },
    {
      "loss": 372.2117,
      "grad_norm": 50.88725280761719,
      "learning_rate": 0.00039876551259161643,
      "epoch": 0.29688000512524826,
      "step": 331
    },
    {
      "loss": 374.7655,
      "grad_norm": 54.17941665649414,
      "learning_rate": 0.00039819830723049105,
      "epoch": 0.2977769235697354,
      "step": 332
    },
    {
      "loss": 376.0198,
      "grad_norm": 52.40755081176758,
      "learning_rate": 0.0003976299232534024,
      "epoch": 0.2986738420142226,
      "step": 333
    },
    {
      "loss": 371.5096,
      "grad_norm": 50.74897384643555,
      "learning_rate": 0.0003970603651806886,
      "epoch": 0.29957076045870973,
      "step": 334
    },
    {
      "loss": 375.5447,
      "grad_norm": 47.52690124511719,
      "learning_rate": 0.00039648963754202496,
      "epoch": 0.30046767890319687,
      "step": 335
    },
    {
      "loss": 376.1951,
      "grad_norm": 52.93135070800781,
      "learning_rate": 0.0003959177448763883,
      "epoch": 0.301364597347684,
      "step": 336
    },
    {
      "loss": 371.1348,
      "grad_norm": 50.335418701171875,
      "learning_rate": 0.0003953446917320214,
      "epoch": 0.3022615157921712,
      "step": 337
    },
    {
      "loss": 375.4595,
      "grad_norm": 51.26169204711914,
      "learning_rate": 0.0003947704826663955,
      "epoch": 0.30315843423665834,
      "step": 338
    },
    {
      "loss": 372.898,
      "grad_norm": 54.89933776855469,
      "learning_rate": 0.0003941951222461756,
      "epoch": 0.3040553526811455,
      "step": 339
    },
    {
      "loss": 370.8462,
      "grad_norm": 54.09654235839844,
      "learning_rate": 0.00039361861504718276,
      "epoch": 0.30495227112563267,
      "step": 340
    },
    {
      "loss": 373.6092,
      "grad_norm": 52.41168975830078,
      "learning_rate": 0.0003930409656543588,
      "epoch": 0.3058491895701198,
      "step": 341
    },
    {
      "loss": 374.9025,
      "grad_norm": 45.53563690185547,
      "learning_rate": 0.00039246217866172907,
      "epoch": 0.30674610801460694,
      "step": 342
    },
    {
      "loss": 376.0628,
      "grad_norm": 51.11941146850586,
      "learning_rate": 0.00039188225867236643,
      "epoch": 0.30764302645909414,
      "step": 343
    },
    {
      "loss": 374.4197,
      "grad_norm": 50.10179901123047,
      "learning_rate": 0.0003913012102983542,
      "epoch": 0.3085399449035813,
      "step": 344
    },
    {
      "loss": 370.0171,
      "grad_norm": 50.524696350097656,
      "learning_rate": 0.00039071903816074977,
      "epoch": 0.3094368633480684,
      "step": 345
    },
    {
      "loss": 371.2375,
      "grad_norm": 51.18245315551758,
      "learning_rate": 0.00039013574688954793,
      "epoch": 0.31033378179255555,
      "step": 346
    },
    {
      "loss": 374.7748,
      "grad_norm": 64.64472198486328,
      "learning_rate": 0.0003895513411236438,
      "epoch": 0.31123070023704275,
      "step": 347
    },
    {
      "loss": 377.3275,
      "grad_norm": 56.01545715332031,
      "learning_rate": 0.0003889658255107959,
      "epoch": 0.3121276186815299,
      "step": 348
    },
    {
      "loss": 369.5843,
      "grad_norm": 56.439754486083984,
      "learning_rate": 0.0003883792047075896,
      "epoch": 0.313024537126017,
      "step": 349
    },
    {
      "loss": 368.456,
      "grad_norm": 58.23375701904297,
      "learning_rate": 0.0003877914833793996,
      "epoch": 0.3139214555705042,
      "step": 350
    },
    {
      "eval_loss": 1.661989450454712,
      "eval_runtime": 36.2255,
      "eval_samples_per_second": 56.535,
      "eval_steps_per_second": 3.533,
      "epoch": 0.3139214555705042,
      "step": 350
    },
{ |
|
"loss": 374.9042, |
|
"grad_norm": 52.63510513305664, |
|
"learning_rate": 0.00038720266620035314, |
|
"epoch": 0.31481837401499135, |
|
"step": 351 |
|
}, |
|
{ |
|
"loss": 367.9091, |
|
"grad_norm": 55.49558639526367, |
|
"learning_rate": 0.0003866127578532927, |
|
"epoch": 0.3157152924594785, |
|
"step": 352 |
|
}, |
|
{ |
|
"loss": 374.5601, |
|
"grad_norm": 52.941497802734375, |
|
"learning_rate": 0.0003860217630297387, |
|
"epoch": 0.3166122109039657, |
|
"step": 353 |
|
}, |
|
{ |
|
"loss": 371.4058, |
|
"grad_norm": 44.237648010253906, |
|
"learning_rate": 0.0003854296864298523, |
|
"epoch": 0.3175091293484528, |
|
"step": 354 |
|
}, |
|
{ |
|
"loss": 376.094, |
|
"grad_norm": 52.86402893066406, |
|
"learning_rate": 0.00038483653276239816, |
|
"epoch": 0.31840604779293996, |
|
"step": 355 |
|
}, |
|
{ |
|
"loss": 374.3872, |
|
"grad_norm": 49.61796569824219, |
|
"learning_rate": 0.0003842423067447066, |
|
"epoch": 0.3193029662374271, |
|
"step": 356 |
|
}, |
|
{ |
|
"loss": 371.5387, |
|
"grad_norm": 49.825504302978516, |
|
"learning_rate": 0.0003836470131026365, |
|
"epoch": 0.3201998846819143, |
|
"step": 357 |
|
}, |
|
{ |
|
"loss": 371.4422, |
|
"grad_norm": 53.598228454589844, |
|
"learning_rate": 0.0003830506565705372, |
|
"epoch": 0.32109680312640143, |
|
"step": 358 |
|
}, |
|
{ |
|
"loss": 371.03, |
|
"grad_norm": 48.73537063598633, |
|
"learning_rate": 0.00038245324189121153, |
|
"epoch": 0.32199372157088857, |
|
"step": 359 |
|
}, |
|
{ |
|
"loss": 377.8967, |
|
"grad_norm": 48.377281188964844, |
|
"learning_rate": 0.00038185477381587763, |
|
"epoch": 0.32289064001537576, |
|
"step": 360 |
|
}, |
|
{ |
|
"loss": 374.9411, |
|
"grad_norm": 53.932228088378906, |
|
"learning_rate": 0.0003812552571041311, |
|
"epoch": 0.3237875584598629, |
|
"step": 361 |
|
}, |
|
{ |
|
"loss": 374.6432, |
|
"grad_norm": 52.54889678955078, |
|
"learning_rate": 0.00038065469652390736, |
|
"epoch": 0.32468447690435004, |
|
"step": 362 |
|
}, |
|
{ |
|
"loss": 371.9634, |
|
"grad_norm": 53.84141159057617, |
|
"learning_rate": 0.000380053096851444, |
|
"epoch": 0.32558139534883723, |
|
"step": 363 |
|
}, |
|
{ |
|
"loss": 371.487, |
|
"grad_norm": 49.041019439697266, |
|
"learning_rate": 0.00037945046287124197, |
|
"epoch": 0.32647831379332437, |
|
"step": 364 |
|
}, |
|
{ |
|
"loss": 370.3628, |
|
"grad_norm": 51.356388092041016, |
|
"learning_rate": 0.00037884679937602827, |
|
"epoch": 0.3273752322378115, |
|
"step": 365 |
|
}, |
|
{ |
|
"loss": 371.4878, |
|
"grad_norm": 49.55571746826172, |
|
"learning_rate": 0.0003782421111667178, |
|
"epoch": 0.32827215068229865, |
|
"step": 366 |
|
}, |
|
{ |
|
"loss": 373.209, |
|
"grad_norm": 51.30101013183594, |
|
"learning_rate": 0.00037763640305237456, |
|
"epoch": 0.32916906912678584, |
|
"step": 367 |
|
}, |
|
{ |
|
"loss": 369.0127, |
|
"grad_norm": 51.14597702026367, |
|
"learning_rate": 0.000377029679850174, |
|
"epoch": 0.330065987571273, |
|
"step": 368 |
|
}, |
|
{ |
|
"loss": 374.4203, |
|
"grad_norm": 51.925132751464844, |
|
"learning_rate": 0.00037642194638536487, |
|
"epoch": 0.3309629060157601, |
|
"step": 369 |
|
}, |
|
{ |
|
"loss": 370.4622, |
|
"grad_norm": 53.620052337646484, |
|
"learning_rate": 0.00037581320749123, |
|
"epoch": 0.3318598244602473, |
|
"step": 370 |
|
}, |
|
{ |
|
"loss": 369.0265, |
|
"grad_norm": 47.18992233276367, |
|
"learning_rate": 0.0003752034680090485, |
|
"epoch": 0.33275674290473445, |
|
"step": 371 |
|
}, |
|
{ |
|
"loss": 372.8077, |
|
"grad_norm": 56.7562141418457, |
|
"learning_rate": 0.0003745927327880574, |
|
"epoch": 0.3336536613492216, |
|
"step": 372 |
|
}, |
|
{ |
|
"loss": 368.2184, |
|
"grad_norm": 56.05765914916992, |
|
"learning_rate": 0.00037398100668541227, |
|
"epoch": 0.3345505797937088, |
|
"step": 373 |
|
}, |
|
{ |
|
"loss": 376.1522, |
|
"grad_norm": 50.888771057128906, |
|
"learning_rate": 0.00037336829456614975, |
|
"epoch": 0.3354474982381959, |
|
"step": 374 |
|
}, |
|
{ |
|
"loss": 371.1161, |
|
"grad_norm": 49.758975982666016, |
|
"learning_rate": 0.0003727546013031478, |
|
"epoch": 0.33634441668268306, |
|
"step": 375 |
|
}, |
|
{ |
|
"loss": 371.6988, |
|
"grad_norm": 53.891990661621094, |
|
"learning_rate": 0.00037213993177708746, |
|
"epoch": 0.33724133512717025, |
|
"step": 376 |
|
}, |
|
{ |
|
"loss": 370.6019, |
|
"grad_norm": 50.557762145996094, |
|
"learning_rate": 0.000371524290876414, |
|
"epoch": 0.3381382535716574, |
|
"step": 377 |
|
}, |
|
{ |
|
"loss": 373.2912, |
|
"grad_norm": 51.6466064453125, |
|
"learning_rate": 0.00037090768349729833, |
|
"epoch": 0.3390351720161445, |
|
"step": 378 |
|
}, |
|
{ |
|
"loss": 372.9784, |
|
"grad_norm": 48.213077545166016, |
|
"learning_rate": 0.00037029011454359695, |
|
"epoch": 0.33993209046063166, |
|
"step": 379 |
|
}, |
|
{ |
|
"loss": 368.0577, |
|
"grad_norm": 49.39459991455078, |
|
"learning_rate": 0.0003696715889268145, |
|
"epoch": 0.34082900890511886, |
|
"step": 380 |
|
}, |
|
{ |
|
"loss": 371.9662, |
|
"grad_norm": 49.54859924316406, |
|
"learning_rate": 0.00036905211156606344, |
|
"epoch": 0.341725927349606, |
|
"step": 381 |
|
}, |
|
{ |
|
"loss": 376.1466, |
|
"grad_norm": 54.29618835449219, |
|
"learning_rate": 0.00036843168738802574, |
|
"epoch": 0.34262284579409313, |
|
"step": 382 |
|
}, |
|
{ |
|
"loss": 372.8206, |
|
"grad_norm": 47.55562210083008, |
|
"learning_rate": 0.00036781032132691304, |
|
"epoch": 0.3435197642385803, |
|
"step": 383 |
|
}, |
|
{ |
|
"loss": 370.9735, |
|
"grad_norm": 49.289615631103516, |
|
"learning_rate": 0.00036718801832442814, |
|
"epoch": 0.34441668268306747, |
|
"step": 384 |
|
}, |
|
{ |
|
"loss": 370.5686, |
|
"grad_norm": 50.339176177978516, |
|
"learning_rate": 0.000366564783329725, |
|
"epoch": 0.3453136011275546, |
|
"step": 385 |
|
}, |
|
{ |
|
"loss": 371.3257, |
|
"grad_norm": 49.51339340209961, |
|
"learning_rate": 0.00036594062129936974, |
|
"epoch": 0.3462105195720418, |
|
"step": 386 |
|
}, |
|
{ |
|
"loss": 366.3475, |
|
"grad_norm": 48.21767044067383, |
|
"learning_rate": 0.0003653155371973012, |
|
"epoch": 0.34710743801652894, |
|
"step": 387 |
|
}, |
|
{ |
|
"loss": 369.8744, |
|
"grad_norm": 52.45291519165039, |
|
"learning_rate": 0.0003646895359947915, |
|
"epoch": 0.3480043564610161, |
|
"step": 388 |
|
}, |
|
{ |
|
"loss": 372.5318, |
|
"grad_norm": 49.45993423461914, |
|
"learning_rate": 0.00036406262267040624, |
|
"epoch": 0.3489012749055032, |
|
"step": 389 |
|
}, |
|
{ |
|
"loss": 369.184, |
|
"grad_norm": 48.8317756652832, |
|
"learning_rate": 0.0003634348022099652, |
|
"epoch": 0.3497981933499904, |
|
"step": 390 |
|
}, |
|
{ |
|
"loss": 373.9739, |
|
"grad_norm": 50.6275634765625, |
|
"learning_rate": 0.0003628060796065027, |
|
"epoch": 0.35069511179447754, |
|
"step": 391 |
|
}, |
|
{ |
|
"loss": 372.0473, |
|
"grad_norm": 48.547447204589844, |
|
"learning_rate": 0.00036217645986022756, |
|
"epoch": 0.3515920302389647, |
|
"step": 392 |
|
}, |
|
{ |
|
"loss": 364.9705, |
|
"grad_norm": 48.18462371826172, |
|
"learning_rate": 0.0003615459479784837, |
|
"epoch": 0.3524889486834519, |
|
"step": 393 |
|
}, |
|
{ |
|
"loss": 369.6471, |
|
"grad_norm": 46.10414123535156, |
|
"learning_rate": 0.0003609145489757101, |
|
"epoch": 0.353385867127939, |
|
"step": 394 |
|
}, |
|
{ |
|
"loss": 371.7173, |
|
"grad_norm": 46.38992691040039, |
|
"learning_rate": 0.0003602822678734008, |
|
"epoch": 0.35428278557242615, |
|
"step": 395 |
|
}, |
|
{ |
|
"loss": 367.3975, |
|
"grad_norm": 45.87107467651367, |
|
"learning_rate": 0.00035964910970006557, |
|
"epoch": 0.35517970401691334, |
|
"step": 396 |
|
}, |
|
{ |
|
"loss": 371.2871, |
|
"grad_norm": 46.54446029663086, |
|
"learning_rate": 0.00035901507949118915, |
|
"epoch": 0.3560766224614005, |
|
"step": 397 |
|
}, |
|
{ |
|
"loss": 368.7915, |
|
"grad_norm": 45.7996826171875, |
|
"learning_rate": 0.0003583801822891917, |
|
"epoch": 0.3569735409058876, |
|
"step": 398 |
|
}, |
|
{ |
|
"loss": 371.0395, |
|
"grad_norm": 48.34632873535156, |
|
"learning_rate": 0.0003577444231433885, |
|
"epoch": 0.35787045935037476, |
|
"step": 399 |
|
}, |
|
{ |
|
"loss": 374.4672, |
|
"grad_norm": 48.63014221191406, |
|
"learning_rate": 0.00035710780710994985, |
|
"epoch": 0.35876737779486195, |
|
"step": 400 |
|
}, |
|
{ |
|
"eval_loss": 1.6527702808380127, |
|
"eval_runtime": 51.2432, |
|
"eval_samples_per_second": 39.966, |
|
"eval_steps_per_second": 2.498, |
|
"epoch": 0.35876737779486195, |
|
"step": 400 |
|
}, |
|
{ |
|
"loss": 369.2286, |
|
"grad_norm": 50.575950622558594, |
|
"learning_rate": 0.00035647033925186066, |
|
"epoch": 0.3596642962393491, |
|
"step": 401 |
|
}, |
|
{ |
|
"loss": 366.6179, |
|
"grad_norm": 50.074954986572266, |
|
"learning_rate": 0.0003558320246388808, |
|
"epoch": 0.36056121468383623, |
|
"step": 402 |
|
}, |
|
{ |
|
"loss": 370.1017, |
|
"grad_norm": 51.92937088012695, |
|
"learning_rate": 0.00035519286834750403, |
|
"epoch": 0.3614581331283234, |
|
"step": 403 |
|
}, |
|
{ |
|
"loss": 366.74, |
|
"grad_norm": 52.75185775756836, |
|
"learning_rate": 0.00035455287546091785, |
|
"epoch": 0.36235505157281056, |
|
"step": 404 |
|
}, |
|
{ |
|
"loss": 369.307, |
|
"grad_norm": 50.451271057128906, |
|
"learning_rate": 0.0003539120510689636, |
|
"epoch": 0.3632519700172977, |
|
"step": 405 |
|
}, |
|
{ |
|
"loss": 374.2456, |
|
"grad_norm": 56.06875228881836, |
|
"learning_rate": 0.0003532704002680951, |
|
"epoch": 0.3641488884617849, |
|
"step": 406 |
|
}, |
|
{ |
|
"loss": 371.9364, |
|
"grad_norm": 49.18859100341797, |
|
"learning_rate": 0.0003526279281613388, |
|
"epoch": 0.36504580690627203, |
|
"step": 407 |
|
}, |
|
{ |
|
"loss": 375.3452, |
|
"grad_norm": 60.49544143676758, |
|
"learning_rate": 0.00035198463985825303, |
|
"epoch": 0.36594272535075917, |
|
"step": 408 |
|
}, |
|
{ |
|
"loss": 364.7332, |
|
"grad_norm": 55.390960693359375, |
|
"learning_rate": 0.0003513405404748872, |
|
"epoch": 0.3668396437952463, |
|
"step": 409 |
|
}, |
|
{ |
|
"loss": 367.328, |
|
"grad_norm": 45.79146194458008, |
|
"learning_rate": 0.00035069563513374105, |
|
"epoch": 0.3677365622397335, |
|
"step": 410 |
|
}, |
|
{ |
|
"loss": 372.7194, |
|
"grad_norm": 50.601531982421875, |
|
"learning_rate": 0.0003500499289637243, |
|
"epoch": 0.36863348068422064, |
|
"step": 411 |
|
}, |
|
{ |
|
"loss": 373.3177, |
|
"grad_norm": 58.5416374206543, |
|
"learning_rate": 0.0003494034271001158, |
|
"epoch": 0.3695303991287078, |
|
"step": 412 |
|
}, |
|
{ |
|
"loss": 367.5529, |
|
"grad_norm": 48.93236541748047, |
|
"learning_rate": 0.00034875613468452203, |
|
"epoch": 0.37042731757319497, |
|
"step": 413 |
|
}, |
|
{ |
|
"loss": 368.6186, |
|
"grad_norm": 49.043251037597656, |
|
"learning_rate": 0.00034810805686483713, |
|
"epoch": 0.3713242360176821, |
|
"step": 414 |
|
}, |
|
{ |
|
"loss": 363.3611, |
|
"grad_norm": 48.577144622802734, |
|
"learning_rate": 0.0003474591987952013, |
|
"epoch": 0.37222115446216925, |
|
"step": 415 |
|
}, |
|
{ |
|
"loss": 368.0312, |
|
"grad_norm": 48.73127746582031, |
|
"learning_rate": 0.0003468095656359601, |
|
"epoch": 0.37311807290665644, |
|
"step": 416 |
|
}, |
|
{ |
|
"loss": 367.3114, |
|
"grad_norm": 51.46812057495117, |
|
"learning_rate": 0.0003461591625536234, |
|
"epoch": 0.3740149913511436, |
|
"step": 417 |
|
}, |
|
{ |
|
"loss": 375.6931, |
|
"grad_norm": 49.236141204833984, |
|
"learning_rate": 0.0003455079947208242, |
|
"epoch": 0.3749119097956307, |
|
"step": 418 |
|
}, |
|
{ |
|
"loss": 365.6711, |
|
"grad_norm": 48.81379318237305, |
|
"learning_rate": 0.00034485606731627755, |
|
"epoch": 0.37580882824011785, |
|
"step": 419 |
|
}, |
|
{ |
|
"loss": 364.9393, |
|
"grad_norm": 51.185340881347656, |
|
"learning_rate": 0.0003442033855247394, |
|
"epoch": 0.37670574668460505, |
|
"step": 420 |
|
}, |
|
{ |
|
"loss": 369.8553, |
|
"grad_norm": 53.58812713623047, |
|
"learning_rate": 0.000343549954536965, |
|
"epoch": 0.3776026651290922, |
|
"step": 421 |
|
}, |
|
{ |
|
"loss": 372.3922, |
|
"grad_norm": 51.472042083740234, |
|
"learning_rate": 0.0003428957795496685, |
|
"epoch": 0.3784995835735793, |
|
"step": 422 |
|
}, |
|
{ |
|
"loss": 371.9807, |
|
"grad_norm": 54.97187805175781, |
|
"learning_rate": 0.0003422408657654805, |
|
"epoch": 0.3793965020180665, |
|
"step": 423 |
|
}, |
|
{ |
|
"loss": 370.048, |
|
"grad_norm": 54.97746276855469, |
|
"learning_rate": 0.0003415852183929077, |
|
"epoch": 0.38029342046255366, |
|
"step": 424 |
|
}, |
|
{ |
|
"loss": 370.0667, |
|
"grad_norm": 46.41242980957031, |
|
"learning_rate": 0.0003409288426462904, |
|
"epoch": 0.3811903389070408, |
|
"step": 425 |
|
}, |
|
{ |
|
"loss": 366.4669, |
|
"grad_norm": 51.722904205322266, |
|
"learning_rate": 0.0003402717437457624, |
|
"epoch": 0.382087257351528, |
|
"step": 426 |
|
}, |
|
{ |
|
"loss": 367.8651, |
|
"grad_norm": 51.60542678833008, |
|
"learning_rate": 0.00033961392691720803, |
|
"epoch": 0.3829841757960151, |
|
"step": 427 |
|
}, |
|
{ |
|
"loss": 364.8575, |
|
"grad_norm": 46.896331787109375, |
|
"learning_rate": 0.0003389553973922217, |
|
"epoch": 0.38388109424050226, |
|
"step": 428 |
|
}, |
|
{ |
|
"loss": 366.1106, |
|
"grad_norm": 47.48381042480469, |
|
"learning_rate": 0.00033829616040806566, |
|
"epoch": 0.38477801268498946, |
|
"step": 429 |
|
}, |
|
{ |
|
"loss": 369.6983, |
|
"grad_norm": 47.15787124633789, |
|
"learning_rate": 0.0003376362212076287, |
|
"epoch": 0.3856749311294766, |
|
"step": 430 |
|
}, |
|
{ |
|
"loss": 372.8012, |
|
"grad_norm": 49.67255401611328, |
|
"learning_rate": 0.0003369755850393841, |
|
"epoch": 0.38657184957396373, |
|
"step": 431 |
|
}, |
|
{ |
|
"loss": 369.0824, |
|
"grad_norm": 50.87350082397461, |
|
"learning_rate": 0.0003363142571573484, |
|
"epoch": 0.38746876801845087, |
|
"step": 432 |
|
}, |
|
{ |
|
"loss": 368.5385, |
|
"grad_norm": 52.32754135131836, |
|
"learning_rate": 0.0003356522428210391, |
|
"epoch": 0.38836568646293806, |
|
"step": 433 |
|
}, |
|
{ |
|
"loss": 370.1974, |
|
"grad_norm": 46.638084411621094, |
|
"learning_rate": 0.0003349895472954331, |
|
"epoch": 0.3892626049074252, |
|
"step": 434 |
|
}, |
|
{ |
|
"loss": 367.2549, |
|
"grad_norm": 51.39384460449219, |
|
"learning_rate": 0.00033432617585092467, |
|
"epoch": 0.39015952335191234, |
|
"step": 435 |
|
}, |
|
{ |
|
"loss": 368.2899, |
|
"grad_norm": 49.1676139831543, |
|
"learning_rate": 0.00033366213376328396, |
|
"epoch": 0.39105644179639953, |
|
"step": 436 |
|
}, |
|
{ |
|
"loss": 372.2977, |
|
"grad_norm": 51.6141242980957, |
|
"learning_rate": 0.0003329974263136144, |
|
"epoch": 0.3919533602408867, |
|
"step": 437 |
|
}, |
|
{ |
|
"loss": 368.3735, |
|
"grad_norm": 49.94230270385742, |
|
"learning_rate": 0.0003323320587883111, |
|
"epoch": 0.3928502786853738, |
|
"step": 438 |
|
}, |
|
{ |
|
"loss": 370.6481, |
|
"grad_norm": 49.947837829589844, |
|
"learning_rate": 0.0003316660364790188, |
|
"epoch": 0.393747197129861, |
|
"step": 439 |
|
}, |
|
{ |
|
"loss": 369.6432, |
|
"grad_norm": 48.53517532348633, |
|
"learning_rate": 0.0003309993646825896, |
|
"epoch": 0.39464411557434814, |
|
"step": 440 |
|
}, |
|
{ |
|
"loss": 366.7539, |
|
"grad_norm": 50.93443298339844, |
|
"learning_rate": 0.00033033204870104116, |
|
"epoch": 0.3955410340188353, |
|
"step": 441 |
|
}, |
|
{ |
|
"loss": 367.3075, |
|
"grad_norm": 49.63651657104492, |
|
"learning_rate": 0.000329664093841514, |
|
"epoch": 0.3964379524633224, |
|
"step": 442 |
|
}, |
|
{ |
|
"loss": 369.597, |
|
"grad_norm": 48.85470962524414, |
|
"learning_rate": 0.00032899550541623, |
|
"epoch": 0.3973348709078096, |
|
"step": 443 |
|
}, |
|
{ |
|
"loss": 366.1455, |
|
"grad_norm": 49.675559997558594, |
|
"learning_rate": 0.0003283262887424494, |
|
"epoch": 0.39823178935229675, |
|
"step": 444 |
|
}, |
|
{ |
|
"loss": 362.2254, |
|
"grad_norm": 48.583370208740234, |
|
"learning_rate": 0.0003276564491424292, |
|
"epoch": 0.3991287077967839, |
|
"step": 445 |
|
}, |
|
{ |
|
"loss": 372.5689, |
|
"grad_norm": 50.507293701171875, |
|
"learning_rate": 0.0003269859919433802, |
|
"epoch": 0.4000256262412711, |
|
"step": 446 |
|
}, |
|
{ |
|
"loss": 366.7801, |
|
"grad_norm": 50.75261688232422, |
|
"learning_rate": 0.0003263149224774251, |
|
"epoch": 0.4009225446857582, |
|
"step": 447 |
|
}, |
|
{ |
|
"loss": 369.5224, |
|
"grad_norm": 49.42384719848633, |
|
"learning_rate": 0.00032564324608155604, |
|
"epoch": 0.40181946313024536, |
|
"step": 448 |
|
}, |
|
{ |
|
"loss": 369.6519, |
|
"grad_norm": 49.12044143676758, |
|
"learning_rate": 0.00032497096809759184, |
|
"epoch": 0.40271638157473255, |
|
"step": 449 |
|
}, |
|
{ |
|
"loss": 370.9763, |
|
"grad_norm": 53.04697036743164, |
|
"learning_rate": 0.0003242980938721359, |
|
"epoch": 0.4036133000192197, |
|
"step": 450 |
|
}, |
|
{ |
|
"eval_loss": 1.6399173736572266, |
|
"eval_runtime": 36.1587, |
|
"eval_samples_per_second": 56.639, |
|
"eval_steps_per_second": 3.54, |
|
"epoch": 0.4036133000192197, |
|
"step": 450 |
|
}, |
|
{ |
|
"loss": 367.9265, |
|
"grad_norm": 52.0450553894043, |
|
"learning_rate": 0.00032362462875653355, |
|
"epoch": 0.4045102184637068, |
|
"step": 451 |
|
}, |
|
{ |
|
"loss": 372.4974, |
|
"grad_norm": 48.33359146118164, |
|
"learning_rate": 0.0003229505781068291, |
|
"epoch": 0.40540713690819397, |
|
"step": 452 |
|
}, |
|
{ |
|
"loss": 366.6081, |
|
"grad_norm": 49.462974548339844, |
|
"learning_rate": 0.00032227594728372397, |
|
"epoch": 0.40630405535268116, |
|
"step": 453 |
|
}, |
|
{ |
|
"loss": 366.3152, |
|
"grad_norm": 48.31398391723633, |
|
"learning_rate": 0.0003216007416525335, |
|
"epoch": 0.4072009737971683, |
|
"step": 454 |
|
}, |
|
{ |
|
"loss": 369.983, |
|
"grad_norm": 47.523338317871094, |
|
"learning_rate": 0.0003209249665831445, |
|
"epoch": 0.40809789224165544, |
|
"step": 455 |
|
}, |
|
{ |
|
"loss": 366.8036, |
|
"grad_norm": 45.295806884765625, |
|
"learning_rate": 0.00032024862744997265, |
|
"epoch": 0.40899481068614263, |
|
"step": 456 |
|
}, |
|
{ |
|
"loss": 366.4848, |
|
"grad_norm": 49.89873504638672, |
|
"learning_rate": 0.0003195717296319193, |
|
"epoch": 0.40989172913062977, |
|
"step": 457 |
|
}, |
|
{ |
|
"loss": 365.4414, |
|
"grad_norm": 46.948055267333984, |
|
"learning_rate": 0.00031889427851232915, |
|
"epoch": 0.4107886475751169, |
|
"step": 458 |
|
}, |
|
{ |
|
"loss": 369.7285, |
|
"grad_norm": 48.40359115600586, |
|
"learning_rate": 0.0003182162794789474, |
|
"epoch": 0.4116855660196041, |
|
"step": 459 |
|
}, |
|
{ |
|
"loss": 370.345, |
|
"grad_norm": 48.55045700073242, |
|
"learning_rate": 0.0003175377379238767, |
|
"epoch": 0.41258248446409124, |
|
"step": 460 |
|
}, |
|
{ |
|
"loss": 366.95, |
|
"grad_norm": 47.37104415893555, |
|
"learning_rate": 0.0003168586592435341, |
|
"epoch": 0.4134794029085784, |
|
"step": 461 |
|
}, |
|
{ |
|
"loss": 370.2368, |
|
"grad_norm": 51.285888671875, |
|
"learning_rate": 0.00031617904883860903, |
|
"epoch": 0.4143763213530655, |
|
"step": 462 |
|
}, |
|
{ |
|
"loss": 365.4067, |
|
"grad_norm": 50.595340728759766, |
|
"learning_rate": 0.000315498912114019, |
|
"epoch": 0.4152732397975527, |
|
"step": 463 |
|
}, |
|
{ |
|
"loss": 366.4186, |
|
"grad_norm": 45.943519592285156, |
|
"learning_rate": 0.0003148182544788678, |
|
"epoch": 0.41617015824203984, |
|
"step": 464 |
|
}, |
|
{ |
|
"loss": 362.8856, |
|
"grad_norm": 52.45280075073242, |
|
"learning_rate": 0.0003141370813464018, |
|
"epoch": 0.417067076686527, |
|
"step": 465 |
|
}, |
|
{ |
|
"loss": 366.827, |
|
"grad_norm": 47.95954132080078, |
|
"learning_rate": 0.0003134553981339672, |
|
"epoch": 0.4179639951310142, |
|
"step": 466 |
|
}, |
|
{ |
|
"loss": 370.8824, |
|
"grad_norm": 51.57919692993164, |
|
"learning_rate": 0.00031277321026296657, |
|
"epoch": 0.4188609135755013, |
|
"step": 467 |
|
}, |
|
{ |
|
"loss": 368.826, |
|
"grad_norm": 51.78611755371094, |
|
"learning_rate": 0.0003120905231588164, |
|
"epoch": 0.41975783201998845, |
|
"step": 468 |
|
}, |
|
{ |
|
"loss": 369.1159, |
|
"grad_norm": 46.962074279785156, |
|
"learning_rate": 0.0003114073422509034, |
|
"epoch": 0.42065475046447565, |
|
"step": 469 |
|
}, |
|
{ |
|
"loss": 361.8488, |
|
"grad_norm": 46.85802459716797, |
|
"learning_rate": 0.0003107236729725414, |
|
"epoch": 0.4215516689089628, |
|
"step": 470 |
|
}, |
|
{ |
|
"loss": 367.4666, |
|
"grad_norm": 54.017906188964844, |
|
"learning_rate": 0.0003100395207609284, |
|
"epoch": 0.4224485873534499, |
|
"step": 471 |
|
}, |
|
{ |
|
"loss": 366.9775, |
|
"grad_norm": 53.34091567993164, |
|
"learning_rate": 0.000309354891057103, |
|
"epoch": 0.42334550579793706, |
|
"step": 472 |
|
}, |
|
{ |
|
"loss": 366.0834, |
|
"grad_norm": 47.76055908203125, |
|
"learning_rate": 0.00030866978930590126, |
|
"epoch": 0.42424242424242425, |
|
"step": 473 |
|
}, |
|
{ |
|
"loss": 368.5773, |
|
"grad_norm": 49.945613861083984, |
|
"learning_rate": 0.00030798422095591364, |
|
"epoch": 0.4251393426869114, |
|
"step": 474 |
|
}, |
|
{ |
|
"loss": 363.8445, |
|
"grad_norm": 48.995609283447266, |
|
"learning_rate": 0.00030729819145944114, |
|
"epoch": 0.42603626113139853, |
|
"step": 475 |
|
}, |
|
{ |
|
"loss": 362.6448, |
|
"grad_norm": 45.06385040283203, |
|
"learning_rate": 0.00030661170627245256, |
|
"epoch": 0.4269331795758857, |
|
"step": 476 |
|
}, |
|
{ |
|
"loss": 364.0858, |
|
"grad_norm": 49.73957061767578, |
|
"learning_rate": 0.00030592477085454047, |
|
"epoch": 0.42783009802037286, |
|
"step": 477 |
|
}, |
|
{ |
|
"loss": 371.1085, |
|
"grad_norm": 49.45321273803711, |
|
"learning_rate": 0.00030523739066887836, |
|
"epoch": 0.42872701646486, |
|
"step": 478 |
|
}, |
|
{ |
|
"loss": 363.6934, |
|
"grad_norm": 49.325355529785156, |
|
"learning_rate": 0.00030454957118217674, |
|
"epoch": 0.4296239349093472, |
|
"step": 479 |
|
}, |
|
{ |
|
"loss": 368.4297, |
|
"grad_norm": 47.509742736816406, |
|
"learning_rate": 0.0003038613178646401, |
|
"epoch": 0.43052085335383433, |
|
"step": 480 |
|
}, |
|
{ |
|
"loss": 366.2455, |
|
"grad_norm": 48.50214767456055, |
|
"learning_rate": 0.000303172636189923, |
|
"epoch": 0.43141777179832147, |
|
"step": 481 |
|
}, |
|
{ |
|
"loss": 362.4247, |
|
"grad_norm": 46.59059143066406, |
|
"learning_rate": 0.00030248353163508674, |
|
"epoch": 0.43231469024280866, |
|
"step": 482 |
|
}, |
|
{ |
|
"loss": 368.7481, |
|
"grad_norm": 47.74319839477539, |
|
"learning_rate": 0.0003017940096805557, |
|
"epoch": 0.4332116086872958, |
|
"step": 483 |
|
}, |
|
{ |
|
"loss": 365.7433, |
|
"grad_norm": 53.59490203857422, |
|
"learning_rate": 0.0003011040758100741, |
|
"epoch": 0.43410852713178294, |
|
"step": 484 |
|
}, |
|
{ |
|
"loss": 366.9239, |
|
"grad_norm": 49.87615966796875, |
|
"learning_rate": 0.00030041373551066173, |
|
"epoch": 0.4350054455762701, |
|
"step": 485 |
|
}, |
|
{ |
|
"loss": 360.9555, |
|
"grad_norm": 44.795536041259766, |
|
"learning_rate": 0.0002997229942725711, |
|
"epoch": 0.43590236402075727, |
|
"step": 486 |
|
}, |
|
{ |
|
"loss": 370.6934, |
|
"grad_norm": 56.454227447509766, |
|
"learning_rate": 0.000299031857589243, |
|
"epoch": 0.4367992824652444, |
|
"step": 487 |
|
}, |
|
{ |
|
"loss": 369.9133, |
|
"grad_norm": 48.472312927246094, |
|
"learning_rate": 0.00029834033095726335, |
|
"epoch": 0.43769620090973155, |
|
"step": 488 |
|
}, |
|
{ |
|
"loss": 361.5723, |
|
"grad_norm": 51.665260314941406, |
|
"learning_rate": 0.00029764841987631933, |
|
"epoch": 0.43859311935421874, |
|
"step": 489 |
|
}, |
|
{ |
|
"loss": 366.223, |
|
"grad_norm": 51.25084686279297, |
|
"learning_rate": 0.0002969561298491557, |
|
"epoch": 0.4394900377987059, |
|
"step": 490 |
|
}, |
|
{ |
|
"loss": 367.7071, |
|
"grad_norm": 50.52541732788086, |
|
"learning_rate": 0.00029626346638153073, |
|
"epoch": 0.440386956243193, |
|
"step": 491 |
|
}, |
|
{ |
|
"loss": 367.0807, |
|
"grad_norm": 50.71653366088867, |
|
"learning_rate": 0.0002955704349821729, |
|
"epoch": 0.4412838746876802, |
|
"step": 492 |
|
}, |
|
{ |
|
"loss": 366.5776, |
|
"grad_norm": 44.603485107421875, |
|
"learning_rate": 0.0002948770411627367, |
|
"epoch": 0.44218079313216735, |
|
"step": 493 |
|
}, |
|
{ |
|
"loss": 367.2019, |
|
"grad_norm": 49.68048858642578, |
|
"learning_rate": 0.0002941832904377589, |
|
"epoch": 0.4430777115766545, |
|
"step": 494 |
|
}, |
|
{ |
|
"loss": 367.4325, |
|
"grad_norm": 56.277896881103516, |
|
"learning_rate": 0.000293489188324615, |
|
"epoch": 0.4439746300211416, |
|
"step": 495 |
|
}, |
|
{ |
|
"loss": 369.3215, |
|
"grad_norm": 46.4665412902832, |
|
"learning_rate": 0.00029279474034347465, |
|
"epoch": 0.4448715484656288, |
|
"step": 496 |
|
}, |
|
{ |
|
"loss": 368.6407, |
|
"grad_norm": 51.84563446044922, |
|
"learning_rate": 0.00029209995201725836, |
|
"epoch": 0.44576846691011596, |
|
"step": 497 |
|
}, |
|
{ |
|
"loss": 366.8856, |
|
"grad_norm": 55.93694305419922, |
|
"learning_rate": 0.0002914048288715937, |
|
"epoch": 0.4466653853546031, |
|
"step": 498 |
|
}, |
|
{ |
|
"loss": 367.8516, |
|
"grad_norm": 50.97298812866211, |
|
"learning_rate": 0.00029070937643477056, |
|
"epoch": 0.4475623037990903, |
|
"step": 499 |
|
}, |
|
{ |
|
"loss": 364.7996, |
|
"grad_norm": 53.179847717285156, |
|
"learning_rate": 0.000290013600237698, |
|
"epoch": 0.4484592222435774, |
|
"step": 500 |
|
}, |
|
{ |
|
"eval_loss": 1.6293703317642212, |
|
"eval_runtime": 47.4683, |
|
"eval_samples_per_second": 43.145, |
|
"eval_steps_per_second": 2.697, |
|
"epoch": 0.4484592222435774, |
|
"step": 500 |
|
}, |
|
{ |
|
"loss": 364.7999, |
|
"grad_norm": 53.32307434082031, |
|
"learning_rate": 0.00028931750581385975, |
|
"epoch": 0.44935614068806456, |
|
"step": 501 |
|
}, |
|
{ |
|
"loss": 368.2321, |
|
"grad_norm": 48.1343994140625, |
|
"learning_rate": 0.00028862109869927057, |
|
"epoch": 0.45025305913255176, |
|
"step": 502 |
|
}, |
|
{ |
|
"loss": 363.4522, |
|
"grad_norm": 48.97591781616211, |
|
"learning_rate": 0.00028792438443243175, |
|
"epoch": 0.4511499775770389, |
|
"step": 503 |
|
}, |
|
{ |
|
"loss": 367.3519, |
|
"grad_norm": 48.5214729309082, |
|
"learning_rate": 0.00028722736855428755, |
|
"epoch": 0.45204689602152603, |
|
"step": 504 |
|
}, |
|
{ |
|
"loss": 366.9135, |
|
"grad_norm": 48.30058288574219, |
|
"learning_rate": 0.00028653005660818115, |
|
"epoch": 0.4529438144660132, |
|
"step": 505 |
|
}, |
|
{ |
|
"loss": 365.4208, |
|
"grad_norm": 48.56584548950195, |
|
"learning_rate": 0.00028583245413980993, |
|
"epoch": 0.45384073291050037, |
|
"step": 506 |
|
}, |
|
{ |
|
"loss": 366.6342, |
|
"grad_norm": 44.84033203125, |
|
"learning_rate": 0.0002851345666971819, |
|
"epoch": 0.4547376513549875, |
|
"step": 507 |
|
}, |
|
{ |
|
"loss": 366.2589, |
|
"grad_norm": 46.03631591796875, |
|
"learning_rate": 0.0002844363998305717, |
|
"epoch": 0.45563456979947464, |
|
"step": 508 |
|
}, |
|
{ |
|
"loss": 368.2724, |
|
"grad_norm": 52.3626708984375, |
|
"learning_rate": 0.0002837379590924759, |
|
"epoch": 0.45653148824396184, |
|
"step": 509 |
|
}, |
|
{ |
|
"loss": 366.9325, |
|
"grad_norm": 42.26225280761719, |
|
"learning_rate": 0.0002830392500375694, |
|
"epoch": 0.457428406688449, |
|
"step": 510 |
|
}, |
|
{ |
|
"loss": 363.1102, |
|
"grad_norm": 47.719661712646484, |
|
"learning_rate": 0.0002823402782226608, |
|
"epoch": 0.4583253251329361, |
|
"step": 511 |
|
}, |
|
{ |
|
"loss": 369.943, |
|
"grad_norm": 48.35748291015625, |
|
"learning_rate": 0.00028164104920664864, |
|
"epoch": 0.4592222435774233, |
|
"step": 512 |
|
}, |
|
{ |
|
"loss": 366.7622, |
|
"grad_norm": 47.81887435913086, |
|
"learning_rate": 0.00028094156855047687, |
|
"epoch": 0.46011916202191044, |
|
"step": 513 |
|
}, |
|
{ |
|
"loss": 369.4684, |
|
"grad_norm": 51.35517883300781, |
|
"learning_rate": 0.0002802418418170908, |
|
"epoch": 0.4610160804663976, |
|
"step": 514 |
|
}, |
|
{ |
|
"loss": 367.9245, |
|
"grad_norm": 52.903011322021484, |
|
"learning_rate": 0.0002795418745713925, |
|
"epoch": 0.4619129989108847, |
|
"step": 515 |
|
}, |
|
{ |
|
"loss": 363.503, |
|
"grad_norm": 50.455223083496094, |
|
"learning_rate": 0.00027884167238019714, |
|
"epoch": 0.4628099173553719, |
|
"step": 516 |
|
}, |
|
{ |
|
"loss": 361.0208, |
|
"grad_norm": 48.27017593383789, |
|
"learning_rate": 0.0002781412408121884, |
|
"epoch": 0.46370683579985905, |
|
"step": 517 |
|
}, |
|
{ |
|
"loss": 364.5886, |
|
"grad_norm": 49.851619720458984, |
|
"learning_rate": 0.0002774405854378739, |
|
"epoch": 0.4646037542443462, |
|
"step": 518 |
|
}, |
|
{ |
|
"loss": 359.5211, |
|
"grad_norm": 49.12308120727539, |
|
"learning_rate": 0.00027673971182954157, |
|
"epoch": 0.4655006726888334, |
|
"step": 519 |
|
}, |
|
{ |
|
"loss": 366.8299, |
|
"grad_norm": 47.60043716430664, |
|
"learning_rate": 0.00027603862556121463, |
|
"epoch": 0.4663975911333205, |
|
"step": 520 |
|
}, |
|
{ |
|
"loss": 368.2267, |
|
"grad_norm": 41.944801330566406, |
|
"learning_rate": 0.0002753373322086077, |
|
"epoch": 0.46729450957780766, |
|
"step": 521 |
|
}, |
|
{ |
|
"loss": 368.1608, |
|
"grad_norm": 45.84396743774414, |
|
"learning_rate": 0.00027463583734908234, |
|
"epoch": 0.46819142802229485, |
|
"step": 522 |
|
}, |
|
{ |
|
"loss": 359.4468, |
|
"grad_norm": 44.122989654541016, |
|
"learning_rate": 0.0002739341465616026, |
|
"epoch": 0.469088346466782, |
|
"step": 523 |
|
}, |
|
{ |
|
"loss": 367.6043, |
|
"grad_norm": 44.97038269042969, |
|
"learning_rate": 0.000273232265426691, |
|
"epoch": 0.46998526491126913, |
|
"step": 524 |
|
}, |
|
{ |
|
"loss": 367.8859, |
|
"grad_norm": 49.4835319519043, |
|
"learning_rate": 0.0002725301995263835, |
|
"epoch": 0.47088218335575627, |
|
"step": 525 |
|
}, |
|
{ |
|
"loss": 365.9901, |
|
"grad_norm": 46.08525466918945, |
|
"learning_rate": 0.00027182795444418583, |
|
"epoch": 0.47177910180024346, |
|
"step": 526 |
|
}, |
|
{ |
|
"loss": 362.7762, |
|
"grad_norm": 45.26884841918945, |
|
"learning_rate": 0.0002711255357650286, |
|
"epoch": 0.4726760202447306, |
|
"step": 527 |
|
}, |
|
{ |
|
"loss": 363.5254, |
|
"grad_norm": 52.6630973815918, |
|
"learning_rate": 0.0002704229490752229, |
|
"epoch": 0.47357293868921774, |
|
"step": 528 |
|
}, |
|
{ |
|
"loss": 362.2083, |
|
"grad_norm": 49.639488220214844, |
|
"learning_rate": 0.00026972019996241635, |
|
"epoch": 0.47446985713370493, |
|
"step": 529 |
|
}, |
|
{ |
|
"loss": 370.2541, |
|
"grad_norm": 51.361610412597656, |
|
"learning_rate": 0.00026901729401554805, |
|
"epoch": 0.47536677557819207, |
|
"step": 530 |
|
}, |
|
{ |
|
"loss": 364.9506, |
|
"grad_norm": 45.84967803955078, |
|
"learning_rate": 0.00026831423682480425, |
|
"epoch": 0.4762636940226792, |
|
"step": 531 |
|
}, |
|
{ |
|
"loss": 373.7259, |
|
"grad_norm": 48.99913024902344, |
|
"learning_rate": 0.00026761103398157456, |
|
"epoch": 0.4771606124671664, |
|
"step": 532 |
|
}, |
|
{ |
|
"loss": 367.0407, |
|
"grad_norm": 53.0494270324707, |
|
"learning_rate": 0.00026690769107840634, |
|
"epoch": 0.47805753091165354, |
|
"step": 533 |
|
}, |
|
{ |
|
"loss": 366.3498, |
|
"grad_norm": 46.16975784301758, |
|
"learning_rate": 0.00026620421370896136, |
|
"epoch": 0.4789544493561407, |
|
"step": 534 |
|
}, |
|
{ |
|
"loss": 363.5735, |
|
"grad_norm": 45.147125244140625, |
|
"learning_rate": 0.00026550060746797057, |
|
"epoch": 0.47985136780062787, |
|
"step": 535 |
|
}, |
|
{ |
|
"loss": 362.9278, |
|
"grad_norm": 47.262821197509766, |
|
"learning_rate": 0.0002647968779511897, |
|
"epoch": 0.480748286245115, |
|
"step": 536 |
|
}, |
|
{ |
|
"loss": 366.6017, |
|
"grad_norm": 49.1768913269043, |
|
"learning_rate": 0.00026409303075535504, |
|
"epoch": 0.48164520468960215, |
|
"step": 537 |
|
}, |
|
{ |
|
"loss": 363.7893, |
|
"grad_norm": 47.41939163208008, |
|
"learning_rate": 0.00026338907147813894, |
|
"epoch": 0.4825421231340893, |
|
"step": 538 |
|
}, |
|
{ |
|
"loss": 362.325, |
|
"grad_norm": 45.2095947265625, |
|
"learning_rate": 0.0002626850057181048, |
|
"epoch": 0.4834390415785765, |
|
"step": 539 |
|
}, |
|
{ |
|
"loss": 368.0108, |
|
"grad_norm": 44.87570571899414, |
|
"learning_rate": 0.000261980839074663, |
|
"epoch": 0.4843359600230636, |
|
"step": 540 |
|
}, |
|
{ |
|
"loss": 363.8844, |
|
"grad_norm": 44.87836456298828, |
|
"learning_rate": 0.0002612765771480264, |
|
"epoch": 0.48523287846755075, |
|
"step": 541 |
|
}, |
|
{ |
|
"loss": 366.2256, |
|
"grad_norm": 52.47968292236328, |
|
"learning_rate": 0.00026057222553916545, |
|
"epoch": 0.48612979691203795, |
|
"step": 542 |
|
}, |
|
{ |
|
"loss": 364.6898, |
|
"grad_norm": 49.18819808959961, |
|
"learning_rate": 0.0002598677898497638, |
|
"epoch": 0.4870267153565251, |
|
"step": 543 |
|
}, |
|
{ |
|
"loss": 364.0697, |
|
"grad_norm": 47.542850494384766, |
|
"learning_rate": 0.00025916327568217416, |
|
"epoch": 0.4879236338010122, |
|
"step": 544 |
|
}, |
|
{ |
|
"loss": 362.7703, |
|
"grad_norm": 44.471256256103516, |
|
"learning_rate": 0.0002584586886393729, |
|
"epoch": 0.4888205522454994, |
|
"step": 545 |
|
}, |
|
{ |
|
"loss": 370.4043, |
|
"grad_norm": 46.374263763427734, |
|
"learning_rate": 0.0002577540343249162, |
|
"epoch": 0.48971747068998656, |
|
"step": 546 |
|
}, |
|
{ |
|
"loss": 362.8738, |
|
"grad_norm": 44.021278381347656, |
|
"learning_rate": 0.0002570493183428952, |
|
"epoch": 0.4906143891344737, |
|
"step": 547 |
|
}, |
|
{ |
|
"loss": 365.418, |
|
"grad_norm": 47.044212341308594, |
|
"learning_rate": 0.00025634454629789156, |
|
"epoch": 0.49151130757896083, |
|
"step": 548 |
|
}, |
|
{ |
|
"loss": 363.5009, |
|
"grad_norm": 48.60353469848633, |
|
"learning_rate": 0.00025563972379493273, |
|
"epoch": 0.492408226023448, |
|
"step": 549 |
|
}, |
|
{ |
|
"loss": 365.955, |
|
"grad_norm": 47.8569221496582, |
|
"learning_rate": 0.00025493485643944753, |
|
"epoch": 0.49330514446793516, |
|
"step": 550 |
|
}, |
|
{ |
|
"eval_loss": 1.6247297525405884, |
|
"eval_runtime": 36.2552, |
|
"eval_samples_per_second": 56.488, |
|
"eval_steps_per_second": 3.531, |
|
"epoch": 0.49330514446793516, |
|
"step": 550 |
|
}, |
|
{ |
|
"loss": 361.769, |
|
"grad_norm": 52.47264099121094, |
|
"learning_rate": 0.00025422994983722127, |
|
"epoch": 0.4942020629124223, |
|
"step": 551 |
|
}, |
|
{ |
|
"loss": 369.0356, |
|
"grad_norm": 51.903358459472656, |
|
"learning_rate": 0.0002535250095943517, |
|
"epoch": 0.4950989813569095, |
|
"step": 552 |
|
}, |
|
{ |
|
"loss": 362.5946, |
|
"grad_norm": 55.91824722290039, |
|
"learning_rate": 0.0002528200413172039, |
|
"epoch": 0.49599589980139663, |
|
"step": 553 |
|
}, |
|
{ |
|
"loss": 364.1907, |
|
"grad_norm": 49.117069244384766, |
|
"learning_rate": 0.00025211505061236583, |
|
"epoch": 0.49689281824588377, |
|
"step": 554 |
|
}, |
|
{ |
|
"loss": 363.2774, |
|
"grad_norm": 44.69606018066406, |
|
"learning_rate": 0.00025141004308660414, |
|
"epoch": 0.49778973669037097, |
|
"step": 555 |
|
}, |
|
{ |
|
"loss": 363.2139, |
|
"grad_norm": 52.18587112426758, |
|
"learning_rate": 0.00025070502434681915, |
|
"epoch": 0.4986866551348581, |
|
"step": 556 |
|
}, |
|
{ |
|
"loss": 365.6665, |
|
"grad_norm": 57.393428802490234, |
|
"learning_rate": 0.00025, |
|
"epoch": 0.49958357357934524, |
|
"step": 557 |
|
}, |
|
{ |
|
"loss": 363.4536, |
|
"grad_norm": 52.89313507080078, |
|
"learning_rate": 0.0002492949756531809, |
|
"epoch": 0.5004804920238324, |
|
"step": 558 |
|
}, |
|
{ |
|
"loss": 363.2097, |
|
"grad_norm": 51.265533447265625, |
|
"learning_rate": 0.00024858995691339587, |
|
"epoch": 0.5013774104683195, |
|
"step": 559 |
|
}, |
|
{ |
|
"loss": 366.4611, |
|
"grad_norm": 56.473567962646484, |
|
"learning_rate": 0.0002478849493876342, |
|
"epoch": 0.5022743289128068, |
|
"step": 560 |
|
}, |
|
{ |
|
"loss": 361.8987, |
|
"grad_norm": 49.68058776855469, |
|
"learning_rate": 0.0002471799586827962, |
|
"epoch": 0.5031712473572939, |
|
"step": 561 |
|
}, |
|
{ |
|
"loss": 360.8694, |
|
"grad_norm": 42.74179458618164, |
|
"learning_rate": 0.00024647499040564844, |
|
"epoch": 0.504068165801781, |
|
"step": 562 |
|
}, |
|
{ |
|
"loss": 364.9089, |
|
"grad_norm": 45.61265563964844, |
|
"learning_rate": 0.00024577005016277885, |
|
"epoch": 0.5049650842462682, |
|
"step": 563 |
|
}, |
|
{ |
|
"loss": 365.8124, |
|
"grad_norm": 46.97050857543945, |
|
"learning_rate": 0.0002450651435605526, |
|
"epoch": 0.5058620026907553, |
|
"step": 564 |
|
}, |
|
{ |
|
"loss": 360.1623, |
|
"grad_norm": 46.26262664794922, |
|
"learning_rate": 0.0002443602762050673, |
|
"epoch": 0.5067589211352425, |
|
"step": 565 |
|
}, |
|
{ |
|
"loss": 363.2248, |
|
"grad_norm": 44.43347930908203, |
|
"learning_rate": 0.00024365545370210842, |
|
"epoch": 0.5076558395797296, |
|
"step": 566 |
|
}, |
|
{ |
|
"loss": 365.1527, |
|
"grad_norm": 46.19889831542969, |
|
"learning_rate": 0.00024295068165710478, |
|
"epoch": 0.5085527580242168, |
|
"step": 567 |
|
}, |
|
{ |
|
"loss": 365.0658, |
|
"grad_norm": 49.645484924316406, |
|
"learning_rate": 0.00024224596567508385, |
|
"epoch": 0.509449676468704, |
|
"step": 568 |
|
}, |
|
{ |
|
"loss": 362.5722, |
|
"grad_norm": 47.69388961791992, |
|
"learning_rate": 0.00024154131136062715, |
|
"epoch": 0.5103465949131911, |
|
"step": 569 |
|
}, |
|
{ |
|
"loss": 361.0171, |
|
"grad_norm": 44.855857849121094, |
|
"learning_rate": 0.00024083672431782585, |
|
"epoch": 0.5112435133576783, |
|
"step": 570 |
|
}, |
|
{ |
|
"loss": 361.5502, |
|
"grad_norm": 48.860435485839844, |
|
"learning_rate": 0.00024013221015023619, |
|
"epoch": 0.5121404318021654, |
|
"step": 571 |
|
}, |
|
{ |
|
"loss": 360.8487, |
|
"grad_norm": 45.69166564941406, |
|
"learning_rate": 0.0002394277744608346, |
|
"epoch": 0.5130373502466525, |
|
"step": 572 |
|
}, |
|
{ |
|
"loss": 361.6857, |
|
"grad_norm": 45.67158889770508, |
|
"learning_rate": 0.00023872342285197366, |
|
"epoch": 0.5139342686911397, |
|
"step": 573 |
|
}, |
|
{ |
|
"loss": 364.0296, |
|
"grad_norm": 51.487369537353516, |
|
"learning_rate": 0.00023801916092533706, |
|
"epoch": 0.5148311871356269, |
|
"step": 574 |
|
}, |
|
{ |
|
"loss": 366.4655, |
|
"grad_norm": 49.884727478027344, |
|
"learning_rate": 0.0002373149942818953, |
|
"epoch": 0.5157281055801141, |
|
"step": 575 |
|
}, |
|
{ |
|
"loss": 360.9107, |
|
"grad_norm": 42.73551940917969, |
|
"learning_rate": 0.00023661092852186118, |
|
"epoch": 0.5166250240246012, |
|
"step": 576 |
|
}, |
|
{ |
|
"loss": 364.7719, |
|
"grad_norm": 44.425777435302734, |
|
"learning_rate": 0.000235906969244645, |
|
"epoch": 0.5175219424690883, |
|
"step": 577 |
|
}, |
|
{ |
|
"loss": 362.6983, |
|
"grad_norm": 52.82978057861328, |
|
"learning_rate": 0.00023520312204881045, |
|
"epoch": 0.5184188609135755, |
|
"step": 578 |
|
}, |
|
{ |
|
"loss": 359.655, |
|
"grad_norm": 46.826904296875, |
|
"learning_rate": 0.0002344993925320295, |
|
"epoch": 0.5193157793580626, |
|
"step": 579 |
|
}, |
|
{ |
|
"loss": 364.8085, |
|
"grad_norm": 42.24338150024414, |
|
"learning_rate": 0.00023379578629103865, |
|
"epoch": 0.5202126978025499, |
|
"step": 580 |
|
}, |
|
{ |
|
"loss": 358.4188, |
|
"grad_norm": 49.714271545410156, |
|
"learning_rate": 0.00023309230892159364, |
|
"epoch": 0.521109616247037, |
|
"step": 581 |
|
}, |
|
{ |
|
"loss": 364.1614, |
|
"grad_norm": 47.561073303222656, |
|
"learning_rate": 0.0002323889660184255, |
|
"epoch": 0.5220065346915241, |
|
"step": 582 |
|
}, |
|
{ |
|
"loss": 361.0988, |
|
"grad_norm": 45.20221710205078, |
|
"learning_rate": 0.00023168576317519576, |
|
"epoch": 0.5229034531360113, |
|
"step": 583 |
|
}, |
|
{ |
|
"loss": 367.0533, |
|
"grad_norm": 47.38787078857422, |
|
"learning_rate": 0.00023098270598445204, |
|
"epoch": 0.5238003715804984, |
|
"step": 584 |
|
}, |
|
{ |
|
"loss": 366.2763, |
|
"grad_norm": 47.23054122924805, |
|
"learning_rate": 0.00023027980003758363, |
|
"epoch": 0.5246972900249856, |
|
"step": 585 |
|
}, |
|
{ |
|
"loss": 365.6816, |
|
"grad_norm": 43.855403900146484, |
|
"learning_rate": 0.0002295770509247771, |
|
"epoch": 0.5255942084694727, |
|
"step": 586 |
|
}, |
|
{ |
|
"loss": 365.6198, |
|
"grad_norm": 51.30084228515625, |
|
"learning_rate": 0.00022887446423497146, |
|
"epoch": 0.5264911269139599, |
|
"step": 587 |
|
}, |
|
{ |
|
"loss": 362.4194, |
|
"grad_norm": 50.142330169677734, |
|
"learning_rate": 0.00022817204555581418, |
|
"epoch": 0.5273880453584471, |
|
"step": 588 |
|
}, |
|
{ |
|
"loss": 364.2704, |
|
"grad_norm": 46.52515411376953, |
|
"learning_rate": 0.00022746980047361654, |
|
"epoch": 0.5282849638029342, |
|
"step": 589 |
|
}, |
|
{ |
|
"loss": 362.0045, |
|
"grad_norm": 48.26958465576172, |
|
"learning_rate": 0.00022676773457330906, |
|
"epoch": 0.5291818822474214, |
|
"step": 590 |
|
}, |
|
{ |
|
"loss": 364.3056, |
|
"grad_norm": 45.78593063354492, |
|
"learning_rate": 0.0002260658534383974, |
|
"epoch": 0.5300788006919085, |
|
"step": 591 |
|
}, |
|
{ |
|
"loss": 364.2805, |
|
"grad_norm": 47.130184173583984, |
|
"learning_rate": 0.00022536416265091775, |
|
"epoch": 0.5309757191363956, |
|
"step": 592 |
|
}, |
|
{ |
|
"loss": 362.9882, |
|
"grad_norm": 43.309181213378906, |
|
"learning_rate": 0.0002246626677913923, |
|
"epoch": 0.5318726375808829, |
|
"step": 593 |
|
}, |
|
{ |
|
"loss": 362.9743, |
|
"grad_norm": 40.39152145385742, |
|
"learning_rate": 0.00022396137443878535, |
|
"epoch": 0.53276955602537, |
|
"step": 594 |
|
}, |
|
{ |
|
"loss": 359.4163, |
|
"grad_norm": 47.722068786621094, |
|
"learning_rate": 0.00022326028817045844, |
|
"epoch": 0.5336664744698572, |
|
"step": 595 |
|
}, |
|
{ |
|
"loss": 364.6919, |
|
"grad_norm": 42.61846160888672, |
|
"learning_rate": 0.00022255941456212605, |
|
"epoch": 0.5345633929143443, |
|
"step": 596 |
|
}, |
|
{ |
|
"loss": 368.3342, |
|
"grad_norm": 44.96833038330078, |
|
"learning_rate": 0.00022185875918781162, |
|
"epoch": 0.5354603113588314, |
|
"step": 597 |
|
}, |
|
{ |
|
"loss": 363.2259, |
|
"grad_norm": 43.944881439208984, |
|
"learning_rate": 0.00022115832761980287, |
|
"epoch": 0.5363572298033186, |
|
"step": 598 |
|
}, |
|
{ |
|
"loss": 362.7245, |
|
"grad_norm": 47.073341369628906, |
|
"learning_rate": 0.00022045812542860756, |
|
"epoch": 0.5372541482478057, |
|
"step": 599 |
|
}, |
|
{ |
|
"loss": 363.0497, |
|
"grad_norm": 44.11311721801758, |
|
"learning_rate": 0.00021975815818290928, |
|
"epoch": 0.538151066692293, |
|
"step": 600 |
|
}, |
|
{ |
|
"eval_loss": 1.61993408203125, |
|
"eval_runtime": 65.3564, |
|
"eval_samples_per_second": 31.336, |
|
"eval_steps_per_second": 1.958, |
|
"epoch": 0.538151066692293, |
|
"step": 600 |
|
}, |
|
{ |
|
"loss": 360.9368, |
|
"grad_norm": 45.97838592529297, |
|
"learning_rate": 0.00021905843144952316, |
|
"epoch": 0.5390479851367801, |
|
"step": 601 |
|
}, |
|
{ |
|
"loss": 363.959, |
|
"grad_norm": 45.36203384399414, |
|
"learning_rate": 0.0002183589507933514, |
|
"epoch": 0.5399449035812672, |
|
"step": 602 |
|
}, |
|
{ |
|
"loss": 363.9291, |
|
"grad_norm": 43.02581024169922, |
|
"learning_rate": 0.00021765972177733924, |
|
"epoch": 0.5408418220257544, |
|
"step": 603 |
|
}, |
|
{ |
|
"loss": 363.5491, |
|
"grad_norm": 47.46310806274414, |
|
"learning_rate": 0.0002169607499624307, |
|
"epoch": 0.5417387404702415, |
|
"step": 604 |
|
}, |
|
{ |
|
"loss": 367.6017, |
|
"grad_norm": 47.89605712890625, |
|
"learning_rate": 0.00021626204090752422, |
|
"epoch": 0.5426356589147286, |
|
"step": 605 |
|
}, |
|
{ |
|
"loss": 364.9732, |
|
"grad_norm": 45.463443756103516, |
|
"learning_rate": 0.00021556360016942842, |
|
"epoch": 0.5435325773592158, |
|
"step": 606 |
|
}, |
|
{ |
|
"loss": 364.4341, |
|
"grad_norm": 43.64617919921875, |
|
"learning_rate": 0.00021486543330281812, |
|
"epoch": 0.544429495803703, |
|
"step": 607 |
|
}, |
|
{ |
|
"loss": 366.3894, |
|
"grad_norm": 41.575531005859375, |
|
"learning_rate": 0.0002141675458601901, |
|
"epoch": 0.5453264142481902, |
|
"step": 608 |
|
}, |
|
{ |
|
"loss": 363.112, |
|
"grad_norm": 46.79388427734375, |
|
"learning_rate": 0.00021346994339181883, |
|
"epoch": 0.5462233326926773, |
|
"step": 609 |
|
}, |
|
{ |
|
"loss": 361.5751, |
|
"grad_norm": 48.13455581665039, |
|
"learning_rate": 0.0002127726314457124, |
|
"epoch": 0.5471202511371644, |
|
"step": 610 |
|
}, |
|
{ |
|
"loss": 361.1321, |
|
"grad_norm": 45.220550537109375, |
|
"learning_rate": 0.0002120756155675683, |
|
"epoch": 0.5480171695816516, |
|
"step": 611 |
|
}, |
|
{ |
|
"loss": 365.0866, |
|
"grad_norm": 46.22264099121094, |
|
"learning_rate": 0.0002113789013007295, |
|
"epoch": 0.5489140880261387, |
|
"step": 612 |
|
}, |
|
{ |
|
"loss": 360.2099, |
|
"grad_norm": 47.99028015136719, |
|
"learning_rate": 0.00021068249418614027, |
|
"epoch": 0.549811006470626, |
|
"step": 613 |
|
}, |
|
{ |
|
"loss": 362.4004, |
|
"grad_norm": 45.35298538208008, |
|
"learning_rate": 0.00020998639976230202, |
|
"epoch": 0.5507079249151131, |
|
"step": 614 |
|
}, |
|
{ |
|
"loss": 362.9482, |
|
"grad_norm": 45.84006118774414, |
|
"learning_rate": 0.00020929062356522942, |
|
"epoch": 0.5516048433596002, |
|
"step": 615 |
|
}, |
|
{ |
|
"loss": 361.6893, |
|
"grad_norm": 46.06373977661133, |
|
"learning_rate": 0.00020859517112840637, |
|
"epoch": 0.5525017618040874, |
|
"step": 616 |
|
}, |
|
{ |
|
"loss": 368.1667, |
|
"grad_norm": 43.56032180786133, |
|
"learning_rate": 0.00020790004798274165, |
|
"epoch": 0.5533986802485745, |
|
"step": 617 |
|
}, |
|
{ |
|
"loss": 363.2073, |
|
"grad_norm": 43.215370178222656, |
|
"learning_rate": 0.00020720525965652544, |
|
"epoch": 0.5542955986930617, |
|
"step": 618 |
|
}, |
|
{ |
|
"loss": 358.3785, |
|
"grad_norm": 47.84462356567383, |
|
"learning_rate": 0.00020651081167538508, |
|
"epoch": 0.5551925171375488, |
|
"step": 619 |
|
}, |
|
{ |
|
"loss": 365.6581, |
|
"grad_norm": 49.96092987060547, |
|
"learning_rate": 0.00020581670956224113, |
|
"epoch": 0.556089435582036, |
|
"step": 620 |
|
}, |
|
{ |
|
"loss": 363.1918, |
|
"grad_norm": 44.61714172363281, |
|
"learning_rate": 0.00020512295883726338, |
|
"epoch": 0.5569863540265232, |
|
"step": 621 |
|
}, |
|
{ |
|
"loss": 363.2948, |
|
"grad_norm": 44.841495513916016, |
|
"learning_rate": 0.00020442956501782713, |
|
"epoch": 0.5578832724710103, |
|
"step": 622 |
|
}, |
|
{ |
|
"loss": 358.7636, |
|
"grad_norm": 46.29624938964844, |
|
"learning_rate": 0.00020373653361846925, |
|
"epoch": 0.5587801909154975, |
|
"step": 623 |
|
}, |
|
{ |
|
"loss": 362.0233, |
|
"grad_norm": 43.61477279663086, |
|
"learning_rate": 0.0002030438701508443, |
|
"epoch": 0.5596771093599846, |
|
"step": 624 |
|
}, |
|
{ |
|
"loss": 366.3086, |
|
"grad_norm": 44.28224182128906, |
|
"learning_rate": 0.00020235158012368065, |
|
"epoch": 0.5605740278044717, |
|
"step": 625 |
|
}, |
|
{ |
|
"loss": 357.9655, |
|
"grad_norm": 43.08799362182617, |
|
"learning_rate": 0.00020165966904273666, |
|
"epoch": 0.5614709462489589, |
|
"step": 626 |
|
}, |
|
{ |
|
"loss": 364.1879, |
|
"grad_norm": 45.73900604248047, |
|
"learning_rate": 0.00020096814241075703, |
|
"epoch": 0.5623678646934461, |
|
"step": 627 |
|
}, |
|
{ |
|
"loss": 359.9633, |
|
"grad_norm": 48.213985443115234, |
|
"learning_rate": 0.00020027700572742895, |
|
"epoch": 0.5632647831379333, |
|
"step": 628 |
|
}, |
|
{ |
|
"loss": 365.9498, |
|
"grad_norm": 43.3817253112793, |
|
"learning_rate": 0.00019958626448933825, |
|
"epoch": 0.5641617015824204, |
|
"step": 629 |
|
}, |
|
{ |
|
"loss": 362.1366, |
|
"grad_norm": 42.70503234863281, |
|
"learning_rate": 0.00019889592418992594, |
|
"epoch": 0.5650586200269075, |
|
"step": 630 |
|
}, |
|
{ |
|
"loss": 361.433, |
|
"grad_norm": 46.60575485229492, |
|
"learning_rate": 0.00019820599031944436, |
|
"epoch": 0.5659555384713947, |
|
"step": 631 |
|
}, |
|
{ |
|
"loss": 364.1061, |
|
"grad_norm": 42.36573791503906, |
|
"learning_rate": 0.00019751646836491338, |
|
"epoch": 0.5668524569158818, |
|
"step": 632 |
|
}, |
|
{ |
|
"loss": 360.4161, |
|
"grad_norm": 43.14451599121094, |
|
"learning_rate": 0.00019682736381007707, |
|
"epoch": 0.5677493753603691, |
|
"step": 633 |
|
}, |
|
{ |
|
"loss": 357.0567, |
|
"grad_norm": 44.19496154785156, |
|
"learning_rate": 0.00019613868213535997, |
|
"epoch": 0.5686462938048562, |
|
"step": 634 |
|
}, |
|
{ |
|
"loss": 361.1339, |
|
"grad_norm": 42.32905960083008, |
|
"learning_rate": 0.00019545042881782333, |
|
"epoch": 0.5695432122493433, |
|
"step": 635 |
|
}, |
|
{ |
|
"loss": 361.2873, |
|
"grad_norm": 47.53689956665039, |
|
"learning_rate": 0.00019476260933112163, |
|
"epoch": 0.5704401306938305, |
|
"step": 636 |
|
}, |
|
{ |
|
"loss": 362.2348, |
|
"grad_norm": 47.5960578918457, |
|
"learning_rate": 0.00019407522914545957, |
|
"epoch": 0.5713370491383176, |
|
"step": 637 |
|
}, |
|
{ |
|
"loss": 366.9183, |
|
"grad_norm": 43.92160415649414, |
|
"learning_rate": 0.00019338829372754745, |
|
"epoch": 0.5722339675828048, |
|
"step": 638 |
|
}, |
|
{ |
|
"loss": 361.6643, |
|
"grad_norm": 46.373863220214844, |
|
"learning_rate": 0.0001927018085405588, |
|
"epoch": 0.5731308860272919, |
|
"step": 639 |
|
}, |
|
{ |
|
"loss": 362.9005, |
|
"grad_norm": 45.955814361572266, |
|
"learning_rate": 0.0001920157790440864, |
|
"epoch": 0.5740278044717791, |
|
"step": 640 |
|
}, |
|
{ |
|
"loss": 360.8845, |
|
"grad_norm": 46.01215362548828, |
|
"learning_rate": 0.00019133021069409872, |
|
"epoch": 0.5749247229162663, |
|
"step": 641 |
|
}, |
|
{ |
|
"loss": 361.9622, |
|
"grad_norm": 46.09065628051758, |
|
"learning_rate": 0.00019064510894289705, |
|
"epoch": 0.5758216413607534, |
|
"step": 642 |
|
}, |
|
{ |
|
"loss": 363.2684, |
|
"grad_norm": 45.370140075683594, |
|
"learning_rate": 0.00018996047923907166, |
|
"epoch": 0.5767185598052406, |
|
"step": 643 |
|
}, |
|
{ |
|
"loss": 362.285, |
|
"grad_norm": 43.416664123535156, |
|
"learning_rate": 0.00018927632702745866, |
|
"epoch": 0.5776154782497277, |
|
"step": 644 |
|
}, |
|
{ |
|
"loss": 360.188, |
|
"grad_norm": 44.63084030151367, |
|
"learning_rate": 0.00018859265774909668, |
|
"epoch": 0.5785123966942148, |
|
"step": 645 |
|
}, |
|
{ |
|
"loss": 362.1082, |
|
"grad_norm": 43.95875930786133, |
|
"learning_rate": 0.00018790947684118364, |
|
"epoch": 0.5794093151387021, |
|
"step": 646 |
|
}, |
|
{ |
|
"loss": 364.6595, |
|
"grad_norm": 46.196041107177734, |
|
"learning_rate": 0.00018722678973703355, |
|
"epoch": 0.5803062335831892, |
|
"step": 647 |
|
}, |
|
{ |
|
"loss": 367.5318, |
|
"grad_norm": 52.50529479980469, |
|
"learning_rate": 0.00018654460186603295, |
|
"epoch": 0.5812031520276764, |
|
"step": 648 |
|
}, |
|
{ |
|
"loss": 364.7477, |
|
"grad_norm": 44.10645294189453, |
|
"learning_rate": 0.00018586291865359822, |
|
"epoch": 0.5821000704721635, |
|
"step": 649 |
|
}, |
|
{ |
|
"loss": 362.5089, |
|
"grad_norm": 42.808326721191406, |
|
"learning_rate": 0.00018518174552113216, |
|
"epoch": 0.5829969889166506, |
|
"step": 650 |
|
}, |
|
{ |
|
"eval_loss": 1.6019372940063477, |
|
"eval_runtime": 17.6903, |
|
"eval_samples_per_second": 115.769, |
|
"eval_steps_per_second": 14.471, |
|
"epoch": 0.5829969889166506, |
|
"step": 650 |
|
}, |
|
{ |
|
"loss": 361.447, |
|
"grad_norm": 45.0283088684082, |
|
"learning_rate": 0.0001845010878859809, |
|
"epoch": 0.5838939073611378, |
|
"step": 651 |
|
}, |
|
{ |
|
"loss": 363.9907, |
|
"grad_norm": 45.77663040161133, |
|
"learning_rate": 0.00018382095116139098, |
|
"epoch": 0.5847908258056249, |
|
"step": 652 |
|
}, |
|
{ |
|
"loss": 358.2193, |
|
"grad_norm": 47.19649124145508, |
|
"learning_rate": 0.00018314134075646582, |
|
"epoch": 0.5856877442501122, |
|
"step": 653 |
|
}, |
|
{ |
|
"loss": 362.618, |
|
"grad_norm": 45.46641540527344, |
|
"learning_rate": 0.00018246226207612338, |
|
"epoch": 0.5865846626945993, |
|
"step": 654 |
|
}, |
|
{ |
|
"loss": 364.6533, |
|
"grad_norm": 45.993873596191406, |
|
"learning_rate": 0.00018178372052105263, |
|
"epoch": 0.5874815811390864, |
|
"step": 655 |
|
}, |
|
{ |
|
"loss": 359.9103, |
|
"grad_norm": 49.62721252441406, |
|
"learning_rate": 0.00018110572148767089, |
|
"epoch": 0.5883784995835736, |
|
"step": 656 |
|
}, |
|
{ |
|
"loss": 362.929, |
|
"grad_norm": 47.14739227294922, |
|
"learning_rate": 0.00018042827036808074, |
|
"epoch": 0.5892754180280607, |
|
"step": 657 |
|
}, |
|
{ |
|
"loss": 364.1747, |
|
"grad_norm": 46.9727897644043, |
|
"learning_rate": 0.00017975137255002744, |
|
"epoch": 0.5901723364725479, |
|
"step": 658 |
|
}, |
|
{ |
|
"loss": 362.2029, |
|
"grad_norm": 45.876277923583984, |
|
"learning_rate": 0.0001790750334168555, |
|
"epoch": 0.591069254917035, |
|
"step": 659 |
|
}, |
|
{ |
|
"loss": 359.2526, |
|
"grad_norm": 42.93642807006836, |
|
"learning_rate": 0.00017839925834746653, |
|
"epoch": 0.5919661733615222, |
|
"step": 660 |
|
}, |
|
{ |
|
"loss": 363.6162, |
|
"grad_norm": 41.57487487792969, |
|
"learning_rate": 0.0001777240527162761, |
|
"epoch": 0.5928630918060094, |
|
"step": 661 |
|
}, |
|
{ |
|
"loss": 361.9038, |
|
"grad_norm": 46.25205993652344, |
|
"learning_rate": 0.00017704942189317104, |
|
"epoch": 0.5937600102504965, |
|
"step": 662 |
|
}, |
|
{ |
|
"loss": 358.8016, |
|
"grad_norm": 45.354007720947266, |
|
"learning_rate": 0.0001763753712434666, |
|
"epoch": 0.5946569286949837, |
|
"step": 663 |
|
}, |
|
{ |
|
"loss": 361.5577, |
|
"grad_norm": 42.980037689208984, |
|
"learning_rate": 0.00017570190612786413, |
|
"epoch": 0.5955538471394708, |
|
"step": 664 |
|
}, |
|
{ |
|
"loss": 361.3445, |
|
"grad_norm": 44.7468147277832, |
|
"learning_rate": 0.00017502903190240815, |
|
"epoch": 0.5964507655839579, |
|
"step": 665 |
|
}, |
|
{ |
|
"loss": 360.489, |
|
"grad_norm": 43.96569061279297, |
|
"learning_rate": 0.00017435675391844397, |
|
"epoch": 0.5973476840284452, |
|
"step": 666 |
|
}, |
|
{ |
|
"loss": 365.539, |
|
"grad_norm": 45.040103912353516, |
|
"learning_rate": 0.00017368507752257495, |
|
"epoch": 0.5982446024729323, |
|
"step": 667 |
|
}, |
|
{ |
|
"loss": 363.3497, |
|
"grad_norm": 45.93570327758789, |
|
"learning_rate": 0.00017301400805661989, |
|
"epoch": 0.5991415209174195, |
|
"step": 668 |
|
}, |
|
{ |
|
"loss": 356.2852, |
|
"grad_norm": 41.94508743286133, |
|
"learning_rate": 0.00017234355085757086, |
|
"epoch": 0.6000384393619066, |
|
"step": 669 |
|
}, |
|
{ |
|
"loss": 364.3321, |
|
"grad_norm": 40.20936584472656, |
|
"learning_rate": 0.00017167371125755064, |
|
"epoch": 0.6009353578063937, |
|
"step": 670 |
|
}, |
|
{ |
|
"loss": 365.0333, |
|
"grad_norm": 42.29598617553711, |
|
"learning_rate": 0.00017100449458377003, |
|
"epoch": 0.6018322762508809, |
|
"step": 671 |
|
}, |
|
{ |
|
"loss": 356.7194, |
|
"grad_norm": 41.43622589111328, |
|
"learning_rate": 0.00017033590615848598, |
|
"epoch": 0.602729194695368, |
|
"step": 672 |
|
}, |
|
{ |
|
"loss": 362.7276, |
|
"grad_norm": 44.03760528564453, |
|
"learning_rate": 0.0001696679512989589, |
|
"epoch": 0.6036261131398553, |
|
"step": 673 |
|
}, |
|
{ |
|
"loss": 359.1711, |
|
"grad_norm": 39.68849182128906, |
|
"learning_rate": 0.00016900063531741048, |
|
"epoch": 0.6045230315843424, |
|
"step": 674 |
|
}, |
|
{ |
|
"loss": 357.2, |
|
"grad_norm": 40.92485809326172, |
|
"learning_rate": 0.0001683339635209813, |
|
"epoch": 0.6054199500288295, |
|
"step": 675 |
|
}, |
|
{ |
|
"loss": 362.3214, |
|
"grad_norm": 41.29072189331055, |
|
"learning_rate": 0.000167667941211689, |
|
"epoch": 0.6063168684733167, |
|
"step": 676 |
|
}, |
|
{ |
|
"loss": 361.0124, |
|
"grad_norm": 41.026676177978516, |
|
"learning_rate": 0.00016700257368638572, |
|
"epoch": 0.6072137869178038, |
|
"step": 677 |
|
}, |
|
{ |
|
"loss": 360.2582, |
|
"grad_norm": 43.93520736694336, |
|
"learning_rate": 0.0001663378662367161, |
|
"epoch": 0.608110705362291, |
|
"step": 678 |
|
}, |
|
{ |
|
"loss": 358.0945, |
|
"grad_norm": 43.4892578125, |
|
"learning_rate": 0.00016567382414907532, |
|
"epoch": 0.6090076238067781, |
|
"step": 679 |
|
}, |
|
{ |
|
"loss": 360.7998, |
|
"grad_norm": 43.67966842651367, |
|
"learning_rate": 0.00016501045270456694, |
|
"epoch": 0.6099045422512653, |
|
"step": 680 |
|
}, |
|
{ |
|
"loss": 359.6815, |
|
"grad_norm": 42.92584991455078, |
|
"learning_rate": 0.0001643477571789609, |
|
"epoch": 0.6108014606957525, |
|
"step": 681 |
|
}, |
|
{ |
|
"loss": 361.6625, |
|
"grad_norm": 42.53407287597656, |
|
"learning_rate": 0.00016368574284265165, |
|
"epoch": 0.6116983791402396, |
|
"step": 682 |
|
}, |
|
{ |
|
"loss": 363.5579, |
|
"grad_norm": 41.2686767578125, |
|
"learning_rate": 0.00016302441496061592, |
|
"epoch": 0.6125952975847268, |
|
"step": 683 |
|
}, |
|
{ |
|
"loss": 360.9108, |
|
"grad_norm": 42.09267044067383, |
|
"learning_rate": 0.00016236377879237136, |
|
"epoch": 0.6134922160292139, |
|
"step": 684 |
|
}, |
|
{ |
|
"loss": 360.2266, |
|
"grad_norm": 42.135650634765625, |
|
"learning_rate": 0.0001617038395919344, |
|
"epoch": 0.614389134473701, |
|
"step": 685 |
|
}, |
|
{ |
|
"loss": 355.2124, |
|
"grad_norm": 41.78007888793945, |
|
"learning_rate": 0.00016104460260777837, |
|
"epoch": 0.6152860529181883, |
|
"step": 686 |
|
}, |
|
{ |
|
"loss": 357.8339, |
|
"grad_norm": 41.49577713012695, |
|
"learning_rate": 0.00016038607308279198, |
|
"epoch": 0.6161829713626754, |
|
"step": 687 |
|
}, |
|
{ |
|
"loss": 361.7785, |
|
"grad_norm": 47.102848052978516, |
|
"learning_rate": 0.00015972825625423765, |
|
"epoch": 0.6170798898071626, |
|
"step": 688 |
|
}, |
|
{ |
|
"loss": 357.3535, |
|
"grad_norm": 41.43706512451172, |
|
"learning_rate": 0.0001590711573537096, |
|
"epoch": 0.6179768082516497, |
|
"step": 689 |
|
}, |
|
{ |
|
"loss": 359.8207, |
|
"grad_norm": 40.92182540893555, |
|
"learning_rate": 0.00015841478160709242, |
|
"epoch": 0.6188737266961368, |
|
"step": 690 |
|
}, |
|
{ |
|
"loss": 358.1373, |
|
"grad_norm": 49.461273193359375, |
|
"learning_rate": 0.0001577591342345195, |
|
"epoch": 0.619770645140624, |
|
"step": 691 |
|
}, |
|
{ |
|
"loss": 361.2856, |
|
"grad_norm": 50.03120040893555, |
|
"learning_rate": 0.00015710422045033158, |
|
"epoch": 0.6206675635851111, |
|
"step": 692 |
|
}, |
|
{ |
|
"loss": 359.0531, |
|
"grad_norm": 43.81147003173828, |
|
"learning_rate": 0.00015645004546303493, |
|
"epoch": 0.6215644820295984, |
|
"step": 693 |
|
}, |
|
{ |
|
"loss": 357.6739, |
|
"grad_norm": 44.85881042480469, |
|
"learning_rate": 0.00015579661447526067, |
|
"epoch": 0.6224614004740855, |
|
"step": 694 |
|
}, |
|
{ |
|
"loss": 358.5413, |
|
"grad_norm": 45.34134292602539, |
|
"learning_rate": 0.00015514393268372247, |
|
"epoch": 0.6233583189185726, |
|
"step": 695 |
|
}, |
|
{ |
|
"loss": 362.4291, |
|
"grad_norm": 44.94168472290039, |
|
"learning_rate": 0.00015449200527917578, |
|
"epoch": 0.6242552373630598, |
|
"step": 696 |
|
}, |
|
{ |
|
"loss": 353.4212, |
|
"grad_norm": 43.28814697265625, |
|
"learning_rate": 0.00015384083744637663, |
|
"epoch": 0.6251521558075469, |
|
"step": 697 |
|
}, |
|
{ |
|
"loss": 361.8906, |
|
"grad_norm": 42.88665008544922, |
|
"learning_rate": 0.00015319043436403992, |
|
"epoch": 0.626049074252034, |
|
"step": 698 |
|
}, |
|
{ |
|
"loss": 357.3509, |
|
"grad_norm": 46.005001068115234, |
|
"learning_rate": 0.00015254080120479874, |
|
"epoch": 0.6269459926965213, |
|
"step": 699 |
|
}, |
|
{ |
|
"loss": 356.4296, |
|
"grad_norm": 44.4104118347168, |
|
"learning_rate": 0.00015189194313516288, |
|
"epoch": 0.6278429111410084, |
|
"step": 700 |
|
}, |
|
{ |
|
"eval_loss": 1.597915768623352, |
|
"eval_runtime": 17.571, |
|
"eval_samples_per_second": 116.555, |
|
"eval_steps_per_second": 14.569, |
|
"epoch": 0.6278429111410084, |
|
"step": 700 |
|
}, |
|
{ |
|
"loss": 358.631, |
|
"grad_norm": 43.341407775878906, |
|
"learning_rate": 0.000151243865315478, |
|
"epoch": 0.6287398295854956, |
|
"step": 701 |
|
}, |
|
{ |
|
"loss": 361.772, |
|
"grad_norm": 43.18885803222656, |
|
"learning_rate": 0.00015059657289988426, |
|
"epoch": 0.6296367480299827, |
|
"step": 702 |
|
}, |
|
{ |
|
"loss": 359.0464, |
|
"grad_norm": 41.106483459472656, |
|
"learning_rate": 0.00014995007103627567, |
|
"epoch": 0.6305336664744698, |
|
"step": 703 |
|
}, |
|
{ |
|
"loss": 358.0773, |
|
"grad_norm": 42.815834045410156, |
|
"learning_rate": 0.00014930436486625907, |
|
"epoch": 0.631430584918957, |
|
"step": 704 |
|
}, |
|
{ |
|
"loss": 358.7279, |
|
"grad_norm": 39.7459602355957, |
|
"learning_rate": 0.00014865945952511296, |
|
"epoch": 0.6323275033634441, |
|
"step": 705 |
|
}, |
|
{ |
|
"loss": 358.3263, |
|
"grad_norm": 42.54743576049805, |
|
"learning_rate": 0.00014801536014174706, |
|
"epoch": 0.6332244218079314, |
|
"step": 706 |
|
}, |
|
{ |
|
"loss": 365.4639, |
|
"grad_norm": 45.69781494140625, |
|
"learning_rate": 0.00014737207183866118, |
|
"epoch": 0.6341213402524185, |
|
"step": 707 |
|
}, |
|
{ |
|
"loss": 357.4766, |
|
"grad_norm": 44.834136962890625, |
|
"learning_rate": 0.0001467295997319049, |
|
"epoch": 0.6350182586969056, |
|
"step": 708 |
|
}, |
|
{ |
|
"loss": 361.5132, |
|
"grad_norm": 40.79405975341797, |
|
"learning_rate": 0.00014608794893103646, |
|
"epoch": 0.6359151771413928, |
|
"step": 709 |
|
}, |
|
{ |
|
"loss": 361.108, |
|
"grad_norm": 40.1624870300293, |
|
"learning_rate": 0.00014544712453908216, |
|
"epoch": 0.6368120955858799, |
|
"step": 710 |
|
}, |
|
{ |
|
"loss": 357.4099, |
|
"grad_norm": 42.602073669433594, |
|
"learning_rate": 0.00014480713165249609, |
|
"epoch": 0.6377090140303671, |
|
"step": 711 |
|
}, |
|
{ |
|
"loss": 360.979, |
|
"grad_norm": 43.97264099121094, |
|
"learning_rate": 0.00014416797536111919, |
|
"epoch": 0.6386059324748542, |
|
"step": 712 |
|
}, |
|
{ |
|
"loss": 361.3081, |
|
"grad_norm": 40.94137191772461, |
|
"learning_rate": 0.00014352966074813932, |
|
"epoch": 0.6395028509193414, |
|
"step": 713 |
|
}, |
|
{ |
|
"loss": 359.9567, |
|
"grad_norm": 40.18381881713867, |
|
"learning_rate": 0.00014289219289005027, |
|
"epoch": 0.6403997693638286, |
|
"step": 714 |
|
}, |
|
{ |
|
"loss": 353.732, |
|
"grad_norm": 45.907203674316406, |
|
"learning_rate": 0.0001422555768566115, |
|
"epoch": 0.6412966878083157, |
|
"step": 715 |
|
}, |
|
{ |
|
"loss": 358.1761, |
|
"grad_norm": 46.9672737121582, |
|
"learning_rate": 0.0001416198177108083, |
|
"epoch": 0.6421936062528029, |
|
"step": 716 |
|
}, |
|
{ |
|
"loss": 358.2166, |
|
"grad_norm": 40.92546081542969, |
|
"learning_rate": 0.0001409849205088109, |
|
"epoch": 0.64309052469729, |
|
"step": 717 |
|
}, |
|
{ |
|
"loss": 358.0281, |
|
"grad_norm": 39.04634475708008, |
|
"learning_rate": 0.00014035089029993444, |
|
"epoch": 0.6439874431417771, |
|
"step": 718 |
|
}, |
|
{ |
|
"loss": 358.9151, |
|
"grad_norm": 41.55719757080078, |
|
"learning_rate": 0.00013971773212659929, |
|
"epoch": 0.6448843615862644, |
|
"step": 719 |
|
}, |
|
{ |
|
"loss": 356.5345, |
|
"grad_norm": 41.81498336791992, |
|
"learning_rate": 0.00013908545102429, |
|
"epoch": 0.6457812800307515, |
|
"step": 720 |
|
}, |
|
{ |
|
"loss": 358.3629, |
|
"grad_norm": 40.042484283447266, |
|
"learning_rate": 0.00013845405202151637, |
|
"epoch": 0.6466781984752387, |
|
"step": 721 |
|
}, |
|
{ |
|
"loss": 360.9086, |
|
"grad_norm": 44.207122802734375, |
|
"learning_rate": 0.00013782354013977245, |
|
"epoch": 0.6475751169197258, |
|
"step": 722 |
|
}, |
|
{ |
|
"loss": 357.7452, |
|
"grad_norm": 45.20026779174805, |
|
"learning_rate": 0.00013719392039349734, |
|
"epoch": 0.6484720353642129, |
|
"step": 723 |
|
}, |
|
{ |
|
"loss": 358.4982, |
|
"grad_norm": 41.07488250732422, |
|
"learning_rate": 0.00013656519779003476, |
|
"epoch": 0.6493689538087001, |
|
"step": 724 |
|
}, |
|
{ |
|
"loss": 361.3215, |
|
"grad_norm": 43.69713592529297, |
|
"learning_rate": 0.00013593737732959382, |
|
"epoch": 0.6502658722531872, |
|
"step": 725 |
|
}, |
|
{ |
|
"loss": 356.6879, |
|
"grad_norm": 45.356109619140625, |
|
"learning_rate": 0.00013531046400520858, |
|
"epoch": 0.6511627906976745, |
|
"step": 726 |
|
}, |
|
{ |
|
"loss": 363.6577, |
|
"grad_norm": 44.325103759765625, |
|
"learning_rate": 0.0001346844628026988, |
|
"epoch": 0.6520597091421616, |
|
"step": 727 |
|
}, |
|
{ |
|
"loss": 358.3399, |
|
"grad_norm": 40.79582595825195, |
|
"learning_rate": 0.0001340593787006303, |
|
"epoch": 0.6529566275866487, |
|
"step": 728 |
|
}, |
|
{ |
|
"loss": 360.8162, |
|
"grad_norm": 40.47697448730469, |
|
"learning_rate": 0.0001334352166702751, |
|
"epoch": 0.6538535460311359, |
|
"step": 729 |
|
}, |
|
{ |
|
"loss": 356.254, |
|
"grad_norm": 43.549407958984375, |
|
"learning_rate": 0.00013281198167557185, |
|
"epoch": 0.654750464475623, |
|
"step": 730 |
|
}, |
|
{ |
|
"loss": 356.3695, |
|
"grad_norm": 41.08717727661133, |
|
"learning_rate": 0.00013218967867308694, |
|
"epoch": 0.6556473829201102, |
|
"step": 731 |
|
}, |
|
{ |
|
"loss": 359.2961, |
|
"grad_norm": 44.06740951538086, |
|
"learning_rate": 0.00013156831261197438, |
|
"epoch": 0.6565443013645973, |
|
"step": 732 |
|
}, |
|
{ |
|
"loss": 354.8276, |
|
"grad_norm": 44.14928436279297, |
|
"learning_rate": 0.00013094788843393657, |
|
"epoch": 0.6574412198090845, |
|
"step": 733 |
|
}, |
|
{ |
|
"loss": 356.655, |
|
"grad_norm": 41.25139236450195, |
|
"learning_rate": 0.0001303284110731856, |
|
"epoch": 0.6583381382535717, |
|
"step": 734 |
|
}, |
|
{ |
|
"loss": 359.9945, |
|
"grad_norm": 43.141475677490234, |
|
"learning_rate": 0.00012970988545640307, |
|
"epoch": 0.6592350566980588, |
|
"step": 735 |
|
}, |
|
{ |
|
"loss": 354.7369, |
|
"grad_norm": 45.27100372314453, |
|
"learning_rate": 0.0001290923165027017, |
|
"epoch": 0.660131975142546, |
|
"step": 736 |
|
}, |
|
{ |
|
"loss": 357.4191, |
|
"grad_norm": 41.795658111572266, |
|
"learning_rate": 0.0001284757091235859, |
|
"epoch": 0.6610288935870331, |
|
"step": 737 |
|
}, |
|
{ |
|
"loss": 353.508, |
|
"grad_norm": 43.1330680847168, |
|
"learning_rate": 0.0001278600682229126, |
|
"epoch": 0.6619258120315202, |
|
"step": 738 |
|
}, |
|
{ |
|
"loss": 356.3365, |
|
"grad_norm": 43.488121032714844, |
|
"learning_rate": 0.00012724539869685226, |
|
"epoch": 0.6628227304760075, |
|
"step": 739 |
|
}, |
|
{ |
|
"loss": 357.6046, |
|
"grad_norm": 42.182777404785156, |
|
"learning_rate": 0.0001266317054338503, |
|
"epoch": 0.6637196489204946, |
|
"step": 740 |
|
}, |
|
{ |
|
"loss": 358.7371, |
|
"grad_norm": 43.06134796142578, |
|
"learning_rate": 0.00012601899331458777, |
|
"epoch": 0.6646165673649818, |
|
"step": 741 |
|
}, |
|
{ |
|
"loss": 358.2452, |
|
"grad_norm": 40.01738357543945, |
|
"learning_rate": 0.00012540726721194266, |
|
"epoch": 0.6655134858094689, |
|
"step": 742 |
|
}, |
|
{ |
|
"loss": 361.5233, |
|
"grad_norm": 40.66733169555664, |
|
"learning_rate": 0.0001247965319909515, |
|
"epoch": 0.666410404253956, |
|
"step": 743 |
|
}, |
|
{ |
|
"loss": 354.1553, |
|
"grad_norm": 39.47666931152344, |
|
"learning_rate": 0.0001241867925087701, |
|
"epoch": 0.6673073226984432, |
|
"step": 744 |
|
}, |
|
{ |
|
"loss": 358.3203, |
|
"grad_norm": 39.22403335571289, |
|
"learning_rate": 0.00012357805361463514, |
|
"epoch": 0.6682042411429303, |
|
"step": 745 |
|
}, |
|
{ |
|
"loss": 357.0617, |
|
"grad_norm": 39.071529388427734, |
|
"learning_rate": 0.00012297032014982597, |
|
"epoch": 0.6691011595874176, |
|
"step": 746 |
|
}, |
|
{ |
|
"loss": 362.905, |
|
"grad_norm": 40.75625228881836, |
|
"learning_rate": 0.0001223635969476255, |
|
"epoch": 0.6699980780319047, |
|
"step": 747 |
|
}, |
|
{ |
|
"loss": 354.9351, |
|
"grad_norm": 42.89009094238281, |
|
"learning_rate": 0.00012175788883328232, |
|
"epoch": 0.6708949964763918, |
|
"step": 748 |
|
}, |
|
{ |
|
"loss": 359.415, |
|
"grad_norm": 43.072513580322266, |
|
"learning_rate": 0.0001211532006239718, |
|
"epoch": 0.671791914920879, |
|
"step": 749 |
|
}, |
|
{ |
|
"loss": 357.7546, |
|
"grad_norm": 40.25785446166992, |
|
"learning_rate": 0.00012054953712875807, |
|
"epoch": 0.6726888333653661, |
|
"step": 750 |
|
}, |
|
{ |
|
"eval_loss": 1.609327793121338, |
|
"eval_runtime": 17.5285, |
|
"eval_samples_per_second": 116.839, |
|
"eval_steps_per_second": 14.605, |
|
"epoch": 0.6726888333653661, |
|
"step": 750 |
|
}, |
|
{ |
|
"loss": 357.2794, |
|
"grad_norm": 41.602596282958984, |
|
"learning_rate": 0.00011994690314855598, |
|
"epoch": 0.6735857518098533, |
|
"step": 751 |
|
}, |
|
{ |
|
"loss": 361.091, |
|
"grad_norm": 41.749717712402344, |
|
"learning_rate": 0.00011934530347609257, |
|
"epoch": 0.6744826702543405, |
|
"step": 752 |
|
}, |
|
{ |
|
"loss": 362.0817, |
|
"grad_norm": 39.51606369018555, |
|
"learning_rate": 0.00011874474289586895, |
|
"epoch": 0.6753795886988276, |
|
"step": 753 |
|
}, |
|
{ |
|
"loss": 356.8317, |
|
"grad_norm": 40.00758743286133, |
|
"learning_rate": 0.00011814522618412235, |
|
"epoch": 0.6762765071433148, |
|
"step": 754 |
|
}, |
|
{ |
|
"loss": 359.7722, |
|
"grad_norm": 41.676292419433594, |
|
"learning_rate": 0.00011754675810878845, |
|
"epoch": 0.6771734255878019, |
|
"step": 755 |
|
}, |
|
{ |
|
"loss": 359.641, |
|
"grad_norm": 41.25587463378906, |
|
"learning_rate": 0.00011694934342946287, |
|
"epoch": 0.678070344032289, |
|
"step": 756 |
|
}, |
|
{ |
|
"loss": 352.955, |
|
"grad_norm": 40.348514556884766, |
|
"learning_rate": 0.00011635298689736357, |
|
"epoch": 0.6789672624767762, |
|
"step": 757 |
|
}, |
|
{ |
|
"loss": 362.8987, |
|
"grad_norm": 43.387184143066406, |
|
"learning_rate": 0.00011575769325529342, |
|
"epoch": 0.6798641809212633, |
|
"step": 758 |
|
}, |
|
{ |
|
"loss": 357.0482, |
|
"grad_norm": 40.06668472290039, |
|
"learning_rate": 0.00011516346723760193, |
|
"epoch": 0.6807610993657506, |
|
"step": 759 |
|
}, |
|
{ |
|
"loss": 359.7377, |
|
"grad_norm": 39.39516830444336, |
|
"learning_rate": 0.00011457031357014772, |
|
"epoch": 0.6816580178102377, |
|
"step": 760 |
|
}, |
|
{ |
|
"loss": 362.0869, |
|
"grad_norm": 39.07398223876953, |
|
"learning_rate": 0.0001139782369702614, |
|
"epoch": 0.6825549362547249, |
|
"step": 761 |
|
}, |
|
{ |
|
"loss": 357.4482, |
|
"grad_norm": 42.54057312011719, |
|
"learning_rate": 0.00011338724214670734, |
|
"epoch": 0.683451854699212, |
|
"step": 762 |
|
}, |
|
{ |
|
"loss": 360.6057, |
|
"grad_norm": 40.7839241027832, |
|
"learning_rate": 0.00011279733379964691, |
|
"epoch": 0.6843487731436991, |
|
"step": 763 |
|
}, |
|
{ |
|
"loss": 362.9106, |
|
"grad_norm": 41.402889251708984, |
|
"learning_rate": 0.00011220851662060047, |
|
"epoch": 0.6852456915881863, |
|
"step": 764 |
|
}, |
|
{ |
|
"loss": 357.1811, |
|
"grad_norm": 41.3732795715332, |
|
"learning_rate": 0.00011162079529241042, |
|
"epoch": 0.6861426100326734, |
|
"step": 765 |
|
}, |
|
{ |
|
"loss": 358.0857, |
|
"grad_norm": 42.31522750854492, |
|
"learning_rate": 0.00011103417448920406, |
|
"epoch": 0.6870395284771607, |
|
"step": 766 |
|
}, |
|
{ |
|
"loss": 357.946, |
|
"grad_norm": 38.36897277832031, |
|
"learning_rate": 0.00011044865887635625, |
|
"epoch": 0.6879364469216478, |
|
"step": 767 |
|
}, |
|
{ |
|
"loss": 360.9647, |
|
"grad_norm": 43.01420974731445, |
|
"learning_rate": 0.00010986425311045212, |
|
"epoch": 0.6888333653661349, |
|
"step": 768 |
|
}, |
|
{ |
|
"loss": 362.1032, |
|
"grad_norm": 40.731163024902344, |
|
"learning_rate": 0.00010928096183925024, |
|
"epoch": 0.6897302838106221, |
|
"step": 769 |
|
}, |
|
{ |
|
"loss": 363.3222, |
|
"grad_norm": 41.69025421142578, |
|
"learning_rate": 0.00010869878970164587, |
|
"epoch": 0.6906272022551092, |
|
"step": 770 |
|
}, |
|
{ |
|
"loss": 358.3542, |
|
"grad_norm": 37.463043212890625, |
|
"learning_rate": 0.00010811774132763366, |
|
"epoch": 0.6915241206995963, |
|
"step": 771 |
|
}, |
|
{ |
|
"loss": 364.5648, |
|
"grad_norm": 38.481815338134766, |
|
"learning_rate": 0.00010753782133827093, |
|
"epoch": 0.6924210391440836, |
|
"step": 772 |
|
}, |
|
{ |
|
"loss": 361.0055, |
|
"grad_norm": 39.70282745361328, |
|
"learning_rate": 0.00010695903434564124, |
|
"epoch": 0.6933179575885707, |
|
"step": 773 |
|
}, |
|
{ |
|
"loss": 359.3154, |
|
"grad_norm": 38.182132720947266, |
|
"learning_rate": 0.00010638138495281725, |
|
"epoch": 0.6942148760330579, |
|
"step": 774 |
|
}, |
|
{ |
|
"loss": 356.322, |
|
"grad_norm": 37.12331008911133, |
|
"learning_rate": 0.00010580487775382449, |
|
"epoch": 0.695111794477545, |
|
"step": 775 |
|
}, |
|
{ |
|
"loss": 356.3972, |
|
"grad_norm": 40.065006256103516, |
|
"learning_rate": 0.00010522951733360456, |
|
"epoch": 0.6960087129220321, |
|
"step": 776 |
|
}, |
|
{ |
|
"loss": 351.4366, |
|
"grad_norm": 40.21229553222656, |
|
"learning_rate": 0.0001046553082679787, |
|
"epoch": 0.6969056313665193, |
|
"step": 777 |
|
}, |
|
{ |
|
"loss": 356.3872, |
|
"grad_norm": 39.17121124267578, |
|
"learning_rate": 0.00010408225512361171, |
|
"epoch": 0.6978025498110064, |
|
"step": 778 |
|
}, |
|
{ |
|
"loss": 358.5863, |
|
"grad_norm": 38.62257766723633, |
|
"learning_rate": 0.0001035103624579751, |
|
"epoch": 0.6986994682554937, |
|
"step": 779 |
|
}, |
|
{ |
|
"loss": 359.1902, |
|
"grad_norm": 39.73896408081055, |
|
"learning_rate": 0.00010293963481931143, |
|
"epoch": 0.6995963866999808, |
|
"step": 780 |
|
}, |
|
{ |
|
"loss": 357.0757, |
|
"grad_norm": 38.72207260131836, |
|
"learning_rate": 0.00010237007674659752, |
|
"epoch": 0.700493305144468, |
|
"step": 781 |
|
}, |
|
{ |
|
"loss": 359.07, |
|
"grad_norm": 39.15367126464844, |
|
"learning_rate": 0.00010180169276950899, |
|
"epoch": 0.7013902235889551, |
|
"step": 782 |
|
}, |
|
{ |
|
"loss": 357.7226, |
|
"grad_norm": 39.2513542175293, |
|
"learning_rate": 0.00010123448740838367, |
|
"epoch": 0.7022871420334422, |
|
"step": 783 |
|
}, |
|
{ |
|
"loss": 359.4571, |
|
"grad_norm": 41.660953521728516, |
|
"learning_rate": 0.00010066846517418596, |
|
"epoch": 0.7031840604779294, |
|
"step": 784 |
|
}, |
|
{ |
|
"loss": 358.3033, |
|
"grad_norm": 40.074806213378906, |
|
"learning_rate": 0.00010010363056847103, |
|
"epoch": 0.7040809789224165, |
|
"step": 785 |
|
}, |
|
{ |
|
"loss": 358.5859, |
|
"grad_norm": 40.53306198120117, |
|
"learning_rate": 9.953998808334874e-05, |
|
"epoch": 0.7049778973669037, |
|
"step": 786 |
|
}, |
|
{ |
|
"loss": 353.3639, |
|
"grad_norm": 43.58430099487305, |
|
"learning_rate": 9.8977542201448e-05, |
|
"epoch": 0.7058748158113909, |
|
"step": 787 |
|
}, |
|
{ |
|
"loss": 359.5676, |
|
"grad_norm": 39.986785888671875, |
|
"learning_rate": 9.841629739588145e-05, |
|
"epoch": 0.706771734255878, |
|
"step": 788 |
|
}, |
|
{ |
|
"loss": 361.0522, |
|
"grad_norm": 41.356590270996094, |
|
"learning_rate": 9.785625813020923e-05, |
|
"epoch": 0.7076686527003652, |
|
"step": 789 |
|
}, |
|
{ |
|
"loss": 355.244, |
|
"grad_norm": 40.596397399902344, |
|
"learning_rate": 9.729742885840429e-05, |
|
"epoch": 0.7085655711448523, |
|
"step": 790 |
|
}, |
|
{ |
|
"loss": 358.6471, |
|
"grad_norm": 39.8510627746582, |
|
"learning_rate": 9.673981402481619e-05, |
|
"epoch": 0.7094624895893394, |
|
"step": 791 |
|
}, |
|
{ |
|
"loss": 355.7997, |
|
"grad_norm": 37.443397521972656, |
|
"learning_rate": 9.618341806413614e-05, |
|
"epoch": 0.7103594080338267, |
|
"step": 792 |
|
}, |
|
{ |
|
"loss": 358.5055, |
|
"grad_norm": 38.937034606933594, |
|
"learning_rate": 9.562824540136192e-05, |
|
"epoch": 0.7112563264783138, |
|
"step": 793 |
|
}, |
|
{ |
|
"loss": 357.9367, |
|
"grad_norm": 39.378326416015625, |
|
"learning_rate": 9.507430045176238e-05, |
|
"epoch": 0.712153244922801, |
|
"step": 794 |
|
}, |
|
{ |
|
"loss": 356.7012, |
|
"grad_norm": 40.44821548461914, |
|
"learning_rate": 9.452158762084228e-05, |
|
"epoch": 0.7130501633672881, |
|
"step": 795 |
|
}, |
|
{ |
|
"loss": 361.7253, |
|
"grad_norm": 39.721378326416016, |
|
"learning_rate": 9.397011130430741e-05, |
|
"epoch": 0.7139470818117752, |
|
"step": 796 |
|
}, |
|
{ |
|
"loss": 359.5762, |
|
"grad_norm": 40.48420333862305, |
|
"learning_rate": 9.341987588802984e-05, |
|
"epoch": 0.7148440002562624, |
|
"step": 797 |
|
}, |
|
{ |
|
"loss": 355.1304, |
|
"grad_norm": 38.8956413269043, |
|
"learning_rate": 9.287088574801248e-05, |
|
"epoch": 0.7157409187007495, |
|
"step": 798 |
|
}, |
|
{ |
|
"loss": 360.5678, |
|
"grad_norm": 41.26605987548828, |
|
"learning_rate": 9.23231452503547e-05, |
|
"epoch": 0.7166378371452368, |
|
"step": 799 |
|
}, |
|
{ |
|
"loss": 359.8319, |
|
"grad_norm": 36.14881134033203, |
|
"learning_rate": 9.177665875121774e-05, |
|
"epoch": 0.7175347555897239, |
|
"step": 800 |
|
}, |
|
{ |
|
"eval_loss": 1.5968618392944336, |
|
"eval_runtime": 17.8479, |
|
"eval_samples_per_second": 114.747, |
|
"eval_steps_per_second": 14.343, |
|
"epoch": 0.7175347555897239, |
|
"step": 800 |
|
}, |
|
{ |
|
"loss": 361.1777, |
|
"grad_norm": 40.25320053100586, |
|
"learning_rate": 9.123143059678952e-05, |
|
"epoch": 0.718431674034211, |
|
"step": 801 |
|
}, |
|
{ |
|
"loss": 355.5561, |
|
"grad_norm": 39.248783111572266, |
|
"learning_rate": 9.068746512325046e-05, |
|
"epoch": 0.7193285924786982, |
|
"step": 802 |
|
}, |
|
{ |
|
"loss": 353.493, |
|
"grad_norm": 41.21136474609375, |
|
"learning_rate": 9.014476665673915e-05, |
|
"epoch": 0.7202255109231853, |
|
"step": 803 |
|
}, |
|
{ |
|
"loss": 355.8681, |
|
"grad_norm": 38.923973083496094, |
|
"learning_rate": 8.960333951331739e-05, |
|
"epoch": 0.7211224293676725, |
|
"step": 804 |
|
}, |
|
{ |
|
"loss": 355.0969, |
|
"grad_norm": 43.01164627075195, |
|
"learning_rate": 8.906318799893648e-05, |
|
"epoch": 0.7220193478121597, |
|
"step": 805 |
|
}, |
|
{ |
|
"loss": 354.1833, |
|
"grad_norm": 39.02459716796875, |
|
"learning_rate": 8.852431640940247e-05, |
|
"epoch": 0.7229162662566468, |
|
"step": 806 |
|
}, |
|
{ |
|
"loss": 359.125, |
|
"grad_norm": 37.63704299926758, |
|
"learning_rate": 8.798672903034225e-05, |
|
"epoch": 0.723813184701134, |
|
"step": 807 |
|
}, |
|
{ |
|
"loss": 355.6418, |
|
"grad_norm": 38.401512145996094, |
|
"learning_rate": 8.745043013716955e-05, |
|
"epoch": 0.7247101031456211, |
|
"step": 808 |
|
}, |
|
{ |
|
"loss": 358.6194, |
|
"grad_norm": 37.391685485839844, |
|
"learning_rate": 8.691542399505081e-05, |
|
"epoch": 0.7256070215901083, |
|
"step": 809 |
|
}, |
|
{ |
|
"loss": 359.1611, |
|
"grad_norm": 40.48008728027344, |
|
"learning_rate": 8.638171485887111e-05, |
|
"epoch": 0.7265039400345954, |
|
"step": 810 |
|
}, |
|
{ |
|
"loss": 359.4613, |
|
"grad_norm": 40.47174835205078, |
|
"learning_rate": 8.584930697320053e-05, |
|
"epoch": 0.7274008584790825, |
|
"step": 811 |
|
}, |
|
{ |
|
"loss": 351.1801, |
|
"grad_norm": 39.59210968017578, |
|
"learning_rate": 8.531820457226055e-05, |
|
"epoch": 0.7282977769235698, |
|
"step": 812 |
|
}, |
|
{ |
|
"loss": 355.662, |
|
"grad_norm": 36.89620590209961, |
|
"learning_rate": 8.478841187988992e-05, |
|
"epoch": 0.7291946953680569, |
|
"step": 813 |
|
}, |
|
{ |
|
"loss": 361.7194, |
|
"grad_norm": 38.956214904785156, |
|
"learning_rate": 8.425993310951132e-05, |
|
"epoch": 0.7300916138125441, |
|
"step": 814 |
|
}, |
|
{ |
|
"loss": 359.9547, |
|
"grad_norm": 36.15619659423828, |
|
"learning_rate": 8.373277246409818e-05, |
|
"epoch": 0.7309885322570312, |
|
"step": 815 |
|
}, |
|
{ |
|
"loss": 353.2803, |
|
"grad_norm": 41.085899353027344, |
|
"learning_rate": 8.320693413614053e-05, |
|
"epoch": 0.7318854507015183, |
|
"step": 816 |
|
}, |
|
{ |
|
"loss": 356.6743, |
|
"grad_norm": 40.31721878051758, |
|
"learning_rate": 8.268242230761239e-05, |
|
"epoch": 0.7327823691460055, |
|
"step": 817 |
|
}, |
|
{ |
|
"loss": 356.205, |
|
"grad_norm": 41.351558685302734, |
|
"learning_rate": 8.215924114993792e-05, |
|
"epoch": 0.7336792875904926, |
|
"step": 818 |
|
}, |
|
{ |
|
"loss": 360.4526, |
|
"grad_norm": 39.119476318359375, |
|
"learning_rate": 8.163739482395851e-05, |
|
"epoch": 0.7345762060349799, |
|
"step": 819 |
|
}, |
|
{ |
|
"loss": 361.5057, |
|
"grad_norm": 38.80229949951172, |
|
"learning_rate": 8.111688747990001e-05, |
|
"epoch": 0.735473124479467, |
|
"step": 820 |
|
}, |
|
{ |
|
"loss": 352.7518, |
|
"grad_norm": 40.22185134887695, |
|
"learning_rate": 8.059772325733899e-05, |
|
"epoch": 0.7363700429239541, |
|
"step": 821 |
|
}, |
|
{ |
|
"loss": 356.2066, |
|
"grad_norm": 40.426979064941406, |
|
"learning_rate": 8.007990628517034e-05, |
|
"epoch": 0.7372669613684413, |
|
"step": 822 |
|
}, |
|
{ |
|
"loss": 358.5974, |
|
"grad_norm": 39.50589370727539, |
|
"learning_rate": 7.956344068157443e-05, |
|
"epoch": 0.7381638798129284, |
|
"step": 823 |
|
}, |
|
{ |
|
"loss": 360.1032, |
|
"grad_norm": 38.537113189697266, |
|
"learning_rate": 7.904833055398428e-05, |
|
"epoch": 0.7390607982574156, |
|
"step": 824 |
|
}, |
|
{ |
|
"loss": 358.6521, |
|
"grad_norm": 38.09297180175781, |
|
"learning_rate": 7.853457999905264e-05, |
|
"epoch": 0.7399577167019028, |
|
"step": 825 |
|
}, |
|
{ |
|
"loss": 358.724, |
|
"grad_norm": 38.27792739868164, |
|
"learning_rate": 7.802219310261965e-05, |
|
"epoch": 0.7408546351463899, |
|
"step": 826 |
|
}, |
|
{ |
|
"loss": 361.0538, |
|
"grad_norm": 40.946353912353516, |
|
"learning_rate": 7.75111739396806e-05, |
|
"epoch": 0.7417515535908771, |
|
"step": 827 |
|
}, |
|
{ |
|
"loss": 354.2574, |
|
"grad_norm": 37.80830764770508, |
|
"learning_rate": 7.700152657435297e-05, |
|
"epoch": 0.7426484720353642, |
|
"step": 828 |
|
}, |
|
{ |
|
"loss": 356.4567, |
|
"grad_norm": 39.698429107666016, |
|
"learning_rate": 7.649325505984434e-05, |
|
"epoch": 0.7435453904798514, |
|
"step": 829 |
|
}, |
|
{ |
|
"loss": 355.0162, |
|
"grad_norm": 38.21966552734375, |
|
"learning_rate": 7.598636343842053e-05, |
|
"epoch": 0.7444423089243385, |
|
"step": 830 |
|
}, |
|
{ |
|
"loss": 356.4822, |
|
"grad_norm": 39.37642288208008, |
|
"learning_rate": 7.548085574137273e-05, |
|
"epoch": 0.7453392273688256, |
|
"step": 831 |
|
}, |
|
{ |
|
"loss": 357.8192, |
|
"grad_norm": 37.3087158203125, |
|
"learning_rate": 7.497673598898613e-05, |
|
"epoch": 0.7462361458133129, |
|
"step": 832 |
|
}, |
|
{ |
|
"loss": 363.7517, |
|
"grad_norm": 35.9515266418457, |
|
"learning_rate": 7.447400819050751e-05, |
|
"epoch": 0.7471330642578, |
|
"step": 833 |
|
}, |
|
{ |
|
"loss": 355.3728, |
|
"grad_norm": 36.964534759521484, |
|
"learning_rate": 7.397267634411337e-05, |
|
"epoch": 0.7480299827022872, |
|
"step": 834 |
|
}, |
|
{ |
|
"loss": 354.5074, |
|
"grad_norm": 39.167415618896484, |
|
"learning_rate": 7.347274443687855e-05, |
|
"epoch": 0.7489269011467743, |
|
"step": 835 |
|
}, |
|
{ |
|
"loss": 361.1248, |
|
"grad_norm": 40.1679801940918, |
|
"learning_rate": 7.297421644474387e-05, |
|
"epoch": 0.7498238195912614, |
|
"step": 836 |
|
}, |
|
{ |
|
"loss": 357.9431, |
|
"grad_norm": 38.67217254638672, |
|
"learning_rate": 7.247709633248526e-05, |
|
"epoch": 0.7507207380357486, |
|
"step": 837 |
|
}, |
|
{ |
|
"loss": 360.9297, |
|
"grad_norm": 37.734153747558594, |
|
"learning_rate": 7.198138805368143e-05, |
|
"epoch": 0.7516176564802357, |
|
"step": 838 |
|
}, |
|
{ |
|
"loss": 350.7899, |
|
"grad_norm": 36.58796691894531, |
|
"learning_rate": 7.148709555068314e-05, |
|
"epoch": 0.752514574924723, |
|
"step": 839 |
|
}, |
|
{ |
|
"loss": 358.5099, |
|
"grad_norm": 37.6004753112793, |
|
"learning_rate": 7.09942227545814e-05, |
|
"epoch": 0.7534114933692101, |
|
"step": 840 |
|
}, |
|
{ |
|
"loss": 350.2813, |
|
"grad_norm": 39.31602096557617, |
|
"learning_rate": 7.05027735851762e-05, |
|
"epoch": 0.7543084118136972, |
|
"step": 841 |
|
}, |
|
{ |
|
"loss": 361.4473, |
|
"grad_norm": 37.72463607788086, |
|
"learning_rate": 7.001275195094581e-05, |
|
"epoch": 0.7552053302581844, |
|
"step": 842 |
|
}, |
|
{ |
|
"loss": 356.7912, |
|
"grad_norm": 36.68344497680664, |
|
"learning_rate": 6.952416174901504e-05, |
|
"epoch": 0.7561022487026715, |
|
"step": 843 |
|
}, |
|
{ |
|
"loss": 360.7002, |
|
"grad_norm": 39.82998275756836, |
|
"learning_rate": 6.903700686512485e-05, |
|
"epoch": 0.7569991671471586, |
|
"step": 844 |
|
}, |
|
{ |
|
"loss": 357.1058, |
|
"grad_norm": 39.26710510253906, |
|
"learning_rate": 6.855129117360095e-05, |
|
"epoch": 0.7578960855916459, |
|
"step": 845 |
|
}, |
|
{ |
|
"loss": 356.4349, |
|
"grad_norm": 37.95897674560547, |
|
"learning_rate": 6.806701853732319e-05, |
|
"epoch": 0.758793004036133, |
|
"step": 846 |
|
}, |
|
{ |
|
"loss": 353.9336, |
|
"grad_norm": 36.72467041015625, |
|
"learning_rate": 6.75841928076951e-05, |
|
"epoch": 0.7596899224806202, |
|
"step": 847 |
|
}, |
|
{ |
|
"loss": 355.9283, |
|
"grad_norm": 38.29819869995117, |
|
"learning_rate": 6.710281782461275e-05, |
|
"epoch": 0.7605868409251073, |
|
"step": 848 |
|
}, |
|
{ |
|
"loss": 357.5876, |
|
"grad_norm": 39.196720123291016, |
|
"learning_rate": 6.662289741643454e-05, |
|
"epoch": 0.7614837593695944, |
|
"step": 849 |
|
}, |
|
{ |
|
"loss": 359.8077, |
|
"grad_norm": 40.00128936767578, |
|
"learning_rate": 6.614443539995078e-05, |
|
"epoch": 0.7623806778140816, |
|
"step": 850 |
|
}, |
|
{ |
|
"eval_loss": 1.582360863685608, |
|
"eval_runtime": 18.4592, |
|
"eval_samples_per_second": 110.947, |
|
"eval_steps_per_second": 13.868, |
|
"epoch": 0.7623806778140816, |
|
"step": 850 |
|
}, |
|
{ |
|
"loss": 355.6048, |
|
"grad_norm": 38.59453582763672, |
|
"learning_rate": 6.56674355803532e-05, |
|
"epoch": 0.7632775962585687, |
|
"step": 851 |
|
}, |
|
{ |
|
"loss": 360.1093, |
|
"grad_norm": 39.37229537963867, |
|
"learning_rate": 6.519190175120473e-05, |
|
"epoch": 0.764174514703056, |
|
"step": 852 |
|
}, |
|
{ |
|
"loss": 357.6195, |
|
"grad_norm": 36.07246017456055, |
|
"learning_rate": 6.47178376944092e-05, |
|
"epoch": 0.7650714331475431, |
|
"step": 853 |
|
}, |
|
{ |
|
"loss": 357.4596, |
|
"grad_norm": 36.77618408203125, |
|
"learning_rate": 6.424524718018163e-05, |
|
"epoch": 0.7659683515920302, |
|
"step": 854 |
|
}, |
|
{ |
|
"loss": 359.593, |
|
"grad_norm": 36.766483306884766, |
|
"learning_rate": 6.377413396701781e-05, |
|
"epoch": 0.7668652700365174, |
|
"step": 855 |
|
}, |
|
{ |
|
"loss": 356.4777, |
|
"grad_norm": 43.47877502441406, |
|
"learning_rate": 6.330450180166464e-05, |
|
"epoch": 0.7677621884810045, |
|
"step": 856 |
|
}, |
|
{ |
|
"loss": 353.8591, |
|
"grad_norm": 39.65815353393555, |
|
"learning_rate": 6.283635441909044e-05, |
|
"epoch": 0.7686591069254917, |
|
"step": 857 |
|
}, |
|
{ |
|
"loss": 358.9107, |
|
"grad_norm": 42.22090148925781, |
|
"learning_rate": 6.236969554245486e-05, |
|
"epoch": 0.7695560253699789, |
|
"step": 858 |
|
}, |
|
{ |
|
"loss": 361.3808, |
|
"grad_norm": 37.009342193603516, |
|
"learning_rate": 6.19045288830798e-05, |
|
"epoch": 0.770452943814466, |
|
"step": 859 |
|
}, |
|
{ |
|
"loss": 359.7101, |
|
"grad_norm": 36.62922668457031, |
|
"learning_rate": 6.144085814041941e-05, |
|
"epoch": 0.7713498622589532, |
|
"step": 860 |
|
}, |
|
{ |
|
"loss": 360.3506, |
|
"grad_norm": 35.92998123168945, |
|
"learning_rate": 6.097868700203082e-05, |
|
"epoch": 0.7722467807034403, |
|
"step": 861 |
|
}, |
|
{ |
|
"loss": 352.6364, |
|
"grad_norm": 40.08286666870117, |
|
"learning_rate": 6.05180191435451e-05, |
|
"epoch": 0.7731436991479275, |
|
"step": 862 |
|
}, |
|
{ |
|
"loss": 356.8879, |
|
"grad_norm": 38.76757049560547, |
|
"learning_rate": 6.0058858228637605e-05, |
|
"epoch": 0.7740406175924146, |
|
"step": 863 |
|
}, |
|
{ |
|
"loss": 355.7852, |
|
"grad_norm": 37.80318069458008, |
|
"learning_rate": 5.960120790899895e-05, |
|
"epoch": 0.7749375360369017, |
|
"step": 864 |
|
}, |
|
{ |
|
"loss": 357.245, |
|
"grad_norm": 36.61247253417969, |
|
"learning_rate": 5.914507182430626e-05, |
|
"epoch": 0.775834454481389, |
|
"step": 865 |
|
}, |
|
{ |
|
"loss": 355.3506, |
|
"grad_norm": 37.76987838745117, |
|
"learning_rate": 5.869045360219391e-05, |
|
"epoch": 0.7767313729258761, |
|
"step": 866 |
|
}, |
|
{ |
|
"loss": 351.2185, |
|
"grad_norm": 37.881492614746094, |
|
"learning_rate": 5.8237356858224704e-05, |
|
"epoch": 0.7776282913703633, |
|
"step": 867 |
|
}, |
|
{ |
|
"loss": 360.2768, |
|
"grad_norm": 39.45249557495117, |
|
"learning_rate": 5.7785785195861194e-05, |
|
"epoch": 0.7785252098148504, |
|
"step": 868 |
|
}, |
|
{ |
|
"loss": 353.9251, |
|
"grad_norm": 39.94224548339844, |
|
"learning_rate": 5.733574220643712e-05, |
|
"epoch": 0.7794221282593375, |
|
"step": 869 |
|
}, |
|
{ |
|
"loss": 355.1441, |
|
"grad_norm": 37.91038513183594, |
|
"learning_rate": 5.688723146912858e-05, |
|
"epoch": 0.7803190467038247, |
|
"step": 870 |
|
}, |
|
{ |
|
"loss": 359.303, |
|
"grad_norm": 36.14017105102539, |
|
"learning_rate": 5.644025655092591e-05, |
|
"epoch": 0.7812159651483118, |
|
"step": 871 |
|
}, |
|
{ |
|
"loss": 359.8912, |
|
"grad_norm": 37.15394592285156, |
|
"learning_rate": 5.5994821006604965e-05, |
|
"epoch": 0.7821128835927991, |
|
"step": 872 |
|
}, |
|
{ |
|
"loss": 360.2237, |
|
"grad_norm": 35.74496078491211, |
|
"learning_rate": 5.555092837869902e-05, |
|
"epoch": 0.7830098020372862, |
|
"step": 873 |
|
}, |
|
{ |
|
"loss": 352.0333, |
|
"grad_norm": 37.32427215576172, |
|
"learning_rate": 5.5108582197470784e-05, |
|
"epoch": 0.7839067204817733, |
|
"step": 874 |
|
}, |
|
{ |
|
"loss": 359.9949, |
|
"grad_norm": 40.355411529541016, |
|
"learning_rate": 5.4667785980883897e-05, |
|
"epoch": 0.7848036389262605, |
|
"step": 875 |
|
}, |
|
{ |
|
"loss": 351.2752, |
|
"grad_norm": 36.727745056152344, |
|
"learning_rate": 5.422854323457527e-05, |
|
"epoch": 0.7857005573707476, |
|
"step": 876 |
|
}, |
|
{ |
|
"loss": 352.9948, |
|
"grad_norm": 37.40601348876953, |
|
"learning_rate": 5.379085745182721e-05, |
|
"epoch": 0.7865974758152348, |
|
"step": 877 |
|
}, |
|
{ |
|
"loss": 357.7682, |
|
"grad_norm": 36.147159576416016, |
|
"learning_rate": 5.335473211353942e-05, |
|
"epoch": 0.787494394259722, |
|
"step": 878 |
|
}, |
|
{ |
|
"loss": 360.3233, |
|
"grad_norm": 36.26030349731445, |
|
"learning_rate": 5.29201706882014e-05, |
|
"epoch": 0.7883913127042091, |
|
"step": 879 |
|
}, |
|
{ |
|
"loss": 354.8234, |
|
"grad_norm": 34.958744049072266, |
|
"learning_rate": 5.2487176631865114e-05, |
|
"epoch": 0.7892882311486963, |
|
"step": 880 |
|
}, |
|
{ |
|
"loss": 358.086, |
|
"grad_norm": 36.89348602294922, |
|
"learning_rate": 5.205575338811719e-05, |
|
"epoch": 0.7901851495931834, |
|
"step": 881 |
|
}, |
|
{ |
|
"loss": 357.6668, |
|
"grad_norm": 39.996177673339844, |
|
"learning_rate": 5.1625904388051564e-05, |
|
"epoch": 0.7910820680376706, |
|
"step": 882 |
|
}, |
|
{ |
|
"loss": 353.7882, |
|
"grad_norm": 36.440711975097656, |
|
"learning_rate": 5.119763305024225e-05, |
|
"epoch": 0.7919789864821577, |
|
"step": 883 |
|
}, |
|
{ |
|
"loss": 356.1277, |
|
"grad_norm": 36.0537223815918, |
|
"learning_rate": 5.077094278071642e-05, |
|
"epoch": 0.7928759049266448, |
|
"step": 884 |
|
}, |
|
{ |
|
"loss": 359.5157, |
|
"grad_norm": 35.76783752441406, |
|
"learning_rate": 5.034583697292674e-05, |
|
"epoch": 0.7937728233711321, |
|
"step": 885 |
|
}, |
|
{ |
|
"loss": 353.6391, |
|
"grad_norm": 34.94169998168945, |
|
"learning_rate": 4.9922319007724954e-05, |
|
"epoch": 0.7946697418156192, |
|
"step": 886 |
|
}, |
|
{ |
|
"loss": 361.0958, |
|
"grad_norm": 38.87442398071289, |
|
"learning_rate": 4.9500392253334635e-05, |
|
"epoch": 0.7955666602601064, |
|
"step": 887 |
|
}, |
|
{ |
|
"loss": 357.8425, |
|
"grad_norm": 36.01359558105469, |
|
"learning_rate": 4.908006006532445e-05, |
|
"epoch": 0.7964635787045935, |
|
"step": 888 |
|
}, |
|
{ |
|
"loss": 358.4057, |
|
"grad_norm": 39.11752700805664, |
|
"learning_rate": 4.866132578658172e-05, |
|
"epoch": 0.7973604971490806, |
|
"step": 889 |
|
}, |
|
{ |
|
"loss": 355.1286, |
|
"grad_norm": 37.169158935546875, |
|
"learning_rate": 4.8244192747285507e-05, |
|
"epoch": 0.7982574155935678, |
|
"step": 890 |
|
}, |
|
{ |
|
"loss": 356.0285, |
|
"grad_norm": 35.89703369140625, |
|
"learning_rate": 4.7828664264880254e-05, |
|
"epoch": 0.7991543340380549, |
|
"step": 891 |
|
}, |
|
{ |
|
"loss": 353.9138, |
|
"grad_norm": 35.52785873413086, |
|
"learning_rate": 4.741474364404955e-05, |
|
"epoch": 0.8000512524825422, |
|
"step": 892 |
|
}, |
|
{ |
|
"loss": 359.8646, |
|
"grad_norm": 35.992713928222656, |
|
"learning_rate": 4.7002434176689564e-05, |
|
"epoch": 0.8009481709270293, |
|
"step": 893 |
|
}, |
|
{ |
|
"loss": 360.1763, |
|
"grad_norm": 36.50730514526367, |
|
"learning_rate": 4.659173914188319e-05, |
|
"epoch": 0.8018450893715164, |
|
"step": 894 |
|
}, |
|
{ |
|
"loss": 356.7962, |
|
"grad_norm": 36.77907180786133, |
|
"learning_rate": 4.618266180587363e-05, |
|
"epoch": 0.8027420078160036, |
|
"step": 895 |
|
}, |
|
{ |
|
"loss": 354.5534, |
|
"grad_norm": 36.69013214111328, |
|
"learning_rate": 4.5775205422038695e-05, |
|
"epoch": 0.8036389262604907, |
|
"step": 896 |
|
}, |
|
{ |
|
"loss": 355.8555, |
|
"grad_norm": 36.079769134521484, |
|
"learning_rate": 4.536937323086479e-05, |
|
"epoch": 0.8045358447049779, |
|
"step": 897 |
|
}, |
|
{ |
|
"loss": 352.4216, |
|
"grad_norm": 36.98958969116211, |
|
"learning_rate": 4.4965168459921076e-05, |
|
"epoch": 0.8054327631494651, |
|
"step": 898 |
|
}, |
|
{ |
|
"loss": 354.3763, |
|
"grad_norm": 36.339656829833984, |
|
"learning_rate": 4.456259432383408e-05, |
|
"epoch": 0.8063296815939522, |
|
"step": 899 |
|
}, |
|
{ |
|
"loss": 353.9048, |
|
"grad_norm": 35.602909088134766, |
|
"learning_rate": 4.4161654024261756e-05, |
|
"epoch": 0.8072266000384394, |
|
"step": 900 |
|
}, |
|
{ |
|
"eval_loss": 1.581258773803711, |
|
"eval_runtime": 19.1453, |
|
"eval_samples_per_second": 106.971, |
|
"eval_steps_per_second": 13.371, |
|
"epoch": 0.8072266000384394, |
|
"step": 900 |
|
}, |
|
{ |
|
"loss": 353.9864, |
|
"grad_norm": 37.425819396972656, |
|
"learning_rate": 4.3762350749868425e-05, |
|
"epoch": 0.8081235184829265, |
|
"step": 901 |
|
}, |
|
{ |
|
"loss": 352.1746, |
|
"grad_norm": 36.96770095825195, |
|
"learning_rate": 4.336468767629906e-05, |
|
"epoch": 0.8090204369274137, |
|
"step": 902 |
|
}, |
|
{ |
|
"loss": 362.0162, |
|
"grad_norm": 36.64163589477539, |
|
"learning_rate": 4.296866796615406e-05, |
|
"epoch": 0.8099173553719008, |
|
"step": 903 |
|
}, |
|
{ |
|
"loss": 356.8323, |
|
"grad_norm": 37.755550384521484, |
|
"learning_rate": 4.257429476896454e-05, |
|
"epoch": 0.8108142738163879, |
|
"step": 904 |
|
}, |
|
{ |
|
"loss": 355.0851, |
|
"grad_norm": 35.74870300292969, |
|
"learning_rate": 4.2181571221166696e-05, |
|
"epoch": 0.8117111922608752, |
|
"step": 905 |
|
}, |
|
{ |
|
"loss": 354.1617, |
|
"grad_norm": 35.670047760009766, |
|
"learning_rate": 4.179050044607713e-05, |
|
"epoch": 0.8126081107053623, |
|
"step": 906 |
|
}, |
|
{ |
|
"loss": 354.9214, |
|
"grad_norm": 36.92220687866211, |
|
"learning_rate": 4.140108555386812e-05, |
|
"epoch": 0.8135050291498495, |
|
"step": 907 |
|
}, |
|
{ |
|
"loss": 351.6111, |
|
"grad_norm": 38.204166412353516, |
|
"learning_rate": 4.101332964154275e-05, |
|
"epoch": 0.8144019475943366, |
|
"step": 908 |
|
}, |
|
{ |
|
"loss": 355.9622, |
|
"grad_norm": 35.54768753051758, |
|
"learning_rate": 4.0627235792910224e-05, |
|
"epoch": 0.8152988660388237, |
|
"step": 909 |
|
}, |
|
{ |
|
"loss": 359.8922, |
|
"grad_norm": 37.4915771484375, |
|
"learning_rate": 4.024280707856134e-05, |
|
"epoch": 0.8161957844833109, |
|
"step": 910 |
|
}, |
|
{ |
|
"loss": 356.2166, |
|
"grad_norm": 36.84100341796875, |
|
"learning_rate": 3.9860046555844406e-05, |
|
"epoch": 0.8170927029277981, |
|
"step": 911 |
|
}, |
|
{ |
|
"loss": 355.0562, |
|
"grad_norm": 35.636878967285156, |
|
"learning_rate": 3.947895726884038e-05, |
|
"epoch": 0.8179896213722853, |
|
"step": 912 |
|
}, |
|
{ |
|
"loss": 360.0903, |
|
"grad_norm": 36.50727081298828, |
|
"learning_rate": 3.909954224833911e-05, |
|
"epoch": 0.8188865398167724, |
|
"step": 913 |
|
}, |
|
{ |
|
"loss": 359.0554, |
|
"grad_norm": 37.51554489135742, |
|
"learning_rate": 3.8721804511815007e-05, |
|
"epoch": 0.8197834582612595, |
|
"step": 914 |
|
}, |
|
{ |
|
"loss": 356.6491, |
|
"grad_norm": 36.2037239074707, |
|
"learning_rate": 3.834574706340302e-05, |
|
"epoch": 0.8206803767057467, |
|
"step": 915 |
|
}, |
|
{ |
|
"loss": 357.358, |
|
"grad_norm": 39.62883758544922, |
|
"learning_rate": 3.797137289387503e-05, |
|
"epoch": 0.8215772951502338, |
|
"step": 916 |
|
}, |
|
{ |
|
"loss": 356.6225, |
|
"grad_norm": 35.792728424072266, |
|
"learning_rate": 3.7598684980615694e-05, |
|
"epoch": 0.822474213594721, |
|
"step": 917 |
|
}, |
|
{ |
|
"loss": 351.0151, |
|
"grad_norm": 35.77069854736328, |
|
"learning_rate": 3.7227686287598874e-05, |
|
"epoch": 0.8233711320392082, |
|
"step": 918 |
|
}, |
|
{ |
|
"loss": 356.1569, |
|
"grad_norm": 36.655330657958984, |
|
"learning_rate": 3.685837976536435e-05, |
|
"epoch": 0.8242680504836953, |
|
"step": 919 |
|
}, |
|
{ |
|
"loss": 356.6186, |
|
"grad_norm": 35.82206726074219, |
|
"learning_rate": 3.649076835099399e-05, |
|
"epoch": 0.8251649689281825, |
|
"step": 920 |
|
}, |
|
{ |
|
"loss": 352.9849, |
|
"grad_norm": 36.314361572265625, |
|
"learning_rate": 3.612485496808843e-05, |
|
"epoch": 0.8260618873726696, |
|
"step": 921 |
|
}, |
|
{ |
|
"loss": 355.4819, |
|
"grad_norm": 37.96638870239258, |
|
"learning_rate": 3.57606425267441e-05, |
|
"epoch": 0.8269588058171568, |
|
"step": 922 |
|
}, |
|
{ |
|
"loss": 358.6233, |
|
"grad_norm": 36.10899353027344, |
|
"learning_rate": 3.539813392352989e-05, |
|
"epoch": 0.8278557242616439, |
|
"step": 923 |
|
}, |
|
{ |
|
"loss": 353.3172, |
|
"grad_norm": 34.54022216796875, |
|
"learning_rate": 3.5037332041464e-05, |
|
"epoch": 0.828752642706131, |
|
"step": 924 |
|
}, |
|
{ |
|
"loss": 357.7184, |
|
"grad_norm": 36.95024108886719, |
|
"learning_rate": 3.467823974999115e-05, |
|
"epoch": 0.8296495611506183, |
|
"step": 925 |
|
}, |
|
{ |
|
"loss": 352.9876, |
|
"grad_norm": 37.89804458618164, |
|
"learning_rate": 3.4320859904959924e-05, |
|
"epoch": 0.8305464795951054, |
|
"step": 926 |
|
}, |
|
{ |
|
"loss": 354.4651, |
|
"grad_norm": 36.63965606689453, |
|
"learning_rate": 3.3965195348599626e-05, |
|
"epoch": 0.8314433980395926, |
|
"step": 927 |
|
}, |
|
{ |
|
"loss": 356.9139, |
|
"grad_norm": 35.67973709106445, |
|
"learning_rate": 3.361124890949816e-05, |
|
"epoch": 0.8323403164840797, |
|
"step": 928 |
|
}, |
|
{ |
|
"loss": 358.1943, |
|
"grad_norm": 35.843719482421875, |
|
"learning_rate": 3.325902340257914e-05, |
|
"epoch": 0.8332372349285668, |
|
"step": 929 |
|
}, |
|
{ |
|
"loss": 352.4489, |
|
"grad_norm": 36.6231803894043, |
|
"learning_rate": 3.2908521629079704e-05, |
|
"epoch": 0.834134153373054, |
|
"step": 930 |
|
}, |
|
{ |
|
"loss": 350.1209, |
|
"grad_norm": 34.934112548828125, |
|
"learning_rate": 3.255974637652828e-05, |
|
"epoch": 0.8350310718175412, |
|
"step": 931 |
|
}, |
|
{ |
|
"loss": 356.8803, |
|
"grad_norm": 34.707252502441406, |
|
"learning_rate": 3.2212700418722265e-05, |
|
"epoch": 0.8359279902620284, |
|
"step": 932 |
|
}, |
|
{ |
|
"loss": 356.7214, |
|
"grad_norm": 35.543949127197266, |
|
"learning_rate": 3.186738651570595e-05, |
|
"epoch": 0.8368249087065155, |
|
"step": 933 |
|
}, |
|
{ |
|
"loss": 354.0534, |
|
"grad_norm": 35.74333572387695, |
|
"learning_rate": 3.1523807413748887e-05, |
|
"epoch": 0.8377218271510026, |
|
"step": 934 |
|
}, |
|
{ |
|
"loss": 350.9949, |
|
"grad_norm": 36.81149673461914, |
|
"learning_rate": 3.118196584532359e-05, |
|
"epoch": 0.8386187455954898, |
|
"step": 935 |
|
}, |
|
{ |
|
"loss": 355.0341, |
|
"grad_norm": 36.43380355834961, |
|
"learning_rate": 3.084186452908411e-05, |
|
"epoch": 0.8395156640399769, |
|
"step": 936 |
|
}, |
|
{ |
|
"loss": 357.6827, |
|
"grad_norm": 35.787872314453125, |
|
"learning_rate": 3.0503506169844373e-05, |
|
"epoch": 0.840412582484464, |
|
"step": 937 |
|
}, |
|
{ |
|
"loss": 353.5415, |
|
"grad_norm": 35.96485137939453, |
|
"learning_rate": 3.0166893458556666e-05, |
|
"epoch": 0.8413095009289513, |
|
"step": 938 |
|
}, |
|
{ |
|
"loss": 357.3773, |
|
"grad_norm": 33.9022216796875, |
|
"learning_rate": 2.983202907228999e-05, |
|
"epoch": 0.8422064193734384, |
|
"step": 939 |
|
}, |
|
{ |
|
"loss": 355.6847, |
|
"grad_norm": 36.94380187988281, |
|
"learning_rate": 2.949891567420923e-05, |
|
"epoch": 0.8431033378179256, |
|
"step": 940 |
|
}, |
|
{ |
|
"loss": 352.4488, |
|
"grad_norm": 36.33073043823242, |
|
"learning_rate": 2.9167555913553577e-05, |
|
"epoch": 0.8440002562624127, |
|
"step": 941 |
|
}, |
|
{ |
|
"loss": 355.2479, |
|
"grad_norm": 34.81533432006836, |
|
"learning_rate": 2.88379524256156e-05, |
|
"epoch": 0.8448971747068998, |
|
"step": 942 |
|
}, |
|
{ |
|
"loss": 359.0098, |
|
"grad_norm": 34.85913848876953, |
|
"learning_rate": 2.8510107831720393e-05, |
|
"epoch": 0.845794093151387, |
|
"step": 943 |
|
}, |
|
{ |
|
"loss": 355.3041, |
|
"grad_norm": 35.2500114440918, |
|
"learning_rate": 2.8184024739204534e-05, |
|
"epoch": 0.8466910115958741, |
|
"step": 944 |
|
}, |
|
{ |
|
"loss": 357.6105, |
|
"grad_norm": 36.625144958496094, |
|
"learning_rate": 2.7859705741395403e-05, |
|
"epoch": 0.8475879300403614, |
|
"step": 945 |
|
}, |
|
{ |
|
"loss": 355.7482, |
|
"grad_norm": 34.630428314208984, |
|
"learning_rate": 2.7537153417590803e-05, |
|
"epoch": 0.8484848484848485, |
|
"step": 946 |
|
}, |
|
{ |
|
"loss": 358.0374, |
|
"grad_norm": 35.17256164550781, |
|
"learning_rate": 2.721637033303803e-05, |
|
"epoch": 0.8493817669293356, |
|
"step": 947 |
|
}, |
|
{ |
|
"loss": 352.4902, |
|
"grad_norm": 36.90748596191406, |
|
"learning_rate": 2.6897359038913716e-05, |
|
"epoch": 0.8502786853738228, |
|
"step": 948 |
|
}, |
|
{ |
|
"loss": 356.3272, |
|
"grad_norm": 35.69559097290039, |
|
"learning_rate": 2.6580122072303647e-05, |
|
"epoch": 0.8511756038183099, |
|
"step": 949 |
|
}, |
|
{ |
|
"loss": 351.9118, |
|
"grad_norm": 34.44248580932617, |
|
"learning_rate": 2.6264661956182212e-05, |
|
"epoch": 0.8520725222627971, |
|
"step": 950 |
|
}, |
|
{ |
|
"eval_loss": 1.5959553718566895, |
|
"eval_runtime": 18.4817, |
|
"eval_samples_per_second": 110.812, |
|
"eval_steps_per_second": 13.852, |
|
"epoch": 0.8520725222627971, |
|
"step": 950 |
|
}, |
|
{ |
|
"loss": 356.2447, |
|
"grad_norm": 34.08928680419922, |
|
"learning_rate": 2.5950981199392847e-05, |
|
"epoch": 0.8529694407072843, |
|
"step": 951 |
|
}, |
|
{ |
|
"loss": 357.2951, |
|
"grad_norm": 35.93143844604492, |
|
"learning_rate": 2.5639082296627537e-05, |
|
"epoch": 0.8538663591517714, |
|
"step": 952 |
|
}, |
|
{ |
|
"loss": 357.1935, |
|
"grad_norm": 34.351898193359375, |
|
"learning_rate": 2.5328967728407454e-05, |
|
"epoch": 0.8547632775962586, |
|
"step": 953 |
|
}, |
|
{ |
|
"loss": 352.3139, |
|
"grad_norm": 36.010223388671875, |
|
"learning_rate": 2.5020639961062853e-05, |
|
"epoch": 0.8556601960407457, |
|
"step": 954 |
|
}, |
|
{ |
|
"loss": 356.4665, |
|
"grad_norm": 34.825042724609375, |
|
"learning_rate": 2.4714101446713793e-05, |
|
"epoch": 0.8565571144852329, |
|
"step": 955 |
|
}, |
|
{ |
|
"loss": 354.6561, |
|
"grad_norm": 35.965755462646484, |
|
"learning_rate": 2.4409354623250307e-05, |
|
"epoch": 0.85745403292972, |
|
"step": 956 |
|
}, |
|
{ |
|
"loss": 350.8446, |
|
"grad_norm": 34.73567199707031, |
|
"learning_rate": 2.4106401914313238e-05, |
|
"epoch": 0.8583509513742071, |
|
"step": 957 |
|
}, |
|
{ |
|
"loss": 357.6875, |
|
"grad_norm": 34.63365936279297, |
|
"learning_rate": 2.3805245729274947e-05, |
|
"epoch": 0.8592478698186944, |
|
"step": 958 |
|
}, |
|
{ |
|
"loss": 352.3867, |
|
"grad_norm": 37.33460235595703, |
|
"learning_rate": 2.3505888463220047e-05, |
|
"epoch": 0.8601447882631815, |
|
"step": 959 |
|
}, |
|
{ |
|
"loss": 357.7318, |
|
"grad_norm": 35.54653549194336, |
|
"learning_rate": 2.3208332496926387e-05, |
|
"epoch": 0.8610417067076687, |
|
"step": 960 |
|
}, |
|
{ |
|
"loss": 356.5225, |
|
"grad_norm": 34.780433654785156, |
|
"learning_rate": 2.2912580196846222e-05, |
|
"epoch": 0.8619386251521558, |
|
"step": 961 |
|
}, |
|
{ |
|
"loss": 358.1692, |
|
"grad_norm": 37.751983642578125, |
|
"learning_rate": 2.2618633915087282e-05, |
|
"epoch": 0.8628355435966429, |
|
"step": 962 |
|
}, |
|
{ |
|
"loss": 359.3351, |
|
"grad_norm": 35.848167419433594, |
|
"learning_rate": 2.2326495989393985e-05, |
|
"epoch": 0.8637324620411301, |
|
"step": 963 |
|
}, |
|
{ |
|
"loss": 354.9636, |
|
"grad_norm": 34.292728424072266, |
|
"learning_rate": 2.203616874312919e-05, |
|
"epoch": 0.8646293804856173, |
|
"step": 964 |
|
}, |
|
{ |
|
"loss": 350.5273, |
|
"grad_norm": 35.46641540527344, |
|
"learning_rate": 2.174765448525523e-05, |
|
"epoch": 0.8655262989301045, |
|
"step": 965 |
|
}, |
|
{ |
|
"loss": 355.4344, |
|
"grad_norm": 34.72315979003906, |
|
"learning_rate": 2.1460955510315962e-05, |
|
"epoch": 0.8664232173745916, |
|
"step": 966 |
|
}, |
|
{ |
|
"loss": 353.3275, |
|
"grad_norm": 36.16691589355469, |
|
"learning_rate": 2.1176074098418402e-05, |
|
"epoch": 0.8673201358190787, |
|
"step": 967 |
|
}, |
|
{ |
|
"loss": 355.2486, |
|
"grad_norm": 36.415794372558594, |
|
"learning_rate": 2.0893012515214388e-05, |
|
"epoch": 0.8682170542635659, |
|
"step": 968 |
|
}, |
|
{ |
|
"loss": 355.4182, |
|
"grad_norm": 35.465538024902344, |
|
"learning_rate": 2.06117730118828e-05, |
|
"epoch": 0.869113972708053, |
|
"step": 969 |
|
}, |
|
{ |
|
"loss": 354.304, |
|
"grad_norm": 35.425926208496094, |
|
"learning_rate": 2.0332357825111668e-05, |
|
"epoch": 0.8700108911525402, |
|
"step": 970 |
|
}, |
|
{ |
|
"loss": 351.7629, |
|
"grad_norm": 34.78888702392578, |
|
"learning_rate": 2.0054769177080185e-05, |
|
"epoch": 0.8709078095970274, |
|
"step": 971 |
|
}, |
|
{ |
|
"loss": 358.8823, |
|
"grad_norm": 35.0769157409668, |
|
"learning_rate": 1.97790092754411e-05, |
|
"epoch": 0.8718047280415145, |
|
"step": 972 |
|
}, |
|
{ |
|
"loss": 353.2525, |
|
"grad_norm": 35.73164749145508, |
|
"learning_rate": 1.9505080313303365e-05, |
|
"epoch": 0.8727016464860017, |
|
"step": 973 |
|
}, |
|
{ |
|
"loss": 355.5436, |
|
"grad_norm": 35.51607894897461, |
|
"learning_rate": 1.9232984469214453e-05, |
|
"epoch": 0.8735985649304888, |
|
"step": 974 |
|
}, |
|
{ |
|
"loss": 353.8528, |
|
"grad_norm": 35.09918975830078, |
|
"learning_rate": 1.8962723907143044e-05, |
|
"epoch": 0.874495483374976, |
|
"step": 975 |
|
}, |
|
{ |
|
"loss": 358.7514, |
|
"grad_norm": 36.12480926513672, |
|
"learning_rate": 1.869430077646203e-05, |
|
"epoch": 0.8753924018194631, |
|
"step": 976 |
|
}, |
|
{ |
|
"loss": 354.3459, |
|
"grad_norm": 34.32866287231445, |
|
"learning_rate": 1.8427717211931177e-05, |
|
"epoch": 0.8762893202639502, |
|
"step": 977 |
|
}, |
|
{ |
|
"loss": 350.5236, |
|
"grad_norm": 35.1101188659668, |
|
"learning_rate": 1.816297533368022e-05, |
|
"epoch": 0.8771862387084375, |
|
"step": 978 |
|
}, |
|
{ |
|
"loss": 353.4749, |
|
"grad_norm": 36.59587478637695, |
|
"learning_rate": 1.7900077247192087e-05, |
|
"epoch": 0.8780831571529246, |
|
"step": 979 |
|
}, |
|
{ |
|
"loss": 353.3892, |
|
"grad_norm": 34.86069869995117, |
|
"learning_rate": 1.7639025043286155e-05, |
|
"epoch": 0.8789800755974118, |
|
"step": 980 |
|
}, |
|
{ |
|
"loss": 354.1761, |
|
"grad_norm": 35.580291748046875, |
|
"learning_rate": 1.7379820798101383e-05, |
|
"epoch": 0.8798769940418989, |
|
"step": 981 |
|
}, |
|
{ |
|
"loss": 355.6291, |
|
"grad_norm": 34.58673095703125, |
|
"learning_rate": 1.7122466573080196e-05, |
|
"epoch": 0.880773912486386, |
|
"step": 982 |
|
}, |
|
{ |
|
"loss": 357.7327, |
|
"grad_norm": 33.76737976074219, |
|
"learning_rate": 1.6866964414951698e-05, |
|
"epoch": 0.8816708309308732, |
|
"step": 983 |
|
}, |
|
{ |
|
"loss": 355.4995, |
|
"grad_norm": 34.57607650756836, |
|
"learning_rate": 1.6613316355715558e-05, |
|
"epoch": 0.8825677493753604, |
|
"step": 984 |
|
}, |
|
{ |
|
"loss": 357.9588, |
|
"grad_norm": 34.49372100830078, |
|
"learning_rate": 1.6361524412626088e-05, |
|
"epoch": 0.8834646678198476, |
|
"step": 985 |
|
}, |
|
{ |
|
"loss": 357.0802, |
|
"grad_norm": 34.17061996459961, |
|
"learning_rate": 1.611159058817571e-05, |
|
"epoch": 0.8843615862643347, |
|
"step": 986 |
|
}, |
|
{ |
|
"loss": 354.1526, |
|
"grad_norm": 36.93791198730469, |
|
"learning_rate": 1.5863516870079418e-05, |
|
"epoch": 0.8852585047088218, |
|
"step": 987 |
|
}, |
|
{ |
|
"loss": 358.1216, |
|
"grad_norm": 35.566646575927734, |
|
"learning_rate": 1.5617305231258898e-05, |
|
"epoch": 0.886155423153309, |
|
"step": 988 |
|
}, |
|
{ |
|
"loss": 351.2595, |
|
"grad_norm": 35.77732467651367, |
|
"learning_rate": 1.5372957629826655e-05, |
|
"epoch": 0.8870523415977961, |
|
"step": 989 |
|
}, |
|
{ |
|
"loss": 353.016, |
|
"grad_norm": 37.376441955566406, |
|
"learning_rate": 1.513047600907061e-05, |
|
"epoch": 0.8879492600422833, |
|
"step": 990 |
|
}, |
|
{ |
|
"loss": 352.4042, |
|
"grad_norm": 34.55933380126953, |
|
"learning_rate": 1.4889862297438688e-05, |
|
"epoch": 0.8888461784867705, |
|
"step": 991 |
|
}, |
|
{ |
|
"loss": 352.0331, |
|
"grad_norm": 34.30587387084961, |
|
"learning_rate": 1.4651118408523317e-05, |
|
"epoch": 0.8897430969312576, |
|
"step": 992 |
|
}, |
|
{ |
|
"loss": 356.2885, |
|
"grad_norm": 34.28126525878906, |
|
"learning_rate": 1.4414246241046286e-05, |
|
"epoch": 0.8906400153757448, |
|
"step": 993 |
|
}, |
|
{ |
|
"loss": 356.9485, |
|
"grad_norm": 35.106529235839844, |
|
"learning_rate": 1.4179247678843681e-05, |
|
"epoch": 0.8915369338202319, |
|
"step": 994 |
|
}, |
|
{ |
|
"loss": 357.6618, |
|
"grad_norm": 33.811737060546875, |
|
"learning_rate": 1.3946124590850901e-05, |
|
"epoch": 0.892433852264719, |
|
"step": 995 |
|
}, |
|
{ |
|
"loss": 361.4888, |
|
"grad_norm": 33.41731643676758, |
|
"learning_rate": 1.3714878831087657e-05, |
|
"epoch": 0.8933307707092062, |
|
"step": 996 |
|
}, |
|
{ |
|
"loss": 358.7178, |
|
"grad_norm": 34.46256637573242, |
|
"learning_rate": 1.3485512238643499e-05, |
|
"epoch": 0.8942276891536933, |
|
"step": 997 |
|
}, |
|
{ |
|
"loss": 357.5736, |
|
"grad_norm": 35.067893981933594, |
|
"learning_rate": 1.3258026637662846e-05, |
|
"epoch": 0.8951246075981806, |
|
"step": 998 |
|
}, |
|
{ |
|
"loss": 353.149, |
|
"grad_norm": 34.04292678833008, |
|
"learning_rate": 1.3032423837330748e-05, |
|
"epoch": 0.8960215260426677, |
|
"step": 999 |
|
}, |
|
{ |
|
"loss": 356.1142, |
|
"grad_norm": 34.39286422729492, |
|
"learning_rate": 1.2808705631858459e-05, |
|
"epoch": 0.8969184444871549, |
|
"step": 1000 |
|
}, |
|
{ |
|
"eval_loss": 1.586561918258667, |
|
"eval_runtime": 20.2668, |
|
"eval_samples_per_second": 101.052, |
|
"eval_steps_per_second": 12.631, |
|
"epoch": 0.8969184444871549, |
|
"step": 1000 |
|
}, |
|
{ |
|
"loss": 354.0248, |
|
"grad_norm": 36.2171516418457, |
|
"learning_rate": 1.2586873800468996e-05, |
|
"epoch": 0.897815362931642, |
|
"step": 1001 |
|
}, |
|
{ |
|
"loss": 362.0434, |
|
"grad_norm": 34.42704391479492, |
|
"learning_rate": 1.2366930107383156e-05, |
|
"epoch": 0.8987122813761291, |
|
"step": 1002 |
|
}, |
|
{ |
|
"loss": 354.9637, |
|
"grad_norm": 34.4918212890625, |
|
"learning_rate": 1.2148876301805528e-05, |
|
"epoch": 0.8996091998206163, |
|
"step": 1003 |
|
}, |
|
{ |
|
"loss": 348.8729, |
|
"grad_norm": 34.57630157470703, |
|
"learning_rate": 1.1932714117910386e-05, |
|
"epoch": 0.9005061182651035, |
|
"step": 1004 |
|
}, |
|
{ |
|
"loss": 352.9299, |
|
"grad_norm": 35.46476745605469, |
|
"learning_rate": 1.171844527482796e-05, |
|
"epoch": 0.9014030367095907, |
|
"step": 1005 |
|
}, |
|
{ |
|
"loss": 355.247, |
|
"grad_norm": 34.4285888671875, |
|
"learning_rate": 1.1506071476630964e-05, |
|
"epoch": 0.9022999551540778, |
|
"step": 1006 |
|
}, |
|
{ |
|
"loss": 352.168, |
|
"grad_norm": 34.935569763183594, |
|
"learning_rate": 1.1295594412320754e-05, |
|
"epoch": 0.9031968735985649, |
|
"step": 1007 |
|
}, |
|
{ |
|
"loss": 357.9673, |
|
"grad_norm": 33.162166595458984, |
|
"learning_rate": 1.1087015755814084e-05, |
|
"epoch": 0.9040937920430521, |
|
"step": 1008 |
|
}, |
|
{ |
|
"loss": 350.8712, |
|
"grad_norm": 34.0540657043457, |
|
"learning_rate": 1.088033716592976e-05, |
|
"epoch": 0.9049907104875392, |
|
"step": 1009 |
|
}, |
|
{ |
|
"loss": 356.8466, |
|
"grad_norm": 33.83312225341797, |
|
"learning_rate": 1.0675560286375369e-05, |
|
"epoch": 0.9058876289320263, |
|
"step": 1010 |
|
}, |
|
{ |
|
"loss": 353.7512, |
|
"grad_norm": 34.7866096496582, |
|
"learning_rate": 1.0472686745734233e-05, |
|
"epoch": 0.9067845473765136, |
|
"step": 1011 |
|
}, |
|
{ |
|
"loss": 354.8209, |
|
"grad_norm": 34.10197067260742, |
|
"learning_rate": 1.027171815745262e-05, |
|
"epoch": 0.9076814658210007, |
|
"step": 1012 |
|
}, |
|
{ |
|
"loss": 354.7816, |
|
"grad_norm": 34.292598724365234, |
|
"learning_rate": 1.0072656119826662e-05, |
|
"epoch": 0.9085783842654879, |
|
"step": 1013 |
|
}, |
|
{ |
|
"loss": 356.8245, |
|
"grad_norm": 34.5960693359375, |
|
"learning_rate": 9.875502215989791e-06, |
|
"epoch": 0.909475302709975, |
|
"step": 1014 |
|
}, |
|
{ |
|
"loss": 353.8681, |
|
"grad_norm": 33.786537170410156, |
|
"learning_rate": 9.680258013900129e-06, |
|
"epoch": 0.9103722211544621, |
|
"step": 1015 |
|
}, |
|
{ |
|
"loss": 355.527, |
|
"grad_norm": 35.2137565612793, |
|
"learning_rate": 9.486925066327978e-06, |
|
"epoch": 0.9112691395989493, |
|
"step": 1016 |
|
}, |
|
{ |
|
"loss": 352.3827, |
|
"grad_norm": 34.659767150878906, |
|
"learning_rate": 9.295504910843522e-06, |
|
"epoch": 0.9121660580434365, |
|
"step": 1017 |
|
}, |
|
{ |
|
"loss": 355.3458, |
|
"grad_norm": 33.41202926635742, |
|
"learning_rate": 9.10599906980461e-06, |
|
"epoch": 0.9130629764879237, |
|
"step": 1018 |
|
}, |
|
{ |
|
"loss": 357.3716, |
|
"grad_norm": 32.52941131591797, |
|
"learning_rate": 8.91840905034455e-06, |
|
"epoch": 0.9139598949324108, |
|
"step": 1019 |
|
}, |
|
{ |
|
"loss": 354.1408, |
|
"grad_norm": 33.926963806152344, |
|
"learning_rate": 8.732736344360198e-06, |
|
"epoch": 0.914856813376898, |
|
"step": 1020 |
|
}, |
|
{ |
|
"loss": 357.4122, |
|
"grad_norm": 33.29584503173828, |
|
"learning_rate": 8.548982428500163e-06, |
|
"epoch": 0.9157537318213851, |
|
"step": 1021 |
|
}, |
|
{ |
|
"loss": 356.5175, |
|
"grad_norm": 35.51197814941406, |
|
"learning_rate": 8.367148764152843e-06, |
|
"epoch": 0.9166506502658722, |
|
"step": 1022 |
|
}, |
|
{ |
|
"loss": 361.666, |
|
"grad_norm": 35.082054138183594, |
|
"learning_rate": 8.187236797435077e-06, |
|
"epoch": 0.9175475687103594, |
|
"step": 1023 |
|
}, |
|
{ |
|
"loss": 350.1344, |
|
"grad_norm": 34.95941925048828, |
|
"learning_rate": 8.009247959180482e-06, |
|
"epoch": 0.9184444871548466, |
|
"step": 1024 |
|
}, |
|
{ |
|
"loss": 359.1797, |
|
"grad_norm": 34.81248474121094, |
|
"learning_rate": 7.833183664928023e-06, |
|
"epoch": 0.9193414055993337, |
|
"step": 1025 |
|
}, |
|
{ |
|
"loss": 352.5403, |
|
"grad_norm": 34.408485412597656, |
|
"learning_rate": 7.659045314910879e-06, |
|
"epoch": 0.9202383240438209, |
|
"step": 1026 |
|
}, |
|
{ |
|
"loss": 353.7971, |
|
"grad_norm": 34.32902526855469, |
|
"learning_rate": 7.486834294045286e-06, |
|
"epoch": 0.921135242488308, |
|
"step": 1027 |
|
}, |
|
{ |
|
"loss": 352.8156, |
|
"grad_norm": 33.39252471923828, |
|
"learning_rate": 7.316551971919522e-06, |
|
"epoch": 0.9220321609327952, |
|
"step": 1028 |
|
}, |
|
{ |
|
"loss": 355.1404, |
|
"grad_norm": 35.65606689453125, |
|
"learning_rate": 7.148199702782854e-06, |
|
"epoch": 0.9229290793772823, |
|
"step": 1029 |
|
}, |
|
{ |
|
"loss": 358.3244, |
|
"grad_norm": 35.14055252075195, |
|
"learning_rate": 6.981778825535079e-06, |
|
"epoch": 0.9238259978217694, |
|
"step": 1030 |
|
}, |
|
{ |
|
"loss": 356.6115, |
|
"grad_norm": 32.90983581542969, |
|
"learning_rate": 6.817290663715614e-06, |
|
"epoch": 0.9247229162662567, |
|
"step": 1031 |
|
}, |
|
{ |
|
"loss": 354.6003, |
|
"grad_norm": 33.653778076171875, |
|
"learning_rate": 6.654736525493033e-06, |
|
"epoch": 0.9256198347107438, |
|
"step": 1032 |
|
}, |
|
{ |
|
"loss": 356.817, |
|
"grad_norm": 35.58637619018555, |
|
"learning_rate": 6.494117703654739e-06, |
|
"epoch": 0.926516753155231, |
|
"step": 1033 |
|
}, |
|
{ |
|
"loss": 355.3286, |
|
"grad_norm": 33.73952102661133, |
|
"learning_rate": 6.335435475596646e-06, |
|
"epoch": 0.9274136715997181, |
|
"step": 1034 |
|
}, |
|
{ |
|
"loss": 355.2651, |
|
"grad_norm": 33.62116241455078, |
|
"learning_rate": 6.1786911033129e-06, |
|
"epoch": 0.9283105900442052, |
|
"step": 1035 |
|
}, |
|
{ |
|
"loss": 357.9323, |
|
"grad_norm": 33.39925003051758, |
|
"learning_rate": 6.023885833386061e-06, |
|
"epoch": 0.9292075084886924, |
|
"step": 1036 |
|
}, |
|
{ |
|
"loss": 351.2944, |
|
"grad_norm": 34.47417068481445, |
|
"learning_rate": 5.87102089697708e-06, |
|
"epoch": 0.9301044269331796, |
|
"step": 1037 |
|
}, |
|
{ |
|
"loss": 355.5925, |
|
"grad_norm": 33.980857849121094, |
|
"learning_rate": 5.720097509815392e-06, |
|
"epoch": 0.9310013453776668, |
|
"step": 1038 |
|
}, |
|
{ |
|
"loss": 355.6397, |
|
"grad_norm": 32.85739517211914, |
|
"learning_rate": 5.571116872189475e-06, |
|
"epoch": 0.9318982638221539, |
|
"step": 1039 |
|
}, |
|
{ |
|
"loss": 355.7616, |
|
"grad_norm": 33.64262390136719, |
|
"learning_rate": 5.424080168937112e-06, |
|
"epoch": 0.932795182266641, |
|
"step": 1040 |
|
}, |
|
{ |
|
"loss": 357.7719, |
|
"grad_norm": 34.275169372558594, |
|
"learning_rate": 5.278988569436066e-06, |
|
"epoch": 0.9336921007111282, |
|
"step": 1041 |
|
}, |
|
{ |
|
"loss": 357.6499, |
|
"grad_norm": 34.75218963623047, |
|
"learning_rate": 5.1358432275947775e-06, |
|
"epoch": 0.9345890191556153, |
|
"step": 1042 |
|
}, |
|
{ |
|
"loss": 353.3368, |
|
"grad_norm": 34.046241760253906, |
|
"learning_rate": 4.994645281843152e-06, |
|
"epoch": 0.9354859376001025, |
|
"step": 1043 |
|
}, |
|
{ |
|
"loss": 354.6295, |
|
"grad_norm": 34.62663269042969, |
|
"learning_rate": 4.855395855123512e-06, |
|
"epoch": 0.9363828560445897, |
|
"step": 1044 |
|
}, |
|
{ |
|
"loss": 352.3897, |
|
"grad_norm": 35.12565231323242, |
|
"learning_rate": 4.718096054881688e-06, |
|
"epoch": 0.9372797744890768, |
|
"step": 1045 |
|
}, |
|
{ |
|
"loss": 352.5993, |
|
"grad_norm": 33.51365661621094, |
|
"learning_rate": 4.582746973058216e-06, |
|
"epoch": 0.938176692933564, |
|
"step": 1046 |
|
}, |
|
{ |
|
"loss": 354.0611, |
|
"grad_norm": 33.32587814331055, |
|
"learning_rate": 4.449349686079574e-06, |
|
"epoch": 0.9390736113780511, |
|
"step": 1047 |
|
}, |
|
{ |
|
"loss": 361.4709, |
|
"grad_norm": 35.336490631103516, |
|
"learning_rate": 4.317905254849791e-06, |
|
"epoch": 0.9399705298225383, |
|
"step": 1048 |
|
}, |
|
{ |
|
"loss": 360.2202, |
|
"grad_norm": 34.51678466796875, |
|
"learning_rate": 4.188414724741768e-06, |
|
"epoch": 0.9408674482670254, |
|
"step": 1049 |
|
}, |
|
{ |
|
"loss": 354.1904, |
|
"grad_norm": 34.459373474121094, |
|
"learning_rate": 4.060879125589195e-06, |
|
"epoch": 0.9417643667115125, |
|
"step": 1050 |
|
}, |
|
{ |
|
"eval_loss": 1.5787107944488525, |
|
"eval_runtime": 18.3575, |
|
"eval_samples_per_second": 111.562, |
|
"eval_steps_per_second": 13.945, |
|
"epoch": 0.9417643667115125, |
|
"step": 1050 |
|
}, |
|
{ |
|
"loss": 353.3853, |
|
"grad_norm": 33.25263214111328, |
|
"learning_rate": 3.9352994716783105e-06, |
|
"epoch": 0.9426612851559998, |
|
"step": 1051 |
|
}, |
|
{ |
|
"loss": 350.3391, |
|
"grad_norm": 35.57413101196289, |
|
"learning_rate": 3.8116767617396298e-06, |
|
"epoch": 0.9435582036004869, |
|
"step": 1052 |
|
}, |
|
{ |
|
"loss": 356.2869, |
|
"grad_norm": 33.38325881958008, |
|
"learning_rate": 3.690011978940255e-06, |
|
"epoch": 0.9444551220449741, |
|
"step": 1053 |
|
}, |
|
{ |
|
"loss": 356.4574, |
|
"grad_norm": 34.5271110534668, |
|
"learning_rate": 3.570306090876024e-06, |
|
"epoch": 0.9453520404894612, |
|
"step": 1054 |
|
}, |
|
{ |
|
"loss": 359.7423, |
|
"grad_norm": 35.02552795410156, |
|
"learning_rate": 3.4525600495636246e-06, |
|
"epoch": 0.9462489589339483, |
|
"step": 1055 |
|
}, |
|
{ |
|
"loss": 353.1874, |
|
"grad_norm": 35.6952018737793, |
|
"learning_rate": 3.3367747914331838e-06, |
|
"epoch": 0.9471458773784355, |
|
"step": 1056 |
|
}, |
|
{ |
|
"loss": 355.9973, |
|
"grad_norm": 35.45086669921875, |
|
"learning_rate": 3.222951237320915e-06, |
|
"epoch": 0.9480427958229227, |
|
"step": 1057 |
|
}, |
|
{ |
|
"loss": 355.2783, |
|
"grad_norm": 32.976966857910156, |
|
"learning_rate": 3.1110902924615102e-06, |
|
"epoch": 0.9489397142674099, |
|
"step": 1058 |
|
}, |
|
{ |
|
"loss": 358.506, |
|
"grad_norm": 34.06571960449219, |
|
"learning_rate": 3.0011928464811213e-06, |
|
"epoch": 0.949836632711897, |
|
"step": 1059 |
|
}, |
|
{ |
|
"loss": 358.1763, |
|
"grad_norm": 33.59235382080078, |
|
"learning_rate": 2.8932597733903886e-06, |
|
"epoch": 0.9507335511563841, |
|
"step": 1060 |
|
}, |
|
{ |
|
"loss": 357.5705, |
|
"grad_norm": 32.182106018066406, |
|
"learning_rate": 2.7872919315772017e-06, |
|
"epoch": 0.9516304696008713, |
|
"step": 1061 |
|
}, |
|
{ |
|
"loss": 354.619, |
|
"grad_norm": 35.46062469482422, |
|
"learning_rate": 2.683290163800145e-06, |
|
"epoch": 0.9525273880453584, |
|
"step": 1062 |
|
}, |
|
{ |
|
"loss": 350.0426, |
|
"grad_norm": 32.130767822265625, |
|
"learning_rate": 2.581255297181617e-06, |
|
"epoch": 0.9534243064898456, |
|
"step": 1063 |
|
}, |
|
{ |
|
"loss": 351.98, |
|
"grad_norm": 32.878875732421875, |
|
"learning_rate": 2.4811881432013905e-06, |
|
"epoch": 0.9543212249343328, |
|
"step": 1064 |
|
}, |
|
{ |
|
"loss": 353.1487, |
|
"grad_norm": 33.90510559082031, |
|
"learning_rate": 2.3830894976899774e-06, |
|
"epoch": 0.9552181433788199, |
|
"step": 1065 |
|
}, |
|
{ |
|
"loss": 357.164, |
|
"grad_norm": 34.16891860961914, |
|
"learning_rate": 2.2869601408225805e-06, |
|
"epoch": 0.9561150618233071, |
|
"step": 1066 |
|
}, |
|
{ |
|
"loss": 351.2288, |
|
"grad_norm": 33.57730484008789, |
|
"learning_rate": 2.1928008371125406e-06, |
|
"epoch": 0.9570119802677942, |
|
"step": 1067 |
|
}, |
|
{ |
|
"loss": 356.0024, |
|
"grad_norm": 33.691978454589844, |
|
"learning_rate": 2.1006123354055384e-06, |
|
"epoch": 0.9579088987122814, |
|
"step": 1068 |
|
}, |
|
{ |
|
"loss": 361.7596, |
|
"grad_norm": 33.60329055786133, |
|
"learning_rate": 2.0103953688734853e-06, |
|
"epoch": 0.9588058171567685, |
|
"step": 1069 |
|
}, |
|
{ |
|
"loss": 354.5997, |
|
"grad_norm": 35.25307083129883, |
|
"learning_rate": 1.9221506550088365e-06, |
|
"epoch": 0.9597027356012557, |
|
"step": 1070 |
|
}, |
|
{ |
|
"loss": 355.2119, |
|
"grad_norm": 34.94419860839844, |
|
"learning_rate": 1.83587889561862e-06, |
|
"epoch": 0.9605996540457429, |
|
"step": 1071 |
|
}, |
|
{ |
|
"loss": 355.9485, |
|
"grad_norm": 34.35773468017578, |
|
"learning_rate": 1.7515807768192228e-06, |
|
"epoch": 0.96149657249023, |
|
"step": 1072 |
|
}, |
|
{ |
|
"loss": 353.5008, |
|
"grad_norm": 33.7717170715332, |
|
"learning_rate": 1.6692569690305859e-06, |
|
"epoch": 0.9623934909347172, |
|
"step": 1073 |
|
}, |
|
{ |
|
"loss": 357.9717, |
|
"grad_norm": 35.07488250732422, |
|
"learning_rate": 1.5889081269710726e-06, |
|
"epoch": 0.9632904093792043, |
|
"step": 1074 |
|
}, |
|
{ |
|
"loss": 361.8947, |
|
"grad_norm": 34.685150146484375, |
|
"learning_rate": 1.5105348896522486e-06, |
|
"epoch": 0.9641873278236914, |
|
"step": 1075 |
|
}, |
|
{ |
|
"loss": 357.5904, |
|
"grad_norm": 34.1632080078125, |
|
"learning_rate": 1.4341378803737204e-06, |
|
"epoch": 0.9650842462681786, |
|
"step": 1076 |
|
}, |
|
{ |
|
"loss": 357.5146, |
|
"grad_norm": 34.23555374145508, |
|
"learning_rate": 1.3597177067181943e-06, |
|
"epoch": 0.9659811647126658, |
|
"step": 1077 |
|
}, |
|
{ |
|
"loss": 356.91, |
|
"grad_norm": 32.962257385253906, |
|
"learning_rate": 1.2872749605468137e-06, |
|
"epoch": 0.966878083157153, |
|
"step": 1078 |
|
}, |
|
{ |
|
"loss": 351.4866, |
|
"grad_norm": 34.07936096191406, |
|
"learning_rate": 1.2168102179941076e-06, |
|
"epoch": 0.9677750016016401, |
|
"step": 1079 |
|
}, |
|
{ |
|
"loss": 355.5893, |
|
"grad_norm": 33.35137939453125, |
|
"learning_rate": 1.1483240394637717e-06, |
|
"epoch": 0.9686719200461272, |
|
"step": 1080 |
|
}, |
|
{ |
|
"loss": 355.4586, |
|
"grad_norm": 34.09134292602539, |
|
"learning_rate": 1.0818169696239776e-06, |
|
"epoch": 0.9695688384906144, |
|
"step": 1081 |
|
}, |
|
{ |
|
"loss": 354.5378, |
|
"grad_norm": 32.67642593383789, |
|
"learning_rate": 1.0172895374031265e-06, |
|
"epoch": 0.9704657569351015, |
|
"step": 1082 |
|
}, |
|
{ |
|
"loss": 354.3784, |
|
"grad_norm": 32.6947021484375, |
|
"learning_rate": 9.5474225598563e-07, |
|
"epoch": 0.9713626753795886, |
|
"step": 1083 |
|
}, |
|
{ |
|
"loss": 355.8788, |
|
"grad_norm": 33.51148986816406, |
|
"learning_rate": 8.941756228078579e-07, |
|
"epoch": 0.9722595938240759, |
|
"step": 1084 |
|
}, |
|
{ |
|
"loss": 353.8372, |
|
"grad_norm": 33.57039260864258, |
|
"learning_rate": 8.35590119554086e-07, |
|
"epoch": 0.973156512268563, |
|
"step": 1085 |
|
}, |
|
{ |
|
"loss": 353.2452, |
|
"grad_norm": 33.60462188720703, |
|
"learning_rate": 7.789862121528324e-07, |
|
"epoch": 0.9740534307130502, |
|
"step": 1086 |
|
}, |
|
{ |
|
"loss": 357.0675, |
|
"grad_norm": 33.704349517822266, |
|
"learning_rate": 7.243643507729436e-07, |
|
"epoch": 0.9749503491575373, |
|
"step": 1087 |
|
}, |
|
{ |
|
"loss": 354.5553, |
|
"grad_norm": 34.90256881713867, |
|
"learning_rate": 6.717249698202088e-07, |
|
"epoch": 0.9758472676020244, |
|
"step": 1088 |
|
}, |
|
{ |
|
"loss": 349.4813, |
|
"grad_norm": 34.148128509521484, |
|
"learning_rate": 6.210684879337513e-07, |
|
"epoch": 0.9767441860465116, |
|
"step": 1089 |
|
}, |
|
{ |
|
"loss": 357.7331, |
|
"grad_norm": 34.612762451171875, |
|
"learning_rate": 5.72395307982837e-07, |
|
"epoch": 0.9776411044909988, |
|
"step": 1090 |
|
}, |
|
{ |
|
"loss": 358.809, |
|
"grad_norm": 32.881195068359375, |
|
"learning_rate": 5.257058170635709e-07, |
|
"epoch": 0.978538022935486, |
|
"step": 1091 |
|
}, |
|
{ |
|
"loss": 356.2231, |
|
"grad_norm": 32.4294319152832, |
|
"learning_rate": 4.810003864958168e-07, |
|
"epoch": 0.9794349413799731, |
|
"step": 1092 |
|
}, |
|
{ |
|
"loss": 354.6883, |
|
"grad_norm": 35.39781951904297, |
|
"learning_rate": 4.3827937182033815e-07, |
|
"epoch": 0.9803318598244602, |
|
"step": 1093 |
|
}, |
|
{ |
|
"loss": 352.7607, |
|
"grad_norm": 34.17608642578125, |
|
"learning_rate": 3.9754311279582844e-07, |
|
"epoch": 0.9812287782689474, |
|
"step": 1094 |
|
}, |
|
{ |
|
"loss": 353.8497, |
|
"grad_norm": 31.340768814086914, |
|
"learning_rate": 3.587919333963574e-07, |
|
"epoch": 0.9821256967134345, |
|
"step": 1095 |
|
}, |
|
{ |
|
"loss": 357.9939, |
|
"grad_norm": 33.75115966796875, |
|
"learning_rate": 3.2202614180870673e-07, |
|
"epoch": 0.9830226151579217, |
|
"step": 1096 |
|
}, |
|
{ |
|
"loss": 356.0656, |
|
"grad_norm": 32.56006622314453, |
|
"learning_rate": 2.872460304299274e-07, |
|
"epoch": 0.9839195336024089, |
|
"step": 1097 |
|
}, |
|
{ |
|
"loss": 353.62, |
|
"grad_norm": 34.134193420410156, |
|
"learning_rate": 2.5445187586503603e-07, |
|
"epoch": 0.984816452046896, |
|
"step": 1098 |
|
}, |
|
{ |
|
"loss": 355.838, |
|
"grad_norm": 34.15678024291992, |
|
"learning_rate": 2.2364393892479462e-07, |
|
"epoch": 0.9857133704913832, |
|
"step": 1099 |
|
}, |
|
{ |
|
"loss": 358.3669, |
|
"grad_norm": 32.837039947509766, |
|
"learning_rate": 1.9482246462365626e-07, |
|
"epoch": 0.9866102889358703, |
|
"step": 1100 |
|
}, |
|
{ |
|
"eval_loss": 1.5716547966003418, |
|
"eval_runtime": 18.217, |
|
"eval_samples_per_second": 112.422, |
|
"eval_steps_per_second": 14.053, |
|
"epoch": 0.9866102889358703, |
|
"step": 1100 |
|
}, |
|
{ |
|
"loss": 356.8408, |
|
"grad_norm": 33.33000183105469, |
|
"learning_rate": 1.6798768217776706e-07, |
|
"epoch": 0.9875072073803575, |
|
"step": 1101 |
|
}, |
|
{ |
|
"loss": 356.4636, |
|
"grad_norm": 34.879573822021484, |
|
"learning_rate": 1.4313980500327283e-07, |
|
"epoch": 0.9884041258248446, |
|
"step": 1102 |
|
}, |
|
{ |
|
"loss": 356.378, |
|
"grad_norm": 33.825469970703125, |
|
"learning_rate": 1.2027903071440415e-07, |
|
"epoch": 0.9893010442693317, |
|
"step": 1103 |
|
}, |
|
{ |
|
"loss": 359.4078, |
|
"grad_norm": 34.18437957763672, |
|
"learning_rate": 9.94055411221717e-08, |
|
"epoch": 0.990197962713819, |
|
"step": 1104 |
|
}, |
|
{ |
|
"loss": 356.8303, |
|
"grad_norm": 35.02104187011719, |
|
"learning_rate": 8.051950223267323e-08, |
|
"epoch": 0.9910948811583061, |
|
"step": 1105 |
|
}, |
|
{ |
|
"loss": 351.9132, |
|
"grad_norm": 33.7501220703125, |
|
"learning_rate": 6.362106424590009e-08, |
|
"epoch": 0.9919917996027933, |
|
"step": 1106 |
|
}, |
|
{ |
|
"loss": 356.2349, |
|
"grad_norm": 34.74052810668945, |
|
"learning_rate": 4.871036155454367e-08, |
|
"epoch": 0.9928887180472804, |
|
"step": 1107 |
|
}, |
|
{ |
|
"loss": 357.3864, |
|
"grad_norm": 33.26545715332031, |
|
"learning_rate": 3.578751274294079e-08, |
|
"epoch": 0.9937856364917675, |
|
"step": 1108 |
|
}, |
|
{ |
|
"loss": 358.4432, |
|
"grad_norm": 33.61418914794922, |
|
"learning_rate": 2.4852620586046647e-08, |
|
"epoch": 0.9946825549362547, |
|
"step": 1109 |
|
}, |
|
{ |
|
"loss": 356.3781, |
|
"grad_norm": 33.90690612792969, |
|
"learning_rate": 1.5905772048629975e-08, |
|
"epoch": 0.9955794733807419, |
|
"step": 1110 |
|
}, |
|
{ |
|
"loss": 355.2562, |
|
"grad_norm": 36.185489654541016, |
|
"learning_rate": 8.947038284717879e-09, |
|
"epoch": 0.9964763918252291, |
|
"step": 1111 |
|
}, |
|
{ |
|
"loss": 353.4495, |
|
"grad_norm": 35.645416259765625, |
|
"learning_rate": 3.976474636874228e-09, |
|
"epoch": 0.9973733102697162, |
|
"step": 1112 |
|
}, |
|
{ |
|
"loss": 358.9317, |
|
"grad_norm": 34.38767623901367, |
|
"learning_rate": 9.941206357555465e-10, |
|
"epoch": 0.9982702287142033, |
|
"step": 1113 |
|
}, |
|
{ |
|
"loss": 355.1901, |
|
"grad_norm": 33.96023941040039, |
|
"learning_rate": 0.0, |
|
"epoch": 0.9991671471586905, |
|
"step": 1114 |
|
}, |
|
{ |
|
"train_runtime": 10703.3349, |
|
"train_samples_per_second": 186.666, |
|
"train_steps_per_second": 0.104, |
|
"total_flos": 6.811715592467251e+17, |
|
"train_loss": 100.33408414611269, |
|
"epoch": 0.9991671471586905, |
|
"step": 1114 |
|
}, |
|
{ |
|
"eval_loss": 1.585738182067871, |
|
"eval_runtime": 19.5932, |
|
"eval_samples_per_second": 104.526, |
|
"eval_steps_per_second": 13.066, |
|
"epoch": 0.9991671471586905, |
|
"step": 1114 |
|
} |
|
], |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"is_hyper_param_search": false, |
|
"trial_name": null, |
|
"trial_params": null, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_training_stop": true, |
|
"should_epoch_stop": false, |
|
"should_save": true, |
|
"should_evaluate": false, |
|
"should_log": false |
|
}, |
|
"attributes": {} |
|
} |
|
} |
|
} |