diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2774720060732587, + "global_step": 24000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00019998102106661607, + "loss": 9.952, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019996204213323213, + "loss": 8.9779, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019994306319984817, + "loss": 8.4733, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019992408426646423, + "loss": 8.3471, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999051053330803, + "loss": 8.3499, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019988612639969635, + "loss": 8.3611, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001998671474663124, + "loss": 8.2644, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019984816853292845, + "loss": 8.2485, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001998291895995445, + "loss": 8.1744, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019981021066616057, + "loss": 8.1581, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019979123173277663, + "loss": 8.2968, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001997722527993927, + "loss": 8.2153, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019975327386600875, + "loss": 8.081, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019973429493262479, + "loss": 8.1176, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019971531599924085, + "loss": 8.1398, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001996963370658569, + "loss": 8.1933, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019967735813247297, + "loss": 8.1381, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019965837919908903, + "loss": 8.1712, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019963940026570507, + "loss": 8.1477, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019962042133232113, + "loss": 8.1891, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001996014423989372, + "loss": 8.0788, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019958246346555325, + "loss": 8.1196, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001995634845321693, + "loss": 8.0288, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019954450559878534, + "loss": 8.1236, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001995255266654014, + "loss": 8.0957, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019950654773201747, + "loss": 7.9825, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019948756879863353, + "loss": 8.1757, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001994685898652496, + "loss": 8.1677, + "step": 280 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019944961093186565, + "loss": 8.0789, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019943063199848169, + "loss": 8.0367, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019941165306509775, + "loss": 8.1172, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001993926741317138, + "loss": 8.1456, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019937369519832987, + "loss": 8.0392, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019935471626494593, + "loss": 8.2246, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019933573733156196, + "loss": 8.0326, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019931675839817803, + "loss": 8.092, + "step": 360 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001992977794647941, + "loss": 8.0419, + "step": 370 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019927880053141015, + "loss": 8.1022, + "step": 380 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001992598215980262, + "loss": 8.0943, + "step": 390 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019924084266464224, + "loss": 8.0907, + "step": 400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001992218637312583, + "loss": 8.0365, + "step": 410 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019920288479787437, + "loss": 7.9948, + "step": 420 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019918390586449043, + "loss": 8.033, + "step": 430 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001991649269311065, + "loss": 8.1033, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019914594799772255, + "loss": 8.1076, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019912696906433858, + "loss": 8.0816, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019910799013095465, + "loss": 8.0745, + "step": 470 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001990890111975707, + "loss": 8.1358, + "step": 480 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019907003226418677, + "loss": 7.9812, + "step": 490 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019905105333080283, + "loss": 8.0595, + "step": 500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019903207439741886, + "loss": 8.0713, + "step": 510 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019901309546403492, + "loss": 8.1596, + "step": 520 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019899411653065099, + "loss": 8.0495, + "step": 530 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019897513759726705, + "loss": 8.0031, + "step": 540 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001989561586638831, + "loss": 8.1362, + "step": 550 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019893717973049914, + "loss": 8.0954, + "step": 560 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001989182007971152, + "loss": 8.1174, + "step": 570 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019889922186373127, + "loss": 8.0747, + "step": 580 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019888024293034733, + "loss": 8.0865, + "step": 590 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001988612639969634, + "loss": 8.0401, + "step": 600 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019884228506357942, + "loss": 8.0427, + "step": 610 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019882330613019548, + "loss": 7.9791, + "step": 620 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019880432719681154, + "loss": 8.0075, + "step": 630 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001987853482634276, + "loss": 7.999, + "step": 640 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019876636933004367, + "loss": 8.0756, + "step": 650 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019874739039665973, + "loss": 8.0046, + "step": 660 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019872841146327576, + "loss": 7.9885, + "step": 670 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019870943252989182, + "loss": 8.065, + "step": 680 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019869045359650789, + "loss": 8.0558, + "step": 690 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019867147466312395, + "loss": 8.085, + "step": 700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019865249572974, + "loss": 8.0773, + "step": 710 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019863351679635604, + "loss": 8.0463, + "step": 720 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001986145378629721, + "loss": 8.0125, + "step": 730 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019859555892958816, + "loss": 8.0906, + "step": 740 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019857657999620423, + "loss": 8.038, + "step": 750 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001985576010628203, + "loss": 8.0052, + "step": 760 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019853862212943632, + "loss": 8.0353, + "step": 770 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019851964319605238, + "loss": 8.0852, + "step": 780 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019850066426266844, + "loss": 8.0317, + "step": 790 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001984816853292845, + "loss": 7.9571, + "step": 800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019846270639590057, + "loss": 8.086, + "step": 810 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019844372746251663, + "loss": 7.9966, + "step": 820 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019842474852913266, + "loss": 8.0486, + "step": 830 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019840576959574872, + "loss": 7.9632, + "step": 840 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019838679066236478, + "loss": 8.0173, + "step": 850 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019836781172898085, + "loss": 7.9798, + "step": 860 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001983488327955969, + "loss": 7.8961, + "step": 870 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019832985386221294, + "loss": 8.1219, + "step": 880 + }, + { + "epoch": 0.08, + "learning_rate": 0.000198310874928829, + "loss": 8.0091, + "step": 890 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019829189599544506, + "loss": 8.0785, + "step": 900 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019827291706206113, + "loss": 8.0095, + "step": 910 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019825393812867719, + "loss": 7.9793, + "step": 920 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019823495919529322, + "loss": 8.006, + "step": 930 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019821598026190928, + "loss": 7.969, + "step": 940 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019819700132852534, + "loss": 7.9929, + "step": 950 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001981780223951414, + "loss": 8.1714, + "step": 960 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019815904346175747, + "loss": 8.0096, + "step": 970 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019814006452837353, + "loss": 7.9708, + "step": 980 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019812108559498956, + "loss": 8.0766, + "step": 990 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019810210666160562, + "loss": 7.9056, + "step": 1000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019808312772822168, + "loss": 8.0285, + "step": 1010 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019806414879483774, + "loss": 8.0732, + "step": 1020 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001980451698614538, + "loss": 7.8647, + "step": 1030 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019802619092806984, + "loss": 8.0618, + "step": 1040 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001980072119946859, + "loss": 7.8339, + "step": 1050 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019798823306130196, + "loss": 7.9132, + "step": 1060 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019796925412791802, + "loss": 7.913, + "step": 1070 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019795027519453409, + "loss": 7.9198, + "step": 1080 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019793129626115012, + "loss": 8.0008, + "step": 1090 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019791231732776618, + "loss": 8.0339, + "step": 1100 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019789333839438224, + "loss": 8.0553, + "step": 1110 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001978743594609983, + "loss": 7.9269, + "step": 1120 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019785538052761436, + "loss": 7.9504, + "step": 1130 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001978364015942304, + "loss": 8.0064, + "step": 1140 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019781742266084646, + "loss": 8.1134, + "step": 1150 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019779844372746252, + "loss": 8.0186, + "step": 1160 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019777946479407858, + "loss": 8.02, + "step": 1170 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019776048586069464, + "loss": 8.1027, + "step": 1180 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001977415069273107, + "loss": 7.9113, + "step": 1190 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019772252799392674, + "loss": 7.983, + "step": 1200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001977035490605428, + "loss": 7.9472, + "step": 1210 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019768457012715886, + "loss": 8.0294, + "step": 1220 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019766559119377492, + "loss": 7.9201, + "step": 1230 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019764661226039098, + "loss": 7.9851, + "step": 1240 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019762763332700702, + "loss": 8.0097, + "step": 1250 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019760865439362308, + "loss": 8.0801, + "step": 1260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019758967546023914, + "loss": 7.9854, + "step": 1270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975706965268552, + "loss": 7.9648, + "step": 1280 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019755171759347126, + "loss": 7.9651, + "step": 1290 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975327386600873, + "loss": 7.9788, + "step": 1300 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019751375972670336, + "loss": 7.9753, + "step": 1310 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019749478079331942, + "loss": 8.0325, + "step": 1320 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019747580185993548, + "loss": 7.937, + "step": 1330 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019745682292655154, + "loss": 8.0093, + "step": 1340 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001974378439931676, + "loss": 8.0437, + "step": 1350 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019741886505978364, + "loss": 8.0538, + "step": 1360 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001973998861263997, + "loss": 7.9591, + "step": 1370 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019738090719301576, + "loss": 8.0154, + "step": 1380 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019736192825963182, + "loss": 7.9782, + "step": 1390 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019734294932624788, + "loss": 7.9924, + "step": 1400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019732397039286392, + "loss": 7.9091, + "step": 1410 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019730499145947998, + "loss": 7.9687, + "step": 1420 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019728601252609604, + "loss": 8.0328, + "step": 1430 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001972670335927121, + "loss": 7.8584, + "step": 1440 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019724805465932816, + "loss": 8.0146, + "step": 1450 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001972290757259442, + "loss": 7.8941, + "step": 1460 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019721009679256026, + "loss": 7.9312, + "step": 1470 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019719111785917632, + "loss": 7.9333, + "step": 1480 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019717213892579238, + "loss": 7.9736, + "step": 1490 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019715315999240844, + "loss": 7.9074, + "step": 1500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001971341810590245, + "loss": 7.9985, + "step": 1510 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019711520212564054, + "loss": 7.9647, + "step": 1520 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001970962231922566, + "loss": 7.9233, + "step": 1530 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019707724425887266, + "loss": 7.9757, + "step": 1540 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019705826532548872, + "loss": 8.0475, + "step": 1550 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019703928639210478, + "loss": 7.974, + "step": 1560 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019702030745872082, + "loss": 8.0162, + "step": 1570 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019700132852533688, + "loss": 7.9094, + "step": 1580 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019698234959195294, + "loss": 7.8877, + "step": 1590 + }, + { + "epoch": 0.15, + "learning_rate": 0.000196963370658569, + "loss": 7.964, + "step": 1600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019694439172518506, + "loss": 8.012, + "step": 1610 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001969254127918011, + "loss": 8.0087, + "step": 1620 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019690643385841716, + "loss": 8.0649, + "step": 1630 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019688745492503322, + "loss": 7.9777, + "step": 1640 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019686847599164928, + "loss": 7.9384, + "step": 1650 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019684949705826534, + "loss": 7.9967, + "step": 1660 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019683051812488138, + "loss": 7.924, + "step": 1670 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019681153919149744, + "loss": 7.9114, + "step": 1680 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001967925602581135, + "loss": 7.9128, + "step": 1690 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019677358132472956, + "loss": 7.8818, + "step": 1700 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019675460239134562, + "loss": 8.0632, + "step": 1710 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019673562345796168, + "loss": 7.9477, + "step": 1720 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019671664452457772, + "loss": 7.9508, + "step": 1730 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019669766559119378, + "loss": 8.0061, + "step": 1740 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019667868665780984, + "loss": 7.9196, + "step": 1750 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001966597077244259, + "loss": 7.9596, + "step": 1760 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019664072879104196, + "loss": 7.8292, + "step": 1770 + }, + { + "epoch": 0.17, + "learning_rate": 0.000196621749857658, + "loss": 7.9823, + "step": 1780 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019660277092427406, + "loss": 7.9388, + "step": 1790 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019658379199089012, + "loss": 8.0311, + "step": 1800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019656481305750618, + "loss": 7.9965, + "step": 1810 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019654583412412224, + "loss": 7.92, + "step": 1820 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019652685519073827, + "loss": 7.9755, + "step": 1830 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019650787625735434, + "loss": 7.9663, + "step": 1840 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001964888973239704, + "loss": 7.9034, + "step": 1850 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019646991839058646, + "loss": 7.9657, + "step": 1860 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019645093945720252, + "loss": 8.0662, + "step": 1870 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019643196052381858, + "loss": 8.0137, + "step": 1880 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019641298159043462, + "loss": 7.9988, + "step": 1890 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019639400265705068, + "loss": 7.9998, + "step": 1900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019637502372366674, + "loss": 7.9599, + "step": 1910 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963560447902828, + "loss": 7.851, + "step": 1920 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019633706585689886, + "loss": 7.8906, + "step": 1930 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963180869235149, + "loss": 7.9987, + "step": 1940 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019629910799013096, + "loss": 7.949, + "step": 1950 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019628012905674702, + "loss": 8.0121, + "step": 1960 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019626115012336308, + "loss": 7.9445, + "step": 1970 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019624217118997914, + "loss": 7.9355, + "step": 1980 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019622319225659517, + "loss": 7.9748, + "step": 1990 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019620421332321124, + "loss": 7.9165, + "step": 2000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001961852343898273, + "loss": 7.9001, + "step": 2010 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019616625545644336, + "loss": 7.9579, + "step": 2020 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019614727652305942, + "loss": 8.0491, + "step": 2030 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019612829758967548, + "loss": 7.9823, + "step": 2040 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019610931865629151, + "loss": 7.9317, + "step": 2050 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019609033972290758, + "loss": 7.981, + "step": 2060 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019607136078952364, + "loss": 7.9837, + "step": 2070 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001960523818561397, + "loss": 7.9299, + "step": 2080 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019603340292275576, + "loss": 7.885, + "step": 2090 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001960144239893718, + "loss": 7.9707, + "step": 2100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019599544505598785, + "loss": 8.0994, + "step": 2110 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019597646612260392, + "loss": 7.9866, + "step": 2120 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019595748718921998, + "loss": 8.0013, + "step": 2130 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019593850825583604, + "loss": 7.9703, + "step": 2140 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019591952932245207, + "loss": 7.8933, + "step": 2150 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019590055038906813, + "loss": 7.9128, + "step": 2160 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001958815714556842, + "loss": 7.9134, + "step": 2170 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019586259252230026, + "loss": 7.9388, + "step": 2180 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019584361358891632, + "loss": 7.8159, + "step": 2190 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019582463465553235, + "loss": 7.966, + "step": 2200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001958056557221484, + "loss": 7.9638, + "step": 2210 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019578667678876447, + "loss": 7.9076, + "step": 2220 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019576769785538054, + "loss": 7.8966, + "step": 2230 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001957487189219966, + "loss": 8.0228, + "step": 2240 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019572973998861266, + "loss": 7.963, + "step": 2250 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001957107610552287, + "loss": 7.9361, + "step": 2260 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019569178212184475, + "loss": 8.0444, + "step": 2270 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019567280318846082, + "loss": 7.9489, + "step": 2280 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019565382425507688, + "loss": 8.0002, + "step": 2290 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019563484532169294, + "loss": 7.983, + "step": 2300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019561586638830897, + "loss": 7.9444, + "step": 2310 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019559688745492503, + "loss": 7.9221, + "step": 2320 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001955779085215411, + "loss": 8.0789, + "step": 2330 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019555892958815716, + "loss": 7.9703, + "step": 2340 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019553995065477322, + "loss": 7.8596, + "step": 2350 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019552097172138925, + "loss": 8.0037, + "step": 2360 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001955019927880053, + "loss": 7.919, + "step": 2370 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019548301385462137, + "loss": 7.9221, + "step": 2380 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019546403492123744, + "loss": 7.9257, + "step": 2390 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001954450559878535, + "loss": 7.8881, + "step": 2400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019542607705446956, + "loss": 7.9372, + "step": 2410 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001954070981210856, + "loss": 7.9647, + "step": 2420 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019538811918770165, + "loss": 8.0126, + "step": 2430 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019536914025431771, + "loss": 7.9594, + "step": 2440 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019535016132093378, + "loss": 7.917, + "step": 2450 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019533118238754984, + "loss": 8.013, + "step": 2460 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019531220345416587, + "loss": 7.9512, + "step": 2470 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019529322452078193, + "loss": 7.9876, + "step": 2480 + }, + { + "epoch": 0.24, + "learning_rate": 0.000195274245587398, + "loss": 7.9838, + "step": 2490 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019525526665401405, + "loss": 7.9647, + "step": 2500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019523628772063012, + "loss": 7.8661, + "step": 2510 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019521730878724615, + "loss": 7.9554, + "step": 2520 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001951983298538622, + "loss": 7.9141, + "step": 2530 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019517935092047827, + "loss": 7.939, + "step": 2540 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019516037198709433, + "loss": 8.0098, + "step": 2550 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001951413930537104, + "loss": 7.9881, + "step": 2560 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019512241412032646, + "loss": 7.9937, + "step": 2570 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001951034351869425, + "loss": 7.9844, + "step": 2580 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019508445625355855, + "loss": 8.043, + "step": 2590 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001950654773201746, + "loss": 7.9513, + "step": 2600 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019504649838679067, + "loss": 7.9174, + "step": 2610 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019502751945340674, + "loss": 7.9746, + "step": 2620 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019500854052002277, + "loss": 7.9344, + "step": 2630 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019498956158663883, + "loss": 7.8564, + "step": 2640 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001949705826532549, + "loss": 7.8934, + "step": 2650 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019495160371987095, + "loss": 7.8338, + "step": 2660 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019493262478648702, + "loss": 7.9002, + "step": 2670 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019491364585310305, + "loss": 7.957, + "step": 2680 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001948946669197191, + "loss": 7.8777, + "step": 2690 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019487568798633517, + "loss": 8.0054, + "step": 2700 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019485670905295123, + "loss": 7.9225, + "step": 2710 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001948377301195673, + "loss": 7.9895, + "step": 2720 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019481875118618333, + "loss": 7.9711, + "step": 2730 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001947997722527994, + "loss": 7.9268, + "step": 2740 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019478079331941545, + "loss": 8.0199, + "step": 2750 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001947618143860315, + "loss": 7.9409, + "step": 2760 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019474283545264757, + "loss": 7.7781, + "step": 2770 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019472385651926364, + "loss": 7.787, + "step": 2780 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019470487758587967, + "loss": 8.0509, + "step": 2790 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019468589865249573, + "loss": 7.8997, + "step": 2800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001946669197191118, + "loss": 8.0071, + "step": 2810 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019464794078572785, + "loss": 7.9269, + "step": 2820 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019462896185234391, + "loss": 7.9551, + "step": 2830 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019460998291895995, + "loss": 7.9637, + "step": 2840 + }, + { + "epoch": 0.27, + "learning_rate": 0.000194591003985576, + "loss": 7.9432, + "step": 2850 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019457202505219207, + "loss": 7.8711, + "step": 2860 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019455304611880813, + "loss": 7.8302, + "step": 2870 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001945340671854242, + "loss": 7.8777, + "step": 2880 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019451508825204023, + "loss": 7.9643, + "step": 2890 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001944961093186563, + "loss": 7.8927, + "step": 2900 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019447713038527235, + "loss": 7.9123, + "step": 2910 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001944581514518884, + "loss": 7.9644, + "step": 2920 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019443917251850447, + "loss": 7.891, + "step": 2930 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019442019358512053, + "loss": 8.0191, + "step": 2940 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019440121465173657, + "loss": 7.8945, + "step": 2950 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019438223571835263, + "loss": 7.9478, + "step": 2960 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001943632567849687, + "loss": 7.9027, + "step": 2970 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019434427785158475, + "loss": 7.8986, + "step": 2980 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019432529891820081, + "loss": 8.0644, + "step": 2990 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019430631998481685, + "loss": 7.9844, + "step": 3000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001942873410514329, + "loss": 7.9241, + "step": 3010 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019426836211804897, + "loss": 7.8227, + "step": 3020 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019424938318466503, + "loss": 7.8803, + "step": 3030 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001942304042512811, + "loss": 7.9341, + "step": 3040 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019421142531789713, + "loss": 7.927, + "step": 3050 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001941924463845132, + "loss": 7.9502, + "step": 3060 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019417346745112925, + "loss": 7.921, + "step": 3070 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001941544885177453, + "loss": 7.9221, + "step": 3080 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019413550958436137, + "loss": 7.8924, + "step": 3090 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019411653065097743, + "loss": 8.0156, + "step": 3100 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019409755171759347, + "loss": 7.9395, + "step": 3110 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019407857278420953, + "loss": 7.9926, + "step": 3120 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001940595938508256, + "loss": 7.9102, + "step": 3130 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019404061491744165, + "loss": 7.8352, + "step": 3140 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001940216359840577, + "loss": 7.8719, + "step": 3150 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019400265705067375, + "loss": 7.9277, + "step": 3160 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001939836781172898, + "loss": 7.9376, + "step": 3170 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019396469918390587, + "loss": 7.9999, + "step": 3180 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019394572025052193, + "loss": 7.8309, + "step": 3190 + }, + { + "epoch": 0.3, + "learning_rate": 0.000193926741317138, + "loss": 7.8541, + "step": 3200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019390776238375403, + "loss": 7.9434, + "step": 3210 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001938887834503701, + "loss": 7.9548, + "step": 3220 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019386980451698615, + "loss": 7.9358, + "step": 3230 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001938508255836022, + "loss": 7.9035, + "step": 3240 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019383184665021827, + "loss": 7.934, + "step": 3250 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001938128677168343, + "loss": 7.9353, + "step": 3260 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019379388878345037, + "loss": 7.9028, + "step": 3270 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019377490985006643, + "loss": 7.9257, + "step": 3280 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001937559309166825, + "loss": 7.8557, + "step": 3290 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019373695198329855, + "loss": 7.8651, + "step": 3300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001937179730499146, + "loss": 7.901, + "step": 3310 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019369899411653065, + "loss": 7.9915, + "step": 3320 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001936800151831467, + "loss": 7.9375, + "step": 3330 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019366103624976277, + "loss": 7.8321, + "step": 3340 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019364205731637883, + "loss": 7.8932, + "step": 3350 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001936230783829949, + "loss": 7.9586, + "step": 3360 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019360409944961093, + "loss": 7.8609, + "step": 3370 + }, + { + "epoch": 0.32, + "learning_rate": 0.000193585120516227, + "loss": 7.9284, + "step": 3380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019356614158284305, + "loss": 7.877, + "step": 3390 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001935471626494591, + "loss": 7.9125, + "step": 3400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019352818371607517, + "loss": 7.8638, + "step": 3410 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001935092047826912, + "loss": 7.9896, + "step": 3420 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019349022584930727, + "loss": 8.0264, + "step": 3430 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019347124691592333, + "loss": 7.9667, + "step": 3440 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001934522679825394, + "loss": 7.7931, + "step": 3450 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019343328904915545, + "loss": 8.0166, + "step": 3460 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001934143101157715, + "loss": 7.844, + "step": 3470 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019339533118238755, + "loss": 7.9468, + "step": 3480 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001933763522490036, + "loss": 7.9775, + "step": 3490 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019335737331561967, + "loss": 7.8543, + "step": 3500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019333839438223573, + "loss": 7.8744, + "step": 3510 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001933194154488518, + "loss": 7.8954, + "step": 3520 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019330043651546782, + "loss": 7.9898, + "step": 3530 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019328145758208389, + "loss": 7.8642, + "step": 3540 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019326247864869995, + "loss": 8.0018, + "step": 3550 + }, + { + "epoch": 0.34, + "learning_rate": 0.000193243499715316, + "loss": 8.0016, + "step": 3560 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019322452078193207, + "loss": 8.0801, + "step": 3570 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001932055418485481, + "loss": 8.0127, + "step": 3580 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019318656291516416, + "loss": 7.8582, + "step": 3590 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019316758398178023, + "loss": 7.9344, + "step": 3600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001931486050483963, + "loss": 7.953, + "step": 3610 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019312962611501235, + "loss": 7.9068, + "step": 3620 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001931106471816284, + "loss": 7.9032, + "step": 3630 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019309166824824444, + "loss": 7.8135, + "step": 3640 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001930726893148605, + "loss": 7.8799, + "step": 3650 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019305371038147657, + "loss": 7.9762, + "step": 3660 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019303473144809263, + "loss": 7.9839, + "step": 3670 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001930157525147087, + "loss": 7.8639, + "step": 3680 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019299677358132472, + "loss": 8.0189, + "step": 3690 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019297779464794078, + "loss": 7.9332, + "step": 3700 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019295881571455685, + "loss": 8.1368, + "step": 3710 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001929398367811729, + "loss": 7.8899, + "step": 3720 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019292085784778897, + "loss": 7.9733, + "step": 3730 + }, + { + "epoch": 0.35, + "learning_rate": 0.000192901878914405, + "loss": 8.0364, + "step": 3740 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019288289998102106, + "loss": 7.9229, + "step": 3750 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019286392104763713, + "loss": 7.9838, + "step": 3760 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001928449421142532, + "loss": 7.7698, + "step": 3770 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019282596318086925, + "loss": 7.8598, + "step": 3780 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001928069842474853, + "loss": 8.0224, + "step": 3790 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019278800531410134, + "loss": 7.899, + "step": 3800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001927690263807174, + "loss": 7.8593, + "step": 3810 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019275004744733347, + "loss": 7.9592, + "step": 3820 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019273106851394953, + "loss": 7.9444, + "step": 3830 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001927120895805656, + "loss": 7.9079, + "step": 3840 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019269311064718162, + "loss": 7.9192, + "step": 3850 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019267413171379768, + "loss": 7.8443, + "step": 3860 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019265515278041375, + "loss": 7.7676, + "step": 3870 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001926361738470298, + "loss": 7.9781, + "step": 3880 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019261719491364587, + "loss": 7.9639, + "step": 3890 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001925982159802619, + "loss": 7.9449, + "step": 3900 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019257923704687796, + "loss": 7.9286, + "step": 3910 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019256025811349402, + "loss": 7.9555, + "step": 3920 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019254127918011009, + "loss": 7.8571, + "step": 3930 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019252230024672615, + "loss": 7.9362, + "step": 3940 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019250332131334218, + "loss": 7.8598, + "step": 3950 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019248434237995824, + "loss": 7.9286, + "step": 3960 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001924653634465743, + "loss": 7.9102, + "step": 3970 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019244638451319037, + "loss": 8.0461, + "step": 3980 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019242740557980643, + "loss": 8.019, + "step": 3990 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001924084266464225, + "loss": 7.9759, + "step": 4000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019238944771303852, + "loss": 7.8909, + "step": 4010 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019237046877965458, + "loss": 7.8641, + "step": 4020 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019235148984627064, + "loss": 7.9116, + "step": 4030 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001923325109128867, + "loss": 8.1006, + "step": 4040 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019231353197950277, + "loss": 7.9186, + "step": 4050 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001922945530461188, + "loss": 7.9467, + "step": 4060 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019227557411273486, + "loss": 7.9013, + "step": 4070 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019225659517935092, + "loss": 7.8616, + "step": 4080 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019223761624596698, + "loss": 7.972, + "step": 4090 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019221863731258305, + "loss": 7.8126, + "step": 4100 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019219965837919908, + "loss": 7.9782, + "step": 4110 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019218067944581514, + "loss": 7.8078, + "step": 4120 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001921617005124312, + "loss": 7.9655, + "step": 4130 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019214272157904726, + "loss": 7.914, + "step": 4140 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019212374264566333, + "loss": 7.9165, + "step": 4150 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001921047637122794, + "loss": 7.8859, + "step": 4160 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019208578477889542, + "loss": 7.92, + "step": 4170 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019206680584551148, + "loss": 7.8548, + "step": 4180 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019204782691212754, + "loss": 7.8462, + "step": 4190 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001920288479787436, + "loss": 7.9479, + "step": 4200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019200986904535967, + "loss": 7.9687, + "step": 4210 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001919908901119757, + "loss": 7.8412, + "step": 4220 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019197191117859176, + "loss": 7.9112, + "step": 4230 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019195293224520782, + "loss": 7.8358, + "step": 4240 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019193395331182388, + "loss": 7.9411, + "step": 4250 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019191497437843995, + "loss": 7.8077, + "step": 4260 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019189599544505598, + "loss": 7.9192, + "step": 4270 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019187701651167204, + "loss": 7.9694, + "step": 4280 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001918580375782881, + "loss": 7.8397, + "step": 4290 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019183905864490416, + "loss": 7.8919, + "step": 4300 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019182007971152022, + "loss": 7.9859, + "step": 4310 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019180110077813629, + "loss": 7.8927, + "step": 4320 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019178212184475232, + "loss": 7.9667, + "step": 4330 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019176314291136838, + "loss": 7.9561, + "step": 4340 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019174416397798444, + "loss": 7.8562, + "step": 4350 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001917251850446005, + "loss": 7.8994, + "step": 4360 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019170620611121657, + "loss": 7.921, + "step": 4370 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001916872271778326, + "loss": 7.8508, + "step": 4380 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019166824824444866, + "loss": 7.9523, + "step": 4390 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019164926931106472, + "loss": 7.8375, + "step": 4400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019163029037768078, + "loss": 7.9494, + "step": 4410 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019161131144429684, + "loss": 7.9264, + "step": 4420 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019159233251091288, + "loss": 7.8938, + "step": 4430 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019157335357752894, + "loss": 7.898, + "step": 4440 + }, + { + "epoch": 0.42, + "learning_rate": 0.000191554374644145, + "loss": 7.8928, + "step": 4450 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019153539571076106, + "loss": 7.9245, + "step": 4460 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019151641677737712, + "loss": 7.994, + "step": 4470 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019149743784399316, + "loss": 7.9778, + "step": 4480 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019147845891060922, + "loss": 7.8502, + "step": 4490 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019145947997722528, + "loss": 7.8904, + "step": 4500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019144050104384134, + "loss": 7.9738, + "step": 4510 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001914215221104574, + "loss": 7.8975, + "step": 4520 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019140254317707346, + "loss": 7.8833, + "step": 4530 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001913835642436895, + "loss": 7.9091, + "step": 4540 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019136458531030556, + "loss": 8.0118, + "step": 4550 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019134560637692162, + "loss": 7.9777, + "step": 4560 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019132662744353768, + "loss": 7.8288, + "step": 4570 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019130764851015374, + "loss": 7.864, + "step": 4580 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019128866957676978, + "loss": 7.9451, + "step": 4590 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019126969064338584, + "loss": 7.9331, + "step": 4600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001912507117100019, + "loss": 7.958, + "step": 4610 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019123173277661796, + "loss": 7.8932, + "step": 4620 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019121275384323402, + "loss": 7.9253, + "step": 4630 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019119377490985006, + "loss": 7.849, + "step": 4640 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019117479597646612, + "loss": 7.9173, + "step": 4650 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019115581704308218, + "loss": 7.8063, + "step": 4660 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019113683810969824, + "loss": 7.8727, + "step": 4670 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001911178591763143, + "loss": 7.9849, + "step": 4680 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019109888024293036, + "loss": 7.917, + "step": 4690 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001910799013095464, + "loss": 7.8493, + "step": 4700 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019106092237616246, + "loss": 7.9853, + "step": 4710 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019104194344277852, + "loss": 7.8777, + "step": 4720 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019102296450939458, + "loss": 8.0427, + "step": 4730 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019100398557601064, + "loss": 7.9599, + "step": 4740 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019098500664262668, + "loss": 8.0004, + "step": 4750 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019096602770924274, + "loss": 7.8401, + "step": 4760 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001909470487758588, + "loss": 7.9153, + "step": 4770 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019092806984247486, + "loss": 7.9396, + "step": 4780 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019090909090909092, + "loss": 7.9395, + "step": 4790 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019089011197570696, + "loss": 7.9248, + "step": 4800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019087113304232302, + "loss": 7.983, + "step": 4810 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019085215410893908, + "loss": 7.9105, + "step": 4820 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019083317517555514, + "loss": 7.8619, + "step": 4830 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001908141962421712, + "loss": 7.8528, + "step": 4840 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019079521730878726, + "loss": 7.8596, + "step": 4850 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001907762383754033, + "loss": 7.82, + "step": 4860 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019075725944201936, + "loss": 7.8662, + "step": 4870 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019073828050863542, + "loss": 7.8878, + "step": 4880 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019071930157525148, + "loss": 7.9529, + "step": 4890 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019070032264186754, + "loss": 7.8718, + "step": 4900 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019068134370848358, + "loss": 7.9205, + "step": 4910 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019066236477509964, + "loss": 7.8675, + "step": 4920 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001906433858417157, + "loss": 7.9425, + "step": 4930 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019062440690833176, + "loss": 7.8226, + "step": 4940 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019060542797494782, + "loss": 7.9104, + "step": 4950 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019058644904156386, + "loss": 7.9396, + "step": 4960 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019056747010817992, + "loss": 7.9223, + "step": 4970 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019054849117479598, + "loss": 7.8131, + "step": 4980 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019052951224141204, + "loss": 7.8441, + "step": 4990 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001905105333080281, + "loss": 7.8416, + "step": 5000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019049155437464413, + "loss": 7.977, + "step": 5010 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001904725754412602, + "loss": 7.9648, + "step": 5020 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019045359650787626, + "loss": 7.8183, + "step": 5030 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019043461757449232, + "loss": 7.9049, + "step": 5040 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019041563864110838, + "loss": 7.9121, + "step": 5050 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019039665970772444, + "loss": 7.9363, + "step": 5060 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019037768077434048, + "loss": 7.7851, + "step": 5070 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019035870184095654, + "loss": 7.9085, + "step": 5080 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001903397229075726, + "loss": 7.931, + "step": 5090 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019032074397418866, + "loss": 7.9747, + "step": 5100 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019030176504080472, + "loss": 7.8948, + "step": 5110 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019028278610742075, + "loss": 7.8823, + "step": 5120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019026380717403682, + "loss": 7.9353, + "step": 5130 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019024482824065288, + "loss": 7.8635, + "step": 5140 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019022584930726894, + "loss": 7.954, + "step": 5150 + }, + { + "epoch": 0.49, + "learning_rate": 0.000190206870373885, + "loss": 7.8627, + "step": 5160 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019018789144050103, + "loss": 7.8782, + "step": 5170 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001901689125071171, + "loss": 7.9441, + "step": 5180 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019014993357373316, + "loss": 7.8237, + "step": 5190 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019013095464034922, + "loss": 7.9041, + "step": 5200 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019011197570696528, + "loss": 7.8748, + "step": 5210 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019009299677358134, + "loss": 7.8806, + "step": 5220 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019007401784019737, + "loss": 7.9769, + "step": 5230 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019005503890681344, + "loss": 7.9438, + "step": 5240 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001900360599734295, + "loss": 7.9547, + "step": 5250 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019001708104004556, + "loss": 7.7906, + "step": 5260 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018999810210666162, + "loss": 7.9561, + "step": 5270 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018997912317327765, + "loss": 7.9207, + "step": 5280 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018996014423989371, + "loss": 7.9381, + "step": 5290 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018994116530650978, + "loss": 7.8762, + "step": 5300 + }, + { + "epoch": 0.5, + "learning_rate": 0.00018992218637312584, + "loss": 8.0111, + "step": 5310 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001899032074397419, + "loss": 7.9814, + "step": 5320 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018988422850635793, + "loss": 7.9152, + "step": 5330 + }, + { + "epoch": 0.51, + "learning_rate": 0.000189865249572974, + "loss": 7.8724, + "step": 5340 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018984627063959006, + "loss": 8.0027, + "step": 5350 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018982729170620612, + "loss": 7.8769, + "step": 5360 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018980831277282218, + "loss": 7.864, + "step": 5370 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018978933383943824, + "loss": 7.8941, + "step": 5380 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018977035490605427, + "loss": 7.9021, + "step": 5390 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018975137597267033, + "loss": 7.7893, + "step": 5400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001897323970392864, + "loss": 7.8462, + "step": 5410 + }, + { + "epoch": 0.51, + "learning_rate": 0.00018971341810590246, + "loss": 7.8329, + "step": 5420 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018969443917251852, + "loss": 7.9414, + "step": 5430 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018967546023913455, + "loss": 7.9004, + "step": 5440 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018965648130575061, + "loss": 7.911, + "step": 5450 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018963750237236668, + "loss": 7.9274, + "step": 5460 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018961852343898274, + "loss": 7.899, + "step": 5470 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001895995445055988, + "loss": 7.8951, + "step": 5480 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018958056557221483, + "loss": 7.9019, + "step": 5490 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001895615866388309, + "loss": 7.9009, + "step": 5500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018954260770544695, + "loss": 7.9239, + "step": 5510 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018952362877206302, + "loss": 7.9005, + "step": 5520 + }, + { + "epoch": 0.52, + "learning_rate": 0.00018950464983867908, + "loss": 7.9392, + "step": 5530 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001894856709052951, + "loss": 7.8666, + "step": 5540 + }, + { + "epoch": 0.53, + "learning_rate": 0.00018946669197191117, + "loss": 7.8467, + "step": 5550 + }, + { + "epoch": 0.53, + "learning_rate": 0.00018944771303852723, + "loss": 7.9866, + "step": 5560 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001894287341051433, + "loss": 7.9338, + "step": 5570 + }, + { + "epoch": 0.53, + "learning_rate": 0.00018940975517175936, + "loss": 7.9074, + "step": 5580 + }, + { + "epoch": 0.53, + "learning_rate": 0.00018939077623837542, + "loss": 7.8457, + "step": 5590 + }, + { + "epoch": 0.53, + "learning_rate": 0.00018937179730499145, + "loss": 7.9165, + "step": 5600 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001893528183716075, + "loss": 7.8931, + "step": 5610 + }, + { + "epoch": 0.53, + "learning_rate": 0.00018933383943822357, + "loss": 7.9497, + "step": 5620 + }, + { + "epoch": 0.53, + "learning_rate": 0.00018931486050483964, + "loss": 7.9797, + "step": 5630 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001892958815714557, + "loss": 7.8169, + "step": 5640 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018927690263807173, + "loss": 7.9766, + "step": 5650 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001892579237046878, + "loss": 7.8768, + "step": 5660 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018923894477130385, + "loss": 7.8953, + "step": 5670 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018921996583791991, + "loss": 7.851, + "step": 5680 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018920098690453598, + "loss": 7.9436, + "step": 5690 + }, + { + "epoch": 0.54, + "learning_rate": 0.000189182007971152, + "loss": 7.882, + "step": 5700 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018916302903776807, + "loss": 7.8709, + "step": 5710 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018914405010438413, + "loss": 7.8128, + "step": 5720 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001891250711710002, + "loss": 7.9213, + "step": 5730 + }, + { + "epoch": 0.54, + "learning_rate": 0.00018910609223761626, + "loss": 7.8444, + "step": 5740 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018908711330423232, + "loss": 7.9045, + "step": 5750 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018906813437084835, + "loss": 8.0001, + "step": 5760 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001890491554374644, + "loss": 7.9054, + "step": 5770 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018903017650408047, + "loss": 7.9683, + "step": 5780 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018901119757069653, + "loss": 7.8151, + "step": 5790 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001889922186373126, + "loss": 7.8192, + "step": 5800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018897323970392863, + "loss": 7.9276, + "step": 5810 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001889542607705447, + "loss": 7.9805, + "step": 5820 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018893528183716075, + "loss": 7.8192, + "step": 5830 + }, + { + "epoch": 0.55, + "learning_rate": 0.00018891630290377681, + "loss": 7.9176, + "step": 5840 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018889732397039288, + "loss": 7.8999, + "step": 5850 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001888783450370089, + "loss": 7.8994, + "step": 5860 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018885936610362497, + "loss": 7.8313, + "step": 5870 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018884038717024103, + "loss": 7.924, + "step": 5880 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001888214082368571, + "loss": 7.8946, + "step": 5890 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018880242930347315, + "loss": 7.9005, + "step": 5900 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018878345037008922, + "loss": 7.8146, + "step": 5910 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018876447143670525, + "loss": 7.916, + "step": 5920 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001887454925033213, + "loss": 7.9194, + "step": 5930 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018872651356993737, + "loss": 7.9068, + "step": 5940 + }, + { + "epoch": 0.56, + "learning_rate": 0.00018870753463655343, + "loss": 7.8296, + "step": 5950 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001886885557031695, + "loss": 7.9821, + "step": 5960 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018866957676978553, + "loss": 7.9248, + "step": 5970 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001886505978364016, + "loss": 7.9169, + "step": 5980 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018863161890301765, + "loss": 7.9041, + "step": 5990 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001886126399696337, + "loss": 7.9649, + "step": 6000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018859366103624977, + "loss": 7.9127, + "step": 6010 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001885746821028658, + "loss": 8.0069, + "step": 6020 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018855570316948187, + "loss": 7.9172, + "step": 6030 + }, + { + "epoch": 0.57, + "learning_rate": 0.00018853672423609793, + "loss": 7.83, + "step": 6040 + }, + { + "epoch": 0.57, + "learning_rate": 0.000188517745302714, + "loss": 8.0185, + "step": 6050 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018849876636933005, + "loss": 7.96, + "step": 6060 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001884797874359461, + "loss": 7.8452, + "step": 6070 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018846080850256215, + "loss": 7.9495, + "step": 6080 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001884418295691782, + "loss": 7.8226, + "step": 6090 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018842285063579427, + "loss": 7.8803, + "step": 6100 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018840387170241033, + "loss": 7.887, + "step": 6110 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001883848927690264, + "loss": 7.7835, + "step": 6120 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018836591383564243, + "loss": 7.8716, + "step": 6130 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001883469349022585, + "loss": 7.8583, + "step": 6140 + }, + { + "epoch": 0.58, + "learning_rate": 0.00018832795596887455, + "loss": 7.925, + "step": 6150 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001883089770354906, + "loss": 7.885, + "step": 6160 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018828999810210667, + "loss": 7.8988, + "step": 6170 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001882710191687227, + "loss": 7.8836, + "step": 6180 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018825204023533877, + "loss": 7.8954, + "step": 6190 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018823306130195483, + "loss": 7.8208, + "step": 6200 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001882140823685709, + "loss": 7.8342, + "step": 6210 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018819510343518695, + "loss": 7.9022, + "step": 6220 + }, + { + "epoch": 0.59, + "learning_rate": 0.000188176124501803, + "loss": 7.8636, + "step": 6230 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018815714556841905, + "loss": 7.8913, + "step": 6240 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001881381666350351, + "loss": 7.8727, + "step": 6250 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018811918770165117, + "loss": 7.7914, + "step": 6260 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018810020876826723, + "loss": 7.9805, + "step": 6270 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001880812298348833, + "loss": 7.9156, + "step": 6280 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018806225090149933, + "loss": 7.8462, + "step": 6290 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001880432719681154, + "loss": 7.8603, + "step": 6300 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018802429303473145, + "loss": 7.9177, + "step": 6310 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001880053141013475, + "loss": 8.0463, + "step": 6320 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018798633516796357, + "loss": 7.8587, + "step": 6330 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001879673562345796, + "loss": 7.8777, + "step": 6340 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018794837730119567, + "loss": 7.9968, + "step": 6350 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018792939836781173, + "loss": 7.8663, + "step": 6360 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001879104194344278, + "loss": 7.959, + "step": 6370 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018789144050104385, + "loss": 7.8974, + "step": 6380 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018787246156765989, + "loss": 7.9797, + "step": 6390 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018785348263427595, + "loss": 7.9455, + "step": 6400 + }, + { + "epoch": 0.61, + "learning_rate": 0.000187834503700892, + "loss": 7.8779, + "step": 6410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018781552476750807, + "loss": 7.9653, + "step": 6420 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018779654583412413, + "loss": 7.9065, + "step": 6430 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001877775669007402, + "loss": 7.9129, + "step": 6440 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018775858796735623, + "loss": 7.8326, + "step": 6450 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001877396090339723, + "loss": 7.8568, + "step": 6460 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018772063010058835, + "loss": 7.8862, + "step": 6470 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001877016511672044, + "loss": 7.9105, + "step": 6480 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018768267223382047, + "loss": 7.9036, + "step": 6490 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001876636933004365, + "loss": 7.8985, + "step": 6500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018764471436705257, + "loss": 7.9402, + "step": 6510 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018762573543366863, + "loss": 7.9015, + "step": 6520 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001876067565002847, + "loss": 7.9234, + "step": 6530 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018758777756690075, + "loss": 7.927, + "step": 6540 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018756879863351679, + "loss": 7.9412, + "step": 6550 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018754981970013285, + "loss": 7.8886, + "step": 6560 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001875308407667489, + "loss": 7.8395, + "step": 6570 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018751186183336497, + "loss": 7.8293, + "step": 6580 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018749288289998103, + "loss": 7.9124, + "step": 6590 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018747390396659706, + "loss": 7.903, + "step": 6600 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018745492503321313, + "loss": 7.9087, + "step": 6610 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001874359460998292, + "loss": 7.9055, + "step": 6620 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018741696716644525, + "loss": 7.9397, + "step": 6630 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001873979882330613, + "loss": 7.8537, + "step": 6640 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018737900929967737, + "loss": 7.9236, + "step": 6650 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001873600303662934, + "loss": 7.8514, + "step": 6660 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018734105143290947, + "loss": 7.9212, + "step": 6670 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018732207249952553, + "loss": 7.8364, + "step": 6680 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001873030935661416, + "loss": 7.8725, + "step": 6690 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018728411463275765, + "loss": 7.8552, + "step": 6700 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018726513569937368, + "loss": 7.8767, + "step": 6710 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018724615676598975, + "loss": 7.8403, + "step": 6720 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001872271778326058, + "loss": 7.9066, + "step": 6730 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018720819889922187, + "loss": 7.8817, + "step": 6740 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018718921996583793, + "loss": 7.9498, + "step": 6750 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018717024103245396, + "loss": 7.9311, + "step": 6760 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018715126209907002, + "loss": 7.7898, + "step": 6770 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018713228316568609, + "loss": 7.9264, + "step": 6780 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018711330423230215, + "loss": 7.827, + "step": 6790 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001870943252989182, + "loss": 7.9209, + "step": 6800 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018707534636553427, + "loss": 7.8619, + "step": 6810 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001870563674321503, + "loss": 7.8741, + "step": 6820 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018703738849876637, + "loss": 7.806, + "step": 6830 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018701840956538243, + "loss": 7.7915, + "step": 6840 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001869994306319985, + "loss": 7.9415, + "step": 6850 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018698045169861455, + "loss": 7.8782, + "step": 6860 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018696147276523058, + "loss": 7.9309, + "step": 6870 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018694249383184664, + "loss": 7.897, + "step": 6880 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001869235148984627, + "loss": 7.7669, + "step": 6890 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018690453596507877, + "loss": 7.8783, + "step": 6900 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018688555703169483, + "loss": 7.9449, + "step": 6910 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018686657809831086, + "loss": 7.839, + "step": 6920 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018684759916492692, + "loss": 7.864, + "step": 6930 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018682862023154299, + "loss": 7.8597, + "step": 6940 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018680964129815905, + "loss": 7.7478, + "step": 6950 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001867906623647751, + "loss": 7.835, + "step": 6960 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018677168343139117, + "loss": 7.8262, + "step": 6970 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001867527044980072, + "loss": 7.9301, + "step": 6980 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018673372556462326, + "loss": 7.9125, + "step": 6990 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018671474663123933, + "loss": 7.7855, + "step": 7000 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001866957676978554, + "loss": 7.9173, + "step": 7010 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018667678876447145, + "loss": 7.8071, + "step": 7020 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018665780983108748, + "loss": 7.8352, + "step": 7030 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018663883089770354, + "loss": 8.0009, + "step": 7040 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001866198519643196, + "loss": 7.8807, + "step": 7050 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018660087303093567, + "loss": 7.9295, + "step": 7060 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018658189409755173, + "loss": 7.9473, + "step": 7070 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018656291516416776, + "loss": 7.7944, + "step": 7080 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018654393623078382, + "loss": 7.929, + "step": 7090 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018652495729739988, + "loss": 7.9429, + "step": 7100 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018650597836401595, + "loss": 7.8215, + "step": 7110 + }, + { + "epoch": 0.68, + "learning_rate": 0.000186486999430632, + "loss": 7.9725, + "step": 7120 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018646802049724804, + "loss": 7.8404, + "step": 7130 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001864490415638641, + "loss": 7.8327, + "step": 7140 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018643006263048016, + "loss": 7.9532, + "step": 7150 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018641108369709622, + "loss": 7.8497, + "step": 7160 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018639210476371229, + "loss": 7.9127, + "step": 7170 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018637312583032835, + "loss": 7.8786, + "step": 7180 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018635414689694438, + "loss": 7.9397, + "step": 7190 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018633516796356044, + "loss": 7.9493, + "step": 7200 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001863161890301765, + "loss": 7.9002, + "step": 7210 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018629721009679257, + "loss": 7.8782, + "step": 7220 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018627823116340863, + "loss": 7.8344, + "step": 7230 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018625925223002466, + "loss": 7.8414, + "step": 7240 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018624027329664072, + "loss": 8.0188, + "step": 7250 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018622129436325678, + "loss": 8.009, + "step": 7260 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018620231542987284, + "loss": 7.8582, + "step": 7270 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001861833364964889, + "loss": 7.9226, + "step": 7280 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018616435756310494, + "loss": 7.7506, + "step": 7290 + }, + { + "epoch": 0.69, + "learning_rate": 0.000186145378629721, + "loss": 7.8297, + "step": 7300 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018612639969633706, + "loss": 7.8306, + "step": 7310 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018610742076295312, + "loss": 7.9155, + "step": 7320 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018608844182956919, + "loss": 7.906, + "step": 7330 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018606946289618525, + "loss": 7.8682, + "step": 7340 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018605048396280128, + "loss": 7.8501, + "step": 7350 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018603150502941734, + "loss": 7.8741, + "step": 7360 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001860125260960334, + "loss": 7.8392, + "step": 7370 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018599354716264946, + "loss": 7.9128, + "step": 7380 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018597456822926553, + "loss": 7.9862, + "step": 7390 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018595558929588156, + "loss": 7.8779, + "step": 7400 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018593661036249762, + "loss": 7.8301, + "step": 7410 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018591763142911368, + "loss": 7.8788, + "step": 7420 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018589865249572974, + "loss": 7.9279, + "step": 7430 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001858796735623458, + "loss": 7.8394, + "step": 7440 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018586069462896184, + "loss": 7.8696, + "step": 7450 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001858417156955779, + "loss": 7.9632, + "step": 7460 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018582273676219396, + "loss": 7.8598, + "step": 7470 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018580375782881002, + "loss": 7.9166, + "step": 7480 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018578477889542608, + "loss": 7.9847, + "step": 7490 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018576579996204215, + "loss": 7.9102, + "step": 7500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018574682102865818, + "loss": 7.8375, + "step": 7510 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018572784209527424, + "loss": 7.8349, + "step": 7520 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001857088631618903, + "loss": 7.8491, + "step": 7530 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018568988422850636, + "loss": 7.9786, + "step": 7540 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018567090529512242, + "loss": 7.9327, + "step": 7550 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018565192636173846, + "loss": 7.77, + "step": 7560 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018563294742835452, + "loss": 7.9486, + "step": 7570 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018561396849497058, + "loss": 7.8833, + "step": 7580 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018559498956158664, + "loss": 7.9048, + "step": 7590 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001855760106282027, + "loss": 7.874, + "step": 7600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018555703169481874, + "loss": 7.8719, + "step": 7610 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001855380527614348, + "loss": 7.851, + "step": 7620 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018551907382805086, + "loss": 7.9377, + "step": 7630 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018550009489466692, + "loss": 7.9001, + "step": 7640 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018548111596128298, + "loss": 7.8736, + "step": 7650 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018546213702789902, + "loss": 7.8838, + "step": 7660 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018544315809451508, + "loss": 7.8924, + "step": 7670 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018542417916113114, + "loss": 7.8862, + "step": 7680 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001854052002277472, + "loss": 7.8334, + "step": 7690 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018538622129436326, + "loss": 7.8363, + "step": 7700 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018536724236097932, + "loss": 7.905, + "step": 7710 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018534826342759536, + "loss": 7.9271, + "step": 7720 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018532928449421142, + "loss": 7.8476, + "step": 7730 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018531030556082748, + "loss": 7.8614, + "step": 7740 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018529132662744354, + "loss": 7.8769, + "step": 7750 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001852723476940596, + "loss": 7.9633, + "step": 7760 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018525336876067564, + "loss": 7.9126, + "step": 7770 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001852343898272917, + "loss": 7.8547, + "step": 7780 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018521541089390776, + "loss": 7.9311, + "step": 7790 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018519643196052382, + "loss": 7.9018, + "step": 7800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018517745302713988, + "loss": 7.9094, + "step": 7810 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018515847409375592, + "loss": 7.8283, + "step": 7820 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018513949516037198, + "loss": 7.8136, + "step": 7830 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018512051622698804, + "loss": 7.7893, + "step": 7840 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001851015372936041, + "loss": 7.835, + "step": 7850 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018508255836022016, + "loss": 7.9367, + "step": 7860 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018506357942683622, + "loss": 7.8171, + "step": 7870 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018504460049345226, + "loss": 7.7815, + "step": 7880 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018502562156006832, + "loss": 7.8454, + "step": 7890 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018500664262668438, + "loss": 7.9403, + "step": 7900 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018498766369330044, + "loss": 7.8852, + "step": 7910 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001849686847599165, + "loss": 7.8311, + "step": 7920 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018494970582653254, + "loss": 7.8699, + "step": 7930 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001849307268931486, + "loss": 7.8523, + "step": 7940 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018491174795976466, + "loss": 7.7491, + "step": 7950 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018489276902638072, + "loss": 7.9315, + "step": 7960 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018487379009299678, + "loss": 7.9393, + "step": 7970 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018485481115961282, + "loss": 7.8317, + "step": 7980 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018483583222622888, + "loss": 7.9529, + "step": 7990 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018481685329284494, + "loss": 7.8806, + "step": 8000 + }, + { + "epoch": 0.76, + "learning_rate": 0.000184797874359461, + "loss": 7.9167, + "step": 8010 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018477889542607706, + "loss": 7.8738, + "step": 8020 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018475991649269312, + "loss": 8.0231, + "step": 8030 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018474093755930916, + "loss": 7.9135, + "step": 8040 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018472195862592522, + "loss": 7.8255, + "step": 8050 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018470297969254128, + "loss": 7.8249, + "step": 8060 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018468400075915734, + "loss": 7.8805, + "step": 8070 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001846650218257734, + "loss": 7.8936, + "step": 8080 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018464604289238944, + "loss": 7.8101, + "step": 8090 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001846270639590055, + "loss": 7.9686, + "step": 8100 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018460808502562156, + "loss": 7.8303, + "step": 8110 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018458910609223762, + "loss": 7.8838, + "step": 8120 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018457012715885368, + "loss": 7.8232, + "step": 8130 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018455114822546972, + "loss": 7.8239, + "step": 8140 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018453216929208578, + "loss": 7.8823, + "step": 8150 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018451319035870184, + "loss": 7.9962, + "step": 8160 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001844942114253179, + "loss": 7.9401, + "step": 8170 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018447523249193396, + "loss": 7.9221, + "step": 8180 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018445625355855, + "loss": 7.8689, + "step": 8190 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018443727462516606, + "loss": 7.9139, + "step": 8200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018441829569178212, + "loss": 8.0207, + "step": 8210 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018439931675839818, + "loss": 7.8523, + "step": 8220 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018438033782501424, + "loss": 7.8425, + "step": 8230 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001843613588916303, + "loss": 7.8204, + "step": 8240 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018434237995824633, + "loss": 7.8536, + "step": 8250 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001843234010248624, + "loss": 7.8367, + "step": 8260 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018430442209147846, + "loss": 7.7924, + "step": 8270 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018428544315809452, + "loss": 7.8558, + "step": 8280 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018426646422471058, + "loss": 7.9187, + "step": 8290 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018424748529132661, + "loss": 7.7554, + "step": 8300 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018422850635794268, + "loss": 7.8642, + "step": 8310 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018420952742455874, + "loss": 7.8859, + "step": 8320 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001841905484911748, + "loss": 7.8936, + "step": 8330 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018417156955779086, + "loss": 7.9325, + "step": 8340 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001841525906244069, + "loss": 7.9461, + "step": 8350 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018413361169102295, + "loss": 7.8525, + "step": 8360 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018411463275763902, + "loss": 7.9894, + "step": 8370 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018409565382425508, + "loss": 7.7846, + "step": 8380 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018407667489087114, + "loss": 7.8432, + "step": 8390 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001840576959574872, + "loss": 7.9854, + "step": 8400 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018403871702410323, + "loss": 7.7578, + "step": 8410 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001840197380907193, + "loss": 7.9038, + "step": 8420 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018400075915733536, + "loss": 7.9633, + "step": 8430 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018398178022395142, + "loss": 7.8895, + "step": 8440 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018396280129056748, + "loss": 7.9363, + "step": 8450 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001839438223571835, + "loss": 7.8365, + "step": 8460 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018392484342379957, + "loss": 7.8543, + "step": 8470 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018390586449041564, + "loss": 7.8508, + "step": 8480 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001838868855570317, + "loss": 7.7221, + "step": 8490 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018386790662364776, + "loss": 7.8354, + "step": 8500 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001838489276902638, + "loss": 7.8542, + "step": 8510 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018382994875687985, + "loss": 8.0115, + "step": 8520 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018381096982349592, + "loss": 7.9047, + "step": 8530 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018379199089011198, + "loss": 7.9698, + "step": 8540 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018377301195672804, + "loss": 7.8554, + "step": 8550 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001837540330233441, + "loss": 8.0107, + "step": 8560 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018373505408996013, + "loss": 7.9775, + "step": 8570 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001837160751565762, + "loss": 7.8525, + "step": 8580 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018369709622319226, + "loss": 8.0003, + "step": 8590 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018367811728980832, + "loss": 7.9092, + "step": 8600 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018365913835642438, + "loss": 7.8022, + "step": 8610 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001836401594230404, + "loss": 7.9229, + "step": 8620 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018362118048965647, + "loss": 7.9276, + "step": 8630 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018360220155627254, + "loss": 7.8804, + "step": 8640 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001835832226228886, + "loss": 7.917, + "step": 8650 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018356424368950466, + "loss": 7.8381, + "step": 8660 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001835452647561207, + "loss": 7.8699, + "step": 8670 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018352628582273675, + "loss": 7.8897, + "step": 8680 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018350730688935281, + "loss": 7.88, + "step": 8690 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018348832795596888, + "loss": 7.8877, + "step": 8700 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018346934902258494, + "loss": 7.959, + "step": 8710 + }, + { + "epoch": 0.83, + "learning_rate": 0.000183450370089201, + "loss": 7.8917, + "step": 8720 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018343139115581703, + "loss": 7.819, + "step": 8730 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001834124122224331, + "loss": 7.8435, + "step": 8740 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018339343328904915, + "loss": 7.8497, + "step": 8750 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018337445435566522, + "loss": 7.9061, + "step": 8760 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018335547542228128, + "loss": 7.9324, + "step": 8770 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001833364964888973, + "loss": 7.9109, + "step": 8780 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018331751755551337, + "loss": 7.9571, + "step": 8790 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018329853862212943, + "loss": 7.8536, + "step": 8800 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001832795596887455, + "loss": 7.8108, + "step": 8810 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018326058075536156, + "loss": 7.9172, + "step": 8820 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001832416018219776, + "loss": 7.8601, + "step": 8830 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018322262288859365, + "loss": 7.8404, + "step": 8840 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001832036439552097, + "loss": 7.9074, + "step": 8850 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018318466502182577, + "loss": 7.9285, + "step": 8860 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018316568608844184, + "loss": 7.8839, + "step": 8870 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001831467071550579, + "loss": 7.8742, + "step": 8880 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018312772822167393, + "loss": 7.8799, + "step": 8890 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018310874928829, + "loss": 7.9588, + "step": 8900 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018308977035490605, + "loss": 7.9399, + "step": 8910 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018307079142152212, + "loss": 7.879, + "step": 8920 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018305181248813818, + "loss": 7.9172, + "step": 8930 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001830328335547542, + "loss": 7.9306, + "step": 8940 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018301385462137027, + "loss": 7.9562, + "step": 8950 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018299487568798633, + "loss": 7.7876, + "step": 8960 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001829758967546024, + "loss": 7.9089, + "step": 8970 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018295691782121846, + "loss": 7.9165, + "step": 8980 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001829379388878345, + "loss": 7.9097, + "step": 8990 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018291895995445055, + "loss": 7.9582, + "step": 9000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001828999810210666, + "loss": 7.9603, + "step": 9010 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018288100208768267, + "loss": 7.8963, + "step": 9020 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018286202315429874, + "loss": 7.9486, + "step": 9030 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001828430442209148, + "loss": 7.9271, + "step": 9040 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018282406528753083, + "loss": 7.8293, + "step": 9050 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001828050863541469, + "loss": 7.9049, + "step": 9060 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018278610742076295, + "loss": 7.8041, + "step": 9070 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018276712848737901, + "loss": 7.8856, + "step": 9080 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018274814955399508, + "loss": 7.9318, + "step": 9090 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001827291706206111, + "loss": 7.8903, + "step": 9100 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018271019168722717, + "loss": 7.86, + "step": 9110 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018269121275384323, + "loss": 7.9715, + "step": 9120 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001826722338204593, + "loss": 7.8544, + "step": 9130 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018265325488707535, + "loss": 7.9638, + "step": 9140 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018263427595369142, + "loss": 7.8734, + "step": 9150 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018261529702030745, + "loss": 7.901, + "step": 9160 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001825963180869235, + "loss": 7.8865, + "step": 9170 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018257733915353957, + "loss": 7.8662, + "step": 9180 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018255836022015563, + "loss": 7.8327, + "step": 9190 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001825393812867717, + "loss": 7.9404, + "step": 9200 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018252040235338773, + "loss": 7.8854, + "step": 9210 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001825014234200038, + "loss": 7.8985, + "step": 9220 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018248244448661985, + "loss": 7.9045, + "step": 9230 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001824634655532359, + "loss": 7.9535, + "step": 9240 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018244448661985197, + "loss": 7.8169, + "step": 9250 + }, + { + "epoch": 0.88, + "learning_rate": 0.000182425507686468, + "loss": 7.8822, + "step": 9260 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018240652875308407, + "loss": 7.9416, + "step": 9270 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018238754981970013, + "loss": 7.871, + "step": 9280 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001823685708863162, + "loss": 7.9359, + "step": 9290 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018234959195293225, + "loss": 7.865, + "step": 9300 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018233061301954832, + "loss": 7.823, + "step": 9310 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018231163408616435, + "loss": 7.8769, + "step": 9320 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001822926551527804, + "loss": 7.8701, + "step": 9330 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018227367621939647, + "loss": 7.8604, + "step": 9340 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018225469728601253, + "loss": 7.8605, + "step": 9350 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001822357183526286, + "loss": 7.989, + "step": 9360 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018221673941924463, + "loss": 7.9755, + "step": 9370 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001821977604858607, + "loss": 7.9113, + "step": 9380 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018217878155247675, + "loss": 7.9352, + "step": 9390 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001821598026190928, + "loss": 7.8909, + "step": 9400 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018214082368570887, + "loss": 7.9461, + "step": 9410 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001821218447523249, + "loss": 7.9076, + "step": 9420 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018210286581894097, + "loss": 7.8942, + "step": 9430 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018208388688555703, + "loss": 7.9481, + "step": 9440 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001820649079521731, + "loss": 7.9706, + "step": 9450 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018204592901878915, + "loss": 7.862, + "step": 9460 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018202695008540521, + "loss": 7.8994, + "step": 9470 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018200797115202125, + "loss": 7.9575, + "step": 9480 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001819889922186373, + "loss": 7.8784, + "step": 9490 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018197001328525337, + "loss": 7.7745, + "step": 9500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018195103435186943, + "loss": 7.8305, + "step": 9510 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001819320554184855, + "loss": 7.9674, + "step": 9520 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018191307648510153, + "loss": 7.8213, + "step": 9530 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001818940975517176, + "loss": 7.9048, + "step": 9540 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018187511861833365, + "loss": 7.9176, + "step": 9550 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001818561396849497, + "loss": 7.8689, + "step": 9560 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018183716075156577, + "loss": 7.7798, + "step": 9570 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018181818181818183, + "loss": 7.9013, + "step": 9580 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018179920288479787, + "loss": 7.8507, + "step": 9590 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018178022395141393, + "loss": 7.8261, + "step": 9600 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018176124501803, + "loss": 7.9989, + "step": 9610 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018174226608464605, + "loss": 7.8337, + "step": 9620 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018172328715126211, + "loss": 7.8822, + "step": 9630 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018170430821787815, + "loss": 7.8731, + "step": 9640 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001816853292844942, + "loss": 7.8363, + "step": 9650 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018166635035111027, + "loss": 7.8563, + "step": 9660 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018164737141772633, + "loss": 7.869, + "step": 9670 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001816283924843424, + "loss": 7.8723, + "step": 9680 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018160941355095843, + "loss": 7.8806, + "step": 9690 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001815904346175745, + "loss": 7.9023, + "step": 9700 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018157145568419055, + "loss": 7.8605, + "step": 9710 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001815524767508066, + "loss": 7.9172, + "step": 9720 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018153349781742267, + "loss": 7.8754, + "step": 9730 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018151451888403873, + "loss": 7.8568, + "step": 9740 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018149553995065477, + "loss": 7.8626, + "step": 9750 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018147656101727083, + "loss": 7.9, + "step": 9760 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001814575820838869, + "loss": 7.8833, + "step": 9770 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018143860315050295, + "loss": 7.928, + "step": 9780 + }, + { + "epoch": 0.93, + "learning_rate": 0.000181419624217119, + "loss": 7.9062, + "step": 9790 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018140064528373505, + "loss": 7.9355, + "step": 9800 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001813816663503511, + "loss": 7.8508, + "step": 9810 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018136268741696717, + "loss": 7.974, + "step": 9820 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018134370848358323, + "loss": 7.8582, + "step": 9830 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001813247295501993, + "loss": 7.9101, + "step": 9840 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018130575061681533, + "loss": 7.8904, + "step": 9850 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001812867716834314, + "loss": 7.8692, + "step": 9860 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018126779275004745, + "loss": 7.85, + "step": 9870 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001812488138166635, + "loss": 7.8922, + "step": 9880 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018122983488327957, + "loss": 7.8379, + "step": 9890 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018121085594989563, + "loss": 7.9406, + "step": 9900 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018119187701651167, + "loss": 7.8289, + "step": 9910 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018117289808312773, + "loss": 7.8563, + "step": 9920 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001811539191497438, + "loss": 7.834, + "step": 9930 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018113494021635985, + "loss": 8.0085, + "step": 9940 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001811159612829759, + "loss": 7.9363, + "step": 9950 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018109698234959195, + "loss": 7.9089, + "step": 9960 + }, + { + "epoch": 0.95, + "learning_rate": 0.000181078003416208, + "loss": 7.8335, + "step": 9970 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018105902448282407, + "loss": 7.8081, + "step": 9980 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018104004554944013, + "loss": 7.873, + "step": 9990 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001810210666160562, + "loss": 7.8915, + "step": 10000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018100208768267225, + "loss": 7.9186, + "step": 10010 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001809831087492883, + "loss": 7.8812, + "step": 10020 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018096412981590435, + "loss": 7.9097, + "step": 10030 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001809451508825204, + "loss": 7.9482, + "step": 10040 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018092617194913647, + "loss": 7.8154, + "step": 10050 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018090719301575253, + "loss": 7.8891, + "step": 10060 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018088821408236857, + "loss": 7.9105, + "step": 10070 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018086923514898463, + "loss": 7.7747, + "step": 10080 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001808502562156007, + "loss": 7.8896, + "step": 10090 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018083127728221675, + "loss": 7.8707, + "step": 10100 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001808122983488328, + "loss": 7.8592, + "step": 10110 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018079331941544885, + "loss": 7.898, + "step": 10120 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001807743404820649, + "loss": 7.9184, + "step": 10130 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018075536154868097, + "loss": 7.8481, + "step": 10140 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018073638261529703, + "loss": 7.8695, + "step": 10150 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001807174036819131, + "loss": 7.8226, + "step": 10160 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018069842474852915, + "loss": 7.8663, + "step": 10170 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018067944581514519, + "loss": 7.9406, + "step": 10180 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018066046688176125, + "loss": 7.9082, + "step": 10190 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001806414879483773, + "loss": 7.9394, + "step": 10200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018062250901499337, + "loss": 7.9245, + "step": 10210 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018060353008160943, + "loss": 7.8805, + "step": 10220 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018058455114822546, + "loss": 7.9478, + "step": 10230 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018056557221484153, + "loss": 7.9049, + "step": 10240 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001805465932814576, + "loss": 7.9602, + "step": 10250 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018052761434807365, + "loss": 7.901, + "step": 10260 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001805086354146897, + "loss": 7.9045, + "step": 10270 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018048965648130574, + "loss": 7.901, + "step": 10280 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001804706775479218, + "loss": 7.8334, + "step": 10290 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018045169861453787, + "loss": 7.9435, + "step": 10300 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018043271968115393, + "loss": 7.8548, + "step": 10310 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018041374074777, + "loss": 7.7928, + "step": 10320 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018039476181438605, + "loss": 7.8831, + "step": 10330 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018037578288100208, + "loss": 7.8562, + "step": 10340 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018035680394761815, + "loss": 7.836, + "step": 10350 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001803378250142342, + "loss": 7.8384, + "step": 10360 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018031884608085027, + "loss": 7.9001, + "step": 10370 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018029986714746633, + "loss": 7.8501, + "step": 10380 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018028088821408236, + "loss": 7.8157, + "step": 10390 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018026190928069843, + "loss": 7.885, + "step": 10400 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001802429303473145, + "loss": 7.9899, + "step": 10410 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018022395141393055, + "loss": 7.9357, + "step": 10420 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001802049724805466, + "loss": 7.9355, + "step": 10430 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018018599354716267, + "loss": 7.9414, + "step": 10440 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001801670146137787, + "loss": 7.9003, + "step": 10450 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018014803568039477, + "loss": 7.8847, + "step": 10460 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018012905674701083, + "loss": 7.8374, + "step": 10470 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001801100778136269, + "loss": 7.8259, + "step": 10480 + }, + { + "epoch": 1.0, + "learning_rate": 0.00018009109888024295, + "loss": 7.8256, + "step": 10490 + }, + { + "epoch": 1.0, + "learning_rate": 0.00018007211994685898, + "loss": 7.8138, + "step": 10500 + }, + { + "epoch": 1.0, + "learning_rate": 0.00018005314101347505, + "loss": 7.7975, + "step": 10510 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001800341620800911, + "loss": 7.7755, + "step": 10520 + }, + { + "epoch": 1.0, + "learning_rate": 0.00018001518314670717, + "loss": 7.9293, + "step": 10530 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017999620421332323, + "loss": 7.8856, + "step": 10540 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017997722527993926, + "loss": 7.8915, + "step": 10550 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017995824634655532, + "loss": 7.891, + "step": 10560 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017993926741317139, + "loss": 7.8232, + "step": 10570 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017992028847978745, + "loss": 7.9083, + "step": 10580 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001799013095464035, + "loss": 7.9282, + "step": 10590 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017988233061301957, + "loss": 7.8374, + "step": 10600 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001798633516796356, + "loss": 7.9513, + "step": 10610 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017984437274625167, + "loss": 7.8343, + "step": 10620 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017982539381286773, + "loss": 7.7961, + "step": 10630 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001798064148794838, + "loss": 7.9153, + "step": 10640 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017978743594609985, + "loss": 7.8247, + "step": 10650 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017976845701271588, + "loss": 7.8539, + "step": 10660 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017974947807933194, + "loss": 7.9135, + "step": 10670 + }, + { + "epoch": 1.01, + "learning_rate": 0.000179730499145948, + "loss": 7.8577, + "step": 10680 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017971152021256407, + "loss": 7.8242, + "step": 10690 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017969254127918013, + "loss": 7.8424, + "step": 10700 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001796735623457962, + "loss": 7.8466, + "step": 10710 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017965458341241222, + "loss": 7.8525, + "step": 10720 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017963560447902828, + "loss": 7.9175, + "step": 10730 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017961662554564435, + "loss": 7.8664, + "step": 10740 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001795976466122604, + "loss": 7.9202, + "step": 10750 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017957866767887647, + "loss": 7.8973, + "step": 10760 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001795596887454925, + "loss": 7.8988, + "step": 10770 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017954070981210856, + "loss": 7.8721, + "step": 10780 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017952173087872463, + "loss": 7.8283, + "step": 10790 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001795027519453407, + "loss": 7.8389, + "step": 10800 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017948377301195675, + "loss": 7.8965, + "step": 10810 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017946479407857278, + "loss": 7.7578, + "step": 10820 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017944581514518884, + "loss": 7.8069, + "step": 10830 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001794268362118049, + "loss": 7.8509, + "step": 10840 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017940785727842097, + "loss": 8.0003, + "step": 10850 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017938887834503703, + "loss": 7.8455, + "step": 10860 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001793698994116531, + "loss": 7.8861, + "step": 10870 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017935092047826912, + "loss": 7.996, + "step": 10880 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017933194154488518, + "loss": 7.8577, + "step": 10890 + }, + { + "epoch": 1.03, + "learning_rate": 0.00017931296261150125, + "loss": 7.8246, + "step": 10900 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001792939836781173, + "loss": 7.7759, + "step": 10910 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017927500474473337, + "loss": 7.817, + "step": 10920 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001792560258113494, + "loss": 7.8713, + "step": 10930 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017923704687796546, + "loss": 7.8883, + "step": 10940 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017921806794458152, + "loss": 7.8941, + "step": 10950 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017919908901119759, + "loss": 7.991, + "step": 10960 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017918011007781365, + "loss": 7.9228, + "step": 10970 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017916113114442968, + "loss": 7.8441, + "step": 10980 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017914215221104574, + "loss": 7.9454, + "step": 10990 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001791231732776618, + "loss": 7.9598, + "step": 11000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00017910419434427787, + "loss": 7.9049, + "step": 11010 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017908521541089393, + "loss": 7.8028, + "step": 11020 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017906623647751, + "loss": 7.8796, + "step": 11030 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017904725754412602, + "loss": 7.8912, + "step": 11040 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017902827861074208, + "loss": 7.9389, + "step": 11050 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017900929967735814, + "loss": 7.8783, + "step": 11060 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001789903207439742, + "loss": 7.7984, + "step": 11070 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017897134181059027, + "loss": 7.8839, + "step": 11080 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001789523628772063, + "loss": 7.9485, + "step": 11090 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017893338394382236, + "loss": 7.8604, + "step": 11100 + }, + { + "epoch": 1.05, + "learning_rate": 0.00017891440501043842, + "loss": 7.8475, + "step": 11110 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017889542607705448, + "loss": 7.8361, + "step": 11120 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017887644714367055, + "loss": 7.9018, + "step": 11130 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001788574682102866, + "loss": 7.8377, + "step": 11140 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017883848927690264, + "loss": 7.9123, + "step": 11150 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001788195103435187, + "loss": 7.8456, + "step": 11160 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017880053141013476, + "loss": 8.0092, + "step": 11170 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017878155247675083, + "loss": 7.9217, + "step": 11180 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001787625735433669, + "loss": 7.8548, + "step": 11190 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017874359460998292, + "loss": 7.8086, + "step": 11200 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017872461567659898, + "loss": 7.9422, + "step": 11210 + }, + { + "epoch": 1.06, + "learning_rate": 0.00017870563674321504, + "loss": 7.8733, + "step": 11220 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001786866578098311, + "loss": 7.9344, + "step": 11230 + }, + { + "epoch": 1.07, + "learning_rate": 0.00017866767887644717, + "loss": 7.8834, + "step": 11240 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001786486999430632, + "loss": 7.9546, + "step": 11250 + }, + { + "epoch": 1.07, + "learning_rate": 0.00017862972100967926, + "loss": 7.8716, + "step": 11260 + }, + { + "epoch": 1.07, + "learning_rate": 0.00017861074207629532, + "loss": 7.8437, + "step": 11270 + }, + { + "epoch": 1.07, + "learning_rate": 0.00017859176314291138, + "loss": 7.9324, + "step": 11280 + }, + { + "epoch": 1.07, + "learning_rate": 0.00017857278420952745, + "loss": 7.9234, + "step": 11290 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001785538052761435, + "loss": 7.9025, + "step": 11300 + }, + { + "epoch": 1.07, + "learning_rate": 0.00017853482634275954, + "loss": 7.9273, + "step": 11310 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001785158474093756, + "loss": 7.921, + "step": 11320 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017849686847599166, + "loss": 7.8048, + "step": 11330 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017847788954260772, + "loss": 7.9139, + "step": 11340 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017845891060922379, + "loss": 7.8946, + "step": 11350 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017843993167583982, + "loss": 7.8894, + "step": 11360 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017842095274245588, + "loss": 7.929, + "step": 11370 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017840197380907194, + "loss": 7.8031, + "step": 11380 + }, + { + "epoch": 1.08, + "learning_rate": 0.000178382994875688, + "loss": 7.8458, + "step": 11390 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017836401594230407, + "loss": 7.7878, + "step": 11400 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001783450370089201, + "loss": 7.8181, + "step": 11410 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017832605807553616, + "loss": 7.9258, + "step": 11420 + }, + { + "epoch": 1.08, + "learning_rate": 0.00017830707914215222, + "loss": 7.8423, + "step": 11430 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017828810020876828, + "loss": 7.8946, + "step": 11440 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017826912127538434, + "loss": 7.8909, + "step": 11450 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001782501423420004, + "loss": 7.9299, + "step": 11460 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017823116340861644, + "loss": 7.8795, + "step": 11470 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001782121844752325, + "loss": 7.9029, + "step": 11480 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017819320554184856, + "loss": 7.9417, + "step": 11490 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017817422660846462, + "loss": 7.8398, + "step": 11500 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017815524767508069, + "loss": 7.8438, + "step": 11510 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017813626874169672, + "loss": 7.8947, + "step": 11520 + }, + { + "epoch": 1.09, + "learning_rate": 0.00017811728980831278, + "loss": 7.8327, + "step": 11530 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017809831087492884, + "loss": 7.8483, + "step": 11540 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001780793319415449, + "loss": 7.8825, + "step": 11550 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017806035300816096, + "loss": 7.9321, + "step": 11560 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017804137407477703, + "loss": 7.8518, + "step": 11570 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017802239514139306, + "loss": 7.8816, + "step": 11580 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017800341620800912, + "loss": 7.8593, + "step": 11590 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017798443727462518, + "loss": 8.0316, + "step": 11600 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017796545834124124, + "loss": 7.8125, + "step": 11610 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001779464794078573, + "loss": 7.8441, + "step": 11620 + }, + { + "epoch": 1.1, + "learning_rate": 0.00017792750047447334, + "loss": 7.8297, + "step": 11630 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001779085215410894, + "loss": 7.8087, + "step": 11640 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017788954260770546, + "loss": 7.8674, + "step": 11650 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017787056367432152, + "loss": 7.8526, + "step": 11660 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017785158474093758, + "loss": 7.8793, + "step": 11670 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017783260580755362, + "loss": 7.895, + "step": 11680 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017781362687416968, + "loss": 7.7946, + "step": 11690 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017779464794078574, + "loss": 7.8457, + "step": 11700 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001777756690074018, + "loss": 7.8713, + "step": 11710 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017775669007401786, + "loss": 7.8753, + "step": 11720 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017773771114063392, + "loss": 7.8126, + "step": 11730 + }, + { + "epoch": 1.11, + "learning_rate": 0.00017771873220724996, + "loss": 7.7843, + "step": 11740 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017769975327386602, + "loss": 7.9056, + "step": 11750 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017768077434048208, + "loss": 7.7945, + "step": 11760 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017766179540709814, + "loss": 7.9491, + "step": 11770 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001776428164737142, + "loss": 7.8972, + "step": 11780 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017762383754033024, + "loss": 7.889, + "step": 11790 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001776048586069463, + "loss": 7.7927, + "step": 11800 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017758587967356236, + "loss": 7.8745, + "step": 11810 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017756690074017842, + "loss": 8.038, + "step": 11820 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017754792180679448, + "loss": 7.8502, + "step": 11830 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017752894287341052, + "loss": 7.8414, + "step": 11840 + }, + { + "epoch": 1.12, + "learning_rate": 0.00017750996394002658, + "loss": 7.8466, + "step": 11850 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017749098500664264, + "loss": 7.8095, + "step": 11860 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001774720060732587, + "loss": 7.9301, + "step": 11870 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017745302713987476, + "loss": 7.8592, + "step": 11880 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017743404820649082, + "loss": 7.9105, + "step": 11890 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017741506927310686, + "loss": 7.8377, + "step": 11900 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017739609033972292, + "loss": 7.8799, + "step": 11910 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017737711140633898, + "loss": 7.8186, + "step": 11920 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017735813247295504, + "loss": 7.8921, + "step": 11930 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001773391535395711, + "loss": 7.8325, + "step": 11940 + }, + { + "epoch": 1.13, + "learning_rate": 0.00017732017460618714, + "loss": 7.8533, + "step": 11950 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001773011956728032, + "loss": 7.7791, + "step": 11960 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017728221673941926, + "loss": 7.841, + "step": 11970 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017726323780603532, + "loss": 7.8712, + "step": 11980 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017724425887265138, + "loss": 7.8939, + "step": 11990 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017722527993926744, + "loss": 7.8825, + "step": 12000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017720630100588348, + "loss": 7.9506, + "step": 12010 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017718732207249954, + "loss": 7.855, + "step": 12020 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001771683431391156, + "loss": 7.7315, + "step": 12030 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017714936420573166, + "loss": 7.791, + "step": 12040 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017713038527234772, + "loss": 7.8175, + "step": 12050 + }, + { + "epoch": 1.14, + "learning_rate": 0.00017711140633896376, + "loss": 7.9103, + "step": 12060 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017709242740557982, + "loss": 7.7869, + "step": 12070 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017707344847219588, + "loss": 7.952, + "step": 12080 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017705446953881194, + "loss": 7.8767, + "step": 12090 + }, + { + "epoch": 1.15, + "learning_rate": 0.000177035490605428, + "loss": 7.7993, + "step": 12100 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017701651167204404, + "loss": 7.7995, + "step": 12110 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001769975327386601, + "loss": 7.869, + "step": 12120 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017697855380527616, + "loss": 7.8906, + "step": 12130 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017695957487189222, + "loss": 7.9035, + "step": 12140 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017694059593850828, + "loss": 7.9041, + "step": 12150 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017692161700512434, + "loss": 7.8036, + "step": 12160 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017690263807174038, + "loss": 7.9532, + "step": 12170 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017688365913835644, + "loss": 7.8963, + "step": 12180 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001768646802049725, + "loss": 7.876, + "step": 12190 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017684570127158856, + "loss": 7.8861, + "step": 12200 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017682672233820462, + "loss": 7.9612, + "step": 12210 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017680774340482066, + "loss": 7.8485, + "step": 12220 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017678876447143672, + "loss": 7.8881, + "step": 12230 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017676978553805278, + "loss": 7.8058, + "step": 12240 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017675080660466884, + "loss": 7.8773, + "step": 12250 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001767318276712849, + "loss": 7.8872, + "step": 12260 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017671284873790094, + "loss": 7.8364, + "step": 12270 + }, + { + "epoch": 1.17, + "learning_rate": 0.000176693869804517, + "loss": 7.8533, + "step": 12280 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017667489087113306, + "loss": 7.9749, + "step": 12290 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017665591193774912, + "loss": 7.9904, + "step": 12300 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017663693300436518, + "loss": 7.8272, + "step": 12310 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017661795407098124, + "loss": 7.8786, + "step": 12320 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017659897513759728, + "loss": 7.8607, + "step": 12330 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017657999620421334, + "loss": 7.9109, + "step": 12340 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001765610172708294, + "loss": 7.8558, + "step": 12350 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017654203833744546, + "loss": 8.0308, + "step": 12360 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017652305940406152, + "loss": 7.8292, + "step": 12370 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017650408047067756, + "loss": 7.927, + "step": 12380 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017648510153729362, + "loss": 8.009, + "step": 12390 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017646612260390968, + "loss": 7.8794, + "step": 12400 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017644714367052574, + "loss": 7.9297, + "step": 12410 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001764281647371418, + "loss": 7.9262, + "step": 12420 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017640918580375786, + "loss": 7.89, + "step": 12430 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001763902068703739, + "loss": 7.8293, + "step": 12440 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017637122793698996, + "loss": 8.0054, + "step": 12450 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017635224900360602, + "loss": 7.9354, + "step": 12460 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017633327007022208, + "loss": 7.8386, + "step": 12470 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017631429113683814, + "loss": 7.9373, + "step": 12480 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017629531220345418, + "loss": 7.9175, + "step": 12490 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017627633327007024, + "loss": 7.8685, + "step": 12500 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001762573543366863, + "loss": 7.8426, + "step": 12510 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017623837540330236, + "loss": 7.9382, + "step": 12520 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017621939646991842, + "loss": 7.8224, + "step": 12530 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017620041753653445, + "loss": 7.864, + "step": 12540 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017618143860315052, + "loss": 7.9494, + "step": 12550 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017616245966976658, + "loss": 7.8304, + "step": 12560 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017614348073638264, + "loss": 8.0052, + "step": 12570 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001761245018029987, + "loss": 7.942, + "step": 12580 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017610552286961473, + "loss": 7.8737, + "step": 12590 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001760865439362308, + "loss": 7.8656, + "step": 12600 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017606756500284686, + "loss": 7.9479, + "step": 12610 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017604858606946292, + "loss": 7.8352, + "step": 12620 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017602960713607898, + "loss": 7.9308, + "step": 12630 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017601062820269504, + "loss": 7.8263, + "step": 12640 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017599164926931107, + "loss": 7.8724, + "step": 12650 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017597267033592714, + "loss": 7.8569, + "step": 12660 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001759536914025432, + "loss": 7.8869, + "step": 12670 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017593471246915926, + "loss": 7.9312, + "step": 12680 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017591573353577532, + "loss": 7.8857, + "step": 12690 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017589675460239135, + "loss": 7.8912, + "step": 12700 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017587777566900741, + "loss": 7.8062, + "step": 12710 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017585879673562348, + "loss": 7.8309, + "step": 12720 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017583981780223954, + "loss": 7.8373, + "step": 12730 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001758208388688556, + "loss": 7.9202, + "step": 12740 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017580185993547163, + "loss": 7.8688, + "step": 12750 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001757828810020877, + "loss": 7.8819, + "step": 12760 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017576390206870376, + "loss": 7.9604, + "step": 12770 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017574492313531982, + "loss": 7.8108, + "step": 12780 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017572594420193588, + "loss": 7.8294, + "step": 12790 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017570696526855194, + "loss": 7.8233, + "step": 12800 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017568798633516797, + "loss": 7.8958, + "step": 12810 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017566900740178403, + "loss": 7.9881, + "step": 12820 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001756500284684001, + "loss": 7.884, + "step": 12830 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017563104953501616, + "loss": 7.8286, + "step": 12840 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017561207060163222, + "loss": 7.8803, + "step": 12850 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017559309166824825, + "loss": 7.8043, + "step": 12860 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017557411273486431, + "loss": 7.8647, + "step": 12870 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017555513380148038, + "loss": 7.918, + "step": 12880 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017553615486809644, + "loss": 7.8076, + "step": 12890 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001755171759347125, + "loss": 7.9134, + "step": 12900 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017549819700132853, + "loss": 7.9278, + "step": 12910 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001754792180679446, + "loss": 7.8538, + "step": 12920 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017546023913456065, + "loss": 7.8656, + "step": 12930 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017544126020117672, + "loss": 7.9321, + "step": 12940 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017542228126779278, + "loss": 7.9646, + "step": 12950 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017540330233440884, + "loss": 7.9302, + "step": 12960 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017538432340102487, + "loss": 7.8743, + "step": 12970 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017536534446764093, + "loss": 7.8306, + "step": 12980 + }, + { + "epoch": 1.23, + "learning_rate": 0.000175346365534257, + "loss": 7.8704, + "step": 12990 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017532738660087306, + "loss": 7.8513, + "step": 13000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017530840766748912, + "loss": 7.8965, + "step": 13010 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017528942873410515, + "loss": 7.9604, + "step": 13020 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001752704498007212, + "loss": 7.9551, + "step": 13030 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017525147086733727, + "loss": 7.915, + "step": 13040 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017523249193395334, + "loss": 7.856, + "step": 13050 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001752135130005694, + "loss": 7.8559, + "step": 13060 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017519453406718543, + "loss": 7.8043, + "step": 13070 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001751755551338015, + "loss": 7.9493, + "step": 13080 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017515657620041755, + "loss": 7.7208, + "step": 13090 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017513759726703361, + "loss": 7.8285, + "step": 13100 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017511861833364968, + "loss": 7.8769, + "step": 13110 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001750996394002657, + "loss": 7.9413, + "step": 13120 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017508066046688177, + "loss": 7.9337, + "step": 13130 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017506168153349783, + "loss": 7.8663, + "step": 13140 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001750427026001139, + "loss": 7.8629, + "step": 13150 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017502372366672996, + "loss": 7.9704, + "step": 13160 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017500474473334602, + "loss": 7.9988, + "step": 13170 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017498576579996205, + "loss": 7.9194, + "step": 13180 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001749667868665781, + "loss": 7.9008, + "step": 13190 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017494780793319417, + "loss": 7.9587, + "step": 13200 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017492882899981023, + "loss": 7.8987, + "step": 13210 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001749098500664263, + "loss": 7.9129, + "step": 13220 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017489087113304233, + "loss": 7.8203, + "step": 13230 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001748718921996584, + "loss": 7.7843, + "step": 13240 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017485291326627445, + "loss": 7.8528, + "step": 13250 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017483393433289051, + "loss": 7.9028, + "step": 13260 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017481495539950658, + "loss": 7.9222, + "step": 13270 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001747959764661226, + "loss": 7.9008, + "step": 13280 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017477699753273867, + "loss": 7.7545, + "step": 13290 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017475801859935473, + "loss": 7.8473, + "step": 13300 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001747390396659708, + "loss": 7.8514, + "step": 13310 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017472006073258685, + "loss": 7.8281, + "step": 13320 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017470108179920292, + "loss": 7.9274, + "step": 13330 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017468210286581895, + "loss": 7.8855, + "step": 13340 + }, + { + "epoch": 1.27, + "learning_rate": 0.000174663123932435, + "loss": 7.9234, + "step": 13350 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017464414499905107, + "loss": 7.9236, + "step": 13360 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017462516606566713, + "loss": 7.8509, + "step": 13370 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001746061871322832, + "loss": 7.9477, + "step": 13380 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017458720819889923, + "loss": 7.8218, + "step": 13390 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001745682292655153, + "loss": 7.9648, + "step": 13400 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017454925033213135, + "loss": 7.9526, + "step": 13410 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001745302713987474, + "loss": 7.8842, + "step": 13420 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017451129246536347, + "loss": 7.9777, + "step": 13430 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001744923135319795, + "loss": 7.9093, + "step": 13440 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017447333459859557, + "loss": 7.8409, + "step": 13450 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017445435566521163, + "loss": 7.8896, + "step": 13460 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001744353767318277, + "loss": 7.8449, + "step": 13470 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017441639779844375, + "loss": 7.8894, + "step": 13480 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017439741886505982, + "loss": 7.9111, + "step": 13490 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017437843993167585, + "loss": 7.8638, + "step": 13500 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001743594609982919, + "loss": 7.8821, + "step": 13510 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017434048206490797, + "loss": 7.8377, + "step": 13520 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017432150313152403, + "loss": 7.9781, + "step": 13530 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001743025241981401, + "loss": 7.8674, + "step": 13540 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017428354526475613, + "loss": 7.8371, + "step": 13550 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001742645663313722, + "loss": 7.8285, + "step": 13560 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017424558739798825, + "loss": 7.8862, + "step": 13570 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001742266084646043, + "loss": 7.883, + "step": 13580 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017420762953122037, + "loss": 7.9047, + "step": 13590 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001741886505978364, + "loss": 7.9901, + "step": 13600 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017416967166445247, + "loss": 7.9442, + "step": 13610 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017415069273106853, + "loss": 7.8501, + "step": 13620 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001741317137976846, + "loss": 7.7991, + "step": 13630 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017411273486430065, + "loss": 7.7366, + "step": 13640 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001740937559309167, + "loss": 7.9723, + "step": 13650 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017407477699753275, + "loss": 7.9549, + "step": 13660 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001740557980641488, + "loss": 7.8386, + "step": 13670 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017403681913076487, + "loss": 7.816, + "step": 13680 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017401784019738093, + "loss": 7.7909, + "step": 13690 + }, + { + "epoch": 1.3, + "learning_rate": 0.000173998861263997, + "loss": 7.859, + "step": 13700 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017397988233061303, + "loss": 7.9202, + "step": 13710 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001739609033972291, + "loss": 7.8609, + "step": 13720 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017394192446384515, + "loss": 8.0131, + "step": 13730 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001739229455304612, + "loss": 7.9343, + "step": 13740 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017390396659707727, + "loss": 7.8946, + "step": 13750 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001738849876636933, + "loss": 7.8813, + "step": 13760 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017386600873030937, + "loss": 7.9063, + "step": 13770 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017384702979692543, + "loss": 7.806, + "step": 13780 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001738280508635415, + "loss": 7.8988, + "step": 13790 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017380907193015755, + "loss": 7.9128, + "step": 13800 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017379009299677359, + "loss": 7.9373, + "step": 13810 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017377111406338965, + "loss": 7.8923, + "step": 13820 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001737521351300057, + "loss": 7.91, + "step": 13830 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017373315619662177, + "loss": 7.8137, + "step": 13840 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017371417726323783, + "loss": 7.8817, + "step": 13850 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001736951983298539, + "loss": 7.874, + "step": 13860 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017367621939646993, + "loss": 7.8659, + "step": 13870 + }, + { + "epoch": 1.32, + "learning_rate": 0.000173657240463086, + "loss": 7.9372, + "step": 13880 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017363826152970205, + "loss": 7.7769, + "step": 13890 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001736192825963181, + "loss": 7.9974, + "step": 13900 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017360030366293417, + "loss": 7.7119, + "step": 13910 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001735813247295502, + "loss": 7.9405, + "step": 13920 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017356234579616627, + "loss": 7.8877, + "step": 13930 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017354336686278233, + "loss": 7.8819, + "step": 13940 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001735243879293984, + "loss": 7.9628, + "step": 13950 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017350540899601445, + "loss": 7.9056, + "step": 13960 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017348643006263049, + "loss": 7.7774, + "step": 13970 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017346745112924655, + "loss": 7.9571, + "step": 13980 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001734484721958626, + "loss": 7.8876, + "step": 13990 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017342949326247867, + "loss": 7.8631, + "step": 14000 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017341051432909473, + "loss": 7.8511, + "step": 14010 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001733915353957108, + "loss": 7.8158, + "step": 14020 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017337255646232683, + "loss": 7.8307, + "step": 14030 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001733535775289429, + "loss": 7.8747, + "step": 14040 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017333459859555895, + "loss": 7.8938, + "step": 14050 + }, + { + "epoch": 1.33, + "learning_rate": 0.000173315619662175, + "loss": 7.9864, + "step": 14060 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017329664072879107, + "loss": 7.8846, + "step": 14070 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001732776617954071, + "loss": 7.7599, + "step": 14080 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017325868286202317, + "loss": 7.8753, + "step": 14090 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017323970392863923, + "loss": 7.8896, + "step": 14100 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001732207249952553, + "loss": 7.9112, + "step": 14110 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017320174606187135, + "loss": 7.8636, + "step": 14120 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017318276712848738, + "loss": 7.8808, + "step": 14130 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017316378819510345, + "loss": 7.9008, + "step": 14140 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001731448092617195, + "loss": 7.828, + "step": 14150 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017312583032833557, + "loss": 7.9159, + "step": 14160 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017310685139495163, + "loss": 8.0473, + "step": 14170 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017308787246156766, + "loss": 7.9296, + "step": 14180 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017306889352818372, + "loss": 7.905, + "step": 14190 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017304991459479979, + "loss": 7.8447, + "step": 14200 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017303093566141585, + "loss": 7.8995, + "step": 14210 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001730119567280319, + "loss": 7.9104, + "step": 14220 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017299297779464797, + "loss": 7.8408, + "step": 14230 + }, + { + "epoch": 1.35, + "learning_rate": 0.000172973998861264, + "loss": 7.8335, + "step": 14240 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017295501992788007, + "loss": 7.7993, + "step": 14250 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017293604099449613, + "loss": 7.8379, + "step": 14260 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001729170620611122, + "loss": 7.8835, + "step": 14270 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017289808312772825, + "loss": 7.7236, + "step": 14280 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017287910419434428, + "loss": 7.9565, + "step": 14290 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017286012526096034, + "loss": 7.9173, + "step": 14300 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001728411463275764, + "loss": 7.8003, + "step": 14310 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017282216739419247, + "loss": 7.9398, + "step": 14320 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017280318846080853, + "loss": 7.8766, + "step": 14330 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017278420952742456, + "loss": 7.8843, + "step": 14340 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017276523059404062, + "loss": 7.7653, + "step": 14350 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017274625166065669, + "loss": 7.8396, + "step": 14360 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017272727272727275, + "loss": 7.9473, + "step": 14370 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001727082937938888, + "loss": 7.8679, + "step": 14380 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017268931486050487, + "loss": 7.9527, + "step": 14390 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001726703359271209, + "loss": 7.9523, + "step": 14400 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017265135699373696, + "loss": 7.8998, + "step": 14410 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017263237806035303, + "loss": 7.9366, + "step": 14420 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001726133991269691, + "loss": 7.8584, + "step": 14430 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017259442019358515, + "loss": 7.9295, + "step": 14440 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017257544126020118, + "loss": 7.8002, + "step": 14450 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017255646232681724, + "loss": 7.9771, + "step": 14460 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001725374833934333, + "loss": 7.9719, + "step": 14470 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017251850446004937, + "loss": 7.8804, + "step": 14480 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017249952552666543, + "loss": 7.8649, + "step": 14490 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017248054659328146, + "loss": 7.7958, + "step": 14500 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017246156765989752, + "loss": 7.8773, + "step": 14510 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017244258872651358, + "loss": 7.8707, + "step": 14520 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017242360979312965, + "loss": 7.8253, + "step": 14530 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001724046308597457, + "loss": 7.9562, + "step": 14540 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017238565192636177, + "loss": 7.8765, + "step": 14550 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001723666729929778, + "loss": 7.8363, + "step": 14560 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017234769405959386, + "loss": 7.9363, + "step": 14570 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017232871512620993, + "loss": 7.9193, + "step": 14580 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017230973619282599, + "loss": 7.8003, + "step": 14590 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017229075725944205, + "loss": 7.8655, + "step": 14600 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017227177832605808, + "loss": 7.8038, + "step": 14610 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017225279939267414, + "loss": 7.8799, + "step": 14620 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001722338204592902, + "loss": 7.81, + "step": 14630 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017221484152590627, + "loss": 7.9701, + "step": 14640 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017219586259252233, + "loss": 7.9683, + "step": 14650 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017217688365913836, + "loss": 7.9245, + "step": 14660 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017215790472575442, + "loss": 7.9538, + "step": 14670 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017213892579237048, + "loss": 7.8722, + "step": 14680 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017211994685898654, + "loss": 7.8426, + "step": 14690 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001721009679256026, + "loss": 7.8924, + "step": 14700 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017208198899221864, + "loss": 7.9378, + "step": 14710 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001720630100588347, + "loss": 7.8708, + "step": 14720 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017204403112545076, + "loss": 7.8503, + "step": 14730 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017202505219206682, + "loss": 7.8053, + "step": 14740 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017200607325868289, + "loss": 7.8506, + "step": 14750 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017198709432529895, + "loss": 7.8753, + "step": 14760 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017196811539191498, + "loss": 7.8996, + "step": 14770 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017194913645853104, + "loss": 7.8542, + "step": 14780 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001719301575251471, + "loss": 7.8244, + "step": 14790 + }, + { + "epoch": 1.4, + "learning_rate": 0.00017191117859176316, + "loss": 7.8535, + "step": 14800 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017189219965837923, + "loss": 7.9369, + "step": 14810 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017187322072499526, + "loss": 7.9422, + "step": 14820 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017185424179161132, + "loss": 7.9927, + "step": 14830 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017183526285822738, + "loss": 7.852, + "step": 14840 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017181628392484344, + "loss": 7.947, + "step": 14850 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001717973049914595, + "loss": 7.8916, + "step": 14860 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017177832605807554, + "loss": 7.9578, + "step": 14870 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001717593471246916, + "loss": 7.975, + "step": 14880 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017174036819130766, + "loss": 7.9863, + "step": 14890 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017172138925792372, + "loss": 7.8942, + "step": 14900 + }, + { + "epoch": 1.41, + "learning_rate": 0.00017170241032453978, + "loss": 7.8584, + "step": 14910 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017168343139115585, + "loss": 7.9821, + "step": 14920 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017166445245777188, + "loss": 7.8786, + "step": 14930 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017164547352438794, + "loss": 7.8327, + "step": 14940 + }, + { + "epoch": 1.42, + "learning_rate": 0.000171626494591004, + "loss": 7.8841, + "step": 14950 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017160751565762006, + "loss": 7.8089, + "step": 14960 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017158853672423613, + "loss": 7.8177, + "step": 14970 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017156955779085216, + "loss": 7.9116, + "step": 14980 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017155057885746822, + "loss": 7.898, + "step": 14990 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017153159992408428, + "loss": 7.8931, + "step": 15000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017151262099070034, + "loss": 7.8535, + "step": 15010 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001714936420573164, + "loss": 7.9483, + "step": 15020 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017147466312393244, + "loss": 7.8901, + "step": 15030 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001714556841905485, + "loss": 7.8264, + "step": 15040 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017143670525716456, + "loss": 7.8682, + "step": 15050 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017141772632378062, + "loss": 7.9068, + "step": 15060 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017139874739039668, + "loss": 7.8185, + "step": 15070 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017137976845701274, + "loss": 7.8866, + "step": 15080 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017136078952362878, + "loss": 7.8157, + "step": 15090 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017134181059024484, + "loss": 7.9163, + "step": 15100 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001713228316568609, + "loss": 7.9395, + "step": 15110 + }, + { + "epoch": 1.43, + "learning_rate": 0.00017130385272347696, + "loss": 7.7885, + "step": 15120 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017128487379009302, + "loss": 7.9234, + "step": 15130 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017126589485670906, + "loss": 7.7686, + "step": 15140 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017124691592332512, + "loss": 7.9083, + "step": 15150 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017122793698994118, + "loss": 7.9122, + "step": 15160 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017120895805655724, + "loss": 7.8276, + "step": 15170 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001711899791231733, + "loss": 7.8299, + "step": 15180 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017117100018978934, + "loss": 7.8205, + "step": 15190 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001711520212564054, + "loss": 7.9315, + "step": 15200 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017113304232302146, + "loss": 7.931, + "step": 15210 + }, + { + "epoch": 1.44, + "learning_rate": 0.00017111406338963752, + "loss": 7.7818, + "step": 15220 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017109508445625358, + "loss": 7.8072, + "step": 15230 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017107610552286962, + "loss": 7.8526, + "step": 15240 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017105712658948568, + "loss": 7.8238, + "step": 15250 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017103814765610174, + "loss": 7.8086, + "step": 15260 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001710191687227178, + "loss": 7.9769, + "step": 15270 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017100018978933386, + "loss": 7.9049, + "step": 15280 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017098121085594992, + "loss": 7.9835, + "step": 15290 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017096223192256596, + "loss": 7.9445, + "step": 15300 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017094325298918202, + "loss": 7.9323, + "step": 15310 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017092427405579808, + "loss": 7.8599, + "step": 15320 + }, + { + "epoch": 1.45, + "learning_rate": 0.00017090529512241414, + "loss": 7.8268, + "step": 15330 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001708863161890302, + "loss": 7.9166, + "step": 15340 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017086733725564624, + "loss": 7.8486, + "step": 15350 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001708483583222623, + "loss": 7.9515, + "step": 15360 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017082937938887836, + "loss": 8.0511, + "step": 15370 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017081040045549442, + "loss": 7.8987, + "step": 15380 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017079142152211048, + "loss": 7.8301, + "step": 15390 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017077244258872652, + "loss": 7.8589, + "step": 15400 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017075346365534258, + "loss": 7.8948, + "step": 15410 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017073448472195864, + "loss": 7.9108, + "step": 15420 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001707155057885747, + "loss": 7.8949, + "step": 15430 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017069652685519076, + "loss": 7.8468, + "step": 15440 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017067754792180682, + "loss": 7.8275, + "step": 15450 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017065856898842286, + "loss": 7.8753, + "step": 15460 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017063959005503892, + "loss": 7.9076, + "step": 15470 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017062061112165498, + "loss": 7.9085, + "step": 15480 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017060163218827104, + "loss": 7.8878, + "step": 15490 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001705826532548871, + "loss": 7.8957, + "step": 15500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017056367432150314, + "loss": 7.9515, + "step": 15510 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001705446953881192, + "loss": 7.8527, + "step": 15520 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017052571645473526, + "loss": 7.8314, + "step": 15530 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017050673752135132, + "loss": 7.8202, + "step": 15540 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017048775858796738, + "loss": 7.8515, + "step": 15550 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017046877965458342, + "loss": 7.9015, + "step": 15560 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017044980072119948, + "loss": 7.9215, + "step": 15570 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017043082178781554, + "loss": 7.9696, + "step": 15580 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001704118428544316, + "loss": 7.8362, + "step": 15590 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017039286392104766, + "loss": 7.9248, + "step": 15600 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017037388498766372, + "loss": 7.8658, + "step": 15610 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017035490605427976, + "loss": 7.9837, + "step": 15620 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017033592712089582, + "loss": 7.8591, + "step": 15630 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017031694818751188, + "loss": 7.9034, + "step": 15640 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017029796925412794, + "loss": 7.876, + "step": 15650 + }, + { + "epoch": 1.49, + "learning_rate": 0.000170278990320744, + "loss": 7.8455, + "step": 15660 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017026001138736004, + "loss": 7.8404, + "step": 15670 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001702410324539761, + "loss": 7.7896, + "step": 15680 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017022205352059216, + "loss": 7.93, + "step": 15690 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017020307458720822, + "loss": 7.7629, + "step": 15700 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017018409565382428, + "loss": 7.8888, + "step": 15710 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017016511672044031, + "loss": 7.8987, + "step": 15720 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017014613778705638, + "loss": 7.9326, + "step": 15730 + }, + { + "epoch": 1.49, + "learning_rate": 0.00017012715885367244, + "loss": 7.8659, + "step": 15740 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001701081799202885, + "loss": 7.8767, + "step": 15750 + }, + { + "epoch": 1.5, + "learning_rate": 0.00017008920098690456, + "loss": 7.8689, + "step": 15760 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001700702220535206, + "loss": 7.8666, + "step": 15770 + }, + { + "epoch": 1.5, + "learning_rate": 0.00017005124312013665, + "loss": 7.9272, + "step": 15780 + }, + { + "epoch": 1.5, + "learning_rate": 0.00017003226418675272, + "loss": 7.8931, + "step": 15790 + }, + { + "epoch": 1.5, + "learning_rate": 0.00017001328525336878, + "loss": 7.9144, + "step": 15800 + }, + { + "epoch": 1.5, + "learning_rate": 0.00016999430631998484, + "loss": 7.9967, + "step": 15810 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001699753273866009, + "loss": 7.876, + "step": 15820 + }, + { + "epoch": 1.5, + "learning_rate": 0.00016995634845321693, + "loss": 7.8345, + "step": 15830 + }, + { + "epoch": 1.5, + "learning_rate": 0.000169937369519833, + "loss": 7.9589, + "step": 15840 + }, + { + "epoch": 1.5, + "learning_rate": 0.00016991839058644906, + "loss": 7.7895, + "step": 15850 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016989941165306512, + "loss": 7.8575, + "step": 15860 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016988043271968118, + "loss": 7.882, + "step": 15870 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001698614537862972, + "loss": 7.7647, + "step": 15880 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016984247485291327, + "loss": 7.8884, + "step": 15890 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016982349591952934, + "loss": 7.8281, + "step": 15900 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001698045169861454, + "loss": 7.7815, + "step": 15910 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016978553805276146, + "loss": 7.844, + "step": 15920 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001697665591193775, + "loss": 7.8023, + "step": 15930 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016974758018599355, + "loss": 7.935, + "step": 15940 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016972860125260962, + "loss": 7.8531, + "step": 15950 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016970962231922568, + "loss": 7.8349, + "step": 15960 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016969064338584174, + "loss": 7.8348, + "step": 15970 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001696716644524578, + "loss": 7.8758, + "step": 15980 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016965268551907383, + "loss": 7.851, + "step": 15990 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001696337065856899, + "loss": 7.7998, + "step": 16000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016961472765230596, + "loss": 7.8882, + "step": 16010 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016959574871892202, + "loss": 7.8111, + "step": 16020 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016957676978553808, + "loss": 7.8344, + "step": 16030 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001695577908521541, + "loss": 7.9109, + "step": 16040 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016953881191877017, + "loss": 7.9387, + "step": 16050 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016951983298538624, + "loss": 7.9571, + "step": 16060 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001695008540520023, + "loss": 7.878, + "step": 16070 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016948187511861836, + "loss": 7.9668, + "step": 16080 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001694628961852344, + "loss": 7.891, + "step": 16090 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016944391725185045, + "loss": 7.9827, + "step": 16100 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016942493831846651, + "loss": 7.873, + "step": 16110 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016940595938508258, + "loss": 7.8736, + "step": 16120 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016938698045169864, + "loss": 7.9248, + "step": 16130 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001693680015183147, + "loss": 7.8698, + "step": 16140 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016934902258493073, + "loss": 7.9242, + "step": 16150 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001693300436515468, + "loss": 7.9224, + "step": 16160 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016931106471816285, + "loss": 7.9268, + "step": 16170 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016929208578477892, + "loss": 7.776, + "step": 16180 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016927310685139498, + "loss": 7.8963, + "step": 16190 + }, + { + "epoch": 1.54, + "learning_rate": 0.000169254127918011, + "loss": 7.8501, + "step": 16200 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016923514898462707, + "loss": 7.9608, + "step": 16210 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016921617005124313, + "loss": 7.8569, + "step": 16220 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001691971911178592, + "loss": 7.879, + "step": 16230 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016917821218447526, + "loss": 7.9396, + "step": 16240 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001691592332510913, + "loss": 7.7836, + "step": 16250 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016914025431770735, + "loss": 7.8752, + "step": 16260 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001691212753843234, + "loss": 7.9026, + "step": 16270 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016910229645093947, + "loss": 7.9376, + "step": 16280 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016908331751755554, + "loss": 7.853, + "step": 16290 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016906433858417157, + "loss": 7.8648, + "step": 16300 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016904535965078763, + "loss": 7.8895, + "step": 16310 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001690263807174037, + "loss": 7.8541, + "step": 16320 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016900740178401975, + "loss": 7.8666, + "step": 16330 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016898842285063582, + "loss": 7.9544, + "step": 16340 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016896944391725188, + "loss": 7.9778, + "step": 16350 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001689504649838679, + "loss": 7.8113, + "step": 16360 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016893148605048397, + "loss": 7.905, + "step": 16370 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016891250711710003, + "loss": 7.8119, + "step": 16380 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001688935281837161, + "loss": 8.0333, + "step": 16390 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016887454925033216, + "loss": 7.9359, + "step": 16400 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001688555703169482, + "loss": 7.8126, + "step": 16410 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016883659138356425, + "loss": 7.8503, + "step": 16420 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001688176124501803, + "loss": 7.741, + "step": 16430 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016879863351679637, + "loss": 7.9028, + "step": 16440 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016877965458341244, + "loss": 7.7372, + "step": 16450 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016876067565002847, + "loss": 7.815, + "step": 16460 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016874169671664453, + "loss": 7.8055, + "step": 16470 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001687227177832606, + "loss": 7.8315, + "step": 16480 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016870373884987665, + "loss": 7.8523, + "step": 16490 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016868475991649271, + "loss": 7.9014, + "step": 16500 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016866578098310878, + "loss": 7.7971, + "step": 16510 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001686468020497248, + "loss": 7.8215, + "step": 16520 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016862782311634087, + "loss": 7.8164, + "step": 16530 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016860884418295693, + "loss": 7.8579, + "step": 16540 + }, + { + "epoch": 1.57, + "learning_rate": 0.000168589865249573, + "loss": 7.9486, + "step": 16550 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016857088631618906, + "loss": 7.9259, + "step": 16560 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001685519073828051, + "loss": 7.8656, + "step": 16570 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016853292844942115, + "loss": 7.729, + "step": 16580 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001685139495160372, + "loss": 7.969, + "step": 16590 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016849497058265327, + "loss": 7.8591, + "step": 16600 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016847599164926933, + "loss": 7.781, + "step": 16610 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016845701271588537, + "loss": 7.8389, + "step": 16620 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016843803378250143, + "loss": 7.8652, + "step": 16630 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001684190548491175, + "loss": 7.9893, + "step": 16640 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016840007591573355, + "loss": 7.8324, + "step": 16650 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016838109698234961, + "loss": 7.8324, + "step": 16660 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016836211804896567, + "loss": 8.0189, + "step": 16670 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001683431391155817, + "loss": 7.825, + "step": 16680 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016832416018219777, + "loss": 7.8511, + "step": 16690 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016830518124881383, + "loss": 7.873, + "step": 16700 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001682862023154299, + "loss": 7.8481, + "step": 16710 + }, + { + "epoch": 1.59, + "learning_rate": 0.00016826722338204595, + "loss": 7.8828, + "step": 16720 + }, + { + "epoch": 1.59, + "learning_rate": 0.000168248244448662, + "loss": 7.8733, + "step": 16730 + }, + { + "epoch": 1.59, + "learning_rate": 0.00016822926551527805, + "loss": 7.968, + "step": 16740 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001682102865818941, + "loss": 7.806, + "step": 16750 + }, + { + "epoch": 1.59, + "learning_rate": 0.00016819130764851017, + "loss": 7.8548, + "step": 16760 + }, + { + "epoch": 1.59, + "learning_rate": 0.00016817232871512623, + "loss": 7.8729, + "step": 16770 + }, + { + "epoch": 1.59, + "learning_rate": 0.00016815334978174227, + "loss": 7.842, + "step": 16780 + }, + { + "epoch": 1.59, + "learning_rate": 0.00016813437084835833, + "loss": 7.8119, + "step": 16790 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001681153919149744, + "loss": 7.8486, + "step": 16800 + }, + { + "epoch": 1.6, + "learning_rate": 0.00016809641298159045, + "loss": 7.7795, + "step": 16810 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001680774340482065, + "loss": 7.9168, + "step": 16820 + }, + { + "epoch": 1.6, + "learning_rate": 0.00016805845511482257, + "loss": 7.8893, + "step": 16830 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001680394761814386, + "loss": 7.8338, + "step": 16840 + }, + { + "epoch": 1.6, + "learning_rate": 0.00016802049724805467, + "loss": 7.8607, + "step": 16850 + }, + { + "epoch": 1.6, + "learning_rate": 0.00016800151831467073, + "loss": 7.7818, + "step": 16860 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001679825393812868, + "loss": 7.8901, + "step": 16870 + }, + { + "epoch": 1.6, + "learning_rate": 0.00016796356044790285, + "loss": 7.9417, + "step": 16880 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001679445815145189, + "loss": 7.8965, + "step": 16890 + }, + { + "epoch": 1.6, + "learning_rate": 0.00016792560258113495, + "loss": 7.891, + "step": 16900 + }, + { + "epoch": 1.6, + "learning_rate": 0.000167906623647751, + "loss": 7.8494, + "step": 16910 + }, + { + "epoch": 1.61, + "learning_rate": 0.00016788764471436707, + "loss": 7.8651, + "step": 16920 + }, + { + "epoch": 1.61, + "learning_rate": 0.00016786866578098313, + "loss": 7.8477, + "step": 16930 + }, + { + "epoch": 1.61, + "learning_rate": 0.00016784968684759917, + "loss": 7.7701, + "step": 16940 + }, + { + "epoch": 1.61, + "learning_rate": 0.00016783070791421523, + "loss": 7.8612, + "step": 16950 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001678117289808313, + "loss": 7.8107, + "step": 16960 + }, + { + "epoch": 1.61, + "learning_rate": 0.00016779275004744735, + "loss": 7.83, + "step": 16970 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001677737711140634, + "loss": 7.9374, + "step": 16980 + }, + { + "epoch": 1.61, + "learning_rate": 0.00016775479218067945, + "loss": 7.885, + "step": 16990 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001677358132472955, + "loss": 7.8927, + "step": 17000 + }, + { + "epoch": 1.61, + "learning_rate": 0.00016771683431391157, + "loss": 7.8929, + "step": 17010 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016769785538052763, + "loss": 7.7847, + "step": 17020 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001676788764471437, + "loss": 7.9631, + "step": 17030 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016765989751375975, + "loss": 7.9438, + "step": 17040 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001676409185803758, + "loss": 7.7907, + "step": 17050 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016762193964699185, + "loss": 7.9279, + "step": 17060 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001676029607136079, + "loss": 7.8815, + "step": 17070 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016758398178022397, + "loss": 8.0089, + "step": 17080 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016756500284684003, + "loss": 7.9016, + "step": 17090 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016754602391345607, + "loss": 7.7751, + "step": 17100 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016752704498007213, + "loss": 7.8964, + "step": 17110 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001675080660466882, + "loss": 7.7608, + "step": 17120 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016748908711330425, + "loss": 7.9548, + "step": 17130 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001674701081799203, + "loss": 7.9519, + "step": 17140 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016745112924653635, + "loss": 7.9832, + "step": 17150 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001674321503131524, + "loss": 7.7887, + "step": 17160 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016741317137976847, + "loss": 7.972, + "step": 17170 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016739419244638453, + "loss": 7.8508, + "step": 17180 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001673752135130006, + "loss": 7.8145, + "step": 17190 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016735623457961665, + "loss": 7.7618, + "step": 17200 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016733725564623269, + "loss": 7.8352, + "step": 17210 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016731827671284875, + "loss": 7.8661, + "step": 17220 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001672992977794648, + "loss": 7.9131, + "step": 17230 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016728031884608087, + "loss": 7.7831, + "step": 17240 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016726133991269693, + "loss": 7.8058, + "step": 17250 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016724236097931296, + "loss": 7.8926, + "step": 17260 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016722338204592903, + "loss": 7.8892, + "step": 17270 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001672044031125451, + "loss": 7.9619, + "step": 17280 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016718542417916115, + "loss": 7.8365, + "step": 17290 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001671664452457772, + "loss": 7.8317, + "step": 17300 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016714746631239324, + "loss": 7.8683, + "step": 17310 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001671284873790093, + "loss": 7.9458, + "step": 17320 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016710950844562537, + "loss": 7.9498, + "step": 17330 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016709052951224143, + "loss": 7.9496, + "step": 17340 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001670715505788575, + "loss": 7.9351, + "step": 17350 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016705257164547355, + "loss": 7.8879, + "step": 17360 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016703359271208958, + "loss": 7.8785, + "step": 17370 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016701461377870565, + "loss": 7.8406, + "step": 17380 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001669956348453217, + "loss": 7.8577, + "step": 17390 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016697665591193777, + "loss": 7.8559, + "step": 17400 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016695767697855383, + "loss": 7.8396, + "step": 17410 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016693869804516986, + "loss": 7.9009, + "step": 17420 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016691971911178593, + "loss": 7.8063, + "step": 17430 + }, + { + "epoch": 1.65, + "learning_rate": 0.000166900740178402, + "loss": 7.8424, + "step": 17440 + }, + { + "epoch": 1.66, + "learning_rate": 0.00016688176124501805, + "loss": 7.9207, + "step": 17450 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001668627823116341, + "loss": 7.9105, + "step": 17460 + }, + { + "epoch": 1.66, + "learning_rate": 0.00016684380337825014, + "loss": 7.8379, + "step": 17470 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001668248244448662, + "loss": 7.7831, + "step": 17480 + }, + { + "epoch": 1.66, + "learning_rate": 0.00016680584551148227, + "loss": 7.9231, + "step": 17490 + }, + { + "epoch": 1.66, + "learning_rate": 0.00016678686657809833, + "loss": 7.86, + "step": 17500 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001667678876447144, + "loss": 7.8946, + "step": 17510 + }, + { + "epoch": 1.66, + "learning_rate": 0.00016674890871133042, + "loss": 7.9196, + "step": 17520 + }, + { + "epoch": 1.66, + "learning_rate": 0.00016672992977794648, + "loss": 7.9259, + "step": 17530 + }, + { + "epoch": 1.66, + "learning_rate": 0.00016671095084456255, + "loss": 7.9177, + "step": 17540 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001666919719111786, + "loss": 7.8724, + "step": 17550 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016667299297779467, + "loss": 7.8552, + "step": 17560 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016665401404441073, + "loss": 7.8017, + "step": 17570 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016663503511102676, + "loss": 7.843, + "step": 17580 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016661605617764282, + "loss": 7.8963, + "step": 17590 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016659707724425889, + "loss": 7.822, + "step": 17600 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016657809831087495, + "loss": 7.844, + "step": 17610 + }, + { + "epoch": 1.67, + "learning_rate": 0.000166559119377491, + "loss": 7.8534, + "step": 17620 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016654014044410704, + "loss": 7.9459, + "step": 17630 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001665211615107231, + "loss": 7.8753, + "step": 17640 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016650218257733917, + "loss": 7.8419, + "step": 17650 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016648320364395523, + "loss": 7.9352, + "step": 17660 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001664642247105713, + "loss": 7.8022, + "step": 17670 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016644524577718732, + "loss": 7.9272, + "step": 17680 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016642626684380338, + "loss": 7.8676, + "step": 17690 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016640728791041944, + "loss": 7.9678, + "step": 17700 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001663883089770355, + "loss": 7.919, + "step": 17710 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016636933004365157, + "loss": 7.8938, + "step": 17720 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016635035111026763, + "loss": 7.9555, + "step": 17730 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016633137217688366, + "loss": 7.8598, + "step": 17740 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016631239324349972, + "loss": 7.9456, + "step": 17750 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016629341431011578, + "loss": 7.9102, + "step": 17760 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016627443537673185, + "loss": 7.8114, + "step": 17770 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001662554564433479, + "loss": 7.8783, + "step": 17780 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016623647750996394, + "loss": 7.902, + "step": 17790 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016621749857658, + "loss": 7.8586, + "step": 17800 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016619851964319606, + "loss": 7.9233, + "step": 17810 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016617954070981213, + "loss": 7.8281, + "step": 17820 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001661605617764282, + "loss": 7.7916, + "step": 17830 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016614158284304422, + "loss": 7.9073, + "step": 17840 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016612260390966028, + "loss": 7.8684, + "step": 17850 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016610362497627634, + "loss": 7.9522, + "step": 17860 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001660846460428924, + "loss": 7.7994, + "step": 17870 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016606566710950847, + "loss": 7.9712, + "step": 17880 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016604668817612453, + "loss": 7.9103, + "step": 17890 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016602770924274056, + "loss": 7.8875, + "step": 17900 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016600873030935662, + "loss": 7.9074, + "step": 17910 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016598975137597268, + "loss": 7.8733, + "step": 17920 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016597077244258875, + "loss": 7.8312, + "step": 17930 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001659517935092048, + "loss": 7.869, + "step": 17940 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016593281457582084, + "loss": 7.8955, + "step": 17950 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001659138356424369, + "loss": 7.8527, + "step": 17960 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016589485670905296, + "loss": 7.8674, + "step": 17970 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016587587777566902, + "loss": 7.8768, + "step": 17980 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016585689884228509, + "loss": 7.9197, + "step": 17990 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016583791990890112, + "loss": 7.8606, + "step": 18000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016581894097551718, + "loss": 7.9081, + "step": 18010 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016579996204213324, + "loss": 7.8936, + "step": 18020 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001657809831087493, + "loss": 7.9135, + "step": 18030 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016576200417536537, + "loss": 7.824, + "step": 18040 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001657430252419814, + "loss": 7.918, + "step": 18050 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016572404630859746, + "loss": 7.9342, + "step": 18060 + }, + { + "epoch": 1.71, + "learning_rate": 0.00016570506737521352, + "loss": 7.8834, + "step": 18070 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016568608844182958, + "loss": 7.7842, + "step": 18080 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016566710950844564, + "loss": 7.8498, + "step": 18090 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001656481305750617, + "loss": 7.8644, + "step": 18100 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016562915164167774, + "loss": 7.8654, + "step": 18110 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001656101727082938, + "loss": 7.8258, + "step": 18120 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016559119377490986, + "loss": 7.8322, + "step": 18130 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016557221484152592, + "loss": 7.9357, + "step": 18140 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016555323590814198, + "loss": 7.8355, + "step": 18150 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016553425697475802, + "loss": 7.8667, + "step": 18160 + }, + { + "epoch": 1.72, + "learning_rate": 0.00016551527804137408, + "loss": 7.8392, + "step": 18170 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016549629910799014, + "loss": 7.8973, + "step": 18180 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001654773201746062, + "loss": 7.9184, + "step": 18190 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016545834124122226, + "loss": 7.8068, + "step": 18200 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001654393623078383, + "loss": 7.9054, + "step": 18210 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016542038337445436, + "loss": 7.8072, + "step": 18220 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016540140444107042, + "loss": 7.8678, + "step": 18230 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016538242550768648, + "loss": 7.9126, + "step": 18240 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016536344657430254, + "loss": 7.9272, + "step": 18250 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001653444676409186, + "loss": 7.9315, + "step": 18260 + }, + { + "epoch": 1.73, + "learning_rate": 0.00016532548870753464, + "loss": 7.8238, + "step": 18270 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001653065097741507, + "loss": 7.8731, + "step": 18280 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016528753084076676, + "loss": 7.8416, + "step": 18290 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016526855190738282, + "loss": 7.9475, + "step": 18300 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016524957297399888, + "loss": 7.9196, + "step": 18310 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016523059404061492, + "loss": 7.8805, + "step": 18320 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016521161510723098, + "loss": 7.8936, + "step": 18330 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016519263617384704, + "loss": 7.8933, + "step": 18340 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001651736572404631, + "loss": 7.8424, + "step": 18350 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016515467830707916, + "loss": 7.9242, + "step": 18360 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001651356993736952, + "loss": 7.8339, + "step": 18370 + }, + { + "epoch": 1.74, + "learning_rate": 0.00016511672044031126, + "loss": 8.0267, + "step": 18380 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016509774150692732, + "loss": 7.9168, + "step": 18390 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016507876257354338, + "loss": 7.8347, + "step": 18400 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016505978364015944, + "loss": 7.8527, + "step": 18410 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001650408047067755, + "loss": 7.8267, + "step": 18420 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016502182577339154, + "loss": 7.813, + "step": 18430 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001650028468400076, + "loss": 7.8132, + "step": 18440 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016498386790662366, + "loss": 7.9144, + "step": 18450 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016496488897323972, + "loss": 7.8411, + "step": 18460 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016494591003985578, + "loss": 7.8303, + "step": 18470 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016492693110647182, + "loss": 7.8463, + "step": 18480 + }, + { + "epoch": 1.75, + "learning_rate": 0.00016490795217308788, + "loss": 7.8983, + "step": 18490 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016488897323970394, + "loss": 7.8702, + "step": 18500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016486999430632, + "loss": 7.7592, + "step": 18510 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016485101537293606, + "loss": 7.7817, + "step": 18520 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001648320364395521, + "loss": 7.845, + "step": 18530 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016481305750616816, + "loss": 7.8269, + "step": 18540 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016479407857278422, + "loss": 7.8083, + "step": 18550 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016477509963940028, + "loss": 7.8609, + "step": 18560 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016475612070601634, + "loss": 7.7417, + "step": 18570 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016473714177263238, + "loss": 7.8818, + "step": 18580 + }, + { + "epoch": 1.76, + "learning_rate": 0.00016471816283924844, + "loss": 7.8636, + "step": 18590 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001646991839058645, + "loss": 7.8993, + "step": 18600 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016468020497248056, + "loss": 7.9056, + "step": 18610 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016466122603909662, + "loss": 7.852, + "step": 18620 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016464224710571268, + "loss": 7.8672, + "step": 18630 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016462326817232872, + "loss": 7.8413, + "step": 18640 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016460428923894478, + "loss": 7.8847, + "step": 18650 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016458531030556084, + "loss": 7.8263, + "step": 18660 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001645663313721769, + "loss": 7.8653, + "step": 18670 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016454735243879296, + "loss": 7.857, + "step": 18680 + }, + { + "epoch": 1.77, + "learning_rate": 0.000164528373505409, + "loss": 8.0016, + "step": 18690 + }, + { + "epoch": 1.77, + "learning_rate": 0.00016450939457202506, + "loss": 7.8949, + "step": 18700 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016449041563864112, + "loss": 7.8634, + "step": 18710 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016447143670525718, + "loss": 7.8651, + "step": 18720 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016445245777187324, + "loss": 7.8809, + "step": 18730 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016443347883848928, + "loss": 7.8964, + "step": 18740 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016441449990510534, + "loss": 7.8104, + "step": 18750 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001643955209717214, + "loss": 7.911, + "step": 18760 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016437654203833746, + "loss": 7.8607, + "step": 18770 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016435756310495352, + "loss": 7.8043, + "step": 18780 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016433858417156958, + "loss": 7.9543, + "step": 18790 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016431960523818562, + "loss": 7.884, + "step": 18800 + }, + { + "epoch": 1.78, + "learning_rate": 0.00016430062630480168, + "loss": 7.9173, + "step": 18810 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016428164737141774, + "loss": 7.7703, + "step": 18820 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001642626684380338, + "loss": 7.8585, + "step": 18830 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016424368950464986, + "loss": 7.9445, + "step": 18840 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001642247105712659, + "loss": 7.9027, + "step": 18850 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016420573163788196, + "loss": 7.8269, + "step": 18860 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016418675270449802, + "loss": 7.8792, + "step": 18870 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016416777377111408, + "loss": 7.9192, + "step": 18880 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016414879483773014, + "loss": 7.9763, + "step": 18890 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016412981590434617, + "loss": 7.8849, + "step": 18900 + }, + { + "epoch": 1.79, + "learning_rate": 0.00016411083697096224, + "loss": 7.8498, + "step": 18910 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001640918580375783, + "loss": 7.807, + "step": 18920 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016407287910419436, + "loss": 7.8959, + "step": 18930 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016405390017081042, + "loss": 7.8704, + "step": 18940 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016403492123742648, + "loss": 7.842, + "step": 18950 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016401594230404251, + "loss": 7.8434, + "step": 18960 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016399696337065858, + "loss": 7.8772, + "step": 18970 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016397798443727464, + "loss": 7.9123, + "step": 18980 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001639590055038907, + "loss": 7.8603, + "step": 18990 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016394002657050676, + "loss": 7.871, + "step": 19000 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001639210476371228, + "loss": 7.7392, + "step": 19010 + }, + { + "epoch": 1.8, + "learning_rate": 0.00016390206870373886, + "loss": 7.8537, + "step": 19020 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016388308977035492, + "loss": 7.9253, + "step": 19030 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016386411083697098, + "loss": 7.8189, + "step": 19040 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016384513190358704, + "loss": 7.8834, + "step": 19050 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016382615297020307, + "loss": 7.8735, + "step": 19060 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016380717403681913, + "loss": 7.952, + "step": 19070 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001637881951034352, + "loss": 7.7672, + "step": 19080 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016376921617005126, + "loss": 7.88, + "step": 19090 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016375023723666732, + "loss": 7.8417, + "step": 19100 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016373125830328335, + "loss": 7.8759, + "step": 19110 + }, + { + "epoch": 1.81, + "learning_rate": 0.00016371227936989941, + "loss": 7.9401, + "step": 19120 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016369330043651548, + "loss": 7.7792, + "step": 19130 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016367432150313154, + "loss": 7.904, + "step": 19140 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001636553425697476, + "loss": 7.8967, + "step": 19150 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016363636363636366, + "loss": 7.9611, + "step": 19160 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001636173847029797, + "loss": 7.8921, + "step": 19170 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016359840576959575, + "loss": 7.9201, + "step": 19180 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016357942683621182, + "loss": 7.9224, + "step": 19190 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016356044790282788, + "loss": 7.8683, + "step": 19200 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016354146896944394, + "loss": 7.9341, + "step": 19210 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016352249003605997, + "loss": 7.9042, + "step": 19220 + }, + { + "epoch": 1.82, + "learning_rate": 0.00016350351110267603, + "loss": 7.8717, + "step": 19230 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001634845321692921, + "loss": 7.9177, + "step": 19240 + }, + { + "epoch": 1.83, + "learning_rate": 0.00016346555323590816, + "loss": 7.9534, + "step": 19250 + }, + { + "epoch": 1.83, + "learning_rate": 0.00016344657430252422, + "loss": 7.8696, + "step": 19260 + }, + { + "epoch": 1.83, + "learning_rate": 0.00016342759536914025, + "loss": 7.857, + "step": 19270 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001634086164357563, + "loss": 7.8828, + "step": 19280 + }, + { + "epoch": 1.83, + "learning_rate": 0.00016338963750237237, + "loss": 7.828, + "step": 19290 + }, + { + "epoch": 1.83, + "learning_rate": 0.00016337065856898844, + "loss": 7.9118, + "step": 19300 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001633516796356045, + "loss": 7.8622, + "step": 19310 + }, + { + "epoch": 1.83, + "learning_rate": 0.00016333270070222056, + "loss": 7.8215, + "step": 19320 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001633137217688366, + "loss": 7.9519, + "step": 19330 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016329474283545265, + "loss": 7.8305, + "step": 19340 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016327576390206871, + "loss": 7.9268, + "step": 19350 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016325678496868478, + "loss": 7.8265, + "step": 19360 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016323780603530084, + "loss": 7.8933, + "step": 19370 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016321882710191687, + "loss": 7.8313, + "step": 19380 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016319984816853293, + "loss": 7.8801, + "step": 19390 + }, + { + "epoch": 1.84, + "learning_rate": 0.000163180869235149, + "loss": 7.9039, + "step": 19400 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016316189030176506, + "loss": 7.8568, + "step": 19410 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016314291136838112, + "loss": 7.8692, + "step": 19420 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016312393243499715, + "loss": 7.9184, + "step": 19430 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001631049535016132, + "loss": 7.8313, + "step": 19440 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016308597456822927, + "loss": 7.8239, + "step": 19450 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016306699563484533, + "loss": 7.9093, + "step": 19460 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001630480167014614, + "loss": 7.7581, + "step": 19470 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016302903776807746, + "loss": 8.0268, + "step": 19480 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001630100588346935, + "loss": 7.8394, + "step": 19490 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016299107990130955, + "loss": 7.8193, + "step": 19500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016297210096792561, + "loss": 7.9236, + "step": 19510 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016295312203454168, + "loss": 7.963, + "step": 19520 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016293414310115774, + "loss": 7.9338, + "step": 19530 + }, + { + "epoch": 1.85, + "learning_rate": 0.00016291516416777377, + "loss": 7.8894, + "step": 19540 + }, + { + "epoch": 1.86, + "learning_rate": 0.00016289618523438983, + "loss": 7.9377, + "step": 19550 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001628772063010059, + "loss": 7.8134, + "step": 19560 + }, + { + "epoch": 1.86, + "learning_rate": 0.00016285822736762195, + "loss": 7.9315, + "step": 19570 + }, + { + "epoch": 1.86, + "learning_rate": 0.00016283924843423802, + "loss": 7.8767, + "step": 19580 + }, + { + "epoch": 1.86, + "learning_rate": 0.00016282026950085405, + "loss": 7.7828, + "step": 19590 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001628012905674701, + "loss": 7.8605, + "step": 19600 + }, + { + "epoch": 1.86, + "learning_rate": 0.00016278231163408617, + "loss": 7.7621, + "step": 19610 + }, + { + "epoch": 1.86, + "learning_rate": 0.00016276333270070223, + "loss": 7.8432, + "step": 19620 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001627443537673183, + "loss": 7.898, + "step": 19630 + }, + { + "epoch": 1.86, + "learning_rate": 0.00016272537483393433, + "loss": 7.9226, + "step": 19640 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001627063959005504, + "loss": 7.8093, + "step": 19650 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016268741696716645, + "loss": 7.856, + "step": 19660 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001626684380337825, + "loss": 7.9024, + "step": 19670 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016264945910039857, + "loss": 7.9161, + "step": 19680 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016263048016701464, + "loss": 7.893, + "step": 19690 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016261150123363067, + "loss": 7.8729, + "step": 19700 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016259252230024673, + "loss": 7.8657, + "step": 19710 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001625735433668628, + "loss": 7.9963, + "step": 19720 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016255456443347885, + "loss": 7.877, + "step": 19730 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016253558550009491, + "loss": 7.868, + "step": 19740 + }, + { + "epoch": 1.87, + "learning_rate": 0.00016251660656671095, + "loss": 7.8692, + "step": 19750 + }, + { + "epoch": 1.88, + "learning_rate": 0.000162497627633327, + "loss": 7.9183, + "step": 19760 + }, + { + "epoch": 1.88, + "learning_rate": 0.00016247864869994307, + "loss": 7.8108, + "step": 19770 + }, + { + "epoch": 1.88, + "learning_rate": 0.00016245966976655913, + "loss": 7.7614, + "step": 19780 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001624406908331752, + "loss": 7.9221, + "step": 19790 + }, + { + "epoch": 1.88, + "learning_rate": 0.00016242171189979123, + "loss": 7.959, + "step": 19800 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001624027329664073, + "loss": 7.8201, + "step": 19810 + }, + { + "epoch": 1.88, + "learning_rate": 0.00016238375403302335, + "loss": 7.8931, + "step": 19820 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001623647750996394, + "loss": 7.7885, + "step": 19830 + }, + { + "epoch": 1.88, + "learning_rate": 0.00016234579616625547, + "loss": 7.937, + "step": 19840 + }, + { + "epoch": 1.88, + "learning_rate": 0.00016232681723287153, + "loss": 7.8538, + "step": 19850 + }, + { + "epoch": 1.88, + "learning_rate": 0.00016230783829948757, + "loss": 7.9476, + "step": 19860 + }, + { + "epoch": 1.89, + "learning_rate": 0.00016228885936610363, + "loss": 7.8243, + "step": 19870 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001622698804327197, + "loss": 7.9034, + "step": 19880 + }, + { + "epoch": 1.89, + "learning_rate": 0.00016225090149933575, + "loss": 7.8047, + "step": 19890 + }, + { + "epoch": 1.89, + "learning_rate": 0.00016223192256595181, + "loss": 7.9023, + "step": 19900 + }, + { + "epoch": 1.89, + "learning_rate": 0.00016221294363256785, + "loss": 7.7868, + "step": 19910 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001621939646991839, + "loss": 7.8242, + "step": 19920 + }, + { + "epoch": 1.89, + "learning_rate": 0.00016217498576579997, + "loss": 7.8165, + "step": 19930 + }, + { + "epoch": 1.89, + "learning_rate": 0.00016215600683241603, + "loss": 7.9508, + "step": 19940 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001621370278990321, + "loss": 7.8908, + "step": 19950 + }, + { + "epoch": 1.89, + "learning_rate": 0.00016211804896564813, + "loss": 7.892, + "step": 19960 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001620990700322642, + "loss": 7.8592, + "step": 19970 + }, + { + "epoch": 1.9, + "learning_rate": 0.00016208009109888025, + "loss": 7.9194, + "step": 19980 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001620611121654963, + "loss": 7.8351, + "step": 19990 + }, + { + "epoch": 1.9, + "learning_rate": 0.00016204213323211237, + "loss": 7.9319, + "step": 20000 + }, + { + "epoch": 1.9, + "learning_rate": 0.00016202315429872843, + "loss": 7.8847, + "step": 20010 + }, + { + "epoch": 1.9, + "learning_rate": 0.00016200417536534447, + "loss": 7.8301, + "step": 20020 + }, + { + "epoch": 1.9, + "learning_rate": 0.00016198519643196053, + "loss": 7.8568, + "step": 20030 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001619662174985766, + "loss": 7.8261, + "step": 20040 + }, + { + "epoch": 1.9, + "learning_rate": 0.00016194723856519265, + "loss": 8.0282, + "step": 20050 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001619282596318087, + "loss": 7.9086, + "step": 20060 + }, + { + "epoch": 1.9, + "learning_rate": 0.00016190928069842475, + "loss": 7.9526, + "step": 20070 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001618903017650408, + "loss": 7.7902, + "step": 20080 + }, + { + "epoch": 1.91, + "learning_rate": 0.00016187132283165687, + "loss": 7.8771, + "step": 20090 + }, + { + "epoch": 1.91, + "learning_rate": 0.00016185234389827293, + "loss": 7.8308, + "step": 20100 + }, + { + "epoch": 1.91, + "learning_rate": 0.000161833364964889, + "loss": 7.7032, + "step": 20110 + }, + { + "epoch": 1.91, + "learning_rate": 0.00016181438603150503, + "loss": 7.8337, + "step": 20120 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001617954070981211, + "loss": 7.8936, + "step": 20130 + }, + { + "epoch": 1.91, + "learning_rate": 0.00016177642816473715, + "loss": 7.9296, + "step": 20140 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001617574492313532, + "loss": 7.9812, + "step": 20150 + }, + { + "epoch": 1.91, + "learning_rate": 0.00016173847029796927, + "loss": 7.9834, + "step": 20160 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001617194913645853, + "loss": 7.8294, + "step": 20170 + }, + { + "epoch": 1.91, + "learning_rate": 0.00016170051243120137, + "loss": 7.8923, + "step": 20180 + }, + { + "epoch": 1.92, + "learning_rate": 0.00016168153349781743, + "loss": 7.8267, + "step": 20190 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001616625545644335, + "loss": 7.9175, + "step": 20200 + }, + { + "epoch": 1.92, + "learning_rate": 0.00016164357563104955, + "loss": 7.7191, + "step": 20210 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001616245966976656, + "loss": 7.8315, + "step": 20220 + }, + { + "epoch": 1.92, + "learning_rate": 0.00016160561776428165, + "loss": 7.8252, + "step": 20230 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001615866388308977, + "loss": 7.872, + "step": 20240 + }, + { + "epoch": 1.92, + "learning_rate": 0.00016156765989751377, + "loss": 7.8921, + "step": 20250 + }, + { + "epoch": 1.92, + "learning_rate": 0.00016154868096412983, + "loss": 7.8788, + "step": 20260 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001615297020307459, + "loss": 7.8861, + "step": 20270 + }, + { + "epoch": 1.92, + "learning_rate": 0.00016151072309736193, + "loss": 7.8892, + "step": 20280 + }, + { + "epoch": 1.93, + "learning_rate": 0.000161491744163978, + "loss": 7.8554, + "step": 20290 + }, + { + "epoch": 1.93, + "learning_rate": 0.00016147276523059405, + "loss": 7.9014, + "step": 20300 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001614537862972101, + "loss": 7.8427, + "step": 20310 + }, + { + "epoch": 1.93, + "learning_rate": 0.00016143480736382617, + "loss": 7.8983, + "step": 20320 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001614158284304422, + "loss": 7.8983, + "step": 20330 + }, + { + "epoch": 1.93, + "learning_rate": 0.00016139684949705827, + "loss": 7.7865, + "step": 20340 + }, + { + "epoch": 1.93, + "learning_rate": 0.00016137787056367433, + "loss": 7.8339, + "step": 20350 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001613588916302904, + "loss": 7.8371, + "step": 20360 + }, + { + "epoch": 1.93, + "learning_rate": 0.00016133991269690645, + "loss": 7.7893, + "step": 20370 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001613209337635225, + "loss": 7.9403, + "step": 20380 + }, + { + "epoch": 1.93, + "learning_rate": 0.00016130195483013855, + "loss": 7.886, + "step": 20390 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001612829758967546, + "loss": 7.8127, + "step": 20400 + }, + { + "epoch": 1.94, + "learning_rate": 0.00016126399696337067, + "loss": 7.8164, + "step": 20410 + }, + { + "epoch": 1.94, + "learning_rate": 0.00016124501802998673, + "loss": 7.8491, + "step": 20420 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001612260390966028, + "loss": 7.7832, + "step": 20430 + }, + { + "epoch": 1.94, + "learning_rate": 0.00016120706016321882, + "loss": 7.8549, + "step": 20440 + }, + { + "epoch": 1.94, + "learning_rate": 0.00016118808122983489, + "loss": 8.0095, + "step": 20450 + }, + { + "epoch": 1.94, + "learning_rate": 0.00016116910229645095, + "loss": 7.7896, + "step": 20460 + }, + { + "epoch": 1.94, + "learning_rate": 0.000161150123363067, + "loss": 7.8677, + "step": 20470 + }, + { + "epoch": 1.94, + "learning_rate": 0.00016113114442968307, + "loss": 7.8216, + "step": 20480 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001611121654962991, + "loss": 7.7524, + "step": 20490 + }, + { + "epoch": 1.95, + "learning_rate": 0.00016109318656291517, + "loss": 7.78, + "step": 20500 + }, + { + "epoch": 1.95, + "learning_rate": 0.00016107420762953123, + "loss": 7.9191, + "step": 20510 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001610552286961473, + "loss": 7.8323, + "step": 20520 + }, + { + "epoch": 1.95, + "learning_rate": 0.00016103624976276335, + "loss": 7.7972, + "step": 20530 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001610172708293794, + "loss": 7.8524, + "step": 20540 + }, + { + "epoch": 1.95, + "learning_rate": 0.00016099829189599544, + "loss": 7.8708, + "step": 20550 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001609793129626115, + "loss": 7.8355, + "step": 20560 + }, + { + "epoch": 1.95, + "learning_rate": 0.00016096033402922757, + "loss": 7.8517, + "step": 20570 + }, + { + "epoch": 1.95, + "learning_rate": 0.00016094135509584363, + "loss": 7.874, + "step": 20580 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001609223761624597, + "loss": 7.7804, + "step": 20590 + }, + { + "epoch": 1.95, + "learning_rate": 0.00016090339722907572, + "loss": 7.8556, + "step": 20600 + }, + { + "epoch": 1.96, + "learning_rate": 0.00016088441829569179, + "loss": 7.9106, + "step": 20610 + }, + { + "epoch": 1.96, + "learning_rate": 0.00016086543936230785, + "loss": 7.9515, + "step": 20620 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001608464604289239, + "loss": 7.8692, + "step": 20630 + }, + { + "epoch": 1.96, + "learning_rate": 0.00016082748149553997, + "loss": 7.8373, + "step": 20640 + }, + { + "epoch": 1.96, + "learning_rate": 0.000160808502562156, + "loss": 7.867, + "step": 20650 + }, + { + "epoch": 1.96, + "learning_rate": 0.00016078952362877206, + "loss": 7.8678, + "step": 20660 + }, + { + "epoch": 1.96, + "learning_rate": 0.00016077054469538813, + "loss": 7.8488, + "step": 20670 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001607515657620042, + "loss": 7.8678, + "step": 20680 + }, + { + "epoch": 1.96, + "learning_rate": 0.00016073258682862025, + "loss": 7.8862, + "step": 20690 + }, + { + "epoch": 1.96, + "learning_rate": 0.00016071360789523628, + "loss": 7.862, + "step": 20700 + }, + { + "epoch": 1.97, + "learning_rate": 0.00016069462896185234, + "loss": 7.7964, + "step": 20710 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001606756500284684, + "loss": 7.8565, + "step": 20720 + }, + { + "epoch": 1.97, + "learning_rate": 0.00016065667109508447, + "loss": 7.9738, + "step": 20730 + }, + { + "epoch": 1.97, + "learning_rate": 0.00016063769216170053, + "loss": 7.9275, + "step": 20740 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001606187132283166, + "loss": 7.8584, + "step": 20750 + }, + { + "epoch": 1.97, + "learning_rate": 0.00016059973429493262, + "loss": 7.8415, + "step": 20760 + }, + { + "epoch": 1.97, + "learning_rate": 0.00016058075536154868, + "loss": 7.9181, + "step": 20770 + }, + { + "epoch": 1.97, + "learning_rate": 0.00016056177642816475, + "loss": 7.7571, + "step": 20780 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001605427974947808, + "loss": 7.9947, + "step": 20790 + }, + { + "epoch": 1.97, + "learning_rate": 0.00016052381856139687, + "loss": 7.9309, + "step": 20800 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001605048396280129, + "loss": 7.788, + "step": 20810 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016048586069462896, + "loss": 7.9506, + "step": 20820 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016046688176124502, + "loss": 7.9019, + "step": 20830 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016044790282786109, + "loss": 7.9267, + "step": 20840 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016042892389447715, + "loss": 7.8396, + "step": 20850 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016040994496109318, + "loss": 7.8808, + "step": 20860 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016039096602770924, + "loss": 7.8719, + "step": 20870 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001603719870943253, + "loss": 7.8729, + "step": 20880 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016035300816094137, + "loss": 7.8936, + "step": 20890 + }, + { + "epoch": 1.98, + "learning_rate": 0.00016033402922755743, + "loss": 7.8068, + "step": 20900 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001603150502941735, + "loss": 7.8761, + "step": 20910 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016029607136078952, + "loss": 7.8925, + "step": 20920 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016027709242740558, + "loss": 7.7976, + "step": 20930 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016025811349402164, + "loss": 7.9489, + "step": 20940 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001602391345606377, + "loss": 7.9128, + "step": 20950 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016022015562725377, + "loss": 7.8393, + "step": 20960 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001602011766938698, + "loss": 7.7641, + "step": 20970 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016018219776048586, + "loss": 7.946, + "step": 20980 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016016321882710192, + "loss": 7.8674, + "step": 20990 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016014423989371799, + "loss": 7.889, + "step": 21000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016012526096033405, + "loss": 7.9576, + "step": 21010 + }, + { + "epoch": 1.99, + "learning_rate": 0.00016010628202695008, + "loss": 7.7703, + "step": 21020 + }, + { + "epoch": 2.0, + "learning_rate": 0.00016008730309356614, + "loss": 7.921, + "step": 21030 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001600683241601822, + "loss": 7.906, + "step": 21040 + }, + { + "epoch": 2.0, + "learning_rate": 0.00016004934522679826, + "loss": 7.8525, + "step": 21050 + }, + { + "epoch": 2.0, + "learning_rate": 0.00016003036629341433, + "loss": 7.8645, + "step": 21060 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001600113873600304, + "loss": 7.8351, + "step": 21070 + }, + { + "epoch": 2.0, + "learning_rate": 0.00015999240842664642, + "loss": 7.9053, + "step": 21080 + }, + { + "epoch": 2.0, + "learning_rate": 0.00015997342949326248, + "loss": 7.9325, + "step": 21090 + }, + { + "epoch": 2.0, + "learning_rate": 0.00015995445055987854, + "loss": 7.9268, + "step": 21100 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001599354716264946, + "loss": 7.8665, + "step": 21110 + }, + { + "epoch": 2.0, + "learning_rate": 0.00015991649269311067, + "loss": 7.876, + "step": 21120 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001598975137597267, + "loss": 7.7425, + "step": 21130 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015987853482634276, + "loss": 7.8238, + "step": 21140 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015985955589295882, + "loss": 7.8337, + "step": 21150 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015984057695957488, + "loss": 7.8954, + "step": 21160 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015982159802619095, + "loss": 7.9015, + "step": 21170 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015980261909280698, + "loss": 7.8966, + "step": 21180 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015978364015942304, + "loss": 7.9607, + "step": 21190 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001597646612260391, + "loss": 8.0079, + "step": 21200 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015974568229265516, + "loss": 7.8708, + "step": 21210 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015972670335927123, + "loss": 7.9371, + "step": 21220 + }, + { + "epoch": 2.01, + "learning_rate": 0.00015970772442588726, + "loss": 7.8218, + "step": 21230 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015968874549250332, + "loss": 7.819, + "step": 21240 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015966976655911938, + "loss": 7.8379, + "step": 21250 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015965078762573544, + "loss": 7.9082, + "step": 21260 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001596318086923515, + "loss": 7.7326, + "step": 21270 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015961282975896757, + "loss": 7.8548, + "step": 21280 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001595938508255836, + "loss": 7.9232, + "step": 21290 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015957487189219966, + "loss": 8.0124, + "step": 21300 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015955589295881572, + "loss": 7.7575, + "step": 21310 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015953691402543178, + "loss": 7.7923, + "step": 21320 + }, + { + "epoch": 2.02, + "learning_rate": 0.00015951793509204784, + "loss": 7.8825, + "step": 21330 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015949895615866388, + "loss": 7.9135, + "step": 21340 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015947997722527994, + "loss": 7.8871, + "step": 21350 + }, + { + "epoch": 2.03, + "learning_rate": 0.000159460998291896, + "loss": 7.8631, + "step": 21360 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015944201935851206, + "loss": 7.9616, + "step": 21370 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015942304042512812, + "loss": 7.8579, + "step": 21380 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015940406149174416, + "loss": 7.8539, + "step": 21390 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015938508255836022, + "loss": 7.8704, + "step": 21400 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015936610362497628, + "loss": 7.8644, + "step": 21410 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015934712469159234, + "loss": 7.8173, + "step": 21420 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001593281457582084, + "loss": 7.8461, + "step": 21430 + }, + { + "epoch": 2.03, + "learning_rate": 0.00015930916682482446, + "loss": 7.8491, + "step": 21440 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001592901878914405, + "loss": 7.8831, + "step": 21450 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015927120895805656, + "loss": 7.878, + "step": 21460 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015925223002467262, + "loss": 7.7975, + "step": 21470 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015923325109128868, + "loss": 7.8669, + "step": 21480 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015921427215790474, + "loss": 7.8426, + "step": 21490 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015919529322452078, + "loss": 7.7905, + "step": 21500 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015917631429113684, + "loss": 7.8559, + "step": 21510 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001591573353577529, + "loss": 7.9196, + "step": 21520 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015913835642436896, + "loss": 7.8966, + "step": 21530 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015911937749098502, + "loss": 7.9587, + "step": 21540 + }, + { + "epoch": 2.04, + "learning_rate": 0.00015910039855760106, + "loss": 7.904, + "step": 21550 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015908141962421712, + "loss": 7.8236, + "step": 21560 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015906244069083318, + "loss": 7.9078, + "step": 21570 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015904346175744924, + "loss": 7.8783, + "step": 21580 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001590244828240653, + "loss": 7.767, + "step": 21590 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015900550389068136, + "loss": 7.8145, + "step": 21600 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001589865249572974, + "loss": 7.8772, + "step": 21610 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015896754602391346, + "loss": 7.8538, + "step": 21620 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015894856709052952, + "loss": 7.8231, + "step": 21630 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015892958815714558, + "loss": 7.9159, + "step": 21640 + }, + { + "epoch": 2.05, + "learning_rate": 0.00015891060922376164, + "loss": 7.9374, + "step": 21650 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015889163029037768, + "loss": 7.8821, + "step": 21660 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015887265135699374, + "loss": 7.9214, + "step": 21670 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001588536724236098, + "loss": 8.002, + "step": 21680 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015883469349022586, + "loss": 7.8611, + "step": 21690 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015881571455684192, + "loss": 8.0108, + "step": 21700 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015879673562345796, + "loss": 7.8643, + "step": 21710 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015877775669007402, + "loss": 7.9021, + "step": 21720 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015875877775669008, + "loss": 7.8611, + "step": 21730 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015873979882330614, + "loss": 7.7276, + "step": 21740 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001587208198899222, + "loss": 7.9215, + "step": 21750 + }, + { + "epoch": 2.06, + "learning_rate": 0.00015870184095653824, + "loss": 7.8625, + "step": 21760 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001586828620231543, + "loss": 7.9357, + "step": 21770 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015866388308977036, + "loss": 7.89, + "step": 21780 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015864490415638642, + "loss": 7.6989, + "step": 21790 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015862592522300248, + "loss": 7.7832, + "step": 21800 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015860694628961854, + "loss": 7.9075, + "step": 21810 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015858796735623458, + "loss": 7.9715, + "step": 21820 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015856898842285064, + "loss": 7.8661, + "step": 21830 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001585500094894667, + "loss": 7.8871, + "step": 21840 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015853103055608276, + "loss": 7.9332, + "step": 21850 + }, + { + "epoch": 2.07, + "learning_rate": 0.00015851205162269882, + "loss": 7.839, + "step": 21860 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015849307268931486, + "loss": 7.9071, + "step": 21870 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015847409375593092, + "loss": 7.8392, + "step": 21880 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015845511482254698, + "loss": 7.8378, + "step": 21890 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015843613588916304, + "loss": 7.895, + "step": 21900 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001584171569557791, + "loss": 7.895, + "step": 21910 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015839817802239513, + "loss": 7.9746, + "step": 21920 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001583791990890112, + "loss": 7.7522, + "step": 21930 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015836022015562726, + "loss": 7.9543, + "step": 21940 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015834124122224332, + "loss": 7.9075, + "step": 21950 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015832226228885938, + "loss": 7.8838, + "step": 21960 + }, + { + "epoch": 2.08, + "learning_rate": 0.00015830328335547544, + "loss": 7.8334, + "step": 21970 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015828430442209148, + "loss": 7.888, + "step": 21980 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015826532548870754, + "loss": 7.853, + "step": 21990 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001582463465553236, + "loss": 8.0296, + "step": 22000 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015822736762193966, + "loss": 7.8432, + "step": 22010 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015820838868855572, + "loss": 7.8741, + "step": 22020 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015818940975517175, + "loss": 7.8237, + "step": 22030 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015817043082178782, + "loss": 7.9621, + "step": 22040 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015815145188840388, + "loss": 7.8859, + "step": 22050 + }, + { + "epoch": 2.09, + "learning_rate": 0.00015813247295501994, + "loss": 7.9003, + "step": 22060 + }, + { + "epoch": 2.09, + "learning_rate": 0.000158113494021636, + "loss": 7.9417, + "step": 22070 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015809451508825203, + "loss": 7.9236, + "step": 22080 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001580755361548681, + "loss": 7.7871, + "step": 22090 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015805655722148416, + "loss": 7.8612, + "step": 22100 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015803757828810022, + "loss": 8.0152, + "step": 22110 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015801859935471628, + "loss": 7.8189, + "step": 22120 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015799962042133234, + "loss": 7.9239, + "step": 22130 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015798064148794837, + "loss": 7.8071, + "step": 22140 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015796166255456444, + "loss": 7.8181, + "step": 22150 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001579426836211805, + "loss": 7.847, + "step": 22160 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015792370468779656, + "loss": 7.8419, + "step": 22170 + }, + { + "epoch": 2.1, + "learning_rate": 0.00015790472575441262, + "loss": 7.9681, + "step": 22180 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015788574682102865, + "loss": 7.8276, + "step": 22190 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015786676788764472, + "loss": 7.8943, + "step": 22200 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015784778895426078, + "loss": 7.8348, + "step": 22210 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015782881002087684, + "loss": 7.8609, + "step": 22220 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001578098310874929, + "loss": 7.8044, + "step": 22230 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015779085215410893, + "loss": 7.9309, + "step": 22240 + }, + { + "epoch": 2.11, + "learning_rate": 0.000157771873220725, + "loss": 7.8456, + "step": 22250 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015775289428734106, + "loss": 7.8255, + "step": 22260 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015773391535395712, + "loss": 7.9197, + "step": 22270 + }, + { + "epoch": 2.11, + "learning_rate": 0.00015771493642057318, + "loss": 7.8745, + "step": 22280 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001576959574871892, + "loss": 7.8433, + "step": 22290 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015767697855380527, + "loss": 7.8798, + "step": 22300 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015765799962042134, + "loss": 7.8917, + "step": 22310 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001576390206870374, + "loss": 7.884, + "step": 22320 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015762004175365346, + "loss": 7.8985, + "step": 22330 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015760106282026952, + "loss": 7.9394, + "step": 22340 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015758208388688555, + "loss": 7.8019, + "step": 22350 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015756310495350161, + "loss": 7.8047, + "step": 22360 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015754412602011768, + "loss": 7.8143, + "step": 22370 + }, + { + "epoch": 2.12, + "learning_rate": 0.00015752514708673374, + "loss": 7.81, + "step": 22380 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001575061681533498, + "loss": 7.8116, + "step": 22390 + }, + { + "epoch": 2.13, + "learning_rate": 0.00015748718921996583, + "loss": 7.8917, + "step": 22400 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001574682102865819, + "loss": 7.849, + "step": 22410 + }, + { + "epoch": 2.13, + "learning_rate": 0.00015744923135319795, + "loss": 7.8409, + "step": 22420 + }, + { + "epoch": 2.13, + "learning_rate": 0.00015743025241981402, + "loss": 7.8352, + "step": 22430 + }, + { + "epoch": 2.13, + "learning_rate": 0.00015741127348643008, + "loss": 7.8681, + "step": 22440 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001573922945530461, + "loss": 7.9063, + "step": 22450 + }, + { + "epoch": 2.13, + "learning_rate": 0.00015737331561966217, + "loss": 7.8869, + "step": 22460 + }, + { + "epoch": 2.13, + "learning_rate": 0.00015735433668627823, + "loss": 7.9133, + "step": 22470 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001573353577528943, + "loss": 7.9333, + "step": 22480 + }, + { + "epoch": 2.13, + "learning_rate": 0.00015731637881951036, + "loss": 7.9291, + "step": 22490 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015729739988612642, + "loss": 7.9055, + "step": 22500 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015727842095274245, + "loss": 7.7936, + "step": 22510 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001572594420193585, + "loss": 7.8572, + "step": 22520 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015724046308597457, + "loss": 7.7666, + "step": 22530 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015722148415259064, + "loss": 7.8616, + "step": 22540 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001572025052192067, + "loss": 7.8381, + "step": 22550 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015718352628582273, + "loss": 7.902, + "step": 22560 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001571645473524388, + "loss": 7.8894, + "step": 22570 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015714556841905485, + "loss": 7.8568, + "step": 22580 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015712658948567092, + "loss": 7.8792, + "step": 22590 + }, + { + "epoch": 2.14, + "learning_rate": 0.00015710761055228698, + "loss": 7.8798, + "step": 22600 + }, + { + "epoch": 2.15, + "learning_rate": 0.000157088631618903, + "loss": 7.8514, + "step": 22610 + }, + { + "epoch": 2.15, + "learning_rate": 0.00015706965268551907, + "loss": 7.8087, + "step": 22620 + }, + { + "epoch": 2.15, + "learning_rate": 0.00015705067375213513, + "loss": 7.8658, + "step": 22630 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001570316948187512, + "loss": 7.9174, + "step": 22640 + }, + { + "epoch": 2.15, + "learning_rate": 0.00015701271588536726, + "loss": 7.8491, + "step": 22650 + }, + { + "epoch": 2.15, + "learning_rate": 0.00015699373695198332, + "loss": 7.7947, + "step": 22660 + }, + { + "epoch": 2.15, + "learning_rate": 0.00015697475801859935, + "loss": 7.7663, + "step": 22670 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001569557790852154, + "loss": 7.8722, + "step": 22680 + }, + { + "epoch": 2.15, + "learning_rate": 0.00015693680015183147, + "loss": 7.8574, + "step": 22690 + }, + { + "epoch": 2.15, + "learning_rate": 0.00015691782121844754, + "loss": 7.706, + "step": 22700 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001568988422850636, + "loss": 7.8818, + "step": 22710 + }, + { + "epoch": 2.16, + "learning_rate": 0.00015687986335167963, + "loss": 7.8492, + "step": 22720 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001568608844182957, + "loss": 7.8555, + "step": 22730 + }, + { + "epoch": 2.16, + "learning_rate": 0.00015684190548491175, + "loss": 7.8188, + "step": 22740 + }, + { + "epoch": 2.16, + "learning_rate": 0.00015682292655152781, + "loss": 7.8021, + "step": 22750 + }, + { + "epoch": 2.16, + "learning_rate": 0.00015680394761814388, + "loss": 7.9007, + "step": 22760 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001567849686847599, + "loss": 7.7355, + "step": 22770 + }, + { + "epoch": 2.16, + "learning_rate": 0.00015676598975137597, + "loss": 7.7312, + "step": 22780 + }, + { + "epoch": 2.16, + "learning_rate": 0.00015674701081799203, + "loss": 7.9311, + "step": 22790 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001567280318846081, + "loss": 7.879, + "step": 22800 + }, + { + "epoch": 2.16, + "learning_rate": 0.00015670905295122415, + "loss": 7.8623, + "step": 22810 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001566900740178402, + "loss": 7.8358, + "step": 22820 + }, + { + "epoch": 2.17, + "learning_rate": 0.00015667109508445625, + "loss": 7.8962, + "step": 22830 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001566521161510723, + "loss": 7.8288, + "step": 22840 + }, + { + "epoch": 2.17, + "learning_rate": 0.00015663313721768837, + "loss": 7.9741, + "step": 22850 + }, + { + "epoch": 2.17, + "learning_rate": 0.00015661415828430443, + "loss": 7.8187, + "step": 22860 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001565951793509205, + "loss": 7.8735, + "step": 22870 + }, + { + "epoch": 2.17, + "learning_rate": 0.00015657620041753653, + "loss": 7.9464, + "step": 22880 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001565572214841526, + "loss": 7.8947, + "step": 22890 + }, + { + "epoch": 2.17, + "learning_rate": 0.00015653824255076865, + "loss": 7.8647, + "step": 22900 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001565192636173847, + "loss": 7.8879, + "step": 22910 + }, + { + "epoch": 2.17, + "learning_rate": 0.00015650028468400077, + "loss": 7.8699, + "step": 22920 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001564813057506168, + "loss": 7.8284, + "step": 22930 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015646232681723287, + "loss": 7.826, + "step": 22940 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015644334788384893, + "loss": 7.9656, + "step": 22950 + }, + { + "epoch": 2.18, + "learning_rate": 0.000156424368950465, + "loss": 7.748, + "step": 22960 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015640539001708105, + "loss": 7.7614, + "step": 22970 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001563864110836971, + "loss": 7.8216, + "step": 22980 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015636743215031315, + "loss": 8.0235, + "step": 22990 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001563484532169292, + "loss": 7.8968, + "step": 23000 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015632947428354527, + "loss": 7.9184, + "step": 23010 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015631049535016133, + "loss": 7.8753, + "step": 23020 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001562915164167774, + "loss": 7.9102, + "step": 23030 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015627253748339343, + "loss": 7.7806, + "step": 23040 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001562535585500095, + "loss": 7.8738, + "step": 23050 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015623457961662555, + "loss": 7.8276, + "step": 23060 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001562156006832416, + "loss": 7.8367, + "step": 23070 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015619662174985767, + "loss": 7.8343, + "step": 23080 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001561776428164737, + "loss": 7.863, + "step": 23090 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015615866388308977, + "loss": 7.8564, + "step": 23100 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015613968494970583, + "loss": 7.8091, + "step": 23110 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001561207060163219, + "loss": 7.881, + "step": 23120 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015610172708293795, + "loss": 7.9338, + "step": 23130 + }, + { + "epoch": 2.2, + "learning_rate": 0.000156082748149554, + "loss": 7.8681, + "step": 23140 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015606376921617005, + "loss": 7.7641, + "step": 23150 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001560447902827861, + "loss": 7.9572, + "step": 23160 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015602581134940217, + "loss": 7.793, + "step": 23170 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015600683241601823, + "loss": 7.8405, + "step": 23180 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001559878534826343, + "loss": 7.8318, + "step": 23190 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015596887454925033, + "loss": 7.8334, + "step": 23200 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001559498956158664, + "loss": 7.7095, + "step": 23210 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015593091668248245, + "loss": 7.885, + "step": 23220 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001559119377490985, + "loss": 7.904, + "step": 23230 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015589295881571457, + "loss": 7.8512, + "step": 23240 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001558739798823306, + "loss": 7.7914, + "step": 23250 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015585500094894667, + "loss": 7.9344, + "step": 23260 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015583602201556273, + "loss": 7.8801, + "step": 23270 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001558170430821788, + "loss": 7.8102, + "step": 23280 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015579806414879485, + "loss": 7.8134, + "step": 23290 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001557790852154109, + "loss": 7.857, + "step": 23300 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015576010628202695, + "loss": 7.902, + "step": 23310 + }, + { + "epoch": 2.21, + "learning_rate": 0.000155741127348643, + "loss": 7.8886, + "step": 23320 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015572214841525907, + "loss": 7.8732, + "step": 23330 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015570316948187513, + "loss": 7.8585, + "step": 23340 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001556841905484912, + "loss": 7.8644, + "step": 23350 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015566521161510723, + "loss": 7.8146, + "step": 23360 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001556462326817233, + "loss": 7.8164, + "step": 23370 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015562725374833935, + "loss": 7.7867, + "step": 23380 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001556082748149554, + "loss": 7.9057, + "step": 23390 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015558929588157147, + "loss": 7.8249, + "step": 23400 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001555703169481875, + "loss": 7.9146, + "step": 23410 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015555133801480357, + "loss": 7.8243, + "step": 23420 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015553235908141963, + "loss": 7.9238, + "step": 23430 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001555133801480357, + "loss": 7.8485, + "step": 23440 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015549440121465175, + "loss": 7.821, + "step": 23450 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015547542228126779, + "loss": 7.9004, + "step": 23460 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015545644334788385, + "loss": 7.8125, + "step": 23470 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001554374644144999, + "loss": 7.9106, + "step": 23480 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015541848548111597, + "loss": 7.947, + "step": 23490 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015539950654773203, + "loss": 7.8984, + "step": 23500 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015538052761434806, + "loss": 7.9675, + "step": 23510 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015536154868096413, + "loss": 7.8223, + "step": 23520 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001553425697475802, + "loss": 7.8708, + "step": 23530 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015532359081419625, + "loss": 7.9586, + "step": 23540 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001553046118808123, + "loss": 7.818, + "step": 23550 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015528563294742837, + "loss": 7.8387, + "step": 23560 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001552666540140444, + "loss": 7.8902, + "step": 23570 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015524767508066047, + "loss": 7.8054, + "step": 23580 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015522869614727653, + "loss": 7.8994, + "step": 23590 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001552097172138926, + "loss": 7.9191, + "step": 23600 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015519073828050865, + "loss": 7.8364, + "step": 23610 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015517175934712468, + "loss": 7.9712, + "step": 23620 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015515278041374075, + "loss": 7.8791, + "step": 23630 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001551338014803568, + "loss": 7.8106, + "step": 23640 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015511482254697287, + "loss": 7.8697, + "step": 23650 + }, + { + "epoch": 2.25, + "learning_rate": 0.00015509584361358893, + "loss": 7.9453, + "step": 23660 + }, + { + "epoch": 2.25, + "learning_rate": 0.00015507686468020496, + "loss": 7.8686, + "step": 23670 + }, + { + "epoch": 2.25, + "learning_rate": 0.00015505788574682103, + "loss": 7.9112, + "step": 23680 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001550389068134371, + "loss": 7.8276, + "step": 23690 + }, + { + "epoch": 2.25, + "learning_rate": 0.00015501992788005315, + "loss": 7.8023, + "step": 23700 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001550009489466692, + "loss": 7.9167, + "step": 23710 + }, + { + "epoch": 2.25, + "learning_rate": 0.00015498197001328527, + "loss": 7.9369, + "step": 23720 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001549629910799013, + "loss": 7.7976, + "step": 23730 + }, + { + "epoch": 2.25, + "learning_rate": 0.00015494401214651737, + "loss": 7.8442, + "step": 23740 + }, + { + "epoch": 2.25, + "learning_rate": 0.00015492503321313343, + "loss": 7.8399, + "step": 23750 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001549060542797495, + "loss": 7.8819, + "step": 23760 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015488707534636555, + "loss": 7.9336, + "step": 23770 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015486809641298158, + "loss": 7.8861, + "step": 23780 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015484911747959765, + "loss": 7.875, + "step": 23790 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001548301385462137, + "loss": 7.8272, + "step": 23800 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015481115961282977, + "loss": 7.8513, + "step": 23810 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015479218067944583, + "loss": 7.8837, + "step": 23820 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015477320174606186, + "loss": 7.851, + "step": 23830 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015475422281267792, + "loss": 7.8773, + "step": 23840 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015473524387929399, + "loss": 7.9477, + "step": 23850 + }, + { + "epoch": 2.26, + "learning_rate": 0.00015471626494591005, + "loss": 7.9506, + "step": 23860 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001546972860125261, + "loss": 7.8128, + "step": 23870 + }, + { + "epoch": 2.27, + "learning_rate": 0.00015467830707914217, + "loss": 7.9312, + "step": 23880 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001546593281457582, + "loss": 7.9325, + "step": 23890 + }, + { + "epoch": 2.27, + "learning_rate": 0.00015464034921237426, + "loss": 7.8281, + "step": 23900 + }, + { + "epoch": 2.27, + "learning_rate": 0.00015462137027899033, + "loss": 8.0098, + "step": 23910 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001546023913456064, + "loss": 7.7144, + "step": 23920 + }, + { + "epoch": 2.27, + "learning_rate": 0.00015458341241222245, + "loss": 7.9032, + "step": 23930 + }, + { + "epoch": 2.27, + "learning_rate": 0.00015456443347883848, + "loss": 7.7935, + "step": 23940 + }, + { + "epoch": 2.27, + "learning_rate": 0.00015454545454545454, + "loss": 7.8352, + "step": 23950 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001545264756120706, + "loss": 7.8817, + "step": 23960 + }, + { + "epoch": 2.27, + "learning_rate": 0.00015450749667868667, + "loss": 7.8525, + "step": 23970 + }, + { + "epoch": 2.28, + "learning_rate": 0.00015448851774530273, + "loss": 7.8043, + "step": 23980 + }, + { + "epoch": 2.28, + "learning_rate": 0.00015446953881191876, + "loss": 7.908, + "step": 23990 + }, + { + "epoch": 2.28, + "learning_rate": 0.00015445055987853482, + "loss": 7.8206, + "step": 24000 + } + ], + "max_steps": 105380, + "num_train_epochs": 10, + "total_flos": 2.5273221210952704e+16, + "trial_name": null, + "trial_params": null +}