diff --git "a/checkpoint-20331/trainer_state.json" "b/checkpoint-20331/trainer_state.json" deleted file mode 100644--- "a/checkpoint-20331/trainer_state.json" +++ /dev/null @@ -1,5746 +0,0 @@ -{ - "best_metric": 0.1046341210603714, - "best_model_checkpoint": "autotrain-ai-image-detect-20241219-0211/checkpoint-20331", - "epoch": 1.0, - "eval_steps": 500, - "global_step": 20331, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.001229649304018494, - "grad_norm": 0.008213584311306477, - "learning_rate": 3.9999850768074745e-05, - "loss": 0.0139, - "step": 25 - }, - { - "epoch": 0.002459298608036988, - "grad_norm": 0.010049919597804546, - "learning_rate": 3.999940307452596e-05, - "loss": 0.0675, - "step": 50 - }, - { - "epoch": 0.0036889479120554816, - "grad_norm": 34.44966506958008, - "learning_rate": 3.999865692603469e-05, - "loss": 0.1253, - "step": 75 - }, - { - "epoch": 0.004918597216073976, - "grad_norm": 0.3697604537010193, - "learning_rate": 3.9997612333735844e-05, - "loss": 0.1143, - "step": 100 - }, - { - "epoch": 0.0061482465200924695, - "grad_norm": 35.87644958496094, - "learning_rate": 3.999626931321806e-05, - "loss": 0.12, - "step": 125 - }, - { - "epoch": 0.007377895824110963, - "grad_norm": 0.11791765689849854, - "learning_rate": 3.999462788452352e-05, - "loss": 0.0924, - "step": 150 - }, - { - "epoch": 0.008607545128129458, - "grad_norm": 0.005023457109928131, - "learning_rate": 3.999268807214755e-05, - "loss": 0.0187, - "step": 175 - }, - { - "epoch": 0.009837194432147952, - "grad_norm": 0.028873136267066002, - "learning_rate": 3.999044990503837e-05, - "loss": 0.0982, - "step": 200 - }, - { - "epoch": 0.011066843736166445, - "grad_norm": 13.8193998336792, - "learning_rate": 3.998791341659656e-05, - "loss": 0.1549, - "step": 225 - }, - { - "epoch": 0.012296493040184939, - "grad_norm": 0.1344272792339325, - "learning_rate": 3.998507864467463e-05, - "loss": 0.0515, - "step": 250 - }, - { - "epoch": 0.013526142344203433, - "grad_norm": 0.1973223239183426, - "learning_rate": 3.9981945631576436e-05, - "loss": 0.1369, - "step": 275 - }, - { - "epoch": 0.014755791648221927, - "grad_norm": 8.42328929901123, - "learning_rate": 3.997851442405652e-05, - "loss": 0.0755, - "step": 300 - }, - { - "epoch": 0.015985440952240422, - "grad_norm": 21.16735076904297, - "learning_rate": 3.9974785073319464e-05, - "loss": 0.0644, - "step": 325 - }, - { - "epoch": 0.017215090256258916, - "grad_norm": 0.11640334874391556, - "learning_rate": 3.997075763501909e-05, - "loss": 0.0529, - "step": 350 - }, - { - "epoch": 0.01844473956027741, - "grad_norm": 18.0023250579834, - "learning_rate": 3.9966432169257625e-05, - "loss": 0.1751, - "step": 375 - }, - { - "epoch": 0.019674388864295903, - "grad_norm": 9.739143371582031, - "learning_rate": 3.9961808740584836e-05, - "loss": 0.0941, - "step": 400 - }, - { - "epoch": 0.020904038168314397, - "grad_norm": 0.19388121366500854, - "learning_rate": 3.995688741799703e-05, - "loss": 0.1249, - "step": 425 - }, - { - "epoch": 0.02213368747233289, - "grad_norm": 0.08443363755941391, - "learning_rate": 3.995166827493606e-05, - "loss": 0.0424, - "step": 450 - }, - { - "epoch": 0.023363336776351384, - "grad_norm": 9.207952499389648, - "learning_rate": 3.99461513892882e-05, - "loss": 0.1346, - "step": 475 - }, - { - "epoch": 0.024592986080369878, - "grad_norm": 0.02244606427848339, - "learning_rate": 3.9940336843382994e-05, - "loss": 0.0624, - "step": 500 - }, - { - "epoch": 0.025822635384388372, - "grad_norm": 0.008786759339272976, - "learning_rate": 3.993422472399203e-05, - "loss": 0.0294, - "step": 525 - }, - { - "epoch": 0.027052284688406866, - "grad_norm": 0.011749695055186749, - "learning_rate": 3.9927815122327646e-05, - "loss": 0.0813, - "step": 550 - }, - { - "epoch": 0.02828193399242536, - "grad_norm": 1.530182957649231, - "learning_rate": 3.9921108134041556e-05, - "loss": 0.0509, - "step": 575 - }, - { - "epoch": 0.029511583296443853, - "grad_norm": 1.717954158782959, - "learning_rate": 3.9914103859223445e-05, - "loss": 0.0557, - "step": 600 - }, - { - "epoch": 0.030741232600462347, - "grad_norm": 0.26876401901245117, - "learning_rate": 3.990680240239945e-05, - "loss": 0.0578, - "step": 625 - }, - { - "epoch": 0.031970881904480844, - "grad_norm": 0.14603018760681152, - "learning_rate": 3.989920387253062e-05, - "loss": 0.1472, - "step": 650 - }, - { - "epoch": 0.033200531208499334, - "grad_norm": 0.2961640954017639, - "learning_rate": 3.989130838301128e-05, - "loss": 0.1957, - "step": 675 - }, - { - "epoch": 0.03443018051251783, - "grad_norm": 0.8009811043739319, - "learning_rate": 3.9883116051667324e-05, - "loss": 0.1703, - "step": 700 - }, - { - "epoch": 0.03565982981653632, - "grad_norm": 18.910682678222656, - "learning_rate": 3.9874627000754517e-05, - "loss": 0.1149, - "step": 725 - }, - { - "epoch": 0.03688947912055482, - "grad_norm": 0.13948693871498108, - "learning_rate": 3.986584135695658e-05, - "loss": 0.0919, - "step": 750 - }, - { - "epoch": 0.03811912842457331, - "grad_norm": 0.21044179797172546, - "learning_rate": 3.9856759251383375e-05, - "loss": 0.1396, - "step": 775 - }, - { - "epoch": 0.039348777728591806, - "grad_norm": 0.8218281269073486, - "learning_rate": 3.9847380819568906e-05, - "loss": 0.1708, - "step": 800 - }, - { - "epoch": 0.0405784270326103, - "grad_norm": 0.04468478634953499, - "learning_rate": 3.9837706201469324e-05, - "loss": 0.0309, - "step": 825 - }, - { - "epoch": 0.041808076336628794, - "grad_norm": 0.08932695537805557, - "learning_rate": 3.982773554146082e-05, - "loss": 0.2227, - "step": 850 - }, - { - "epoch": 0.043037725640647284, - "grad_norm": 0.32494252920150757, - "learning_rate": 3.981746898833746e-05, - "loss": 0.137, - "step": 875 - }, - { - "epoch": 0.04426737494466578, - "grad_norm": 11.993731498718262, - "learning_rate": 3.9806906695308994e-05, - "loss": 0.0739, - "step": 900 - }, - { - "epoch": 0.04549702424868428, - "grad_norm": 0.5979361534118652, - "learning_rate": 3.979604881999857e-05, - "loss": 0.0838, - "step": 925 - }, - { - "epoch": 0.04672667355270277, - "grad_norm": 0.0024851462803781033, - "learning_rate": 3.978489552444034e-05, - "loss": 0.0582, - "step": 950 - }, - { - "epoch": 0.047956322856721266, - "grad_norm": 6.014678001403809, - "learning_rate": 3.977344697507708e-05, - "loss": 0.0484, - "step": 975 - }, - { - "epoch": 0.049185972160739756, - "grad_norm": 0.04792521893978119, - "learning_rate": 3.976170334275771e-05, - "loss": 0.1455, - "step": 1000 - }, - { - "epoch": 0.05041562146475825, - "grad_norm": 0.17816047370433807, - "learning_rate": 3.9749664802734694e-05, - "loss": 0.1477, - "step": 1025 - }, - { - "epoch": 0.051645270768776744, - "grad_norm": 0.25085172057151794, - "learning_rate": 3.9737331534661496e-05, - "loss": 0.1109, - "step": 1050 - }, - { - "epoch": 0.05287492007279524, - "grad_norm": 9.553338050842285, - "learning_rate": 3.972470372258985e-05, - "loss": 0.0634, - "step": 1075 - }, - { - "epoch": 0.05410456937681373, - "grad_norm": 0.01188324112445116, - "learning_rate": 3.9711781554967025e-05, - "loss": 0.0622, - "step": 1100 - }, - { - "epoch": 0.05533421868083223, - "grad_norm": 0.017093725502490997, - "learning_rate": 3.969856522463301e-05, - "loss": 0.0862, - "step": 1125 - }, - { - "epoch": 0.05656386798485072, - "grad_norm": 0.2477702647447586, - "learning_rate": 3.9685054928817656e-05, - "loss": 0.1485, - "step": 1150 - }, - { - "epoch": 0.057793517288869216, - "grad_norm": 9.664518356323242, - "learning_rate": 3.967125086913771e-05, - "loss": 0.1126, - "step": 1175 - }, - { - "epoch": 0.059023166592887706, - "grad_norm": 0.09257597476243973, - "learning_rate": 3.96571532515938e-05, - "loss": 0.1075, - "step": 1200 - }, - { - "epoch": 0.0602528158969062, - "grad_norm": 0.09930257499217987, - "learning_rate": 3.96427622865674e-05, - "loss": 0.1235, - "step": 1225 - }, - { - "epoch": 0.061482465200924694, - "grad_norm": 0.05136797949671745, - "learning_rate": 3.9628078188817644e-05, - "loss": 0.0652, - "step": 1250 - }, - { - "epoch": 0.06271211450494318, - "grad_norm": 0.07908641546964645, - "learning_rate": 3.9613101177478154e-05, - "loss": 0.115, - "step": 1275 - }, - { - "epoch": 0.06394176380896169, - "grad_norm": 0.06305696815252304, - "learning_rate": 3.9597831476053754e-05, - "loss": 0.1029, - "step": 1300 - }, - { - "epoch": 0.06517141311298018, - "grad_norm": 0.26717567443847656, - "learning_rate": 3.958226931241713e-05, - "loss": 0.0201, - "step": 1325 - }, - { - "epoch": 0.06640106241699867, - "grad_norm": 0.03527957573533058, - "learning_rate": 3.956641491880546e-05, - "loss": 0.0465, - "step": 1350 - }, - { - "epoch": 0.06763071172101717, - "grad_norm": 0.009182834066450596, - "learning_rate": 3.955026853181689e-05, - "loss": 0.1616, - "step": 1375 - }, - { - "epoch": 0.06886036102503566, - "grad_norm": 0.08651256561279297, - "learning_rate": 3.953383039240709e-05, - "loss": 0.1127, - "step": 1400 - }, - { - "epoch": 0.07009001032905415, - "grad_norm": 0.03200230002403259, - "learning_rate": 3.951710074588555e-05, - "loss": 0.0376, - "step": 1425 - }, - { - "epoch": 0.07131965963307264, - "grad_norm": 0.3947339951992035, - "learning_rate": 3.950007984191203e-05, - "loss": 0.1103, - "step": 1450 - }, - { - "epoch": 0.07254930893709115, - "grad_norm": 27.486982345581055, - "learning_rate": 3.948276793449275e-05, - "loss": 0.0851, - "step": 1475 - }, - { - "epoch": 0.07377895824110964, - "grad_norm": 1.3376322984695435, - "learning_rate": 3.946516528197664e-05, - "loss": 0.1626, - "step": 1500 - }, - { - "epoch": 0.07500860754512813, - "grad_norm": 13.490923881530762, - "learning_rate": 3.944727214705145e-05, - "loss": 0.1128, - "step": 1525 - }, - { - "epoch": 0.07623825684914662, - "grad_norm": 39.99186325073242, - "learning_rate": 3.942908879673991e-05, - "loss": 0.0773, - "step": 1550 - }, - { - "epoch": 0.07746790615316512, - "grad_norm": 36.99325180053711, - "learning_rate": 3.941061550239564e-05, - "loss": 0.093, - "step": 1575 - }, - { - "epoch": 0.07869755545718361, - "grad_norm": 1.3203864097595215, - "learning_rate": 3.9391852539699165e-05, - "loss": 0.0589, - "step": 1600 - }, - { - "epoch": 0.0799272047612021, - "grad_norm": 0.03819641098380089, - "learning_rate": 3.9372800188653804e-05, - "loss": 0.1897, - "step": 1625 - }, - { - "epoch": 0.0811568540652206, - "grad_norm": 0.057856734842061996, - "learning_rate": 3.935345873358144e-05, - "loss": 0.0458, - "step": 1650 - }, - { - "epoch": 0.0823865033692391, - "grad_norm": 0.02265745773911476, - "learning_rate": 3.933382846311835e-05, - "loss": 0.08, - "step": 1675 - }, - { - "epoch": 0.08361615267325759, - "grad_norm": 0.2750760614871979, - "learning_rate": 3.9313909670210836e-05, - "loss": 0.0858, - "step": 1700 - }, - { - "epoch": 0.08484580197727608, - "grad_norm": 0.06586789339780807, - "learning_rate": 3.929370265211087e-05, - "loss": 0.1084, - "step": 1725 - }, - { - "epoch": 0.08607545128129457, - "grad_norm": 0.12183527648448944, - "learning_rate": 3.927320771037168e-05, - "loss": 0.1017, - "step": 1750 - }, - { - "epoch": 0.08730510058531307, - "grad_norm": 9.33776569366455, - "learning_rate": 3.925242515084322e-05, - "loss": 0.1247, - "step": 1775 - }, - { - "epoch": 0.08853474988933156, - "grad_norm": 0.6753214001655579, - "learning_rate": 3.9231355283667636e-05, - "loss": 0.0639, - "step": 1800 - }, - { - "epoch": 0.08976439919335005, - "grad_norm": 0.43698281049728394, - "learning_rate": 3.920999842327461e-05, - "loss": 0.0452, - "step": 1825 - }, - { - "epoch": 0.09099404849736856, - "grad_norm": 17.555950164794922, - "learning_rate": 3.918835488837668e-05, - "loss": 0.1691, - "step": 1850 - }, - { - "epoch": 0.09222369780138705, - "grad_norm": 0.14000771939754486, - "learning_rate": 3.916642500196448e-05, - "loss": 0.1194, - "step": 1875 - }, - { - "epoch": 0.09345334710540554, - "grad_norm": 0.030054202303290367, - "learning_rate": 3.9144209091301934e-05, - "loss": 0.0596, - "step": 1900 - }, - { - "epoch": 0.09468299640942403, - "grad_norm": 0.05038214102387428, - "learning_rate": 3.912170748792136e-05, - "loss": 0.0956, - "step": 1925 - }, - { - "epoch": 0.09591264571344253, - "grad_norm": 0.043467674404382706, - "learning_rate": 3.90989205276185e-05, - "loss": 0.1387, - "step": 1950 - }, - { - "epoch": 0.09714229501746102, - "grad_norm": 11.638011932373047, - "learning_rate": 3.907584855044756e-05, - "loss": 0.1038, - "step": 1975 - }, - { - "epoch": 0.09837194432147951, - "grad_norm": 0.1782546192407608, - "learning_rate": 3.90524919007161e-05, - "loss": 0.1034, - "step": 2000 - }, - { - "epoch": 0.099601593625498, - "grad_norm": 0.02144436351954937, - "learning_rate": 3.902885092697989e-05, - "loss": 0.0269, - "step": 2025 - }, - { - "epoch": 0.1008312429295165, - "grad_norm": 0.05752917751669884, - "learning_rate": 3.900492598203775e-05, - "loss": 0.0447, - "step": 2050 - }, - { - "epoch": 0.102060892233535, - "grad_norm": 0.4659772217273712, - "learning_rate": 3.8980717422926225e-05, - "loss": 0.0785, - "step": 2075 - }, - { - "epoch": 0.10329054153755349, - "grad_norm": 8.144423484802246, - "learning_rate": 3.895622561091431e-05, - "loss": 0.0387, - "step": 2100 - }, - { - "epoch": 0.10452019084157198, - "grad_norm": 0.9063475131988525, - "learning_rate": 3.8931450911498024e-05, - "loss": 0.0491, - "step": 2125 - }, - { - "epoch": 0.10574984014559048, - "grad_norm": 0.23620523512363434, - "learning_rate": 3.890639369439498e-05, - "loss": 0.1356, - "step": 2150 - }, - { - "epoch": 0.10697948944960897, - "grad_norm": 13.620726585388184, - "learning_rate": 3.888105433353885e-05, - "loss": 0.1564, - "step": 2175 - }, - { - "epoch": 0.10820913875362746, - "grad_norm": 0.4523046016693115, - "learning_rate": 3.8855433207073803e-05, - "loss": 0.1143, - "step": 2200 - }, - { - "epoch": 0.10943878805764595, - "grad_norm": 30.54190444946289, - "learning_rate": 3.8829530697348836e-05, - "loss": 0.0973, - "step": 2225 - }, - { - "epoch": 0.11066843736166446, - "grad_norm": 5.405024528503418, - "learning_rate": 3.8803347190912096e-05, - "loss": 0.1269, - "step": 2250 - }, - { - "epoch": 0.11189808666568295, - "grad_norm": 1.6278903484344482, - "learning_rate": 3.877688307850507e-05, - "loss": 0.0659, - "step": 2275 - }, - { - "epoch": 0.11312773596970144, - "grad_norm": 0.06446202099323273, - "learning_rate": 3.875013875505683e-05, - "loss": 0.075, - "step": 2300 - }, - { - "epoch": 0.11435738527371994, - "grad_norm": 0.19679602980613708, - "learning_rate": 3.872311461967805e-05, - "loss": 0.08, - "step": 2325 - }, - { - "epoch": 0.11558703457773843, - "grad_norm": 35.78239440917969, - "learning_rate": 3.86958110756551e-05, - "loss": 0.0945, - "step": 2350 - }, - { - "epoch": 0.11681668388175692, - "grad_norm": 0.4157645106315613, - "learning_rate": 3.866822853044403e-05, - "loss": 0.094, - "step": 2375 - }, - { - "epoch": 0.11804633318577541, - "grad_norm": 0.279402494430542, - "learning_rate": 3.864036739566447e-05, - "loss": 0.0245, - "step": 2400 - }, - { - "epoch": 0.11927598248979392, - "grad_norm": 0.15293359756469727, - "learning_rate": 3.861222808709351e-05, - "loss": 0.1071, - "step": 2425 - }, - { - "epoch": 0.1205056317938124, - "grad_norm": 2.3946597576141357, - "learning_rate": 3.858381102465945e-05, - "loss": 0.1092, - "step": 2450 - }, - { - "epoch": 0.1217352810978309, - "grad_norm": 8.131004333496094, - "learning_rate": 3.85551166324356e-05, - "loss": 0.0495, - "step": 2475 - }, - { - "epoch": 0.12296493040184939, - "grad_norm": 4.99855375289917, - "learning_rate": 3.852614533863389e-05, - "loss": 0.111, - "step": 2500 - }, - { - "epoch": 0.12419457970586789, - "grad_norm": 0.15764915943145752, - "learning_rate": 3.849689757559852e-05, - "loss": 0.1016, - "step": 2525 - }, - { - "epoch": 0.12542422900988637, - "grad_norm": 0.0594155453145504, - "learning_rate": 3.8467373779799493e-05, - "loss": 0.1638, - "step": 2550 - }, - { - "epoch": 0.12665387831390487, - "grad_norm": 0.3766103684902191, - "learning_rate": 3.843757439182608e-05, - "loss": 0.1388, - "step": 2575 - }, - { - "epoch": 0.12788352761792338, - "grad_norm": 10.557015419006348, - "learning_rate": 3.8407499856380307e-05, - "loss": 0.1152, - "step": 2600 - }, - { - "epoch": 0.12911317692194185, - "grad_norm": 0.014135906472802162, - "learning_rate": 3.837715062227024e-05, - "loss": 0.05, - "step": 2625 - }, - { - "epoch": 0.13034282622596036, - "grad_norm": 0.022704467177391052, - "learning_rate": 3.834652714240335e-05, - "loss": 0.1188, - "step": 2650 - }, - { - "epoch": 0.13157247552997886, - "grad_norm": 0.8610201478004456, - "learning_rate": 3.831562987377972e-05, - "loss": 0.0956, - "step": 2675 - }, - { - "epoch": 0.13280212483399734, - "grad_norm": 13.583250045776367, - "learning_rate": 3.8284459277485246e-05, - "loss": 0.0952, - "step": 2700 - }, - { - "epoch": 0.13403177413801584, - "grad_norm": 0.07633771002292633, - "learning_rate": 3.8253015818684735e-05, - "loss": 0.0697, - "step": 2725 - }, - { - "epoch": 0.13526142344203435, - "grad_norm": 17.34012794494629, - "learning_rate": 3.822129996661497e-05, - "loss": 0.1738, - "step": 2750 - }, - { - "epoch": 0.13649107274605282, - "grad_norm": 0.2831138074398041, - "learning_rate": 3.8189312194577715e-05, - "loss": 0.0628, - "step": 2775 - }, - { - "epoch": 0.13772072205007133, - "grad_norm": 1.181416630744934, - "learning_rate": 3.8157052979932664e-05, - "loss": 0.1544, - "step": 2800 - }, - { - "epoch": 0.1389503713540898, - "grad_norm": 0.009475952945649624, - "learning_rate": 3.812452280409027e-05, - "loss": 0.1003, - "step": 2825 - }, - { - "epoch": 0.1401800206581083, - "grad_norm": 270.4998779296875, - "learning_rate": 3.8091722152504637e-05, - "loss": 0.1214, - "step": 2850 - }, - { - "epoch": 0.1414096699621268, - "grad_norm": 0.044576507061719894, - "learning_rate": 3.805865151466617e-05, - "loss": 0.0461, - "step": 2875 - }, - { - "epoch": 0.1426393192661453, - "grad_norm": 0.1282535344362259, - "learning_rate": 3.802531138409439e-05, - "loss": 0.041, - "step": 2900 - }, - { - "epoch": 0.1438689685701638, - "grad_norm": 0.05482589453458786, - "learning_rate": 3.7991702258330476e-05, - "loss": 0.0967, - "step": 2925 - }, - { - "epoch": 0.1450986178741823, - "grad_norm": 0.028604110702872276, - "learning_rate": 3.7957824638929885e-05, - "loss": 0.0697, - "step": 2950 - }, - { - "epoch": 0.14632826717820077, - "grad_norm": 514.7240600585938, - "learning_rate": 3.7923679031454846e-05, - "loss": 0.0692, - "step": 2975 - }, - { - "epoch": 0.14755791648221928, - "grad_norm": 0.6727167963981628, - "learning_rate": 3.788926594546684e-05, - "loss": 0.1755, - "step": 3000 - }, - { - "epoch": 0.14878756578623775, - "grad_norm": 0.03478136658668518, - "learning_rate": 3.785458589451897e-05, - "loss": 0.1424, - "step": 3025 - }, - { - "epoch": 0.15001721509025626, - "grad_norm": 17.288965225219727, - "learning_rate": 3.781963939614832e-05, - "loss": 0.1245, - "step": 3050 - }, - { - "epoch": 0.15124686439427476, - "grad_norm": 0.6229422688484192, - "learning_rate": 3.7784426971868204e-05, - "loss": 0.1224, - "step": 3075 - }, - { - "epoch": 0.15247651369829324, - "grad_norm": 0.02501567453145981, - "learning_rate": 3.7748949147160415e-05, - "loss": 0.0796, - "step": 3100 - }, - { - "epoch": 0.15370616300231174, - "grad_norm": 1.2633839845657349, - "learning_rate": 3.771320645146737e-05, - "loss": 0.11, - "step": 3125 - }, - { - "epoch": 0.15493581230633025, - "grad_norm": 11.577386856079102, - "learning_rate": 3.767719941818418e-05, - "loss": 0.1012, - "step": 3150 - }, - { - "epoch": 0.15616546161034872, - "grad_norm": 0.11757977306842804, - "learning_rate": 3.764092858465074e-05, - "loss": 0.0988, - "step": 3175 - }, - { - "epoch": 0.15739511091436723, - "grad_norm": 0.11330989748239517, - "learning_rate": 3.76043944921437e-05, - "loss": 0.0408, - "step": 3200 - }, - { - "epoch": 0.15862476021838573, - "grad_norm": 0.11894803494215012, - "learning_rate": 3.7567597685868335e-05, - "loss": 0.0963, - "step": 3225 - }, - { - "epoch": 0.1598544095224042, - "grad_norm": 0.5013144612312317, - "learning_rate": 3.753053871495047e-05, - "loss": 0.1169, - "step": 3250 - }, - { - "epoch": 0.1610840588264227, - "grad_norm": 4.426260471343994, - "learning_rate": 3.749321813242827e-05, - "loss": 0.1232, - "step": 3275 - }, - { - "epoch": 0.1623137081304412, - "grad_norm": 14.899201393127441, - "learning_rate": 3.745563649524398e-05, - "loss": 0.1896, - "step": 3300 - }, - { - "epoch": 0.1635433574344597, - "grad_norm": 0.05510088801383972, - "learning_rate": 3.74177943642356e-05, - "loss": 0.106, - "step": 3325 - }, - { - "epoch": 0.1647730067384782, - "grad_norm": 0.09842230379581451, - "learning_rate": 3.737969230412853e-05, - "loss": 0.1185, - "step": 3350 - }, - { - "epoch": 0.16600265604249667, - "grad_norm": 0.3339563012123108, - "learning_rate": 3.734133088352716e-05, - "loss": 0.0946, - "step": 3375 - }, - { - "epoch": 0.16723230534651518, - "grad_norm": 0.0028879987075924873, - "learning_rate": 3.730271067490635e-05, - "loss": 0.0647, - "step": 3400 - }, - { - "epoch": 0.16846195465053368, - "grad_norm": 0.007909489795565605, - "learning_rate": 3.7263832254602906e-05, - "loss": 0.1502, - "step": 3425 - }, - { - "epoch": 0.16969160395455216, - "grad_norm": 0.04252486675977707, - "learning_rate": 3.722469620280699e-05, - "loss": 0.0851, - "step": 3450 - }, - { - "epoch": 0.17092125325857066, - "grad_norm": 28.634723663330078, - "learning_rate": 3.7185303103553416e-05, - "loss": 0.1269, - "step": 3475 - }, - { - "epoch": 0.17215090256258914, - "grad_norm": 0.5415559411048889, - "learning_rate": 3.714565354471301e-05, - "loss": 0.0974, - "step": 3500 - }, - { - "epoch": 0.17338055186660764, - "grad_norm": 0.4741762578487396, - "learning_rate": 3.710574811798376e-05, - "loss": 0.1111, - "step": 3525 - }, - { - "epoch": 0.17461020117062614, - "grad_norm": 0.8153554797172546, - "learning_rate": 3.7065587418882035e-05, - "loss": 0.1016, - "step": 3550 - }, - { - "epoch": 0.17583985047464462, - "grad_norm": 11.659468650817871, - "learning_rate": 3.702517204673369e-05, - "loss": 0.0512, - "step": 3575 - }, - { - "epoch": 0.17706949977866313, - "grad_norm": 80.21025848388672, - "learning_rate": 3.698450260466509e-05, - "loss": 0.0438, - "step": 3600 - }, - { - "epoch": 0.17829914908268163, - "grad_norm": 0.04379294440150261, - "learning_rate": 3.6943579699594156e-05, - "loss": 0.084, - "step": 3625 - }, - { - "epoch": 0.1795287983867001, - "grad_norm": 25.604286193847656, - "learning_rate": 3.690240394222128e-05, - "loss": 0.1579, - "step": 3650 - }, - { - "epoch": 0.1807584476907186, - "grad_norm": 11.050031661987305, - "learning_rate": 3.6860975947020216e-05, - "loss": 0.1355, - "step": 3675 - }, - { - "epoch": 0.18198809699473711, - "grad_norm": 9.395716667175293, - "learning_rate": 3.68192963322289e-05, - "loss": 0.1059, - "step": 3700 - }, - { - "epoch": 0.1832177462987556, - "grad_norm": 0.017339520156383514, - "learning_rate": 3.6777365719840266e-05, - "loss": 0.0682, - "step": 3725 - }, - { - "epoch": 0.1844473956027741, - "grad_norm": 14.786436080932617, - "learning_rate": 3.6735184735592905e-05, - "loss": 0.1147, - "step": 3750 - }, - { - "epoch": 0.18567704490679257, - "grad_norm": 0.12797459959983826, - "learning_rate": 3.6692754008961774e-05, - "loss": 0.0848, - "step": 3775 - }, - { - "epoch": 0.18690669421081108, - "grad_norm": 4.5769267082214355, - "learning_rate": 3.6650074173148766e-05, - "loss": 0.0917, - "step": 3800 - }, - { - "epoch": 0.18813634351482958, - "grad_norm": 0.09307766705751419, - "learning_rate": 3.660714586507329e-05, - "loss": 0.0675, - "step": 3825 - }, - { - "epoch": 0.18936599281884806, - "grad_norm": 0.530844509601593, - "learning_rate": 3.656396972536276e-05, - "loss": 0.1011, - "step": 3850 - }, - { - "epoch": 0.19059564212286656, - "grad_norm": 0.32543912529945374, - "learning_rate": 3.6520546398343017e-05, - "loss": 0.0901, - "step": 3875 - }, - { - "epoch": 0.19182529142688506, - "grad_norm": 27.788928985595703, - "learning_rate": 3.647687653202873e-05, - "loss": 0.1019, - "step": 3900 - }, - { - "epoch": 0.19305494073090354, - "grad_norm": 0.06812018901109695, - "learning_rate": 3.643296077811371e-05, - "loss": 0.0867, - "step": 3925 - }, - { - "epoch": 0.19428459003492204, - "grad_norm": 1.0923099517822266, - "learning_rate": 3.638879979196123e-05, - "loss": 0.0251, - "step": 3950 - }, - { - "epoch": 0.19551423933894052, - "grad_norm": 0.019240351393818855, - "learning_rate": 3.634439423259418e-05, - "loss": 0.083, - "step": 3975 - }, - { - "epoch": 0.19674388864295903, - "grad_norm": 0.017756102606654167, - "learning_rate": 3.629974476268525e-05, - "loss": 0.1405, - "step": 4000 - }, - { - "epoch": 0.19797353794697753, - "grad_norm": 0.051940638571977615, - "learning_rate": 3.625485204854711e-05, - "loss": 0.0753, - "step": 4025 - }, - { - "epoch": 0.199203187250996, - "grad_norm": 55.053382873535156, - "learning_rate": 3.6209716760122356e-05, - "loss": 0.0766, - "step": 4050 - }, - { - "epoch": 0.2004328365550145, - "grad_norm": 16.147336959838867, - "learning_rate": 3.6164339570973595e-05, - "loss": 0.1801, - "step": 4075 - }, - { - "epoch": 0.201662485859033, - "grad_norm": 0.045497551560401917, - "learning_rate": 3.6118721158273345e-05, - "loss": 0.0413, - "step": 4100 - }, - { - "epoch": 0.2028921351630515, - "grad_norm": 0.013707015663385391, - "learning_rate": 3.607286220279398e-05, - "loss": 0.072, - "step": 4125 - }, - { - "epoch": 0.20412178446707, - "grad_norm": 0.009980201721191406, - "learning_rate": 3.60267633888975e-05, - "loss": 0.0445, - "step": 4150 - }, - { - "epoch": 0.2053514337710885, - "grad_norm": 12.93321418762207, - "learning_rate": 3.5980425404525406e-05, - "loss": 0.0933, - "step": 4175 - }, - { - "epoch": 0.20658108307510697, - "grad_norm": 0.35677772760391235, - "learning_rate": 3.593384894118833e-05, - "loss": 0.0851, - "step": 4200 - }, - { - "epoch": 0.20781073237912548, - "grad_norm": 0.3404024839401245, - "learning_rate": 3.588703469395583e-05, - "loss": 0.0638, - "step": 4225 - }, - { - "epoch": 0.20904038168314396, - "grad_norm": 0.09931521862745285, - "learning_rate": 3.583998336144591e-05, - "loss": 0.1079, - "step": 4250 - }, - { - "epoch": 0.21027003098716246, - "grad_norm": 20.259796142578125, - "learning_rate": 3.5792695645814673e-05, - "loss": 0.1272, - "step": 4275 - }, - { - "epoch": 0.21149968029118096, - "grad_norm": 0.06137287989258766, - "learning_rate": 3.574517225274581e-05, - "loss": 0.0685, - "step": 4300 - }, - { - "epoch": 0.21272932959519944, - "grad_norm": 0.09915748983621597, - "learning_rate": 3.569741389144005e-05, - "loss": 0.1124, - "step": 4325 - }, - { - "epoch": 0.21395897889921794, - "grad_norm": 0.8329102993011475, - "learning_rate": 3.5649421274604614e-05, - "loss": 0.1305, - "step": 4350 - }, - { - "epoch": 0.21518862820323645, - "grad_norm": 0.15974581241607666, - "learning_rate": 3.5601195118442576e-05, - "loss": 0.0898, - "step": 4375 - }, - { - "epoch": 0.21641827750725492, - "grad_norm": 1.7836397886276245, - "learning_rate": 3.5552736142642145e-05, - "loss": 0.0518, - "step": 4400 - }, - { - "epoch": 0.21764792681127343, - "grad_norm": 33.01706314086914, - "learning_rate": 3.5504045070365935e-05, - "loss": 0.1166, - "step": 4425 - }, - { - "epoch": 0.2188775761152919, - "grad_norm": 0.014293343760073185, - "learning_rate": 3.545512262824021e-05, - "loss": 0.0456, - "step": 4450 - }, - { - "epoch": 0.2201072254193104, - "grad_norm": 0.31406551599502563, - "learning_rate": 3.5405969546343984e-05, - "loss": 0.0502, - "step": 4475 - }, - { - "epoch": 0.2213368747233289, - "grad_norm": 0.9230334162712097, - "learning_rate": 3.535658655819816e-05, - "loss": 0.0818, - "step": 4500 - }, - { - "epoch": 0.2225665240273474, - "grad_norm": 0.03092017211019993, - "learning_rate": 3.530697440075458e-05, - "loss": 0.1337, - "step": 4525 - }, - { - "epoch": 0.2237961733313659, - "grad_norm": 0.06680609285831451, - "learning_rate": 3.525713381438501e-05, - "loss": 0.1335, - "step": 4550 - }, - { - "epoch": 0.2250258226353844, - "grad_norm": 3.6776840686798096, - "learning_rate": 3.520706554287013e-05, - "loss": 0.1297, - "step": 4575 - }, - { - "epoch": 0.22625547193940287, - "grad_norm": 0.13203640282154083, - "learning_rate": 3.515677033338841e-05, - "loss": 0.1148, - "step": 4600 - }, - { - "epoch": 0.22748512124342138, - "grad_norm": 15.165017127990723, - "learning_rate": 3.5106248936504906e-05, - "loss": 0.1629, - "step": 4625 - }, - { - "epoch": 0.22871477054743988, - "grad_norm": 0.18457254767417908, - "learning_rate": 3.505550210616017e-05, - "loss": 0.1205, - "step": 4650 - }, - { - "epoch": 0.22994441985145836, - "grad_norm": 0.9447794556617737, - "learning_rate": 3.500453059965893e-05, - "loss": 0.0836, - "step": 4675 - }, - { - "epoch": 0.23117406915547686, - "grad_norm": 0.03009045124053955, - "learning_rate": 3.4953335177658776e-05, - "loss": 0.1049, - "step": 4700 - }, - { - "epoch": 0.23240371845949534, - "grad_norm": 2.2781057357788086, - "learning_rate": 3.4901916604158856e-05, - "loss": 0.1955, - "step": 4725 - }, - { - "epoch": 0.23363336776351384, - "grad_norm": 14.77724552154541, - "learning_rate": 3.4850275646488436e-05, - "loss": 0.1297, - "step": 4750 - }, - { - "epoch": 0.23486301706753235, - "grad_norm": 9.654071807861328, - "learning_rate": 3.4798413075295474e-05, - "loss": 0.0768, - "step": 4775 - }, - { - "epoch": 0.23609266637155082, - "grad_norm": 8.697239875793457, - "learning_rate": 3.474632966453511e-05, - "loss": 0.0887, - "step": 4800 - }, - { - "epoch": 0.23732231567556933, - "grad_norm": 0.12378008663654327, - "learning_rate": 3.469402619145809e-05, - "loss": 0.1026, - "step": 4825 - }, - { - "epoch": 0.23855196497958783, - "grad_norm": 157.32984924316406, - "learning_rate": 3.464150343659924e-05, - "loss": 0.0426, - "step": 4850 - }, - { - "epoch": 0.2397816142836063, - "grad_norm": 0.03085164539515972, - "learning_rate": 3.458876218376572e-05, - "loss": 0.0702, - "step": 4875 - }, - { - "epoch": 0.2410112635876248, - "grad_norm": 0.20448239147663116, - "learning_rate": 3.453580322002541e-05, - "loss": 0.0967, - "step": 4900 - }, - { - "epoch": 0.2422409128916433, - "grad_norm": 0.03843291103839874, - "learning_rate": 3.448262733569512e-05, - "loss": 0.0493, - "step": 4925 - }, - { - "epoch": 0.2434705621956618, - "grad_norm": 0.01994679868221283, - "learning_rate": 3.4429235324328815e-05, - "loss": 0.0423, - "step": 4950 - }, - { - "epoch": 0.2447002114996803, - "grad_norm": 0.002851160941645503, - "learning_rate": 3.437562798270575e-05, - "loss": 0.0968, - "step": 4975 - }, - { - "epoch": 0.24592986080369877, - "grad_norm": 175.6117706298828, - "learning_rate": 3.432180611081862e-05, - "loss": 0.0866, - "step": 5000 - }, - { - "epoch": 0.24715951010771728, - "grad_norm": 0.12921656668186188, - "learning_rate": 3.4267770511861565e-05, - "loss": 0.0728, - "step": 5025 - }, - { - "epoch": 0.24838915941173578, - "grad_norm": 0.006943744141608477, - "learning_rate": 3.421352199221824e-05, - "loss": 0.1009, - "step": 5050 - }, - { - "epoch": 0.24961880871575426, - "grad_norm": 0.14961707592010498, - "learning_rate": 3.4159061361449744e-05, - "loss": 0.2192, - "step": 5075 - }, - { - "epoch": 0.25084845801977274, - "grad_norm": 0.5022117495536804, - "learning_rate": 3.410438943228256e-05, - "loss": 0.0535, - "step": 5100 - }, - { - "epoch": 0.25207810732379127, - "grad_norm": 0.014706293120980263, - "learning_rate": 3.4049507020596405e-05, - "loss": 0.052, - "step": 5125 - }, - { - "epoch": 0.25330775662780974, - "grad_norm": 17.36297607421875, - "learning_rate": 3.3994414945412076e-05, - "loss": 0.1163, - "step": 5150 - }, - { - "epoch": 0.2545374059318282, - "grad_norm": 0.01775146648287773, - "learning_rate": 3.3939114028879236e-05, - "loss": 0.0574, - "step": 5175 - }, - { - "epoch": 0.25576705523584675, - "grad_norm": 16.34295654296875, - "learning_rate": 3.3883605096264084e-05, - "loss": 0.0557, - "step": 5200 - }, - { - "epoch": 0.25699670453986523, - "grad_norm": 16.539438247680664, - "learning_rate": 3.382788897593712e-05, - "loss": 0.1888, - "step": 5225 - }, - { - "epoch": 0.2582263538438837, - "grad_norm": 19.94977378845215, - "learning_rate": 3.377196649936074e-05, - "loss": 0.1129, - "step": 5250 - }, - { - "epoch": 0.25945600314790224, - "grad_norm": 0.04088423028588295, - "learning_rate": 3.371583850107682e-05, - "loss": 0.0832, - "step": 5275 - }, - { - "epoch": 0.2606856524519207, - "grad_norm": 0.09783986210823059, - "learning_rate": 3.365950581869428e-05, - "loss": 0.1231, - "step": 5300 - }, - { - "epoch": 0.2619153017559392, - "grad_norm": 0.2700275182723999, - "learning_rate": 3.3602969292876595e-05, - "loss": 0.0591, - "step": 5325 - }, - { - "epoch": 0.2631449510599577, - "grad_norm": 31.582439422607422, - "learning_rate": 3.354622976732922e-05, - "loss": 0.1097, - "step": 5350 - }, - { - "epoch": 0.2643746003639762, - "grad_norm": 0.011808057315647602, - "learning_rate": 3.348928808878702e-05, - "loss": 0.0334, - "step": 5375 - }, - { - "epoch": 0.2656042496679947, - "grad_norm": 13.282065391540527, - "learning_rate": 3.343214510700163e-05, - "loss": 0.1275, - "step": 5400 - }, - { - "epoch": 0.2668338989720132, - "grad_norm": 1.279388427734375, - "learning_rate": 3.337480167472877e-05, - "loss": 0.0859, - "step": 5425 - }, - { - "epoch": 0.2680635482760317, - "grad_norm": 0.36707261204719543, - "learning_rate": 3.33172586477155e-05, - "loss": 0.0873, - "step": 5450 - }, - { - "epoch": 0.26929319758005016, - "grad_norm": 0.026793906465172768, - "learning_rate": 3.325951688468751e-05, - "loss": 0.0848, - "step": 5475 - }, - { - "epoch": 0.2705228468840687, - "grad_norm": 0.029557965695858, - "learning_rate": 3.320157724733625e-05, - "loss": 0.1005, - "step": 5500 - }, - { - "epoch": 0.27175249618808717, - "grad_norm": 21.087844848632812, - "learning_rate": 3.314344060030607e-05, - "loss": 0.0449, - "step": 5525 - }, - { - "epoch": 0.27298214549210564, - "grad_norm": 10.756747245788574, - "learning_rate": 3.308510781118135e-05, - "loss": 0.165, - "step": 5550 - }, - { - "epoch": 0.2742117947961241, - "grad_norm": 11.816221237182617, - "learning_rate": 3.3026579750473545e-05, - "loss": 0.1477, - "step": 5575 - }, - { - "epoch": 0.27544144410014265, - "grad_norm": 28.293155670166016, - "learning_rate": 3.2967857291608143e-05, - "loss": 0.0583, - "step": 5600 - }, - { - "epoch": 0.27667109340416113, - "grad_norm": 25.228450775146484, - "learning_rate": 3.2908941310911736e-05, - "loss": 0.1461, - "step": 5625 - }, - { - "epoch": 0.2779007427081796, - "grad_norm": 0.3979050815105438, - "learning_rate": 3.2849832687598835e-05, - "loss": 0.1004, - "step": 5650 - }, - { - "epoch": 0.27913039201219814, - "grad_norm": 19.697154998779297, - "learning_rate": 3.27905323037588e-05, - "loss": 0.0853, - "step": 5675 - }, - { - "epoch": 0.2803600413162166, - "grad_norm": 0.2542398273944855, - "learning_rate": 3.273104104434268e-05, - "loss": 0.1457, - "step": 5700 - }, - { - "epoch": 0.2815896906202351, - "grad_norm": 0.6001378893852234, - "learning_rate": 3.2671359797149986e-05, - "loss": 0.0884, - "step": 5725 - }, - { - "epoch": 0.2828193399242536, - "grad_norm": 0.008191637694835663, - "learning_rate": 3.261148945281548e-05, - "loss": 0.1621, - "step": 5750 - }, - { - "epoch": 0.2840489892282721, - "grad_norm": 0.09628278762102127, - "learning_rate": 3.255143090479582e-05, - "loss": 0.1404, - "step": 5775 - }, - { - "epoch": 0.2852786385322906, - "grad_norm": 0.11581960320472717, - "learning_rate": 3.249118504935628e-05, - "loss": 0.14, - "step": 5800 - }, - { - "epoch": 0.2865082878363091, - "grad_norm": 248.70606994628906, - "learning_rate": 3.243075278555737e-05, - "loss": 0.1084, - "step": 5825 - }, - { - "epoch": 0.2877379371403276, - "grad_norm": 20.060178756713867, - "learning_rate": 3.237013501524139e-05, - "loss": 0.1296, - "step": 5850 - }, - { - "epoch": 0.28896758644434606, - "grad_norm": 0.018032871186733246, - "learning_rate": 3.2309332643019e-05, - "loss": 0.0373, - "step": 5875 - }, - { - "epoch": 0.2901972357483646, - "grad_norm": 0.22263678908348083, - "learning_rate": 3.2248346576255704e-05, - "loss": 0.0888, - "step": 5900 - }, - { - "epoch": 0.29142688505238307, - "grad_norm": 0.05750199407339096, - "learning_rate": 3.2187177725058325e-05, - "loss": 0.0334, - "step": 5925 - }, - { - "epoch": 0.29265653435640154, - "grad_norm": 25.452117919921875, - "learning_rate": 3.21258270022614e-05, - "loss": 0.0689, - "step": 5950 - }, - { - "epoch": 0.2938861836604201, - "grad_norm": 0.02928222343325615, - "learning_rate": 3.2064295323413575e-05, - "loss": 0.097, - "step": 5975 - }, - { - "epoch": 0.29511583296443855, - "grad_norm": 0.30823367834091187, - "learning_rate": 3.2002583606763945e-05, - "loss": 0.1576, - "step": 6000 - }, - { - "epoch": 0.29634548226845703, - "grad_norm": 15.235458374023438, - "learning_rate": 3.1940692773248336e-05, - "loss": 0.0441, - "step": 6025 - }, - { - "epoch": 0.2975751315724755, - "grad_norm": 0.05450942739844322, - "learning_rate": 3.187862374647557e-05, - "loss": 0.1224, - "step": 6050 - }, - { - "epoch": 0.29880478087649404, - "grad_norm": 0.06589404493570328, - "learning_rate": 3.1816377452713685e-05, - "loss": 0.0976, - "step": 6075 - }, - { - "epoch": 0.3000344301805125, - "grad_norm": 0.3371809422969818, - "learning_rate": 3.175395482087611e-05, - "loss": 0.0631, - "step": 6100 - }, - { - "epoch": 0.301264079484531, - "grad_norm": 13.719305038452148, - "learning_rate": 3.16913567825078e-05, - "loss": 0.105, - "step": 6125 - }, - { - "epoch": 0.3024937287885495, - "grad_norm": 1.520682692527771, - "learning_rate": 3.162858427177132e-05, - "loss": 0.1283, - "step": 6150 - }, - { - "epoch": 0.303723378092568, - "grad_norm": 0.14732028543949127, - "learning_rate": 3.156563822543295e-05, - "loss": 0.0538, - "step": 6175 - }, - { - "epoch": 0.3049530273965865, - "grad_norm": 12.324581146240234, - "learning_rate": 3.150251958284865e-05, - "loss": 0.0866, - "step": 6200 - }, - { - "epoch": 0.306182676700605, - "grad_norm": 0.6621875166893005, - "learning_rate": 3.143922928595007e-05, - "loss": 0.1342, - "step": 6225 - }, - { - "epoch": 0.3074123260046235, - "grad_norm": 0.4931362271308899, - "learning_rate": 3.137576827923051e-05, - "loss": 0.1223, - "step": 6250 - }, - { - "epoch": 0.30864197530864196, - "grad_norm": 0.14590081572532654, - "learning_rate": 3.1312137509730776e-05, - "loss": 0.0482, - "step": 6275 - }, - { - "epoch": 0.3098716246126605, - "grad_norm": 0.08238765597343445, - "learning_rate": 3.12483379270251e-05, - "loss": 0.0444, - "step": 6300 - }, - { - "epoch": 0.31110127391667897, - "grad_norm": 0.04264151304960251, - "learning_rate": 3.118437048320694e-05, - "loss": 0.1362, - "step": 6325 - }, - { - "epoch": 0.31233092322069744, - "grad_norm": 1.2662688493728638, - "learning_rate": 3.112023613287477e-05, - "loss": 0.12, - "step": 6350 - }, - { - "epoch": 0.313560572524716, - "grad_norm": 0.06124679744243622, - "learning_rate": 3.105593583311784e-05, - "loss": 0.0877, - "step": 6375 - }, - { - "epoch": 0.31479022182873445, - "grad_norm": 0.2560616135597229, - "learning_rate": 3.0991470543501924e-05, - "loss": 0.0801, - "step": 6400 - }, - { - "epoch": 0.3160198711327529, - "grad_norm": 0.6508743762969971, - "learning_rate": 3.0926841226054936e-05, - "loss": 0.0868, - "step": 6425 - }, - { - "epoch": 0.31724952043677146, - "grad_norm": 0.1555066555738449, - "learning_rate": 3.086204884525263e-05, - "loss": 0.0949, - "step": 6450 - }, - { - "epoch": 0.31847916974078994, - "grad_norm": 82.56906127929688, - "learning_rate": 3.079709436800416e-05, - "loss": 0.1292, - "step": 6475 - }, - { - "epoch": 0.3197088190448084, - "grad_norm": 4.172969818115234, - "learning_rate": 3.0731978763637725e-05, - "loss": 0.1018, - "step": 6500 - }, - { - "epoch": 0.3209384683488269, - "grad_norm": 1.8177496194839478, - "learning_rate": 3.0666703003886e-05, - "loss": 0.098, - "step": 6525 - }, - { - "epoch": 0.3221681176528454, - "grad_norm": 0.02939017303287983, - "learning_rate": 3.060126806287173e-05, - "loss": 0.0497, - "step": 6550 - }, - { - "epoch": 0.3233977669568639, - "grad_norm": 0.1825716346502304, - "learning_rate": 3.053567491709314e-05, - "loss": 0.0663, - "step": 6575 - }, - { - "epoch": 0.3246274162608824, - "grad_norm": 16.94407844543457, - "learning_rate": 3.0469924545409356e-05, - "loss": 0.1483, - "step": 6600 - }, - { - "epoch": 0.3258570655649009, - "grad_norm": 0.05543515086174011, - "learning_rate": 3.0404017929025844e-05, - "loss": 0.1236, - "step": 6625 - }, - { - "epoch": 0.3270867148689194, - "grad_norm": 0.16338235139846802, - "learning_rate": 3.0337956051479735e-05, - "loss": 0.048, - "step": 6650 - }, - { - "epoch": 0.32831636417293786, - "grad_norm": 22.010915756225586, - "learning_rate": 3.0271739898625135e-05, - "loss": 0.1284, - "step": 6675 - }, - { - "epoch": 0.3295460134769564, - "grad_norm": 0.02764512598514557, - "learning_rate": 3.0205370458618442e-05, - "loss": 0.1342, - "step": 6700 - }, - { - "epoch": 0.33077566278097487, - "grad_norm": 1.0350804328918457, - "learning_rate": 3.01388487219036e-05, - "loss": 0.1287, - "step": 6725 - }, - { - "epoch": 0.33200531208499334, - "grad_norm": 0.05654584988951683, - "learning_rate": 3.007217568119728e-05, - "loss": 0.177, - "step": 6750 - }, - { - "epoch": 0.3332349613890119, - "grad_norm": 0.3662309944629669, - "learning_rate": 3.0005352331474107e-05, - "loss": 0.0258, - "step": 6775 - }, - { - "epoch": 0.33446461069303035, - "grad_norm": 0.03708276525139809, - "learning_rate": 2.9938379669951803e-05, - "loss": 0.0842, - "step": 6800 - }, - { - "epoch": 0.3356942599970488, - "grad_norm": 0.17832373082637787, - "learning_rate": 2.987125869607628e-05, - "loss": 0.1477, - "step": 6825 - }, - { - "epoch": 0.33692390930106736, - "grad_norm": 16.58653450012207, - "learning_rate": 2.9803990411506753e-05, - "loss": 0.1023, - "step": 6850 - }, - { - "epoch": 0.33815355860508584, - "grad_norm": 11.858521461486816, - "learning_rate": 2.9736575820100787e-05, - "loss": 0.2028, - "step": 6875 - }, - { - "epoch": 0.3393832079091043, - "grad_norm": 0.14739990234375, - "learning_rate": 2.9669015927899316e-05, - "loss": 0.0736, - "step": 6900 - }, - { - "epoch": 0.34061285721312284, - "grad_norm": 3.342276096343994, - "learning_rate": 2.960131174311161e-05, - "loss": 0.1298, - "step": 6925 - }, - { - "epoch": 0.3418425065171413, - "grad_norm": 1.434309720993042, - "learning_rate": 2.9533464276100258e-05, - "loss": 0.1468, - "step": 6950 - }, - { - "epoch": 0.3430721558211598, - "grad_norm": 12.109251022338867, - "learning_rate": 2.9465474539366064e-05, - "loss": 0.0983, - "step": 6975 - }, - { - "epoch": 0.3443018051251783, - "grad_norm": 0.21518515050411224, - "learning_rate": 2.9397343547532968e-05, - "loss": 0.0567, - "step": 7000 - }, - { - "epoch": 0.3455314544291968, - "grad_norm": 0.26171043515205383, - "learning_rate": 2.932907231733287e-05, - "loss": 0.0846, - "step": 7025 - }, - { - "epoch": 0.3467611037332153, - "grad_norm": 1.4874420166015625, - "learning_rate": 2.926066186759049e-05, - "loss": 0.082, - "step": 7050 - }, - { - "epoch": 0.34799075303723376, - "grad_norm": 0.014510173350572586, - "learning_rate": 2.919211321920814e-05, - "loss": 0.1771, - "step": 7075 - }, - { - "epoch": 0.3492204023412523, - "grad_norm": 0.04351402074098587, - "learning_rate": 2.9123427395150492e-05, - "loss": 0.0837, - "step": 7100 - }, - { - "epoch": 0.35045005164527077, - "grad_norm": 0.09410078823566437, - "learning_rate": 2.905460542042932e-05, - "loss": 0.0736, - "step": 7125 - }, - { - "epoch": 0.35167970094928924, - "grad_norm": 0.04987524077296257, - "learning_rate": 2.898564832208822e-05, - "loss": 0.0685, - "step": 7150 - }, - { - "epoch": 0.3529093502533078, - "grad_norm": 1.3110145330429077, - "learning_rate": 2.8916557129187225e-05, - "loss": 0.0656, - "step": 7175 - }, - { - "epoch": 0.35413899955732625, - "grad_norm": 0.024401742964982986, - "learning_rate": 2.8847332872787524e-05, - "loss": 0.0758, - "step": 7200 - }, - { - "epoch": 0.3553686488613447, - "grad_norm": 0.05889930576086044, - "learning_rate": 2.8777976585936013e-05, - "loss": 0.1213, - "step": 7225 - }, - { - "epoch": 0.35659829816536326, - "grad_norm": 438.8824157714844, - "learning_rate": 2.8708489303649915e-05, - "loss": 0.1376, - "step": 7250 - }, - { - "epoch": 0.35782794746938174, - "grad_norm": 0.13952317833900452, - "learning_rate": 2.863887206290133e-05, - "loss": 0.062, - "step": 7275 - }, - { - "epoch": 0.3590575967734002, - "grad_norm": 0.021752964705228806, - "learning_rate": 2.8569125902601736e-05, - "loss": 0.0291, - "step": 7300 - }, - { - "epoch": 0.36028724607741874, - "grad_norm": 0.09580439329147339, - "learning_rate": 2.849925186358651e-05, - "loss": 0.1677, - "step": 7325 - }, - { - "epoch": 0.3615168953814372, - "grad_norm": 16.91958999633789, - "learning_rate": 2.8429250988599402e-05, - "loss": 0.076, - "step": 7350 - }, - { - "epoch": 0.3627465446854557, - "grad_norm": 0.16056932508945465, - "learning_rate": 2.8359124322276934e-05, - "loss": 0.1179, - "step": 7375 - }, - { - "epoch": 0.36397619398947423, - "grad_norm": 84.74608612060547, - "learning_rate": 2.8288872911132846e-05, - "loss": 0.1298, - "step": 7400 - }, - { - "epoch": 0.3652058432934927, - "grad_norm": 1.3772354125976562, - "learning_rate": 2.8218497803542484e-05, - "loss": 0.0435, - "step": 7425 - }, - { - "epoch": 0.3664354925975112, - "grad_norm": 0.2272925078868866, - "learning_rate": 2.814800004972712e-05, - "loss": 0.0608, - "step": 7450 - }, - { - "epoch": 0.36766514190152966, - "grad_norm": 0.006342505104839802, - "learning_rate": 2.807738070173831e-05, - "loss": 0.0758, - "step": 7475 - }, - { - "epoch": 0.3688947912055482, - "grad_norm": 18.807802200317383, - "learning_rate": 2.8006640813442172e-05, - "loss": 0.1943, - "step": 7500 - }, - { - "epoch": 0.37012444050956667, - "grad_norm": 1.103560447692871, - "learning_rate": 2.7935781440503695e-05, - "loss": 0.1357, - "step": 7525 - }, - { - "epoch": 0.37135408981358514, - "grad_norm": 1.426470160484314, - "learning_rate": 2.7864803640370928e-05, - "loss": 0.1055, - "step": 7550 - }, - { - "epoch": 0.3725837391176037, - "grad_norm": 0.3027065694332123, - "learning_rate": 2.779370847225925e-05, - "loss": 0.1477, - "step": 7575 - }, - { - "epoch": 0.37381338842162215, - "grad_norm": 0.16107353568077087, - "learning_rate": 2.7722496997135547e-05, - "loss": 0.0331, - "step": 7600 - }, - { - "epoch": 0.3750430377256406, - "grad_norm": 6.908923625946045, - "learning_rate": 2.7651170277702376e-05, - "loss": 0.1134, - "step": 7625 - }, - { - "epoch": 0.37627268702965916, - "grad_norm": 0.9830541610717773, - "learning_rate": 2.7579729378382088e-05, - "loss": 0.0453, - "step": 7650 - }, - { - "epoch": 0.37750233633367763, - "grad_norm": 29.35677146911621, - "learning_rate": 2.7508175365300997e-05, - "loss": 0.1292, - "step": 7675 - }, - { - "epoch": 0.3787319856376961, - "grad_norm": 12.90567684173584, - "learning_rate": 2.7436509306273404e-05, - "loss": 0.0826, - "step": 7700 - }, - { - "epoch": 0.37996163494171464, - "grad_norm": 0.07573653757572174, - "learning_rate": 2.736473227078571e-05, - "loss": 0.0963, - "step": 7725 - }, - { - "epoch": 0.3811912842457331, - "grad_norm": 0.8173651099205017, - "learning_rate": 2.7292845329980435e-05, - "loss": 0.0404, - "step": 7750 - }, - { - "epoch": 0.3824209335497516, - "grad_norm": 16.61020278930664, - "learning_rate": 2.7220849556640233e-05, - "loss": 0.1393, - "step": 7775 - }, - { - "epoch": 0.38365058285377013, - "grad_norm": 0.08589452505111694, - "learning_rate": 2.7148746025171887e-05, - "loss": 0.0969, - "step": 7800 - }, - { - "epoch": 0.3848802321577886, - "grad_norm": 0.24145151674747467, - "learning_rate": 2.7076535811590293e-05, - "loss": 0.0922, - "step": 7825 - }, - { - "epoch": 0.3861098814618071, - "grad_norm": 329.5946350097656, - "learning_rate": 2.7004219993502362e-05, - "loss": 0.1094, - "step": 7850 - }, - { - "epoch": 0.3873395307658256, - "grad_norm": 23.75005340576172, - "learning_rate": 2.6931799650090962e-05, - "loss": 0.0928, - "step": 7875 - }, - { - "epoch": 0.3885691800698441, - "grad_norm": 0.15798619389533997, - "learning_rate": 2.6859275862098835e-05, - "loss": 0.0498, - "step": 7900 - }, - { - "epoch": 0.38979882937386257, - "grad_norm": 0.28518229722976685, - "learning_rate": 2.6786649711812427e-05, - "loss": 0.129, - "step": 7925 - }, - { - "epoch": 0.39102847867788104, - "grad_norm": 11.863237380981445, - "learning_rate": 2.6713922283045758e-05, - "loss": 0.0726, - "step": 7950 - }, - { - "epoch": 0.3922581279818996, - "grad_norm": 0.5470972061157227, - "learning_rate": 2.664109466112425e-05, - "loss": 0.1385, - "step": 7975 - }, - { - "epoch": 0.39348777728591805, - "grad_norm": 153.12283325195312, - "learning_rate": 2.656816793286853e-05, - "loss": 0.0894, - "step": 8000 - }, - { - "epoch": 0.3947174265899365, - "grad_norm": 0.3189511299133301, - "learning_rate": 2.64951431865782e-05, - "loss": 0.0652, - "step": 8025 - }, - { - "epoch": 0.39594707589395506, - "grad_norm": 0.12088263779878616, - "learning_rate": 2.642202151201561e-05, - "loss": 0.0999, - "step": 8050 - }, - { - "epoch": 0.39717672519797353, - "grad_norm": 0.3240305781364441, - "learning_rate": 2.6348804000389586e-05, - "loss": 0.0547, - "step": 8075 - }, - { - "epoch": 0.398406374501992, - "grad_norm": 0.04540867358446121, - "learning_rate": 2.6275491744339155e-05, - "loss": 0.0412, - "step": 8100 - }, - { - "epoch": 0.39963602380601054, - "grad_norm": 0.2657265067100525, - "learning_rate": 2.6202085837917226e-05, - "loss": 0.0517, - "step": 8125 - }, - { - "epoch": 0.400865673110029, - "grad_norm": 0.42878657579421997, - "learning_rate": 2.612858737657427e-05, - "loss": 0.0365, - "step": 8150 - }, - { - "epoch": 0.4020953224140475, - "grad_norm": 0.07897116988897324, - "learning_rate": 2.6054997457141976e-05, - "loss": 0.0533, - "step": 8175 - }, - { - "epoch": 0.403324971718066, - "grad_norm": 32.6783447265625, - "learning_rate": 2.598131717781688e-05, - "loss": 0.0795, - "step": 8200 - }, - { - "epoch": 0.4045546210220845, - "grad_norm": 0.0742940828204155, - "learning_rate": 2.5907547638143985e-05, - "loss": 0.0942, - "step": 8225 - }, - { - "epoch": 0.405784270326103, - "grad_norm": 0.056350771337747574, - "learning_rate": 2.5833689939000325e-05, - "loss": 0.133, - "step": 8250 - }, - { - "epoch": 0.4070139196301215, - "grad_norm": 0.06526649743318558, - "learning_rate": 2.5759745182578558e-05, - "loss": 0.0825, - "step": 8275 - }, - { - "epoch": 0.40824356893414, - "grad_norm": 0.25865426659584045, - "learning_rate": 2.5685714472370534e-05, - "loss": 0.063, - "step": 8300 - }, - { - "epoch": 0.40947321823815847, - "grad_norm": 0.08631353080272675, - "learning_rate": 2.561159891315079e-05, - "loss": 0.0596, - "step": 8325 - }, - { - "epoch": 0.410702867542177, - "grad_norm": 42.27391815185547, - "learning_rate": 2.5537399610960082e-05, - "loss": 0.1601, - "step": 8350 - }, - { - "epoch": 0.4119325168461955, - "grad_norm": 21.266498565673828, - "learning_rate": 2.546311767308888e-05, - "loss": 0.0916, - "step": 8375 - }, - { - "epoch": 0.41316216615021395, - "grad_norm": 0.025975046679377556, - "learning_rate": 2.5388754208060854e-05, - "loss": 0.1447, - "step": 8400 - }, - { - "epoch": 0.4143918154542324, - "grad_norm": 0.05890919640660286, - "learning_rate": 2.53143103256163e-05, - "loss": 0.0732, - "step": 8425 - }, - { - "epoch": 0.41562146475825096, - "grad_norm": 67.43768310546875, - "learning_rate": 2.523978713669561e-05, - "loss": 0.1213, - "step": 8450 - }, - { - "epoch": 0.41685111406226943, - "grad_norm": 10.836592674255371, - "learning_rate": 2.5165185753422688e-05, - "loss": 0.1027, - "step": 8475 - }, - { - "epoch": 0.4180807633662879, - "grad_norm": 0.32174211740493774, - "learning_rate": 2.509050728908833e-05, - "loss": 0.1291, - "step": 8500 - }, - { - "epoch": 0.41931041267030644, - "grad_norm": 0.331082820892334, - "learning_rate": 2.5015752858133637e-05, - "loss": 0.041, - "step": 8525 - }, - { - "epoch": 0.4205400619743249, - "grad_norm": 0.014948999509215355, - "learning_rate": 2.4940923576133376e-05, - "loss": 0.1136, - "step": 8550 - }, - { - "epoch": 0.4217697112783434, - "grad_norm": 64.26248931884766, - "learning_rate": 2.4866020559779335e-05, - "loss": 0.0995, - "step": 8575 - }, - { - "epoch": 0.4229993605823619, - "grad_norm": 0.08037187159061432, - "learning_rate": 2.4791044926863635e-05, - "loss": 0.0666, - "step": 8600 - }, - { - "epoch": 0.4242290098863804, - "grad_norm": 0.03606114163994789, - "learning_rate": 2.4715997796262094e-05, - "loss": 0.0476, - "step": 8625 - }, - { - "epoch": 0.4254586591903989, - "grad_norm": 15.894445419311523, - "learning_rate": 2.4640880287917485e-05, - "loss": 0.0595, - "step": 8650 - }, - { - "epoch": 0.4266883084944174, - "grad_norm": 0.013431191444396973, - "learning_rate": 2.4565693522822845e-05, - "loss": 0.0751, - "step": 8675 - }, - { - "epoch": 0.4279179577984359, - "grad_norm": 0.15397942066192627, - "learning_rate": 2.449043862300475e-05, - "loss": 0.1435, - "step": 8700 - }, - { - "epoch": 0.42914760710245436, - "grad_norm": 0.009304300881922245, - "learning_rate": 2.4415116711506554e-05, - "loss": 0.0188, - "step": 8725 - }, - { - "epoch": 0.4303772564064729, - "grad_norm": 43.02936935424805, - "learning_rate": 2.433972891237164e-05, - "loss": 0.0951, - "step": 8750 - }, - { - "epoch": 0.4316069057104914, - "grad_norm": 116.9205551147461, - "learning_rate": 2.426427635062666e-05, - "loss": 0.0865, - "step": 8775 - }, - { - "epoch": 0.43283655501450985, - "grad_norm": 8.969911575317383, - "learning_rate": 2.4188760152264718e-05, - "loss": 0.1108, - "step": 8800 - }, - { - "epoch": 0.4340662043185284, - "grad_norm": 0.10386780649423599, - "learning_rate": 2.4113181444228567e-05, - "loss": 0.0827, - "step": 8825 - }, - { - "epoch": 0.43529585362254686, - "grad_norm": 14.26266860961914, - "learning_rate": 2.4037541354393832e-05, - "loss": 0.0382, - "step": 8850 - }, - { - "epoch": 0.43652550292656533, - "grad_norm": 0.791311502456665, - "learning_rate": 2.3961841011552137e-05, - "loss": 0.1018, - "step": 8875 - }, - { - "epoch": 0.4377551522305838, - "grad_norm": 0.0810023695230484, - "learning_rate": 2.388608154539426e-05, - "loss": 0.0862, - "step": 8900 - }, - { - "epoch": 0.43898480153460234, - "grad_norm": 17.69713592529297, - "learning_rate": 2.3810264086493312e-05, - "loss": 0.0597, - "step": 8925 - }, - { - "epoch": 0.4402144508386208, - "grad_norm": 0.02804803103208542, - "learning_rate": 2.373438976628782e-05, - "loss": 0.0895, - "step": 8950 - }, - { - "epoch": 0.4414441001426393, - "grad_norm": 0.029248127713799477, - "learning_rate": 2.3658459717064884e-05, - "loss": 0.0495, - "step": 8975 - }, - { - "epoch": 0.4426737494466578, - "grad_norm": 0.05357440933585167, - "learning_rate": 2.3582475071943233e-05, - "loss": 0.0426, - "step": 9000 - }, - { - "epoch": 0.4439033987506763, - "grad_norm": 28.287826538085938, - "learning_rate": 2.3506436964856366e-05, - "loss": 0.0578, - "step": 9025 - }, - { - "epoch": 0.4451330480546948, - "grad_norm": 32.31364059448242, - "learning_rate": 2.3430346530535587e-05, - "loss": 0.131, - "step": 9050 - }, - { - "epoch": 0.4463626973587133, - "grad_norm": 0.09500091522932053, - "learning_rate": 2.3354204904493103e-05, - "loss": 0.0637, - "step": 9075 - }, - { - "epoch": 0.4475923466627318, - "grad_norm": 0.016421450302004814, - "learning_rate": 2.3278013223005058e-05, - "loss": 0.1137, - "step": 9100 - }, - { - "epoch": 0.44882199596675026, - "grad_norm": 0.07537207752466202, - "learning_rate": 2.3201772623094582e-05, - "loss": 0.0248, - "step": 9125 - }, - { - "epoch": 0.4500516452707688, - "grad_norm": 0.07750261574983597, - "learning_rate": 2.3125484242514817e-05, - "loss": 0.0625, - "step": 9150 - }, - { - "epoch": 0.4512812945747873, - "grad_norm": 0.1884867250919342, - "learning_rate": 2.3049149219731968e-05, - "loss": 0.0945, - "step": 9175 - }, - { - "epoch": 0.45251094387880575, - "grad_norm": 0.0455712266266346, - "learning_rate": 2.2972768693908273e-05, - "loss": 0.1188, - "step": 9200 - }, - { - "epoch": 0.4537405931828243, - "grad_norm": 0.2564387917518616, - "learning_rate": 2.2896343804885022e-05, - "loss": 0.0768, - "step": 9225 - }, - { - "epoch": 0.45497024248684276, - "grad_norm": 0.22998474538326263, - "learning_rate": 2.2819875693165546e-05, - "loss": 0.0722, - "step": 9250 - }, - { - "epoch": 0.45619989179086123, - "grad_norm": 0.00750675518065691, - "learning_rate": 2.27433654998982e-05, - "loss": 0.1093, - "step": 9275 - }, - { - "epoch": 0.45742954109487977, - "grad_norm": 0.024085476994514465, - "learning_rate": 2.266681436685933e-05, - "loss": 0.0419, - "step": 9300 - }, - { - "epoch": 0.45865919039889824, - "grad_norm": 0.08088885992765427, - "learning_rate": 2.2590223436436232e-05, - "loss": 0.0837, - "step": 9325 - }, - { - "epoch": 0.4598888397029167, - "grad_norm": 8.227357864379883, - "learning_rate": 2.2513593851610108e-05, - "loss": 0.0571, - "step": 9350 - }, - { - "epoch": 0.4611184890069352, - "grad_norm": 34.62944030761719, - "learning_rate": 2.2436926755939e-05, - "loss": 0.0912, - "step": 9375 - }, - { - "epoch": 0.4623481383109537, - "grad_norm": 0.02191956900060177, - "learning_rate": 2.2360223293540734e-05, - "loss": 0.0264, - "step": 9400 - }, - { - "epoch": 0.4635777876149722, - "grad_norm": 0.0021480433642864227, - "learning_rate": 2.228348460907586e-05, - "loss": 0.0958, - "step": 9425 - }, - { - "epoch": 0.4648074369189907, - "grad_norm": 32.30785369873047, - "learning_rate": 2.2206711847730532e-05, - "loss": 0.0524, - "step": 9450 - }, - { - "epoch": 0.4660370862230092, - "grad_norm": 0.6486133933067322, - "learning_rate": 2.212990615519945e-05, - "loss": 0.0399, - "step": 9475 - }, - { - "epoch": 0.4672667355270277, - "grad_norm": 152.88101196289062, - "learning_rate": 2.2053068677668747e-05, - "loss": 0.0636, - "step": 9500 - }, - { - "epoch": 0.46849638483104616, - "grad_norm": 24.796552658081055, - "learning_rate": 2.1976200561798903e-05, - "loss": 0.1482, - "step": 9525 - }, - { - "epoch": 0.4697260341350647, - "grad_norm": 0.009832819923758507, - "learning_rate": 2.18993029547076e-05, - "loss": 0.0861, - "step": 9550 - }, - { - "epoch": 0.4709556834390832, - "grad_norm": 0.041190896183252335, - "learning_rate": 2.182237700395264e-05, - "loss": 0.1117, - "step": 9575 - }, - { - "epoch": 0.47218533274310165, - "grad_norm": 25.116497039794922, - "learning_rate": 2.1745423857514797e-05, - "loss": 0.1686, - "step": 9600 - }, - { - "epoch": 0.4734149820471202, - "grad_norm": 0.04010776802897453, - "learning_rate": 2.1668444663780687e-05, - "loss": 0.13, - "step": 9625 - }, - { - "epoch": 0.47464463135113866, - "grad_norm": 33.51155471801758, - "learning_rate": 2.1591440571525633e-05, - "loss": 0.1155, - "step": 9650 - }, - { - "epoch": 0.47587428065515713, - "grad_norm": 0.06877290457487106, - "learning_rate": 2.1514412729896542e-05, - "loss": 0.0916, - "step": 9675 - }, - { - "epoch": 0.47710392995917567, - "grad_norm": 0.10853364318609238, - "learning_rate": 2.143736228839472e-05, - "loss": 0.0914, - "step": 9700 - }, - { - "epoch": 0.47833357926319414, - "grad_norm": 25.901405334472656, - "learning_rate": 2.1360290396858736e-05, - "loss": 0.0755, - "step": 9725 - }, - { - "epoch": 0.4795632285672126, - "grad_norm": 10.1629638671875, - "learning_rate": 2.128319820544727e-05, - "loss": 0.1373, - "step": 9750 - }, - { - "epoch": 0.48079287787123115, - "grad_norm": 72.01494598388672, - "learning_rate": 2.1206086864621924e-05, - "loss": 0.0568, - "step": 9775 - }, - { - "epoch": 0.4820225271752496, - "grad_norm": 0.23499652743339539, - "learning_rate": 2.1128957525130103e-05, - "loss": 0.0538, - "step": 9800 - }, - { - "epoch": 0.4832521764792681, - "grad_norm": 10.809874534606934, - "learning_rate": 2.105181133798778e-05, - "loss": 0.1681, - "step": 9825 - }, - { - "epoch": 0.4844818257832866, - "grad_norm": 0.02270650491118431, - "learning_rate": 2.0974649454462356e-05, - "loss": 0.0658, - "step": 9850 - }, - { - "epoch": 0.4857114750873051, - "grad_norm": 0.04404151067137718, - "learning_rate": 2.0897473026055476e-05, - "loss": 0.1342, - "step": 9875 - }, - { - "epoch": 0.4869411243913236, - "grad_norm": 0.14264808595180511, - "learning_rate": 2.0820283204485844e-05, - "loss": 0.0507, - "step": 9900 - }, - { - "epoch": 0.48817077369534206, - "grad_norm": 22.68619155883789, - "learning_rate": 2.0743081141672023e-05, - "loss": 0.1316, - "step": 9925 - }, - { - "epoch": 0.4894004229993606, - "grad_norm": 77.07496643066406, - "learning_rate": 2.066586798971526e-05, - "loss": 0.0878, - "step": 9950 - }, - { - "epoch": 0.4906300723033791, - "grad_norm": 0.04731987789273262, - "learning_rate": 2.0588644900882296e-05, - "loss": 0.0379, - "step": 9975 - }, - { - "epoch": 0.49185972160739755, - "grad_norm": 20.74472999572754, - "learning_rate": 2.0511413027588145e-05, - "loss": 0.1012, - "step": 10000 - }, - { - "epoch": 0.4930893709114161, - "grad_norm": 12.492639541625977, - "learning_rate": 2.043417352237892e-05, - "loss": 0.0858, - "step": 10025 - }, - { - "epoch": 0.49431902021543456, - "grad_norm": 9.317748069763184, - "learning_rate": 2.035692753791463e-05, - "loss": 0.08, - "step": 10050 - }, - { - "epoch": 0.49554866951945303, - "grad_norm": 0.036508627235889435, - "learning_rate": 2.0279676226951978e-05, - "loss": 0.0807, - "step": 10075 - }, - { - "epoch": 0.49677831882347157, - "grad_norm": 0.34216147661209106, - "learning_rate": 2.020242074232714e-05, - "loss": 0.1024, - "step": 10100 - }, - { - "epoch": 0.49800796812749004, - "grad_norm": 0.44841864705085754, - "learning_rate": 2.01251622369386e-05, - "loss": 0.1104, - "step": 10125 - }, - { - "epoch": 0.4992376174315085, - "grad_norm": 11.761415481567383, - "learning_rate": 2.0047901863729896e-05, - "loss": 0.0482, - "step": 10150 - }, - { - "epoch": 0.500467266735527, - "grad_norm": 0.007313193753361702, - "learning_rate": 1.9970640775672462e-05, - "loss": 0.0722, - "step": 10175 - }, - { - "epoch": 0.5016969160395455, - "grad_norm": 76.78512573242188, - "learning_rate": 1.989338012574838e-05, - "loss": 0.0313, - "step": 10200 - }, - { - "epoch": 0.5029265653435641, - "grad_norm": 14.177632331848145, - "learning_rate": 1.9816121066933203e-05, - "loss": 0.1005, - "step": 10225 - }, - { - "epoch": 0.5041562146475825, - "grad_norm": 0.0636470839381218, - "learning_rate": 1.9738864752178752e-05, - "loss": 0.0483, - "step": 10250 - }, - { - "epoch": 0.505385863951601, - "grad_norm": 0.01392904482781887, - "learning_rate": 1.9661612334395883e-05, - "loss": 0.0914, - "step": 10275 - }, - { - "epoch": 0.5066155132556195, - "grad_norm": 19.098283767700195, - "learning_rate": 1.9584364966437295e-05, - "loss": 0.096, - "step": 10300 - }, - { - "epoch": 0.507845162559638, - "grad_norm": 0.02729766070842743, - "learning_rate": 1.9507123801080334e-05, - "loss": 0.0715, - "step": 10325 - }, - { - "epoch": 0.5090748118636564, - "grad_norm": 0.5361742973327637, - "learning_rate": 1.942988999100978e-05, - "loss": 0.0218, - "step": 10350 - }, - { - "epoch": 0.510304461167675, - "grad_norm": 0.32791668176651, - "learning_rate": 1.935266468880065e-05, - "loss": 0.0594, - "step": 10375 - }, - { - "epoch": 0.5115341104716935, - "grad_norm": 0.01990235224366188, - "learning_rate": 1.9275449046901e-05, - "loss": 0.049, - "step": 10400 - }, - { - "epoch": 0.512763759775712, - "grad_norm": 11.608404159545898, - "learning_rate": 1.9198244217614716e-05, - "loss": 0.0528, - "step": 10425 - }, - { - "epoch": 0.5139934090797305, - "grad_norm": 0.02364981174468994, - "learning_rate": 1.9121051353084334e-05, - "loss": 0.1522, - "step": 10450 - }, - { - "epoch": 0.5152230583837489, - "grad_norm": 0.015520687215030193, - "learning_rate": 1.9043871605273833e-05, - "loss": 0.0977, - "step": 10475 - }, - { - "epoch": 0.5164527076877674, - "grad_norm": 20.239513397216797, - "learning_rate": 1.896670612595144e-05, - "loss": 0.0471, - "step": 10500 - }, - { - "epoch": 0.517682356991786, - "grad_norm": 0.3524812161922455, - "learning_rate": 1.8889556066672466e-05, - "loss": 0.0186, - "step": 10525 - }, - { - "epoch": 0.5189120062958045, - "grad_norm": 0.0027559343725442886, - "learning_rate": 1.8812422578762105e-05, - "loss": 0.0677, - "step": 10550 - }, - { - "epoch": 0.520141655599823, - "grad_norm": 21.11365509033203, - "learning_rate": 1.8735306813298237e-05, - "loss": 0.2302, - "step": 10575 - }, - { - "epoch": 0.5213713049038414, - "grad_norm": 3.5784640312194824, - "learning_rate": 1.8658209921094284e-05, - "loss": 0.0937, - "step": 10600 - }, - { - "epoch": 0.5226009542078599, - "grad_norm": 0.4144822061061859, - "learning_rate": 1.858113305268201e-05, - "loss": 0.1184, - "step": 10625 - }, - { - "epoch": 0.5238306035118784, - "grad_norm": 51.8597526550293, - "learning_rate": 1.8504077358294356e-05, - "loss": 0.0787, - "step": 10650 - }, - { - "epoch": 0.5250602528158969, - "grad_norm": 0.020763644948601723, - "learning_rate": 1.842704398784829e-05, - "loss": 0.0777, - "step": 10675 - }, - { - "epoch": 0.5262899021199154, - "grad_norm": 0.008841121569275856, - "learning_rate": 1.8350034090927623e-05, - "loss": 0.0916, - "step": 10700 - }, - { - "epoch": 0.5275195514239339, - "grad_norm": 0.278268426656723, - "learning_rate": 1.827304881676589e-05, - "loss": 0.0889, - "step": 10725 - }, - { - "epoch": 0.5287492007279524, - "grad_norm": 174.84793090820312, - "learning_rate": 1.8196089314229142e-05, - "loss": 0.0994, - "step": 10750 - }, - { - "epoch": 0.5299788500319709, - "grad_norm": 0.4661599397659302, - "learning_rate": 1.8119156731798862e-05, - "loss": 0.0496, - "step": 10775 - }, - { - "epoch": 0.5312084993359893, - "grad_norm": 31.551172256469727, - "learning_rate": 1.8042252217554782e-05, - "loss": 0.07, - "step": 10800 - }, - { - "epoch": 0.5324381486400078, - "grad_norm": 29.22597885131836, - "learning_rate": 1.7965376919157783e-05, - "loss": 0.0127, - "step": 10825 - }, - { - "epoch": 0.5336677979440264, - "grad_norm": 0.6085745692253113, - "learning_rate": 1.788853198383273e-05, - "loss": 0.1369, - "step": 10850 - }, - { - "epoch": 0.5348974472480449, - "grad_norm": 31.647785186767578, - "learning_rate": 1.78117185583514e-05, - "loss": 0.0981, - "step": 10875 - }, - { - "epoch": 0.5361270965520634, - "grad_norm": 0.32608604431152344, - "learning_rate": 1.7734937789015327e-05, - "loss": 0.0925, - "step": 10900 - }, - { - "epoch": 0.5373567458560818, - "grad_norm": 0.0688435360789299, - "learning_rate": 1.7658190821638716e-05, - "loss": 0.0926, - "step": 10925 - }, - { - "epoch": 0.5385863951601003, - "grad_norm": 0.7417702078819275, - "learning_rate": 1.7581478801531327e-05, - "loss": 0.0554, - "step": 10950 - }, - { - "epoch": 0.5398160444641188, - "grad_norm": 0.0864044725894928, - "learning_rate": 1.7504802873481417e-05, - "loss": 0.0877, - "step": 10975 - }, - { - "epoch": 0.5410456937681374, - "grad_norm": 0.5178314447402954, - "learning_rate": 1.742816418173861e-05, - "loss": 0.0372, - "step": 11000 - }, - { - "epoch": 0.5422753430721559, - "grad_norm": 0.01901753433048725, - "learning_rate": 1.735156386999687e-05, - "loss": 0.0714, - "step": 11025 - }, - { - "epoch": 0.5435049923761743, - "grad_norm": 0.044542666524648666, - "learning_rate": 1.7275003081377394e-05, - "loss": 0.0751, - "step": 11050 - }, - { - "epoch": 0.5447346416801928, - "grad_norm": 0.0875534862279892, - "learning_rate": 1.7198482958411568e-05, - "loss": 0.1245, - "step": 11075 - }, - { - "epoch": 0.5459642909842113, - "grad_norm": 0.9284554123878479, - "learning_rate": 1.712200464302392e-05, - "loss": 0.0784, - "step": 11100 - }, - { - "epoch": 0.5471939402882298, - "grad_norm": 0.05334128811955452, - "learning_rate": 1.7045569276515075e-05, - "loss": 0.0771, - "step": 11125 - }, - { - "epoch": 0.5484235895922482, - "grad_norm": 0.011683886870741844, - "learning_rate": 1.6969177999544723e-05, - "loss": 0.054, - "step": 11150 - }, - { - "epoch": 0.5496532388962668, - "grad_norm": 0.22173602879047394, - "learning_rate": 1.689283195211459e-05, - "loss": 0.0623, - "step": 11175 - }, - { - "epoch": 0.5508828882002853, - "grad_norm": 0.5118231177330017, - "learning_rate": 1.6816532273551454e-05, - "loss": 0.0756, - "step": 11200 - }, - { - "epoch": 0.5521125375043038, - "grad_norm": 13.7605619430542, - "learning_rate": 1.67402801024901e-05, - "loss": 0.1213, - "step": 11225 - }, - { - "epoch": 0.5533421868083223, - "grad_norm": 0.03471577167510986, - "learning_rate": 1.6664076576856362e-05, - "loss": 0.0662, - "step": 11250 - }, - { - "epoch": 0.5545718361123407, - "grad_norm": 0.011781508103013039, - "learning_rate": 1.6587922833850117e-05, - "loss": 0.1035, - "step": 11275 - }, - { - "epoch": 0.5558014854163592, - "grad_norm": 7.672538757324219, - "learning_rate": 1.6511820009928342e-05, - "loss": 0.0408, - "step": 11300 - }, - { - "epoch": 0.5570311347203778, - "grad_norm": 23.007104873657227, - "learning_rate": 1.6435769240788114e-05, - "loss": 0.0883, - "step": 11325 - }, - { - "epoch": 0.5582607840243963, - "grad_norm": 0.04628949612379074, - "learning_rate": 1.6359771661349725e-05, - "loss": 0.0619, - "step": 11350 - }, - { - "epoch": 0.5594904333284147, - "grad_norm": 3.146555185317993, - "learning_rate": 1.6283828405739673e-05, - "loss": 0.1071, - "step": 11375 - }, - { - "epoch": 0.5607200826324332, - "grad_norm": 0.0383627749979496, - "learning_rate": 1.6207940607273784e-05, - "loss": 0.053, - "step": 11400 - }, - { - "epoch": 0.5619497319364517, - "grad_norm": 0.020212816074490547, - "learning_rate": 1.6132109398440282e-05, - "loss": 0.0235, - "step": 11425 - }, - { - "epoch": 0.5631793812404702, - "grad_norm": 0.09268704801797867, - "learning_rate": 1.60563359108829e-05, - "loss": 0.1123, - "step": 11450 - }, - { - "epoch": 0.5644090305444888, - "grad_norm": 0.1128346174955368, - "learning_rate": 1.5980621275383975e-05, - "loss": 0.0194, - "step": 11475 - }, - { - "epoch": 0.5656386798485072, - "grad_norm": 0.04844852164387703, - "learning_rate": 1.5904966621847596e-05, - "loss": 0.1264, - "step": 11500 - }, - { - "epoch": 0.5668683291525257, - "grad_norm": 14.482039451599121, - "learning_rate": 1.5829373079282725e-05, - "loss": 0.0661, - "step": 11525 - }, - { - "epoch": 0.5680979784565442, - "grad_norm": 0.03152133524417877, - "learning_rate": 1.5753841775786347e-05, - "loss": 0.0818, - "step": 11550 - }, - { - "epoch": 0.5693276277605627, - "grad_norm": 10.42372989654541, - "learning_rate": 1.5678373838526648e-05, - "loss": 0.0871, - "step": 11575 - }, - { - "epoch": 0.5705572770645811, - "grad_norm": 0.031527079641819, - "learning_rate": 1.5602970393726183e-05, - "loss": 0.1017, - "step": 11600 - }, - { - "epoch": 0.5717869263685996, - "grad_norm": 106.01895141601562, - "learning_rate": 1.5527632566645077e-05, - "loss": 0.021, - "step": 11625 - }, - { - "epoch": 0.5730165756726182, - "grad_norm": 0.10324012488126755, - "learning_rate": 1.5452361481564226e-05, - "loss": 0.0268, - "step": 11650 - }, - { - "epoch": 0.5742462249766367, - "grad_norm": 0.5078524947166443, - "learning_rate": 1.5377158261768527e-05, - "loss": 0.0722, - "step": 11675 - }, - { - "epoch": 0.5754758742806552, - "grad_norm": 0.1197594553232193, - "learning_rate": 1.530202402953011e-05, - "loss": 0.0827, - "step": 11700 - }, - { - "epoch": 0.5767055235846736, - "grad_norm": 0.2292940616607666, - "learning_rate": 1.522695990609158e-05, - "loss": 0.0972, - "step": 11725 - }, - { - "epoch": 0.5779351728886921, - "grad_norm": 0.008458495140075684, - "learning_rate": 1.5151967011649313e-05, - "loss": 0.1843, - "step": 11750 - }, - { - "epoch": 0.5791648221927106, - "grad_norm": 0.10443944483995438, - "learning_rate": 1.5077046465336705e-05, - "loss": 0.0503, - "step": 11775 - }, - { - "epoch": 0.5803944714967292, - "grad_norm": 0.0905158743262291, - "learning_rate": 1.5002199385207483e-05, - "loss": 0.0932, - "step": 11800 - }, - { - "epoch": 0.5816241208007477, - "grad_norm": 38.090667724609375, - "learning_rate": 1.4927426888219053e-05, - "loss": 0.1036, - "step": 11825 - }, - { - "epoch": 0.5828537701047661, - "grad_norm": 20.15511703491211, - "learning_rate": 1.4852730090215776e-05, - "loss": 0.082, - "step": 11850 - }, - { - "epoch": 0.5840834194087846, - "grad_norm": 0.24140150845050812, - "learning_rate": 1.4778110105912348e-05, - "loss": 0.0813, - "step": 11875 - }, - { - "epoch": 0.5853130687128031, - "grad_norm": 0.015845833346247673, - "learning_rate": 1.4703568048877164e-05, - "loss": 0.0541, - "step": 11900 - }, - { - "epoch": 0.5865427180168216, - "grad_norm": 0.34318044781684875, - "learning_rate": 1.4629105031515684e-05, - "loss": 0.022, - "step": 11925 - }, - { - "epoch": 0.5877723673208402, - "grad_norm": 0.008917740546166897, - "learning_rate": 1.4554722165053858e-05, - "loss": 0.0826, - "step": 11950 - }, - { - "epoch": 0.5890020166248586, - "grad_norm": 0.04826900362968445, - "learning_rate": 1.448042055952153e-05, - "loss": 0.0443, - "step": 11975 - }, - { - "epoch": 0.5902316659288771, - "grad_norm": 0.06810244172811508, - "learning_rate": 1.440620132373585e-05, - "loss": 0.0755, - "step": 12000 - }, - { - "epoch": 0.5914613152328956, - "grad_norm": 0.851064920425415, - "learning_rate": 1.4332065565284773e-05, - "loss": 0.0414, - "step": 12025 - }, - { - "epoch": 0.5926909645369141, - "grad_norm": 0.2013673037290573, - "learning_rate": 1.4258014390510494e-05, - "loss": 0.0546, - "step": 12050 - }, - { - "epoch": 0.5939206138409325, - "grad_norm": 30.269807815551758, - "learning_rate": 1.4184048904492952e-05, - "loss": 0.1, - "step": 12075 - }, - { - "epoch": 0.595150263144951, - "grad_norm": 78.8272476196289, - "learning_rate": 1.4110170211033328e-05, - "loss": 0.0805, - "step": 12100 - }, - { - "epoch": 0.5963799124489696, - "grad_norm": 0.2367556095123291, - "learning_rate": 1.4036379412637585e-05, - "loss": 0.1043, - "step": 12125 - }, - { - "epoch": 0.5976095617529881, - "grad_norm": 0.07838735729455948, - "learning_rate": 1.396267761050003e-05, - "loss": 0.091, - "step": 12150 - }, - { - "epoch": 0.5988392110570065, - "grad_norm": 0.27805811166763306, - "learning_rate": 1.388906590448684e-05, - "loss": 0.1272, - "step": 12175 - }, - { - "epoch": 0.600068860361025, - "grad_norm": 0.0626315101981163, - "learning_rate": 1.3815545393119673e-05, - "loss": 0.0147, - "step": 12200 - }, - { - "epoch": 0.6012985096650435, - "grad_norm": 0.07897807657718658, - "learning_rate": 1.3742117173559275e-05, - "loss": 0.0671, - "step": 12225 - }, - { - "epoch": 0.602528158969062, - "grad_norm": 3.992785930633545, - "learning_rate": 1.3668782341589103e-05, - "loss": 0.0841, - "step": 12250 - }, - { - "epoch": 0.6037578082730806, - "grad_norm": 10.892356872558594, - "learning_rate": 1.359554199159897e-05, - "loss": 0.1991, - "step": 12275 - }, - { - "epoch": 0.604987457577099, - "grad_norm": 0.10505378246307373, - "learning_rate": 1.352239721656873e-05, - "loss": 0.0714, - "step": 12300 - }, - { - "epoch": 0.6062171068811175, - "grad_norm": 10.522662162780762, - "learning_rate": 1.3449349108051937e-05, - "loss": 0.1007, - "step": 12325 - }, - { - "epoch": 0.607446756185136, - "grad_norm": 0.01612783968448639, - "learning_rate": 1.3376398756159579e-05, - "loss": 0.0407, - "step": 12350 - }, - { - "epoch": 0.6086764054891545, - "grad_norm": 0.015186433680355549, - "learning_rate": 1.33035472495438e-05, - "loss": 0.0482, - "step": 12375 - }, - { - "epoch": 0.609906054793173, - "grad_norm": 0.7001699209213257, - "learning_rate": 1.323079567538166e-05, - "loss": 0.1344, - "step": 12400 - }, - { - "epoch": 0.6111357040971915, - "grad_norm": 0.06578709185123444, - "learning_rate": 1.3158145119358902e-05, - "loss": 0.1078, - "step": 12425 - }, - { - "epoch": 0.61236535340121, - "grad_norm": 1.4873911142349243, - "learning_rate": 1.3085596665653775e-05, - "loss": 0.0464, - "step": 12450 - }, - { - "epoch": 0.6135950027052285, - "grad_norm": 0.10680821537971497, - "learning_rate": 1.3013151396920808e-05, - "loss": 0.1102, - "step": 12475 - }, - { - "epoch": 0.614824652009247, - "grad_norm": 0.14984863996505737, - "learning_rate": 1.2940810394274696e-05, - "loss": 0.0557, - "step": 12500 - }, - { - "epoch": 0.6160543013132654, - "grad_norm": 8.786226272583008, - "learning_rate": 1.2868574737274155e-05, - "loss": 0.0578, - "step": 12525 - }, - { - "epoch": 0.6172839506172839, - "grad_norm": 13.04101276397705, - "learning_rate": 1.2796445503905797e-05, - "loss": 0.1212, - "step": 12550 - }, - { - "epoch": 0.6185135999213024, - "grad_norm": 0.24714748561382294, - "learning_rate": 1.2724423770568057e-05, - "loss": 0.0698, - "step": 12575 - }, - { - "epoch": 0.619743249225321, - "grad_norm": 0.009283575229346752, - "learning_rate": 1.2652510612055125e-05, - "loss": 0.0966, - "step": 12600 - }, - { - "epoch": 0.6209728985293395, - "grad_norm": 31.085586547851562, - "learning_rate": 1.2580707101540921e-05, - "loss": 0.0755, - "step": 12625 - }, - { - "epoch": 0.6222025478333579, - "grad_norm": 0.03978521376848221, - "learning_rate": 1.2509014310563053e-05, - "loss": 0.0437, - "step": 12650 - }, - { - "epoch": 0.6234321971373764, - "grad_norm": 0.046137500554323196, - "learning_rate": 1.2437433309006839e-05, - "loss": 0.0539, - "step": 12675 - }, - { - "epoch": 0.6246618464413949, - "grad_norm": 0.06031510606408119, - "learning_rate": 1.2365965165089348e-05, - "loss": 0.1089, - "step": 12700 - }, - { - "epoch": 0.6258914957454134, - "grad_norm": 0.22030946612358093, - "learning_rate": 1.2294610945343455e-05, - "loss": 0.0761, - "step": 12725 - }, - { - "epoch": 0.627121145049432, - "grad_norm": 22.44876480102539, - "learning_rate": 1.222337171460191e-05, - "loss": 0.1533, - "step": 12750 - }, - { - "epoch": 0.6283507943534504, - "grad_norm": 0.1365877091884613, - "learning_rate": 1.215224853598148e-05, - "loss": 0.1158, - "step": 12775 - }, - { - "epoch": 0.6295804436574689, - "grad_norm": 0.12208946794271469, - "learning_rate": 1.2081242470867047e-05, - "loss": 0.0587, - "step": 12800 - }, - { - "epoch": 0.6308100929614874, - "grad_norm": 0.03114691562950611, - "learning_rate": 1.201035457889579e-05, - "loss": 0.0592, - "step": 12825 - }, - { - "epoch": 0.6320397422655059, - "grad_norm": 33.92405319213867, - "learning_rate": 1.1939585917941373e-05, - "loss": 0.078, - "step": 12850 - }, - { - "epoch": 0.6332693915695243, - "grad_norm": 0.0705670565366745, - "learning_rate": 1.1868937544098143e-05, - "loss": 0.171, - "step": 12875 - }, - { - "epoch": 0.6344990408735429, - "grad_norm": 0.10429049283266068, - "learning_rate": 1.179841051166538e-05, - "loss": 0.0505, - "step": 12900 - }, - { - "epoch": 0.6357286901775614, - "grad_norm": 2.1871042251586914, - "learning_rate": 1.1728005873131583e-05, - "loss": 0.1165, - "step": 12925 - }, - { - "epoch": 0.6369583394815799, - "grad_norm": 62.67900466918945, - "learning_rate": 1.1657724679158712e-05, - "loss": 0.1183, - "step": 12950 - }, - { - "epoch": 0.6381879887855983, - "grad_norm": 0.004702188540250063, - "learning_rate": 1.158756797856657e-05, - "loss": 0.0911, - "step": 12975 - }, - { - "epoch": 0.6394176380896168, - "grad_norm": 0.15667882561683655, - "learning_rate": 1.1517536818317098e-05, - "loss": 0.0452, - "step": 13000 - }, - { - "epoch": 0.6406472873936353, - "grad_norm": 0.0774935781955719, - "learning_rate": 1.1447632243498785e-05, - "loss": 0.0648, - "step": 13025 - }, - { - "epoch": 0.6418769366976538, - "grad_norm": 15.364973068237305, - "learning_rate": 1.137785529731106e-05, - "loss": 0.0807, - "step": 13050 - }, - { - "epoch": 0.6431065860016724, - "grad_norm": 30.89567756652832, - "learning_rate": 1.1308207021048725e-05, - "loss": 0.1066, - "step": 13075 - }, - { - "epoch": 0.6443362353056908, - "grad_norm": 0.014912708662450314, - "learning_rate": 1.1238688454086413e-05, - "loss": 0.0452, - "step": 13100 - }, - { - "epoch": 0.6455658846097093, - "grad_norm": 0.02897617407143116, - "learning_rate": 1.1169300633863088e-05, - "loss": 0.1486, - "step": 13125 - }, - { - "epoch": 0.6467955339137278, - "grad_norm": 3.503535032272339, - "learning_rate": 1.110004459586654e-05, - "loss": 0.0809, - "step": 13150 - }, - { - "epoch": 0.6480251832177463, - "grad_norm": 11.746075630187988, - "learning_rate": 1.1030921373617973e-05, - "loss": 0.0554, - "step": 13175 - }, - { - "epoch": 0.6492548325217647, - "grad_norm": 0.03387843817472458, - "learning_rate": 1.096193199865653e-05, - "loss": 0.0456, - "step": 13200 - }, - { - "epoch": 0.6504844818257833, - "grad_norm": 0.04217962920665741, - "learning_rate": 1.0893077500523928e-05, - "loss": 0.0566, - "step": 13225 - }, - { - "epoch": 0.6517141311298018, - "grad_norm": 0.06980982422828674, - "learning_rate": 1.082435890674912e-05, - "loss": 0.1212, - "step": 13250 - }, - { - "epoch": 0.6529437804338203, - "grad_norm": 86.88484954833984, - "learning_rate": 1.0755777242832896e-05, - "loss": 0.0372, - "step": 13275 - }, - { - "epoch": 0.6541734297378388, - "grad_norm": 54.30220031738281, - "learning_rate": 1.0687333532232638e-05, - "loss": 0.163, - "step": 13300 - }, - { - "epoch": 0.6554030790418572, - "grad_norm": 0.058754608035087585, - "learning_rate": 1.0619028796347013e-05, - "loss": 0.1059, - "step": 13325 - }, - { - "epoch": 0.6566327283458757, - "grad_norm": 0.12086337059736252, - "learning_rate": 1.0550864054500745e-05, - "loss": 0.0496, - "step": 13350 - }, - { - "epoch": 0.6578623776498943, - "grad_norm": 0.49153295159339905, - "learning_rate": 1.0482840323929392e-05, - "loss": 0.0075, - "step": 13375 - }, - { - "epoch": 0.6590920269539128, - "grad_norm": 0.0396379753947258, - "learning_rate": 1.0414958619764197e-05, - "loss": 0.0482, - "step": 13400 - }, - { - "epoch": 0.6603216762579313, - "grad_norm": 0.007519065402448177, - "learning_rate": 1.0347219955016894e-05, - "loss": 0.077, - "step": 13425 - }, - { - "epoch": 0.6615513255619497, - "grad_norm": 17.019479751586914, - "learning_rate": 1.0279625340564618e-05, - "loss": 0.1133, - "step": 13450 - }, - { - "epoch": 0.6627809748659682, - "grad_norm": 0.046384915709495544, - "learning_rate": 1.0212175785134817e-05, - "loss": 0.0502, - "step": 13475 - }, - { - "epoch": 0.6640106241699867, - "grad_norm": 0.017213325947523117, - "learning_rate": 1.0144872295290185e-05, - "loss": 0.1482, - "step": 13500 - }, - { - "epoch": 0.6652402734740052, - "grad_norm": 0.2897595763206482, - "learning_rate": 1.0077715875413665e-05, - "loss": 0.063, - "step": 13525 - }, - { - "epoch": 0.6664699227780237, - "grad_norm": 0.17567145824432373, - "learning_rate": 1.001070752769343e-05, - "loss": 0.1084, - "step": 13550 - }, - { - "epoch": 0.6676995720820422, - "grad_norm": 0.11109635978937149, - "learning_rate": 9.943848252107974e-06, - "loss": 0.0469, - "step": 13575 - }, - { - "epoch": 0.6689292213860607, - "grad_norm": 17.236717224121094, - "learning_rate": 9.877139046411121e-06, - "loss": 0.1327, - "step": 13600 - }, - { - "epoch": 0.6701588706900792, - "grad_norm": 98.89466094970703, - "learning_rate": 9.8105809061172e-06, - "loss": 0.0922, - "step": 13625 - }, - { - "epoch": 0.6713885199940977, - "grad_norm": 0.36004456877708435, - "learning_rate": 9.74417482448615e-06, - "loss": 0.1337, - "step": 13650 - }, - { - "epoch": 0.6726181692981161, - "grad_norm": 62.698028564453125, - "learning_rate": 9.677921792508711e-06, - "loss": 0.0827, - "step": 13675 - }, - { - "epoch": 0.6738478186021347, - "grad_norm": 6.161386489868164, - "learning_rate": 9.611822798891628e-06, - "loss": 0.0377, - "step": 13700 - }, - { - "epoch": 0.6750774679061532, - "grad_norm": 0.12083186209201813, - "learning_rate": 9.545878830042923e-06, - "loss": 0.0515, - "step": 13725 - }, - { - "epoch": 0.6763071172101717, - "grad_norm": 0.2173549383878708, - "learning_rate": 9.480090870057134e-06, - "loss": 0.0548, - "step": 13750 - }, - { - "epoch": 0.6775367665141901, - "grad_norm": 59.789615631103516, - "learning_rate": 9.41445990070065e-06, - "loss": 0.0436, - "step": 13775 - }, - { - "epoch": 0.6787664158182086, - "grad_norm": 0.022865787148475647, - "learning_rate": 9.348986901397067e-06, - "loss": 0.0593, - "step": 13800 - }, - { - "epoch": 0.6799960651222271, - "grad_norm": 0.013637728057801723, - "learning_rate": 9.283672849212553e-06, - "loss": 0.1176, - "step": 13825 - }, - { - "epoch": 0.6812257144262457, - "grad_norm": 8.995498657226562, - "learning_rate": 9.21851871884129e-06, - "loss": 0.0523, - "step": 13850 - }, - { - "epoch": 0.6824553637302642, - "grad_norm": 0.4615311622619629, - "learning_rate": 9.153525482590904e-06, - "loss": 0.0688, - "step": 13875 - }, - { - "epoch": 0.6836850130342826, - "grad_norm": 0.03148731589317322, - "learning_rate": 9.088694110367975e-06, - "loss": 0.0682, - "step": 13900 - }, - { - "epoch": 0.6849146623383011, - "grad_norm": 0.09678066521883011, - "learning_rate": 9.02402556966355e-06, - "loss": 0.021, - "step": 13925 - }, - { - "epoch": 0.6861443116423196, - "grad_norm": 0.672727644443512, - "learning_rate": 8.959520825538716e-06, - "loss": 0.0893, - "step": 13950 - }, - { - "epoch": 0.6873739609463381, - "grad_norm": 115.76903533935547, - "learning_rate": 8.895180840610189e-06, - "loss": 0.1137, - "step": 13975 - }, - { - "epoch": 0.6886036102503565, - "grad_norm": 0.17933940887451172, - "learning_rate": 8.831006575035947e-06, - "loss": 0.0416, - "step": 14000 - }, - { - "epoch": 0.6898332595543751, - "grad_norm": 0.011757748201489449, - "learning_rate": 8.766998986500905e-06, - "loss": 0.096, - "step": 14025 - }, - { - "epoch": 0.6910629088583936, - "grad_norm": 0.03945520892739296, - "learning_rate": 8.703159030202646e-06, - "loss": 0.1059, - "step": 14050 - }, - { - "epoch": 0.6922925581624121, - "grad_norm": 0.12770360708236694, - "learning_rate": 8.639487658837118e-06, - "loss": 0.0767, - "step": 14075 - }, - { - "epoch": 0.6935222074664306, - "grad_norm": 0.7925165891647339, - "learning_rate": 8.575985822584459e-06, - "loss": 0.034, - "step": 14100 - }, - { - "epoch": 0.694751856770449, - "grad_norm": 1.4627012014389038, - "learning_rate": 8.512654469094793e-06, - "loss": 0.0788, - "step": 14125 - }, - { - "epoch": 0.6959815060744675, - "grad_norm": 0.03877078369259834, - "learning_rate": 8.4494945434741e-06, - "loss": 0.0426, - "step": 14150 - }, - { - "epoch": 0.6972111553784861, - "grad_norm": 0.00882378313690424, - "learning_rate": 8.386506988270109e-06, - "loss": 0.0985, - "step": 14175 - }, - { - "epoch": 0.6984408046825046, - "grad_norm": 0.026557818055152893, - "learning_rate": 8.323692743458242e-06, - "loss": 0.1392, - "step": 14200 - }, - { - "epoch": 0.699670453986523, - "grad_norm": 0.01411840133368969, - "learning_rate": 8.261052746427564e-06, - "loss": 0.0197, - "step": 14225 - }, - { - "epoch": 0.7009001032905415, - "grad_norm": 0.9666563868522644, - "learning_rate": 8.198587931966809e-06, - "loss": 0.0227, - "step": 14250 - }, - { - "epoch": 0.70212975259456, - "grad_norm": 0.04267141595482826, - "learning_rate": 8.136299232250428e-06, - "loss": 0.0545, - "step": 14275 - }, - { - "epoch": 0.7033594018985785, - "grad_norm": 138.0958251953125, - "learning_rate": 8.074187576824681e-06, - "loss": 0.0601, - "step": 14300 - }, - { - "epoch": 0.7045890512025971, - "grad_norm": 21.075031280517578, - "learning_rate": 8.012253892593751e-06, - "loss": 0.0802, - "step": 14325 - }, - { - "epoch": 0.7058187005066155, - "grad_norm": 0.10542626678943634, - "learning_rate": 7.95049910380595e-06, - "loss": 0.1013, - "step": 14350 - }, - { - "epoch": 0.707048349810634, - "grad_norm": 0.03686801716685295, - "learning_rate": 7.888924132039867e-06, - "loss": 0.1479, - "step": 14375 - }, - { - "epoch": 0.7082779991146525, - "grad_norm": 0.02271469682455063, - "learning_rate": 7.827529896190669e-06, - "loss": 0.0918, - "step": 14400 - }, - { - "epoch": 0.709507648418671, - "grad_norm": 0.040192607790231705, - "learning_rate": 7.766317312456348e-06, - "loss": 0.0518, - "step": 14425 - }, - { - "epoch": 0.7107372977226895, - "grad_norm": 0.20595033466815948, - "learning_rate": 7.705287294324081e-06, - "loss": 0.0707, - "step": 14450 - }, - { - "epoch": 0.7119669470267079, - "grad_norm": 0.04882766306400299, - "learning_rate": 7.644440752556582e-06, - "loss": 0.0473, - "step": 14475 - }, - { - "epoch": 0.7131965963307265, - "grad_norm": 0.6573377847671509, - "learning_rate": 7.583778595178495e-06, - "loss": 0.0816, - "step": 14500 - }, - { - "epoch": 0.714426245634745, - "grad_norm": 24.78392219543457, - "learning_rate": 7.5233017274628904e-06, - "loss": 0.0685, - "step": 14525 - }, - { - "epoch": 0.7156558949387635, - "grad_norm": 0.03877711296081543, - "learning_rate": 7.463011051917702e-06, - "loss": 0.1021, - "step": 14550 - }, - { - "epoch": 0.716885544242782, - "grad_norm": 18.35906982421875, - "learning_rate": 7.402907468272287e-06, - "loss": 0.1305, - "step": 14575 - }, - { - "epoch": 0.7181151935468004, - "grad_norm": 0.07426095753908157, - "learning_rate": 7.342991873463998e-06, - "loss": 0.0385, - "step": 14600 - }, - { - "epoch": 0.7193448428508189, - "grad_norm": 0.30809980630874634, - "learning_rate": 7.283265161624789e-06, - "loss": 0.0514, - "step": 14625 - }, - { - "epoch": 0.7205744921548375, - "grad_norm": 44.22898483276367, - "learning_rate": 7.223728224067881e-06, - "loss": 0.0362, - "step": 14650 - }, - { - "epoch": 0.721804141458856, - "grad_norm": 0.02759651280939579, - "learning_rate": 7.164381949274457e-06, - "loss": 0.067, - "step": 14675 - }, - { - "epoch": 0.7230337907628744, - "grad_norm": 2.6302061080932617, - "learning_rate": 7.105227222880398e-06, - "loss": 0.112, - "step": 14700 - }, - { - "epoch": 0.7242634400668929, - "grad_norm": 0.15227456390857697, - "learning_rate": 7.046264927663078e-06, - "loss": 0.1222, - "step": 14725 - }, - { - "epoch": 0.7254930893709114, - "grad_norm": 66.19929504394531, - "learning_rate": 6.987495943528177e-06, - "loss": 0.0655, - "step": 14750 - }, - { - "epoch": 0.7267227386749299, - "grad_norm": 0.023083528503775597, - "learning_rate": 6.928921147496566e-06, - "loss": 0.0289, - "step": 14775 - }, - { - "epoch": 0.7279523879789485, - "grad_norm": 1.2439980506896973, - "learning_rate": 6.87054141369119e-06, - "loss": 0.0677, - "step": 14800 - }, - { - "epoch": 0.7291820372829669, - "grad_norm": 0.19184768199920654, - "learning_rate": 6.812357613324072e-06, - "loss": 0.03, - "step": 14825 - }, - { - "epoch": 0.7304116865869854, - "grad_norm": 0.004812607541680336, - "learning_rate": 6.754370614683261e-06, - "loss": 0.1329, - "step": 14850 - }, - { - "epoch": 0.7316413358910039, - "grad_norm": 0.025912120938301086, - "learning_rate": 6.696581283119903e-06, - "loss": 0.0772, - "step": 14875 - }, - { - "epoch": 0.7328709851950224, - "grad_norm": 0.050882481038570404, - "learning_rate": 6.6389904810353145e-06, - "loss": 0.0875, - "step": 14900 - }, - { - "epoch": 0.7341006344990408, - "grad_norm": 0.07869074493646622, - "learning_rate": 6.581599067868127e-06, - "loss": 0.082, - "step": 14925 - }, - { - "epoch": 0.7353302838030593, - "grad_norm": 1.4990394115447998, - "learning_rate": 6.5244079000814465e-06, - "loss": 0.1157, - "step": 14950 - }, - { - "epoch": 0.7365599331070779, - "grad_norm": 0.04584513232111931, - "learning_rate": 6.467417831150073e-06, - "loss": 0.1023, - "step": 14975 - }, - { - "epoch": 0.7377895824110964, - "grad_norm": 0.07187489420175552, - "learning_rate": 6.410629711547793e-06, - "loss": 0.0927, - "step": 15000 - }, - { - "epoch": 0.7390192317151149, - "grad_norm": 16.336345672607422, - "learning_rate": 6.354044388734641e-06, - "loss": 0.038, - "step": 15025 - }, - { - "epoch": 0.7402488810191333, - "grad_norm": 0.008160126395523548, - "learning_rate": 6.297662707144283e-06, - "loss": 0.0873, - "step": 15050 - }, - { - "epoch": 0.7414785303231518, - "grad_norm": 0.07538675516843796, - "learning_rate": 6.24148550817141e-06, - "loss": 0.0558, - "step": 15075 - }, - { - "epoch": 0.7427081796271703, - "grad_norm": 0.023661164566874504, - "learning_rate": 6.1855136301591744e-06, - "loss": 0.059, - "step": 15100 - }, - { - "epoch": 0.7439378289311889, - "grad_norm": 0.34304460883140564, - "learning_rate": 6.1297479083866855e-06, - "loss": 0.0542, - "step": 15125 - }, - { - "epoch": 0.7451674782352073, - "grad_norm": 13.270212173461914, - "learning_rate": 6.074189175056558e-06, - "loss": 0.0465, - "step": 15150 - }, - { - "epoch": 0.7463971275392258, - "grad_norm": 15.957275390625, - "learning_rate": 6.018838259282456e-06, - "loss": 0.1237, - "step": 15175 - }, - { - "epoch": 0.7476267768432443, - "grad_norm": 0.1509442925453186, - "learning_rate": 5.963695987076752e-06, - "loss": 0.1301, - "step": 15200 - }, - { - "epoch": 0.7488564261472628, - "grad_norm": 0.07606259733438492, - "learning_rate": 5.908763181338195e-06, - "loss": 0.1563, - "step": 15225 - }, - { - "epoch": 0.7500860754512813, - "grad_norm": 0.12574882805347443, - "learning_rate": 5.85404066183962e-06, - "loss": 0.0474, - "step": 15250 - }, - { - "epoch": 0.7513157247552998, - "grad_norm": 0.016331100836396217, - "learning_rate": 5.7995292452157116e-06, - "loss": 0.0619, - "step": 15275 - }, - { - "epoch": 0.7525453740593183, - "grad_norm": 0.33405211567878723, - "learning_rate": 5.745229744950851e-06, - "loss": 0.0767, - "step": 15300 - }, - { - "epoch": 0.7537750233633368, - "grad_norm": 0.056777551770210266, - "learning_rate": 5.691142971366925e-06, - "loss": 0.0443, - "step": 15325 - }, - { - "epoch": 0.7550046726673553, - "grad_norm": 23.7675724029541, - "learning_rate": 5.637269731611272e-06, - "loss": 0.0715, - "step": 15350 - }, - { - "epoch": 0.7562343219713737, - "grad_norm": 0.044475723057985306, - "learning_rate": 5.583610829644621e-06, - "loss": 0.0501, - "step": 15375 - }, - { - "epoch": 0.7574639712753922, - "grad_norm": 0.350460410118103, - "learning_rate": 5.5301670662290925e-06, - "loss": 0.0816, - "step": 15400 - }, - { - "epoch": 0.7586936205794107, - "grad_norm": 0.16186809539794922, - "learning_rate": 5.476939238916264e-06, - "loss": 0.1226, - "step": 15425 - }, - { - "epoch": 0.7599232698834293, - "grad_norm": 11.744179725646973, - "learning_rate": 5.423928142035244e-06, - "loss": 0.0423, - "step": 15450 - }, - { - "epoch": 0.7611529191874478, - "grad_norm": 0.052282609045505524, - "learning_rate": 5.37113456668084e-06, - "loss": 0.0936, - "step": 15475 - }, - { - "epoch": 0.7623825684914662, - "grad_norm": 0.45249849557876587, - "learning_rate": 5.31855930070174e-06, - "loss": 0.0727, - "step": 15500 - }, - { - "epoch": 0.7636122177954847, - "grad_norm": 0.5530847907066345, - "learning_rate": 5.266203128688762e-06, - "loss": 0.0931, - "step": 15525 - }, - { - "epoch": 0.7648418670995032, - "grad_norm": 16.120080947875977, - "learning_rate": 5.214066831963143e-06, - "loss": 0.0676, - "step": 15550 - }, - { - "epoch": 0.7660715164035217, - "grad_norm": 0.23792412877082825, - "learning_rate": 5.1621511885648726e-06, - "loss": 0.056, - "step": 15575 - }, - { - "epoch": 0.7673011657075403, - "grad_norm": 0.014436445198953152, - "learning_rate": 5.110456973241084e-06, - "loss": 0.0545, - "step": 15600 - }, - { - "epoch": 0.7685308150115587, - "grad_norm": 0.23496727645397186, - "learning_rate": 5.058984957434525e-06, - "loss": 0.0763, - "step": 15625 - }, - { - "epoch": 0.7697604643155772, - "grad_norm": 44.09788131713867, - "learning_rate": 5.007735909271987e-06, - "loss": 0.0728, - "step": 15650 - }, - { - "epoch": 0.7709901136195957, - "grad_norm": 0.07434046268463135, - "learning_rate": 4.9567105935528805e-06, - "loss": 0.0469, - "step": 15675 - }, - { - "epoch": 0.7722197629236142, - "grad_norm": 0.2620038390159607, - "learning_rate": 4.905909771737818e-06, - "loss": 0.087, - "step": 15700 - }, - { - "epoch": 0.7734494122276326, - "grad_norm": 1.8491833209991455, - "learning_rate": 4.855334201937243e-06, - "loss": 0.0982, - "step": 15725 - }, - { - "epoch": 0.7746790615316512, - "grad_norm": 0.08282598108053207, - "learning_rate": 4.804984638900117e-06, - "loss": 0.1264, - "step": 15750 - }, - { - "epoch": 0.7759087108356697, - "grad_norm": 1.0912010669708252, - "learning_rate": 4.754861834002671e-06, - "loss": 0.1256, - "step": 15775 - }, - { - "epoch": 0.7771383601396882, - "grad_norm": 0.18005187809467316, - "learning_rate": 4.70496653523717e-06, - "loss": 0.0679, - "step": 15800 - }, - { - "epoch": 0.7783680094437067, - "grad_norm": 0.2322026938199997, - "learning_rate": 4.655299487200762e-06, - "loss": 0.0359, - "step": 15825 - }, - { - "epoch": 0.7795976587477251, - "grad_norm": 49.91640853881836, - "learning_rate": 4.605861431084366e-06, - "loss": 0.0756, - "step": 15850 - }, - { - "epoch": 0.7808273080517436, - "grad_norm": 0.050678037106990814, - "learning_rate": 4.556653104661615e-06, - "loss": 0.0452, - "step": 15875 - }, - { - "epoch": 0.7820569573557621, - "grad_norm": 0.7173961400985718, - "learning_rate": 4.507675242277836e-06, - "loss": 0.0301, - "step": 15900 - }, - { - "epoch": 0.7832866066597807, - "grad_norm": 0.09891282767057419, - "learning_rate": 4.458928574839092e-06, - "loss": 0.0493, - "step": 15925 - }, - { - "epoch": 0.7845162559637991, - "grad_norm": 0.2606590986251831, - "learning_rate": 4.4104138298012985e-06, - "loss": 0.0517, - "step": 15950 - }, - { - "epoch": 0.7857459052678176, - "grad_norm": 0.00679002096876502, - "learning_rate": 4.362131731159331e-06, - "loss": 0.0244, - "step": 15975 - }, - { - "epoch": 0.7869755545718361, - "grad_norm": 0.3237299919128418, - "learning_rate": 4.314082999436242e-06, - "loss": 0.062, - "step": 16000 - }, - { - "epoch": 0.7882052038758546, - "grad_norm": 12.362506866455078, - "learning_rate": 4.266268351672507e-06, - "loss": 0.0721, - "step": 16025 - }, - { - "epoch": 0.789434853179873, - "grad_norm": 0.26261812448501587, - "learning_rate": 4.21868850141532e-06, - "loss": 0.0508, - "step": 16050 - }, - { - "epoch": 0.7906645024838916, - "grad_norm": 0.003954671788960695, - "learning_rate": 4.171344158707941e-06, - "loss": 0.0769, - "step": 16075 - }, - { - "epoch": 0.7918941517879101, - "grad_norm": 18.404504776000977, - "learning_rate": 4.124236030079123e-06, - "loss": 0.1024, - "step": 16100 - }, - { - "epoch": 0.7931238010919286, - "grad_norm": 0.19413790106773376, - "learning_rate": 4.07736481853253e-06, - "loss": 0.0472, - "step": 16125 - }, - { - "epoch": 0.7943534503959471, - "grad_norm": 0.09710480272769928, - "learning_rate": 4.030731223536279e-06, - "loss": 0.098, - "step": 16150 - }, - { - "epoch": 0.7955830996999655, - "grad_norm": 0.01857774890959263, - "learning_rate": 3.9843359410124894e-06, - "loss": 0.1158, - "step": 16175 - }, - { - "epoch": 0.796812749003984, - "grad_norm": 0.03446051850914955, - "learning_rate": 3.938179663326886e-06, - "loss": 0.0424, - "step": 16200 - }, - { - "epoch": 0.7980423983080026, - "grad_norm": 0.2981380224227905, - "learning_rate": 3.8922630792784955e-06, - "loss": 0.1259, - "step": 16225 - }, - { - "epoch": 0.7992720476120211, - "grad_norm": 0.39373067021369934, - "learning_rate": 3.846586874089338e-06, - "loss": 0.0539, - "step": 16250 - }, - { - "epoch": 0.8005016969160396, - "grad_norm": 0.06336525827646255, - "learning_rate": 3.801151729394217e-06, - "loss": 0.0544, - "step": 16275 - }, - { - "epoch": 0.801731346220058, - "grad_norm": 6.50235652923584, - "learning_rate": 3.7559583232305465e-06, - "loss": 0.0787, - "step": 16300 - }, - { - "epoch": 0.8029609955240765, - "grad_norm": 66.61959075927734, - "learning_rate": 3.7110073300282247e-06, - "loss": 0.1175, - "step": 16325 - }, - { - "epoch": 0.804190644828095, - "grad_norm": 20.794898986816406, - "learning_rate": 3.6662994205995794e-06, - "loss": 0.0736, - "step": 16350 - }, - { - "epoch": 0.8054202941321135, - "grad_norm": 0.12973858416080475, - "learning_rate": 3.6218352621293496e-06, - "loss": 0.0491, - "step": 16375 - }, - { - "epoch": 0.806649943436132, - "grad_norm": 0.07653709501028061, - "learning_rate": 3.5776155181647297e-06, - "loss": 0.0539, - "step": 16400 - }, - { - "epoch": 0.8078795927401505, - "grad_norm": 0.4673885405063629, - "learning_rate": 3.5336408486054798e-06, - "loss": 0.1165, - "step": 16425 - }, - { - "epoch": 0.809109242044169, - "grad_norm": 0.06317152082920074, - "learning_rate": 3.4899119096940547e-06, - "loss": 0.0288, - "step": 16450 - }, - { - "epoch": 0.8103388913481875, - "grad_norm": 73.71272277832031, - "learning_rate": 3.4464293540058315e-06, - "loss": 0.0758, - "step": 16475 - }, - { - "epoch": 0.811568540652206, - "grad_norm": 0.01633506454527378, - "learning_rate": 3.4031938304393574e-06, - "loss": 0.0356, - "step": 16500 - }, - { - "epoch": 0.8127981899562244, - "grad_norm": 0.13523496687412262, - "learning_rate": 3.360205984206677e-06, - "loss": 0.0644, - "step": 16525 - }, - { - "epoch": 0.814027839260243, - "grad_norm": 0.40784603357315063, - "learning_rate": 3.3174664568236903e-06, - "loss": 0.0437, - "step": 16550 - }, - { - "epoch": 0.8152574885642615, - "grad_norm": 0.1997995525598526, - "learning_rate": 3.274975886100602e-06, - "loss": 0.0281, - "step": 16575 - }, - { - "epoch": 0.81648713786828, - "grad_norm": 0.014406194910407066, - "learning_rate": 3.2327349061323732e-06, - "loss": 0.0212, - "step": 16600 - }, - { - "epoch": 0.8177167871722985, - "grad_norm": 1.199031114578247, - "learning_rate": 3.190744147289284e-06, - "loss": 0.0887, - "step": 16625 - }, - { - "epoch": 0.8189464364763169, - "grad_norm": 0.0992857813835144, - "learning_rate": 3.149004236207511e-06, - "loss": 0.024, - "step": 16650 - }, - { - "epoch": 0.8201760857803354, - "grad_norm": 0.9201266169548035, - "learning_rate": 3.107515795779783e-06, - "loss": 0.0358, - "step": 16675 - }, - { - "epoch": 0.821405735084354, - "grad_norm": 83.2445068359375, - "learning_rate": 3.066279445146081e-06, - "loss": 0.1423, - "step": 16700 - }, - { - "epoch": 0.8226353843883725, - "grad_norm": 0.0020692290272563696, - "learning_rate": 3.0252957996844134e-06, - "loss": 0.0372, - "step": 16725 - }, - { - "epoch": 0.823865033692391, - "grad_norm": 0.08006805181503296, - "learning_rate": 2.984565471001606e-06, - "loss": 0.034, - "step": 16750 - }, - { - "epoch": 0.8250946829964094, - "grad_norm": 0.039577361196279526, - "learning_rate": 2.9440890669241962e-06, - "loss": 0.1875, - "step": 16775 - }, - { - "epoch": 0.8263243323004279, - "grad_norm": 0.150069460272789, - "learning_rate": 2.9038671914893533e-06, - "loss": 0.0809, - "step": 16800 - }, - { - "epoch": 0.8275539816044464, - "grad_norm": 0.010478384792804718, - "learning_rate": 2.863900444935872e-06, - "loss": 0.0173, - "step": 16825 - }, - { - "epoch": 0.8287836309084649, - "grad_norm": 0.018838506191968918, - "learning_rate": 2.8241894236952026e-06, - "loss": 0.1011, - "step": 16850 - }, - { - "epoch": 0.8300132802124834, - "grad_norm": 0.10605171322822571, - "learning_rate": 2.784734720382556e-06, - "loss": 0.0691, - "step": 16875 - }, - { - "epoch": 0.8312429295165019, - "grad_norm": 0.03260951489210129, - "learning_rate": 2.745536923788077e-06, - "loss": 0.0471, - "step": 16900 - }, - { - "epoch": 0.8324725788205204, - "grad_norm": 0.09011470526456833, - "learning_rate": 2.706596618868025e-06, - "loss": 0.0634, - "step": 16925 - }, - { - "epoch": 0.8337022281245389, - "grad_norm": 0.05673046410083771, - "learning_rate": 2.667914386736066e-06, - "loss": 0.088, - "step": 16950 - }, - { - "epoch": 0.8349318774285573, - "grad_norm": 0.24913927912712097, - "learning_rate": 2.6294908046546e-06, - "loss": 0.0551, - "step": 16975 - }, - { - "epoch": 0.8361615267325758, - "grad_norm": 1.0816290378570557, - "learning_rate": 2.5913264460261366e-06, - "loss": 0.0889, - "step": 17000 - }, - { - "epoch": 0.8373911760365944, - "grad_norm": 0.0809168592095375, - "learning_rate": 2.55342188038475e-06, - "loss": 0.082, - "step": 17025 - }, - { - "epoch": 0.8386208253406129, - "grad_norm": 0.23030467331409454, - "learning_rate": 2.5157776733875717e-06, - "loss": 0.0316, - "step": 17050 - }, - { - "epoch": 0.8398504746446314, - "grad_norm": 0.01393158733844757, - "learning_rate": 2.4783943868063487e-06, - "loss": 0.0992, - "step": 17075 - }, - { - "epoch": 0.8410801239486498, - "grad_norm": 9.089689254760742, - "learning_rate": 2.4412725785190627e-06, - "loss": 0.056, - "step": 17100 - }, - { - "epoch": 0.8423097732526683, - "grad_norm": 0.02887335792183876, - "learning_rate": 2.4044128025016077e-06, - "loss": 0.0582, - "step": 17125 - }, - { - "epoch": 0.8435394225566868, - "grad_norm": 0.14471715688705444, - "learning_rate": 2.367815608819517e-06, - "loss": 0.1064, - "step": 17150 - }, - { - "epoch": 0.8447690718607054, - "grad_norm": 0.2023010551929474, - "learning_rate": 2.3314815436197534e-06, - "loss": 0.0542, - "step": 17175 - }, - { - "epoch": 0.8459987211647239, - "grad_norm": 133.85064697265625, - "learning_rate": 2.2954111491225773e-06, - "loss": 0.0319, - "step": 17200 - }, - { - "epoch": 0.8472283704687423, - "grad_norm": 0.09802548587322235, - "learning_rate": 2.259604963613422e-06, - "loss": 0.1187, - "step": 17225 - }, - { - "epoch": 0.8484580197727608, - "grad_norm": 0.2763103246688843, - "learning_rate": 2.2240635214348914e-06, - "loss": 0.0523, - "step": 17250 - }, - { - "epoch": 0.8496876690767793, - "grad_norm": 0.35754579305648804, - "learning_rate": 2.1887873529787694e-06, - "loss": 0.0614, - "step": 17275 - }, - { - "epoch": 0.8509173183807978, - "grad_norm": 0.03275228291749954, - "learning_rate": 2.1537769846781066e-06, - "loss": 0.0377, - "step": 17300 - }, - { - "epoch": 0.8521469676848162, - "grad_norm": 0.015068607404828072, - "learning_rate": 2.1190329389993725e-06, - "loss": 0.0625, - "step": 17325 - }, - { - "epoch": 0.8533766169888348, - "grad_norm": 0.012918914668262005, - "learning_rate": 2.0845557344346457e-06, - "loss": 0.0518, - "step": 17350 - }, - { - "epoch": 0.8546062662928533, - "grad_norm": 15.892303466796875, - "learning_rate": 2.050345885493894e-06, - "loss": 0.0583, - "step": 17375 - }, - { - "epoch": 0.8558359155968718, - "grad_norm": 0.11859627813100815, - "learning_rate": 2.016403902697277e-06, - "loss": 0.029, - "step": 17400 - }, - { - "epoch": 0.8570655649008903, - "grad_norm": 0.007320679724216461, - "learning_rate": 1.982730292567536e-06, - "loss": 0.0734, - "step": 17425 - }, - { - "epoch": 0.8582952142049087, - "grad_norm": 0.04906320571899414, - "learning_rate": 1.9493255576224434e-06, - "loss": 0.0412, - "step": 17450 - }, - { - "epoch": 0.8595248635089272, - "grad_norm": 8.984256744384766, - "learning_rate": 1.9161901963672845e-06, - "loss": 0.0767, - "step": 17475 - }, - { - "epoch": 0.8607545128129458, - "grad_norm": 0.10479895025491714, - "learning_rate": 1.8833247032874347e-06, - "loss": 0.077, - "step": 17500 - }, - { - "epoch": 0.8619841621169643, - "grad_norm": 0.12516078352928162, - "learning_rate": 1.8507295688409788e-06, - "loss": 0.0119, - "step": 17525 - }, - { - "epoch": 0.8632138114209827, - "grad_norm": 0.04183598980307579, - "learning_rate": 1.8184052794513828e-06, - "loss": 0.0677, - "step": 17550 - }, - { - "epoch": 0.8644434607250012, - "grad_norm": 27.88933563232422, - "learning_rate": 1.7863523175002395e-06, - "loss": 0.1175, - "step": 17575 - }, - { - "epoch": 0.8656731100290197, - "grad_norm": 0.44858065247535706, - "learning_rate": 1.754571161320071e-06, - "loss": 0.0367, - "step": 17600 - }, - { - "epoch": 0.8669027593330382, - "grad_norm": 0.5012088418006897, - "learning_rate": 1.7230622851871915e-06, - "loss": 0.1229, - "step": 17625 - }, - { - "epoch": 0.8681324086370568, - "grad_norm": 0.5806565284729004, - "learning_rate": 1.6918261593146179e-06, - "loss": 0.0818, - "step": 17650 - }, - { - "epoch": 0.8693620579410752, - "grad_norm": 0.05701303109526634, - "learning_rate": 1.6608632498450816e-06, - "loss": 0.0692, - "step": 17675 - }, - { - "epoch": 0.8705917072450937, - "grad_norm": 0.025888176634907722, - "learning_rate": 1.6301740188440373e-06, - "loss": 0.099, - "step": 17700 - }, - { - "epoch": 0.8718213565491122, - "grad_norm": 0.02076895162463188, - "learning_rate": 1.5997589242927869e-06, - "loss": 0.0415, - "step": 17725 - }, - { - "epoch": 0.8730510058531307, - "grad_norm": 0.03364446014165878, - "learning_rate": 1.5696184200816422e-06, - "loss": 0.0653, - "step": 17750 - }, - { - "epoch": 0.8742806551571491, - "grad_norm": 0.011630027554929256, - "learning_rate": 1.539752956003151e-06, - "loss": 0.0595, - "step": 17775 - }, - { - "epoch": 0.8755103044611676, - "grad_norm": 39.39053726196289, - "learning_rate": 1.5101629777453841e-06, - "loss": 0.0366, - "step": 17800 - }, - { - "epoch": 0.8767399537651862, - "grad_norm": 0.029014553874731064, - "learning_rate": 1.480848926885281e-06, - "loss": 0.052, - "step": 17825 - }, - { - "epoch": 0.8779696030692047, - "grad_norm": 0.15381889045238495, - "learning_rate": 1.4518112408820706e-06, - "loss": 0.0647, - "step": 17850 - }, - { - "epoch": 0.8791992523732232, - "grad_norm": 0.039262525737285614, - "learning_rate": 1.4230503530707274e-06, - "loss": 0.0616, - "step": 17875 - }, - { - "epoch": 0.8804289016772416, - "grad_norm": 7.716977119445801, - "learning_rate": 1.3945666926555212e-06, - "loss": 0.1362, - "step": 17900 - }, - { - "epoch": 0.8816585509812601, - "grad_norm": 45.723228454589844, - "learning_rate": 1.3663606847035981e-06, - "loss": 0.0529, - "step": 17925 - }, - { - "epoch": 0.8828882002852786, - "grad_norm": 0.016346005722880363, - "learning_rate": 1.338432750138645e-06, - "loss": 0.0714, - "step": 17950 - }, - { - "epoch": 0.8841178495892972, - "grad_norm": 0.15536203980445862, - "learning_rate": 1.3107833057346064e-06, - "loss": 0.0624, - "step": 17975 - }, - { - "epoch": 0.8853474988933157, - "grad_norm": 0.4494189918041229, - "learning_rate": 1.2834127641094663e-06, - "loss": 0.0594, - "step": 18000 - }, - { - "epoch": 0.8865771481973341, - "grad_norm": 21.255159378051758, - "learning_rate": 1.2563215337190848e-06, - "loss": 0.0844, - "step": 18025 - }, - { - "epoch": 0.8878067975013526, - "grad_norm": 0.2974194586277008, - "learning_rate": 1.2295100188511099e-06, - "loss": 0.075, - "step": 18050 - }, - { - "epoch": 0.8890364468053711, - "grad_norm": 14.292924880981445, - "learning_rate": 1.2029786196189418e-06, - "loss": 0.1068, - "step": 18075 - }, - { - "epoch": 0.8902660961093896, - "grad_norm": 13.70726203918457, - "learning_rate": 1.1767277319557534e-06, - "loss": 0.0155, - "step": 18100 - }, - { - "epoch": 0.8914957454134081, - "grad_norm": 0.045881837606430054, - "learning_rate": 1.1507577476085995e-06, - "loss": 0.1291, - "step": 18125 - }, - { - "epoch": 0.8927253947174266, - "grad_norm": 0.10129638761281967, - "learning_rate": 1.125069054132557e-06, - "loss": 0.0932, - "step": 18150 - }, - { - "epoch": 0.8939550440214451, - "grad_norm": 0.06716735661029816, - "learning_rate": 1.099662034884943e-06, - "loss": 0.101, - "step": 18175 - }, - { - "epoch": 0.8951846933254636, - "grad_norm": 0.23828832805156708, - "learning_rate": 1.0745370690195988e-06, - "loss": 0.0601, - "step": 18200 - }, - { - "epoch": 0.896414342629482, - "grad_norm": 0.1076824814081192, - "learning_rate": 1.0496945314812247e-06, - "loss": 0.0803, - "step": 18225 - }, - { - "epoch": 0.8976439919335005, - "grad_norm": 0.004836052190512419, - "learning_rate": 1.0251347929997934e-06, - "loss": 0.0462, - "step": 18250 - }, - { - "epoch": 0.898873641237519, - "grad_norm": 0.007113552186638117, - "learning_rate": 1.000858220085008e-06, - "loss": 0.0424, - "step": 18275 - }, - { - "epoch": 0.9001032905415376, - "grad_norm": 54.878257751464844, - "learning_rate": 9.768651750208403e-07, - "loss": 0.0422, - "step": 18300 - }, - { - "epoch": 0.9013329398455561, - "grad_norm": 0.028295330703258514, - "learning_rate": 9.531560158601261e-07, - "loss": 0.0866, - "step": 18325 - }, - { - "epoch": 0.9025625891495745, - "grad_norm": 0.04308134689927101, - "learning_rate": 9.297310964192064e-07, - "loss": 0.1016, - "step": 18350 - }, - { - "epoch": 0.903792238453593, - "grad_norm": 0.2102079838514328, - "learning_rate": 9.06590766272668e-07, - "loss": 0.0948, - "step": 18375 - }, - { - "epoch": 0.9050218877576115, - "grad_norm": 0.021506965160369873, - "learning_rate": 8.837353707481089e-07, - "loss": 0.071, - "step": 18400 - }, - { - "epoch": 0.90625153706163, - "grad_norm": 0.07498989999294281, - "learning_rate": 8.611652509210011e-07, - "loss": 0.0352, - "step": 18425 - }, - { - "epoch": 0.9074811863656486, - "grad_norm": 0.009460424073040485, - "learning_rate": 8.388807436095847e-07, - "loss": 0.0732, - "step": 18450 - }, - { - "epoch": 0.908710835669667, - "grad_norm": 0.03926212713122368, - "learning_rate": 8.168821813698513e-07, - "loss": 0.0935, - "step": 18475 - }, - { - "epoch": 0.9099404849736855, - "grad_norm": 0.07123960554599762, - "learning_rate": 7.951698924905838e-07, - "loss": 0.0348, - "step": 18500 - }, - { - "epoch": 0.911170134277704, - "grad_norm": 0.036921992897987366, - "learning_rate": 7.737442009884511e-07, - "loss": 0.0372, - "step": 18525 - }, - { - "epoch": 0.9123997835817225, - "grad_norm": 12.337058067321777, - "learning_rate": 7.526054266031701e-07, - "loss": 0.0513, - "step": 18550 - }, - { - "epoch": 0.9136294328857409, - "grad_norm": 146.36697387695312, - "learning_rate": 7.317538847927386e-07, - "loss": 0.0754, - "step": 18575 - }, - { - "epoch": 0.9148590821897595, - "grad_norm": 0.10903728753328323, - "learning_rate": 7.111898867287315e-07, - "loss": 0.0318, - "step": 18600 - }, - { - "epoch": 0.916088731493778, - "grad_norm": 0.12928414344787598, - "learning_rate": 6.909137392916521e-07, - "loss": 0.072, - "step": 18625 - }, - { - "epoch": 0.9173183807977965, - "grad_norm": 0.009232209995388985, - "learning_rate": 6.709257450663509e-07, - "loss": 0.0667, - "step": 18650 - }, - { - "epoch": 0.918548030101815, - "grad_norm": 0.03198140859603882, - "learning_rate": 6.51226202337516e-07, - "loss": 0.0666, - "step": 18675 - }, - { - "epoch": 0.9197776794058334, - "grad_norm": 0.26223111152648926, - "learning_rate": 6.318154050852122e-07, - "loss": 0.0941, - "step": 18700 - }, - { - "epoch": 0.9210073287098519, - "grad_norm": 0.09098884463310242, - "learning_rate": 6.126936429805064e-07, - "loss": 0.0782, - "step": 18725 - }, - { - "epoch": 0.9222369780138704, - "grad_norm": 0.23236669600009918, - "learning_rate": 5.938612013811363e-07, - "loss": 0.0711, - "step": 18750 - }, - { - "epoch": 0.923466627317889, - "grad_norm": 0.1742514967918396, - "learning_rate": 5.753183613272506e-07, - "loss": 0.0504, - "step": 18775 - }, - { - "epoch": 0.9246962766219075, - "grad_norm": 0.34252628684043884, - "learning_rate": 5.570653995372266e-07, - "loss": 0.0762, - "step": 18800 - }, - { - "epoch": 0.9259259259259259, - "grad_norm": 0.09779156744480133, - "learning_rate": 5.391025884035239e-07, - "loss": 0.0625, - "step": 18825 - }, - { - "epoch": 0.9271555752299444, - "grad_norm": 0.1622578650712967, - "learning_rate": 5.214301959886326e-07, - "loss": 0.0024, - "step": 18850 - }, - { - "epoch": 0.9283852245339629, - "grad_norm": 18.103897094726562, - "learning_rate": 5.040484860210671e-07, - "loss": 0.0607, - "step": 18875 - }, - { - "epoch": 0.9296148738379814, - "grad_norm": 0.04039117693901062, - "learning_rate": 4.869577178914297e-07, - "loss": 0.1046, - "step": 18900 - }, - { - "epoch": 0.930844523142, - "grad_norm": 0.026142995804548264, - "learning_rate": 4.7015814664854276e-07, - "loss": 0.0343, - "step": 18925 - }, - { - "epoch": 0.9320741724460184, - "grad_norm": 0.012725235894322395, - "learning_rate": 4.5365002299564865e-07, - "loss": 0.1029, - "step": 18950 - }, - { - "epoch": 0.9333038217500369, - "grad_norm": 0.5410313606262207, - "learning_rate": 4.3743359328664916e-07, - "loss": 0.064, - "step": 18975 - }, - { - "epoch": 0.9345334710540554, - "grad_norm": 0.2672090232372284, - "learning_rate": 4.215090995224502e-07, - "loss": 0.0201, - "step": 19000 - }, - { - "epoch": 0.9357631203580739, - "grad_norm": 0.06157681345939636, - "learning_rate": 4.058767793473362e-07, - "loss": 0.0923, - "step": 19025 - }, - { - "epoch": 0.9369927696620923, - "grad_norm": 16.66170310974121, - "learning_rate": 3.9053686604543004e-07, - "loss": 0.0808, - "step": 19050 - }, - { - "epoch": 0.9382224189661109, - "grad_norm": 0.07082456350326538, - "learning_rate": 3.754895885372101e-07, - "loss": 0.1029, - "step": 19075 - }, - { - "epoch": 0.9394520682701294, - "grad_norm": 0.13143180310726166, - "learning_rate": 3.6073517137610094e-07, - "loss": 0.0326, - "step": 19100 - }, - { - "epoch": 0.9406817175741479, - "grad_norm": 0.02031579799950123, - "learning_rate": 3.462738347451078e-07, - "loss": 0.1138, - "step": 19125 - }, - { - "epoch": 0.9419113668781663, - "grad_norm": 4.525613307952881, - "learning_rate": 3.3210579445354106e-07, - "loss": 0.0463, - "step": 19150 - }, - { - "epoch": 0.9431410161821848, - "grad_norm": 22.079936981201172, - "learning_rate": 3.182312619337924e-07, - "loss": 0.0766, - "step": 19175 - }, - { - "epoch": 0.9443706654862033, - "grad_norm": 18.63210678100586, - "learning_rate": 3.046504442381837e-07, - "loss": 0.0729, - "step": 19200 - }, - { - "epoch": 0.9456003147902218, - "grad_norm": 0.03249223902821541, - "learning_rate": 2.9136354403586976e-07, - "loss": 0.0407, - "step": 19225 - }, - { - "epoch": 0.9468299640942404, - "grad_norm": 0.14357614517211914, - "learning_rate": 2.7837075960982505e-07, - "loss": 0.0623, - "step": 19250 - }, - { - "epoch": 0.9480596133982588, - "grad_norm": 0.11966054886579514, - "learning_rate": 2.6567228485386845e-07, - "loss": 0.0915, - "step": 19275 - }, - { - "epoch": 0.9492892627022773, - "grad_norm": 0.1509937196969986, - "learning_rate": 2.5326830926978296e-07, - "loss": 0.0694, - "step": 19300 - }, - { - "epoch": 0.9505189120062958, - "grad_norm": 0.04472092539072037, - "learning_rate": 2.411590179644896e-07, - "loss": 0.0692, - "step": 19325 - }, - { - "epoch": 0.9517485613103143, - "grad_norm": 24.242467880249023, - "learning_rate": 2.293445916472692e-07, - "loss": 0.124, - "step": 19350 - }, - { - "epoch": 0.9529782106143327, - "grad_norm": 0.023187115788459778, - "learning_rate": 2.178252066270825e-07, - "loss": 0.0844, - "step": 19375 - }, - { - "epoch": 0.9542078599183513, - "grad_norm": 0.03516969457268715, - "learning_rate": 2.066010348099301e-07, - "loss": 0.0289, - "step": 19400 - }, - { - "epoch": 0.9554375092223698, - "grad_norm": 0.007452072575688362, - "learning_rate": 1.9567224369628767e-07, - "loss": 0.0589, - "step": 19425 - }, - { - "epoch": 0.9566671585263883, - "grad_norm": 0.025689415633678436, - "learning_rate": 1.8503899637860812e-07, - "loss": 0.0229, - "step": 19450 - }, - { - "epoch": 0.9578968078304068, - "grad_norm": 0.004985098261386156, - "learning_rate": 1.7470145153889228e-07, - "loss": 0.0607, - "step": 19475 - }, - { - "epoch": 0.9591264571344252, - "grad_norm": 1.15916109085083, - "learning_rate": 1.6465976344630873e-07, - "loss": 0.0447, - "step": 19500 - }, - { - "epoch": 0.9603561064384437, - "grad_norm": 0.008567633107304573, - "learning_rate": 1.549140819549022e-07, - "loss": 0.0511, - "step": 19525 - }, - { - "epoch": 0.9615857557424623, - "grad_norm": 0.12471627444028854, - "learning_rate": 1.4546455250135317e-07, - "loss": 0.0464, - "step": 19550 - }, - { - "epoch": 0.9628154050464808, - "grad_norm": 0.02060401625931263, - "learning_rate": 1.363113161028129e-07, - "loss": 0.0805, - "step": 19575 - }, - { - "epoch": 0.9640450543504993, - "grad_norm": 11.324369430541992, - "learning_rate": 1.2745450935478742e-07, - "loss": 0.0289, - "step": 19600 - }, - { - "epoch": 0.9652747036545177, - "grad_norm": 27.575231552124023, - "learning_rate": 1.1889426442911022e-07, - "loss": 0.137, - "step": 19625 - }, - { - "epoch": 0.9665043529585362, - "grad_norm": 0.24694132804870605, - "learning_rate": 1.1063070907196382e-07, - "loss": 0.0942, - "step": 19650 - }, - { - "epoch": 0.9677340022625547, - "grad_norm": 10.80643367767334, - "learning_rate": 1.0266396660197464e-07, - "loss": 0.0548, - "step": 19675 - }, - { - "epoch": 0.9689636515665732, - "grad_norm": 0.17567524313926697, - "learning_rate": 9.499415590837668e-08, - "loss": 0.0656, - "step": 19700 - }, - { - "epoch": 0.9701933008705917, - "grad_norm": 0.2594759464263916, - "learning_rate": 8.762139144922855e-08, - "loss": 0.0894, - "step": 19725 - }, - { - "epoch": 0.9714229501746102, - "grad_norm": 0.010923475958406925, - "learning_rate": 8.054578324971695e-08, - "loss": 0.0613, - "step": 19750 - }, - { - "epoch": 0.9726525994786287, - "grad_norm": 0.2180577963590622, - "learning_rate": 7.376743690050259e-08, - "loss": 0.0547, - "step": 19775 - }, - { - "epoch": 0.9738822487826472, - "grad_norm": 0.11950449645519257, - "learning_rate": 6.728645355615682e-08, - "loss": 0.1043, - "step": 19800 - }, - { - "epoch": 0.9751118980866657, - "grad_norm": 0.019944198429584503, - "learning_rate": 6.110292993363632e-08, - "loss": 0.1229, - "step": 19825 - }, - { - "epoch": 0.9763415473906841, - "grad_norm": 0.7072005867958069, - "learning_rate": 5.521695831085971e-08, - "loss": 0.0533, - "step": 19850 - }, - { - "epoch": 0.9775711966947027, - "grad_norm": 14.585501670837402, - "learning_rate": 4.9628626525310975e-08, - "loss": 0.1054, - "step": 19875 - }, - { - "epoch": 0.9788008459987212, - "grad_norm": 16.45695686340332, - "learning_rate": 4.433801797274484e-08, - "loss": 0.0736, - "step": 19900 - }, - { - "epoch": 0.9800304953027397, - "grad_norm": 22.392364501953125, - "learning_rate": 3.9345211605927855e-08, - "loss": 0.1131, - "step": 19925 - }, - { - "epoch": 0.9812601446067581, - "grad_norm": 0.14765414595603943, - "learning_rate": 3.465028193347264e-08, - "loss": 0.1124, - "step": 19950 - }, - { - "epoch": 0.9824897939107766, - "grad_norm": 0.13240164518356323, - "learning_rate": 3.0253299018718765e-08, - "loss": 0.0549, - "step": 19975 - }, - { - "epoch": 0.9837194432147951, - "grad_norm": 0.010844554752111435, - "learning_rate": 2.615432847868693e-08, - "loss": 0.1116, - "step": 20000 - }, - { - "epoch": 0.9849490925188137, - "grad_norm": 16.790220260620117, - "learning_rate": 2.2353431483106426e-08, - "loss": 0.1407, - "step": 20025 - }, - { - "epoch": 0.9861787418228322, - "grad_norm": 0.008404899388551712, - "learning_rate": 1.88506647534914e-08, - "loss": 0.0509, - "step": 20050 - }, - { - "epoch": 0.9874083911268506, - "grad_norm": 0.08918372541666031, - "learning_rate": 1.5646080562308208e-08, - "loss": 0.1087, - "step": 20075 - }, - { - "epoch": 0.9886380404308691, - "grad_norm": 0.24005398154258728, - "learning_rate": 1.2739726732180491e-08, - "loss": 0.0507, - "step": 20100 - }, - { - "epoch": 0.9898676897348876, - "grad_norm": 0.11946118623018265, - "learning_rate": 1.0131646635187508e-08, - "loss": 0.0413, - "step": 20125 - }, - { - "epoch": 0.9910973390389061, - "grad_norm": 0.22036096453666687, - "learning_rate": 7.821879192209113e-09, - "loss": 0.0358, - "step": 20150 - }, - { - "epoch": 0.9923269883429245, - "grad_norm": 0.0757390633225441, - "learning_rate": 5.810458872350655e-09, - "loss": 0.0701, - "step": 20175 - }, - { - "epoch": 0.9935566376469431, - "grad_norm": 0.0867391973733902, - "learning_rate": 4.097415692427831e-09, - "loss": 0.1079, - "step": 20200 - }, - { - "epoch": 0.9947862869509616, - "grad_norm": 26.076948165893555, - "learning_rate": 2.682775216507061e-09, - "loss": 0.116, - "step": 20225 - }, - { - "epoch": 0.9960159362549801, - "grad_norm": 0.12132783979177475, - "learning_rate": 1.5665585555479924e-09, - "loss": 0.0502, - "step": 20250 - }, - { - "epoch": 0.9972455855589986, - "grad_norm": 0.1664504110813141, - "learning_rate": 7.487823670615513e-10, - "loss": 0.1589, - "step": 20275 - }, - { - "epoch": 0.998475234863017, - "grad_norm": 195.63522338867188, - "learning_rate": 2.2945885487901663e-10, - "loss": 0.0417, - "step": 20300 - }, - { - "epoch": 0.9997048841670355, - "grad_norm": 0.04839450493454933, - "learning_rate": 8.595768969943407e-12, - "loss": 0.1429, - "step": 20325 - }, - { - "epoch": 1.0, - "eval_accuracy": 0.9800136843242465, - "eval_auc": 0.9928329489685142, - "eval_f1": 0.9865750707530055, - "eval_loss": 0.1046341210603714, - "eval_precision": 0.9823690929235512, - "eval_recall": 0.9908172189291614, - "eval_runtime": 4084.1231, - "eval_samples_per_second": 6.799, - "eval_steps_per_second": 0.284, - "step": 20331 - } - ], - "logging_steps": 25, - "max_steps": 20331, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 500, - "stateful_callbacks": { - "EarlyStoppingCallback": { - "args": { - "early_stopping_patience": 5, - "early_stopping_threshold": 0.01 - }, - "attributes": { - "early_stopping_patience_counter": 0 - } - }, - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 5.617957861409056e+19, - "train_batch_size": 12, - "trial_name": null, - "trial_params": null -}