diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,6618 +1,5884 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.7647858599592114, + "epoch": 0.6798096532970768, "eval_steps": 1000, - "global_step": 9000, + "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000849762066621346, - "grad_norm": 18.268434524536133, - "learning_rate": 6.796941376380629e-08, - "loss": 4.229, + "grad_norm": 14.73544692993164, + "learning_rate": 8.496176720475787e-08, + "loss": 3.8114, "step": 10 }, { "epoch": 0.001699524133242692, - "grad_norm": 17.71913719177246, - "learning_rate": 1.5293118096856415e-07, - "loss": 4.2873, + "grad_norm": 10.841628074645996, + "learning_rate": 1.6142735768903995e-07, + "loss": 3.8901, "step": 20 }, { "epoch": 0.002549286199864038, - "grad_norm": 14.518835067749023, - "learning_rate": 2.3789294817332202e-07, - "loss": 3.6209, + "grad_norm": 14.642532348632812, + "learning_rate": 2.463891248937978e-07, + "loss": 3.598, "step": 30 }, { "epoch": 0.003399048266485384, - "grad_norm": 26.50405502319336, - "learning_rate": 3.228547153780799e-07, - "loss": 4.2523, + "grad_norm": 9.909016609191895, + "learning_rate": 3.3135089209855564e-07, + "loss": 3.8369, "step": 40 }, { "epoch": 0.00424881033310673, - "grad_norm": 18.675613403320312, - "learning_rate": 4.0781648258283776e-07, - "loss": 3.6983, + "grad_norm": 13.172293663024902, + "learning_rate": 4.1631265930331356e-07, + "loss": 3.4766, "step": 50 }, { "epoch": 0.005098572399728076, - "grad_norm": 19.47433853149414, - "learning_rate": 4.927782497875956e-07, - "loss": 3.9072, + "grad_norm": 10.401021003723145, + "learning_rate": 5.012744265080714e-07, + "loss": 3.5983, "step": 60 }, { "epoch": 0.005948334466349422, - "grad_norm": 17.185827255249023, - "learning_rate": 5.777400169923535e-07, - "loss": 3.403, + "grad_norm": 14.548458099365234, + "learning_rate": 5.862361937128293e-07, + "loss": 3.3285, "step": 70 }, { "epoch": 0.006798096532970768, - "grad_norm": 15.945021629333496, - "learning_rate": 6.627017841971113e-07, - "loss": 3.0885, + "grad_norm": 13.83597183227539, + "learning_rate": 6.711979609175871e-07, + "loss": 3.1135, "step": 80 }, { "epoch": 0.007647858599592114, - "grad_norm": 31.154003143310547, - "learning_rate": 7.476635514018691e-07, - "loss": 3.3575, + "grad_norm": 12.430092811584473, + "learning_rate": 7.56159728122345e-07, + "loss": 2.9757, "step": 90 }, { "epoch": 0.00849762066621346, - "grad_norm": 15.41059398651123, - "learning_rate": 8.326253186066271e-07, - "loss": 3.9237, + "grad_norm": 13.983314514160156, + "learning_rate": 8.411214953271029e-07, + "loss": 3.3373, "step": 100 }, { "epoch": 0.009347382732834806, - "grad_norm": 14.243741035461426, - "learning_rate": 9.090909090909091e-07, - "loss": 3.6796, + "grad_norm": 10.040183067321777, + "learning_rate": 9.260832625318606e-07, + "loss": 3.1236, "step": 110 }, { "epoch": 0.010197144799456152, - "grad_norm": 15.607385635375977, - "learning_rate": 9.94052676295667e-07, - "loss": 3.182, + "grad_norm": 13.374701499938965, + "learning_rate": 1.0110450297366187e-06, + "loss": 2.7132, "step": 120 }, { "epoch": 0.011046906866077498, - "grad_norm": 18.54163932800293, - "learning_rate": 1.0790144435004249e-06, - "loss": 3.5442, + "grad_norm": 10.595660209655762, + "learning_rate": 1.0960067969413766e-06, + "loss": 2.8783, "step": 130 }, { "epoch": 0.011896668932698844, - "grad_norm": 20.704797744750977, - "learning_rate": 1.1639762107051828e-06, - "loss": 2.9727, + "grad_norm": 10.290271759033203, + "learning_rate": 1.1809685641461345e-06, + "loss": 2.3779, "step": 140 }, { "epoch": 0.01274643099932019, - "grad_norm": 10.565589904785156, - "learning_rate": 1.2489379779099406e-06, - "loss": 2.9999, + "grad_norm": 12.051546096801758, + "learning_rate": 1.2659303313508921e-06, + "loss": 2.6556, "step": 150 }, { "epoch": 0.013596193065941536, - "grad_norm": 14.996217727661133, - "learning_rate": 1.3338997451146985e-06, - "loss": 2.8556, + "grad_norm": 37.49174499511719, + "learning_rate": 1.35089209855565e-06, + "loss": 2.2028, "step": 160 }, { "epoch": 0.014445955132562882, - "grad_norm": 16.501012802124023, - "learning_rate": 1.4188615123194564e-06, - "loss": 2.7022, + "grad_norm": 9.25294303894043, + "learning_rate": 1.4358538657604079e-06, + "loss": 2.2236, "step": 170 }, { "epoch": 0.015295717199184228, - "grad_norm": 10.947978973388672, - "learning_rate": 1.503823279524214e-06, - "loss": 3.2263, + "grad_norm": 6.885627269744873, + "learning_rate": 1.5208156329651657e-06, + "loss": 2.7309, "step": 180 }, { "epoch": 0.016145479265805573, - "grad_norm": 10.752420425415039, - "learning_rate": 1.588785046728972e-06, - "loss": 2.796, + "grad_norm": 10.370819091796875, + "learning_rate": 1.6057774001699236e-06, + "loss": 2.4107, "step": 190 }, { "epoch": 0.01699524133242692, - "grad_norm": 12.628373146057129, - "learning_rate": 1.6737468139337298e-06, - "loss": 2.6446, + "grad_norm": 7.793025016784668, + "learning_rate": 1.6907391673746815e-06, + "loss": 2.3434, "step": 200 }, { "epoch": 0.017845003399048265, - "grad_norm": 11.598666191101074, - "learning_rate": 1.7587085811384877e-06, - "loss": 2.3583, + "grad_norm": 9.495673179626465, + "learning_rate": 1.7757009345794394e-06, + "loss": 1.9811, "step": 210 }, { "epoch": 0.018694765465669613, - "grad_norm": 12.28815746307373, - "learning_rate": 1.8436703483432457e-06, - "loss": 2.8925, + "grad_norm": 8.9649019241333, + "learning_rate": 1.8606627017841972e-06, + "loss": 2.6514, "step": 220 }, { "epoch": 0.019544527532290957, - "grad_norm": 12.328848838806152, - "learning_rate": 1.928632115548004e-06, - "loss": 2.8626, + "grad_norm": 8.40274715423584, + "learning_rate": 1.945624468988955e-06, + "loss": 2.6114, "step": 230 }, { "epoch": 0.020394289598912305, - "grad_norm": 12.9681396484375, - "learning_rate": 2.0135938827527617e-06, - "loss": 2.9225, + "grad_norm": 9.24577808380127, + "learning_rate": 2.0305862361937128e-06, + "loss": 2.5214, "step": 240 }, { "epoch": 0.02124405166553365, - "grad_norm": 11.879500389099121, - "learning_rate": 2.0985556499575196e-06, - "loss": 2.2267, + "grad_norm": 9.348999977111816, + "learning_rate": 2.115548003398471e-06, + "loss": 2.01, "step": 250 }, { "epoch": 0.022093813732154997, - "grad_norm": 11.527056694030762, - "learning_rate": 2.183517417162277e-06, - "loss": 1.9864, + "grad_norm": 7.33099889755249, + "learning_rate": 2.200509770603229e-06, + "loss": 1.7568, "step": 260 }, { "epoch": 0.02294357579877634, - "grad_norm": 9.24322509765625, - "learning_rate": 2.268479184367035e-06, - "loss": 2.6644, + "grad_norm": 8.247879981994629, + "learning_rate": 2.285471537807987e-06, + "loss": 2.356, "step": 270 }, { "epoch": 0.02379333786539769, - "grad_norm": 11.155841827392578, - "learning_rate": 2.3534409515717928e-06, - "loss": 2.6638, + "grad_norm": 7.958196640014648, + "learning_rate": 2.3704333050127447e-06, + "loss": 2.5519, "step": 280 }, { "epoch": 0.024643099932019033, - "grad_norm": 15.58091926574707, - "learning_rate": 2.4384027187765506e-06, - "loss": 2.3745, + "grad_norm": 14.198843002319336, + "learning_rate": 2.4553950722175025e-06, + "loss": 2.0232, "step": 290 }, { "epoch": 0.02549286199864038, - "grad_norm": 9.601865768432617, - "learning_rate": 2.5233644859813085e-06, - "loss": 1.8501, + "grad_norm": 6.649204730987549, + "learning_rate": 2.54035683942226e-06, + "loss": 1.6215, "step": 300 }, { "epoch": 0.026342624065261725, - "grad_norm": 10.356588363647461, - "learning_rate": 2.6083262531860664e-06, - "loss": 2.8233, + "grad_norm": 8.822147369384766, + "learning_rate": 2.6253186066270183e-06, + "loss": 2.6331, "step": 310 }, { "epoch": 0.027192386131883073, - "grad_norm": 10.197558403015137, - "learning_rate": 2.6932880203908247e-06, - "loss": 2.1929, + "grad_norm": 7.730117321014404, + "learning_rate": 2.7102803738317757e-06, + "loss": 2.0053, "step": 320 }, { "epoch": 0.028042148198504417, - "grad_norm": 8.68546199798584, - "learning_rate": 2.778249787595582e-06, - "loss": 2.4857, + "grad_norm": 8.546724319458008, + "learning_rate": 2.7952421410365336e-06, + "loss": 2.3054, "step": 330 }, { "epoch": 0.028891910265125765, - "grad_norm": 10.212944030761719, - "learning_rate": 2.8632115548003404e-06, - "loss": 2.1028, + "grad_norm": 7.879939556121826, + "learning_rate": 2.880203908241292e-06, + "loss": 1.9774, "step": 340 }, { "epoch": 0.02974167233174711, - "grad_norm": 9.90490436553955, - "learning_rate": 2.948173322005098e-06, - "loss": 1.9448, + "grad_norm": 9.588859558105469, + "learning_rate": 2.9651656754460494e-06, + "loss": 1.8434, "step": 350 }, { "epoch": 0.030591434398368457, - "grad_norm": 7.6113080978393555, - "learning_rate": 3.033135089209856e-06, - "loss": 1.5421, + "grad_norm": 5.803117752075195, + "learning_rate": 3.0501274426508076e-06, + "loss": 1.3065, "step": 360 }, { "epoch": 0.0314411964649898, - "grad_norm": 14.545954704284668, - "learning_rate": 3.1180968564146136e-06, - "loss": 2.7751, + "grad_norm": 13.746269226074219, + "learning_rate": 3.135089209855565e-06, + "loss": 2.5697, "step": 370 }, { "epoch": 0.032290958531611146, - "grad_norm": 12.547419548034668, - "learning_rate": 3.203058623619372e-06, - "loss": 2.5278, + "grad_norm": 11.798929214477539, + "learning_rate": 3.2200509770603234e-06, + "loss": 2.3131, "step": 380 }, { "epoch": 0.0331407205982325, - "grad_norm": 9.155962944030762, - "learning_rate": 3.2880203908241294e-06, - "loss": 2.2187, + "grad_norm": 8.668383598327637, + "learning_rate": 3.305012744265081e-06, + "loss": 2.0535, "step": 390 }, { "epoch": 0.03399048266485384, - "grad_norm": 9.267026901245117, - "learning_rate": 3.3729821580288872e-06, - "loss": 1.848, + "grad_norm": 7.256958484649658, + "learning_rate": 3.389974511469839e-06, + "loss": 1.5674, "step": 400 }, { "epoch": 0.034840244731475185, - "grad_norm": 9.409789085388184, - "learning_rate": 3.457943925233645e-06, - "loss": 2.5653, + "grad_norm": 8.976884841918945, + "learning_rate": 3.4749362786745966e-06, + "loss": 2.45, "step": 410 }, { "epoch": 0.03569000679809653, - "grad_norm": 8.781575202941895, - "learning_rate": 3.542905692438403e-06, - "loss": 2.1551, + "grad_norm": 6.406662464141846, + "learning_rate": 3.559898045879355e-06, + "loss": 1.9994, "step": 420 }, { "epoch": 0.03653976886471788, - "grad_norm": 9.391977310180664, - "learning_rate": 3.627867459643161e-06, - "loss": 2.7703, + "grad_norm": 8.969710350036621, + "learning_rate": 3.6448598130841123e-06, + "loss": 2.6629, "step": 430 }, { "epoch": 0.037389530931339225, - "grad_norm": 10.008152961730957, - "learning_rate": 3.7128292268479187e-06, - "loss": 2.2344, + "grad_norm": 8.452781677246094, + "learning_rate": 3.7298215802888706e-06, + "loss": 2.0677, "step": 440 }, { "epoch": 0.03823929299796057, - "grad_norm": 10.446879386901855, - "learning_rate": 3.7977909940526766e-06, - "loss": 1.8822, + "grad_norm": 9.167816162109375, + "learning_rate": 3.814783347493628e-06, + "loss": 1.7282, "step": 450 }, { "epoch": 0.039089055064581914, - "grad_norm": 10.419329643249512, - "learning_rate": 3.882752761257435e-06, - "loss": 2.2032, + "grad_norm": 10.11596965789795, + "learning_rate": 3.899745114698386e-06, + "loss": 2.1117, "step": 460 }, { "epoch": 0.039938817131203265, - "grad_norm": 11.649075508117676, - "learning_rate": 3.967714528462192e-06, - "loss": 2.5615, + "grad_norm": 11.748783111572266, + "learning_rate": 3.984706881903144e-06, + "loss": 2.374, "step": 470 }, { "epoch": 0.04078857919782461, - "grad_norm": 6.804622173309326, - "learning_rate": 4.052676295666951e-06, - "loss": 1.9196, + "grad_norm": 5.1859660148620605, + "learning_rate": 4.069668649107902e-06, + "loss": 1.7799, "step": 480 }, { "epoch": 0.041638341264445954, - "grad_norm": 10.680384635925293, - "learning_rate": 4.137638062871708e-06, - "loss": 1.7664, + "grad_norm": 9.730149269104004, + "learning_rate": 4.1546304163126596e-06, + "loss": 1.6734, "step": 490 }, { "epoch": 0.0424881033310673, - "grad_norm": 13.3015775680542, - "learning_rate": 4.2225998300764655e-06, - "loss": 1.6504, + "grad_norm": 11.203630447387695, + "learning_rate": 4.239592183517418e-06, + "loss": 1.4893, "step": 500 }, { "epoch": 0.04333786539768865, - "grad_norm": 12.803914070129395, - "learning_rate": 4.307561597281224e-06, - "loss": 2.1702, + "grad_norm": 11.456293106079102, + "learning_rate": 4.324553950722175e-06, + "loss": 2.031, "step": 510 }, { "epoch": 0.04418762746430999, - "grad_norm": 14.765999794006348, - "learning_rate": 4.392523364485981e-06, - "loss": 2.5528, + "grad_norm": 13.225220680236816, + "learning_rate": 4.409515717926934e-06, + "loss": 2.4175, "step": 520 }, { "epoch": 0.04503738953093134, - "grad_norm": 9.281490325927734, - "learning_rate": 4.4774851316907396e-06, - "loss": 2.3397, + "grad_norm": 8.564604759216309, + "learning_rate": 4.494477485131691e-06, + "loss": 2.2505, "step": 530 }, { "epoch": 0.04588715159755268, - "grad_norm": 10.90330696105957, - "learning_rate": 4.562446898895497e-06, - "loss": 2.4859, + "grad_norm": 9.196183204650879, + "learning_rate": 4.579439252336449e-06, + "loss": 2.3695, "step": 540 }, { "epoch": 0.04673691366417403, - "grad_norm": 8.136676788330078, - "learning_rate": 4.647408666100255e-06, - "loss": 2.3076, + "grad_norm": 6.964520454406738, + "learning_rate": 4.664401019541207e-06, + "loss": 2.1952, "step": 550 }, { "epoch": 0.04758667573079538, - "grad_norm": 13.599370002746582, - "learning_rate": 4.732370433305013e-06, - "loss": 2.7487, + "grad_norm": 12.784635543823242, + "learning_rate": 4.749362786745964e-06, + "loss": 2.582, "step": 560 }, { "epoch": 0.04843643779741672, - "grad_norm": 9.191825866699219, - "learning_rate": 4.817332200509771e-06, - "loss": 1.9055, + "grad_norm": 8.13243579864502, + "learning_rate": 4.8343245539507225e-06, + "loss": 1.7935, "step": 570 }, { "epoch": 0.049286199864038066, - "grad_norm": 11.754880905151367, - "learning_rate": 4.9022939677145285e-06, - "loss": 2.2271, + "grad_norm": 11.175552368164062, + "learning_rate": 4.91928632115548e-06, + "loss": 2.156, "step": 580 }, { "epoch": 0.05013596193065942, - "grad_norm": 11.308605194091797, - "learning_rate": 4.987255734919287e-06, - "loss": 1.5778, + "grad_norm": 10.315290451049805, + "learning_rate": 5.004248088360238e-06, + "loss": 1.5579, "step": 590 }, { "epoch": 0.05098572399728076, - "grad_norm": 13.667522430419922, - "learning_rate": 5.072217502124045e-06, - "loss": 2.7517, + "grad_norm": 11.324199676513672, + "learning_rate": 5.089209855564996e-06, + "loss": 2.572, "step": 600 }, { "epoch": 0.051835486063902106, - "grad_norm": 10.282818794250488, - "learning_rate": 5.1571792693288025e-06, - "loss": 1.941, + "grad_norm": 9.260397911071777, + "learning_rate": 5.174171622769753e-06, + "loss": 1.8751, "step": 610 }, { "epoch": 0.05268524813052345, - "grad_norm": 9.502120971679688, - "learning_rate": 5.24214103653356e-06, - "loss": 2.1769, + "grad_norm": 7.611422538757324, + "learning_rate": 5.259133389974512e-06, + "loss": 2.1146, "step": 620 }, { "epoch": 0.0535350101971448, - "grad_norm": 9.003978729248047, - "learning_rate": 5.3271028037383174e-06, - "loss": 1.7947, + "grad_norm": 8.637801170349121, + "learning_rate": 5.34409515717927e-06, + "loss": 1.739, "step": 630 }, { "epoch": 0.054384772263766146, - "grad_norm": 11.52032470703125, - "learning_rate": 5.4120645709430766e-06, - "loss": 1.8666, + "grad_norm": 11.575106620788574, + "learning_rate": 5.429056924384027e-06, + "loss": 1.7652, "step": 640 }, { "epoch": 0.05523453433038749, - "grad_norm": 12.302214622497559, - "learning_rate": 5.497026338147834e-06, - "loss": 2.4285, + "grad_norm": 11.192960739135742, + "learning_rate": 5.514018691588785e-06, + "loss": 2.3194, "step": 650 }, { "epoch": 0.056084296397008834, - "grad_norm": 10.840524673461914, - "learning_rate": 5.5819881053525915e-06, - "loss": 1.998, + "grad_norm": 9.807344436645508, + "learning_rate": 5.598980458793544e-06, + "loss": 1.8637, "step": 660 }, { "epoch": 0.056934058463630186, - "grad_norm": 10.77685832977295, - "learning_rate": 5.66694987255735e-06, - "loss": 2.0776, + "grad_norm": 10.799173355102539, + "learning_rate": 5.683942225998301e-06, + "loss": 1.9794, "step": 670 }, { "epoch": 0.05778382053025153, - "grad_norm": 11.400283813476562, - "learning_rate": 5.751911639762108e-06, - "loss": 1.7107, + "grad_norm": 10.048852920532227, + "learning_rate": 5.768903993203059e-06, + "loss": 1.6374, "step": 680 }, { "epoch": 0.058633582596872874, - "grad_norm": 10.180913925170898, - "learning_rate": 5.8368734069668655e-06, - "loss": 1.4931, + "grad_norm": 9.95223617553711, + "learning_rate": 5.853865760407818e-06, + "loss": 1.4355, "step": 690 }, { "epoch": 0.05948334466349422, - "grad_norm": 7.688689231872559, - "learning_rate": 5.921835174171623e-06, - "loss": 1.4785, + "grad_norm": 6.564249515533447, + "learning_rate": 5.938827527612575e-06, + "loss": 1.3763, "step": 700 }, { "epoch": 0.06033310673011557, - "grad_norm": 10.244742393493652, - "learning_rate": 6.006796941376381e-06, - "loss": 2.3844, + "grad_norm": 9.307559967041016, + "learning_rate": 6.023789294817333e-06, + "loss": 2.2797, "step": 710 }, { "epoch": 0.061182868796736914, - "grad_norm": 9.278727531433105, - "learning_rate": 6.0917587085811396e-06, - "loss": 1.7501, + "grad_norm": 7.567359447479248, + "learning_rate": 6.10875106202209e-06, + "loss": 1.6895, "step": 720 }, { "epoch": 0.06203263086335826, - "grad_norm": 10.120676040649414, - "learning_rate": 6.176720475785897e-06, - "loss": 1.7656, + "grad_norm": 8.765115737915039, + "learning_rate": 6.1937128292268485e-06, + "loss": 1.6998, "step": 730 }, { "epoch": 0.0628823929299796, - "grad_norm": 15.554547309875488, - "learning_rate": 6.2616822429906544e-06, - "loss": 2.1083, + "grad_norm": 14.329473495483398, + "learning_rate": 6.27017841971113e-06, + "loss": 2.0926, "step": 740 }, { "epoch": 0.06373215499660095, - "grad_norm": 8.595041275024414, - "learning_rate": 6.346644010195413e-06, - "loss": 2.3617, + "grad_norm": 8.000829696655273, + "learning_rate": 6.355140186915888e-06, + "loss": 2.2495, "step": 750 }, { "epoch": 0.06458191706322229, - "grad_norm": 8.327468872070312, - "learning_rate": 6.43160577740017e-06, - "loss": 1.9304, + "grad_norm": 7.170144081115723, + "learning_rate": 6.440101954120647e-06, + "loss": 1.8361, "step": 760 }, { "epoch": 0.06543167912984364, - "grad_norm": 8.711671829223633, - "learning_rate": 6.5165675446049285e-06, - "loss": 2.1605, + "grad_norm": 8.615540504455566, + "learning_rate": 6.525063721325404e-06, + "loss": 2.0814, "step": 770 }, { "epoch": 0.066281441196465, - "grad_norm": 12.639328956604004, - "learning_rate": 6.601529311809686e-06, - "loss": 2.0953, + "grad_norm": 10.925422668457031, + "learning_rate": 6.610025488530162e-06, + "loss": 1.9751, "step": 780 }, { "epoch": 0.06713120326308633, - "grad_norm": 10.030131340026855, - "learning_rate": 6.686491079014444e-06, - "loss": 1.6414, + "grad_norm": 9.352291107177734, + "learning_rate": 6.694987255734919e-06, + "loss": 1.5877, "step": 790 }, { "epoch": 0.06798096532970768, - "grad_norm": 11.07939338684082, - "learning_rate": 6.771452846219202e-06, - "loss": 2.9633, + "grad_norm": 10.513012886047363, + "learning_rate": 6.779949022939678e-06, + "loss": 2.9411, "step": 800 }, { "epoch": 0.06883072739632903, - "grad_norm": 15.321415901184082, - "learning_rate": 6.856414613423959e-06, - "loss": 2.4998, + "grad_norm": 13.80375862121582, + "learning_rate": 6.864910790144436e-06, + "loss": 2.466, "step": 810 }, { "epoch": 0.06968048946295037, - "grad_norm": 7.1824822425842285, - "learning_rate": 6.9413763806287174e-06, - "loss": 1.8811, + "grad_norm": 6.150080680847168, + "learning_rate": 6.949872557349193e-06, + "loss": 1.8303, "step": 820 }, { "epoch": 0.07053025152957172, - "grad_norm": 8.840105056762695, - "learning_rate": 7.026338147833476e-06, - "loss": 1.4012, + "grad_norm": 7.838348388671875, + "learning_rate": 7.034834324553951e-06, + "loss": 1.3468, "step": 830 }, { "epoch": 0.07138001359619306, - "grad_norm": 8.294805526733398, - "learning_rate": 7.111299915038233e-06, - "loss": 1.6192, + "grad_norm": 7.2478132247924805, + "learning_rate": 7.11979609175871e-06, + "loss": 1.5485, "step": 840 }, { "epoch": 0.07222977566281441, - "grad_norm": 11.937904357910156, - "learning_rate": 7.196261682242991e-06, - "loss": 2.1715, + "grad_norm": 10.934910774230957, + "learning_rate": 7.204757858963467e-06, + "loss": 2.0856, "step": 850 }, { "epoch": 0.07307953772943576, - "grad_norm": 9.503459930419922, - "learning_rate": 7.28122344944775e-06, - "loss": 1.965, + "grad_norm": 8.459626197814941, + "learning_rate": 7.289719626168225e-06, + "loss": 1.9067, "step": 860 }, { "epoch": 0.0739292997960571, - "grad_norm": 7.293017864227295, - "learning_rate": 7.366185216652507e-06, - "loss": 1.61, + "grad_norm": 7.03658390045166, + "learning_rate": 7.374681393372983e-06, + "loss": 1.5406, "step": 870 }, { "epoch": 0.07477906186267845, - "grad_norm": 8.3853178024292, - "learning_rate": 7.451146983857265e-06, - "loss": 2.1058, + "grad_norm": 7.933267116546631, + "learning_rate": 7.459643160577741e-06, + "loss": 2.0842, "step": 880 }, { "epoch": 0.0756288239292998, - "grad_norm": 10.05813980102539, - "learning_rate": 7.536108751062022e-06, - "loss": 1.346, + "grad_norm": 9.203567504882812, + "learning_rate": 7.544604927782499e-06, + "loss": 1.3399, "step": 890 }, { "epoch": 0.07647858599592114, - "grad_norm": 9.065289497375488, - "learning_rate": 7.621070518266781e-06, - "loss": 1.8801, + "grad_norm": 7.762595176696777, + "learning_rate": 7.629566694987256e-06, + "loss": 1.8138, "step": 900 }, { "epoch": 0.07732834806254249, - "grad_norm": 9.709288597106934, - "learning_rate": 7.706032285471538e-06, - "loss": 1.9211, + "grad_norm": 8.992586135864258, + "learning_rate": 7.714528462192015e-06, + "loss": 1.8355, "step": 910 }, { "epoch": 0.07817811012916383, - "grad_norm": 9.781811714172363, - "learning_rate": 7.790994052676297e-06, - "loss": 2.234, + "grad_norm": 9.716043472290039, + "learning_rate": 7.799490229396773e-06, + "loss": 2.2083, "step": 920 }, { "epoch": 0.07902787219578518, - "grad_norm": 8.916315078735352, - "learning_rate": 7.875955819881054e-06, - "loss": 1.9114, + "grad_norm": 8.2730131149292, + "learning_rate": 7.88445199660153e-06, + "loss": 1.849, "step": 930 }, { "epoch": 0.07987763426240653, - "grad_norm": 8.898033142089844, - "learning_rate": 7.960917587085812e-06, - "loss": 1.9662, + "grad_norm": 7.929242134094238, + "learning_rate": 7.969413763806288e-06, + "loss": 1.9105, "step": 940 }, { "epoch": 0.08072739632902787, - "grad_norm": 9.798989295959473, - "learning_rate": 8.04587935429057e-06, - "loss": 1.5801, + "grad_norm": 9.326386451721191, + "learning_rate": 8.054375531011047e-06, + "loss": 1.5099, "step": 950 }, { "epoch": 0.08157715839564922, - "grad_norm": 6.919313907623291, - "learning_rate": 8.130841121495327e-06, - "loss": 1.3301, + "grad_norm": 6.161294460296631, + "learning_rate": 8.139337298215804e-06, + "loss": 1.2589, "step": 960 }, { "epoch": 0.08242692046227057, - "grad_norm": 9.985525131225586, - "learning_rate": 8.215802888700086e-06, - "loss": 1.6247, + "grad_norm": 8.59307861328125, + "learning_rate": 8.224299065420562e-06, + "loss": 1.5917, "step": 970 }, { "epoch": 0.08327668252889191, - "grad_norm": 14.699216842651367, - "learning_rate": 8.300764655904843e-06, - "loss": 1.5851, + "grad_norm": 13.442170143127441, + "learning_rate": 8.309260832625319e-06, + "loss": 1.5236, "step": 980 }, { "epoch": 0.08412644459551326, - "grad_norm": 11.023968696594238, - "learning_rate": 8.3857264231096e-06, - "loss": 1.9348, + "grad_norm": 10.815309524536133, + "learning_rate": 8.394222599830078e-06, + "loss": 1.9194, "step": 990 }, { "epoch": 0.0849762066621346, - "grad_norm": 7.14918851852417, - "learning_rate": 8.470688190314358e-06, - "loss": 1.7108, + "grad_norm": 5.992090225219727, + "learning_rate": 8.479184367034836e-06, + "loss": 1.6147, "step": 1000 }, { "epoch": 0.0849762066621346, - "eval_cosine_accuracy@1": 0.666, - "eval_cosine_accuracy@10": 0.897, - "eval_cosine_accuracy@3": 0.8055, - "eval_cosine_accuracy@5": 0.854, - "eval_cosine_map@100": 0.7490156040699963, - "eval_cosine_mrr@10": 0.7451317460317455, - "eval_cosine_ndcg@10": 0.7820982250683307, - "eval_cosine_precision@1": 0.666, - "eval_cosine_precision@10": 0.08970000000000002, - "eval_cosine_precision@3": 0.2685, - "eval_cosine_precision@5": 0.1708, - "eval_cosine_recall@1": 0.666, - "eval_cosine_recall@10": 0.897, - "eval_cosine_recall@3": 0.8055, - "eval_cosine_recall@5": 0.854, - "eval_loss": 1.793184757232666, - "eval_runtime": 2.7362, - "eval_samples_per_second": 275.561, - "eval_sequential_score": 0.7490156040699963, - "eval_steps_per_second": 4.386, - "eval_sts-dev_pearson_cosine": 0.7976785829149171, - "eval_sts-dev_pearson_dot": 0.7944480613492959, - "eval_sts-dev_pearson_euclidean": 0.7808700096757055, - "eval_sts-dev_pearson_manhattan": 0.7791175418755985, - "eval_sts-dev_pearson_max": 0.7976785829149171, - "eval_sts-dev_spearman_cosine": 0.8062056784519672, - "eval_sts-dev_spearman_dot": 0.8023538658183383, - "eval_sts-dev_spearman_euclidean": 0.8001427543756516, - "eval_sts-dev_spearman_manhattan": 0.7978660817820529, - "eval_sts-dev_spearman_max": 0.8062056784519672, + "eval_cosine_accuracy@1": 0.671, + "eval_cosine_accuracy@10": 0.9055, + "eval_cosine_accuracy@3": 0.8185, + "eval_cosine_accuracy@5": 0.863, + "eval_cosine_map@100": 0.7579504816368724, + "eval_cosine_mrr@10": 0.7543708333333332, + "eval_cosine_ndcg@10": 0.7912626599366506, + "eval_cosine_precision@1": 0.671, + "eval_cosine_precision@10": 0.09055, + "eval_cosine_precision@3": 0.2728333333333333, + "eval_cosine_precision@5": 0.17260000000000003, + "eval_cosine_recall@1": 0.671, + "eval_cosine_recall@10": 0.9055, + "eval_cosine_recall@3": 0.8185, + "eval_cosine_recall@5": 0.863, + "eval_loss": 1.7405914068222046, + "eval_runtime": 2.6891, + "eval_samples_per_second": 280.388, + "eval_sequential_score": 0.7579504816368724, + "eval_steps_per_second": 4.462, + "eval_sts-dev_pearson_cosine": 0.8030491957053217, + "eval_sts-dev_pearson_dot": 0.8017216107034908, + "eval_sts-dev_pearson_euclidean": 0.7867940275529253, + "eval_sts-dev_pearson_manhattan": 0.784309174204914, + "eval_sts-dev_pearson_max": 0.8030491957053217, + "eval_sts-dev_spearman_cosine": 0.8109435875379872, + "eval_sts-dev_spearman_dot": 0.8093038367891731, + "eval_sts-dev_spearman_euclidean": 0.8050486935112244, + "eval_sts-dev_spearman_manhattan": 0.8026155261117491, + "eval_sts-dev_spearman_max": 0.8109435875379872, "step": 1000 }, { "epoch": 0.08582596872875595, - "grad_norm": 7.752645492553711, - "learning_rate": 8.555649957519117e-06, - "loss": 1.84, + "grad_norm": 7.56091833114624, + "learning_rate": 8.564146134239593e-06, + "loss": 1.8092, "step": 1010 }, { "epoch": 0.0866757307953773, - "grad_norm": 9.170955657958984, - "learning_rate": 8.640611724723875e-06, - "loss": 2.2945, + "grad_norm": 8.53748893737793, + "learning_rate": 8.64910790144435e-06, + "loss": 2.2912, "step": 1020 }, { "epoch": 0.08752549286199864, - "grad_norm": 12.583295822143555, - "learning_rate": 8.725573491928632e-06, - "loss": 1.9166, + "grad_norm": 12.247106552124023, + "learning_rate": 8.734069668649108e-06, + "loss": 1.8473, "step": 1030 }, { "epoch": 0.08837525492861999, - "grad_norm": 6.952343940734863, - "learning_rate": 8.81053525913339e-06, - "loss": 1.4608, + "grad_norm": 6.0466179847717285, + "learning_rate": 8.819031435853867e-06, + "loss": 1.3879, "step": 1040 }, { "epoch": 0.08922501699524134, - "grad_norm": 9.50481128692627, - "learning_rate": 8.895497026338149e-06, - "loss": 2.6738, + "grad_norm": 9.765048027038574, + "learning_rate": 8.903993203058625e-06, + "loss": 2.5645, "step": 1050 }, { "epoch": 0.09007477906186268, - "grad_norm": 6.4373602867126465, - "learning_rate": 8.980458793542906e-06, - "loss": 1.9956, + "grad_norm": 5.525635719299316, + "learning_rate": 8.988954970263382e-06, + "loss": 1.9847, "step": 1060 }, { "epoch": 0.09092454112848403, - "grad_norm": 11.445114135742188, - "learning_rate": 9.065420560747664e-06, - "loss": 1.8495, + "grad_norm": 12.070135116577148, + "learning_rate": 9.07391673746814e-06, + "loss": 1.7767, "step": 1070 }, { "epoch": 0.09177430319510536, - "grad_norm": 9.475210189819336, - "learning_rate": 9.150382327952421e-06, - "loss": 1.8593, + "grad_norm": 9.723821640014648, + "learning_rate": 9.158878504672899e-06, + "loss": 1.8132, "step": 1080 }, { "epoch": 0.09262406526172672, - "grad_norm": 9.296758651733398, - "learning_rate": 9.23534409515718e-06, - "loss": 2.3967, + "grad_norm": 9.767189025878906, + "learning_rate": 9.243840271877656e-06, + "loss": 2.356, "step": 1090 }, { "epoch": 0.09347382732834807, - "grad_norm": 6.36351203918457, - "learning_rate": 9.320305862361938e-06, - "loss": 1.9406, + "grad_norm": 5.764347076416016, + "learning_rate": 9.328802039082414e-06, + "loss": 1.8806, "step": 1100 }, { "epoch": 0.0943235893949694, - "grad_norm": 10.402345657348633, - "learning_rate": 9.405267629566695e-06, - "loss": 1.7431, + "grad_norm": 9.958595275878906, + "learning_rate": 9.413763806287171e-06, + "loss": 1.7226, "step": 1110 }, { "epoch": 0.09517335146159075, - "grad_norm": 9.209178924560547, - "learning_rate": 9.490229396771453e-06, - "loss": 1.6852, + "grad_norm": 8.53773021697998, + "learning_rate": 9.498725573491928e-06, + "loss": 1.6482, "step": 1120 }, { "epoch": 0.0960231135282121, - "grad_norm": 11.181512832641602, - "learning_rate": 9.575191163976212e-06, - "loss": 2.5254, + "grad_norm": 11.895674705505371, + "learning_rate": 9.583687340696688e-06, + "loss": 2.5, "step": 1130 }, { "epoch": 0.09687287559483344, - "grad_norm": 9.188380241394043, - "learning_rate": 9.66015293118097e-06, - "loss": 1.5981, + "grad_norm": 8.990635871887207, + "learning_rate": 9.668649107901445e-06, + "loss": 1.5931, "step": 1140 }, { "epoch": 0.0977226376614548, - "grad_norm": 9.776697158813477, - "learning_rate": 9.745114698385727e-06, - "loss": 1.4272, + "grad_norm": 8.807872772216797, + "learning_rate": 9.753610875106203e-06, + "loss": 1.3899, "step": 1150 }, { "epoch": 0.09857239972807613, - "grad_norm": 9.87637710571289, - "learning_rate": 9.82158028887001e-06, - "loss": 1.5939, + "grad_norm": 9.076343536376953, + "learning_rate": 9.83857264231096e-06, + "loss": 1.5451, "step": 1160 }, { "epoch": 0.09942216179469748, - "grad_norm": 9.491826057434082, - "learning_rate": 9.906542056074768e-06, - "loss": 1.6437, + "grad_norm": 8.96530818939209, + "learning_rate": 9.923534409515717e-06, + "loss": 1.59, "step": 1170 }, { "epoch": 0.10027192386131883, - "grad_norm": 8.475481033325195, - "learning_rate": 9.991503823279525e-06, - "loss": 1.9103, + "grad_norm": 7.113520622253418, + "learning_rate": 9.999999780028805e-06, + "loss": 1.8115, "step": 1180 }, { "epoch": 0.10112168592794017, - "grad_norm": 9.808141708374023, - "learning_rate": 9.99998218234363e-06, - "loss": 2.0834, + "grad_norm": 9.126861572265625, + "learning_rate": 9.999973383508784e-06, + "loss": 2.062, "step": 1190 }, { "epoch": 0.10197144799456152, - "grad_norm": 10.641770362854004, - "learning_rate": 9.999920590608097e-06, - "loss": 1.9915, + "grad_norm": 10.153830528259277, + "learning_rate": 9.999902993015826e-06, + "loss": 1.9508, "step": 1200 }, { "epoch": 0.10282121006118287, - "grad_norm": 9.805014610290527, - "learning_rate": 9.999815005364144e-06, - "loss": 2.486, + "grad_norm": 9.75989818572998, + "learning_rate": 9.999788609169283e-06, + "loss": 2.4069, "step": 1210 }, { "epoch": 0.10367097212780421, - "grad_norm": 8.563312530517578, - "learning_rate": 9.999665427540796e-06, - "loss": 2.0627, + "grad_norm": 8.199569702148438, + "learning_rate": 9.999630232975603e-06, + "loss": 2.0273, "step": 1220 }, { "epoch": 0.10452073419442556, - "grad_norm": 7.607810020446777, - "learning_rate": 9.999471858454165e-06, - "loss": 1.6629, + "grad_norm": 7.153557300567627, + "learning_rate": 9.999427865828314e-06, + "loss": 1.6278, "step": 1230 }, { "epoch": 0.1053704962610469, - "grad_norm": 11.007046699523926, - "learning_rate": 9.99923429980744e-06, - "loss": 2.5541, + "grad_norm": 10.450045585632324, + "learning_rate": 9.99918150950801e-06, + "loss": 2.5481, "step": 1240 }, { "epoch": 0.10622025832766825, - "grad_norm": 8.265456199645996, - "learning_rate": 9.998952753690852e-06, - "loss": 1.9547, + "grad_norm": 7.822266578674316, + "learning_rate": 9.998891166182342e-06, + "loss": 1.9195, "step": 1250 }, { "epoch": 0.1070700203942896, - "grad_norm": 9.270110130310059, - "learning_rate": 9.99862722258169e-06, - "loss": 1.4342, + "grad_norm": 8.367025375366211, + "learning_rate": 9.998556838405995e-06, + "loss": 1.3667, "step": 1260 }, { "epoch": 0.10791978246091094, - "grad_norm": 8.084294319152832, - "learning_rate": 9.998257709344246e-06, - "loss": 2.5096, + "grad_norm": 7.437953472137451, + "learning_rate": 9.998178529120666e-06, + "loss": 2.4832, "step": 1270 }, { "epoch": 0.10876954452753229, - "grad_norm": 10.906116485595703, - "learning_rate": 9.997844217229809e-06, - "loss": 2.0713, + "grad_norm": 10.818694114685059, + "learning_rate": 9.997756241655036e-06, + "loss": 2.0343, "step": 1280 }, { "epoch": 0.10961930659415364, - "grad_norm": 8.502143859863281, - "learning_rate": 9.99738674987663e-06, - "loss": 2.0861, + "grad_norm": 8.044429779052734, + "learning_rate": 9.997289979724751e-06, + "loss": 2.0113, "step": 1290 }, { "epoch": 0.11046906866077498, - "grad_norm": 6.842615127563477, - "learning_rate": 9.996885311309892e-06, - "loss": 1.6125, + "grad_norm": 6.1590895652771, + "learning_rate": 9.99677974743237e-06, + "loss": 1.5492, "step": 1300 }, { "epoch": 0.11131883072739633, - "grad_norm": 8.510361671447754, - "learning_rate": 9.996339905941674e-06, - "loss": 1.6563, + "grad_norm": 8.634650230407715, + "learning_rate": 9.99622554926735e-06, + "loss": 1.6053, "step": 1310 }, { "epoch": 0.11216859279401767, - "grad_norm": 17.573272705078125, - "learning_rate": 9.995750538570912e-06, - "loss": 1.759, + "grad_norm": 9.916925430297852, + "learning_rate": 9.995627390105993e-06, + "loss": 1.7595, "step": 1320 }, { "epoch": 0.11301835486063902, - "grad_norm": 10.040366172790527, - "learning_rate": 9.995117214383356e-06, - "loss": 1.3864, + "grad_norm": 9.837615966796875, + "learning_rate": 9.994985275211403e-06, + "loss": 1.356, "step": 1330 }, { "epoch": 0.11386811692726037, - "grad_norm": 9.944698333740234, - "learning_rate": 9.994439938951523e-06, - "loss": 1.5737, + "grad_norm": 9.660857200622559, + "learning_rate": 9.994299210233451e-06, + "loss": 1.5716, "step": 1340 }, { "epoch": 0.11471787899388171, - "grad_norm": 11.481671333312988, - "learning_rate": 9.993718718234654e-06, - "loss": 2.2294, + "grad_norm": 11.417171478271484, + "learning_rate": 9.99356920120871e-06, + "loss": 2.1764, "step": 1350 }, { "epoch": 0.11556764106050306, - "grad_norm": 9.144421577453613, - "learning_rate": 9.992953558578656e-06, - "loss": 1.9181, + "grad_norm": 8.765302658081055, + "learning_rate": 9.992795254560418e-06, + "loss": 1.9217, "step": 1360 }, { "epoch": 0.11641740312712441, - "grad_norm": 8.614849090576172, - "learning_rate": 9.992144466716046e-06, - "loss": 2.2238, + "grad_norm": 8.307648658752441, + "learning_rate": 9.991977377098405e-06, + "loss": 2.1936, "step": 1370 }, { "epoch": 0.11726716519374575, - "grad_norm": 5.382638931274414, - "learning_rate": 9.991291449765895e-06, - "loss": 1.4119, + "grad_norm": 5.614004135131836, + "learning_rate": 9.991115576019048e-06, + "loss": 1.3914, "step": 1380 }, { "epoch": 0.1181169272603671, - "grad_norm": 10.466590881347656, - "learning_rate": 9.990394515233764e-06, - "loss": 2.1013, + "grad_norm": 10.269454002380371, + "learning_rate": 9.990209858905196e-06, + "loss": 1.9944, "step": 1390 }, { "epoch": 0.11896668932698844, - "grad_norm": 14.626543998718262, - "learning_rate": 9.98945367101164e-06, - "loss": 2.1451, + "grad_norm": 14.825604438781738, + "learning_rate": 9.98926023372611e-06, + "loss": 2.1162, "step": 1400 }, { "epoch": 0.11981645139360979, - "grad_norm": 8.006312370300293, - "learning_rate": 9.988468925377859e-06, - "loss": 1.7866, + "grad_norm": 7.789546966552734, + "learning_rate": 9.988266708837393e-06, + "loss": 1.7333, "step": 1410 }, { "epoch": 0.12066621346023114, - "grad_norm": 12.995379447937012, - "learning_rate": 9.987440286997043e-06, - "loss": 2.2415, + "grad_norm": 12.159082412719727, + "learning_rate": 9.987229292980913e-06, + "loss": 2.1856, "step": 1420 }, { "epoch": 0.12151597552685248, - "grad_norm": 11.312751770019531, - "learning_rate": 9.98636776492002e-06, - "loss": 2.118, + "grad_norm": 11.174983024597168, + "learning_rate": 9.986147995284726e-06, + "loss": 2.1026, "step": 1430 }, { "epoch": 0.12236573759347383, - "grad_norm": 7.426715850830078, - "learning_rate": 9.985251368583738e-06, - "loss": 1.2572, + "grad_norm": 7.547834873199463, + "learning_rate": 9.985022825263001e-06, + "loss": 1.2478, "step": 1440 }, { "epoch": 0.12321549966009518, - "grad_norm": 8.718595504760742, - "learning_rate": 9.984091107811194e-06, - "loss": 2.2137, + "grad_norm": 9.001571655273438, + "learning_rate": 9.98385379281593e-06, + "loss": 2.1637, "step": 1450 }, { "epoch": 0.12406526172671652, - "grad_norm": 11.221927642822266, - "learning_rate": 9.982886992811336e-06, - "loss": 1.8935, + "grad_norm": 9.454615592956543, + "learning_rate": 9.982640908229642e-06, + "loss": 1.8734, "step": 1460 }, { "epoch": 0.12491502379333787, - "grad_norm": 8.0070219039917, - "learning_rate": 9.981639034178985e-06, - "loss": 1.9213, + "grad_norm": 7.656332015991211, + "learning_rate": 9.981384182176116e-06, + "loss": 1.8867, "step": 1470 }, { "epoch": 0.1257647858599592, - "grad_norm": 8.883262634277344, - "learning_rate": 9.980347242894726e-06, - "loss": 2.2463, + "grad_norm": 8.913731575012207, + "learning_rate": 9.98008362571309e-06, + "loss": 2.2377, "step": 1480 }, { "epoch": 0.12661454792658056, - "grad_norm": 6.109762668609619, - "learning_rate": 9.97901163032483e-06, - "loss": 1.634, + "grad_norm": 6.2996721267700195, + "learning_rate": 9.978739250283951e-06, + "loss": 1.6174, "step": 1490 }, { "epoch": 0.1274643099932019, - "grad_norm": 9.314282417297363, - "learning_rate": 9.977632208221139e-06, - "loss": 1.3852, + "grad_norm": 8.871682167053223, + "learning_rate": 9.977351067717644e-06, + "loss": 1.356, "step": 1500 }, { "epoch": 0.12831407205982326, - "grad_norm": 9.365290641784668, - "learning_rate": 9.976208988720968e-06, - "loss": 2.0875, + "grad_norm": 9.214465141296387, + "learning_rate": 9.975919090228571e-06, + "loss": 2.0684, "step": 1510 }, { "epoch": 0.12916383412644458, - "grad_norm": 7.084475994110107, - "learning_rate": 9.974741984347e-06, - "loss": 1.5067, + "grad_norm": 7.033022403717041, + "learning_rate": 9.97444333041647e-06, + "loss": 1.4745, "step": 1520 }, { "epoch": 0.13001359619306593, - "grad_norm": 9.04839038848877, - "learning_rate": 9.973231208007175e-06, - "loss": 2.1362, + "grad_norm": 9.510734558105469, + "learning_rate": 9.97292380126632e-06, + "loss": 2.0965, "step": 1530 }, { "epoch": 0.13086335825968728, - "grad_norm": 9.598000526428223, - "learning_rate": 9.971676672994574e-06, - "loss": 1.8342, + "grad_norm": 9.086190223693848, + "learning_rate": 9.971360516148218e-06, + "loss": 1.8437, "step": 1540 }, { "epoch": 0.13171312032630864, - "grad_norm": 10.618086814880371, - "learning_rate": 9.970078392987303e-06, - "loss": 1.4859, + "grad_norm": 10.115914344787598, + "learning_rate": 9.969753488817263e-06, + "loss": 1.4531, "step": 1550 }, { "epoch": 0.13256288239293, - "grad_norm": 16.32321548461914, - "learning_rate": 9.968436382048378e-06, - "loss": 2.4904, + "grad_norm": 14.614215850830078, + "learning_rate": 9.968102733413429e-06, + "loss": 2.4221, "step": 1560 }, { "epoch": 0.13341264445955134, - "grad_norm": 10.043795585632324, - "learning_rate": 9.966750654625587e-06, - "loss": 1.5241, + "grad_norm": 8.7617769241333, + "learning_rate": 9.966408264461454e-06, + "loss": 1.5201, "step": 1570 }, { "epoch": 0.13426240652617266, - "grad_norm": 8.244874000549316, - "learning_rate": 9.965021225551383e-06, - "loss": 1.6228, + "grad_norm": 7.793543338775635, + "learning_rate": 9.9646700968707e-06, + "loss": 1.5904, "step": 1580 }, { "epoch": 0.135112168592794, - "grad_norm": 11.411089897155762, - "learning_rate": 9.963248110042739e-06, - "loss": 1.5443, + "grad_norm": 10.72291088104248, + "learning_rate": 9.962888245935032e-06, + "loss": 1.5357, "step": 1590 }, { "epoch": 0.13596193065941536, - "grad_norm": 8.641087532043457, - "learning_rate": 9.96143132370101e-06, - "loss": 2.3666, + "grad_norm": 8.450425148010254, + "learning_rate": 9.961062727332668e-06, + "loss": 2.2998, "step": 1600 }, { "epoch": 0.13681169272603672, - "grad_norm": 8.1345796585083, - "learning_rate": 9.959570882511818e-06, - "loss": 1.3122, + "grad_norm": 7.952602386474609, + "learning_rate": 9.959193557126062e-06, + "loss": 1.2875, "step": 1610 }, { "epoch": 0.13766145479265807, - "grad_norm": 8.277626991271973, - "learning_rate": 9.957666802844888e-06, - "loss": 1.1251, + "grad_norm": 7.678238391876221, + "learning_rate": 9.957280751761743e-06, + "loss": 1.089, "step": 1620 }, { "epoch": 0.1385112168592794, - "grad_norm": 9.099385261535645, - "learning_rate": 9.955719101453916e-06, - "loss": 2.1177, + "grad_norm": 8.827707290649414, + "learning_rate": 9.955324328070184e-06, + "loss": 2.0749, "step": 1630 }, { "epoch": 0.13936097892590074, - "grad_norm": 8.906343460083008, - "learning_rate": 9.953727795476415e-06, - "loss": 2.2723, + "grad_norm": 8.926315307617188, + "learning_rate": 9.953324303265645e-06, + "loss": 2.2554, "step": 1640 }, { "epoch": 0.1402107409925221, - "grad_norm": 8.920252799987793, - "learning_rate": 9.951692902433572e-06, - "loss": 2.0156, + "grad_norm": 8.376702308654785, + "learning_rate": 9.951280694946028e-06, + "loss": 1.969, "step": 1650 }, { "epoch": 0.14106050305914344, - "grad_norm": 9.611597061157227, - "learning_rate": 9.94961444023009e-06, - "loss": 2.6568, + "grad_norm": 9.565410614013672, + "learning_rate": 9.94919352109272e-06, + "loss": 2.6012, "step": 1660 }, { "epoch": 0.1419102651257648, - "grad_norm": 12.192018508911133, - "learning_rate": 9.947492427154023e-06, - "loss": 2.6251, + "grad_norm": 11.733724594116211, + "learning_rate": 9.947062800070428e-06, + "loss": 2.4911, "step": 1670 }, { "epoch": 0.14276002719238612, - "grad_norm": 10.49497127532959, - "learning_rate": 9.945326881876634e-06, - "loss": 2.535, + "grad_norm": 10.455894470214844, + "learning_rate": 9.944888550627034e-06, + "loss": 2.5227, "step": 1680 }, { "epoch": 0.14360978925900747, - "grad_norm": 6.832841873168945, - "learning_rate": 9.943117823452209e-06, - "loss": 1.5212, + "grad_norm": 6.765949249267578, + "learning_rate": 9.942670791893412e-06, + "loss": 1.4801, "step": 1690 }, { "epoch": 0.14445955132562882, - "grad_norm": 9.554526329040527, - "learning_rate": 9.940865271317902e-06, - "loss": 1.8659, + "grad_norm": 9.16794490814209, + "learning_rate": 9.940409543383269e-06, + "loss": 1.8368, "step": 1700 }, { "epoch": 0.14530931339225017, - "grad_norm": 9.813288688659668, - "learning_rate": 9.938569245293566e-06, - "loss": 1.3508, + "grad_norm": 9.051717758178711, + "learning_rate": 9.938104824992972e-06, + "loss": 1.3036, "step": 1710 }, { "epoch": 0.14615907545887152, - "grad_norm": 8.244912147521973, - "learning_rate": 9.936229765581564e-06, - "loss": 1.0096, + "grad_norm": 8.060193061828613, + "learning_rate": 9.935756657001373e-06, + "loss": 1.0037, "step": 1720 }, { "epoch": 0.14700883752549287, - "grad_norm": 8.698970794677734, - "learning_rate": 9.933846852766614e-06, - "loss": 1.8963, + "grad_norm": 8.77188777923584, + "learning_rate": 9.93336506006963e-06, + "loss": 1.9339, "step": 1730 }, { "epoch": 0.1478585995921142, - "grad_norm": 7.188722133636475, - "learning_rate": 9.931420527815582e-06, - "loss": 1.3431, + "grad_norm": 6.56894063949585, + "learning_rate": 9.930930055241024e-06, + "loss": 1.3418, "step": 1740 }, { "epoch": 0.14870836165873555, - "grad_norm": 9.609166145324707, - "learning_rate": 9.92895081207732e-06, - "loss": 1.6355, + "grad_norm": 10.02035140991211, + "learning_rate": 9.928451663940778e-06, + "loss": 1.6051, "step": 1750 }, { "epoch": 0.1495581237253569, - "grad_norm": 9.131246566772461, - "learning_rate": 9.926437727282463e-06, - "loss": 1.5312, + "grad_norm": 7.834774971008301, + "learning_rate": 9.925929907975864e-06, + "loss": 1.519, "step": 1760 }, { "epoch": 0.15040788579197825, - "grad_norm": 8.260835647583008, - "learning_rate": 9.923881295543248e-06, - "loss": 1.7622, + "grad_norm": 8.129759788513184, + "learning_rate": 9.92336480953481e-06, + "loss": 1.7575, "step": 1770 }, { "epoch": 0.1512576478585996, - "grad_norm": 9.285859107971191, - "learning_rate": 9.921281539353309e-06, - "loss": 2.4495, + "grad_norm": 9.091249465942383, + "learning_rate": 9.920756391187514e-06, + "loss": 2.4666, "step": 1780 }, { "epoch": 0.15210740992522093, - "grad_norm": 7.803510665893555, - "learning_rate": 9.918638481587491e-06, - "loss": 1.6367, + "grad_norm": 8.187232971191406, + "learning_rate": 9.918104675885034e-06, + "loss": 1.6071, "step": 1790 }, { "epoch": 0.15295717199184228, - "grad_norm": 4.60161018371582, - "learning_rate": 9.91595214550164e-06, - "loss": 1.588, + "grad_norm": 4.071987628936768, + "learning_rate": 9.915409686959392e-06, + "loss": 1.5381, "step": 1800 }, { "epoch": 0.15380693405846363, - "grad_norm": 9.657403945922852, - "learning_rate": 9.913222554732403e-06, - "loss": 2.0486, + "grad_norm": 9.045771598815918, + "learning_rate": 9.912671448123368e-06, + "loss": 2.0542, "step": 1810 }, { "epoch": 0.15465669612508498, - "grad_norm": 11.236567497253418, - "learning_rate": 9.910449733297012e-06, - "loss": 1.5316, + "grad_norm": 11.008074760437012, + "learning_rate": 9.909889983470293e-06, + "loss": 1.489, "step": 1820 }, { "epoch": 0.15550645819170633, - "grad_norm": 9.062292098999023, - "learning_rate": 9.907633705593085e-06, - "loss": 1.6174, + "grad_norm": 9.349640846252441, + "learning_rate": 9.907065317473832e-06, + "loss": 1.6377, "step": 1830 }, { "epoch": 0.15635622025832766, - "grad_norm": 9.804792404174805, - "learning_rate": 9.904774496398405e-06, - "loss": 1.8715, + "grad_norm": 9.698678970336914, + "learning_rate": 9.904197474987773e-06, + "loss": 1.8472, "step": 1840 }, { "epoch": 0.157205982324949, - "grad_norm": 8.25210952758789, - "learning_rate": 9.901872130870698e-06, - "loss": 1.209, + "grad_norm": 7.523035049438477, + "learning_rate": 9.901286481245809e-06, + "loss": 1.1818, "step": 1850 }, { "epoch": 0.15805574439157036, - "grad_norm": 7.5723958015441895, - "learning_rate": 9.89892663454742e-06, - "loss": 1.3456, + "grad_norm": 7.2859039306640625, + "learning_rate": 9.898332361861312e-06, + "loss": 1.3088, "step": 1860 }, { "epoch": 0.1589055064581917, - "grad_norm": 8.326875686645508, - "learning_rate": 9.895938033345524e-06, - "loss": 1.8368, + "grad_norm": 8.572216033935547, + "learning_rate": 9.895335142827108e-06, + "loss": 1.7981, "step": 1870 }, { "epoch": 0.15975526852481306, - "grad_norm": 6.730352401733398, - "learning_rate": 9.89290635356124e-06, - "loss": 1.627, + "grad_norm": 6.056286334991455, + "learning_rate": 9.892294850515251e-06, + "loss": 1.6091, "step": 1880 }, { "epoch": 0.1606050305914344, - "grad_norm": 12.25359058380127, - "learning_rate": 9.889831621869836e-06, - "loss": 1.9879, + "grad_norm": 11.740818977355957, + "learning_rate": 9.889211511676795e-06, + "loss": 1.9716, "step": 1890 }, { "epoch": 0.16145479265805573, - "grad_norm": 7.9691009521484375, - "learning_rate": 9.88671386532539e-06, - "loss": 1.9625, + "grad_norm": 8.194591522216797, + "learning_rate": 9.886085153441549e-06, + "loss": 1.9483, "step": 1900 }, { "epoch": 0.16230455472467709, - "grad_norm": 9.276070594787598, - "learning_rate": 9.883553111360546e-06, - "loss": 2.0395, + "grad_norm": 8.784558296203613, + "learning_rate": 9.88291580331784e-06, + "loss": 2.0124, "step": 1910 }, { "epoch": 0.16315431679129844, - "grad_norm": 4.820672035217285, - "learning_rate": 9.88034938778628e-06, - "loss": 1.7252, + "grad_norm": 4.655549049377441, + "learning_rate": 9.879703489192279e-06, + "loss": 1.6491, "step": 1920 }, { "epoch": 0.1640040788579198, - "grad_norm": 9.923358917236328, - "learning_rate": 9.877102722791641e-06, - "loss": 1.8004, + "grad_norm": 10.1798677444458, + "learning_rate": 9.87644823932951e-06, + "loss": 1.7327, "step": 1930 }, { "epoch": 0.16485384092454114, - "grad_norm": 7.421237945556641, - "learning_rate": 9.873813144943527e-06, - "loss": 2.2082, + "grad_norm": 7.294815540313721, + "learning_rate": 9.873150082371959e-06, + "loss": 2.1865, "step": 1940 }, { "epoch": 0.16570360299116246, - "grad_norm": 8.488396644592285, - "learning_rate": 9.870480683186407e-06, - "loss": 2.2102, + "grad_norm": 8.242097854614258, + "learning_rate": 9.869809047339585e-06, + "loss": 2.169, "step": 1950 }, { "epoch": 0.16655336505778381, - "grad_norm": 6.614634037017822, - "learning_rate": 9.867105366842086e-06, - "loss": 1.1757, + "grad_norm": 6.371087551116943, + "learning_rate": 9.866425163629628e-06, + "loss": 1.1178, "step": 1960 }, { "epoch": 0.16740312712440517, - "grad_norm": 7.678738594055176, - "learning_rate": 9.86368722560944e-06, - "loss": 1.9025, + "grad_norm": 7.52371072769165, + "learning_rate": 9.86299846101634e-06, + "loss": 1.8374, "step": 1970 }, { "epoch": 0.16825288919102652, - "grad_norm": 5.782405376434326, - "learning_rate": 9.860226289564143e-06, - "loss": 1.5126, + "grad_norm": 5.663933753967285, + "learning_rate": 9.859528969650739e-06, + "loss": 1.493, "step": 1980 }, { "epoch": 0.16910265125764787, - "grad_norm": 8.204840660095215, - "learning_rate": 9.85672258915843e-06, - "loss": 1.4776, + "grad_norm": 8.575224876403809, + "learning_rate": 9.856016720060326e-06, + "loss": 1.4554, "step": 1990 }, { "epoch": 0.1699524133242692, - "grad_norm": 6.656698703765869, - "learning_rate": 9.853176155220802e-06, - "loss": 1.5689, + "grad_norm": 6.303723335266113, + "learning_rate": 9.85246174314883e-06, + "loss": 1.5359, "step": 2000 }, { "epoch": 0.1699524133242692, - "eval_cosine_accuracy@1": 0.677, - "eval_cosine_accuracy@10": 0.903, - "eval_cosine_accuracy@3": 0.8225, - "eval_cosine_accuracy@5": 0.8575, - "eval_cosine_map@100": 0.760008008710078, - "eval_cosine_mrr@10": 0.7564845238095231, - "eval_cosine_ndcg@10": 0.7921748093657405, - "eval_cosine_precision@1": 0.677, - "eval_cosine_precision@10": 0.0903, - "eval_cosine_precision@3": 0.2741666666666666, - "eval_cosine_precision@5": 0.1715, - "eval_cosine_recall@1": 0.677, - "eval_cosine_recall@10": 0.903, - "eval_cosine_recall@3": 0.8225, - "eval_cosine_recall@5": 0.8575, - "eval_loss": 1.6420265436172485, - "eval_runtime": 2.7303, - "eval_samples_per_second": 276.16, - "eval_sequential_score": 0.760008008710078, - "eval_steps_per_second": 4.395, - "eval_sts-dev_pearson_cosine": 0.8032520801117715, - "eval_sts-dev_pearson_dot": 0.8009286845017748, - "eval_sts-dev_pearson_euclidean": 0.7893842175159516, - "eval_sts-dev_pearson_manhattan": 0.7898910745915261, - "eval_sts-dev_pearson_max": 0.8032520801117715, - "eval_sts-dev_spearman_cosine": 0.809990391902194, - "eval_sts-dev_spearman_dot": 0.8019251400848677, - "eval_sts-dev_spearman_euclidean": 0.8099354072406699, - "eval_sts-dev_spearman_manhattan": 0.8084811975211984, - "eval_sts-dev_spearman_max": 0.809990391902194, + "eval_cosine_accuracy@1": 0.683, + "eval_cosine_accuracy@10": 0.9065, + "eval_cosine_accuracy@3": 0.8275, + "eval_cosine_accuracy@5": 0.867, + "eval_cosine_map@100": 0.766330389731565, + "eval_cosine_mrr@10": 0.7628323412698406, + "eval_cosine_ndcg@10": 0.7979463324190808, + "eval_cosine_precision@1": 0.683, + "eval_cosine_precision@10": 0.09065000000000001, + "eval_cosine_precision@3": 0.2758333333333333, + "eval_cosine_precision@5": 0.17340000000000003, + "eval_cosine_recall@1": 0.683, + "eval_cosine_recall@10": 0.9065, + "eval_cosine_recall@3": 0.8275, + "eval_cosine_recall@5": 0.867, + "eval_loss": 1.6272203922271729, + "eval_runtime": 2.612, + "eval_samples_per_second": 288.665, + "eval_sequential_score": 0.766330389731565, + "eval_steps_per_second": 4.594, + "eval_sts-dev_pearson_cosine": 0.8025338857443952, + "eval_sts-dev_pearson_dot": 0.8022952619615792, + "eval_sts-dev_pearson_euclidean": 0.78810662641818, + "eval_sts-dev_pearson_manhattan": 0.7879788229453927, + "eval_sts-dev_pearson_max": 0.8025338857443952, + "eval_sts-dev_spearman_cosine": 0.8067609081809247, + "eval_sts-dev_spearman_dot": 0.8053745558292306, + "eval_sts-dev_spearman_euclidean": 0.805688515027142, + "eval_sts-dev_spearman_manhattan": 0.8046476515394081, + "eval_sts-dev_spearman_max": 0.8067609081809247, "step": 2000 }, { "epoch": 0.17080217539089054, - "grad_norm": 8.541144371032715, - "learning_rate": 9.849587018955771e-06, - "loss": 1.6085, + "grad_norm": 8.932975769042969, + "learning_rate": 9.848864070195927e-06, + "loss": 1.5926, "step": 2010 }, { "epoch": 0.1716519374575119, - "grad_norm": 7.509024620056152, - "learning_rate": 9.845955211943577e-06, - "loss": 1.5845, + "grad_norm": 7.0394439697265625, + "learning_rate": 9.845223732856975e-06, + "loss": 1.5631, "step": 2020 }, { "epoch": 0.17250169952413325, - "grad_norm": 8.98787784576416, - "learning_rate": 9.842280766139913e-06, - "loss": 2.0365, + "grad_norm": 9.07466983795166, + "learning_rate": 9.84154076316272e-06, + "loss": 2.054, "step": 2030 }, { "epoch": 0.1733514615907546, - "grad_norm": 8.542064666748047, - "learning_rate": 9.838563713875646e-06, - "loss": 1.7228, + "grad_norm": 8.720723152160645, + "learning_rate": 9.837815193519031e-06, + "loss": 1.7155, "step": 2040 }, { "epoch": 0.17420122365737595, - "grad_norm": 7.782131671905518, - "learning_rate": 9.834804087856528e-06, - "loss": 2.2482, + "grad_norm": 7.311922550201416, + "learning_rate": 9.834047056706604e-06, + "loss": 2.2145, "step": 2050 }, { "epoch": 0.17505098572399727, - "grad_norm": 8.752131462097168, - "learning_rate": 9.831001921162913e-06, - "loss": 1.961, + "grad_norm": 8.780041694641113, + "learning_rate": 9.830236385880677e-06, + "loss": 1.9712, "step": 2060 }, { "epoch": 0.17590074779061862, - "grad_norm": 12.332489967346191, - "learning_rate": 9.827157247249463e-06, - "loss": 1.3036, + "grad_norm": 11.39024829864502, + "learning_rate": 9.826383214570738e-06, + "loss": 1.2845, "step": 2070 }, { "epoch": 0.17675050985723997, - "grad_norm": 10.847315788269043, - "learning_rate": 9.823270099944856e-06, - "loss": 1.6365, + "grad_norm": 10.047648429870605, + "learning_rate": 9.822487576680231e-06, + "loss": 1.5927, "step": 2080 }, { "epoch": 0.17760027192386132, - "grad_norm": 10.597957611083984, - "learning_rate": 9.819340513451481e-06, - "loss": 2.0563, + "grad_norm": 10.864714622497559, + "learning_rate": 9.818549506486256e-06, + "loss": 2.0479, "step": 2090 }, { "epoch": 0.17845003399048268, - "grad_norm": 6.6673383712768555, - "learning_rate": 9.81536852234515e-06, - "loss": 1.6711, + "grad_norm": 6.908188343048096, + "learning_rate": 9.81456903863927e-06, + "loss": 1.6388, "step": 2100 }, { "epoch": 0.179299796057104, - "grad_norm": 10.704626083374023, - "learning_rate": 9.81135416157478e-06, - "loss": 1.438, + "grad_norm": 9.867384910583496, + "learning_rate": 9.810546208162776e-06, + "loss": 1.4514, "step": 2110 }, { "epoch": 0.18014955812372535, - "grad_norm": 6.122239112854004, - "learning_rate": 9.807297466462096e-06, - "loss": 1.551, + "grad_norm": 6.380552768707275, + "learning_rate": 9.806481050453021e-06, + "loss": 1.5075, "step": 2120 }, { "epoch": 0.1809993201903467, - "grad_norm": 8.788558959960938, - "learning_rate": 9.803198472701316e-06, - "loss": 1.4024, + "grad_norm": 8.780299186706543, + "learning_rate": 9.802373601278685e-06, + "loss": 1.3573, "step": 2130 }, { "epoch": 0.18184908225696805, - "grad_norm": 9.57160758972168, - "learning_rate": 9.799057216358833e-06, - "loss": 1.6733, + "grad_norm": 8.982914924621582, + "learning_rate": 9.79822389678056e-06, + "loss": 1.6252, "step": 2140 }, { "epoch": 0.1826988443235894, - "grad_norm": 9.019649505615234, - "learning_rate": 9.79487373387291e-06, - "loss": 1.7135, + "grad_norm": 9.312070846557617, + "learning_rate": 9.794031973471239e-06, + "loss": 1.73, "step": 2150 }, { "epoch": 0.18354860639021073, - "grad_norm": 5.682429313659668, - "learning_rate": 9.790648062053341e-06, - "loss": 1.6766, + "grad_norm": 5.505138874053955, + "learning_rate": 9.78979786823479e-06, + "loss": 1.6867, "step": 2160 }, { "epoch": 0.18439836845683208, - "grad_norm": 6.802454948425293, - "learning_rate": 9.786380238081145e-06, - "loss": 1.4573, + "grad_norm": 6.972192764282227, + "learning_rate": 9.785521618326434e-06, + "loss": 1.4409, "step": 2170 }, { "epoch": 0.18524813052345343, - "grad_norm": 6.934159755706787, - "learning_rate": 9.782070299508228e-06, - "loss": 1.0338, + "grad_norm": 6.379241466522217, + "learning_rate": 9.781203261372216e-06, + "loss": 1.0126, "step": 2180 }, { "epoch": 0.18609789259007478, - "grad_norm": 10.261723518371582, - "learning_rate": 9.777718284257054e-06, - "loss": 1.6221, + "grad_norm": 9.349108695983887, + "learning_rate": 9.776842835368674e-06, + "loss": 1.5874, "step": 2190 }, { "epoch": 0.18694765465669613, - "grad_norm": 6.773005962371826, - "learning_rate": 9.773324230620318e-06, - "loss": 1.5327, + "grad_norm": 6.675095081329346, + "learning_rate": 9.772440378682504e-06, + "loss": 1.5113, "step": 2200 }, { "epoch": 0.18779741672331748, - "grad_norm": 10.13906478881836, - "learning_rate": 9.7688881772606e-06, - "loss": 2.1644, + "grad_norm": 10.094067573547363, + "learning_rate": 9.767995930050226e-06, + "loss": 2.129, "step": 2210 }, { "epoch": 0.1886471787899388, - "grad_norm": 5.345935344696045, - "learning_rate": 9.76441016321003e-06, - "loss": 1.3049, + "grad_norm": 4.828550815582275, + "learning_rate": 9.763509528577836e-06, + "loss": 1.2366, "step": 2220 }, { "epoch": 0.18949694085656016, - "grad_norm": 7.268871784210205, - "learning_rate": 9.759890227869944e-06, - "loss": 2.1003, + "grad_norm": 7.644665718078613, + "learning_rate": 9.758981213740472e-06, + "loss": 2.0757, "step": 2230 }, { "epoch": 0.1903467029231815, - "grad_norm": 9.787980079650879, - "learning_rate": 9.755328411010533e-06, - "loss": 1.9195, + "grad_norm": 10.053690910339355, + "learning_rate": 9.754411025382058e-06, + "loss": 1.8596, "step": 2240 }, { "epoch": 0.19119646498980286, - "grad_norm": 9.921524047851562, - "learning_rate": 9.750724752770507e-06, - "loss": 2.1153, + "grad_norm": 10.28764533996582, + "learning_rate": 9.749799003714954e-06, + "loss": 2.1074, "step": 2250 }, { "epoch": 0.1920462270564242, - "grad_norm": 9.916701316833496, - "learning_rate": 9.746545719445371e-06, - "loss": 1.5994, + "grad_norm": 9.630173683166504, + "learning_rate": 9.745145189319611e-06, + "loss": 1.5711, "step": 2260 }, { "epoch": 0.19289598912304554, - "grad_norm": 9.663291931152344, - "learning_rate": 9.741862674482556e-06, - "loss": 1.417, + "grad_norm": 9.78607177734375, + "learning_rate": 9.7404496231442e-06, + "loss": 1.3869, "step": 2270 }, { "epoch": 0.1937457511896669, - "grad_norm": 7.063597679138184, - "learning_rate": 9.737137906622003e-06, - "loss": 1.7211, + "grad_norm": 6.420983791351318, + "learning_rate": 9.735712346504267e-06, + "loss": 1.7303, "step": 2280 }, { "epoch": 0.19459551325628824, - "grad_norm": 6.238364219665527, - "learning_rate": 9.732371457436197e-06, - "loss": 1.8263, + "grad_norm": 6.115922927856445, + "learning_rate": 9.730933401082354e-06, + "loss": 1.8375, "step": 2290 }, { "epoch": 0.1954452753229096, - "grad_norm": 6.705079555511475, - "learning_rate": 9.727563368864361e-06, - "loss": 1.6932, + "grad_norm": 5.8398661613464355, + "learning_rate": 9.726112828927648e-06, + "loss": 1.6658, "step": 2300 }, { "epoch": 0.19629503738953094, - "grad_norm": 9.163618087768555, - "learning_rate": 9.722713683212113e-06, - "loss": 2.5187, + "grad_norm": 9.154995918273926, + "learning_rate": 9.721250672455596e-06, + "loss": 2.4472, "step": 2310 }, { "epoch": 0.19714479945615226, - "grad_norm": 5.289361953735352, - "learning_rate": 9.717822443151064e-06, - "loss": 1.2162, + "grad_norm": 5.0671491622924805, + "learning_rate": 9.71634697444754e-06, + "loss": 1.1964, "step": 2320 }, { "epoch": 0.19799456152277362, - "grad_norm": 8.493903160095215, - "learning_rate": 9.712889691718458e-06, - "loss": 2.1805, + "grad_norm": 8.500700950622559, + "learning_rate": 9.71140177805035e-06, + "loss": 2.1802, "step": 2330 }, { "epoch": 0.19884432358939497, - "grad_norm": 10.76265811920166, - "learning_rate": 9.707915472316796e-06, - "loss": 2.3068, + "grad_norm": 10.09263801574707, + "learning_rate": 9.706415126776014e-06, + "loss": 2.2913, "step": 2340 }, { "epoch": 0.19969408565601632, - "grad_norm": 6.735853672027588, - "learning_rate": 9.702899828713443e-06, - "loss": 1.7788, + "grad_norm": 6.817173957824707, + "learning_rate": 9.701387064501292e-06, + "loss": 1.7305, "step": 2350 }, { "epoch": 0.20054384772263767, - "grad_norm": 5.5452189445495605, - "learning_rate": 9.697842805040254e-06, - "loss": 1.2979, + "grad_norm": 5.4047064781188965, + "learning_rate": 9.696317635467304e-06, + "loss": 1.2718, "step": 2360 }, { "epoch": 0.20139360978925902, - "grad_norm": 10.43302059173584, - "learning_rate": 9.692744445793177e-06, - "loss": 2.1878, + "grad_norm": 11.601601600646973, + "learning_rate": 9.691206884279155e-06, + "loss": 2.1567, "step": 2370 }, { "epoch": 0.20224337185588034, - "grad_norm": 6.1132659912109375, - "learning_rate": 9.687604795831867e-06, - "loss": 1.5155, + "grad_norm": 5.925401210784912, + "learning_rate": 9.686054855905534e-06, + "loss": 1.4862, "step": 2380 }, { "epoch": 0.2030931339225017, - "grad_norm": 7.860180854797363, - "learning_rate": 9.68242390037929e-06, - "loss": 1.8877, + "grad_norm": 7.726446151733398, + "learning_rate": 9.68086159567832e-06, + "loss": 1.8498, "step": 2390 }, { "epoch": 0.20394289598912305, - "grad_norm": 4.963020324707031, - "learning_rate": 9.67720180502132e-06, - "loss": 2.1062, + "grad_norm": 5.089211463928223, + "learning_rate": 9.675627149292184e-06, + "loss": 2.0407, "step": 2400 }, { "epoch": 0.2047926580557444, - "grad_norm": 12.15691089630127, - "learning_rate": 9.671938555706348e-06, - "loss": 2.0619, + "grad_norm": 11.48768138885498, + "learning_rate": 9.670351562804195e-06, + "loss": 1.9914, "step": 2410 }, { "epoch": 0.20564242012236575, - "grad_norm": 9.804619789123535, - "learning_rate": 9.666634198744873e-06, - "loss": 1.8003, + "grad_norm": 9.819585800170898, + "learning_rate": 9.665034882633398e-06, + "loss": 1.7447, "step": 2420 }, { "epoch": 0.20649218218898707, - "grad_norm": 10.136634826660156, - "learning_rate": 9.661288780809086e-06, - "loss": 1.9592, + "grad_norm": 9.629562377929688, + "learning_rate": 9.65967715556042e-06, + "loss": 1.944, "step": 2430 }, { "epoch": 0.20734194425560842, - "grad_norm": 8.298238754272461, - "learning_rate": 9.655902348932474e-06, - "loss": 1.7833, + "grad_norm": 7.665591239929199, + "learning_rate": 9.654278428727053e-06, + "loss": 1.7682, "step": 2440 }, { "epoch": 0.20819170632222977, - "grad_norm": 11.206682205200195, - "learning_rate": 9.650474950509398e-06, - "loss": 2.0723, + "grad_norm": 11.004101753234863, + "learning_rate": 9.648838749635836e-06, + "loss": 2.0332, "step": 2450 }, { "epoch": 0.20904146838885113, - "grad_norm": 11.285218238830566, - "learning_rate": 9.645006633294676e-06, - "loss": 2.4516, + "grad_norm": 11.378152847290039, + "learning_rate": 9.643358166149646e-06, + "loss": 2.4602, "step": 2460 }, { "epoch": 0.20989123045547248, - "grad_norm": 11.80936336517334, - "learning_rate": 9.639497445403161e-06, - "loss": 1.7211, + "grad_norm": 10.160055160522461, + "learning_rate": 9.637836726491265e-06, + "loss": 1.6737, "step": 2470 }, { "epoch": 0.2107409925220938, - "grad_norm": 6.3373589515686035, - "learning_rate": 9.633947435309325e-06, - "loss": 1.2233, + "grad_norm": 5.931601047515869, + "learning_rate": 9.632274479242964e-06, + "loss": 1.2002, "step": 2480 }, { "epoch": 0.21159075458871515, - "grad_norm": 7.767312049865723, - "learning_rate": 9.62835665184683e-06, - "loss": 2.0983, + "grad_norm": 7.314711570739746, + "learning_rate": 9.62667147334608e-06, + "loss": 2.0536, "step": 2490 }, { "epoch": 0.2124405166553365, - "grad_norm": 5.433526992797852, - "learning_rate": 9.62272514420809e-06, - "loss": 1.2546, + "grad_norm": 5.574723243713379, + "learning_rate": 9.621027758100567e-06, + "loss": 1.2564, "step": 2500 }, { "epoch": 0.21329027872195785, - "grad_norm": 8.995706558227539, - "learning_rate": 9.617052961943848e-06, - "loss": 1.8098, + "grad_norm": 8.82634162902832, + "learning_rate": 9.615343383164587e-06, + "loss": 1.7968, "step": 2510 }, { "epoch": 0.2141400407885792, - "grad_norm": 7.032045841217041, - "learning_rate": 9.611340154962735e-06, - "loss": 1.8222, + "grad_norm": 5.783646583557129, + "learning_rate": 9.609618398554046e-06, + "loss": 1.7934, "step": 2520 }, { "epoch": 0.21498980285520056, - "grad_norm": 7.42227840423584, - "learning_rate": 9.605586773530837e-06, - "loss": 1.4316, + "grad_norm": 7.303430557250977, + "learning_rate": 9.603852854642183e-06, + "loss": 1.3855, "step": 2530 }, { "epoch": 0.21583956492182188, - "grad_norm": 8.21651554107666, - "learning_rate": 9.599792868271242e-06, - "loss": 1.5401, + "grad_norm": 7.56771183013916, + "learning_rate": 9.5980468021591e-06, + "loss": 1.5086, "step": 2540 }, { "epoch": 0.21668932698844323, - "grad_norm": 9.704622268676758, - "learning_rate": 9.593958490163604e-06, - "loss": 2.347, + "grad_norm": 9.690388679504395, + "learning_rate": 9.592200292191333e-06, + "loss": 2.3278, "step": 2550 }, { "epoch": 0.21753908905506458, - "grad_norm": 9.580229759216309, - "learning_rate": 9.588083690543694e-06, - "loss": 1.6326, + "grad_norm": 9.153576850891113, + "learning_rate": 9.586313376181398e-06, + "loss": 1.62, "step": 2560 }, { "epoch": 0.21838885112168593, - "grad_norm": 10.13274097442627, - "learning_rate": 9.58216852110294e-06, - "loss": 2.0589, + "grad_norm": 9.105634689331055, + "learning_rate": 9.580386105927336e-06, + "loss": 2.0118, "step": 2570 }, { "epoch": 0.21923861318830729, - "grad_norm": 9.455711364746094, - "learning_rate": 9.576213033887978e-06, - "loss": 1.7676, + "grad_norm": 10.291397094726562, + "learning_rate": 9.574418533582253e-06, + "loss": 1.7665, "step": 2580 }, { "epoch": 0.2200883752549286, - "grad_norm": 7.641183853149414, - "learning_rate": 9.570217281300198e-06, - "loss": 1.3967, + "grad_norm": 7.565315246582031, + "learning_rate": 9.568410711653876e-06, + "loss": 1.4106, "step": 2590 }, { "epoch": 0.22093813732154996, - "grad_norm": 9.368104934692383, - "learning_rate": 9.564181316095275e-06, - "loss": 2.0463, + "grad_norm": 9.466012954711914, + "learning_rate": 9.562362693004076e-06, + "loss": 2.0529, "step": 2600 }, { "epoch": 0.2217878993881713, - "grad_norm": 10.192388534545898, - "learning_rate": 9.55810519138271e-06, - "loss": 1.5589, + "grad_norm": 9.214174270629883, + "learning_rate": 9.55627453084841e-06, + "loss": 1.5266, "step": 2610 }, { "epoch": 0.22263766145479266, - "grad_norm": 6.582274436950684, - "learning_rate": 9.551988960625363e-06, - "loss": 2.1871, + "grad_norm": 6.392216682434082, + "learning_rate": 9.550146278755654e-06, + "loss": 2.2004, "step": 2620 }, { "epoch": 0.223487423521414, - "grad_norm": 9.636944770812988, - "learning_rate": 9.545832677638975e-06, - "loss": 1.2149, + "grad_norm": 9.040064811706543, + "learning_rate": 9.543977990647323e-06, + "loss": 1.2109, "step": 2630 }, { "epoch": 0.22433718558803534, - "grad_norm": 9.454007148742676, - "learning_rate": 9.539636396591705e-06, - "loss": 1.4454, + "grad_norm": 9.503253936767578, + "learning_rate": 9.537769720797207e-06, + "loss": 1.4509, "step": 2640 }, { "epoch": 0.2251869476546567, - "grad_norm": 8.678142547607422, - "learning_rate": 9.533400172003651e-06, - "loss": 1.5395, + "grad_norm": 8.925986289978027, + "learning_rate": 9.53152152383089e-06, + "loss": 1.494, "step": 2650 }, { "epoch": 0.22603670972127804, - "grad_norm": 7.63258695602417, - "learning_rate": 9.527124058746358e-06, - "loss": 1.5236, + "grad_norm": 7.668337821960449, + "learning_rate": 9.525233454725262e-06, + "loss": 1.5459, "step": 2660 }, { "epoch": 0.2268864717878994, - "grad_norm": 7.889406204223633, - "learning_rate": 9.520808112042354e-06, - "loss": 2.0349, + "grad_norm": 7.511707305908203, + "learning_rate": 9.51890556880805e-06, + "loss": 2.0089, "step": 2670 }, { "epoch": 0.22773623385452074, - "grad_norm": 6.830529689788818, - "learning_rate": 9.514452387464656e-06, - "loss": 2.001, + "grad_norm": 6.789205551147461, + "learning_rate": 9.512537921757317e-06, + "loss": 1.9762, "step": 2680 }, { "epoch": 0.2285859959211421, - "grad_norm": 9.945779800415039, - "learning_rate": 9.508056940936273e-06, - "loss": 1.3894, + "grad_norm": 10.295784950256348, + "learning_rate": 9.50613056960098e-06, + "loss": 1.3596, "step": 2690 }, { "epoch": 0.22943575798776342, - "grad_norm": 5.824800968170166, - "learning_rate": 9.501621828729724e-06, - "loss": 1.5097, + "grad_norm": 5.897681713104248, + "learning_rate": 9.499683568716313e-06, + "loss": 1.5094, "step": 2700 }, { "epoch": 0.23028552005438477, - "grad_norm": 11.277963638305664, - "learning_rate": 9.495147107466543e-06, - "loss": 1.754, + "grad_norm": 10.942471504211426, + "learning_rate": 9.493196975829457e-06, + "loss": 1.7427, "step": 2710 }, { "epoch": 0.23113528212100612, - "grad_norm": 8.04029655456543, - "learning_rate": 9.488632834116774e-06, - "loss": 1.3568, + "grad_norm": 7.598457336425781, + "learning_rate": 9.486670848014912e-06, + "loss": 1.354, "step": 2720 }, { "epoch": 0.23198504418762747, - "grad_norm": 12.442475318908691, - "learning_rate": 9.482079065998477e-06, - "loss": 2.0414, + "grad_norm": 12.048646926879883, + "learning_rate": 9.480105242695044e-06, + "loss": 1.9882, "step": 2730 }, { "epoch": 0.23283480625424882, - "grad_norm": 6.784972190856934, - "learning_rate": 9.475485860777219e-06, - "loss": 1.4016, + "grad_norm": 7.270595073699951, + "learning_rate": 9.473500217639571e-06, + "loss": 1.3848, "step": 2740 }, { "epoch": 0.23368456832087015, - "grad_norm": 7.946235656738281, - "learning_rate": 9.468853276465564e-06, - "loss": 1.6315, + "grad_norm": 8.0617036819458, + "learning_rate": 9.466855830965064e-06, + "loss": 1.6313, "step": 2750 }, { "epoch": 0.2345343303874915, - "grad_norm": 8.815011024475098, - "learning_rate": 9.462181371422572e-06, - "loss": 1.8147, + "grad_norm": 8.679147720336914, + "learning_rate": 9.460172141134423e-06, + "loss": 1.7722, "step": 2760 }, { "epoch": 0.23538409245411285, - "grad_norm": 9.286500930786133, - "learning_rate": 9.455470204353277e-06, - "loss": 1.3025, + "grad_norm": 8.749470710754395, + "learning_rate": 9.45344920695638e-06, + "loss": 1.2339, "step": 2770 }, { "epoch": 0.2362338545207342, - "grad_norm": 5.141117572784424, - "learning_rate": 9.448719834308175e-06, - "loss": 1.3445, + "grad_norm": 5.062926292419434, + "learning_rate": 9.446687087584962e-06, + "loss": 1.3144, "step": 2780 }, { "epoch": 0.23708361658735555, - "grad_norm": 9.371500968933105, - "learning_rate": 9.4419303206827e-06, - "loss": 1.741, + "grad_norm": 8.712714195251465, + "learning_rate": 9.43988584251899e-06, + "loss": 1.7124, "step": 2790 }, { "epoch": 0.23793337865397687, - "grad_norm": 10.242998123168945, - "learning_rate": 9.435101723216703e-06, - "loss": 1.8655, + "grad_norm": 10.875704765319824, + "learning_rate": 9.433045531601538e-06, + "loss": 1.8489, "step": 2800 }, { "epoch": 0.23878314072059822, - "grad_norm": 6.816215515136719, - "learning_rate": 9.428234101993938e-06, - "loss": 1.4883, + "grad_norm": 6.526149272918701, + "learning_rate": 9.426166215019417e-06, + "loss": 1.4535, "step": 2810 }, { "epoch": 0.23963290278721958, - "grad_norm": 6.136675834655762, - "learning_rate": 9.421327517441509e-06, - "loss": 1.6449, + "grad_norm": 6.165802001953125, + "learning_rate": 9.41924795330264e-06, + "loss": 1.6224, "step": 2820 }, { "epoch": 0.24048266485384093, - "grad_norm": 10.119004249572754, - "learning_rate": 9.414382030329358e-06, - "loss": 1.662, + "grad_norm": 8.270697593688965, + "learning_rate": 9.412290807323903e-06, + "loss": 1.6815, "step": 2830 }, { "epoch": 0.24133242692046228, - "grad_norm": 6.952613830566406, - "learning_rate": 9.40739770176973e-06, - "loss": 1.2729, + "grad_norm": 6.400485992431641, + "learning_rate": 9.405294838298023e-06, + "loss": 1.2336, "step": 2840 }, { "epoch": 0.24218218898708363, - "grad_norm": 9.02924633026123, - "learning_rate": 9.400374593216617e-06, - "loss": 1.518, + "grad_norm": 8.567676544189453, + "learning_rate": 9.398260107781422e-06, + "loss": 1.4843, "step": 2850 }, { "epoch": 0.24303195105370495, - "grad_norm": 5.770287990570068, - "learning_rate": 9.393312766465245e-06, - "loss": 1.3131, + "grad_norm": 5.906859874725342, + "learning_rate": 9.391186677671585e-06, + "loss": 1.295, "step": 2860 }, { "epoch": 0.2438817131203263, - "grad_norm": 8.95716667175293, - "learning_rate": 9.386212283651503e-06, - "loss": 1.6467, + "grad_norm": 9.095629692077637, + "learning_rate": 9.384074610206495e-06, + "loss": 1.6095, "step": 2870 }, { "epoch": 0.24473147518694766, - "grad_norm": 10.637526512145996, - "learning_rate": 9.379073207251414e-06, - "loss": 1.8138, + "grad_norm": 10.681280136108398, + "learning_rate": 9.376923967964108e-06, + "loss": 1.7894, "step": 2880 }, { "epoch": 0.245581237253569, - "grad_norm": 8.24285888671875, - "learning_rate": 9.37189560008058e-06, - "loss": 1.664, + "grad_norm": 7.8013458251953125, + "learning_rate": 9.369734813861791e-06, + "loss": 1.6503, "step": 2890 }, { "epoch": 0.24643099932019036, - "grad_norm": 4.791931629180908, - "learning_rate": 9.364679525293628e-06, - "loss": 1.6083, + "grad_norm": 4.590408802032471, + "learning_rate": 9.362507211155774e-06, + "loss": 1.6089, "step": 2900 }, { "epoch": 0.24728076138681168, - "grad_norm": 9.948259353637695, - "learning_rate": 9.357425046383658e-06, - "loss": 1.8901, + "grad_norm": 9.771063804626465, + "learning_rate": 9.355241223440582e-06, + "loss": 1.8407, "step": 2910 }, { "epoch": 0.24813052345343303, - "grad_norm": 7.875885963439941, - "learning_rate": 9.350132227181675e-06, - "loss": 1.5651, + "grad_norm": 8.298754692077637, + "learning_rate": 9.347936914648492e-06, + "loss": 1.5631, "step": 2920 }, { "epoch": 0.24898028552005438, - "grad_norm": 7.465170860290527, - "learning_rate": 9.34280113185604e-06, - "loss": 1.477, + "grad_norm": 7.540797710418701, + "learning_rate": 9.340594349048957e-06, + "loss": 1.4495, "step": 2930 }, { "epoch": 0.24983004758667574, - "grad_norm": 7.039208889007568, - "learning_rate": 9.335431824911903e-06, - "loss": 2.043, + "grad_norm": 7.5335774421691895, + "learning_rate": 9.33321359124805e-06, + "loss": 2.0262, "step": 2940 }, { "epoch": 0.25067980965329706, - "grad_norm": 9.442414283752441, - "learning_rate": 9.32802437119062e-06, - "loss": 1.7563, + "grad_norm": 9.183195114135742, + "learning_rate": 9.325794706187885e-06, + "loss": 1.7444, "step": 2950 }, { "epoch": 0.2515295717199184, - "grad_norm": 6.793369293212891, - "learning_rate": 9.320578835869207e-06, - "loss": 1.1634, + "grad_norm": 7.190180778503418, + "learning_rate": 9.318337759146059e-06, + "loss": 1.1065, "step": 2960 }, { "epoch": 0.25237933378653976, - "grad_norm": 8.331452369689941, - "learning_rate": 9.313095284459748e-06, - "loss": 2.095, + "grad_norm": 8.939278602600098, + "learning_rate": 9.310842815735064e-06, + "loss": 2.1085, "step": 2970 }, { "epoch": 0.2532290958531611, - "grad_norm": 9.370742797851562, - "learning_rate": 9.305573782808823e-06, - "loss": 1.9133, + "grad_norm": 9.265079498291016, + "learning_rate": 9.303309941901717e-06, + "loss": 1.8828, "step": 2980 }, { "epoch": 0.25407885791978246, - "grad_norm": 9.510766983032227, - "learning_rate": 9.298014397096933e-06, - "loss": 1.9891, + "grad_norm": 9.620991706848145, + "learning_rate": 9.295739203926585e-06, + "loss": 1.9617, "step": 2990 }, { "epoch": 0.2549286199864038, - "grad_norm": 9.61568832397461, - "learning_rate": 9.290417193837915e-06, - "loss": 2.1321, + "grad_norm": 9.919524192810059, + "learning_rate": 9.28813066842339e-06, + "loss": 2.1222, "step": 3000 }, { "epoch": 0.2549286199864038, - "eval_cosine_accuracy@1": 0.6865, - "eval_cosine_accuracy@10": 0.907, - "eval_cosine_accuracy@3": 0.8225, - "eval_cosine_accuracy@5": 0.8645, - "eval_cosine_map@100": 0.7675185021946501, - "eval_cosine_mrr@10": 0.7637712301587296, - "eval_cosine_ndcg@10": 0.7986846768080416, - "eval_cosine_precision@1": 0.6865, - "eval_cosine_precision@10": 0.09070000000000002, - "eval_cosine_precision@3": 0.2741666666666666, - "eval_cosine_precision@5": 0.17290000000000003, - "eval_cosine_recall@1": 0.6865, - "eval_cosine_recall@10": 0.907, - "eval_cosine_recall@3": 0.8225, - "eval_cosine_recall@5": 0.8645, - "eval_loss": 1.5350741147994995, - "eval_runtime": 2.7867, - "eval_samples_per_second": 270.575, - "eval_sequential_score": 0.7675185021946501, - "eval_steps_per_second": 4.306, - "eval_sts-dev_pearson_cosine": 0.8008188909405103, - "eval_sts-dev_pearson_dot": 0.7952079356521629, - "eval_sts-dev_pearson_euclidean": 0.7911834415414336, - "eval_sts-dev_pearson_manhattan": 0.7923055504058029, - "eval_sts-dev_pearson_max": 0.8008188909405103, - "eval_sts-dev_spearman_cosine": 0.8069312452792128, - "eval_sts-dev_spearman_dot": 0.7999522326961347, - "eval_sts-dev_spearman_euclidean": 0.8038367239089573, - "eval_sts-dev_spearman_manhattan": 0.8065221286368232, - "eval_sts-dev_spearman_max": 0.8069312452792128, + "eval_cosine_accuracy@1": 0.6915, + "eval_cosine_accuracy@10": 0.912, + "eval_cosine_accuracy@3": 0.8245, + "eval_cosine_accuracy@5": 0.8675, + "eval_cosine_map@100": 0.7715895898102414, + "eval_cosine_mrr@10": 0.7680996031746029, + "eval_cosine_ndcg@10": 0.8030586876970981, + "eval_cosine_precision@1": 0.6915, + "eval_cosine_precision@10": 0.09120000000000002, + "eval_cosine_precision@3": 0.2748333333333333, + "eval_cosine_precision@5": 0.1735, + "eval_cosine_recall@1": 0.6915, + "eval_cosine_recall@10": 0.912, + "eval_cosine_recall@3": 0.8245, + "eval_cosine_recall@5": 0.8675, + "eval_loss": 1.5225398540496826, + "eval_runtime": 2.6738, + "eval_samples_per_second": 281.993, + "eval_sequential_score": 0.7715895898102414, + "eval_steps_per_second": 4.488, + "eval_sts-dev_pearson_cosine": 0.7941967730468217, + "eval_sts-dev_pearson_dot": 0.7889453864017859, + "eval_sts-dev_pearson_euclidean": 0.7876984876552253, + "eval_sts-dev_pearson_manhattan": 0.7874895502329541, + "eval_sts-dev_pearson_max": 0.7941967730468217, + "eval_sts-dev_spearman_cosine": 0.7985243596878527, + "eval_sts-dev_spearman_dot": 0.791432702582675, + "eval_sts-dev_spearman_euclidean": 0.7975527076621773, + "eval_sts-dev_spearman_manhattan": 0.7978376281809844, + "eval_sts-dev_spearman_max": 0.7985243596878527, "step": 3000 }, { "epoch": 0.25577838205302517, - "grad_norm": 7.736785888671875, - "learning_rate": 9.282782239878354e-06, - "loss": 1.8755, + "grad_norm": 7.191762924194336, + "learning_rate": 9.280484402338424e-06, + "loss": 1.8215, "step": 3010 }, { "epoch": 0.2566281441196465, - "grad_norm": 9.573341369628906, - "learning_rate": 9.275109602397003e-06, - "loss": 2.3428, + "grad_norm": 9.23027229309082, + "learning_rate": 9.272800472949976e-06, + "loss": 2.3271, "step": 3020 }, { "epoch": 0.25747790618626787, - "grad_norm": 10.329427719116211, - "learning_rate": 9.267399348904178e-06, - "loss": 1.3314, + "grad_norm": 10.632865905761719, + "learning_rate": 9.265078947867719e-06, + "loss": 1.3244, "step": 3030 }, { "epoch": 0.25832766825288916, - "grad_norm": 7.623870372772217, - "learning_rate": 9.25965154724118e-06, - "loss": 1.5536, + "grad_norm": 7.0926923751831055, + "learning_rate": 9.257319895032128e-06, + "loss": 1.5012, "step": 3040 }, { "epoch": 0.2591774303195105, - "grad_norm": 8.167257308959961, - "learning_rate": 9.251866265579685e-06, - "loss": 1.7259, + "grad_norm": 8.035869598388672, + "learning_rate": 9.249523382713882e-06, + "loss": 1.7094, "step": 3050 }, { "epoch": 0.26002719238613187, - "grad_norm": 8.580568313598633, - "learning_rate": 9.244043572421155e-06, - "loss": 1.7929, + "grad_norm": 8.490530967712402, + "learning_rate": 9.241689479513252e-06, + "loss": 1.7635, "step": 3060 }, { "epoch": 0.2608769544527532, - "grad_norm": 9.876664161682129, - "learning_rate": 9.23618353659622e-06, - "loss": 1.4687, + "grad_norm": 9.66800308227539, + "learning_rate": 9.23381825435951e-06, + "loss": 1.4024, "step": 3070 }, { "epoch": 0.26172671651937457, - "grad_norm": 6.559403896331787, - "learning_rate": 9.228286227264098e-06, - "loss": 1.9342, + "grad_norm": 5.634133338928223, + "learning_rate": 9.225909776510321e-06, + "loss": 1.8977, "step": 3080 }, { "epoch": 0.2625764785859959, - "grad_norm": 7.239197731018066, - "learning_rate": 9.220351713911955e-06, - "loss": 1.5374, + "grad_norm": 7.532492160797119, + "learning_rate": 9.217964115551127e-06, + "loss": 1.4965, "step": 3090 }, { "epoch": 0.26342624065261727, - "grad_norm": 6.916477203369141, - "learning_rate": 9.212380066354319e-06, - "loss": 1.9888, + "grad_norm": 8.258689880371094, + "learning_rate": 9.209981341394537e-06, + "loss": 1.986, "step": 3100 }, { "epoch": 0.2642760027192386, - "grad_norm": 7.113123416900635, - "learning_rate": 9.20437135473245e-06, - "loss": 1.7331, + "grad_norm": 7.528514385223389, + "learning_rate": 9.201961524279717e-06, + "loss": 1.6921, "step": 3110 }, { "epoch": 0.26512576478586, - "grad_norm": 5.012565612792969, - "learning_rate": 9.196325649513733e-06, - "loss": 1.1483, + "grad_norm": 4.7679829597473145, + "learning_rate": 9.193904734771764e-06, + "loss": 1.1191, "step": 3120 }, { "epoch": 0.2659755268524813, - "grad_norm": 8.476727485656738, - "learning_rate": 9.188243021491053e-06, - "loss": 1.5639, + "grad_norm": 8.68427562713623, + "learning_rate": 9.185811043761093e-06, + "loss": 1.5588, "step": 3130 }, { "epoch": 0.2668252889191027, - "grad_norm": 9.976286888122559, - "learning_rate": 9.180123541782172e-06, - "loss": 2.3273, + "grad_norm": 9.816774368286133, + "learning_rate": 9.177680522462806e-06, + "loss": 2.2996, "step": 3140 }, { "epoch": 0.267675050985724, - "grad_norm": 9.127235412597656, - "learning_rate": 9.171967281829101e-06, - "loss": 1.3604, + "grad_norm": 9.342156410217285, + "learning_rate": 9.169513242416072e-06, + "loss": 1.3422, "step": 3150 }, { "epoch": 0.2685248130523453, - "grad_norm": 8.79888916015625, - "learning_rate": 9.163774313397485e-06, - "loss": 2.005, + "grad_norm": 8.452969551086426, + "learning_rate": 9.161309275483491e-06, + "loss": 1.9579, "step": 3160 }, { "epoch": 0.2693745751189667, - "grad_norm": 9.516885757446289, - "learning_rate": 9.155544708575947e-06, - "loss": 1.0727, + "grad_norm": 9.451214790344238, + "learning_rate": 9.153068693850472e-06, + "loss": 1.0521, "step": 3170 }, { "epoch": 0.270224337185588, - "grad_norm": 7.4066691398620605, - "learning_rate": 9.147278539775479e-06, - "loss": 1.9414, + "grad_norm": 7.481897354125977, + "learning_rate": 9.144791570024582e-06, + "loss": 1.8859, "step": 3180 }, { "epoch": 0.2710740992522094, - "grad_norm": 8.429295539855957, - "learning_rate": 9.13897587972879e-06, - "loss": 1.5934, + "grad_norm": 8.855888366699219, + "learning_rate": 9.13647797683492e-06, + "loss": 1.6077, "step": 3190 }, { "epoch": 0.27192386131883073, - "grad_norm": 4.609223365783691, - "learning_rate": 9.130636801489665e-06, - "loss": 1.027, + "grad_norm": 4.496568202972412, + "learning_rate": 9.12812798743148e-06, + "loss": 1.0576, "step": 3200 }, { "epoch": 0.2727736233854521, - "grad_norm": 8.485708236694336, - "learning_rate": 9.122261378432336e-06, - "loss": 1.5364, + "grad_norm": 8.572591781616211, + "learning_rate": 9.119741675284487e-06, + "loss": 1.527, "step": 3210 }, { "epoch": 0.27362338545207343, - "grad_norm": 6.669787406921387, - "learning_rate": 9.113849684250818e-06, - "loss": 1.2373, + "grad_norm": 6.622188568115234, + "learning_rate": 9.111319114183776e-06, + "loss": 1.2154, "step": 3220 }, { "epoch": 0.2744731475186948, - "grad_norm": 8.848769187927246, - "learning_rate": 9.105401792958278e-06, - "loss": 1.6682, + "grad_norm": 9.572870254516602, + "learning_rate": 9.102860378238127e-06, + "loss": 1.6487, "step": 3230 }, { "epoch": 0.27532290958531613, - "grad_norm": 8.608809471130371, - "learning_rate": 9.09691777888637e-06, - "loss": 1.9316, + "grad_norm": 8.45828628540039, + "learning_rate": 9.094365541874615e-06, + "loss": 1.918, "step": 3240 }, { "epoch": 0.2761726716519375, - "grad_norm": 8.1502685546875, - "learning_rate": 9.088397716684586e-06, - "loss": 1.8804, + "grad_norm": 8.160860061645508, + "learning_rate": 9.085834679837957e-06, + "loss": 1.8735, "step": 3250 }, { "epoch": 0.2770224337185588, - "grad_norm": 8.826888084411621, - "learning_rate": 9.079841681319607e-06, - "loss": 2.5283, + "grad_norm": 8.58696460723877, + "learning_rate": 9.077267867189858e-06, + "loss": 2.508, "step": 3260 }, { "epoch": 0.27787219578518013, - "grad_norm": 11.749123573303223, - "learning_rate": 9.071249748074628e-06, - "loss": 1.5819, + "grad_norm": 10.723849296569824, + "learning_rate": 9.068665179308343e-06, + "loss": 1.5813, "step": 3270 }, { "epoch": 0.2787219578518015, - "grad_norm": 4.572829723358154, - "learning_rate": 9.062621992548711e-06, - "loss": 1.3799, + "grad_norm": 4.296560287475586, + "learning_rate": 9.060026691887097e-06, + "loss": 1.3501, "step": 3280 }, { "epoch": 0.27957171991842283, - "grad_norm": 8.494523048400879, - "learning_rate": 9.053958490656103e-06, - "loss": 1.3378, + "grad_norm": 7.842785358428955, + "learning_rate": 9.051352480934802e-06, + "loss": 1.364, "step": 3290 }, { "epoch": 0.2804214819850442, - "grad_norm": 6.4082932472229, - "learning_rate": 9.045259318625588e-06, - "loss": 1.5859, + "grad_norm": 6.360820293426514, + "learning_rate": 9.042642622774465e-06, + "loss": 1.5669, "step": 3300 }, { "epoch": 0.28127124405166554, - "grad_norm": 6.964341163635254, - "learning_rate": 9.0365245529998e-06, - "loss": 1.2679, + "grad_norm": 7.005684852600098, + "learning_rate": 9.033897194042746e-06, + "loss": 1.2687, "step": 3310 }, { "epoch": 0.2821210061182869, - "grad_norm": 9.248893737792969, - "learning_rate": 9.027754270634554e-06, - "loss": 1.9714, + "grad_norm": 9.032875061035156, + "learning_rate": 9.025116271689287e-06, + "loss": 1.9495, "step": 3320 }, { "epoch": 0.28297076818490824, - "grad_norm": 10.546152114868164, - "learning_rate": 9.01894854869818e-06, - "loss": 1.1809, + "grad_norm": 9.786654472351074, + "learning_rate": 9.016299932976032e-06, + "loss": 1.1315, "step": 3330 }, { "epoch": 0.2838205302515296, - "grad_norm": 6.565592288970947, - "learning_rate": 9.010107464670822e-06, - "loss": 0.9777, + "grad_norm": 6.246298789978027, + "learning_rate": 9.007448255476544e-06, + "loss": 0.9636, "step": 3340 }, { "epoch": 0.28467029231815094, - "grad_norm": 7.881227016448975, - "learning_rate": 9.001231096343781e-06, - "loss": 1.3289, + "grad_norm": 7.677283763885498, + "learning_rate": 8.998561317075332e-06, + "loss": 1.3071, "step": 3350 }, { "epoch": 0.28552005438477224, - "grad_norm": 7.000848293304443, - "learning_rate": 8.992319521818812e-06, - "loss": 1.3455, + "grad_norm": 7.165188789367676, + "learning_rate": 8.989639195967157e-06, + "loss": 1.3237, "step": 3360 }, { "epoch": 0.2863698164513936, - "grad_norm": 10.600214958190918, - "learning_rate": 8.983372819507447e-06, - "loss": 2.1545, + "grad_norm": 9.816590309143066, + "learning_rate": 8.980681970656349e-06, + "loss": 2.1571, "step": 3370 }, { "epoch": 0.28721957851801494, - "grad_norm": 8.424556732177734, - "learning_rate": 8.974391068130301e-06, - "loss": 1.5625, + "grad_norm": 8.933812141418457, + "learning_rate": 8.971689719956112e-06, + "loss": 1.5394, "step": 3380 }, { "epoch": 0.2880693405846363, - "grad_norm": 7.668473720550537, - "learning_rate": 8.965374346716382e-06, - "loss": 1.5034, + "grad_norm": 7.159363746643066, + "learning_rate": 8.962662522987833e-06, + "loss": 1.493, "step": 3390 }, { "epoch": 0.28891910265125764, - "grad_norm": 9.721426963806152, - "learning_rate": 8.956322734602388e-06, - "loss": 1.8048, + "grad_norm": 9.511648178100586, + "learning_rate": 8.95360045918039e-06, + "loss": 1.8023, "step": 3400 }, { "epoch": 0.289768864717879, - "grad_norm": 8.593113899230957, - "learning_rate": 8.947236311432019e-06, - "loss": 2.0181, + "grad_norm": 8.576047897338867, + "learning_rate": 8.94450360826944e-06, + "loss": 1.9951, "step": 3410 }, { "epoch": 0.29061862678450034, - "grad_norm": 6.665369033813477, - "learning_rate": 8.938115157155275e-06, - "loss": 1.5169, + "grad_norm": 6.648324966430664, + "learning_rate": 8.935372050296738e-06, + "loss": 1.4618, "step": 3420 }, { "epoch": 0.2914683888511217, - "grad_norm": 12.818399429321289, - "learning_rate": 8.928959352027745e-06, - "loss": 1.5428, + "grad_norm": 9.013810157775879, + "learning_rate": 8.92620586560941e-06, + "loss": 1.5207, "step": 3430 }, { "epoch": 0.29231815091774305, - "grad_norm": 8.738973617553711, - "learning_rate": 8.919768976609906e-06, - "loss": 1.8036, + "grad_norm": 8.857348442077637, + "learning_rate": 8.917005134859263e-06, + "loss": 1.8013, "step": 3440 }, { "epoch": 0.2931679129843644, - "grad_norm": 9.629112243652344, - "learning_rate": 8.910544111766413e-06, - "loss": 1.5026, + "grad_norm": 9.265905380249023, + "learning_rate": 8.907769939002068e-06, + "loss": 1.4841, "step": 3450 }, { "epoch": 0.29401767505098575, - "grad_norm": 6.802762031555176, - "learning_rate": 8.901284838665388e-06, - "loss": 2.1377, + "grad_norm": 6.705354690551758, + "learning_rate": 8.89850035929685e-06, + "loss": 2.1567, "step": 3460 }, { "epoch": 0.29486743711760705, - "grad_norm": 7.883736610412598, - "learning_rate": 8.89199123877771e-06, - "loss": 1.7275, + "grad_norm": 7.624939918518066, + "learning_rate": 8.889196477305168e-06, + "loss": 1.7638, "step": 3470 }, { "epoch": 0.2957171991842284, - "grad_norm": 7.158336162567139, - "learning_rate": 8.882663393876288e-06, - "loss": 1.4765, + "grad_norm": 6.322500228881836, + "learning_rate": 8.879858374890409e-06, + "loss": 1.4507, "step": 3480 }, { "epoch": 0.29656696125084975, - "grad_norm": 8.003816604614258, - "learning_rate": 8.873301386035352e-06, - "loss": 2.1761, + "grad_norm": 7.840251922607422, + "learning_rate": 8.87048613421705e-06, + "loss": 2.1364, "step": 3490 }, { "epoch": 0.2974167233174711, - "grad_norm": 6.995957851409912, - "learning_rate": 8.863905297629724e-06, - "loss": 1.3884, + "grad_norm": 6.594165802001953, + "learning_rate": 8.861079837749952e-06, + "loss": 1.3655, "step": 3500 }, { "epoch": 0.29826648538409245, - "grad_norm": 12.178868293762207, - "learning_rate": 8.854475211334095e-06, - "loss": 1.1503, + "grad_norm": 12.44531536102295, + "learning_rate": 8.85163956825363e-06, + "loss": 1.147, "step": 3510 }, { "epoch": 0.2991162474507138, - "grad_norm": 9.340962409973145, - "learning_rate": 8.845011210122299e-06, - "loss": 1.9235, + "grad_norm": 9.974011421203613, + "learning_rate": 8.842165408791515e-06, + "loss": 1.8986, "step": 3520 }, { "epoch": 0.29996600951733515, - "grad_norm": 7.180028915405273, - "learning_rate": 8.835513377266582e-06, - "loss": 1.6309, + "grad_norm": 7.055538654327393, + "learning_rate": 8.832657442725233e-06, + "loss": 1.6014, "step": 3530 }, { "epoch": 0.3008157715839565, - "grad_norm": 8.44916820526123, - "learning_rate": 8.825981796336871e-06, - "loss": 1.3259, + "grad_norm": 8.148590087890625, + "learning_rate": 8.823115753713868e-06, + "loss": 1.2619, "step": 3540 }, { "epoch": 0.30166553365057786, - "grad_norm": 9.889388084411621, - "learning_rate": 8.816416551200034e-06, - "loss": 1.3987, + "grad_norm": 8.987916946411133, + "learning_rate": 8.813540425713234e-06, + "loss": 1.3716, "step": 3550 }, { "epoch": 0.3025152957171992, - "grad_norm": 9.106419563293457, - "learning_rate": 8.806817726019144e-06, - "loss": 1.5873, + "grad_norm": 9.050952911376953, + "learning_rate": 8.803931542975121e-06, + "loss": 1.5904, "step": 3560 }, { "epoch": 0.30336505778382056, - "grad_norm": 11.503569602966309, - "learning_rate": 8.797185405252744e-06, - "loss": 1.7668, + "grad_norm": 10.804072380065918, + "learning_rate": 8.794289190046566e-06, + "loss": 1.726, "step": 3570 }, { "epoch": 0.30421481985044185, - "grad_norm": 5.427318096160889, - "learning_rate": 8.787519673654097e-06, - "loss": 1.6178, + "grad_norm": 5.2469658851623535, + "learning_rate": 8.784613451769098e-06, + "loss": 1.6235, "step": 3580 }, { "epoch": 0.3050645819170632, - "grad_norm": 9.168242454528809, - "learning_rate": 8.777820616270443e-06, - "loss": 1.7817, + "grad_norm": 9.142160415649414, + "learning_rate": 8.774904413278011e-06, + "loss": 1.7598, "step": 3590 }, { "epoch": 0.30591434398368456, - "grad_norm": 9.649458885192871, - "learning_rate": 8.768088318442247e-06, - "loss": 1.8823, + "grad_norm": 10.596346855163574, + "learning_rate": 8.76516216000159e-06, + "loss": 1.8795, "step": 3600 }, { "epoch": 0.3067641060503059, - "grad_norm": 8.845854759216309, - "learning_rate": 8.758322865802458e-06, - "loss": 1.6265, + "grad_norm": 8.566193580627441, + "learning_rate": 8.755386777660379e-06, + "loss": 1.6107, "step": 3610 }, { "epoch": 0.30761386811692726, - "grad_norm": 8.783435821533203, - "learning_rate": 8.748524344275744e-06, - "loss": 1.3581, + "grad_norm": 8.57463264465332, + "learning_rate": 8.745578352266416e-06, + "loss": 1.3525, "step": 3620 }, { "epoch": 0.3084636301835486, - "grad_norm": 8.010035514831543, - "learning_rate": 8.738692840077739e-06, - "loss": 1.8566, + "grad_norm": 8.3265380859375, + "learning_rate": 8.735736970122481e-06, + "loss": 1.8275, "step": 3630 }, { "epoch": 0.30931339225016996, - "grad_norm": 4.803843975067139, - "learning_rate": 8.728828439714295e-06, - "loss": 1.35, + "grad_norm": 4.678050994873047, + "learning_rate": 8.725862717821336e-06, + "loss": 1.333, "step": 3640 }, { "epoch": 0.3101631543167913, - "grad_norm": 8.378717422485352, - "learning_rate": 8.718931229980701e-06, - "loss": 1.7068, + "grad_norm": 8.182330131530762, + "learning_rate": 8.71595568224496e-06, + "loss": 1.6917, "step": 3650 }, { "epoch": 0.31101291638341266, - "grad_norm": 5.423689365386963, - "learning_rate": 8.709001297960941e-06, - "loss": 1.6618, + "grad_norm": 5.293543338775635, + "learning_rate": 8.706015950563789e-06, + "loss": 1.6108, "step": 3660 }, { "epoch": 0.311862678450034, - "grad_norm": 9.354557037353516, - "learning_rate": 8.69903873102691e-06, - "loss": 1.6905, + "grad_norm": 8.869766235351562, + "learning_rate": 8.696043610235944e-06, + "loss": 1.6899, "step": 3670 }, { "epoch": 0.3127124405166553, - "grad_norm": 5.780129909515381, - "learning_rate": 8.689043616837653e-06, - "loss": 1.2407, + "grad_norm": 5.5872802734375, + "learning_rate": 8.68603874900647e-06, + "loss": 1.2133, "step": 3680 }, { "epoch": 0.31356220258327666, - "grad_norm": 8.687576293945312, - "learning_rate": 8.679016043338596e-06, - "loss": 1.4478, + "grad_norm": 9.004737854003906, + "learning_rate": 8.67600145490655e-06, + "loss": 1.4407, "step": 3690 }, { "epoch": 0.314411964649898, - "grad_norm": 7.531462669372559, - "learning_rate": 8.66895609876077e-06, - "loss": 1.9195, + "grad_norm": 7.551668167114258, + "learning_rate": 8.665931816252743e-06, + "loss": 1.8746, "step": 3700 }, { "epoch": 0.31526172671651936, - "grad_norm": 8.13393497467041, - "learning_rate": 8.658863871620032e-06, - "loss": 1.6154, + "grad_norm": 8.384748458862305, + "learning_rate": 8.655829921646208e-06, + "loss": 1.6211, "step": 3710 }, { "epoch": 0.3161114887831407, - "grad_norm": 5.591124534606934, - "learning_rate": 8.648739450716283e-06, - "loss": 1.5699, + "grad_norm": 5.298479080200195, + "learning_rate": 8.64569585997191e-06, + "loss": 1.5504, "step": 3720 }, { "epoch": 0.31696125084976207, - "grad_norm": 7.531632900238037, - "learning_rate": 8.638582925132703e-06, - "loss": 1.8805, + "grad_norm": 7.893293857574463, + "learning_rate": 8.63552972039785e-06, + "loss": 1.8787, "step": 3730 }, { "epoch": 0.3178110129163834, - "grad_norm": 6.7070231437683105, - "learning_rate": 8.628394384234949e-06, - "loss": 2.069, + "grad_norm": 6.742856025695801, + "learning_rate": 8.626352842101714e-06, + "loss": 2.0654, "step": 3740 }, { "epoch": 0.31866077498300477, - "grad_norm": 7.8806538581848145, - "learning_rate": 8.618173917670373e-06, - "loss": 1.4729, + "grad_norm": 7.550837993621826, + "learning_rate": 8.616126001186192e-06, + "loss": 1.4762, "step": 3750 }, { "epoch": 0.3195105370496261, - "grad_norm": 5.6835761070251465, - "learning_rate": 8.607921615367243e-06, - "loss": 1.6945, + "grad_norm": 5.313843727111816, + "learning_rate": 8.605867342551408e-06, + "loss": 1.7039, "step": 3760 }, { "epoch": 0.32036029911624747, - "grad_norm": 6.916445732116699, - "learning_rate": 8.597637567533942e-06, - "loss": 1.8679, + "grad_norm": 6.952681541442871, + "learning_rate": 8.595576956461671e-06, + "loss": 1.8382, "step": 3770 }, { "epoch": 0.3212100611828688, - "grad_norm": 8.864204406738281, - "learning_rate": 8.587321864658174e-06, - "loss": 1.6665, + "grad_norm": 5.118858337402344, + "learning_rate": 8.585254933460459e-06, + "loss": 1.684, "step": 3780 }, { "epoch": 0.3220598232494901, - "grad_norm": 8.838143348693848, - "learning_rate": 8.576974597506172e-06, - "loss": 1.5134, + "grad_norm": 8.853779792785645, + "learning_rate": 8.574901364369614e-06, + "loss": 1.5044, "step": 3790 }, { "epoch": 0.32290958531611147, - "grad_norm": 5.65764045715332, - "learning_rate": 8.566595857121902e-06, - "loss": 1.9532, + "grad_norm": 5.593980312347412, + "learning_rate": 8.564516340288549e-06, + "loss": 1.9366, "step": 3800 }, { "epoch": 0.3237593473827328, - "grad_norm": 11.450773239135742, - "learning_rate": 8.556185734826252e-06, - "loss": 1.3903, + "grad_norm": 11.13154125213623, + "learning_rate": 8.554099952593443e-06, + "loss": 1.3692, "step": 3810 }, { "epoch": 0.32460910944935417, - "grad_norm": 9.498995780944824, - "learning_rate": 8.545744322216238e-06, - "loss": 1.9471, + "grad_norm": 8.996947288513184, + "learning_rate": 8.543652292936443e-06, + "loss": 1.9425, "step": 3820 }, { "epoch": 0.3254588715159755, - "grad_norm": 10.564948081970215, - "learning_rate": 8.535271711164195e-06, - "loss": 1.9619, + "grad_norm": 10.260974884033203, + "learning_rate": 8.533173453244844e-06, + "loss": 1.9457, "step": 3830 }, { "epoch": 0.3263086335825969, - "grad_norm": 6.411941051483154, - "learning_rate": 8.524767993816967e-06, - "loss": 2.0609, + "grad_norm": 5.83531379699707, + "learning_rate": 8.5226635257203e-06, + "loss": 2.0349, "step": 3840 }, { "epoch": 0.3271583956492182, - "grad_norm": 10.451356887817383, - "learning_rate": 8.514233262595097e-06, - "loss": 2.2819, + "grad_norm": 10.096120834350586, + "learning_rate": 8.512122602837993e-06, + "loss": 2.2629, "step": 3850 }, { "epoch": 0.3280081577158396, - "grad_norm": 9.08843994140625, - "learning_rate": 8.503667610192013e-06, - "loss": 1.775, + "grad_norm": 9.009894371032715, + "learning_rate": 8.501550777345836e-06, + "loss": 1.782, "step": 3860 }, { "epoch": 0.32885791978246093, - "grad_norm": 7.053817272186279, - "learning_rate": 8.493071129573214e-06, - "loss": 1.1209, + "grad_norm": 6.897549152374268, + "learning_rate": 8.49094814226364e-06, + "loss": 1.1131, "step": 3870 }, { "epoch": 0.3297076818490823, - "grad_norm": 6.348076343536377, - "learning_rate": 8.482443913975458e-06, - "loss": 1.6595, + "grad_norm": 5.784816741943359, + "learning_rate": 8.480314790882314e-06, + "loss": 1.6522, "step": 3880 }, { "epoch": 0.33055744391570363, - "grad_norm": 8.386449813842773, - "learning_rate": 8.471786056905926e-06, - "loss": 1.44, + "grad_norm": 8.868574142456055, + "learning_rate": 8.469650816763028e-06, + "loss": 1.4468, "step": 3890 }, { "epoch": 0.3314072059823249, - "grad_norm": 7.739043235778809, - "learning_rate": 8.461097652141411e-06, - "loss": 1.2321, + "grad_norm": 7.580058574676514, + "learning_rate": 8.4589563137364e-06, + "loss": 1.2263, "step": 3900 }, { "epoch": 0.3322569680489463, - "grad_norm": 8.008532524108887, - "learning_rate": 8.450378793727495e-06, - "loss": 1.4774, + "grad_norm": 7.456427097320557, + "learning_rate": 8.448231375901668e-06, + "loss": 1.4744, "step": 3910 }, { "epoch": 0.33310673011556763, - "grad_norm": 5.789026260375977, - "learning_rate": 8.439629575977712e-06, - "loss": 1.3621, + "grad_norm": 5.792886734008789, + "learning_rate": 8.437476097625856e-06, + "loss": 1.346, "step": 3920 }, { "epoch": 0.333956492182189, - "grad_norm": 9.902092933654785, - "learning_rate": 8.428850093472723e-06, - "loss": 1.6229, + "grad_norm": 9.659575462341309, + "learning_rate": 8.426690573542955e-06, + "loss": 1.6235, "step": 3930 }, { "epoch": 0.33480625424881033, - "grad_norm": 7.288994312286377, - "learning_rate": 8.418040441059489e-06, - "loss": 1.5508, + "grad_norm": 6.110198020935059, + "learning_rate": 8.415874898553082e-06, + "loss": 1.5373, "step": 3940 }, { "epoch": 0.3356560163154317, - "grad_norm": 9.090726852416992, - "learning_rate": 8.407200713850423e-06, - "loss": 2.0397, + "grad_norm": 8.936569213867188, + "learning_rate": 8.405029167821641e-06, + "loss": 1.9912, "step": 3950 }, { "epoch": 0.33650577838205303, - "grad_norm": 9.180893898010254, - "learning_rate": 8.396331007222568e-06, - "loss": 1.5626, + "grad_norm": 8.948286056518555, + "learning_rate": 8.394153476778503e-06, + "loss": 1.5235, "step": 3960 }, { "epoch": 0.3373555404486744, - "grad_norm": 6.935198783874512, - "learning_rate": 8.385431416816749e-06, - "loss": 1.2912, + "grad_norm": 6.563770771026611, + "learning_rate": 8.383247921117144e-06, + "loss": 1.2973, "step": 3970 }, { "epoch": 0.33820530251529574, - "grad_norm": 9.077092170715332, - "learning_rate": 8.374502038536734e-06, - "loss": 1.9062, + "grad_norm": 9.62635612487793, + "learning_rate": 8.372312596793821e-06, + "loss": 1.8943, "step": 3980 }, { "epoch": 0.3390550645819171, - "grad_norm": 6.475048542022705, - "learning_rate": 8.363542968548389e-06, - "loss": 1.8215, + "grad_norm": 6.852767467498779, + "learning_rate": 8.36134760002672e-06, + "loss": 1.796, "step": 3990 }, { "epoch": 0.3399048266485384, - "grad_norm": 9.412741661071777, - "learning_rate": 8.352554303278833e-06, - "loss": 1.4379, + "grad_norm": 8.876890182495117, + "learning_rate": 8.350353027295105e-06, + "loss": 1.4485, "step": 4000 }, { "epoch": 0.3399048266485384, - "eval_cosine_accuracy@1": 0.701, - "eval_cosine_accuracy@10": 0.9125, - "eval_cosine_accuracy@3": 0.8315, - "eval_cosine_accuracy@5": 0.8665, - "eval_cosine_map@100": 0.7777126402554685, - "eval_cosine_mrr@10": 0.7744843253968242, - "eval_cosine_ndcg@10": 0.807985418129195, - "eval_cosine_precision@1": 0.701, - "eval_cosine_precision@10": 0.09125000000000001, - "eval_cosine_precision@3": 0.2771666666666667, - "eval_cosine_precision@5": 0.1733, - "eval_cosine_recall@1": 0.701, - "eval_cosine_recall@10": 0.9125, - "eval_cosine_recall@3": 0.8315, - "eval_cosine_recall@5": 0.8665, - "eval_loss": 1.5198493003845215, - "eval_runtime": 2.8571, - "eval_samples_per_second": 263.902, - "eval_sequential_score": 0.7777126402554685, - "eval_steps_per_second": 4.2, - "eval_sts-dev_pearson_cosine": 0.7998673514730497, - "eval_sts-dev_pearson_dot": 0.7956248137203142, - "eval_sts-dev_pearson_euclidean": 0.7915256395052884, - "eval_sts-dev_pearson_manhattan": 0.7921928577735269, - "eval_sts-dev_pearson_max": 0.7998673514730497, - "eval_sts-dev_spearman_cosine": 0.8033964621086414, - "eval_sts-dev_spearman_dot": 0.7981246690986586, - "eval_sts-dev_spearman_euclidean": 0.8020964401463111, - "eval_sts-dev_spearman_manhattan": 0.8036806136112034, - "eval_sts-dev_spearman_max": 0.8036806136112034, + "eval_cosine_accuracy@1": 0.7005, + "eval_cosine_accuracy@10": 0.9105, + "eval_cosine_accuracy@3": 0.8285, + "eval_cosine_accuracy@5": 0.867, + "eval_cosine_map@100": 0.776740731521151, + "eval_cosine_mrr@10": 0.7731932539682532, + "eval_cosine_ndcg@10": 0.8065206475557063, + "eval_cosine_precision@1": 0.7005, + "eval_cosine_precision@10": 0.09105, + "eval_cosine_precision@3": 0.2761666666666666, + "eval_cosine_precision@5": 0.17340000000000003, + "eval_cosine_recall@1": 0.7005, + "eval_cosine_recall@10": 0.9105, + "eval_cosine_recall@3": 0.8285, + "eval_cosine_recall@5": 0.867, + "eval_loss": 1.4988008737564087, + "eval_runtime": 2.6257, + "eval_samples_per_second": 287.162, + "eval_sequential_score": 0.776740731521151, + "eval_steps_per_second": 4.57, + "eval_sts-dev_pearson_cosine": 0.7955332110301017, + "eval_sts-dev_pearson_dot": 0.7913499191513131, + "eval_sts-dev_pearson_euclidean": 0.7888708887493069, + "eval_sts-dev_pearson_manhattan": 0.7886560171692799, + "eval_sts-dev_pearson_max": 0.7955332110301017, + "eval_sts-dev_spearman_cosine": 0.8003330858963121, + "eval_sts-dev_spearman_dot": 0.794669870250916, + "eval_sts-dev_spearman_euclidean": 0.7995575330131356, + "eval_sts-dev_spearman_manhattan": 0.7988273520884194, + "eval_sts-dev_spearman_max": 0.8003330858963121, "step": 4000 }, { "epoch": 0.34075458871515973, - "grad_norm": 8.467841148376465, - "learning_rate": 8.341536139415589e-06, - "loss": 1.4425, + "grad_norm": 8.38383960723877, + "learning_rate": 8.33932897533848e-06, + "loss": 1.4139, "step": 4010 }, { "epoch": 0.3416043507817811, - "grad_norm": 7.077808380126953, - "learning_rate": 8.330488573905733e-06, - "loss": 1.5301, + "grad_norm": 7.034097671508789, + "learning_rate": 8.328275541155734e-06, + "loss": 1.5104, "step": 4020 }, { "epoch": 0.34245411284840244, - "grad_norm": 6.147716045379639, - "learning_rate": 8.319411703955042e-06, - "loss": 1.4369, + "grad_norm": 6.040533542633057, + "learning_rate": 8.317192822004276e-06, + "loss": 1.4306, "step": 4030 }, { "epoch": 0.3433038749150238, - "grad_norm": 6.165444374084473, - "learning_rate": 8.30830562702714e-06, - "loss": 2.0321, + "grad_norm": 5.8344831466674805, + "learning_rate": 8.306080915399194e-06, + "loss": 2.0212, "step": 4040 }, { "epoch": 0.34415363698164514, - "grad_norm": 7.108325958251953, - "learning_rate": 8.297170440842636e-06, - "loss": 1.5148, + "grad_norm": 6.679718971252441, + "learning_rate": 8.294939919112398e-06, + "loss": 1.4815, "step": 4050 }, { "epoch": 0.3450033990482665, - "grad_norm": 6.766551494598389, - "learning_rate": 8.286006243378268e-06, - "loss": 1.1118, + "grad_norm": 6.015053749084473, + "learning_rate": 8.283769931171744e-06, + "loss": 1.0738, "step": 4060 }, { "epoch": 0.34585316111488784, - "grad_norm": 7.434624195098877, - "learning_rate": 8.274813132866037e-06, - "loss": 0.9621, + "grad_norm": 7.583785533905029, + "learning_rate": 8.272571049860183e-06, + "loss": 0.9565, "step": 4070 }, { "epoch": 0.3467029231815092, - "grad_norm": 5.975522994995117, - "learning_rate": 8.26359120779235e-06, - "loss": 1.0928, + "grad_norm": 5.765251636505127, + "learning_rate": 8.261343373714898e-06, + "loss": 1.0451, "step": 4080 }, { "epoch": 0.34755268524813054, - "grad_norm": 9.990774154663086, - "learning_rate": 8.252340566897144e-06, - "loss": 1.5899, + "grad_norm": 10.114737510681152, + "learning_rate": 8.250087001526432e-06, + "loss": 1.5975, "step": 4090 }, { "epoch": 0.3484024473147519, - "grad_norm": 7.930508613586426, - "learning_rate": 8.241061309173024e-06, - "loss": 1.8656, + "grad_norm": 7.381314277648926, + "learning_rate": 8.238802032337816e-06, + "loss": 1.8642, "step": 4100 }, { "epoch": 0.3492522093813732, - "grad_norm": 8.030131340026855, - "learning_rate": 8.229753533864392e-06, - "loss": 1.8926, + "grad_norm": 8.024608612060547, + "learning_rate": 8.227488565443706e-06, + "loss": 1.8995, "step": 4110 }, { "epoch": 0.35010197144799454, - "grad_norm": 6.240615367889404, - "learning_rate": 8.218417340466571e-06, - "loss": 1.8682, + "grad_norm": 6.357876777648926, + "learning_rate": 8.216146700389504e-06, + "loss": 1.8488, "step": 4120 }, { "epoch": 0.3509517335146159, - "grad_norm": 9.507622718811035, - "learning_rate": 8.207052828724925e-06, - "loss": 1.1497, + "grad_norm": 9.650424003601074, + "learning_rate": 8.204776536970481e-06, + "loss": 1.1606, "step": 4130 }, { "epoch": 0.35180149558123724, - "grad_norm": 7.851670742034912, - "learning_rate": 8.195660098633996e-06, - "loss": 1.8596, + "grad_norm": 7.757226467132568, + "learning_rate": 8.193378175230903e-06, + "loss": 1.8689, "step": 4140 }, { "epoch": 0.3526512576478586, - "grad_norm": 8.877213478088379, - "learning_rate": 8.184239250436602e-06, - "loss": 1.3003, + "grad_norm": 8.639363288879395, + "learning_rate": 8.181951715463145e-06, + "loss": 1.2646, "step": 4150 }, { "epoch": 0.35350101971447995, - "grad_norm": 7.962785243988037, - "learning_rate": 8.172790384622982e-06, - "loss": 0.8871, + "grad_norm": 6.988254547119141, + "learning_rate": 8.170497258206817e-06, + "loss": 0.8987, "step": 4160 }, { "epoch": 0.3543507817811013, - "grad_norm": 8.136030197143555, - "learning_rate": 8.16131360192989e-06, - "loss": 1.4811, + "grad_norm": 7.310701370239258, + "learning_rate": 8.15901490424787e-06, + "loss": 1.4526, "step": 4170 }, { "epoch": 0.35520054384772265, - "grad_norm": 7.604763984680176, - "learning_rate": 8.149809003339713e-06, - "loss": 1.8324, + "grad_norm": 7.828703880310059, + "learning_rate": 8.14750475461772e-06, + "loss": 1.8155, "step": 4180 }, { "epoch": 0.356050305914344, - "grad_norm": 9.970459938049316, - "learning_rate": 8.138276690079594e-06, - "loss": 1.4659, + "grad_norm": 9.603683471679688, + "learning_rate": 8.135966910592343e-06, + "loss": 1.4764, "step": 4190 }, { "epoch": 0.35690006798096535, - "grad_norm": 5.03046178817749, - "learning_rate": 8.126716763620526e-06, - "loss": 1.3213, + "grad_norm": 4.951098442077637, + "learning_rate": 8.124401473691401e-06, + "loss": 1.2846, "step": 4200 }, { "epoch": 0.3577498300475867, - "grad_norm": 7.165824890136719, - "learning_rate": 8.11512932567647e-06, - "loss": 1.7336, + "grad_norm": 7.320103645324707, + "learning_rate": 8.11280854567734e-06, + "loss": 1.7014, "step": 4210 }, { "epoch": 0.358599592114208, - "grad_norm": 4.811755180358887, - "learning_rate": 8.103514478203453e-06, - "loss": 1.2831, + "grad_norm": 5.094399929046631, + "learning_rate": 8.101188228554494e-06, + "loss": 1.2782, "step": 4220 }, { "epoch": 0.35944935418082935, - "grad_norm": 7.720498085021973, - "learning_rate": 8.09187232339868e-06, - "loss": 1.465, + "grad_norm": 8.11352252960205, + "learning_rate": 8.089540624568192e-06, + "loss": 1.4259, "step": 4230 }, { "epoch": 0.3602991162474507, - "grad_norm": 6.9877448081970215, - "learning_rate": 8.080202963699621e-06, - "loss": 1.6939, + "grad_norm": 6.660062789916992, + "learning_rate": 8.077865836203855e-06, + "loss": 1.6493, "step": 4240 }, { "epoch": 0.36114887831407205, - "grad_norm": 9.422313690185547, - "learning_rate": 8.068506501783123e-06, - "loss": 2.2305, + "grad_norm": 9.000665664672852, + "learning_rate": 8.066163966186093e-06, + "loss": 2.1898, "step": 4250 }, { "epoch": 0.3619986403806934, - "grad_norm": 8.121376037597656, - "learning_rate": 8.056783040564502e-06, - "loss": 2.0054, + "grad_norm": 7.744436740875244, + "learning_rate": 8.054435117477804e-06, + "loss": 2.011, "step": 4260 }, { "epoch": 0.36284840244731476, - "grad_norm": 8.214845657348633, - "learning_rate": 8.04503268319663e-06, - "loss": 1.4337, + "grad_norm": 8.127425193786621, + "learning_rate": 8.042679393279269e-06, + "loss": 1.4618, "step": 4270 }, { "epoch": 0.3636981645139361, - "grad_norm": 6.983572483062744, - "learning_rate": 8.033255533069042e-06, - "loss": 1.5175, + "grad_norm": 6.6823906898498535, + "learning_rate": 8.030896897027245e-06, + "loss": 1.4918, "step": 4280 }, { "epoch": 0.36454792658055746, - "grad_norm": 9.19643497467041, - "learning_rate": 8.021451693807012e-06, - "loss": 1.1785, + "grad_norm": 8.99140739440918, + "learning_rate": 8.01908773239404e-06, + "loss": 1.203, "step": 4290 }, { "epoch": 0.3653976886471788, - "grad_norm": 9.17553424835205, - "learning_rate": 8.00962126927065e-06, - "loss": 2.1175, + "grad_norm": 8.855307579040527, + "learning_rate": 8.007252003286626e-06, + "loss": 2.0598, "step": 4300 }, { "epoch": 0.36624745071380016, - "grad_norm": 8.604659080505371, - "learning_rate": 7.997764363553988e-06, - "loss": 1.3009, + "grad_norm": 8.348673820495605, + "learning_rate": 7.995389813845706e-06, + "loss": 1.2831, "step": 4310 }, { "epoch": 0.36709721278042146, - "grad_norm": 6.369007587432861, - "learning_rate": 7.985881080984055e-06, - "loss": 1.72, + "grad_norm": 6.327515602111816, + "learning_rate": 7.983501268444804e-06, + "loss": 1.6989, "step": 4320 }, { "epoch": 0.3679469748470428, - "grad_norm": 8.862330436706543, - "learning_rate": 7.973971526119974e-06, - "loss": 1.5373, + "grad_norm": 8.671867370605469, + "learning_rate": 7.971586471689347e-06, + "loss": 1.5319, "step": 4330 }, { "epoch": 0.36879673691366416, - "grad_norm": 9.502862930297852, - "learning_rate": 7.962035803752027e-06, - "loss": 1.8053, + "grad_norm": 9.713516235351562, + "learning_rate": 7.95964552841574e-06, + "loss": 1.7994, "step": 4340 }, { "epoch": 0.3696464989802855, - "grad_norm": 9.814229965209961, - "learning_rate": 7.950074018900745e-06, - "loss": 1.9372, + "grad_norm": 9.930511474609375, + "learning_rate": 7.947678543690449e-06, + "loss": 1.9254, "step": 4350 }, { "epoch": 0.37049626104690686, - "grad_norm": 6.97931432723999, - "learning_rate": 7.938086276815972e-06, - "loss": 1.3671, + "grad_norm": 6.926268100738525, + "learning_rate": 7.935685622809076e-06, + "loss": 1.373, "step": 4360 }, { "epoch": 0.3713460231135282, - "grad_norm": 9.007609367370605, - "learning_rate": 7.926072682975954e-06, - "loss": 1.8016, + "grad_norm": 8.565302848815918, + "learning_rate": 7.92366687129543e-06, + "loss": 1.7809, "step": 4370 }, { "epoch": 0.37219578518014956, - "grad_norm": 9.562712669372559, - "learning_rate": 7.914033343086397e-06, - "loss": 1.5083, + "grad_norm": 9.755486488342285, + "learning_rate": 7.911622394900601e-06, + "loss": 1.5119, "step": 4380 }, { "epoch": 0.3730455472467709, - "grad_norm": 7.235018253326416, - "learning_rate": 7.901968363079542e-06, - "loss": 0.9237, + "grad_norm": 7.140373229980469, + "learning_rate": 7.899552299602027e-06, + "loss": 0.9275, "step": 4390 }, { "epoch": 0.37389530931339227, - "grad_norm": 8.958460807800293, - "learning_rate": 7.889877849113237e-06, - "loss": 2.0224, + "grad_norm": 9.28571605682373, + "learning_rate": 7.887456691602559e-06, + "loss": 1.9906, "step": 4400 }, { "epoch": 0.3747450713800136, - "grad_norm": 9.535149574279785, - "learning_rate": 7.877761907569993e-06, - "loss": 1.69, + "grad_norm": 9.213364601135254, + "learning_rate": 7.875335677329537e-06, + "loss": 1.6756, "step": 4410 }, { "epoch": 0.37559483344663497, - "grad_norm": 9.828917503356934, - "learning_rate": 7.865620645056063e-06, - "loss": 1.9105, + "grad_norm": 9.870597839355469, + "learning_rate": 7.863189363433841e-06, + "loss": 1.8964, "step": 4420 }, { "epoch": 0.37644459551325626, - "grad_norm": 12.163987159729004, - "learning_rate": 7.85345416840049e-06, - "loss": 1.4098, + "grad_norm": 10.660319328308105, + "learning_rate": 7.851017856788962e-06, + "loss": 1.3878, "step": 4430 }, { "epoch": 0.3772943575798776, - "grad_norm": 9.919075965881348, - "learning_rate": 7.841262584654172e-06, - "loss": 2.2114, + "grad_norm": 9.772543907165527, + "learning_rate": 7.838821264490059e-06, + "loss": 2.1686, "step": 4440 }, { "epoch": 0.37814411964649897, - "grad_norm": 8.339529037475586, - "learning_rate": 7.829046001088918e-06, - "loss": 1.7582, + "grad_norm": 8.62176513671875, + "learning_rate": 7.826599693853012e-06, + "loss": 1.7287, "step": 4450 }, { "epoch": 0.3789938817131203, - "grad_norm": 8.928970336914062, - "learning_rate": 7.816804525196515e-06, - "loss": 1.4494, + "grad_norm": 8.811626434326172, + "learning_rate": 7.814353252413483e-06, + "loss": 1.4491, "step": 4460 }, { "epoch": 0.37984364377974167, - "grad_norm": 7.9853668212890625, - "learning_rate": 7.804538264687764e-06, - "loss": 1.2828, + "grad_norm": 7.640169143676758, + "learning_rate": 7.802082047925966e-06, + "loss": 1.2374, "step": 4470 }, { "epoch": 0.380693405846363, - "grad_norm": 7.567132949829102, - "learning_rate": 7.792247327491548e-06, - "loss": 1.7309, + "grad_norm": 7.157872676849365, + "learning_rate": 7.789786188362848e-06, + "loss": 1.7013, "step": 4480 }, { "epoch": 0.38154316791298437, - "grad_norm": 7.30474853515625, - "learning_rate": 7.779931821753875e-06, - "loss": 1.5327, + "grad_norm": 7.102935791015625, + "learning_rate": 7.777465781913443e-06, + "loss": 1.511, "step": 4490 }, { "epoch": 0.3823929299796057, - "grad_norm": 10.07349681854248, - "learning_rate": 7.767591855836925e-06, - "loss": 1.7761, + "grad_norm": 10.072187423706055, + "learning_rate": 7.765120936983056e-06, + "loss": 1.7912, "step": 4500 }, { "epoch": 0.3832426920462271, - "grad_norm": 6.691227436065674, - "learning_rate": 7.755227538318104e-06, - "loss": 1.3889, + "grad_norm": 6.578811168670654, + "learning_rate": 7.75275176219202e-06, + "loss": 1.3491, "step": 4510 }, { "epoch": 0.3840924541128484, - "grad_norm": 8.261728286743164, - "learning_rate": 7.74283897798908e-06, - "loss": 1.1631, + "grad_norm": 8.394582748413086, + "learning_rate": 7.740358366374745e-06, + "loss": 1.1391, "step": 4520 }, { "epoch": 0.3849422161794698, - "grad_norm": 11.02812385559082, - "learning_rate": 7.730426283854828e-06, - "loss": 2.2732, + "grad_norm": 10.514127731323242, + "learning_rate": 7.727940858578749e-06, + "loss": 2.2409, "step": 4530 }, { "epoch": 0.38579197824609107, - "grad_norm": 5.950666427612305, - "learning_rate": 7.717989565132679e-06, - "loss": 1.2204, + "grad_norm": 5.173934459686279, + "learning_rate": 7.71549934806372e-06, + "loss": 1.1876, "step": 4540 }, { "epoch": 0.3866417403127124, - "grad_norm": 6.9215874671936035, - "learning_rate": 7.705528931251349e-06, - "loss": 1.6794, + "grad_norm": 7.144071578979492, + "learning_rate": 7.703033944300532e-06, + "loss": 1.6563, "step": 4550 }, { "epoch": 0.3874915023793338, - "grad_norm": 7.782924652099609, - "learning_rate": 7.693044491849977e-06, - "loss": 1.4625, + "grad_norm": 7.457736492156982, + "learning_rate": 7.690544756970298e-06, + "loss": 1.4501, "step": 4560 }, { "epoch": 0.3883412644459551, - "grad_norm": 7.537171840667725, - "learning_rate": 7.680536356777166e-06, - "loss": 1.4197, + "grad_norm": 7.362822532653809, + "learning_rate": 7.678031895963398e-06, + "loss": 1.4546, "step": 4570 }, { "epoch": 0.3891910265125765, - "grad_norm": 8.504171371459961, - "learning_rate": 7.668004636090016e-06, - "loss": 1.8114, + "grad_norm": 8.738716125488281, + "learning_rate": 7.66549547137851e-06, + "loss": 1.8082, "step": 4580 }, { "epoch": 0.39004078857919783, - "grad_norm": 9.088093757629395, - "learning_rate": 7.655449440053147e-06, - "loss": 1.6546, + "grad_norm": 9.042044639587402, + "learning_rate": 7.652935593521646e-06, + "loss": 1.6279, "step": 4590 }, { "epoch": 0.3908905506458192, - "grad_norm": 9.801681518554688, - "learning_rate": 7.64287087913774e-06, - "loss": 1.6623, + "grad_norm": 9.298995018005371, + "learning_rate": 7.640352372905182e-06, + "loss": 1.6263, "step": 4600 }, { "epoch": 0.39174031271244053, - "grad_norm": 8.624170303344727, - "learning_rate": 7.630269064020557e-06, - "loss": 1.32, + "grad_norm": 8.552679061889648, + "learning_rate": 7.6277459202468775e-06, + "loss": 1.3064, "step": 4610 }, { "epoch": 0.3925900747790619, - "grad_norm": 8.79816722869873, - "learning_rate": 7.617644105582971e-06, - "loss": 1.3878, + "grad_norm": 8.55628490447998, + "learning_rate": 7.6151163464689115e-06, + "loss": 1.3364, "step": 4620 }, { "epoch": 0.39343983684568323, - "grad_norm": 7.821738243103027, - "learning_rate": 7.604996114909988e-06, - "loss": 1.4223, + "grad_norm": 7.827617645263672, + "learning_rate": 7.602463762696903e-06, + "loss": 1.3731, "step": 4630 }, { "epoch": 0.39428959891230453, - "grad_norm": 8.218290328979492, - "learning_rate": 7.592325203289275e-06, - "loss": 1.6874, + "grad_norm": 7.802983283996582, + "learning_rate": 7.589788280258927e-06, + "loss": 1.6393, "step": 4640 }, { "epoch": 0.3951393609789259, - "grad_norm": 6.902328014373779, - "learning_rate": 7.57963148221017e-06, - "loss": 1.5294, + "grad_norm": 7.087003231048584, + "learning_rate": 7.577090010684546e-06, + "loss": 1.5386, "step": 4650 }, { "epoch": 0.39598912304554723, - "grad_norm": 8.165420532226562, - "learning_rate": 7.566915063362716e-06, - "loss": 1.3812, + "grad_norm": 8.292591094970703, + "learning_rate": 7.564369065703818e-06, + "loss": 1.3492, "step": 4660 }, { "epoch": 0.3968388851121686, - "grad_norm": 7.9304609298706055, - "learning_rate": 7.554176058636666e-06, - "loss": 1.4364, + "grad_norm": 7.877934455871582, + "learning_rate": 7.551625557246322e-06, + "loss": 1.3999, "step": 4670 }, { "epoch": 0.39768864717878993, - "grad_norm": 10.279181480407715, - "learning_rate": 7.541414580120498e-06, - "loss": 1.6881, + "grad_norm": 10.34054946899414, + "learning_rate": 7.5388595974401715e-06, + "loss": 1.6538, "step": 4680 }, { "epoch": 0.3985384092454113, - "grad_norm": 8.270859718322754, - "learning_rate": 7.528630740100443e-06, - "loss": 1.128, + "grad_norm": 8.356302261352539, + "learning_rate": 7.526071298611017e-06, + "loss": 1.1034, "step": 4690 }, { "epoch": 0.39938817131203264, - "grad_norm": 5.550293445587158, - "learning_rate": 7.51582465105948e-06, - "loss": 1.2465, + "grad_norm": 5.701018333435059, + "learning_rate": 7.513260773281072e-06, + "loss": 1.2209, "step": 4700 }, { "epoch": 0.400237933378654, - "grad_norm": 6.127009868621826, - "learning_rate": 7.502996425676356e-06, - "loss": 1.2718, + "grad_norm": 6.343472480773926, + "learning_rate": 7.5004281341681225e-06, + "loss": 1.2475, "step": 4710 }, { "epoch": 0.40108769544527534, - "grad_norm": 7.330491065979004, - "learning_rate": 7.490146176824589e-06, - "loss": 1.4827, + "grad_norm": 7.651251792907715, + "learning_rate": 7.4875734941845235e-06, + "loss": 1.4437, "step": 4720 }, { "epoch": 0.4019374575118967, - "grad_norm": 9.004974365234375, - "learning_rate": 7.477274017571485e-06, - "loss": 1.3148, + "grad_norm": 7.739134311676025, + "learning_rate": 7.47469696643621e-06, + "loss": 1.3123, "step": 4730 }, { "epoch": 0.40278721957851804, - "grad_norm": 7.544227123260498, - "learning_rate": 7.464380061177132e-06, - "loss": 1.3925, + "grad_norm": 7.064877033233643, + "learning_rate": 7.461798664221711e-06, + "loss": 1.3572, "step": 4740 }, { "epoch": 0.40363698164513934, - "grad_norm": 6.836915493011475, - "learning_rate": 7.451464421093405e-06, - "loss": 1.752, + "grad_norm": 6.596515655517578, + "learning_rate": 7.4488787010311425e-06, + "loss": 1.7064, "step": 4750 }, { "epoch": 0.4044867437117607, - "grad_norm": 5.613548278808594, - "learning_rate": 7.438527210962973e-06, - "loss": 1.131, + "grad_norm": 5.8477959632873535, + "learning_rate": 7.43593719054521e-06, + "loss": 1.1078, "step": 4760 }, { "epoch": 0.40533650577838204, - "grad_norm": 8.718232154846191, - "learning_rate": 7.4255685446183e-06, - "loss": 1.5217, + "grad_norm": 8.53952693939209, + "learning_rate": 7.422974246634216e-06, + "loss": 1.5242, "step": 4770 }, { "epoch": 0.4061862678450034, - "grad_norm": 7.67559289932251, - "learning_rate": 7.412588536080634e-06, - "loss": 2.0192, + "grad_norm": 7.754718780517578, + "learning_rate": 7.409989983357042e-06, + "loss": 1.9819, "step": 4780 }, { "epoch": 0.40703602991162474, - "grad_norm": 6.534012794494629, - "learning_rate": 7.399587299559011e-06, - "loss": 1.2351, + "grad_norm": 5.990664005279541, + "learning_rate": 7.396984514960167e-06, + "loss": 1.2159, "step": 4790 }, { "epoch": 0.4078857919782461, - "grad_norm": 6.662696838378906, - "learning_rate": 7.38656494944925e-06, - "loss": 0.9344, + "grad_norm": 6.1663079261779785, + "learning_rate": 7.3839579558766435e-06, + "loss": 0.9277, "step": 4800 }, { "epoch": 0.40873555404486744, - "grad_norm": 9.426609992980957, - "learning_rate": 7.373521600332944e-06, - "loss": 1.771, + "grad_norm": 9.056880950927734, + "learning_rate": 7.370910420725099e-06, + "loss": 1.7686, "step": 4810 }, { "epoch": 0.4095853161114888, - "grad_norm": 5.540933609008789, - "learning_rate": 7.360457366976457e-06, - "loss": 1.2621, + "grad_norm": 5.699574947357178, + "learning_rate": 7.357842024308724e-06, + "loss": 1.2682, "step": 4820 }, { "epoch": 0.41043507817811015, - "grad_norm": 7.994694709777832, - "learning_rate": 7.3473723643299035e-06, - "loss": 1.4606, + "grad_norm": 7.813380241394043, + "learning_rate": 7.344752881614272e-06, + "loss": 1.4559, "step": 4830 }, { "epoch": 0.4112848402447315, - "grad_norm": 8.455775260925293, - "learning_rate": 7.334266707526146e-06, - "loss": 1.6739, + "grad_norm": 8.558573722839355, + "learning_rate": 7.331643107811028e-06, + "loss": 1.6704, "step": 4840 }, { "epoch": 0.41213460231135285, - "grad_norm": 9.671225547790527, - "learning_rate": 7.321140511879784e-06, - "loss": 1.8958, + "grad_norm": 9.197162628173828, + "learning_rate": 7.318512818249816e-06, + "loss": 1.8827, "step": 4850 }, { "epoch": 0.41298436437797414, - "grad_norm": 13.373485565185547, - "learning_rate": 7.307993892886128e-06, - "loss": 1.8307, + "grad_norm": 12.833270072937012, + "learning_rate": 7.305362128461973e-06, + "loss": 1.8031, "step": 4860 }, { "epoch": 0.4138341264445955, - "grad_norm": 9.690964698791504, - "learning_rate": 7.294826966220198e-06, - "loss": 1.566, + "grad_norm": 9.722432136535645, + "learning_rate": 7.29219115415833e-06, + "loss": 1.5041, "step": 4870 }, { "epoch": 0.41468388851121685, - "grad_norm": 8.634622573852539, - "learning_rate": 7.281639847735689e-06, - "loss": 1.7765, + "grad_norm": 8.810405731201172, + "learning_rate": 7.279000011228201e-06, + "loss": 1.7433, "step": 4880 }, { "epoch": 0.4155336505778382, - "grad_norm": 8.077951431274414, - "learning_rate": 7.268432653463966e-06, - "loss": 1.1729, + "grad_norm": 7.90717887878418, + "learning_rate": 7.265788815738366e-06, + "loss": 1.1801, "step": 4890 }, { "epoch": 0.41638341264445955, - "grad_norm": 6.252593040466309, - "learning_rate": 7.255205499613043e-06, - "loss": 1.7541, + "grad_norm": 6.2236223220825195, + "learning_rate": 7.2525576839320324e-06, + "loss": 1.7493, "step": 4900 }, { "epoch": 0.4172331747110809, - "grad_norm": 8.768767356872559, - "learning_rate": 7.241958502566542e-06, - "loss": 1.3042, + "grad_norm": 9.999082565307617, + "learning_rate": 7.239306732227835e-06, + "loss": 1.3221, "step": 4910 }, { "epoch": 0.41808293677770225, - "grad_norm": 7.221401691436768, - "learning_rate": 7.2286917788826926e-06, - "loss": 1.5605, + "grad_norm": 7.399506568908691, + "learning_rate": 7.2260360772187965e-06, + "loss": 1.5274, "step": 4920 }, { "epoch": 0.4189326988443236, - "grad_norm": 6.990739345550537, - "learning_rate": 7.215405445293293e-06, - "loss": 1.29, + "grad_norm": 7.195659637451172, + "learning_rate": 7.212745835671303e-06, + "loss": 1.2865, "step": 4930 }, { "epoch": 0.41978246091094495, - "grad_norm": 7.27325963973999, - "learning_rate": 7.2020996187026834e-06, - "loss": 1.1997, + "grad_norm": 7.6898322105407715, + "learning_rate": 7.199436124524087e-06, + "loss": 1.1829, "step": 4940 }, { "epoch": 0.4206322229775663, - "grad_norm": 7.545306205749512, - "learning_rate": 7.188774416186726e-06, - "loss": 1.6358, + "grad_norm": 7.707851886749268, + "learning_rate": 7.186107060887182e-06, + "loss": 1.6341, "step": 4950 }, { "epoch": 0.4214819850441876, - "grad_norm": 12.020845413208008, - "learning_rate": 7.175429954991762e-06, - "loss": 1.7445, + "grad_norm": 11.568890571594238, + "learning_rate": 7.172758762040907e-06, + "loss": 1.7116, "step": 4960 }, { "epoch": 0.42233174711080895, - "grad_norm": 8.9022855758667, - "learning_rate": 7.162066352533588e-06, - "loss": 2.1302, + "grad_norm": 8.33413314819336, + "learning_rate": 7.159391345434828e-06, + "loss": 2.116, "step": 4970 }, { "epoch": 0.4231815091774303, - "grad_norm": 6.184177398681641, - "learning_rate": 7.148683726396426e-06, - "loss": 1.0585, + "grad_norm": 6.171403884887695, + "learning_rate": 7.146004928686723e-06, + "loss": 1.0212, "step": 4980 }, { "epoch": 0.42403127124405166, - "grad_norm": 8.16968822479248, - "learning_rate": 7.135282194331881e-06, - "loss": 1.6825, + "grad_norm": 7.8836445808410645, + "learning_rate": 7.1325996295815495e-06, + "loss": 1.6326, "step": 4990 }, { "epoch": 0.424881033310673, - "grad_norm": 8.731705665588379, - "learning_rate": 7.121861874257906e-06, - "loss": 1.568, + "grad_norm": 8.432229995727539, + "learning_rate": 7.119175566070409e-06, + "loss": 1.5782, "step": 5000 }, { "epoch": 0.424881033310673, - "eval_cosine_accuracy@1": 0.7045, - "eval_cosine_accuracy@10": 0.914, - "eval_cosine_accuracy@3": 0.8325, - "eval_cosine_accuracy@5": 0.8735, - "eval_cosine_map@100": 0.780577101122243, - "eval_cosine_mrr@10": 0.777320634920634, - "eval_cosine_ndcg@10": 0.8105761407297323, - "eval_cosine_precision@1": 0.7045, - "eval_cosine_precision@10": 0.09140000000000001, - "eval_cosine_precision@3": 0.2775, - "eval_cosine_precision@5": 0.1747, - "eval_cosine_recall@1": 0.7045, - "eval_cosine_recall@10": 0.914, - "eval_cosine_recall@3": 0.8325, - "eval_cosine_recall@5": 0.8735, - "eval_loss": 1.4439417123794556, - "eval_runtime": 2.777, - "eval_samples_per_second": 271.518, - "eval_sequential_score": 0.780577101122243, - "eval_steps_per_second": 4.321, - "eval_sts-dev_pearson_cosine": 0.7983715832851553, - "eval_sts-dev_pearson_dot": 0.7949398525111272, - "eval_sts-dev_pearson_euclidean": 0.7864998353770025, - "eval_sts-dev_pearson_manhattan": 0.788021160315683, - "eval_sts-dev_pearson_max": 0.7983715832851553, - "eval_sts-dev_spearman_cosine": 0.8021687276733499, - "eval_sts-dev_spearman_dot": 0.7973264317688963, - "eval_sts-dev_spearman_euclidean": 0.8029215945772964, - "eval_sts-dev_spearman_manhattan": 0.804175475564921, - "eval_sts-dev_spearman_max": 0.804175475564921, + "eval_cosine_accuracy@1": 0.706, + "eval_cosine_accuracy@10": 0.9145, + "eval_cosine_accuracy@3": 0.8355, + "eval_cosine_accuracy@5": 0.873, + "eval_cosine_map@100": 0.7816911554217831, + "eval_cosine_mrr@10": 0.7784920634920632, + "eval_cosine_ndcg@10": 0.8115559047709803, + "eval_cosine_precision@1": 0.706, + "eval_cosine_precision@10": 0.09145000000000002, + "eval_cosine_precision@3": 0.2785, + "eval_cosine_precision@5": 0.17460000000000003, + "eval_cosine_recall@1": 0.706, + "eval_cosine_recall@10": 0.9145, + "eval_cosine_recall@3": 0.8355, + "eval_cosine_recall@5": 0.873, + "eval_loss": 1.4283329248428345, + "eval_runtime": 2.6352, + "eval_samples_per_second": 286.128, + "eval_sequential_score": 0.7816911554217831, + "eval_steps_per_second": 4.554, + "eval_sts-dev_pearson_cosine": 0.7955530868319933, + "eval_sts-dev_pearson_dot": 0.7916694662965349, + "eval_sts-dev_pearson_euclidean": 0.7860384964059665, + "eval_sts-dev_pearson_manhattan": 0.7858348336287322, + "eval_sts-dev_pearson_max": 0.7955530868319933, + "eval_sts-dev_spearman_cosine": 0.8030242582460162, + "eval_sts-dev_spearman_dot": 0.796334787224842, + "eval_sts-dev_spearman_euclidean": 0.8000570090643238, + "eval_sts-dev_spearman_manhattan": 0.7982098320436095, + "eval_sts-dev_spearman_max": 0.8030242582460162, "step": 5000 }, { "epoch": 0.42573079537729436, - "grad_norm": 7.691525459289551, - "learning_rate": 7.108422884257772e-06, - "loss": 1.2089, + "grad_norm": 8.005908966064453, + "learning_rate": 7.105732856269509e-06, + "loss": 1.1953, "step": 5010 }, { "epoch": 0.4265805574439157, - "grad_norm": 7.477416515350342, - "learning_rate": 7.094965342579018e-06, - "loss": 1.2646, + "grad_norm": 7.294424057006836, + "learning_rate": 7.09227161845912e-06, + "loss": 1.2725, "step": 5020 }, { "epoch": 0.42743031951053706, - "grad_norm": 7.4865803718566895, - "learning_rate": 7.081489367632422e-06, - "loss": 1.168, + "grad_norm": 7.306400299072266, + "learning_rate": 7.078791971082538e-06, + "loss": 1.1633, "step": 5030 }, { "epoch": 0.4282800815771584, - "grad_norm": 7.340981960296631, - "learning_rate": 7.067995077990948e-06, - "loss": 1.4717, + "grad_norm": 7.5250115394592285, + "learning_rate": 7.065294032745046e-06, + "loss": 1.4567, "step": 5040 }, { "epoch": 0.42912984364377976, - "grad_norm": 8.560068130493164, - "learning_rate": 7.054482592388713e-06, - "loss": 1.5907, + "grad_norm": 8.874271392822266, + "learning_rate": 7.05177792221286e-06, + "loss": 1.5835, "step": 5050 }, { "epoch": 0.4299796057104011, - "grad_norm": 6.671414375305176, - "learning_rate": 7.040952029719934e-06, - "loss": 1.7169, + "grad_norm": 6.954692840576172, + "learning_rate": 7.038243758412093e-06, + "loss": 1.7031, "step": 5060 }, { "epoch": 0.4308293677770224, - "grad_norm": 10.062768936157227, - "learning_rate": 7.0274035090378875e-06, - "loss": 1.8394, + "grad_norm": 10.508516311645508, + "learning_rate": 7.024691660427709e-06, + "loss": 1.8205, "step": 5070 }, { "epoch": 0.43167912984364376, - "grad_norm": 9.504973411560059, - "learning_rate": 7.013837149553857e-06, - "loss": 1.819, + "grad_norm": 9.475639343261719, + "learning_rate": 7.0111217475024675e-06, + "loss": 1.7956, "step": 5080 }, { "epoch": 0.4325288919102651, - "grad_norm": 7.36112642288208, - "learning_rate": 7.0002530706360895e-06, - "loss": 1.4452, + "grad_norm": 7.41269588470459, + "learning_rate": 6.997534139035881e-06, + "loss": 1.4548, "step": 5090 }, { "epoch": 0.43337865397688646, - "grad_norm": 6.239314556121826, - "learning_rate": 6.986651391808741e-06, - "loss": 1.3222, + "grad_norm": 6.058510780334473, + "learning_rate": 6.983928954583161e-06, + "loss": 1.3128, "step": 5100 }, { "epoch": 0.4342284160435078, - "grad_norm": 9.044203758239746, - "learning_rate": 6.973032232750825e-06, - "loss": 1.5417, + "grad_norm": 8.848884582519531, + "learning_rate": 6.970306313854167e-06, + "loss": 1.4953, "step": 5110 }, { "epoch": 0.43507817811012917, - "grad_norm": 7.0525736808776855, - "learning_rate": 6.959395713295164e-06, - "loss": 1.276, + "grad_norm": 7.286949634552002, + "learning_rate": 6.956666336712357e-06, + "loss": 1.2878, "step": 5120 }, { "epoch": 0.4359279401767505, - "grad_norm": 8.724111557006836, - "learning_rate": 6.945741953427332e-06, - "loss": 1.275, + "grad_norm": 8.425675392150879, + "learning_rate": 6.943009143173726e-06, + "loss": 1.2808, "step": 5130 }, { "epoch": 0.43677770224337187, - "grad_norm": 8.947551727294922, - "learning_rate": 6.932071073284593e-06, - "loss": 1.7294, + "grad_norm": 8.623629570007324, + "learning_rate": 6.929334853405753e-06, + "loss": 1.6998, "step": 5140 }, { "epoch": 0.4376274643099932, - "grad_norm": 9.118173599243164, - "learning_rate": 6.918383193154856e-06, - "loss": 1.513, + "grad_norm": 8.960277557373047, + "learning_rate": 6.915643587726347e-06, + "loss": 1.5072, "step": 5150 }, { "epoch": 0.43847722637661457, - "grad_norm": 8.94058895111084, - "learning_rate": 6.9046784334756075e-06, - "loss": 2.1804, + "grad_norm": 8.669367790222168, + "learning_rate": 6.901935466602785e-06, + "loss": 2.1685, "step": 5160 }, { "epoch": 0.43932698844323587, - "grad_norm": 8.706911087036133, - "learning_rate": 6.890956914832856e-06, - "loss": 1.5445, + "grad_norm": 8.367534637451172, + "learning_rate": 6.888210610650646e-06, + "loss": 1.5449, "step": 5170 }, { "epoch": 0.4401767505098572, - "grad_norm": 7.781229496002197, - "learning_rate": 6.8772187579600646e-06, - "loss": 1.5222, + "grad_norm": 8.151419639587402, + "learning_rate": 6.874469140632767e-06, + "loss": 1.5365, "step": 5180 }, { "epoch": 0.44102651257647857, - "grad_norm": 6.174472808837891, - "learning_rate": 6.8634640837371015e-06, - "loss": 3.1254, + "grad_norm": 6.400411605834961, + "learning_rate": 6.860711177458159e-06, + "loss": 2.8665, "step": 5190 }, { "epoch": 0.4418762746430999, - "grad_norm": 8.80291748046875, - "learning_rate": 6.84969301318916e-06, - "loss": 1.3722, + "grad_norm": 8.766158103942871, + "learning_rate": 6.84693684218096e-06, + "loss": 1.3293, "step": 5200 }, { "epoch": 0.44272603670972127, - "grad_norm": 8.462565422058105, - "learning_rate": 6.835905667485709e-06, - "loss": 1.9542, + "grad_norm": 8.537097930908203, + "learning_rate": 6.833146255999365e-06, + "loss": 1.9454, "step": 5210 }, { "epoch": 0.4435757987763426, - "grad_norm": 11.32432746887207, - "learning_rate": 6.822102167939415e-06, - "loss": 2.1457, + "grad_norm": 11.336971282958984, + "learning_rate": 6.819339540254548e-06, + "loss": 2.1613, "step": 5220 }, { "epoch": 0.444425560842964, - "grad_norm": 8.96746826171875, - "learning_rate": 6.808282636005083e-06, - "loss": 1.8624, + "grad_norm": 8.233909606933594, + "learning_rate": 6.805516816429618e-06, + "loss": 1.8404, "step": 5230 }, { "epoch": 0.4452753229095853, - "grad_norm": 6.485705375671387, - "learning_rate": 6.7944471932785825e-06, - "loss": 1.7684, + "grad_norm": 6.815123081207275, + "learning_rate": 6.791678206148528e-06, + "loss": 1.7808, "step": 5240 }, { "epoch": 0.4461250849762067, - "grad_norm": 8.290863990783691, - "learning_rate": 6.78059596149578e-06, - "loss": 1.2237, + "grad_norm": 8.017534255981445, + "learning_rate": 6.7778238311750126e-06, + "loss": 1.2141, "step": 5250 }, { "epoch": 0.446974847042828, - "grad_norm": 11.067569732666016, - "learning_rate": 6.766729062531469e-06, - "loss": 1.3555, + "grad_norm": 10.79391860961914, + "learning_rate": 6.763953813411523e-06, + "loss": 1.3211, "step": 5260 }, { "epoch": 0.4478246091094494, - "grad_norm": 7.866631984710693, - "learning_rate": 6.752846618398292e-06, - "loss": 2.0354, + "grad_norm": 7.547606468200684, + "learning_rate": 6.750068274898147e-06, + "loss": 2.0617, "step": 5270 }, { "epoch": 0.4486743711760707, - "grad_norm": 8.817779541015625, - "learning_rate": 6.738948751245676e-06, - "loss": 2.1129, + "grad_norm": 8.352490425109863, + "learning_rate": 6.736167337811534e-06, + "loss": 2.0629, "step": 5280 }, { "epoch": 0.449524133242692, - "grad_norm": 6.634336948394775, - "learning_rate": 6.725035583358749e-06, - "loss": 1.2955, + "grad_norm": 6.5088372230529785, + "learning_rate": 6.722251124463828e-06, + "loss": 1.2651, "step": 5290 }, { "epoch": 0.4503738953093134, - "grad_norm": 9.12083625793457, - "learning_rate": 6.7111072371572685e-06, - "loss": 1.9078, + "grad_norm": 9.166635513305664, + "learning_rate": 6.708319757301579e-06, + "loss": 1.9326, "step": 5300 }, { "epoch": 0.45122365737593473, - "grad_norm": 6.263072490692139, - "learning_rate": 6.697163835194544e-06, - "loss": 1.4993, + "grad_norm": 6.351733684539795, + "learning_rate": 6.694373358904681e-06, + "loss": 1.455, "step": 5310 }, { "epoch": 0.4520734194425561, - "grad_norm": 8.75985336303711, - "learning_rate": 6.683205500156361e-06, - "loss": 2.0274, + "grad_norm": 8.117010116577148, + "learning_rate": 6.68041205198528e-06, + "loss": 2.0163, "step": 5320 }, { "epoch": 0.45292318150917743, - "grad_norm": 9.481352806091309, - "learning_rate": 6.669232354859893e-06, - "loss": 1.3875, + "grad_norm": 8.947404861450195, + "learning_rate": 6.6664359593867035e-06, + "loss": 1.3844, "step": 5330 }, { "epoch": 0.4537729435757988, - "grad_norm": 8.203059196472168, - "learning_rate": 6.655244522252631e-06, - "loss": 2.1148, + "grad_norm": 8.320416450500488, + "learning_rate": 6.6524452040823715e-06, + "loss": 2.1358, "step": 5340 }, { "epoch": 0.45462270564242013, - "grad_norm": 9.478967666625977, - "learning_rate": 6.641242125411296e-06, - "loss": 1.6435, + "grad_norm": 9.105202674865723, + "learning_rate": 6.638439909174724e-06, + "loss": 1.6149, "step": 5350 }, { "epoch": 0.4554724677090415, - "grad_norm": 7.239821910858154, - "learning_rate": 6.627225287540756e-06, - "loss": 1.5698, + "grad_norm": 7.074492454528809, + "learning_rate": 6.624420197894129e-06, + "loss": 1.5739, "step": 5360 }, { "epoch": 0.45632222977566284, - "grad_norm": 9.34277629852295, - "learning_rate": 6.613194131972947e-06, - "loss": 1.3709, + "grad_norm": 9.93025016784668, + "learning_rate": 6.610386193597804e-06, + "loss": 1.365, "step": 5370 }, { "epoch": 0.4571719918422842, - "grad_norm": 7.141616344451904, - "learning_rate": 6.59914878216578e-06, - "loss": 1.4534, + "grad_norm": 5.934605121612549, + "learning_rate": 6.596338019768725e-06, + "loss": 1.4386, "step": 5380 }, { "epoch": 0.4580217539089055, - "grad_norm": 9.660737037658691, - "learning_rate": 6.585089361702062e-06, - "loss": 1.9158, + "grad_norm": 9.780712127685547, + "learning_rate": 6.582275800014548e-06, + "loss": 1.8719, "step": 5390 }, { "epoch": 0.45887151597552683, - "grad_norm": 6.371088981628418, - "learning_rate": 6.571015994288403e-06, - "loss": 1.3541, + "grad_norm": 6.486886978149414, + "learning_rate": 6.568199658066512e-06, + "loss": 1.357, "step": 5400 }, { "epoch": 0.4597212780421482, - "grad_norm": 9.174999237060547, - "learning_rate": 6.556928803754132e-06, - "loss": 1.5309, + "grad_norm": 9.179461479187012, + "learning_rate": 6.554109717778361e-06, + "loss": 1.5401, "step": 5410 }, { "epoch": 0.46057104010876954, - "grad_norm": 7.95292854309082, - "learning_rate": 6.542827914050206e-06, - "loss": 1.572, + "grad_norm": 8.25363826751709, + "learning_rate": 6.540006103125245e-06, + "loss": 1.6023, "step": 5420 }, { "epoch": 0.4614208021753909, - "grad_norm": 7.535348892211914, - "learning_rate": 6.528713449248115e-06, - "loss": 1.3007, + "grad_norm": 7.671759128570557, + "learning_rate": 6.525888938202629e-06, + "loss": 1.277, "step": 5430 }, { "epoch": 0.46227056424201224, - "grad_norm": 11.886709213256836, - "learning_rate": 6.514585533538798e-06, - "loss": 1.5653, + "grad_norm": 12.01596450805664, + "learning_rate": 6.511758347225214e-06, + "loss": 1.5706, "step": 5440 }, { "epoch": 0.4631203263086336, - "grad_norm": 9.953372955322266, - "learning_rate": 6.500444291231544e-06, - "loss": 1.7814, + "grad_norm": 9.37887954711914, + "learning_rate": 6.497614454525827e-06, + "loss": 1.7458, "step": 5450 }, { "epoch": 0.46397008837525494, - "grad_norm": 6.726216793060303, - "learning_rate": 6.486289846752904e-06, - "loss": 1.2233, + "grad_norm": 6.1312713623046875, + "learning_rate": 6.483457384554335e-06, + "loss": 1.2394, "step": 5460 }, { "epoch": 0.4648198504418763, - "grad_norm": 7.960389614105225, - "learning_rate": 6.472122324645585e-06, - "loss": 1.2173, + "grad_norm": 8.302346229553223, + "learning_rate": 6.469287261876555e-06, + "loss": 1.1898, "step": 5470 }, { "epoch": 0.46566961250849764, - "grad_norm": 4.985921859741211, - "learning_rate": 6.457941849567375e-06, - "loss": 1.7029, + "grad_norm": 4.888485908508301, + "learning_rate": 6.455104211173147e-06, + "loss": 1.6555, "step": 5480 }, { "epoch": 0.46651937457511894, - "grad_norm": 11.334051132202148, - "learning_rate": 6.443748546290021e-06, - "loss": 2.1117, + "grad_norm": 10.377059936523438, + "learning_rate": 6.440908357238525e-06, + "loss": 2.1313, "step": 5490 }, { "epoch": 0.4673691366417403, - "grad_norm": 7.912303924560547, - "learning_rate": 6.429542539698148e-06, - "loss": 1.556, + "grad_norm": 7.879420280456543, + "learning_rate": 6.42669982497976e-06, + "loss": 1.5389, "step": 5500 }, { "epoch": 0.46821889870836164, - "grad_norm": 7.067042350769043, - "learning_rate": 6.415323954788155e-06, - "loss": 1.8433, + "grad_norm": 7.732510566711426, + "learning_rate": 6.41247873941547e-06, + "loss": 1.8014, "step": 5510 }, { "epoch": 0.469068660774983, - "grad_norm": 5.579277515411377, - "learning_rate": 6.401092916667118e-06, - "loss": 0.8075, + "grad_norm": 5.46242618560791, + "learning_rate": 6.398245225674734e-06, + "loss": 0.8131, "step": 5520 }, { "epoch": 0.46991842284160434, - "grad_norm": 10.324089050292969, - "learning_rate": 6.3868495505516835e-06, - "loss": 2.0082, + "grad_norm": 10.11772632598877, + "learning_rate": 6.383999408995983e-06, + "loss": 1.9825, "step": 5530 }, { "epoch": 0.4707681849082257, - "grad_norm": 7.171175956726074, - "learning_rate": 6.372593981766971e-06, - "loss": 1.1483, + "grad_norm": 7.516256332397461, + "learning_rate": 6.369741414725896e-06, + "loss": 1.1446, "step": 5540 }, { "epoch": 0.47161794697484705, - "grad_norm": 9.941575050354004, - "learning_rate": 6.3583263357454696e-06, - "loss": 1.6459, + "grad_norm": 9.727339744567871, + "learning_rate": 6.355471368318308e-06, + "loss": 1.6029, "step": 5550 }, { "epoch": 0.4724677090414684, - "grad_norm": 5.382468223571777, - "learning_rate": 6.344046738025935e-06, - "loss": 0.8295, + "grad_norm": 5.756147384643555, + "learning_rate": 6.341189395333092e-06, + "loss": 0.8073, "step": 5560 }, { "epoch": 0.47331747110808975, - "grad_norm": 11.01451587677002, - "learning_rate": 6.329755314252284e-06, - "loss": 1.4555, + "grad_norm": 10.764717102050781, + "learning_rate": 6.326895621435062e-06, + "loss": 1.4648, "step": 5570 }, { "epoch": 0.4741672331747111, - "grad_norm": 9.689501762390137, - "learning_rate": 6.315452190172487e-06, - "loss": 1.4153, + "grad_norm": 9.980502128601074, + "learning_rate": 6.312590172392871e-06, + "loss": 1.4102, "step": 5580 }, { "epoch": 0.47501699524133245, - "grad_norm": 8.302642822265625, - "learning_rate": 6.301137491637462e-06, - "loss": 1.4018, + "grad_norm": 8.467399597167969, + "learning_rate": 6.298273174077895e-06, + "loss": 1.3797, "step": 5590 }, { "epoch": 0.47586675730795375, - "grad_norm": 7.848912239074707, - "learning_rate": 6.2868113445999765e-06, - "loss": 1.5548, + "grad_norm": 8.147005081176758, + "learning_rate": 6.283944752463131e-06, + "loss": 1.5279, "step": 5600 }, { "epoch": 0.4767165193745751, - "grad_norm": 8.380659103393555, - "learning_rate": 6.272473875113524e-06, - "loss": 1.5612, + "grad_norm": 8.436983108520508, + "learning_rate": 6.269605033622091e-06, + "loss": 1.5366, "step": 5610 }, { "epoch": 0.47756628144119645, - "grad_norm": 6.567471504211426, - "learning_rate": 6.258125209331224e-06, - "loss": 1.8034, + "grad_norm": 6.177023410797119, + "learning_rate": 6.255254143727686e-06, + "loss": 1.7663, "step": 5620 }, { "epoch": 0.4784160435078178, - "grad_norm": 9.527120590209961, - "learning_rate": 6.243765473504714e-06, - "loss": 1.4459, + "grad_norm": 9.247788429260254, + "learning_rate": 6.24089220905112e-06, + "loss": 1.4334, "step": 5630 }, { "epoch": 0.47926580557443915, - "grad_norm": 8.124838829040527, - "learning_rate": 6.22939479398303e-06, - "loss": 1.7229, + "grad_norm": 8.03177261352539, + "learning_rate": 6.226519355960778e-06, + "loss": 1.7049, "step": 5640 }, { "epoch": 0.4801155676410605, - "grad_norm": 8.870731353759766, - "learning_rate": 6.215013297211504e-06, - "loss": 1.9693, + "grad_norm": 9.060959815979004, + "learning_rate": 6.212135710921119e-06, + "loss": 1.9447, "step": 5650 }, { "epoch": 0.48096532970768185, - "grad_norm": 7.698125839233398, - "learning_rate": 6.200621109730644e-06, - "loss": 1.3275, + "grad_norm": 7.434525489807129, + "learning_rate": 6.197741400491551e-06, + "loss": 1.3648, "step": 5660 }, { "epoch": 0.4818150917743032, - "grad_norm": 8.39632511138916, - "learning_rate": 6.186218358175027e-06, - "loss": 1.7843, + "grad_norm": 7.733390808105469, + "learning_rate": 6.183336551325331e-06, + "loss": 1.7867, "step": 5670 }, { "epoch": 0.48266485384092456, - "grad_norm": 8.271629333496094, - "learning_rate": 6.171805169272179e-06, - "loss": 1.6584, + "grad_norm": 7.936905860900879, + "learning_rate": 6.168921290168443e-06, + "loss": 1.6188, "step": 5680 }, { "epoch": 0.4835146159075459, - "grad_norm": 9.045364379882812, - "learning_rate": 6.1573816698414646e-06, - "loss": 1.8105, + "grad_norm": 7.95361852645874, + "learning_rate": 6.154495743858479e-06, + "loss": 1.7816, "step": 5690 }, { "epoch": 0.48436437797416726, - "grad_norm": 6.140502452850342, - "learning_rate": 6.1429479867929654e-06, - "loss": 1.4635, + "grad_norm": 5.712251663208008, + "learning_rate": 6.141504063277857e-06, + "loss": 1.4414, "step": 5700 }, { "epoch": 0.48521414004078856, - "grad_norm": 6.779681205749512, - "learning_rate": 6.128504247126374e-06, - "loss": 1.2386, + "grad_norm": 6.968475818634033, + "learning_rate": 6.127059324937853e-06, + "loss": 1.1949, "step": 5710 }, { "epoch": 0.4860639021074099, - "grad_norm": 8.877243995666504, - "learning_rate": 6.114050577929862e-06, + "grad_norm": 8.757183074951172, + "learning_rate": 6.112604669781572e-06, "loss": 1.9432, "step": 5720 }, { "epoch": 0.48691366417403126, - "grad_norm": 5.0266547203063965, - "learning_rate": 6.099587106378973e-06, - "loss": 1.6577, + "grad_norm": 4.859631061553955, + "learning_rate": 6.098140224993233e-06, + "loss": 1.6184, "step": 5730 }, { "epoch": 0.4877634262406526, - "grad_norm": 6.566911697387695, - "learning_rate": 6.085113959735502e-06, - "loss": 1.5778, + "grad_norm": 6.180206775665283, + "learning_rate": 6.083666117843188e-06, + "loss": 1.5613, "step": 5740 }, { "epoch": 0.48861318830727396, - "grad_norm": 11.004392623901367, - "learning_rate": 6.07063126534637e-06, - "loss": 1.7221, + "grad_norm": 10.80888557434082, + "learning_rate": 6.0691824756868145e-06, + "loss": 1.7348, "step": 5750 }, { "epoch": 0.4894629503738953, - "grad_norm": 5.242037773132324, - "learning_rate": 6.056139150642506e-06, - "loss": 1.3791, + "grad_norm": 5.543217182159424, + "learning_rate": 6.0546894259633804e-06, + "loss": 1.3744, "step": 5760 }, { "epoch": 0.49031271244051666, - "grad_norm": 6.329412937164307, - "learning_rate": 6.041637743137734e-06, - "loss": 1.9913, + "grad_norm": 5.817404747009277, + "learning_rate": 6.040187096194934e-06, + "loss": 1.9828, "step": 5770 }, { "epoch": 0.491162474507138, - "grad_norm": 9.366252899169922, - "learning_rate": 6.027127170427634e-06, - "loss": 1.7371, + "grad_norm": 9.284978866577148, + "learning_rate": 6.025675613985175e-06, + "loss": 1.7423, "step": 5780 }, { "epoch": 0.49201223657375937, - "grad_norm": 9.438436508178711, - "learning_rate": 6.012607560188438e-06, - "loss": 1.393, + "grad_norm": 9.64454174041748, + "learning_rate": 6.0111551070183315e-06, + "loss": 1.3677, "step": 5790 }, { "epoch": 0.4928619986403807, - "grad_norm": 6.912937641143799, - "learning_rate": 5.998079040175893e-06, - "loss": 1.16, + "grad_norm": 6.645661354064941, + "learning_rate": 5.9966257030580455e-06, + "loss": 1.1892, "step": 5800 }, { "epoch": 0.493711760707002, - "grad_norm": 8.882918357849121, - "learning_rate": 5.983541738224141e-06, - "loss": 1.5597, + "grad_norm": 9.195141792297363, + "learning_rate": 5.9820875299462364e-06, + "loss": 1.588, "step": 5810 }, { "epoch": 0.49456152277362336, - "grad_norm": 5.645468711853027, - "learning_rate": 5.9689957822445995e-06, - "loss": 1.5462, + "grad_norm": 5.596313953399658, + "learning_rate": 5.967540715601983e-06, + "loss": 1.5046, "step": 5820 }, { "epoch": 0.4954112848402447, - "grad_norm": 8.029216766357422, - "learning_rate": 5.954441300224827e-06, - "loss": 1.6387, + "grad_norm": 7.805430889129639, + "learning_rate": 5.952985388020403e-06, + "loss": 1.5982, "step": 5830 }, { "epoch": 0.49626104690686607, - "grad_norm": 7.521322250366211, - "learning_rate": 5.939878420227402e-06, - "loss": 1.5032, + "grad_norm": 7.463534355163574, + "learning_rate": 5.938421675271509e-06, + "loss": 1.492, "step": 5840 }, { "epoch": 0.4971108089734874, - "grad_norm": 8.00305461883545, - "learning_rate": 5.9253072703887984e-06, - "loss": 1.7237, + "grad_norm": 7.803483009338379, + "learning_rate": 5.923849705499103e-06, + "loss": 1.7543, "step": 5850 }, { "epoch": 0.49796057104010877, - "grad_norm": 4.509445667266846, - "learning_rate": 5.910727978918251e-06, - "loss": 2.005, + "grad_norm": 4.219145774841309, + "learning_rate": 5.9092696069196385e-06, + "loss": 1.9768, "step": 5860 }, { "epoch": 0.4988103331067301, - "grad_norm": 7.122054576873779, - "learning_rate": 5.896140674096634e-06, - "loss": 1.5409, + "grad_norm": 6.826533317565918, + "learning_rate": 5.894681507821089e-06, + "loss": 1.5444, "step": 5870 }, { "epoch": 0.49966009517335147, - "grad_norm": 9.378992080688477, - "learning_rate": 5.881545484275332e-06, - "loss": 1.3109, + "grad_norm": 8.97088623046875, + "learning_rate": 5.880085536561823e-06, + "loss": 1.3143, "step": 5880 }, { "epoch": 0.5005098572399728, - "grad_norm": 8.70549488067627, - "learning_rate": 5.866942537875102e-06, - "loss": 1.0679, + "grad_norm": 9.037909507751465, + "learning_rate": 5.86548182156948e-06, + "loss": 1.0762, "step": 5890 }, { "epoch": 0.5013596193065941, - "grad_norm": 8.661267280578613, - "learning_rate": 5.852331963384956e-06, - "loss": 1.9591, + "grad_norm": 7.988813877105713, + "learning_rate": 5.850870491339832e-06, + "loss": 1.9283, "step": 5900 }, { "epoch": 0.5022093813732155, - "grad_norm": 6.92659330368042, - "learning_rate": 5.837713889361021e-06, - "loss": 1.9399, + "grad_norm": 6.701596260070801, + "learning_rate": 5.8362516744356555e-06, + "loss": 1.9011, "step": 5910 }, { "epoch": 0.5030591434398368, - "grad_norm": 7.153946876525879, - "learning_rate": 5.823088444425413e-06, - "loss": 1.647, + "grad_norm": 7.62468957901001, + "learning_rate": 5.821625499485604e-06, + "loss": 1.6025, "step": 5920 }, { "epoch": 0.5039089055064582, - "grad_norm": 8.576470375061035, - "learning_rate": 5.808455757265103e-06, - "loss": 1.5435, + "grad_norm": 8.307475090026855, + "learning_rate": 5.80699209518307e-06, + "loss": 1.5606, "step": 5930 }, { "epoch": 0.5047586675730795, - "grad_norm": 6.338377952575684, - "learning_rate": 5.7938159566307865e-06, - "loss": 1.2534, + "grad_norm": 6.718509197235107, + "learning_rate": 5.792351590285059e-06, + "loss": 1.2376, "step": 5940 }, { "epoch": 0.5056084296397009, - "grad_norm": 6.941882610321045, - "learning_rate": 5.7791691713357424e-06, - "loss": 1.3733, + "grad_norm": 6.831080913543701, + "learning_rate": 5.777704113611049e-06, + "loss": 1.322, "step": 5950 }, { "epoch": 0.5064581917063222, - "grad_norm": 8.583059310913086, - "learning_rate": 5.764515530254717e-06, - "loss": 1.3342, + "grad_norm": 8.597857475280762, + "learning_rate": 5.7630497940418685e-06, + "loss": 1.2843, "step": 5960 }, { "epoch": 0.5073079537729436, - "grad_norm": 8.21761703491211, - "learning_rate": 5.7498551623227725e-06, - "loss": 1.3615, + "grad_norm": 7.922711372375488, + "learning_rate": 5.74838876051855e-06, + "loss": 1.3481, "step": 5970 }, { "epoch": 0.5081577158395649, - "grad_norm": 6.7219624519348145, - "learning_rate": 5.735188196534162e-06, - "loss": 1.0245, + "grad_norm": 6.574146747589111, + "learning_rate": 5.733721142041202e-06, + "loss": 1.0269, "step": 5980 }, { "epoch": 0.5090074779061863, - "grad_norm": 8.252737045288086, - "learning_rate": 5.720514761941192e-06, - "loss": 1.1976, + "grad_norm": 7.93048095703125, + "learning_rate": 5.719047067667875e-06, + "loss": 1.204, "step": 5990 }, { "epoch": 0.5098572399728076, - "grad_norm": 6.0221734046936035, - "learning_rate": 5.705834987653086e-06, - "loss": 1.6379, + "grad_norm": 6.917042255401611, + "learning_rate": 5.704366666513419e-06, + "loss": 1.6248, "step": 6000 }, { "epoch": 0.5098572399728076, "eval_cosine_accuracy@1": 0.705, - "eval_cosine_accuracy@10": 0.918, - "eval_cosine_accuracy@3": 0.838, - "eval_cosine_accuracy@5": 0.877, - "eval_cosine_map@100": 0.7828104138162045, - "eval_cosine_mrr@10": 0.7796081349206347, - "eval_cosine_ndcg@10": 0.8133689039576084, + "eval_cosine_accuracy@10": 0.9175, + "eval_cosine_accuracy@3": 0.8385, + "eval_cosine_accuracy@5": 0.8725, + "eval_cosine_map@100": 0.7822715456430124, + "eval_cosine_mrr@10": 0.7789053571428564, + "eval_cosine_ndcg@10": 0.8126113672191609, "eval_cosine_precision@1": 0.705, - "eval_cosine_precision@10": 0.0918, - "eval_cosine_precision@3": 0.2793333333333333, - "eval_cosine_precision@5": 0.1754, + "eval_cosine_precision@10": 0.09175, + "eval_cosine_precision@3": 0.27949999999999997, + "eval_cosine_precision@5": 0.1745, "eval_cosine_recall@1": 0.705, - "eval_cosine_recall@10": 0.918, - "eval_cosine_recall@3": 0.838, - "eval_cosine_recall@5": 0.877, - "eval_loss": 1.4186460971832275, - "eval_runtime": 2.7939, - "eval_samples_per_second": 269.875, - "eval_sequential_score": 0.7828104138162045, - "eval_steps_per_second": 4.295, - "eval_sts-dev_pearson_cosine": 0.8049364663899228, - "eval_sts-dev_pearson_dot": 0.8003663188905723, - "eval_sts-dev_pearson_euclidean": 0.7923367271835267, - "eval_sts-dev_pearson_manhattan": 0.794212189611971, - "eval_sts-dev_pearson_max": 0.8049364663899228, - "eval_sts-dev_spearman_cosine": 0.8076914178374875, - "eval_sts-dev_spearman_dot": 0.8026507055190013, - "eval_sts-dev_spearman_euclidean": 0.8077740870838351, - "eval_sts-dev_spearman_manhattan": 0.8093674887436685, - "eval_sts-dev_spearman_max": 0.8093674887436685, + "eval_cosine_recall@10": 0.9175, + "eval_cosine_recall@3": 0.8385, + "eval_cosine_recall@5": 0.8725, + "eval_loss": 1.4043562412261963, + "eval_runtime": 2.6251, + "eval_samples_per_second": 287.225, + "eval_sequential_score": 0.7822715456430124, + "eval_steps_per_second": 4.571, + "eval_sts-dev_pearson_cosine": 0.803805122131811, + "eval_sts-dev_pearson_dot": 0.798410117040713, + "eval_sts-dev_pearson_euclidean": 0.791038923957703, + "eval_sts-dev_pearson_manhattan": 0.7921607549897144, + "eval_sts-dev_pearson_max": 0.803805122131811, + "eval_sts-dev_spearman_cosine": 0.8080791571793059, + "eval_sts-dev_spearman_dot": 0.8014456593766724, + "eval_sts-dev_spearman_euclidean": 0.8064071607081819, + "eval_sts-dev_spearman_manhattan": 0.805310159034556, + "eval_sts-dev_spearman_max": 0.8080791571793059, "step": 6000 }, { "epoch": 0.510707002039429, - "grad_norm": 9.350522994995117, - "learning_rate": 5.69114900283485e-06, - "loss": 1.3978, + "grad_norm": 9.084571838378906, + "learning_rate": 5.689680067748358e-06, + "loss": 1.3755, "step": 6010 }, { "epoch": 0.5115567641060503, - "grad_norm": 6.432076454162598, - "learning_rate": 5.676456936706136e-06, - "loss": 0.9718, + "grad_norm": 6.724961280822754, + "learning_rate": 5.674987400597749e-06, + "loss": 0.9876, "step": 6020 }, { "epoch": 0.5124065261726717, - "grad_norm": 7.410585880279541, - "learning_rate": 5.6617589185401055e-06, - "loss": 1.5345, + "grad_norm": 8.530107498168945, + "learning_rate": 5.660288794340035e-06, + "loss": 1.5123, "step": 6030 }, { "epoch": 0.513256288239293, - "grad_norm": 10.9153413772583, - "learning_rate": 5.6470550776622875e-06, - "loss": 1.4453, + "grad_norm": 10.571995735168457, + "learning_rate": 5.645584378305925e-06, + "loss": 1.4224, "step": 6040 }, { "epoch": 0.5141060503059144, - "grad_norm": 9.173379898071289, - "learning_rate": 5.632345543449447e-06, - "loss": 1.5474, + "grad_norm": 9.035595893859863, + "learning_rate": 5.630874281877243e-06, + "loss": 1.5319, "step": 6050 }, { "epoch": 0.5149558123725357, - "grad_norm": 7.908231258392334, - "learning_rate": 5.617630445328443e-06, - "loss": 1.7022, + "grad_norm": 8.231688499450684, + "learning_rate": 5.616158634485793e-06, + "loss": 1.6707, "step": 6060 }, { "epoch": 0.5158055744391571, - "grad_norm": 8.981069564819336, - "learning_rate": 5.60290991277509e-06, - "loss": 1.8088, + "grad_norm": 8.586012840270996, + "learning_rate": 5.601437565612228e-06, + "loss": 1.7906, "step": 6070 }, { "epoch": 0.5166553365057783, - "grad_norm": 6.037376403808594, - "learning_rate": 5.588184075313021e-06, - "loss": 1.0467, + "grad_norm": 5.786081314086914, + "learning_rate": 5.586711204784893e-06, + "loss": 1.0413, "step": 6080 }, { "epoch": 0.5175050985723997, - "grad_norm": 8.4783353805542, - "learning_rate": 5.573453062512544e-06, - "loss": 1.3502, + "grad_norm": 8.943538665771484, + "learning_rate": 5.571979681578704e-06, + "loss": 1.3346, "step": 6090 }, { "epoch": 0.518354860639021, - "grad_norm": 10.445894241333008, - "learning_rate": 5.558717003989502e-06, - "loss": 1.8022, + "grad_norm": 10.053081512451172, + "learning_rate": 5.557243125613999e-06, + "loss": 1.8298, "step": 6100 }, { "epoch": 0.5192046227056424, - "grad_norm": 8.388592720031738, - "learning_rate": 5.543976029404142e-06, - "loss": 1.4738, + "grad_norm": 7.782529354095459, + "learning_rate": 5.542501666555394e-06, + "loss": 1.4339, "step": 6110 }, { "epoch": 0.5200543847722637, - "grad_norm": 7.277247905731201, - "learning_rate": 5.529230268459959e-06, - "loss": 1.6153, + "grad_norm": 7.83359956741333, + "learning_rate": 5.527755434110652e-06, + "loss": 1.6045, "step": 6120 }, { "epoch": 0.5209041468388851, - "grad_norm": 10.580738067626953, - "learning_rate": 5.514479850902564e-06, - "loss": 1.5299, + "grad_norm": 10.804980278015137, + "learning_rate": 5.5130045580295346e-06, + "loss": 1.5257, "step": 6130 }, { "epoch": 0.5217539089055064, - "grad_norm": 9.77304744720459, - "learning_rate": 5.499724906518546e-06, - "loss": 1.4705, + "grad_norm": 10.048989295959473, + "learning_rate": 5.4982491681026585e-06, + "loss": 1.4627, "step": 6140 }, { "epoch": 0.5226036709721278, - "grad_norm": 10.168603897094727, - "learning_rate": 5.4849655651343194e-06, - "loss": 1.7995, + "grad_norm": 9.560049057006836, + "learning_rate": 5.4834893941603615e-06, + "loss": 1.8083, "step": 6150 }, { "epoch": 0.5234534330387491, - "grad_norm": 7.371551036834717, - "learning_rate": 5.470201956614985e-06, - "loss": 1.1177, + "grad_norm": 7.623703956604004, + "learning_rate": 5.4687253660715545e-06, + "loss": 1.1072, "step": 6160 }, { "epoch": 0.5243031951053705, - "grad_norm": 8.640555381774902, - "learning_rate": 5.455434210863198e-06, - "loss": 1.4093, + "grad_norm": 8.676218032836914, + "learning_rate": 5.453957213742579e-06, + "loss": 1.3782, "step": 6170 }, { "epoch": 0.5251529571719918, - "grad_norm": 7.5279364585876465, - "learning_rate": 5.44066245781801e-06, - "loss": 1.5201, + "grad_norm": 7.2693190574646, + "learning_rate": 5.439185067116065e-06, + "loss": 1.539, "step": 6180 }, { "epoch": 0.5260027192386132, - "grad_norm": 8.439915657043457, - "learning_rate": 5.4258868274537324e-06, - "loss": 1.3657, + "grad_norm": 7.482497692108154, + "learning_rate": 5.424409056169789e-06, + "loss": 1.3758, "step": 6190 }, { "epoch": 0.5268524813052345, - "grad_norm": 7.069014072418213, - "learning_rate": 5.411107449778793e-06, - "loss": 2.1107, + "grad_norm": 7.0961174964904785, + "learning_rate": 5.409629310915529e-06, + "loss": 2.0819, "step": 6200 }, { "epoch": 0.5277022433718559, - "grad_norm": 8.235812187194824, - "learning_rate": 5.3963244548345946e-06, - "loss": 1.2417, + "grad_norm": 8.848167419433594, + "learning_rate": 5.3948459613979155e-06, + "loss": 1.2339, "step": 6210 }, { "epoch": 0.5285520054384772, - "grad_norm": 5.443424701690674, - "learning_rate": 5.3815379726943664e-06, - "loss": 1.334, + "grad_norm": 5.530652046203613, + "learning_rate": 5.380059137693302e-06, + "loss": 1.346, "step": 6220 }, { "epoch": 0.5294017675050986, - "grad_norm": 11.620553016662598, - "learning_rate": 5.3667481334620165e-06, - "loss": 1.6861, + "grad_norm": 11.35029125213623, + "learning_rate": 5.3652689699086034e-06, + "loss": 1.6628, "step": 6230 }, { "epoch": 0.53025152957172, - "grad_norm": 9.24329662322998, - "learning_rate": 5.3519550672709975e-06, - "loss": 2.0962, + "grad_norm": 9.598590850830078, + "learning_rate": 5.3504755881801595e-06, + "loss": 2.0857, "step": 6240 }, { "epoch": 0.5311012916383413, - "grad_norm": 7.359653472900391, - "learning_rate": 5.337158904283152e-06, - "loss": 1.3969, + "grad_norm": 7.118504524230957, + "learning_rate": 5.335679122672592e-06, + "loss": 1.3907, "step": 6250 }, { "epoch": 0.5319510537049627, - "grad_norm": 7.26228666305542, - "learning_rate": 5.322359774687572e-06, - "loss": 1.3165, + "grad_norm": 6.610283851623535, + "learning_rate": 5.3208797035776535e-06, + "loss": 1.3082, "step": 6260 }, { "epoch": 0.532800815771584, - "grad_norm": 7.836460590362549, - "learning_rate": 5.307557808699454e-06, - "loss": 1.8014, + "grad_norm": 7.749236583709717, + "learning_rate": 5.306077461113086e-06, + "loss": 1.8005, "step": 6270 }, { "epoch": 0.5336505778382054, - "grad_norm": 8.226802825927734, - "learning_rate": 5.292753136558946e-06, - "loss": 2.4102, + "grad_norm": 8.36940860748291, + "learning_rate": 5.291272525521477e-06, + "loss": 2.1571, "step": 6280 }, { "epoch": 0.5345003399048266, - "grad_norm": 12.628873825073242, - "learning_rate": 5.2779458885300126e-06, - "loss": 1.9067, + "grad_norm": 12.32206916809082, + "learning_rate": 5.2764650270691e-06, + "loss": 1.9294, "step": 6290 }, { "epoch": 0.535350101971448, - "grad_norm": 7.771126747131348, - "learning_rate": 5.263136194899281e-06, - "loss": 2.1971, + "grad_norm": 7.857509613037109, + "learning_rate": 5.2616550960447925e-06, + "loss": 2.2004, "step": 6300 }, { "epoch": 0.5361998640380693, - "grad_norm": 6.915012836456299, - "learning_rate": 5.248324185974897e-06, - "loss": 1.5464, + "grad_norm": 7.234168529510498, + "learning_rate": 5.246842862758785e-06, + "loss": 1.5136, "step": 6310 }, { "epoch": 0.5370496261046906, - "grad_norm": 8.306807518005371, - "learning_rate": 5.233509992085378e-06, - "loss": 1.7206, + "grad_norm": 8.325047492980957, + "learning_rate": 5.232028457541571e-06, + "loss": 1.6803, "step": 6320 }, { "epoch": 0.537899388171312, - "grad_norm": 5.752651691436768, - "learning_rate": 5.218693743578468e-06, - "loss": 1.4256, + "grad_norm": 5.808905124664307, + "learning_rate": 5.217212010742754e-06, + "loss": 1.3923, "step": 6330 }, { "epoch": 0.5387491502379333, - "grad_norm": 9.146262168884277, - "learning_rate": 5.203875570819986e-06, - "loss": 2.4398, + "grad_norm": 8.492155075073242, + "learning_rate": 5.202393652729898e-06, + "loss": 2.4211, "step": 6340 }, { "epoch": 0.5395989123045547, - "grad_norm": 8.414597511291504, - "learning_rate": 5.189055604192688e-06, - "loss": 1.4742, + "grad_norm": 8.309314727783203, + "learning_rate": 5.187573513887384e-06, + "loss": 1.4678, "step": 6350 }, { "epoch": 0.540448674371176, - "grad_norm": 8.044291496276855, - "learning_rate": 5.174233974095105e-06, - "loss": 1.6697, + "grad_norm": 8.213577270507812, + "learning_rate": 5.172751724615269e-06, + "loss": 1.6661, "step": 6360 }, { "epoch": 0.5412984364377974, - "grad_norm": 7.830363750457764, - "learning_rate": 5.159410810940414e-06, - "loss": 1.0131, + "grad_norm": 7.860897541046143, + "learning_rate": 5.157928415328121e-06, + "loss": 0.9979, "step": 6370 }, { "epoch": 0.5421481985044188, - "grad_norm": 6.405018329620361, - "learning_rate": 5.144586245155278e-06, - "loss": 1.2043, + "grad_norm": 6.391665458679199, + "learning_rate": 5.143103716453889e-06, + "loss": 1.1718, "step": 6380 }, { "epoch": 0.5429979605710401, - "grad_norm": 8.924790382385254, - "learning_rate": 5.129760407178695e-06, - "loss": 1.9432, + "grad_norm": 9.395598411560059, + "learning_rate": 5.1282777584327505e-06, + "loss": 1.9122, "step": 6390 }, { "epoch": 0.5438477226376615, - "grad_norm": 10.58996295928955, - "learning_rate": 5.114933427460867e-06, - "loss": 1.8042, + "grad_norm": 11.279014587402344, + "learning_rate": 5.1134506717159535e-06, + "loss": 1.7934, "step": 6400 }, { "epoch": 0.5446974847042828, - "grad_norm": 6.702895164489746, - "learning_rate": 5.100105436462037e-06, - "loss": 1.6424, + "grad_norm": 6.980748653411865, + "learning_rate": 5.09862258676469e-06, + "loss": 1.6539, "step": 6410 }, { "epoch": 0.5455472467709042, - "grad_norm": 9.357471466064453, - "learning_rate": 5.085276564651345e-06, - "loss": 1.7921, + "grad_norm": 9.435534477233887, + "learning_rate": 5.083793634048924e-06, + "loss": 1.8081, "step": 6420 }, { "epoch": 0.5463970088375255, - "grad_norm": 9.2583646774292, - "learning_rate": 5.070446942505684e-06, - "loss": 1.8778, + "grad_norm": 8.803879737854004, + "learning_rate": 5.068963944046257e-06, + "loss": 1.8629, "step": 6430 }, { "epoch": 0.5472467709041469, - "grad_norm": 8.184134483337402, - "learning_rate": 5.055616700508545e-06, - "loss": 1.4037, + "grad_norm": 8.948919296264648, + "learning_rate": 5.0541336472407846e-06, + "loss": 1.3883, "step": 6440 }, { "epoch": 0.5480965329707682, - "grad_norm": 8.702832221984863, - "learning_rate": 5.040785969148879e-06, - "loss": 1.3223, + "grad_norm": 8.440637588500977, + "learning_rate": 5.039302874121933e-06, + "loss": 1.3248, "step": 6450 }, { "epoch": 0.5489462950373896, - "grad_norm": 6.28436279296875, - "learning_rate": 5.025954878919939e-06, - "loss": 1.6476, + "grad_norm": 5.993195056915283, + "learning_rate": 5.024471755183324e-06, + "loss": 1.6304, "step": 6460 }, { "epoch": 0.5497960571040109, - "grad_norm": 7.8175153732299805, - "learning_rate": 5.011123560318133e-06, - "loss": 0.9701, + "grad_norm": 8.045123100280762, + "learning_rate": 5.0096404209216235e-06, + "loss": 0.9951, "step": 6470 }, { "epoch": 0.5506458191706323, - "grad_norm": 8.318731307983398, - "learning_rate": 4.9962921438418854e-06, - "loss": 1.0142, + "grad_norm": 8.018733978271484, + "learning_rate": 4.994809001835385e-06, + "loss": 0.9729, "step": 6480 }, { "epoch": 0.5514955812372536, - "grad_norm": 9.885448455810547, - "learning_rate": 4.9814607599904756e-06, - "loss": 2.2385, + "grad_norm": 9.855351448059082, + "learning_rate": 4.97997762842392e-06, + "loss": 2.2003, "step": 6490 }, { "epoch": 0.552345343303875, - "grad_norm": 5.5691657066345215, - "learning_rate": 4.966629539262898e-06, - "loss": 0.9459, + "grad_norm": 5.184337139129639, + "learning_rate": 4.965146431186127e-06, + "loss": 0.9242, "step": 6500 }, { "epoch": 0.5531951053704962, - "grad_norm": 9.805514335632324, - "learning_rate": 4.951798612156714e-06, - "loss": 1.6989, + "grad_norm": 10.342740058898926, + "learning_rate": 4.950315540619357e-06, + "loss": 1.6794, "step": 6510 }, { "epoch": 0.5540448674371176, - "grad_norm": 5.01717472076416, - "learning_rate": 4.9369681091668966e-06, - "loss": 1.3368, + "grad_norm": 5.053089141845703, + "learning_rate": 4.935485087218267e-06, + "loss": 1.2956, "step": 6520 }, { "epoch": 0.5548946295037389, - "grad_norm": 5.933539867401123, - "learning_rate": 4.922138160784693e-06, - "loss": 1.4556, + "grad_norm": 6.063964366912842, + "learning_rate": 4.920655201473664e-06, + "loss": 1.4456, "step": 6530 }, { "epoch": 0.5557443915703603, - "grad_norm": 7.548472881317139, - "learning_rate": 4.907308897496466e-06, - "loss": 1.202, + "grad_norm": 7.5671305656433105, + "learning_rate": 4.905826013871362e-06, + "loss": 1.1975, "step": 6540 }, { "epoch": 0.5565941536369816, - "grad_norm": 8.114418029785156, - "learning_rate": 4.892480449782551e-06, - "loss": 2.0945, + "grad_norm": 9.246622085571289, + "learning_rate": 4.890997654891032e-06, + "loss": 2.0751, "step": 6550 }, { "epoch": 0.557443915703603, - "grad_norm": 9.7769136428833, - "learning_rate": 4.877652948116109e-06, - "loss": 1.6245, + "grad_norm": 10.030631065368652, + "learning_rate": 4.876170255005049e-06, + "loss": 1.5858, "step": 6560 }, { "epoch": 0.5582936777702243, - "grad_norm": 9.697370529174805, - "learning_rate": 4.862826522961973e-06, - "loss": 1.8268, + "grad_norm": 10.063614845275879, + "learning_rate": 4.8613439446773575e-06, + "loss": 1.8451, "step": 6570 }, { "epoch": 0.5591434398368457, - "grad_norm": 4.425044059753418, - "learning_rate": 4.848001304775511e-06, - "loss": 1.0148, + "grad_norm": 4.296286106109619, + "learning_rate": 4.846518854362309e-06, + "loss": 0.9895, "step": 6580 }, { "epoch": 0.559993201903467, - "grad_norm": 7.075048446655273, - "learning_rate": 4.833177424001466e-06, - "loss": 1.5507, + "grad_norm": 7.022743225097656, + "learning_rate": 4.831695114503523e-06, + "loss": 1.5388, "step": 6590 }, { "epoch": 0.5608429639700884, - "grad_norm": 5.033480167388916, - "learning_rate": 4.818355011072814e-06, - "loss": 1.4048, + "grad_norm": 5.699609756469727, + "learning_rate": 4.8168728555327345e-06, + "loss": 1.443, "step": 6600 }, { "epoch": 0.5616927260367097, - "grad_norm": 8.580843925476074, - "learning_rate": 4.803534196409615e-06, - "loss": 1.409, + "grad_norm": 8.620737075805664, + "learning_rate": 4.802052207868654e-06, + "loss": 1.4455, "step": 6610 }, { "epoch": 0.5625424881033311, - "grad_norm": 6.649796962738037, - "learning_rate": 4.788715110417867e-06, - "loss": 1.5894, + "grad_norm": 6.305805683135986, + "learning_rate": 4.787233301915805e-06, + "loss": 1.5491, "step": 6620 }, { "epoch": 0.5633922501699524, - "grad_norm": 7.948225975036621, - "learning_rate": 4.773897883488358e-06, - "loss": 1.2725, + "grad_norm": 7.3978657722473145, + "learning_rate": 4.772416268063394e-06, + "loss": 1.2772, "step": 6630 }, { "epoch": 0.5642420122365738, - "grad_norm": 8.729104995727539, - "learning_rate": 4.759082645995519e-06, - "loss": 1.5723, + "grad_norm": 7.526206016540527, + "learning_rate": 4.757601236684151e-06, + "loss": 1.566, "step": 6640 }, { "epoch": 0.5650917743031951, - "grad_norm": 10.261311531066895, - "learning_rate": 4.744269528296273e-06, - "loss": 1.1249, + "grad_norm": 10.089953422546387, + "learning_rate": 4.742788338133189e-06, + "loss": 1.1092, "step": 6650 }, { "epoch": 0.5659415363698165, - "grad_norm": 8.199252128601074, - "learning_rate": 4.729458660728899e-06, - "loss": 1.447, + "grad_norm": 7.867586612701416, + "learning_rate": 4.727977702746852e-06, + "loss": 1.4266, "step": 6660 }, { "epoch": 0.5667912984364378, - "grad_norm": 7.589265823364258, - "learning_rate": 4.7146501736118665e-06, - "loss": 1.9558, + "grad_norm": 8.242040634155273, + "learning_rate": 4.713169460841574e-06, + "loss": 1.9267, "step": 6670 }, { "epoch": 0.5676410605030592, - "grad_norm": 5.311832904815674, - "learning_rate": 4.699844197242707e-06, - "loss": 1.4534, + "grad_norm": 5.025913238525391, + "learning_rate": 4.698363742712726e-06, + "loss": 1.4297, "step": 6680 }, { "epoch": 0.5684908225696805, - "grad_norm": 10.871603012084961, - "learning_rate": 4.68504086189686e-06, - "loss": 1.4421, + "grad_norm": 10.94573974609375, + "learning_rate": 4.683560678633473e-06, + "loss": 1.4397, "step": 6690 }, { "epoch": 0.5693405846363019, - "grad_norm": 7.4568939208984375, - "learning_rate": 4.6702402978265235e-06, - "loss": 1.4235, + "grad_norm": 8.809229850769043, + "learning_rate": 4.668760398853632e-06, + "loss": 1.4476, "step": 6700 }, { "epoch": 0.5701903467029232, - "grad_norm": 7.83881139755249, - "learning_rate": 4.655442635259516e-06, - "loss": 1.6045, + "grad_norm": 7.7826457023620605, + "learning_rate": 4.653963033598513e-06, + "loss": 1.6113, "step": 6710 }, { "epoch": 0.5710401087695445, - "grad_norm": 5.530339241027832, - "learning_rate": 4.640648004398125e-06, - "loss": 0.9086, + "grad_norm": 4.636202812194824, + "learning_rate": 4.639168713067791e-06, + "loss": 0.8579, "step": 6720 }, { "epoch": 0.5718898708361658, - "grad_norm": 8.478376388549805, - "learning_rate": 4.625856535417958e-06, - "loss": 2.1706, + "grad_norm": 7.776772499084473, + "learning_rate": 4.624377567434346e-06, + "loss": 2.1762, "step": 6730 }, { "epoch": 0.5727396329027872, - "grad_norm": 8.314583778381348, - "learning_rate": 4.6110683584668045e-06, - "loss": 1.7236, + "grad_norm": 7.773914337158203, + "learning_rate": 4.609589726843118e-06, + "loss": 1.7159, "step": 6740 }, { "epoch": 0.5735893949694085, - "grad_norm": 9.516361236572266, - "learning_rate": 4.596283603663493e-06, - "loss": 1.2832, + "grad_norm": 8.973238945007324, + "learning_rate": 4.5948053214099756e-06, + "loss": 1.247, "step": 6750 }, { "epoch": 0.5744391570360299, - "grad_norm": 8.256692886352539, - "learning_rate": 4.5815024010967335e-06, - "loss": 1.4669, + "grad_norm": 8.574946403503418, + "learning_rate": 4.580024481220558e-06, + "loss": 1.4467, "step": 6760 }, { "epoch": 0.5752889191026512, - "grad_norm": 8.96350383758545, - "learning_rate": 4.5667248808239855e-06, - "loss": 1.8367, + "grad_norm": 8.376298904418945, + "learning_rate": 4.565247336329131e-06, + "loss": 1.8219, "step": 6770 }, { "epoch": 0.5761386811692726, - "grad_norm": 9.328225135803223, - "learning_rate": 4.5519511728703044e-06, - "loss": 1.7386, + "grad_norm": 8.362481117248535, + "learning_rate": 4.550474016757455e-06, + "loss": 1.729, "step": 6780 }, { "epoch": 0.5769884432358939, - "grad_norm": 8.867408752441406, - "learning_rate": 4.537181407227203e-06, - "loss": 1.5395, + "grad_norm": 8.901246070861816, + "learning_rate": 4.53570465249362e-06, + "loss": 1.58, "step": 6790 }, { "epoch": 0.5778382053025153, - "grad_norm": 5.739239692687988, - "learning_rate": 4.52241571385151e-06, - "loss": 1.5374, + "grad_norm": 5.8708014488220215, + "learning_rate": 4.520939373490923e-06, + "loss": 1.5089, "step": 6800 }, { "epoch": 0.5786879673691366, - "grad_norm": 6.783992290496826, - "learning_rate": 4.507654222664218e-06, - "loss": 1.298, + "grad_norm": 6.7487640380859375, + "learning_rate": 4.5061783096667125e-06, + "loss": 1.2977, "step": 6810 }, { "epoch": 0.579537729435758, - "grad_norm": 6.790402889251709, - "learning_rate": 4.492897063549348e-06, - "loss": 1.6973, + "grad_norm": 6.126246929168701, + "learning_rate": 4.491421590901248e-06, + "loss": 1.6302, "step": 6820 }, { "epoch": 0.5803874915023793, - "grad_norm": 8.407273292541504, - "learning_rate": 4.478144366352804e-06, - "loss": 1.7195, + "grad_norm": 8.985424041748047, + "learning_rate": 4.476669347036558e-06, + "loss": 1.7185, "step": 6830 }, { "epoch": 0.5812372535690007, - "grad_norm": 6.919350624084473, - "learning_rate": 4.463396260881226e-06, - "loss": 1.175, + "grad_norm": 6.828622341156006, + "learning_rate": 4.461921707875299e-06, + "loss": 1.1584, "step": 6840 }, { "epoch": 0.582087015635622, - "grad_norm": 8.451251983642578, - "learning_rate": 4.448652876900858e-06, - "loss": 1.6742, + "grad_norm": 8.610594749450684, + "learning_rate": 4.447178803179604e-06, + "loss": 1.6683, "step": 6850 }, { "epoch": 0.5829367777022434, - "grad_norm": 6.475736141204834, - "learning_rate": 4.433914344136396e-06, - "loss": 1.1064, + "grad_norm": 7.457666397094727, + "learning_rate": 4.432440762669959e-06, + "loss": 1.1037, "step": 6860 }, { "epoch": 0.5837865397688647, - "grad_norm": 9.750404357910156, - "learning_rate": 4.419180792269854e-06, - "loss": 1.7774, + "grad_norm": 9.219897270202637, + "learning_rate": 4.417707716024042e-06, + "loss": 1.7633, "step": 6870 }, { "epoch": 0.5846363018354861, - "grad_norm": 9.114805221557617, - "learning_rate": 4.40445235093942e-06, - "loss": 1.39, + "grad_norm": 9.102129936218262, + "learning_rate": 4.402979792875596e-06, + "loss": 1.4152, "step": 6880 }, { "epoch": 0.5854860639021074, - "grad_norm": 7.812666893005371, - "learning_rate": 4.3897291497383114e-06, - "loss": 1.9344, + "grad_norm": 7.444299221038818, + "learning_rate": 4.388257122813282e-06, + "loss": 1.8851, "step": 6890 }, { "epoch": 0.5863358259687288, - "grad_norm": 7.569613933563232, - "learning_rate": 4.3750113182136406e-06, - "loss": 1.6311, + "grad_norm": 7.604689598083496, + "learning_rate": 4.373539835379538e-06, + "loss": 1.6294, "step": 6900 }, { "epoch": 0.5871855880353501, - "grad_norm": 9.441134452819824, - "learning_rate": 4.360298985865274e-06, - "loss": 1.2828, + "grad_norm": 8.8158540725708, + "learning_rate": 4.358828060069442e-06, + "loss": 1.2872, "step": 6910 }, { "epoch": 0.5880353501019715, - "grad_norm": 6.812602519989014, - "learning_rate": 4.345592282144691e-06, - "loss": 1.3609, + "grad_norm": 6.616950035095215, + "learning_rate": 4.344121926329572e-06, + "loss": 1.3789, "step": 6920 }, { "epoch": 0.5888851121685927, - "grad_norm": 6.7624664306640625, - "learning_rate": 4.330891336453847e-06, - "loss": 1.6737, + "grad_norm": 6.668274879455566, + "learning_rate": 4.329421563556867e-06, + "loss": 1.6389, "step": 6930 }, { "epoch": 0.5897348742352141, - "grad_norm": 9.611124038696289, - "learning_rate": 4.316196278144028e-06, - "loss": 2.1979, + "grad_norm": 9.344346046447754, + "learning_rate": 4.314727101097489e-06, + "loss": 2.172, "step": 6940 }, { "epoch": 0.5905846363018354, - "grad_norm": 7.772810459136963, - "learning_rate": 4.301507236514728e-06, - "loss": 1.2899, + "grad_norm": 7.59160041809082, + "learning_rate": 4.3000386682456815e-06, + "loss": 1.2677, "step": 6950 }, { "epoch": 0.5914343983684568, - "grad_norm": 7.386330604553223, - "learning_rate": 4.28682434081249e-06, - "loss": 1.6052, + "grad_norm": 7.224785327911377, + "learning_rate": 4.285356394242639e-06, + "loss": 1.5623, "step": 6960 }, { "epoch": 0.5922841604350781, - "grad_norm": 8.341931343078613, - "learning_rate": 4.272147720229785e-06, - "loss": 1.9835, + "grad_norm": 8.175339698791504, + "learning_rate": 4.270680408275358e-06, + "loss": 1.993, "step": 6970 }, { "epoch": 0.5931339225016995, - "grad_norm": 6.350169658660889, - "learning_rate": 4.257477503903873e-06, - "loss": 1.032, + "grad_norm": 6.374772071838379, + "learning_rate": 4.256010839475514e-06, + "loss": 0.9549, "step": 6980 }, { "epoch": 0.5939836845683208, - "grad_norm": 9.261310577392578, - "learning_rate": 4.242813820915658e-06, - "loss": 1.3547, + "grad_norm": 8.51705551147461, + "learning_rate": 4.241347816918316e-06, + "loss": 1.3705, "step": 6990 }, { "epoch": 0.5948334466349422, - "grad_norm": 5.736369609832764, - "learning_rate": 4.228156800288564e-06, - "loss": 1.0388, + "grad_norm": 5.933287620544434, + "learning_rate": 4.226691469621375e-06, + "loss": 1.0568, "step": 7000 }, { "epoch": 0.5948334466349422, - "eval_cosine_accuracy@1": 0.705, - "eval_cosine_accuracy@10": 0.913, - "eval_cosine_accuracy@3": 0.8385, - "eval_cosine_accuracy@5": 0.8745, - "eval_cosine_map@100": 0.7822219183965622, - "eval_cosine_mrr@10": 0.7789426587301579, - "eval_cosine_ndcg@10": 0.811728901960815, - "eval_cosine_precision@1": 0.705, - "eval_cosine_precision@10": 0.0913, - "eval_cosine_precision@3": 0.2795, - "eval_cosine_precision@5": 0.17490000000000003, - "eval_cosine_recall@1": 0.705, - "eval_cosine_recall@10": 0.913, - "eval_cosine_recall@3": 0.8385, - "eval_cosine_recall@5": 0.8745, - "eval_loss": 1.381672978401184, - "eval_runtime": 2.801, - "eval_samples_per_second": 269.19, - "eval_sequential_score": 0.7822219183965622, - "eval_steps_per_second": 4.284, - "eval_sts-dev_pearson_cosine": 0.8026802140755287, - "eval_sts-dev_pearson_dot": 0.7969531570763682, - "eval_sts-dev_pearson_euclidean": 0.7907385685801254, - "eval_sts-dev_pearson_manhattan": 0.7915930884355521, - "eval_sts-dev_pearson_max": 0.8026802140755287, - "eval_sts-dev_spearman_cosine": 0.8037855843286587, - "eval_sts-dev_spearman_dot": 0.7992754996998044, - "eval_sts-dev_spearman_euclidean": 0.8044553974781349, - "eval_sts-dev_spearman_manhattan": 0.8037313686833796, - "eval_sts-dev_spearman_max": 0.8044553974781349, + "eval_cosine_accuracy@1": 0.7095, + "eval_cosine_accuracy@10": 0.914, + "eval_cosine_accuracy@3": 0.8395, + "eval_cosine_accuracy@5": 0.8755, + "eval_cosine_map@100": 0.7841994307640932, + "eval_cosine_mrr@10": 0.7808999999999992, + "eval_cosine_ndcg@10": 0.8133458140886136, + "eval_cosine_precision@1": 0.7095, + "eval_cosine_precision@10": 0.09140000000000001, + "eval_cosine_precision@3": 0.27983333333333327, + "eval_cosine_precision@5": 0.17510000000000003, + "eval_cosine_recall@1": 0.7095, + "eval_cosine_recall@10": 0.914, + "eval_cosine_recall@3": 0.8395, + "eval_cosine_recall@5": 0.8755, + "eval_loss": 1.3679609298706055, + "eval_runtime": 2.6102, + "eval_samples_per_second": 288.868, + "eval_sequential_score": 0.7841994307640932, + "eval_steps_per_second": 4.597, + "eval_sts-dev_pearson_cosine": 0.7997710602823775, + "eval_sts-dev_pearson_dot": 0.7943478368765433, + "eval_sts-dev_pearson_euclidean": 0.787259810638856, + "eval_sts-dev_pearson_manhattan": 0.7872885699954215, + "eval_sts-dev_pearson_max": 0.7997710602823775, + "eval_sts-dev_spearman_cosine": 0.8020262291519911, + "eval_sts-dev_spearman_dot": 0.7971553259828925, + "eval_sts-dev_spearman_euclidean": 0.8013808705302519, + "eval_sts-dev_spearman_manhattan": 0.8014796891177464, + "eval_sts-dev_spearman_max": 0.8020262291519911, "step": 7000 }, { "epoch": 0.5956832087015635, - "grad_norm": 9.445639610290527, - "learning_rate": 4.2135065709873925e-06, - "loss": 1.2208, + "grad_norm": 9.081294059753418, + "learning_rate": 4.212041926543567e-06, + "loss": 1.2301, "step": 7010 }, { "epoch": 0.5965329707681849, - "grad_norm": 5.607753276824951, - "learning_rate": 4.198863261917183e-06, - "loss": 1.7331, + "grad_norm": 5.393598556518555, + "learning_rate": 4.197399316583899e-06, + "loss": 1.7126, "step": 7020 }, { "epoch": 0.5973827328348063, - "grad_norm": 8.175435066223145, - "learning_rate": 4.1842270019220925e-06, - "loss": 1.5314, + "grad_norm": 8.24130916595459, + "learning_rate": 4.182763768580372e-06, + "loss": 1.5412, "step": 7030 }, { "epoch": 0.5982324949014276, - "grad_norm": 6.9969892501831055, - "learning_rate": 4.169597919784253e-06, - "loss": 1.1264, + "grad_norm": 7.444013595581055, + "learning_rate": 4.168135411308851e-06, + "loss": 1.1385, "step": 7040 }, { "epoch": 0.599082256968049, - "grad_norm": 7.108619689941406, - "learning_rate": 4.1549761442226366e-06, - "loss": 1.2415, + "grad_norm": 6.959202289581299, + "learning_rate": 4.153514373481937e-06, + "loss": 1.2436, "step": 7050 }, { "epoch": 0.5999320190346703, - "grad_norm": 9.353248596191406, - "learning_rate": 4.1403618038919314e-06, - "loss": 1.337, + "grad_norm": 9.123313903808594, + "learning_rate": 4.1389007837478186e-06, + "loss": 1.323, "step": 7060 }, { "epoch": 0.6007817811012917, - "grad_norm": 10.584664344787598, - "learning_rate": 4.125755027381396e-06, - "loss": 1.4333, + "grad_norm": 10.237748146057129, + "learning_rate": 4.124294770689159e-06, + "loss": 1.4247, "step": 7070 }, { "epoch": 0.601631543167913, - "grad_norm": 6.002866744995117, - "learning_rate": 4.111155943213742e-06, - "loss": 1.7178, + "grad_norm": 6.0468854904174805, + "learning_rate": 4.109696462821946e-06, + "loss": 1.6796, "step": 7080 }, { "epoch": 0.6024813052345344, - "grad_norm": 7.278838157653809, - "learning_rate": 4.096564679843997e-06, - "loss": 1.453, + "grad_norm": 7.531249046325684, + "learning_rate": 4.095105988594379e-06, + "loss": 1.4213, "step": 7090 }, { "epoch": 0.6033310673011557, - "grad_norm": 6.495766639709473, - "learning_rate": 4.0819813656583726e-06, - "loss": 1.0239, + "grad_norm": 6.170567512512207, + "learning_rate": 4.080523476385726e-06, + "loss": 0.9983, "step": 7100 }, { "epoch": 0.6041808293677771, - "grad_norm": 10.745160102844238, - "learning_rate": 4.06740612897314e-06, - "loss": 1.6459, + "grad_norm": 10.130817413330078, + "learning_rate": 4.065949054505198e-06, + "loss": 1.5862, "step": 7110 }, { "epoch": 0.6050305914343984, - "grad_norm": 7.8019938468933105, - "learning_rate": 4.052839098033493e-06, - "loss": 1.1091, + "grad_norm": 7.508733749389648, + "learning_rate": 4.051382851190826e-06, + "loss": 1.118, "step": 7120 }, { "epoch": 0.6058803535010198, - "grad_norm": 8.501754760742188, - "learning_rate": 4.038280401012426e-06, - "loss": 1.6325, + "grad_norm": 8.398032188415527, + "learning_rate": 4.036824994608321e-06, + "loss": 1.6444, "step": 7130 }, { "epoch": 0.6067301155676411, - "grad_norm": 7.133418560028076, - "learning_rate": 4.023730166009609e-06, - "loss": 2.149, + "grad_norm": 6.391273021697998, + "learning_rate": 4.022275612849952e-06, + "loss": 1.7763, "step": 7140 }, { "epoch": 0.6075798776342624, - "grad_norm": 8.996438026428223, - "learning_rate": 4.009188521050249e-06, - "loss": 1.8641, + "grad_norm": 8.808743476867676, + "learning_rate": 4.007734833933426e-06, + "loss": 1.8345, "step": 7150 }, { "epoch": 0.6084296397008837, - "grad_norm": 10.462785720825195, - "learning_rate": 3.994655594083976e-06, - "loss": 1.7123, + "grad_norm": 9.154818534851074, + "learning_rate": 3.993202785800748e-06, + "loss": 1.6835, "step": 7160 }, { "epoch": 0.6092794017675051, - "grad_norm": 6.820357799530029, - "learning_rate": 3.980131512983711e-06, - "loss": 1.0413, + "grad_norm": 6.946958541870117, + "learning_rate": 3.978679596317105e-06, + "loss": 1.0519, "step": 7170 }, { "epoch": 0.6101291638341264, - "grad_norm": 8.384248733520508, - "learning_rate": 3.96561640554454e-06, - "loss": 1.7179, + "grad_norm": 8.178505897521973, + "learning_rate": 3.964165393269742e-06, + "loss": 1.6993, "step": 7180 }, { "epoch": 0.6109789259007478, - "grad_norm": 6.348658561706543, - "learning_rate": 3.951110399482588e-06, - "loss": 1.8186, + "grad_norm": 5.774956226348877, + "learning_rate": 3.9496603043668284e-06, + "loss": 1.8109, "step": 7190 }, { "epoch": 0.6118286879673691, - "grad_norm": 9.565887451171875, - "learning_rate": 3.936613622433906e-06, - "loss": 1.7262, + "grad_norm": 8.877692222595215, + "learning_rate": 3.9351644572363385e-06, + "loss": 1.7157, "step": 7200 }, { "epoch": 0.6126784500339905, - "grad_norm": 8.592019081115723, - "learning_rate": 3.9221262019533345e-06, - "loss": 1.5939, + "grad_norm": 8.597440719604492, + "learning_rate": 3.920677979424935e-06, + "loss": 1.5706, "step": 7210 }, { "epoch": 0.6135282121006118, - "grad_norm": 5.790738582611084, - "learning_rate": 3.907648265513391e-06, - "loss": 1.5618, + "grad_norm": 5.1828532218933105, + "learning_rate": 3.906200998396839e-06, + "loss": 1.5365, "step": 7220 }, { "epoch": 0.6143779741672332, - "grad_norm": 10.428531646728516, - "learning_rate": 3.893179940503139e-06, - "loss": 1.5007, + "grad_norm": 10.440409660339355, + "learning_rate": 3.89173364153271e-06, + "loss": 1.4711, "step": 7230 }, { "epoch": 0.6152277362338545, - "grad_norm": 5.01715612411499, - "learning_rate": 3.878721354227081e-06, - "loss": 1.6406, + "grad_norm": 4.942476272583008, + "learning_rate": 3.877276036128526e-06, + "loss": 1.5818, "step": 7240 }, { "epoch": 0.6160774983004759, - "grad_norm": 9.64760684967041, - "learning_rate": 3.86427263390402e-06, - "loss": 1.3818, + "grad_norm": 9.714569091796875, + "learning_rate": 3.862828309394469e-06, + "loss": 1.3997, "step": 7250 }, { "epoch": 0.6169272603670972, - "grad_norm": 5.303813457489014, - "learning_rate": 3.8498339066659576e-06, - "loss": 1.0627, + "grad_norm": 5.081727027893066, + "learning_rate": 3.848390588453791e-06, + "loss": 1.044, "step": 7260 }, { "epoch": 0.6177770224337186, - "grad_norm": 7.699285507202148, - "learning_rate": 3.835405299556964e-06, - "loss": 1.6257, + "grad_norm": 7.431955337524414, + "learning_rate": 3.83396300034171e-06, + "loss": 1.6471, "step": 7270 }, { "epoch": 0.6186267845003399, - "grad_norm": 8.37662410736084, - "learning_rate": 3.820986939532065e-06, - "loss": 1.263, + "grad_norm": 8.578307151794434, + "learning_rate": 3.819545672004285e-06, + "loss": 1.2558, "step": 7280 }, { "epoch": 0.6194765465669613, - "grad_norm": 8.03685188293457, - "learning_rate": 3.806578953456125e-06, - "loss": 1.0137, + "grad_norm": 8.127335548400879, + "learning_rate": 3.8051387302973042e-06, + "loss": 1.0215, "step": 7290 }, { "epoch": 0.6203263086335826, - "grad_norm": 7.359855651855469, - "learning_rate": 3.7921814681027315e-06, - "loss": 1.6892, + "grad_norm": 8.804500579833984, + "learning_rate": 3.7907423019851624e-06, + "loss": 1.6653, "step": 7300 }, { "epoch": 0.621176070700204, - "grad_norm": 6.734193325042725, - "learning_rate": 3.777794610153069e-06, - "loss": 1.3141, + "grad_norm": 6.184629917144775, + "learning_rate": 3.776356513739751e-06, + "loss": 1.2894, "step": 7310 }, { "epoch": 0.6220258327668253, - "grad_norm": 5.372673511505127, - "learning_rate": 3.7634185061948212e-06, - "loss": 1.6547, + "grad_norm": 5.791973114013672, + "learning_rate": 3.761981492139334e-06, + "loss": 1.6529, "step": 7320 }, { "epoch": 0.6228755948334467, - "grad_norm": 9.152029991149902, - "learning_rate": 3.7490532827210507e-06, - "loss": 1.77, + "grad_norm": 9.095921516418457, + "learning_rate": 3.7476173636674517e-06, + "loss": 1.7363, "step": 7330 }, { "epoch": 0.623725356900068, - "grad_norm": 3.642228364944458, - "learning_rate": 3.7346990661290766e-06, - "loss": 0.8455, + "grad_norm": 3.543181896209717, + "learning_rate": 3.7332642547117927e-06, + "loss": 0.8245, "step": 7340 }, { "epoch": 0.6245751189666894, - "grad_norm": 8.339434623718262, - "learning_rate": 3.7203559827193793e-06, - "loss": 2.2177, + "grad_norm": 7.724462509155273, + "learning_rate": 3.718922291563084e-06, + "loss": 2.1902, "step": 7350 }, { "epoch": 0.6254248810333106, - "grad_norm": 5.82117223739624, - "learning_rate": 3.70602415869447e-06, - "loss": 1.1667, + "grad_norm": 6.039712905883789, + "learning_rate": 3.7045916004139883e-06, + "loss": 1.1631, "step": 7360 }, { "epoch": 0.626274643099932, - "grad_norm": 10.03207015991211, - "learning_rate": 3.6917037201577977e-06, - "loss": 1.7424, + "grad_norm": 9.918465614318848, + "learning_rate": 3.6902723073579837e-06, + "loss": 1.735, "step": 7370 }, { "epoch": 0.6271244051665533, - "grad_norm": 4.2060546875, - "learning_rate": 3.6773947931126306e-06, - "loss": 1.4834, + "grad_norm": 3.839500665664673, + "learning_rate": 3.6759645383882565e-06, + "loss": 1.4256, "step": 7380 }, { "epoch": 0.6279741672331747, - "grad_norm": 10.672497749328613, - "learning_rate": 3.663097503460948e-06, - "loss": 1.6292, + "grad_norm": 10.98939037322998, + "learning_rate": 3.6616684193966002e-06, + "loss": 1.6377, "step": 7390 }, { "epoch": 0.628823929299796, - "grad_norm": 6.874520301818848, - "learning_rate": 3.648811977002337e-06, - "loss": 1.5977, + "grad_norm": 6.822175025939941, + "learning_rate": 3.647384076172296e-06, + "loss": 1.5828, "step": 7400 }, { "epoch": 0.6296736913664174, - "grad_norm": 7.652101993560791, - "learning_rate": 3.6345383394328782e-06, - "loss": 1.4547, + "grad_norm": 7.257697105407715, + "learning_rate": 3.6331116344010176e-06, + "loss": 1.4463, "step": 7410 }, { "epoch": 0.6305234534330387, - "grad_norm": 4.0796074867248535, - "learning_rate": 3.620276716344042e-06, - "loss": 0.9533, + "grad_norm": 4.102172374725342, + "learning_rate": 3.618851219663717e-06, + "loss": 0.9314, "step": 7420 }, { "epoch": 0.6313732154996601, - "grad_norm": 13.315808296203613, - "learning_rate": 3.606027233221593e-06, - "loss": 1.1249, + "grad_norm": 11.864062309265137, + "learning_rate": 3.604602957435519e-06, + "loss": 1.1351, "step": 7430 }, { "epoch": 0.6322229775662814, - "grad_norm": 5.023453235626221, - "learning_rate": 3.5917900154444695e-06, - "loss": 1.3636, + "grad_norm": 5.526922702789307, + "learning_rate": 3.5903669730846257e-06, + "loss": 1.3325, "step": 7440 }, { "epoch": 0.6330727396329028, - "grad_norm": 8.574872970581055, - "learning_rate": 3.5775651882836953e-06, - "loss": 1.8638, + "grad_norm": 8.837275505065918, + "learning_rate": 3.576143391871205e-06, + "loss": 1.8632, "step": 7450 }, { "epoch": 0.6339225016995241, - "grad_norm": 6.6440300941467285, - "learning_rate": 3.5633528769012704e-06, - "loss": 1.0395, + "grad_norm": 6.607968330383301, + "learning_rate": 3.5619323389462933e-06, + "loss": 1.014, "step": 7460 }, { "epoch": 0.6347722637661455, - "grad_norm": 8.723431587219238, - "learning_rate": 3.549153206349068e-06, - "loss": 1.5179, + "grad_norm": 8.240415573120117, + "learning_rate": 3.5477339393506927e-06, + "loss": 1.4796, "step": 7470 }, { "epoch": 0.6356220258327668, - "grad_norm": 9.5170316696167, - "learning_rate": 3.534966301567735e-06, - "loss": 1.8981, + "grad_norm": 9.025439262390137, + "learning_rate": 3.5335483180138696e-06, + "loss": 1.8911, "step": 7480 }, { "epoch": 0.6364717878993882, - "grad_norm": 9.286306381225586, - "learning_rate": 3.5207922873855998e-06, - "loss": 1.6223, + "grad_norm": 9.310150146484375, + "learning_rate": 3.5193755997528537e-06, + "loss": 1.6274, "step": 7490 }, { "epoch": 0.6373215499660095, - "grad_norm": 7.77163028717041, - "learning_rate": 3.5066312885175634e-06, - "loss": 1.2023, + "grad_norm": 7.964165210723877, + "learning_rate": 3.505215909271149e-06, + "loss": 1.2259, "step": 7500 }, { "epoch": 0.6381713120326309, - "grad_norm": 5.62451696395874, - "learning_rate": 3.492483429564012e-06, - "loss": 1.1419, + "grad_norm": 5.5617289543151855, + "learning_rate": 3.491069371157624e-06, + "loss": 1.1066, "step": 7510 }, { "epoch": 0.6390210740992522, - "grad_norm": 6.1769609451293945, - "learning_rate": 3.4783488350097104e-06, - "loss": 1.3842, + "grad_norm": 6.504508018493652, + "learning_rate": 3.476936109885427e-06, + "loss": 1.3845, "step": 7520 }, { "epoch": 0.6398708361658736, - "grad_norm": 8.48708438873291, - "learning_rate": 3.464227629222718e-06, - "loss": 1.509, + "grad_norm": 8.49374008178711, + "learning_rate": 3.4628162498108795e-06, + "loss": 1.4874, "step": 7530 }, { "epoch": 0.6407205982324949, - "grad_norm": 9.36297607421875, - "learning_rate": 3.4501199364532818e-06, - "loss": 1.5919, + "grad_norm": 9.180696487426758, + "learning_rate": 3.4487099151723964e-06, + "loss": 1.5912, "step": 7540 }, { "epoch": 0.6415703602991163, - "grad_norm": 7.025388240814209, - "learning_rate": 3.4360258808327533e-06, - "loss": 1.4598, + "grad_norm": 6.687403678894043, + "learning_rate": 3.4346172300893744e-06, + "loss": 1.4071, "step": 7550 }, { "epoch": 0.6424201223657376, - "grad_norm": 5.5343546867370605, - "learning_rate": 3.421945586372496e-06, - "loss": 1.2801, + "grad_norm": 5.057962417602539, + "learning_rate": 3.4205383185611153e-06, + "loss": 1.2559, "step": 7560 }, { "epoch": 0.6432698844323589, - "grad_norm": 11.291803359985352, - "learning_rate": 3.407879176962785e-06, - "loss": 1.2692, + "grad_norm": 10.030919075012207, + "learning_rate": 3.406473304465732e-06, + "loss": 1.2858, "step": 7570 }, { "epoch": 0.6441196464989802, - "grad_norm": 5.124117374420166, - "learning_rate": 3.393826776371728e-06, - "loss": 1.5529, + "grad_norm": 4.89003324508667, + "learning_rate": 3.3924223115590494e-06, + "loss": 1.5097, "step": 7580 }, { "epoch": 0.6449694085656016, - "grad_norm": 7.193918228149414, - "learning_rate": 3.3797885082441717e-06, - "loss": 1.1517, + "grad_norm": 7.029448509216309, + "learning_rate": 3.378385463473528e-06, + "loss": 1.1406, "step": 7590 }, { "epoch": 0.6458191706322229, - "grad_norm": 8.71646785736084, - "learning_rate": 3.3657644961006063e-06, - "loss": 1.6056, + "grad_norm": 8.784164428710938, + "learning_rate": 3.3643628837171723e-06, + "loss": 1.6047, "step": 7600 }, { "epoch": 0.6466689326988443, - "grad_norm": 9.048382759094238, - "learning_rate": 3.3517548633360926e-06, - "loss": 1.2688, + "grad_norm": 9.046801567077637, + "learning_rate": 3.3503546956724305e-06, + "loss": 1.2911, "step": 7610 }, { "epoch": 0.6475186947654656, - "grad_norm": 4.025764465332031, - "learning_rate": 3.3377597332191693e-06, - "loss": 1.479, + "grad_norm": 4.445494174957275, + "learning_rate": 3.336361022595131e-06, + "loss": 1.4758, "step": 7620 }, { "epoch": 0.648368456832087, - "grad_norm": 6.733732223510742, - "learning_rate": 3.3237792288907643e-06, - "loss": 1.4652, + "grad_norm": 6.574621200561523, + "learning_rate": 3.3223819876133854e-06, + "loss": 1.4608, "step": 7630 }, { "epoch": 0.6492182188987083, - "grad_norm": 8.058868408203125, - "learning_rate": 3.3098134733631223e-06, - "loss": 1.4347, + "grad_norm": 7.47935152053833, + "learning_rate": 3.3084177137265037e-06, + "loss": 1.4307, "step": 7640 }, { "epoch": 0.6500679809653297, - "grad_norm": 8.020914077758789, - "learning_rate": 3.295862589518709e-06, - "loss": 1.2185, + "grad_norm": 7.811788082122803, + "learning_rate": 3.294468323803921e-06, + "loss": 1.1705, "step": 7650 }, { "epoch": 0.650917743031951, - "grad_norm": 6.626306533813477, - "learning_rate": 3.2819267001091382e-06, - "loss": 1.1517, + "grad_norm": 6.9271931648254395, + "learning_rate": 3.2805339405841047e-06, + "loss": 1.1394, "step": 7660 }, { "epoch": 0.6517675050985724, - "grad_norm": 8.968182563781738, - "learning_rate": 3.2680059277540926e-06, - "loss": 1.156, + "grad_norm": 8.386269569396973, + "learning_rate": 3.2666146866734817e-06, + "loss": 1.133, "step": 7670 }, { "epoch": 0.6526172671651937, - "grad_norm": 10.843755722045898, - "learning_rate": 3.2541003949402394e-06, - "loss": 1.8372, + "grad_norm": 10.459746360778809, + "learning_rate": 3.252710684545362e-06, + "loss": 1.8461, "step": 7680 }, { "epoch": 0.6534670292318151, - "grad_norm": 10.310787200927734, - "learning_rate": 3.2402102240201583e-06, - "loss": 1.6171, + "grad_norm": 10.564249038696289, + "learning_rate": 3.2388220565388538e-06, + "loss": 1.6305, "step": 7690 }, { "epoch": 0.6543167912984365, - "grad_norm": 8.911741256713867, - "learning_rate": 3.226335537211261e-06, - "loss": 1.3407, + "grad_norm": 8.70982837677002, + "learning_rate": 3.224948924857796e-06, + "loss": 1.3304, "step": 7700 }, { "epoch": 0.6551665533650578, - "grad_norm": 6.0829925537109375, - "learning_rate": 3.212476456594713e-06, - "loss": 0.9872, + "grad_norm": 6.149306774139404, + "learning_rate": 3.2110914115696724e-06, + "loss": 0.9695, "step": 7710 }, { "epoch": 0.6560163154316792, - "grad_norm": 6.397298336029053, - "learning_rate": 3.1986331041143715e-06, - "loss": 1.3951, + "grad_norm": 6.652544021606445, + "learning_rate": 3.1972496386045426e-06, + "loss": 1.3937, "step": 7720 }, { "epoch": 0.6568660774983005, - "grad_norm": 10.486702919006348, - "learning_rate": 3.1848056015756956e-06, - "loss": 1.4822, + "grad_norm": 10.421853065490723, + "learning_rate": 3.183423727753976e-06, + "loss": 1.4486, "step": 7730 }, { "epoch": 0.6577158395649219, - "grad_norm": 4.832906723022461, - "learning_rate": 3.17099407064469e-06, - "loss": 1.3158, + "grad_norm": 5.358234405517578, + "learning_rate": 3.169613800669966e-06, + "loss": 1.3141, "step": 7740 }, { "epoch": 0.6585656016315432, - "grad_norm": 12.305822372436523, - "learning_rate": 3.1571986328468256e-06, - "loss": 1.1347, + "grad_norm": 11.560007095336914, + "learning_rate": 3.1558199788638734e-06, + "loss": 1.1174, "step": 7750 }, { "epoch": 0.6594153636981646, - "grad_norm": 4.890016078948975, - "learning_rate": 3.143419409565972e-06, - "loss": 1.0472, + "grad_norm": 4.884171009063721, + "learning_rate": 3.142042383705349e-06, + "loss": 1.0358, "step": 7760 }, { "epoch": 0.6602651257647859, - "grad_norm": 8.203442573547363, - "learning_rate": 3.1296565220433273e-06, - "loss": 1.457, + "grad_norm": 7.722766876220703, + "learning_rate": 3.1282811364212684e-06, + "loss": 1.4542, "step": 7770 }, { "epoch": 0.6611148878314073, - "grad_norm": 6.849444389343262, - "learning_rate": 3.115910091376359e-06, - "loss": 1.3921, + "grad_norm": 6.970156669616699, + "learning_rate": 3.114536358094662e-06, + "loss": 1.3459, "step": 7780 }, { "epoch": 0.6619646498980285, - "grad_norm": 8.289427757263184, - "learning_rate": 3.1021802385177292e-06, - "loss": 1.3998, + "grad_norm": 8.596039772033691, + "learning_rate": 3.100808169663656e-06, + "loss": 1.3809, "step": 7790 }, { "epoch": 0.6628144119646499, - "grad_norm": 7.740545749664307, - "learning_rate": 3.088467084274237e-06, - "loss": 1.1502, + "grad_norm": 7.726800918579102, + "learning_rate": 3.0870966919204034e-06, + "loss": 1.1335, "step": 7800 }, { "epoch": 0.6636641740312712, - "grad_norm": 8.983527183532715, - "learning_rate": 3.0747707493057504e-06, - "loss": 2.2267, + "grad_norm": 9.120200157165527, + "learning_rate": 3.0734020455100233e-06, + "loss": 2.2354, "step": 7810 }, { "epoch": 0.6645139360978926, - "grad_norm": 8.558188438415527, - "learning_rate": 3.0610913541241526e-06, - "loss": 1.9351, + "grad_norm": 8.32419204711914, + "learning_rate": 3.0597243509295362e-06, + "loss": 1.9021, "step": 7820 }, { "epoch": 0.6653636981645139, - "grad_norm": 7.360873699188232, - "learning_rate": 3.047429019092269e-06, - "loss": 1.4257, + "grad_norm": 7.033876895904541, + "learning_rate": 3.04606372852681e-06, + "loss": 1.4453, "step": 7830 }, { "epoch": 0.6662134602311353, - "grad_norm": 4.532500267028809, - "learning_rate": 3.033783864422819e-06, - "loss": 1.6525, + "grad_norm": 4.431329727172852, + "learning_rate": 3.0324202984994938e-06, + "loss": 1.621, "step": 7840 }, { "epoch": 0.6670632222977566, - "grad_norm": 8.307069778442383, - "learning_rate": 3.0201560101773575e-06, - "loss": 1.381, + "grad_norm": 8.134126663208008, + "learning_rate": 3.0187941808939635e-06, + "loss": 1.3936, "step": 7850 }, { "epoch": 0.667912984364378, - "grad_norm": 9.029865264892578, - "learning_rate": 3.0065455762652095e-06, - "loss": 1.5691, + "grad_norm": 7.43812894821167, + "learning_rate": 3.00518549560427e-06, + "loss": 1.5465, "step": 7860 }, { "epoch": 0.6687627464309993, - "grad_norm": 10.770353317260742, - "learning_rate": 2.992952682442428e-06, - "loss": 1.5265, + "grad_norm": 10.495246887207031, + "learning_rate": 2.991594362371074e-06, + "loss": 1.4917, "step": 7870 }, { "epoch": 0.6696125084976207, - "grad_norm": 9.259413719177246, - "learning_rate": 2.9793774483107304e-06, - "loss": 1.977, + "grad_norm": 9.296730995178223, + "learning_rate": 2.9780209007806037e-06, + "loss": 1.9427, "step": 7880 }, { "epoch": 0.670462270564242, - "grad_norm": 6.673673629760742, - "learning_rate": 2.9658199933164465e-06, - "loss": 1.2655, + "grad_norm": 6.684450626373291, + "learning_rate": 2.9644652302635984e-06, + "loss": 1.2764, "step": 7890 }, { "epoch": 0.6713120326308634, - "grad_norm": 7.389532089233398, - "learning_rate": 2.952280436749473e-06, - "loss": 1.8937, + "grad_norm": 6.910099029541016, + "learning_rate": 2.950927470094247e-06, + "loss": 1.8721, "step": 7900 }, { "epoch": 0.6721617946974847, - "grad_norm": 10.662602424621582, - "learning_rate": 2.9387588977422236e-06, - "loss": 1.6809, + "grad_norm": 10.85179615020752, + "learning_rate": 2.9374077393891564e-06, + "loss": 1.6532, "step": 7910 }, { "epoch": 0.6730115567641061, - "grad_norm": 7.918490886688232, - "learning_rate": 2.925255495268574e-06, - "loss": 1.025, + "grad_norm": 7.266791343688965, + "learning_rate": 2.923906157106296e-06, + "loss": 0.9971, "step": 7920 }, { "epoch": 0.6738613188307274, - "grad_norm": 6.741595268249512, - "learning_rate": 2.911770348142822e-06, - "loss": 1.4864, + "grad_norm": 6.735509395599365, + "learning_rate": 2.9104228420439455e-06, + "loss": 1.4542, "step": 7930 }, { "epoch": 0.6747110808973488, - "grad_norm": 11.137955665588379, - "learning_rate": 2.8983035750186396e-06, - "loss": 1.5898, + "grad_norm": 11.409846305847168, + "learning_rate": 2.896957912839655e-06, + "loss": 1.5839, "step": 7940 }, { "epoch": 0.6755608429639701, - "grad_norm": 10.083993911743164, - "learning_rate": 2.884855294388027e-06, - "loss": 1.6339, + "grad_norm": 9.758691787719727, + "learning_rate": 2.883511487969205e-06, + "loss": 1.6431, "step": 7950 }, { "epoch": 0.6764106050305915, - "grad_norm": 8.287982940673828, - "learning_rate": 2.8714256245802717e-06, - "loss": 1.8916, + "grad_norm": 8.224542617797852, + "learning_rate": 2.8700836857455506e-06, + "loss": 1.8941, "step": 7960 }, { "epoch": 0.6772603670972128, - "grad_norm": 7.776177406311035, - "learning_rate": 2.85801468376091e-06, - "loss": 1.0224, + "grad_norm": 7.737159729003906, + "learning_rate": 2.8566746243177966e-06, + "loss": 1.0336, "step": 7970 }, { "epoch": 0.6781101291638342, - "grad_norm": 4.9061279296875, - "learning_rate": 2.8446225899306833e-06, - "loss": 1.7948, + "grad_norm": 5.036005020141602, + "learning_rate": 2.843284421670151e-06, + "loss": 1.7703, "step": 7980 }, { "epoch": 0.6789598912304555, - "grad_norm": 8.37524127960205, - "learning_rate": 2.831249460924504e-06, - "loss": 1.1079, + "grad_norm": 7.605679035186768, + "learning_rate": 2.8299131956208784e-06, + "loss": 1.1059, "step": 7990 }, { "epoch": 0.6798096532970768, - "grad_norm": 13.4224271774292, - "learning_rate": 2.817895414410411e-06, - "loss": 1.7875, + "grad_norm": 14.047964096069336, + "learning_rate": 2.8165610638212816e-06, + "loss": 1.7855, "step": 8000 }, { "epoch": 0.6798096532970768, - "eval_cosine_accuracy@1": 0.709, - "eval_cosine_accuracy@10": 0.913, - "eval_cosine_accuracy@3": 0.834, - "eval_cosine_accuracy@5": 0.877, - "eval_cosine_map@100": 0.7839118833055261, - "eval_cosine_mrr@10": 0.7806015873015867, - "eval_cosine_ndcg@10": 0.8128807055289251, - "eval_cosine_precision@1": 0.709, - "eval_cosine_precision@10": 0.0913, - "eval_cosine_precision@3": 0.278, - "eval_cosine_precision@5": 0.1754, - "eval_cosine_recall@1": 0.709, - "eval_cosine_recall@10": 0.913, - "eval_cosine_recall@3": 0.834, - "eval_cosine_recall@5": 0.877, - "eval_loss": 1.3587793111801147, - "eval_runtime": 2.7694, - "eval_samples_per_second": 272.262, - "eval_sequential_score": 0.7839118833055261, - "eval_steps_per_second": 4.333, - "eval_sts-dev_pearson_cosine": 0.8049540027655138, - "eval_sts-dev_pearson_dot": 0.7990862184332669, - "eval_sts-dev_pearson_euclidean": 0.7920885919847314, - "eval_sts-dev_pearson_manhattan": 0.7936452586810735, - "eval_sts-dev_pearson_max": 0.8049540027655138, - "eval_sts-dev_spearman_cosine": 0.8058038674639061, - "eval_sts-dev_spearman_dot": 0.8004313288969457, - "eval_sts-dev_spearman_euclidean": 0.8060049652119979, - "eval_sts-dev_spearman_manhattan": 0.8077760096244478, - "eval_sts-dev_spearman_max": 0.8077760096244478, + "eval_cosine_accuracy@1": 0.7175, + "eval_cosine_accuracy@10": 0.9155, + "eval_cosine_accuracy@3": 0.841, + "eval_cosine_accuracy@5": 0.878, + "eval_cosine_map@100": 0.7890154491139222, + "eval_cosine_mrr@10": 0.7856547619047611, + "eval_cosine_ndcg@10": 0.8172358824512647, + "eval_cosine_precision@1": 0.7175, + "eval_cosine_precision@10": 0.09155, + "eval_cosine_precision@3": 0.28033333333333327, + "eval_cosine_precision@5": 0.17560000000000003, + "eval_cosine_recall@1": 0.7175, + "eval_cosine_recall@10": 0.9155, + "eval_cosine_recall@3": 0.841, + "eval_cosine_recall@5": 0.878, + "eval_loss": 1.347324013710022, + "eval_runtime": 2.618, + "eval_samples_per_second": 288.001, + "eval_sequential_score": 0.7890154491139222, + "eval_steps_per_second": 4.584, + "eval_sts-dev_pearson_cosine": 0.8015277726105404, + "eval_sts-dev_pearson_dot": 0.7952010752539163, + "eval_sts-dev_pearson_euclidean": 0.7893816883662468, + "eval_sts-dev_pearson_manhattan": 0.7895258398435966, + "eval_sts-dev_pearson_max": 0.8015277726105404, + "eval_sts-dev_spearman_cosine": 0.8038248041571585, + "eval_sts-dev_spearman_dot": 0.7982104142453529, + "eval_sts-dev_spearman_euclidean": 0.8029392819509334, + "eval_sts-dev_spearman_manhattan": 0.8012166855619245, + "eval_sts-dev_spearman_max": 0.8038248041571585, "step": 8000 - }, - { - "epoch": 0.6806594153636981, - "grad_norm": 8.8360013961792, - "learning_rate": 2.8045605678885414e-06, - "loss": 1.4137, - "step": 8010 - }, - { - "epoch": 0.6815091774303195, - "grad_norm": 10.709798812866211, - "learning_rate": 2.7912450386900957e-06, - "loss": 1.9477, - "step": 8020 - }, - { - "epoch": 0.6823589394969408, - "grad_norm": 9.272855758666992, - "learning_rate": 2.777948943976304e-06, - "loss": 1.0443, - "step": 8030 - }, - { - "epoch": 0.6832087015635622, - "grad_norm": 6.625214576721191, - "learning_rate": 2.7646724007373958e-06, - "loss": 1.497, - "step": 8040 - }, - { - "epoch": 0.6840584636301835, - "grad_norm": 9.569297790527344, - "learning_rate": 2.751415525791572e-06, - "loss": 1.5791, - "step": 8050 - }, - { - "epoch": 0.6849082256968049, - "grad_norm": 7.8546552658081055, - "learning_rate": 2.73817843578397e-06, - "loss": 1.9525, - "step": 8060 - }, - { - "epoch": 0.6857579877634262, - "grad_norm": 8.316608428955078, - "learning_rate": 2.7249612471856445e-06, - "loss": 1.1862, - "step": 8070 - }, - { - "epoch": 0.6866077498300476, - "grad_norm": 9.65625, - "learning_rate": 2.7117640762925446e-06, - "loss": 1.7105, - "step": 8080 - }, - { - "epoch": 0.6874575118966689, - "grad_norm": 8.204623222351074, - "learning_rate": 2.6985870392244828e-06, - "loss": 1.2424, - "step": 8090 - }, - { - "epoch": 0.6883072739632903, - "grad_norm": 8.9264497756958, - "learning_rate": 2.6854302519241226e-06, - "loss": 1.317, - "step": 8100 - }, - { - "epoch": 0.6891570360299116, - "grad_norm": 7.6044602394104, - "learning_rate": 2.672293830155945e-06, - "loss": 1.424, - "step": 8110 - }, - { - "epoch": 0.690006798096533, - "grad_norm": 6.276149749755859, - "learning_rate": 2.6591778895052412e-06, - "loss": 1.3014, - "step": 8120 - }, - { - "epoch": 0.6908565601631543, - "grad_norm": 7.067321300506592, - "learning_rate": 2.6460825453770953e-06, - "loss": 1.4309, - "step": 8130 - }, - { - "epoch": 0.6917063222297757, - "grad_norm": 8.15877914428711, - "learning_rate": 2.633007912995364e-06, - "loss": 1.7155, - "step": 8140 - }, - { - "epoch": 0.692556084296397, - "grad_norm": 8.09945297241211, - "learning_rate": 2.6199541074016655e-06, - "loss": 1.2908, - "step": 8150 - }, - { - "epoch": 0.6934058463630184, - "grad_norm": 7.2092366218566895, - "learning_rate": 2.6069212434543612e-06, - "loss": 1.4366, - "step": 8160 - }, - { - "epoch": 0.6942556084296397, - "grad_norm": 10.20711898803711, - "learning_rate": 2.5939094358275587e-06, - "loss": 1.5324, - "step": 8170 - }, - { - "epoch": 0.6951053704962611, - "grad_norm": 8.490253448486328, - "learning_rate": 2.580918799010086e-06, - "loss": 1.859, - "step": 8180 - }, - { - "epoch": 0.6959551325628824, - "grad_norm": 6.806061267852783, - "learning_rate": 2.5679494473044985e-06, - "loss": 1.2369, - "step": 8190 - }, - { - "epoch": 0.6968048946295038, - "grad_norm": 7.446094512939453, - "learning_rate": 2.5550014948260653e-06, - "loss": 1.1292, - "step": 8200 - }, - { - "epoch": 0.697654656696125, - "grad_norm": 9.342281341552734, - "learning_rate": 2.5420750555017688e-06, - "loss": 1.7091, - "step": 8210 - }, - { - "epoch": 0.6985044187627464, - "grad_norm": 10.630812644958496, - "learning_rate": 2.529170243069295e-06, - "loss": 1.2218, - "step": 8220 - }, - { - "epoch": 0.6993541808293677, - "grad_norm": 8.054471015930176, - "learning_rate": 2.5162871710760463e-06, - "loss": 1.823, - "step": 8230 - }, - { - "epoch": 0.7002039428959891, - "grad_norm": 7.6317243576049805, - "learning_rate": 2.5034259528781256e-06, - "loss": 1.5848, - "step": 8240 - }, - { - "epoch": 0.7010537049626104, - "grad_norm": 11.157418251037598, - "learning_rate": 2.4905867016393547e-06, - "loss": 1.973, - "step": 8250 - }, - { - "epoch": 0.7019034670292318, - "grad_norm": 9.590888977050781, - "learning_rate": 2.4777695303302717e-06, - "loss": 1.2432, - "step": 8260 - }, - { - "epoch": 0.7027532290958531, - "grad_norm": 6.605616092681885, - "learning_rate": 2.46497455172713e-06, - "loss": 1.0431, - "step": 8270 - }, - { - "epoch": 0.7036029911624745, - "grad_norm": 8.644810676574707, - "learning_rate": 2.4522018784109213e-06, - "loss": 1.4068, - "step": 8280 - }, - { - "epoch": 0.7044527532290958, - "grad_norm": 8.619047164916992, - "learning_rate": 2.4394516227663693e-06, - "loss": 1.1851, - "step": 8290 - }, - { - "epoch": 0.7053025152957172, - "grad_norm": 5.964475631713867, - "learning_rate": 2.426723896980954e-06, - "loss": 1.2187, - "step": 8300 - }, - { - "epoch": 0.7061522773623385, - "grad_norm": 8.245816230773926, - "learning_rate": 2.4140188130439145e-06, - "loss": 1.0254, - "step": 8310 - }, - { - "epoch": 0.7070020394289599, - "grad_norm": 6.809252738952637, - "learning_rate": 2.4013364827452727e-06, - "loss": 1.293, - "step": 8320 - }, - { - "epoch": 0.7078518014955812, - "grad_norm": 7.471244812011719, - "learning_rate": 2.388677017674838e-06, - "loss": 1.393, - "step": 8330 - }, - { - "epoch": 0.7087015635622026, - "grad_norm": 8.105233192443848, - "learning_rate": 2.37604052922124e-06, - "loss": 1.8043, - "step": 8340 - }, - { - "epoch": 0.709551325628824, - "grad_norm": 8.175520896911621, - "learning_rate": 2.3634271285709327e-06, - "loss": 1.4796, - "step": 8350 - }, - { - "epoch": 0.7104010876954453, - "grad_norm": 6.40727424621582, - "learning_rate": 2.35083692670723e-06, - "loss": 1.3933, - "step": 8360 - }, - { - "epoch": 0.7112508497620667, - "grad_norm": 9.597043991088867, - "learning_rate": 2.338270034409324e-06, - "loss": 1.2227, - "step": 8370 - }, - { - "epoch": 0.712100611828688, - "grad_norm": 7.239832401275635, - "learning_rate": 2.3257265622513037e-06, - "loss": 0.7937, - "step": 8380 - }, - { - "epoch": 0.7129503738953094, - "grad_norm": 13.128355979919434, - "learning_rate": 2.3132066206011927e-06, - "loss": 1.8031, - "step": 8390 - }, - { - "epoch": 0.7138001359619307, - "grad_norm": 4.622241020202637, - "learning_rate": 2.3007103196199744e-06, - "loss": 1.1379, - "step": 8400 - }, - { - "epoch": 0.714649898028552, - "grad_norm": 7.2725982666015625, - "learning_rate": 2.2882377692606156e-06, - "loss": 1.3593, - "step": 8410 - }, - { - "epoch": 0.7154996600951734, - "grad_norm": 5.220696926116943, - "learning_rate": 2.275789079267113e-06, - "loss": 1.1971, - "step": 8420 - }, - { - "epoch": 0.7163494221617946, - "grad_norm": 6.871493339538574, - "learning_rate": 2.2633643591735124e-06, - "loss": 1.5309, - "step": 8430 - }, - { - "epoch": 0.717199184228416, - "grad_norm": 14.020758628845215, - "learning_rate": 2.250963718302957e-06, - "loss": 1.4029, - "step": 8440 - }, - { - "epoch": 0.7180489462950373, - "grad_norm": 6.9064836502075195, - "learning_rate": 2.23858726576672e-06, - "loss": 1.0953, - "step": 8450 - }, - { - "epoch": 0.7188987083616587, - "grad_norm": 7.006031513214111, - "learning_rate": 2.226235110463246e-06, - "loss": 0.9642, - "step": 8460 - }, - { - "epoch": 0.71974847042828, - "grad_norm": 12.737631797790527, - "learning_rate": 2.2139073610771876e-06, - "loss": 1.559, - "step": 8470 - }, - { - "epoch": 0.7205982324949014, - "grad_norm": 5.922829627990723, - "learning_rate": 2.2016041260784604e-06, - "loss": 1.2116, - "step": 8480 - }, - { - "epoch": 0.7214479945615228, - "grad_norm": 9.439011573791504, - "learning_rate": 2.1893255137212736e-06, - "loss": 1.5634, - "step": 8490 - }, - { - "epoch": 0.7222977566281441, - "grad_norm": 8.343891143798828, - "learning_rate": 2.1770716320431935e-06, - "loss": 1.4752, - "step": 8500 - }, - { - "epoch": 0.7231475186947655, - "grad_norm": 7.722195148468018, - "learning_rate": 2.1648425888641806e-06, - "loss": 1.579, - "step": 8510 - }, - { - "epoch": 0.7239972807613868, - "grad_norm": 10.152587890625, - "learning_rate": 2.1526384917856492e-06, - "loss": 1.3966, - "step": 8520 - }, - { - "epoch": 0.7248470428280082, - "grad_norm": 6.416513919830322, - "learning_rate": 2.1404594481895126e-06, - "loss": 1.249, - "step": 8530 - }, - { - "epoch": 0.7256968048946295, - "grad_norm": 9.243882179260254, - "learning_rate": 2.1283055652372414e-06, - "loss": 1.6503, - "step": 8540 - }, - { - "epoch": 0.7265465669612509, - "grad_norm": 6.850565433502197, - "learning_rate": 2.1161769498689274e-06, - "loss": 1.5157, - "step": 8550 - }, - { - "epoch": 0.7273963290278722, - "grad_norm": 5.64463996887207, - "learning_rate": 2.1040737088023323e-06, - "loss": 1.3254, - "step": 8560 - }, - { - "epoch": 0.7282460910944936, - "grad_norm": 9.26573371887207, - "learning_rate": 2.0919959485319585e-06, - "loss": 1.379, - "step": 8570 - }, - { - "epoch": 0.7290958531611149, - "grad_norm": 7.37186861038208, - "learning_rate": 2.079943775328101e-06, - "loss": 1.4666, - "step": 8580 - }, - { - "epoch": 0.7299456152277363, - "grad_norm": 9.601011276245117, - "learning_rate": 2.0679172952359185e-06, - "loss": 2.0379, - "step": 8590 - }, - { - "epoch": 0.7307953772943576, - "grad_norm": 6.231343746185303, - "learning_rate": 2.0559166140745046e-06, - "loss": 1.0678, - "step": 8600 - }, - { - "epoch": 0.731645139360979, - "grad_norm": 7.5384840965271, - "learning_rate": 2.0439418374359493e-06, - "loss": 0.8082, - "step": 8610 - }, - { - "epoch": 0.7324949014276003, - "grad_norm": 5.7244110107421875, - "learning_rate": 2.031993070684414e-06, - "loss": 1.892, - "step": 8620 - }, - { - "epoch": 0.7333446634942217, - "grad_norm": 20.916139602661133, - "learning_rate": 2.0200704189552033e-06, - "loss": 1.5657, - "step": 8630 - }, - { - "epoch": 0.7341944255608429, - "grad_norm": 13.778528213500977, - "learning_rate": 2.0081739871538384e-06, - "loss": 2.0957, - "step": 8640 - }, - { - "epoch": 0.7350441876274643, - "grad_norm": 9.449681282043457, - "learning_rate": 1.9963038799551343e-06, - "loss": 1.6971, - "step": 8650 - }, - { - "epoch": 0.7358939496940856, - "grad_norm": 9.729654312133789, - "learning_rate": 1.9844602018022835e-06, - "loss": 1.6588, - "step": 8660 - }, - { - "epoch": 0.736743711760707, - "grad_norm": 11.512124061584473, - "learning_rate": 1.9726430569059325e-06, - "loss": 2.0749, - "step": 8670 - }, - { - "epoch": 0.7375934738273283, - "grad_norm": 8.714293479919434, - "learning_rate": 1.960852549243266e-06, - "loss": 1.4549, - "step": 8680 - }, - { - "epoch": 0.7384432358939497, - "grad_norm": 10.241358757019043, - "learning_rate": 1.9490887825570905e-06, - "loss": 1.5151, - "step": 8690 - }, - { - "epoch": 0.739292997960571, - "grad_norm": 6.081053733825684, - "learning_rate": 1.9373518603549214e-06, - "loss": 1.4208, - "step": 8700 - }, - { - "epoch": 0.7401427600271924, - "grad_norm": 6.992905616760254, - "learning_rate": 1.925641885908078e-06, - "loss": 1.1372, - "step": 8710 - }, - { - "epoch": 0.7409925220938137, - "grad_norm": 5.226426601409912, - "learning_rate": 1.9139589622507686e-06, - "loss": 1.3161, - "step": 8720 - }, - { - "epoch": 0.7418422841604351, - "grad_norm": 8.087979316711426, - "learning_rate": 1.9023031921791857e-06, - "loss": 1.7169, - "step": 8730 - }, - { - "epoch": 0.7426920462270564, - "grad_norm": 9.095001220703125, - "learning_rate": 1.8906746782506036e-06, - "loss": 1.1011, - "step": 8740 - }, - { - "epoch": 0.7435418082936778, - "grad_norm": 9.052830696105957, - "learning_rate": 1.8790735227824702e-06, - "loss": 1.5178, - "step": 8750 - }, - { - "epoch": 0.7443915703602991, - "grad_norm": 8.323946952819824, - "learning_rate": 1.867499827851512e-06, - "loss": 1.3397, - "step": 8760 - }, - { - "epoch": 0.7452413324269205, - "grad_norm": 8.433329582214355, - "learning_rate": 1.855953695292837e-06, - "loss": 1.0119, - "step": 8770 - }, - { - "epoch": 0.7460910944935418, - "grad_norm": 5.9390692710876465, - "learning_rate": 1.844435226699034e-06, - "loss": 1.4627, - "step": 8780 - }, - { - "epoch": 0.7469408565601632, - "grad_norm": 9.195110321044922, - "learning_rate": 1.832944523419284e-06, - "loss": 0.9999, - "step": 8790 - }, - { - "epoch": 0.7477906186267845, - "grad_norm": 6.598045349121094, - "learning_rate": 1.8214816865584595e-06, - "loss": 0.9797, - "step": 8800 - }, - { - "epoch": 0.7486403806934059, - "grad_norm": 10.747418403625488, - "learning_rate": 1.810046816976247e-06, - "loss": 1.6933, - "step": 8810 - }, - { - "epoch": 0.7494901427600272, - "grad_norm": 8.619893074035645, - "learning_rate": 1.7986400152862482e-06, - "loss": 1.2377, - "step": 8820 - }, - { - "epoch": 0.7503399048266486, - "grad_norm": 7.153450965881348, - "learning_rate": 1.7872613818551037e-06, - "loss": 1.2728, - "step": 8830 - }, - { - "epoch": 0.7511896668932699, - "grad_norm": 7.99755859375, - "learning_rate": 1.775911016801607e-06, - "loss": 1.1199, - "step": 8840 - }, - { - "epoch": 0.7520394289598912, - "grad_norm": 6.9795966148376465, - "learning_rate": 1.7645890199958154e-06, - "loss": 1.5914, - "step": 8850 - }, - { - "epoch": 0.7528891910265125, - "grad_norm": 9.386270523071289, - "learning_rate": 1.7532954910581868e-06, - "loss": 1.7762, - "step": 8860 - }, - { - "epoch": 0.7537389530931339, - "grad_norm": 11.056981086730957, - "learning_rate": 1.742030529358692e-06, - "loss": 1.4173, - "step": 8870 - }, - { - "epoch": 0.7545887151597552, - "grad_norm": 5.89794397354126, - "learning_rate": 1.73079423401594e-06, - "loss": 1.7525, - "step": 8880 - }, - { - "epoch": 0.7554384772263766, - "grad_norm": 9.688243865966797, - "learning_rate": 1.7195867038963133e-06, - "loss": 2.119, - "step": 8890 - }, - { - "epoch": 0.7562882392929979, - "grad_norm": 8.137627601623535, - "learning_rate": 1.708408037613094e-06, - "loss": 1.6517, - "step": 8900 - }, - { - "epoch": 0.7571380013596193, - "grad_norm": 9.514354705810547, - "learning_rate": 1.6972583335255899e-06, - "loss": 1.6911, - "step": 8910 - }, - { - "epoch": 0.7579877634262406, - "grad_norm": 4.005157947540283, - "learning_rate": 1.6861376897382837e-06, - "loss": 1.5806, - "step": 8920 - }, - { - "epoch": 0.758837525492862, - "grad_norm": 10.505317687988281, - "learning_rate": 1.6750462040999522e-06, - "loss": 1.5838, - "step": 8930 - }, - { - "epoch": 0.7596872875594833, - "grad_norm": 8.158041954040527, - "learning_rate": 1.6639839742028214e-06, - "loss": 1.6819, - "step": 8940 - }, - { - "epoch": 0.7605370496261047, - "grad_norm": 9.959892272949219, - "learning_rate": 1.6529510973816991e-06, - "loss": 1.5756, - "step": 8950 - }, - { - "epoch": 0.761386811692726, - "grad_norm": 8.621078491210938, - "learning_rate": 1.6419476707131154e-06, - "loss": 1.3978, - "step": 8960 - }, - { - "epoch": 0.7622365737593474, - "grad_norm": 10.64427661895752, - "learning_rate": 1.630973791014479e-06, - "loss": 1.7492, - "step": 8970 - }, - { - "epoch": 0.7630863358259687, - "grad_norm": 8.072078704833984, - "learning_rate": 1.6200295548432188e-06, - "loss": 1.0175, - "step": 8980 - }, - { - "epoch": 0.7639360978925901, - "grad_norm": 10.248087882995605, - "learning_rate": 1.6091150584959293e-06, - "loss": 2.0354, - "step": 8990 - }, - { - "epoch": 0.7647858599592114, - "grad_norm": 7.684450626373291, - "learning_rate": 1.598230398007537e-06, - "loss": 1.5181, - "step": 9000 - }, - { - "epoch": 0.7647858599592114, - "eval_cosine_accuracy@1": 0.7065, - "eval_cosine_accuracy@10": 0.9125, - "eval_cosine_accuracy@3": 0.84, - "eval_cosine_accuracy@5": 0.877, - "eval_cosine_map@100": 0.7837897332371666, - "eval_cosine_mrr@10": 0.7803275793650786, - "eval_cosine_ndcg@10": 0.8126831693564126, - "eval_cosine_precision@1": 0.7065, - "eval_cosine_precision@10": 0.09125, - "eval_cosine_precision@3": 0.28, - "eval_cosine_precision@5": 0.17540000000000003, - "eval_cosine_recall@1": 0.7065, - "eval_cosine_recall@10": 0.9125, - "eval_cosine_recall@3": 0.84, - "eval_cosine_recall@5": 0.877, - "eval_loss": 1.3433513641357422, - "eval_runtime": 2.847, - "eval_samples_per_second": 264.84, - "eval_sequential_score": 0.7837897332371666, - "eval_steps_per_second": 4.215, - "eval_sts-dev_pearson_cosine": 0.8041663291384161, - "eval_sts-dev_pearson_dot": 0.7985215152878228, - "eval_sts-dev_pearson_euclidean": 0.7922157416357797, - "eval_sts-dev_pearson_manhattan": 0.7938961056566315, - "eval_sts-dev_pearson_max": 0.8041663291384161, - "eval_sts-dev_spearman_cosine": 0.8057219672338036, - "eval_sts-dev_spearman_dot": 0.7985137905715485, - "eval_sts-dev_spearman_euclidean": 0.8028596887695664, - "eval_sts-dev_spearman_manhattan": 0.804480775014223, - "eval_sts-dev_spearman_max": 0.8057219672338036, - "step": 9000 } ], "logging_steps": 10,