{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5708840656431975,
  "eval_steps": 337,
  "global_step": 1348,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.00042350449973530967, "grad_norm": 0.027733758091926575, "learning_rate": 2e-05, "loss": 10.3743, "step": 1 },
    { "epoch": 0.00042350449973530967, "eval_loss": 10.376607894897461, "eval_runtime": 3.5039, "eval_samples_per_second": 283.969, "eval_steps_per_second": 142.127, "step": 1 },
    { "epoch": 0.0008470089994706193, "grad_norm": 0.02669823355972767, "learning_rate": 4e-05, "loss": 10.374, "step": 2 },
    { "epoch": 0.001270513499205929, "grad_norm": 0.021611209958791733, "learning_rate": 6e-05, "loss": 10.3801, "step": 3 },
    { "epoch": 0.0016940179989412387, "grad_norm": 0.027095356956124306, "learning_rate": 8e-05, "loss": 10.3786, "step": 4 },
    { "epoch": 0.0021175224986765486, "grad_norm": 0.030345361679792404, "learning_rate": 0.0001, "loss": 10.378, "step": 5 },
    { "epoch": 0.002541026998411858, "grad_norm": 0.025746231898665428, "learning_rate": 0.00012, "loss": 10.3767, "step": 6 },
    { "epoch": 0.002964531498147168, "grad_norm": 0.026296626776456833, "learning_rate": 0.00014, "loss": 10.375, "step": 7 },
    { "epoch": 0.0033880359978824773, "grad_norm": 0.026994528248906136, "learning_rate": 0.00016, "loss": 10.3775, "step": 8 },
    { "epoch": 0.0038115404976177872, "grad_norm": 0.02642114832997322, "learning_rate": 0.00018, "loss": 10.3785, "step": 9 },
    { "epoch": 0.004235044997353097, "grad_norm": 0.03136637434363365, "learning_rate": 0.0002, "loss": 10.3753, "step": 10 },
    { "epoch": 0.004658549497088407, "grad_norm": 0.022933412343263626, "learning_rate": 0.00019999972435042745, "loss": 10.3753, "step": 11 },
    { "epoch": 0.005082053996823716, "grad_norm": 0.02334180846810341, "learning_rate": 0.0001999988974032295, "loss": 10.3753, "step": 12 },
    { "epoch": 0.005505558496559026, "grad_norm": 0.03419239819049835, "learning_rate": 0.00019999751916296505, "loss": 10.3767, "step": 13 },
    { "epoch": 0.005929062996294336, "grad_norm": 0.022363845258951187, "learning_rate": 0.0001999955896372324, "loss": 10.3773, "step": 14 },
    { "epoch": 0.006352567496029645, "grad_norm": 0.025751987472176552, "learning_rate": 0.0001999931088366689, "loss": 10.3788, "step": 15 },
    { "epoch": 0.006776071995764955, "grad_norm": 0.02451767958700657, "learning_rate": 0.00019999007677495127, "loss": 10.3781, "step": 16 },
    { "epoch": 0.007199576495500265, "grad_norm": 0.023951657116413116, "learning_rate": 0.00019998649346879524, "loss": 10.3746, "step": 17 },
    { "epoch": 0.0076230809952355745, "grad_norm": 0.02496548369526863, "learning_rate": 0.0001999823589379555, "loss": 10.3739, "step": 18 },
    { "epoch": 0.008046585494970884, "grad_norm": 0.02257522940635681, "learning_rate": 0.0001999776732052257, "loss": 10.3755, "step": 19 },
    { "epoch": 0.008470089994706194, "grad_norm": 0.027529660612344742, "learning_rate": 0.00019997243629643827, "loss": 10.3753, "step": 20 },
    { "epoch": 0.008893594494441503, "grad_norm": 0.025733161717653275, "learning_rate": 0.0001999666482404642, "loss": 10.376, "step": 21 },
    { "epoch": 0.009317098994176813, "grad_norm": 0.028723513707518578, "learning_rate": 0.00019996030906921302, "loss": 10.373, "step": 22 },
    { "epoch": 0.009740603493912123, "grad_norm": 0.03213539347052574, "learning_rate": 0.00019995341881763254, "loss": 10.3749, "step": 23 },
    { "epoch": 0.010164107993647432, "grad_norm": 0.033838726580142975, "learning_rate": 0.0001999459775237086, "loss": 10.374, "step": 24 },
    { "epoch": 0.010587612493382742, "grad_norm": 0.029485682025551796, "learning_rate": 0.00019993798522846508, "loss": 10.3769, "step": 25 },
    { "epoch": 0.011011116993118053, "grad_norm": 0.03559406101703644, "learning_rate": 0.00019992944197596337, "loss": 10.3748, "step": 26 },
    { "epoch": 0.011434621492853361, "grad_norm": 0.033679552376270294, "learning_rate": 0.00019992034781330235, "loss": 10.3733, "step": 27 },
    { "epoch": 0.011858125992588672, "grad_norm": 0.032387569546699524, "learning_rate": 0.00019991070279061808, "loss": 10.3711, "step": 28 },
    { "epoch": 0.01228163049232398, "grad_norm": 0.03400762379169464, "learning_rate": 0.0001999005069610835, "loss": 10.3726, "step": 29 },
    { "epoch": 0.01270513499205929, "grad_norm": 0.030853325501084328, "learning_rate": 0.0001998897603809081, "loss": 10.3756, "step": 30 },
    { "epoch": 0.0131286394917946, "grad_norm": 0.039369914680719376, "learning_rate": 0.00019987846310933768, "loss": 10.373, "step": 31 },
    { "epoch": 0.01355214399152991, "grad_norm": 0.04837853088974953, "learning_rate": 0.00019986661520865405, "loss": 10.3751, "step": 32 },
    { "epoch": 0.01397564849126522, "grad_norm": 0.045920245349407196, "learning_rate": 0.00019985421674417452, "loss": 10.3693, "step": 33 },
    { "epoch": 0.01439915299100053, "grad_norm": 0.04273354262113571, "learning_rate": 0.00019984126778425178, "loss": 10.3702, "step": 34 },
    { "epoch": 0.014822657490735839, "grad_norm": 0.048800382763147354, "learning_rate": 0.0001998277684002733, "loss": 10.3745, "step": 35 },
    { "epoch": 0.015246161990471149, "grad_norm": 0.05085352063179016, "learning_rate": 0.00019981371866666109, "loss": 10.3745, "step": 36 },
    { "epoch": 0.015669666490206458, "grad_norm": 0.05351710319519043, "learning_rate": 0.0001997991186608712, "loss": 10.3718, "step": 37 },
    { "epoch": 0.016093170989941768, "grad_norm": 0.048667021095752716, "learning_rate": 0.0001997839684633933, "loss": 10.3713, "step": 38 },
    { "epoch": 0.016516675489677078, "grad_norm": 0.04797323793172836, "learning_rate": 0.0001997682681577504, "loss": 10.3696, "step": 39 },
    { "epoch": 0.01694017998941239, "grad_norm": 0.04910498112440109, "learning_rate": 0.00019975201783049805, "loss": 10.3753, "step": 40 },
    { "epoch": 0.0173636844891477, "grad_norm": 0.04971903935074806, "learning_rate": 0.00019973521757122418, "loss": 10.3724, "step": 41 },
    { "epoch": 0.017787188988883006, "grad_norm": 0.06753654778003693, "learning_rate": 0.00019971786747254852, "loss": 10.3717, "step": 42 },
    { "epoch": 0.018210693488618316, "grad_norm": 0.04575066268444061, "learning_rate": 0.00019969996763012198, "loss": 10.3708, "step": 43 },
    { "epoch": 0.018634197988353626, "grad_norm": 0.0645640566945076, "learning_rate": 0.00019968151814262627, "loss": 10.37, "step": 44 },
    { "epoch": 0.019057702488088937, "grad_norm": 0.07102999091148376, "learning_rate": 0.00019966251911177323, "loss": 10.371, "step": 45 },
    { "epoch": 0.019481206987824247, "grad_norm": 0.07168291509151459, "learning_rate": 0.00019964297064230436, "loss": 10.3691, "step": 46 },
    { "epoch": 0.019904711487559554, "grad_norm": 0.06409385055303574, "learning_rate": 0.0001996228728419902, "loss": 10.3712, "step": 47 },
    { "epoch": 0.020328215987294864, "grad_norm": 0.06838654726743698, "learning_rate": 0.00019960222582162976, "loss": 10.3681, "step": 48 },
    { "epoch": 0.020751720487030174, "grad_norm": 0.10280771553516388, "learning_rate": 0.0001995810296950499, "loss": 10.3681, "step": 49 },
    { "epoch": 0.021175224986765485, "grad_norm": 0.08676479011774063, "learning_rate": 0.00019955928457910464, "loss": 10.3678, "step": 50 },
    { "epoch": 0.021598729486500795, "grad_norm": 0.0853012353181839, "learning_rate": 0.00019953699059367468, "loss": 10.3662, "step": 51 },
    { "epoch": 0.022022233986236105, "grad_norm": 0.09723170846700668, "learning_rate": 0.00019951414786166654, "loss": 10.3658, "step": 52 },
    { "epoch": 0.022445738485971412, "grad_norm": 0.08303584158420563, "learning_rate": 0.00019949075650901196, "loss": 10.3637, "step": 53 },
    { "epoch": 0.022869242985706723, "grad_norm": 0.09252041578292847, "learning_rate": 0.00019946681666466737, "loss": 10.3663, "step": 54 },
    { "epoch": 0.023292747485442033, "grad_norm": 0.07753727585077286, "learning_rate": 0.00019944232846061283, "loss": 10.363, "step": 55 },
    { "epoch": 0.023716251985177343, "grad_norm": 0.09255944937467575, "learning_rate": 0.00019941729203185165, "loss": 10.3598, "step": 56 },
    { "epoch": 0.024139756484912653, "grad_norm": 0.1016978919506073, "learning_rate": 0.0001993917075164095, "loss": 10.3632, "step": 57 },
    { "epoch": 0.02456326098464796, "grad_norm": 0.09657126665115356, "learning_rate": 0.00019936557505533344, "loss": 10.363, "step": 58 },
    { "epoch": 0.02498676548438327, "grad_norm": 0.08984223008155823, "learning_rate": 0.00019933889479269162, "loss": 10.366, "step": 59 },
    { "epoch": 0.02541026998411858, "grad_norm": 0.11775655299425125, "learning_rate": 0.0001993116668755721, "loss": 10.3619, "step": 60 },
    { "epoch": 0.02583377448385389, "grad_norm": 0.10796835273504257, "learning_rate": 0.00019928389145408213, "loss": 10.3609, "step": 61 },
    { "epoch": 0.0262572789835892, "grad_norm": 0.10575428605079651, "learning_rate": 0.00019925556868134736, "loss": 10.3571, "step": 62 },
    { "epoch": 0.026680783483324512, "grad_norm": 0.12180919945240021, "learning_rate": 0.000199226698713511, "loss": 10.3553, "step": 63 },
    { "epoch": 0.02710428798305982, "grad_norm": 0.10506236553192139, "learning_rate": 0.00019919728170973296, "loss": 10.3593, "step": 64 },
    { "epoch": 0.02752779248279513, "grad_norm": 0.09984668344259262, "learning_rate": 0.00019916731783218888, "loss": 10.3593, "step": 65 },
    { "epoch": 0.02795129698253044, "grad_norm": 0.10555399954319, "learning_rate": 0.00019913680724606945, "loss": 10.3591, "step": 66 },
    { "epoch": 0.02837480148226575, "grad_norm": 0.08873631060123444, "learning_rate": 0.00019910575011957918, "loss": 10.3568, "step": 67 },
    { "epoch": 0.02879830598200106, "grad_norm": 0.09549526870250702, "learning_rate": 0.00019907414662393574, "loss": 10.3574, "step": 68 },
    { "epoch": 0.029221810481736367, "grad_norm": 0.09750920534133911, "learning_rate": 0.000199041996933369, "loss": 10.3536, "step": 69 },
    { "epoch": 0.029645314981471677, "grad_norm": 0.10485312342643738, "learning_rate": 0.00019900930122511993, "loss": 10.3566, "step": 70 },
    { "epoch": 0.030068819481206988, "grad_norm": 0.09972581267356873, "learning_rate": 0.00019897605967943963, "loss": 10.3531, "step": 71 },
    { "epoch": 0.030492323980942298, "grad_norm": 0.08210822939872742, "learning_rate": 0.00019894227247958845, "loss": 10.3534, "step": 72 },
    { "epoch": 0.030915828480677608, "grad_norm": 0.08600655943155289, "learning_rate": 0.00019890793981183503, "loss": 10.356, "step": 73 },
    { "epoch": 0.031339332980412915, "grad_norm": 0.08058468252420425, "learning_rate": 0.00019887306186545497, "loss": 10.3549, "step": 74 },
    { "epoch": 0.03176283748014823, "grad_norm": 0.0659925639629364, "learning_rate": 0.00019883763883273012, "loss": 10.3507, "step": 75 },
    { "epoch": 0.032186341979883536, "grad_norm": 0.06881393492221832, "learning_rate": 0.0001988016709089474, "loss": 10.3529, "step": 76 },
    { "epoch": 0.03260984647961884, "grad_norm": 0.0784982293844223, "learning_rate": 0.00019876515829239763, "loss": 10.3528, "step": 77 },
    { "epoch": 0.033033350979354156, "grad_norm": 0.06941844522953033, "learning_rate": 0.00019872810118437456, "loss": 10.351, "step": 78 },
    { "epoch": 0.03345685547908946, "grad_norm": 0.06965084373950958, "learning_rate": 0.00019869049978917368, "loss": 10.3507, "step": 79 },
    { "epoch": 0.03388035997882478, "grad_norm": 0.0600489042699337, "learning_rate": 0.00019865235431409123, "loss": 10.3514, "step": 80 },
    { "epoch": 0.034303864478560084, "grad_norm": 0.06106571480631828, "learning_rate": 0.00019861366496942283, "loss": 10.3501, "step": 81 },
    { "epoch": 0.0347273689782954, "grad_norm": 0.05668988823890686, "learning_rate": 0.0001985744319684625, "loss": 10.3479, "step": 82 },
    { "epoch": 0.035150873478030704, "grad_norm": 0.05988716706633568, "learning_rate": 0.00019853465552750147, "loss": 10.3472, "step": 83 },
    { "epoch": 0.03557437797776601, "grad_norm": 0.047210004180669785, "learning_rate": 0.00019849433586582692, "loss": 10.3522, "step": 84 },
    { "epoch": 0.035997882477501325, "grad_norm": 0.04648837819695473, "learning_rate": 0.00019845347320572078, "loss": 10.3489, "step": 85 },
    { "epoch": 0.03642138697723663, "grad_norm": 0.057975709438323975, "learning_rate": 0.00019841206777245857, "loss": 10.3482, "step": 86 },
    { "epoch": 0.036844891476971946, "grad_norm": 0.06715747714042664, "learning_rate": 0.00019837011979430806, "loss": 10.3486, "step": 87 },
    { "epoch": 0.03726839597670725, "grad_norm": 0.05633699893951416, "learning_rate": 0.00019832762950252813, "loss": 10.3506, "step": 88 },
    { "epoch": 0.03769190047644256, "grad_norm": 0.04711679369211197, "learning_rate": 0.00019828459713136737, "loss": 10.349, "step": 89 },
    { "epoch": 0.03811540497617787, "grad_norm": 0.050088070333004, "learning_rate": 0.0001982410229180629, "loss": 10.3457, "step": 90 },
    { "epoch": 0.03853890947591318, "grad_norm": 0.0481443926692009, "learning_rate": 0.00019819690710283893, "loss": 10.3488, "step": 91 },
    { "epoch": 0.038962413975648494, "grad_norm": 0.04781080409884453, "learning_rate": 0.0001981522499289056, "loss": 10.3476, "step": 92 },
    { "epoch": 0.0393859184753838, "grad_norm": 0.04098181053996086, "learning_rate": 0.00019810705164245756, "loss": 10.3486, "step": 93 },
    { "epoch": 0.03980942297511911, "grad_norm": 0.050709549337625504, "learning_rate": 0.00019806131249267255, "loss": 10.3465, "step": 94 },
    { "epoch": 0.04023292747485442, "grad_norm": 0.04031967371702194, "learning_rate": 0.00019801503273171012, "loss": 10.3497, "step": 95 },
    { "epoch": 0.04065643197458973, "grad_norm": 0.029422029852867126, "learning_rate": 0.00019796821261471018, "loss": 10.3476, "step": 96 },
    { "epoch": 0.04107993647432504, "grad_norm": 0.04418569803237915, "learning_rate": 0.00019792085239979162, "loss": 10.3488, "step": 97 },
    { "epoch": 0.04150344097406035, "grad_norm": 0.05277906730771065, "learning_rate": 0.00019787295234805096, "loss": 10.3495, "step": 98 },
    { "epoch": 0.041926945473795656, "grad_norm": 0.03719155862927437, "learning_rate": 0.00019782451272356075, "loss": 10.3493, "step": 99 },
    { "epoch": 0.04235044997353097, "grad_norm": 0.042001668363809586, "learning_rate": 0.0001977755337933682, "loss": 10.3474, "step": 100 },
    { "epoch": 0.042773954473266276, "grad_norm": 0.045230720192193985, "learning_rate": 0.00019772601582749376, "loss": 10.3498, "step": 101 },
    { "epoch": 0.04319745897300159, "grad_norm": 0.03940007835626602, "learning_rate": 0.00019767595909892953, "loss": 10.3499, "step": 102 },
    { "epoch": 0.0436209634727369, "grad_norm": 0.044866979122161865, "learning_rate": 0.00019762536388363784, "loss": 10.3464, "step": 103 },
    { "epoch": 0.04404446797247221, "grad_norm": 0.039521895349025726, "learning_rate": 0.00019757423046054968, "loss": 10.3491, "step": 104 },
    { "epoch": 0.04446797247220752, "grad_norm": 0.04928427189588547, "learning_rate": 0.00019752255911156317, "loss": 10.345, "step": 105 },
    { "epoch": 0.044891476971942824, "grad_norm": 0.04378641024231911, "learning_rate": 0.00019747035012154202, "loss": 10.3488, "step": 106 },
    { "epoch": 0.04531498147167814, "grad_norm": 0.048980504274368286, "learning_rate": 0.00019741760377831396, "loss": 10.3468, "step": 107 },
    { "epoch": 0.045738485971413445, "grad_norm": 0.04159266874194145, "learning_rate": 0.00019736432037266912, "loss": 10.3478, "step": 108 },
    { "epoch": 0.04616199047114876, "grad_norm": 0.0287900660187006, "learning_rate": 0.00019731050019835842, "loss": 10.3497, "step": 109 },
    { "epoch": 0.046585494970884066, "grad_norm": 0.03839430958032608, "learning_rate": 0.00019725614355209204, "loss": 10.35, "step": 110 },
    { "epoch": 0.04700899947061937, "grad_norm": 0.04616628587245941, "learning_rate": 0.00019720125073353776, "loss": 10.3471, "step": 111 },
    { "epoch": 0.047432503970354686, "grad_norm": 0.05492490157485008, "learning_rate": 0.00019714582204531918, "loss": 10.3503, "step": 112 },
    { "epoch": 0.04785600847008999, "grad_norm": 0.037890441715717316, "learning_rate": 0.00019708985779301417, "loss": 10.3488, "step": 113 },
    { "epoch": 0.04827951296982531, "grad_norm": 0.036491066217422485, "learning_rate": 0.00019703335828515322, "loss": 10.3476, "step": 114 },
    { "epoch": 0.048703017469560614, "grad_norm": 0.03580768033862114, "learning_rate": 0.00019697632383321756, "loss": 10.3509, "step": 115 },
    { "epoch": 0.04912652196929592, "grad_norm": 0.04286257550120354, "learning_rate": 0.0001969187547516377, "loss": 10.3475, "step": 116 },
    { "epoch": 0.049550026469031234, "grad_norm": 0.06037011742591858, "learning_rate": 0.00019686065135779144, "loss": 10.3534, "step": 117 },
    { "epoch": 0.04997353096876654, "grad_norm": 0.05510607734322548, "learning_rate": 0.00019680201397200236, "loss": 10.3529, "step": 118 },
    { "epoch": 0.050397035468501855, "grad_norm": 0.04488476365804672, "learning_rate": 0.00019674284291753785, "loss": 10.3482, "step": 119 },
    { "epoch": 0.05082053996823716, "grad_norm": 0.02746366150677204, "learning_rate": 0.00019668313852060735, "loss": 10.3507, "step": 120 },
    { "epoch": 0.05124404446797247, "grad_norm": 0.031117988750338554, "learning_rate": 0.00019662290111036078, "loss": 10.3472, "step": 121 },
    { "epoch": 0.05166754896770778, "grad_norm": 0.04041313752532005, "learning_rate": 0.00019656213101888645, "loss": 10.3468, "step": 122 },
    { "epoch": 0.05209105346744309, "grad_norm": 0.04518342763185501, "learning_rate": 0.00019650082858120932, "loss": 10.35, "step": 123 },
    { "epoch": 0.0525145579671784, "grad_norm": 0.034027136862277985, "learning_rate": 0.00019643899413528926, "loss": 10.3474, "step": 124 },
    { "epoch": 0.05293806246691371, "grad_norm": 0.0336722694337368, "learning_rate": 0.000196376628022019, "loss": 10.347, "step": 125 },
    { "epoch": 0.053361566966649024, "grad_norm": 0.03731876611709595, "learning_rate": 0.00019631373058522238, "loss": 10.3484, "step": 126 },
    { "epoch": 0.05378507146638433, "grad_norm": 0.038337815552949905, "learning_rate": 0.00019625030217165245, "loss": 10.3493, "step": 127 },
    { "epoch": 0.05420857596611964, "grad_norm": 0.036029715090990067, "learning_rate": 0.00019618634313098952, "loss": 10.346, "step": 128 },
    { "epoch": 0.05463208046585495, "grad_norm": 0.031205767765641212, "learning_rate": 0.00019612185381583924, "loss": 10.3502, "step": 129 },
    { "epoch": 0.05505558496559026, "grad_norm": 0.04413217306137085, "learning_rate": 0.0001960568345817306, "loss": 10.3507, "step": 130 },
    { "epoch": 0.05547908946532557, "grad_norm": 0.03828402981162071, "learning_rate": 0.00019599128578711415, "loss": 10.3485, "step": 131 },
    { "epoch": 0.05590259396506088, "grad_norm": 0.03328114375472069, "learning_rate": 0.0001959252077933598, "loss": 10.3481, "step": 132 },
    { "epoch": 0.056326098464796186, "grad_norm": 0.04720017686486244, "learning_rate": 0.000195858600964755, "loss": 10.3468, "step": 133 },
    { "epoch": 0.0567496029645315, "grad_norm": 0.03394393250346184, "learning_rate": 0.00019579146566850252, "loss": 10.3457, "step": 134 },
    { "epoch": 0.057173107464266806, "grad_norm": 0.03747075796127319, "learning_rate": 0.0001957238022747188, "loss": 10.3488, "step": 135 },
    { "epoch": 0.05759661196400212, "grad_norm": 0.03510262444615364, "learning_rate": 0.00019565561115643152, "loss": 10.3504, "step": 136 },
    { "epoch": 0.05802011646373743, "grad_norm": 0.03729300945997238, "learning_rate": 0.00019558689268957767, "loss": 10.3464, "step": 137 },
    { "epoch": 0.058443620963472734, "grad_norm": 0.029604580253362656, "learning_rate": 0.00019551764725300166, "loss": 10.3438, "step": 138 },
    { "epoch": 0.05886712546320805, "grad_norm": 0.039334215223789215, "learning_rate": 0.0001954478752284529, "loss": 10.3472, "step": 139 },
    { "epoch": 0.059290629962943354, "grad_norm": 0.04949035122990608, "learning_rate": 0.00019537757700058403, "loss": 10.3476, "step": 140 },
    { "epoch": 0.05971413446267867, "grad_norm": 0.034930143505334854, "learning_rate": 0.00019530675295694857, "loss": 10.3475, "step": 141 },
    { "epoch": 0.060137638962413975, "grad_norm": 0.02766244113445282, "learning_rate": 0.00019523540348799885, "loss": 10.3457, "step": 142 },
    { "epoch": 0.06056114346214928, "grad_norm": 0.02754233032464981, "learning_rate": 0.0001951635289870839, "loss": 10.3471, "step": 143 },
    { "epoch": 0.060984647961884596, "grad_norm": 0.05756423994898796, "learning_rate": 0.00019509112985044717, "loss": 10.348, "step": 144 },
    { "epoch": 0.0614081524616199, "grad_norm": 0.03342543542385101, "learning_rate": 0.00019501820647722457, "loss": 10.349, "step": 145 },
    { "epoch": 0.061831656961355216, "grad_norm": 0.04082402214407921, "learning_rate": 0.00019494475926944195, "loss": 10.3486, "step": 146 },
    { "epoch": 0.06225516146109052, "grad_norm": 0.03864405304193497, "learning_rate": 0.00019487078863201322, "loss": 10.351, "step": 147 },
    { "epoch": 0.06267866596082583, "grad_norm": 0.028355760499835014, "learning_rate": 0.00019479629497273781, "loss": 10.3474, "step": 148 },
    { "epoch": 0.06310217046056114, "grad_norm": 0.03946223482489586, "learning_rate": 0.00019472127870229867, "loss": 10.349, "step": 149 },
    { "epoch": 0.06352567496029646, "grad_norm": 0.04293173551559448, "learning_rate": 0.00019464574023425984, "loss": 10.3508, "step": 150 },
    { "epoch": 0.06394917946003176, "grad_norm": 0.04612809792160988, "learning_rate": 0.0001945696799850642, "loss": 10.3473, "step": 151 },
    { "epoch": 0.06437268395976707, "grad_norm": 0.04514515772461891, "learning_rate": 0.00019449309837403137, "loss": 10.3484, "step": 152 },
    { "epoch": 0.06479618845950239, "grad_norm": 0.03168589621782303, "learning_rate": 0.00019441599582335498, "loss": 10.3465, "step": 153 },
    { "epoch": 0.06521969295923769, "grad_norm": 0.04755236580967903, "learning_rate": 0.00019433837275810082, "loss": 10.3474, "step": 154 },
    { "epoch": 0.065643197458973, "grad_norm": 0.031274329870939255, "learning_rate": 0.00019426022960620417, "loss": 10.3451, "step": 155 },
    { "epoch": 0.06606670195870831, "grad_norm": 0.036476653069257736, "learning_rate": 0.00019418156679846754, "loss": 10.3483, "step": 156 },
    { "epoch": 0.06649020645844363, "grad_norm": 0.0386991873383522, "learning_rate": 0.0001941023847685583, "loss": 10.3474, "step": 157 },
    { "epoch": 0.06691371095817893, "grad_norm": 0.034830257296562195, "learning_rate": 0.00019402268395300637, "loss": 10.3493, "step": 158 },
    { "epoch": 0.06733721545791424, "grad_norm": 0.03715137764811516, "learning_rate": 0.00019394246479120163, "loss": 10.3529, "step": 159 },
    { "epoch": 0.06776071995764955, "grad_norm": 0.03950640186667442, "learning_rate": 0.00019386172772539162, "loss": 10.3479, "step": 160 },
    { "epoch": 0.06818422445738485, "grad_norm": 0.04391263425350189, "learning_rate": 0.0001937804732006791, "loss": 10.3456, "step": 161 },
    { "epoch": 0.06860772895712017, "grad_norm": 0.047799207270145416, "learning_rate": 0.00019369870166501959, "loss": 10.3451, "step": 162 },
    { "epoch": 0.06903123345685548, "grad_norm": 0.031426433473825455, "learning_rate": 0.00019361641356921883, "loss": 10.3499, "step": 163 },
    { "epoch": 0.0694547379565908, "grad_norm": 0.0359007902443409, "learning_rate": 0.00019353360936693041, "loss": 10.3433, "step": 164 },
    { "epoch": 0.0698782424563261, "grad_norm": 0.02672567404806614, "learning_rate": 0.00019345028951465318, "loss": 10.343, "step": 165 },
    { "epoch": 0.07030174695606141, "grad_norm": 0.04336037486791611, "learning_rate": 0.0001933664544717288, "loss": 10.3488, "step": 166 },
    { "epoch": 0.07072525145579672, "grad_norm": 0.030480332672595978, "learning_rate": 0.0001932821047003391, "loss": 10.3464, "step": 167 },
    { "epoch": 0.07114875595553202, "grad_norm": 0.03520766645669937, "learning_rate": 0.00019319724066550373, "loss": 10.3475, "step": 168 },
    { "epoch": 0.07157226045526734, "grad_norm": 0.020646894350647926, "learning_rate": 0.0001931118628350773, "loss": 10.3476, "step": 169 },
    { "epoch": 0.07199576495500265, "grad_norm": 0.04156513512134552, "learning_rate": 0.00019302597167974707, "loss": 10.3485, "step": 170 },
    { "epoch": 0.07241926945473795, "grad_norm": 0.02938881516456604, "learning_rate": 0.0001929395676730303, "loss": 10.3464, "step": 171 },
    { "epoch": 0.07284277395447326, "grad_norm": 0.03371270000934601, "learning_rate": 0.00019285265129127151, "loss": 10.3443, "step": 172 },
    { "epoch": 0.07326627845420858, "grad_norm": 0.045955732464790344, "learning_rate": 0.00019276522301363996, "loss": 10.346, "step": 173 },
    { "epoch": 0.07368978295394389, "grad_norm": 0.022017156705260277, "learning_rate": 0.000192677283322127, "loss": 10.3461, "step": 174 },
    { "epoch": 0.07411328745367919, "grad_norm": 0.045463208109140396, "learning_rate": 0.0001925888327015434, "loss": 10.3462, "step": 175 },
    { "epoch": 0.0745367919534145, "grad_norm": 0.041146885603666306, "learning_rate": 0.00019249987163951667, "loss": 10.3453, "step": 176 },
    { "epoch": 0.07496029645314982, "grad_norm": 0.04077988117933273, "learning_rate": 0.0001924104006264884, "loss": 10.3472, "step": 177 },
    { "epoch": 0.07538380095288512, "grad_norm": 0.033624131232500076, "learning_rate": 0.00019232042015571152, "loss": 10.3493, "step": 178 },
    { "epoch": 0.07580730545262043, "grad_norm": 0.04149757698178291, "learning_rate": 0.00019222993072324758, "loss": 10.347, "step": 179 },
    { "epoch": 0.07623080995235575, "grad_norm": 0.04390670359134674, "learning_rate": 0.00019213893282796405, "loss": 10.3499, "step": 180 },
    { "epoch": 0.07665431445209106, "grad_norm": 0.04238109290599823, "learning_rate": 0.00019204742697153155, "loss": 10.3482, "step": 181 },
    { "epoch": 0.07707781895182636, "grad_norm": 0.0415191613137722, "learning_rate": 0.0001919554136584211, "loss": 10.3485, "step": 182 },
    { "epoch": 0.07750132345156167, "grad_norm": 0.04313662648200989, "learning_rate": 0.0001918628933959013, "loss": 10.3447, "step": 183 },
    { "epoch": 0.07792482795129699, "grad_norm": 0.0481775663793087, "learning_rate": 0.00019176986669403555, "loss": 10.3456, "step": 184 },
    { "epoch": 0.07834833245103229, "grad_norm": 0.031192272901535034, "learning_rate": 0.0001916763340656793, "loss": 10.3488, "step": 185 },
    { "epoch": 0.0787718369507676, "grad_norm": 0.04172395542263985, "learning_rate": 0.00019158229602647708, "loss": 10.3442, "step": 186 },
    { "epoch": 0.07919534145050292, "grad_norm": 0.03788716346025467, "learning_rate": 0.00019148775309485983, "loss": 10.3443, "step": 187 },
    { "epoch": 0.07961884595023822, "grad_norm": 0.0322580486536026, "learning_rate": 0.00019139270579204194, "loss": 10.3478, "step": 188 },
    { "epoch": 0.08004235044997353, "grad_norm": 0.035218119621276855, "learning_rate": 0.00019129715464201832, "loss": 10.3475, "step": 189 },
    { "epoch": 0.08046585494970884, "grad_norm": 0.0283061470836401, "learning_rate": 0.0001912011001715617, "loss": 10.3469, "step": 190 },
    { "epoch": 0.08088935944944416, "grad_norm": 0.03684883192181587, "learning_rate": 0.00019110454291021954, "loss": 10.3483, "step": 191 },
    { "epoch": 0.08131286394917946, "grad_norm": 0.028339441865682602, "learning_rate": 0.00019100748339031113, "loss": 10.3484, "step": 192 },
    { "epoch": 0.08173636844891477, "grad_norm": 0.03159940615296364, "learning_rate": 0.00019090992214692488, "loss": 10.346, "step": 193 },
    { "epoch": 0.08215987294865008, "grad_norm": 0.029895318672060966, "learning_rate": 0.00019081185971791504, "loss": 10.3481, "step": 194 },
    { "epoch": 0.08258337744838538, "grad_norm": 0.04218447580933571, "learning_rate": 0.0001907132966438989, "loss": 10.3453, "step": 195 },
    { "epoch": 0.0830068819481207, "grad_norm": 0.042808372527360916, "learning_rate": 0.00019061423346825395, "loss": 10.3466, "step": 196 },
    { "epoch": 0.08343038644785601, "grad_norm": 0.03805699571967125, "learning_rate": 0.00019051467073711456, "loss": 10.3466, "step": 197 },
    { "epoch": 0.08385389094759131, "grad_norm": 0.04781021177768707, "learning_rate": 0.00019041460899936921, "loss": 10.3436, "step": 198 },
    { "epoch": 0.08427739544732663, "grad_norm": 0.025532910600304604, "learning_rate": 0.00019031404880665739, "loss": 10.3478, "step": 199 },
    { "epoch": 0.08470089994706194, "grad_norm": 0.030978702008724213, "learning_rate": 0.00019021299071336664, "loss": 10.3455, "step": 200 },
    { "epoch": 0.08512440444679725, "grad_norm": 0.03757680207490921, "learning_rate": 0.00019011143527662935, "loss": 10.3481, "step": 201 },
    { "epoch": 0.08554790894653255, "grad_norm": 0.04030987620353699, "learning_rate": 0.00019000938305631975, "loss": 10.3465, "step": 202 },
    { "epoch": 0.08597141344626787, "grad_norm": 0.04490538313984871, "learning_rate": 0.00018990683461505087, "loss": 10.3444, "step": 203 },
    { "epoch": 0.08639491794600318, "grad_norm": 0.03259282931685448, "learning_rate": 0.00018980379051817138, "loss": 10.3471, "step": 204 },
    { "epoch": 0.08681842244573848, "grad_norm": 0.04348522052168846, "learning_rate": 0.00018970025133376253, "loss": 10.3488, "step": 205 },
    { "epoch": 0.0872419269454738, "grad_norm": 0.0327000729739666, "learning_rate": 0.00018959621763263494, "loss": 10.347, "step": 206 },
    { "epoch": 0.08766543144520911, "grad_norm": 0.043357010930776596, "learning_rate": 0.0001894916899883255, "loss": 10.3514, "step": 207 },
    { "epoch": 0.08808893594494442, "grad_norm": 0.03237845376133919, "learning_rate": 0.00018938666897709425, "loss": 10.3454, "step": 208 },
    { "epoch": 0.08851244044467972, "grad_norm": 0.040286242961883545, "learning_rate": 0.0001892811551779211, "loss": 10.3446, "step": 209 },
    { "epoch": 0.08893594494441504, "grad_norm": 0.03772817552089691, "learning_rate": 0.00018917514917250275, "loss": 10.3458, "step": 210 },
    { "epoch": 0.08935944944415035, "grad_norm": 0.0332292765378952, "learning_rate": 0.00018906865154524942, "loss": 10.3453, "step": 211 },
    { "epoch": 0.08978295394388565, "grad_norm": 0.038554366677999496, "learning_rate": 0.00018896166288328155, "loss": 10.3463, "step": 212 },
    { "epoch": 0.09020645844362096, "grad_norm": 0.05035999044775963, "learning_rate": 0.00018885418377642674, "loss": 10.346, "step": 213 },
    { "epoch": 0.09062996294335628, "grad_norm": 0.03604661673307419, "learning_rate": 0.00018874621481721645, "loss": 10.3474, "step": 214 },
    { "epoch": 0.09105346744309158, "grad_norm": 0.0357426181435585, "learning_rate": 0.00018863775660088258, "loss": 10.347, "step": 215 },
    { "epoch": 0.09147697194282689, "grad_norm": 0.031967032700777054, "learning_rate": 0.00018852880972535432, "loss": 10.3471, "step": 216 },
    { "epoch": 0.0919004764425622, "grad_norm": 0.031692031770944595, "learning_rate": 0.0001884193747912549, "loss": 10.3457, "step": 217 },
    { "epoch": 0.09232398094229752, "grad_norm": 0.04586448892951012, "learning_rate": 0.00018830945240189817, "loss": 10.3457, "step": 218 },
    { "epoch": 0.09274748544203282, "grad_norm": 0.039624523371458054, "learning_rate": 0.00018819904316328532, "loss": 10.3455, "step": 219 },
    { "epoch": 0.09317098994176813, "grad_norm": 0.042145851999521255, "learning_rate": 0.00018808814768410157, "loss": 10.3445, "step": 220 },
    { "epoch": 0.09359449444150345, "grad_norm": 0.0323745459318161, "learning_rate": 0.0001879767665757127, "loss": 10.3408, "step": 221 },
    { "epoch": 0.09401799894123875, "grad_norm": 0.03138961270451546, "learning_rate": 0.00018786490045216182, "loss": 10.3448, "step": 222 },
    { "epoch": 0.09444150344097406, "grad_norm": 0.0241545382887125, "learning_rate": 0.00018775254993016595, "loss": 10.3481, "step": 223 },
    { "epoch": 0.09486500794070937, "grad_norm": 0.03655562922358513, "learning_rate": 0.0001876397156291125, "loss": 10.3438, "step": 224 },
    { "epoch": 0.09528851244044469, "grad_norm": 0.042878881096839905, "learning_rate": 0.00018752639817105606, "loss": 10.345, "step": 225 },
    { "epoch": 0.09571201694017999, "grad_norm": 0.03456420823931694, "learning_rate": 0.0001874125981807148, "loss": 10.3447, "step": 226 },
    { "epoch": 0.0961355214399153, "grad_norm": 0.033554937690496445, "learning_rate": 0.00018729831628546702, "loss": 10.3467, "step": 227 },
    { "epoch": 0.09655902593965061, "grad_norm": 0.028273334726691246, "learning_rate": 0.00018718355311534793, "loss": 10.348, "step": 228 },
    { "epoch": 0.09698253043938591, "grad_norm": 0.03176790103316307, "learning_rate": 0.00018706830930304585, "loss": 10.3438, "step": 229 },
    { "epoch": 0.09740603493912123, "grad_norm": 0.04405641928315163, "learning_rate": 0.000186952585483899, "loss": 10.3432, "step": 230 },
    { "epoch": 0.09782953943885654, "grad_norm": 0.039611659944057465, "learning_rate": 0.00018683638229589168, "loss": 10.3477, "step": 231 },
    { "epoch": 0.09825304393859184, "grad_norm": 0.03426756337285042, "learning_rate": 0.00018671970037965118, "loss": 10.3482, "step": 232 },
    { "epoch": 0.09867654843832716, "grad_norm": 0.0545201450586319, "learning_rate": 0.00018660254037844388, "loss": 10.3437, "step": 233 },
    { "epoch": 0.09910005293806247, "grad_norm": 0.041258279234170914, "learning_rate": 0.00018648490293817185, "loss": 10.3463, "step": 234 },
    { "epoch": 0.09952355743779778, "grad_norm": 0.025181951001286507, "learning_rate": 0.00018636678870736928, "loss": 10.3454, "step": 235 },
    { "epoch": 0.09994706193753308, "grad_norm": 0.02877328358590603, "learning_rate": 0.00018624819833719896, "loss": 10.3448, "step": 236 },
    { "epoch": 0.1003705664372684, "grad_norm": 0.049800995737314224, "learning_rate": 0.00018612913248144852, "loss": 10.3473, "step": 237 },
    { "epoch": 0.10079407093700371, "grad_norm": 0.02784821018576622, "learning_rate": 0.0001860095917965271, "loss": 10.3458, "step": 238 },
    { "epoch": 0.10121757543673901, "grad_norm": 0.0490335077047348, "learning_rate": 0.00018588957694146138, "loss": 10.3444, "step": 239 },
    { "epoch": 0.10164107993647432, "grad_norm": 0.03331589698791504, "learning_rate": 0.0001857690885778923, "loss": 10.3478, "step": 240 },
    { "epoch": 0.10206458443620964, "grad_norm": 0.03565964475274086, "learning_rate": 0.00018564812737007112, "loss": 10.3445, "step": 241 },
    { "epoch": 0.10248808893594494, "grad_norm": 0.036929886788129807, "learning_rate": 0.00018552669398485598, "loss": 10.3427, "step": 242 },
    { "epoch": 0.10291159343568025, "grad_norm": 0.03922504186630249, "learning_rate": 0.0001854047890917081, "loss": 10.3466, "step": 243 },
    { "epoch": 0.10333509793541557, "grad_norm": 0.03552815318107605, "learning_rate": 0.0001852824133626881, "loss": 10.3414, "step": 244 },
    { "epoch": 0.10375860243515088, "grad_norm": 0.051186930388212204, "learning_rate": 0.0001851595674724523, "loss": 10.3479, "step": 245 },
    { "epoch": 0.10418210693488618, "grad_norm": 0.03887473791837692, "learning_rate": 0.00018503625209824906, "loss": 10.3456, "step": 246 },
    { "epoch": 0.10460561143462149, "grad_norm": 0.03032403625547886, "learning_rate": 0.00018491246791991502, "loss": 10.3421, "step": 247 },
    { "epoch": 0.1050291159343568, "grad_norm": 0.03820972517132759, "learning_rate": 0.0001847882156198713, "loss": 10.3479, "step": 248 },
    { "epoch": 0.1054526204340921, "grad_norm": 0.04590925946831703, "learning_rate": 0.0001846634958831197, "loss": 10.3442, "step": 249 },
    { "epoch": 0.10587612493382742, "grad_norm": 0.033261772245168686, "learning_rate": 0.00018453830939723913, "loss": 10.3457, "step": 250 },
    { "epoch": 0.10629962943356273, "grad_norm": 0.037462469190359116, "learning_rate": 0.00018441265685238158, "loss": 10.3421, "step": 251 },
    { "epoch": 0.10672313393329805, "grad_norm": 0.0378030389547348, "learning_rate": 0.00018428653894126846, "loss": 10.345, "step": 252 },
    { "epoch": 0.10714663843303335, "grad_norm": 0.030371299013495445, "learning_rate": 0.00018415995635918676, "loss": 10.3488, "step": 253 },
    { "epoch": 0.10757014293276866, "grad_norm": 0.028029056265950203, "learning_rate": 0.00018403290980398512, "loss": 10.3436, "step": 254 },
    { "epoch": 0.10799364743250398, "grad_norm": 0.03999907523393631, "learning_rate": 0.00018390539997607014, "loss": 10.3432, "step": 255 },
    { "epoch": 0.10841715193223928, "grad_norm": 0.046100400388240814, "learning_rate": 0.00018377742757840244, "loss": 10.3444, "step": 256 },
    { "epoch": 0.10884065643197459, "grad_norm": 0.03245000168681145, "learning_rate": 0.0001836489933164927, "loss": 10.3434, "step": 257 },
    { "epoch": 0.1092641609317099, "grad_norm": 0.028921889141201973, "learning_rate": 0.000183520097898398, "loss": 10.3453, "step": 258 },
    { "epoch": 0.1096876654314452, "grad_norm": 0.033345550298690796, "learning_rate": 0.00018339074203471757, "loss": 10.3431, "step": 259 },
    { "epoch": 0.11011116993118052, "grad_norm": 0.036411840468645096, "learning_rate": 0.00018326092643858923, "loss": 10.3433, "step": 260 },
    { "epoch": 0.11053467443091583, "grad_norm": 0.039705585688352585, "learning_rate": 0.00018313065182568527, "loss": 10.3447, "step": 261 },
    { "epoch": 0.11095817893065114, "grad_norm": 0.0351327620446682, "learning_rate": 0.00018299991891420847, "loss": 10.3451, "step": 262 },
    { "epoch": 0.11138168343038644, "grad_norm": 0.04147129878401756, "learning_rate": 0.00018286872842488832, "loss": 10.3408, "step": 263 },
    { "epoch": 0.11180518793012176, "grad_norm": 0.036880653351545334, "learning_rate": 0.00018273708108097677, "loss": 10.3433, "step": 264 },
    { "epoch": 0.11222869242985707, "grad_norm": 0.05368657410144806, "learning_rate": 0.00018260497760824458, "loss": 10.3491, "step": 265 },
    { "epoch": 0.11265219692959237, "grad_norm": 0.03917551413178444, "learning_rate": 0.00018247241873497707, "loss": 10.3421, "step": 266 },
    { "epoch": 0.11307570142932769, "grad_norm": 0.03874152526259422, "learning_rate": 0.0001823394051919701, "loss": 10.3435, "step": 267 },
    { "epoch": 0.113499205929063, "grad_norm": 0.04026317596435547, "learning_rate": 0.0001822059377125263, "loss": 10.3456, "step": 268 },
    { "epoch": 0.11392271042879831, "grad_norm": 0.06843981891870499, "learning_rate": 0.00018207201703245062, "loss": 10.3463, "step": 269 },
    { "epoch": 0.11434621492853361, "grad_norm": 0.05057435482740402, "learning_rate": 0.00018193764389004674, "loss": 10.3409, "step": 270 },
    { "epoch": 0.11476971942826893, "grad_norm": 0.03488187864422798, "learning_rate": 0.0001818028190261126, "loss": 10.3471, "step": 271 },
    { "epoch": 0.11519322392800424, "grad_norm": 0.05958685651421547, "learning_rate": 0.0001816675431839365, "loss": 10.3482, "step": 272 },
    { "epoch": 0.11561672842773954, "grad_norm": 0.03940315917134285, "learning_rate": 0.000181531817109293, "loss": 10.3487, "step": 273 },
    { "epoch": 0.11604023292747485, "grad_norm": 0.044273462146520615, "learning_rate": 0.00018139564155043885, "loss": 10.3443, "step": 274 },
    { "epoch": 0.11646373742721017, "grad_norm": 0.028659025207161903, "learning_rate": 0.00018125901725810865, "loss": 10.346, "step": 275 },
    { "epoch": 0.11688724192694547, "grad_norm": 0.054137472063302994, "learning_rate": 0.00018112194498551106, "loss": 10.3445, "step": 276 },
    { "epoch": 0.11731074642668078, "grad_norm": 0.028421467170119286, "learning_rate": 0.00018098442548832426, "loss": 10.3408, "step": 277 },
    { "epoch": 0.1177342509264161, "grad_norm": 0.036345433443784714, "learning_rate": 0.0001808464595246921, "loss": 10.3425, "step": 278 },
    { "epoch": 0.11815775542615141, "grad_norm": 0.034704405814409256, "learning_rate": 0.00018070804785521975, "loss": 10.3469, "step": 279 },
    { "epoch": 0.11858125992588671, "grad_norm": 0.04459795728325844, "learning_rate": 0.0001805691912429696, "loss": 10.3433, "step": 280 },
    { "epoch": 0.11900476442562202, "grad_norm": 0.03889624401926994, "learning_rate": 0.0001804298904534569, "loss": 10.3421, "step": 281 },
    { "epoch": 0.11942826892535734, "grad_norm": 0.03947869688272476, "learning_rate": 0.0001802901462546457, "loss": 10.3403, "step": 282 },
    { "epoch": 0.11985177342509264, "grad_norm": 0.055074214935302734, "learning_rate": 0.00018014995941694468, "loss": 10.344, "step": 283 },
    { "epoch": 0.12027527792482795, "grad_norm": 0.029949650168418884, "learning_rate": 0.00018000933071320258, "loss": 10.3431, "step": 284 },
    { "epoch": 0.12069878242456326, "grad_norm": 0.041310764849185944, "learning_rate": 0.0001798682609187043, "loss": 10.3423, "step": 285 },
    { "epoch": 0.12112228692429856, "grad_norm": 0.0433335155248642, "learning_rate": 0.00017972675081116637, "loss": 10.3431, "step": 286 },
    { "epoch": 0.12154579142403388, "grad_norm": 0.03031458891928196, "learning_rate": 0.0001795848011707328, "loss": 10.3417, "step": 287 },
    { "epoch": 0.12196929592376919, "grad_norm": 0.03494204208254814, "learning_rate": 0.00017944241277997077, "loss": 10.345, "step": 288 },
    { "epoch": 0.1223928004235045, "grad_norm": 0.027185741811990738, "learning_rate": 0.0001792995864238663, "loss": 10.3429, "step": 289 },
    { "epoch": 0.1228163049232398, "grad_norm": 0.056868940591812134, "learning_rate": 0.00017915632288981978, "loss": 10.3404, "step": 290 },
    { "epoch": 0.12323980942297512, "grad_norm": 0.03859318792819977, "learning_rate": 0.0001790126229676419, "loss": 10.3404, "step": 291 },
    { "epoch": 0.12366331392271043, "grad_norm": 0.029093610122799873, "learning_rate": 0.0001788684874495491, "loss": 10.3451, "step": 292 },
    { "epoch": 0.12408681842244573, "grad_norm": 0.0350499302148819, "learning_rate": 0.00017872391713015924, "loss": 10.3397, "step": 293 },
    { "epoch": 0.12451032292218105, "grad_norm": 0.03968917950987816, "learning_rate": 0.00017857891280648728, "loss": 10.3428, "step": 294 },
    { "epoch": 0.12493382742191636, "grad_norm": 0.038241248577833176, "learning_rate": 0.00017843347527794081, "loss": 10.3415, "step": 295 },
    { "epoch": 0.12535733192165166, "grad_norm": 0.03694219887256622, "learning_rate": 0.00017828760534631565, "loss": 10.341, "step": 296 },
    { "epoch": 0.125780836421387, "grad_norm": 0.03630373254418373, "learning_rate": 0.00017814130381579155, "loss": 10.3388, "step": 297 },
    { "epoch": 0.1262043409211223, "grad_norm": 0.031546298414468765, "learning_rate": 0.00017799457149292753, "loss": 10.3418, "step": 298 },
    { "epoch": 0.1266278454208576, "grad_norm": 0.05403247848153114, "learning_rate": 0.00017784740918665767, "loss": 10.3451, "step": 299 },
    { "epoch": 0.12705134992059292, "grad_norm": 0.03717590495944023, "learning_rate": 0.00017769981770828652, "loss": 10.3419, "step": 300 },
    { "epoch": 0.12747485442032822, "grad_norm": 0.05108652263879776, "learning_rate": 0.0001775517978714846, "loss": 10.3414, "step": 301 },
    { "epoch": 0.12789835892006352, "grad_norm": 0.04460764676332474, "learning_rate": 0.000177403350492284, "loss": 10.3443, "step": 302 },
    { "epoch": 0.12832186341979884, "grad_norm": 0.049197494983673096, "learning_rate": 0.00017725447638907392, "loss": 10.3426, "step": 303 },
    { "epoch": 0.12874536791953414, "grad_norm": 0.029134899377822876, "learning_rate": 0.0001771051763825959, "loss": 10.3409, "step": 304 },
    { "epoch": 0.12916887241926944, "grad_norm": 0.03543701395392418, "learning_rate": 0.00017695545129593973, "loss": 10.3442, "step": 305 },
    { "epoch": 0.12959237691900477, "grad_norm": 0.03356650099158287, "learning_rate": 0.00017680530195453845, "loss": 10.3429, "step": 306 },
    { "epoch": 0.13001588141874007, "grad_norm": 0.0437590628862381, "learning_rate": 0.00017665472918616413, "loss": 10.3449, "step": 307 },
    { "epoch": 0.13043938591847537, "grad_norm": 0.03688879683613777, "learning_rate": 0.00017650373382092314, "loss": 10.3398, "step": 308 },
    { "epoch": 0.1308628904182107, "grad_norm": 0.02486329711973667, "learning_rate": 0.00017635231669125165, "loss": 10.3408, "step": 309 },
    { "epoch": 0.131286394917946, "grad_norm": 0.03931661695241928, "learning_rate": 0.000176200478631911, "loss": 10.3393, "step": 310 },
    { "epoch": 0.13170989941768133, "grad_norm": 0.03328394889831543, "learning_rate": 0.00017604822047998306, "loss": 10.3418, "step": 311 },
    { "epoch": 0.13213340391741663, "grad_norm": 0.04236508905887604, "learning_rate": 0.0001758955430748658, "loss": 10.3432, "step": 312 },
    { "epoch": 0.13255690841715193, "grad_norm": 0.026039429008960724, "learning_rate": 0.0001757424472582684, "loss": 10.3464, "step": 313 },
    { "epoch": 0.13298041291688725, "grad_norm": 0.0246927160769701, "learning_rate": 0.00017558893387420682, "loss": 10.3451, "step": 314 },
    { "epoch": 0.13340391741662255, "grad_norm": 0.030340131372213364, "learning_rate": 0.00017543500376899902, "loss": 10.3401, "step": 315 },
    { "epoch": 0.13382742191635785, "grad_norm": 0.04260968044400215, "learning_rate": 0.00017528065779126033, "loss": 10.3414, "step": 316 },
    { "epoch": 0.13425092641609318, "grad_norm": 0.03421700373291969, "learning_rate": 0.00017512589679189887, "loss": 10.3402, "step": 317 },
    { "epoch": 0.13467443091582848, "grad_norm": 0.03428565710783005, "learning_rate": 0.0001749707216241106, "loss": 10.3406, "step": 318 },
    { "epoch": 0.13509793541556378, "grad_norm": 0.042442288249731064, "learning_rate": 0.000174815133143375, "loss": 10.3381, "step": 319 },
    { "epoch": 0.1355214399152991, "grad_norm": 0.0397978350520134, "learning_rate": 0.00017465913220744998, "loss": 10.3427, "step": 320 },
    { "epoch": 0.1359449444150344, "grad_norm": 0.03605269640684128, "learning_rate": 0.00017450271967636737, "loss": 10.3397, "step": 321 },
    { "epoch": 0.1363684489147697, "grad_norm": 0.034129634499549866, "learning_rate": 0.00017434589641242813, "loss": 10.3463, "step": 322 },
    { "epoch": 0.13679195341450504, "grad_norm": 0.03590450435876846, "learning_rate": 0.0001741886632801976, "loss": 10.3416, "step": 323 },
    { "epoch": 0.13721545791424034, "grad_norm": 0.040069352835416794, "learning_rate": 0.0001740310211465006, "loss": 10.3427, "step": 324 },
    { "epoch": 0.13763896241397564, "grad_norm": 0.03840317204594612, "learning_rate": 0.00017387297088041693, "loss": 10.3431, "step": 325 },
    { "epoch": 0.13806246691371096, "grad_norm": 0.04085763916373253, "learning_rate": 0.0001737145133532764, "loss": 10.3379, "step": 326 },
    { "epoch": 0.13848597141344626, "grad_norm": 0.03601207211613655, "learning_rate": 0.0001735556494386539, "loss": 10.3407, "step": 327 },
    { "epoch": 0.1389094759131816, "grad_norm": 0.03058718331158161, "learning_rate": 0.00017339638001236492, "loss": 10.3411, "step": 328 },
    { "epoch": 0.1393329804129169, "grad_norm": 0.03896321728825569, "learning_rate": 0.0001732367059524604, "loss": 10.3426, "step": 329 },
    { "epoch": 0.1397564849126522, "grad_norm": 0.040502067655324936, "learning_rate": 0.0001730766281392221, "loss": 10.3411, "step": 330 },
    { "epoch": 0.14017998941238752, "grad_norm": 0.032813332974910736, "learning_rate": 0.0001729161474551576, "loss": 10.343, "step": 331 },
    { "epoch": 0.14060349391212282, "grad_norm": 0.032831039279699326, "learning_rate": 0.00017275526478499555, "loss": 10.3403, "step": 332 },
    { "epoch": 0.14102699841185812, "grad_norm": 0.033066242933273315, "learning_rate": 0.00017259398101568076, "loss": 10.3439, "step": 333 },
    { "epoch": 0.14145050291159345, "grad_norm": 0.032812707126140594, "learning_rate": 0.00017243229703636922, "loss": 10.3396, "step": 334 },
    { "epoch": 0.14187400741132875, "grad_norm": 0.03849957883358002, "learning_rate": 0.0001722702137384234, "loss": 10.3437, "step": 335 },
    { "epoch": 0.14229751191106405, "grad_norm": 0.047831226140260696, "learning_rate": 0.00017210773201540707, "loss": 10.3375, "step": 336 },
    { "epoch": 0.14272101641079937, "grad_norm": 0.04042219743132591, "learning_rate": 0.0001719448527630806, "loss": 10.3405, "step": 337 },
    { "epoch": 0.14272101641079937, "eval_loss": 10.340270042419434, "eval_runtime": 3.4931, "eval_samples_per_second": 284.85, "eval_steps_per_second": 142.568, "step": 337 },
    { "epoch": 0.14314452091053467, "grad_norm": 0.030297674238681793, "learning_rate": 0.00017178157687939592, "loss": 10.3392, "step": 338 },
    { "epoch": 0.14356802541026997, "grad_norm": 0.030716104432940483, "learning_rate": 0.00017161790526449156, "loss": 10.3387, "step": 339 },
    { "epoch": 0.1439915299100053, "grad_norm": 0.034860242158174515, "learning_rate": 0.00017145383882068778, "loss": 10.3383, "step": 340 },
    { "epoch": 0.1444150344097406, "grad_norm": 0.04767249897122383, "learning_rate": 0.00017128937845248146, "loss": 10.3434, "step": 341 },
    { "epoch": 0.1448385389094759, "grad_norm": 0.02438390627503395, "learning_rate": 0.00017112452506654117, "loss": 10.3438, "step": 342 },
    { "epoch": 0.14526204340921123, "grad_norm": 0.04478878155350685, "learning_rate": 0.00017095927957170228, "loss": 10.3411, "step": 343 },
    { "epoch": 0.14568554790894653, "grad_norm": 0.03832190856337547, "learning_rate": 0.00017079364287896174, "loss": 10.3427, "step": 344 },
    { "epoch": 0.14610905240868186, "grad_norm": 0.03669346123933792, "learning_rate": 0.00017062761590147323, "loss": 10.3416, "step": 345 },
    { "epoch": 0.14653255690841716, "grad_norm": 0.03234705701470375, "learning_rate": 0.00017046119955454206, "loss": 10.3382, "step": 346 },
    { "epoch": 0.14695606140815246, "grad_norm": 0.028199292719364166, "learning_rate": 0.00017029439475562015, "loss": 10.3395, "step": 347 },
    { "epoch": 0.14737956590788778, "grad_norm": 0.03456572815775871, "learning_rate": 0.0001701272024243009, "loss": 10.3412, "step": 348 },
    { "epoch": 0.14780307040762308, "grad_norm": 0.041843071579933167, "learning_rate": 0.00016995962348231424, "loss": 10.3384, "step": 349 },
    { "epoch": 0.14822657490735838, "grad_norm": 0.0353543683886528, "learning_rate": 0.0001697916588535214, "loss": 10.3402, "step": 350 },
    { "epoch": 0.1486500794070937, "grad_norm": 0.03074280545115471, "learning_rate": 0.00016962330946391, "loss": 10.3408, "step": 351 },
    { "epoch": 0.149073583906829, "grad_norm": 0.029107527807354927, "learning_rate": 0.00016945457624158871, "loss": 10.3404, "step": 352 },
    { "epoch": 0.1494970884065643, "grad_norm": 0.028990836814045906, "learning_rate": 0.00016928546011678238, "loss": 10.3366, "step": 353 },
    { "epoch": 0.14992059290629964, "grad_norm": 0.026332266628742218, "learning_rate": 0.00016911596202182677, "loss": 10.3423, "step": 354 },
    { "epoch": 0.15034409740603494, "grad_norm": 0.044704243540763855, "learning_rate": 0.00016894608289116342, "loss": 10.3407, "step": 355 },
    { "epoch": 0.15076760190577024, "grad_norm": 0.036491744220256805, "learning_rate": 0.00016877582366133455, "loss": 10.3393, "step": 356 },
    { "epoch": 0.15119110640550557, "grad_norm": 0.02925538271665573, "learning_rate": 0.0001686051852709778, "loss": 10.3394, "step": 357 },
    { "epoch": 0.15161461090524087, "grad_norm": 0.03139074891805649, "learning_rate": 0.00016843416866082117, "loss": 10.3381, "step": 358 },
    { "epoch": 0.15203811540497617, "grad_norm": 0.03710184246301651, "learning_rate": 0.00016826277477367775, "loss": 10.3378, "step": 359 },
|
{ |
|
"epoch": 0.1524616199047115, |
|
"grad_norm": 0.0361437126994133, |
|
"learning_rate": 0.0001680910045544406, |
|
"loss": 10.3408, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1528851244044468, |
|
"grad_norm": 0.04383867606520653, |
|
"learning_rate": 0.0001679188589500775, |
|
"loss": 10.3415, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.15330862890418212, |
|
"grad_norm": 0.03228599205613136, |
|
"learning_rate": 0.0001677463389096256, |
|
"loss": 10.3413, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.15373213340391742, |
|
"grad_norm": 0.03311069682240486, |
|
"learning_rate": 0.00016757344538418653, |
|
"loss": 10.3409, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.15415563790365272, |
|
"grad_norm": 0.037153564393520355, |
|
"learning_rate": 0.00016740017932692075, |
|
"loss": 10.338, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.15457914240338805, |
|
"grad_norm": 0.03567847982048988, |
|
"learning_rate": 0.00016722654169304253, |
|
"loss": 10.3395, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.15500264690312335, |
|
"grad_norm": 0.026938440278172493, |
|
"learning_rate": 0.0001670525334398147, |
|
"loss": 10.3397, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.15542615140285865, |
|
"grad_norm": 0.02322826161980629, |
|
"learning_rate": 0.00016687815552654327, |
|
"loss": 10.3386, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.15584965590259398, |
|
"grad_norm": 0.03586160019040108, |
|
"learning_rate": 0.00016670340891457216, |
|
"loss": 10.3396, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.15627316040232928, |
|
"grad_norm": 0.03536440059542656, |
|
"learning_rate": 0.00016652829456727797, |
|
"loss": 10.3412, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.15669666490206458, |
|
"grad_norm": 0.025009091943502426, |
|
"learning_rate": 0.00016635281345006461, |
|
"loss": 10.34, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1571201694017999, |
|
"grad_norm": 0.02612980827689171, |
|
"learning_rate": 0.00016617696653035795, |
|
"loss": 10.3401, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.1575436739015352, |
|
"grad_norm": 0.04117359593510628, |
|
"learning_rate": 0.00016600075477760058, |
|
"loss": 10.3393, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.1579671784012705, |
|
"grad_norm": 0.0326978899538517, |
|
"learning_rate": 0.00016582417916324635, |
|
"loss": 10.3384, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.15839068290100583, |
|
"grad_norm": 0.044377487152814865, |
|
"learning_rate": 0.00016564724066075515, |
|
"loss": 10.3382, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.15881418740074113, |
|
"grad_norm": 0.050321295857429504, |
|
"learning_rate": 0.00016546994024558743, |
|
"loss": 10.3387, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.15923769190047643, |
|
"grad_norm": 0.022547969594597816, |
|
"learning_rate": 0.00016529227889519886, |
|
"loss": 10.3385, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.15966119640021176, |
|
"grad_norm": 0.034384775906801224, |
|
"learning_rate": 0.00016511425758903493, |
|
"loss": 10.3391, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.16008470089994706, |
|
"grad_norm": 0.02677147649228573, |
|
"learning_rate": 0.00016493587730852558, |
|
"loss": 10.3399, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.16050820539968239, |
|
"grad_norm": 0.03600003570318222, |
|
"learning_rate": 0.00016475713903707978, |
|
"loss": 10.3418, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.16093170989941769, |
|
"grad_norm": 0.032675545662641525, |
|
"learning_rate": 0.00016457804376008008, |
|
"loss": 10.3388, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16135521439915299, |
|
"grad_norm": 0.03568057715892792, |
|
"learning_rate": 0.00016439859246487724, |
|
"loss": 10.3362, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.1617787188988883, |
|
"grad_norm": 0.035958852618932724, |
|
"learning_rate": 0.00016421878614078468, |
|
"loss": 10.3396, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.1622022233986236, |
|
"grad_norm": 0.03066675178706646, |
|
"learning_rate": 0.00016403862577907315, |
|
"loss": 10.3426, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.1626257278983589, |
|
"grad_norm": 0.042271457612514496, |
|
"learning_rate": 0.0001638581123729652, |
|
"loss": 10.3404, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.16304923239809424, |
|
"grad_norm": 0.034284938126802444, |
|
"learning_rate": 0.00016367724691762967, |
|
"loss": 10.3381, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.16347273689782954, |
|
"grad_norm": 0.023705342784523964, |
|
"learning_rate": 0.00016349603041017626, |
|
"loss": 10.3375, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.16389624139756484, |
|
"grad_norm": 0.031792912632226944, |
|
"learning_rate": 0.00016331446384965003, |
|
"loss": 10.3383, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.16431974589730017, |
|
"grad_norm": 0.035305608063936234, |
|
"learning_rate": 0.0001631325482370259, |
|
"loss": 10.3434, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.16474325039703547, |
|
"grad_norm": 0.03486499562859535, |
|
"learning_rate": 0.00016295028457520306, |
|
"loss": 10.3428, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.16516675489677077, |
|
"grad_norm": 0.03409821167588234, |
|
"learning_rate": 0.00016276767386899955, |
|
"loss": 10.3386, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1655902593965061, |
|
"grad_norm": 0.02966834418475628, |
|
"learning_rate": 0.0001625847171251466, |
|
"loss": 10.3393, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.1660137638962414, |
|
"grad_norm": 0.02835707552731037, |
|
"learning_rate": 0.00016240141535228323, |
|
"loss": 10.3388, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.1664372683959767, |
|
"grad_norm": 0.03911609947681427, |
|
"learning_rate": 0.00016221776956095046, |
|
"loss": 10.3423, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.16686077289571202, |
|
"grad_norm": 0.02803829312324524, |
|
"learning_rate": 0.00016203378076358598, |
|
"loss": 10.3427, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.16728427739544732, |
|
"grad_norm": 0.03135819733142853, |
|
"learning_rate": 0.00016184944997451854, |
|
"loss": 10.3364, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.16770778189518262, |
|
"grad_norm": 0.03102540783584118, |
|
"learning_rate": 0.00016166477820996216, |
|
"loss": 10.3403, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.16813128639491795, |
|
"grad_norm": 0.026423562318086624, |
|
"learning_rate": 0.0001614797664880107, |
|
"loss": 10.3372, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.16855479089465325, |
|
"grad_norm": 0.03439650684595108, |
|
"learning_rate": 0.00016129441582863217, |
|
"loss": 10.342, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.16897829539438858, |
|
"grad_norm": 0.03104579448699951, |
|
"learning_rate": 0.00016110872725366316, |
|
"loss": 10.3377, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.16940179989412388, |
|
"grad_norm": 0.03529537469148636, |
|
"learning_rate": 0.0001609227017868033, |
|
"loss": 10.3395, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16982530439385918, |
|
"grad_norm": 0.03002871572971344, |
|
"learning_rate": 0.00016073634045360932, |
|
"loss": 10.3411, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.1702488088935945, |
|
"grad_norm": 0.03288958594202995, |
|
"learning_rate": 0.00016054964428148963, |
|
"loss": 10.3367, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.1706723133933298, |
|
"grad_norm": 0.026648705825209618, |
|
"learning_rate": 0.00016036261429969867, |
|
"loss": 10.3367, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.1710958178930651, |
|
"grad_norm": 0.035582203418016434, |
|
"learning_rate": 0.00016017525153933114, |
|
"loss": 10.3375, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.17151932239280043, |
|
"grad_norm": 0.024190323427319527, |
|
"learning_rate": 0.00015998755703331634, |
|
"loss": 10.3416, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.17194282689253573, |
|
"grad_norm": 0.03089403547346592, |
|
"learning_rate": 0.00015979953181641246, |
|
"loss": 10.3418, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.17236633139227103, |
|
"grad_norm": 0.026094770058989525, |
|
"learning_rate": 0.00015961117692520088, |
|
"loss": 10.3357, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.17278983589200636, |
|
"grad_norm": 0.04286188259720802, |
|
"learning_rate": 0.00015942249339808058, |
|
"loss": 10.3408, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.17321334039174166, |
|
"grad_norm": 0.0313500352203846, |
|
"learning_rate": 0.00015923348227526218, |
|
"loss": 10.3354, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.17363684489147696, |
|
"grad_norm": 0.03544219583272934, |
|
"learning_rate": 0.00015904414459876238, |
|
"loss": 10.3367, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1740603493912123, |
|
"grad_norm": 0.03017052263021469, |
|
"learning_rate": 0.00015885448141239822, |
|
"loss": 10.3418, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.1744838538909476, |
|
"grad_norm": 0.030451800674200058, |
|
"learning_rate": 0.00015866449376178117, |
|
"loss": 10.3386, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.1749073583906829, |
|
"grad_norm": 0.035226162523031235, |
|
"learning_rate": 0.00015847418269431153, |
|
"loss": 10.3412, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.17533086289041822, |
|
"grad_norm": 0.02857392653822899, |
|
"learning_rate": 0.00015828354925917262, |
|
"loss": 10.3414, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.17575436739015352, |
|
"grad_norm": 0.050622567534446716, |
|
"learning_rate": 0.00015809259450732494, |
|
"loss": 10.3392, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.17617787188988884, |
|
"grad_norm": 0.0338461808860302, |
|
"learning_rate": 0.00015790131949150035, |
|
"loss": 10.3419, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.17660137638962414, |
|
"grad_norm": 0.027923308312892914, |
|
"learning_rate": 0.00015770972526619646, |
|
"loss": 10.3385, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.17702488088935944, |
|
"grad_norm": 0.03212830424308777, |
|
"learning_rate": 0.0001575178128876705, |
|
"loss": 10.339, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.17744838538909477, |
|
"grad_norm": 0.020661218091845512, |
|
"learning_rate": 0.00015732558341393385, |
|
"loss": 10.338, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.17787188988883007, |
|
"grad_norm": 0.02785920538008213, |
|
"learning_rate": 0.00015713303790474594, |
|
"loss": 10.3392, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.17829539438856537, |
|
"grad_norm": 0.018963869661092758, |
|
"learning_rate": 0.00015694017742160846, |
|
"loss": 10.3381, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.1787188988883007, |
|
"grad_norm": 0.02660539373755455, |
|
"learning_rate": 0.0001567470030277597, |
|
"loss": 10.3389, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.179142403388036, |
|
"grad_norm": 0.03342144191265106, |
|
"learning_rate": 0.00015655351578816834, |
|
"loss": 10.3395, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.1795659078877713, |
|
"grad_norm": 0.03541478142142296, |
|
"learning_rate": 0.00015635971676952797, |
|
"loss": 10.3356, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.17998941238750663, |
|
"grad_norm": 0.04339861124753952, |
|
"learning_rate": 0.00015616560704025088, |
|
"loss": 10.34, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.18041291688724193, |
|
"grad_norm": 0.030052557587623596, |
|
"learning_rate": 0.00015597118767046232, |
|
"loss": 10.3366, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.18083642138697723, |
|
"grad_norm": 0.03362065181136131, |
|
"learning_rate": 0.00015577645973199465, |
|
"loss": 10.3446, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.18125992588671255, |
|
"grad_norm": 0.033407680690288544, |
|
"learning_rate": 0.00015558142429838133, |
|
"loss": 10.3382, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.18168343038644785, |
|
"grad_norm": 0.03306809812784195, |
|
"learning_rate": 0.00015538608244485103, |
|
"loss": 10.3391, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.18210693488618315, |
|
"grad_norm": 0.035972122102975845, |
|
"learning_rate": 0.0001551904352483217, |
|
"loss": 10.3378, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.18253043938591848, |
|
"grad_norm": 0.02942793443799019, |
|
"learning_rate": 0.0001549944837873947, |
|
"loss": 10.341, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.18295394388565378, |
|
"grad_norm": 0.0311295036226511, |
|
"learning_rate": 0.00015479822914234875, |
|
"loss": 10.3427, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.1833774483853891, |
|
"grad_norm": 0.03349452093243599, |
|
"learning_rate": 0.00015460167239513396, |
|
"loss": 10.3335, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.1838009528851244, |
|
"grad_norm": 0.024683522060513496, |
|
"learning_rate": 0.00015440481462936613, |
|
"loss": 10.3403, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.1842244573848597, |
|
"grad_norm": 0.02533009834587574, |
|
"learning_rate": 0.00015420765693032035, |
|
"loss": 10.3352, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.18464796188459504, |
|
"grad_norm": 0.02682666666805744, |
|
"learning_rate": 0.0001540102003849253, |
|
"loss": 10.3351, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.18507146638433034, |
|
"grad_norm": 0.026133093982934952, |
|
"learning_rate": 0.0001538124460817573, |
|
"loss": 10.3377, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.18549497088406564, |
|
"grad_norm": 0.04049040377140045, |
|
"learning_rate": 0.00015361439511103414, |
|
"loss": 10.3402, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.18591847538380096, |
|
"grad_norm": 0.02733178623020649, |
|
"learning_rate": 0.00015341604856460904, |
|
"loss": 10.3352, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.18634197988353626, |
|
"grad_norm": 0.02330494113266468, |
|
"learning_rate": 0.0001532174075359649, |
|
"loss": 10.341, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.18676548438327156, |
|
"grad_norm": 0.03259949013590813, |
|
"learning_rate": 0.00015301847312020796, |
|
"loss": 10.3403, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.1871889888830069, |
|
"grad_norm": 0.05194835364818573, |
|
"learning_rate": 0.000152819246414062, |
|
"loss": 10.3413, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.1876124933827422, |
|
"grad_norm": 0.0325242318212986, |
|
"learning_rate": 0.0001526197285158621, |
|
"loss": 10.3396, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.1880359978824775, |
|
"grad_norm": 0.02710815891623497, |
|
"learning_rate": 0.00015241992052554876, |
|
"loss": 10.34, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.18845950238221282, |
|
"grad_norm": 0.024676240980625153, |
|
"learning_rate": 0.0001522198235446617, |
|
"loss": 10.3362, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.18888300688194812, |
|
"grad_norm": 0.02788936160504818, |
|
"learning_rate": 0.0001520194386763339, |
|
"loss": 10.3376, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.18930651138168342, |
|
"grad_norm": 0.03856251761317253, |
|
"learning_rate": 0.00015181876702528537, |
|
"loss": 10.3352, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.18973001588141875, |
|
"grad_norm": 0.03264036402106285, |
|
"learning_rate": 0.00015161780969781728, |
|
"loss": 10.338, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.19015352038115405, |
|
"grad_norm": 0.027694035321474075, |
|
"learning_rate": 0.00015141656780180558, |
|
"loss": 10.3354, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.19057702488088937, |
|
"grad_norm": 0.030413135886192322, |
|
"learning_rate": 0.00015121504244669515, |
|
"loss": 10.3383, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19100052938062467, |
|
"grad_norm": 0.03150556609034538, |
|
"learning_rate": 0.0001510132347434936, |
|
"loss": 10.3389, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.19142403388035997, |
|
"grad_norm": 0.029888266697525978, |
|
"learning_rate": 0.000150811145804765, |
|
"loss": 10.3394, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.1918475383800953, |
|
"grad_norm": 0.03171524032950401, |
|
"learning_rate": 0.000150608776744624, |
|
"loss": 10.334, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.1922710428798306, |
|
"grad_norm": 0.032492250204086304, |
|
"learning_rate": 0.00015040612867872947, |
|
"loss": 10.3366, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.1926945473795659, |
|
"grad_norm": 0.030303264036774635, |
|
"learning_rate": 0.00015020320272427843, |
|
"loss": 10.3366, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.19311805187930123, |
|
"grad_norm": 0.03860599547624588, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 10.3379, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.19354155637903653, |
|
"grad_norm": 0.03272419795393944, |
|
"learning_rate": 0.00014979652162614904, |
|
"loss": 10.3352, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.19396506087877183, |
|
"grad_norm": 0.038201820105314255, |
|
"learning_rate": 0.00014959276872450006, |
|
"loss": 10.3362, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.19438856537850716, |
|
"grad_norm": 0.025923024863004684, |
|
"learning_rate": 0.00014938874241834108, |
|
"loss": 10.3403, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.19481206987824246, |
|
"grad_norm": 0.03889621049165726, |
|
"learning_rate": 0.00014918444383246737, |
|
"loss": 10.3385, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.19523557437797776, |
|
"grad_norm": 0.031947895884513855, |
|
"learning_rate": 0.00014897987409317532, |
|
"loss": 10.3385, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.19565907887771308, |
|
"grad_norm": 0.03579488396644592, |
|
"learning_rate": 0.00014877503432825614, |
|
"loss": 10.3339, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.19608258337744838, |
|
"grad_norm": 0.033163949847221375, |
|
"learning_rate": 0.00014856992566698965, |
|
"loss": 10.3402, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.19650608787718368, |
|
"grad_norm": 0.03128167986869812, |
|
"learning_rate": 0.00014836454924013824, |
|
"loss": 10.3408, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.196929592376919, |
|
"grad_norm": 0.04108097031712532, |
|
"learning_rate": 0.00014815890617994034, |
|
"loss": 10.3394, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.1973530968766543, |
|
"grad_norm": 0.04260754585266113, |
|
"learning_rate": 0.0001479529976201044, |
|
"loss": 10.3428, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.19777660137638964, |
|
"grad_norm": 0.027531959116458893, |
|
"learning_rate": 0.00014774682469580248, |
|
"loss": 10.3395, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.19820010587612494, |
|
"grad_norm": 0.028333760797977448, |
|
"learning_rate": 0.00014754038854366424, |
|
"loss": 10.3374, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.19862361037586024, |
|
"grad_norm": 0.029396837577223778, |
|
"learning_rate": 0.00014733369030177042, |
|
"loss": 10.3363, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.19904711487559557, |
|
"grad_norm": 0.029380813241004944, |
|
"learning_rate": 0.00014712673110964665, |
|
"loss": 10.3372, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.19947061937533087, |
|
"grad_norm": 0.02283712849020958, |
|
"learning_rate": 0.0001469195121082571, |
|
"loss": 10.3408, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.19989412387506617, |
|
"grad_norm": 0.025367606431245804, |
|
"learning_rate": 0.00014671203443999845, |
|
"loss": 10.3383, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.2003176283748015, |
|
"grad_norm": 0.034685924649238586, |
|
"learning_rate": 0.0001465042992486933, |
|
"loss": 10.3373, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.2007411328745368, |
|
"grad_norm": 0.0398382693529129, |
|
"learning_rate": 0.00014629630767958396, |
|
"loss": 10.3374, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.2011646373742721, |
|
"grad_norm": 0.03815117105841637, |
|
"learning_rate": 0.00014608806087932619, |
|
"loss": 10.3382, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.20158814187400742, |
|
"grad_norm": 0.028847893700003624, |
|
"learning_rate": 0.0001458795599959828, |
|
"loss": 10.3355, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.20201164637374272, |
|
"grad_norm": 0.033290982246398926, |
|
"learning_rate": 0.00014567080617901735, |
|
"loss": 10.3353, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.20243515087347802, |
|
"grad_norm": 0.03120148368179798, |
|
"learning_rate": 0.00014546180057928792, |
|
"loss": 10.3365, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.20285865537321335, |
|
"grad_norm": 0.03227855637669563, |
|
"learning_rate": 0.00014525254434904055, |
|
"loss": 10.3373, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.20328215987294865, |
|
"grad_norm": 0.02253713831305504, |
|
"learning_rate": 0.00014504303864190307, |
|
"loss": 10.3379, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.20370566437268395, |
|
"grad_norm": 0.027942582964897156, |
|
"learning_rate": 0.00014483328461287862, |
|
"loss": 10.3387, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.20412916887241928, |
|
"grad_norm": 0.028897034004330635, |
|
"learning_rate": 0.0001446232834183394, |
|
"loss": 10.3406, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.20455267337215458, |
|
"grad_norm": 0.03516876697540283, |
|
"learning_rate": 0.00014441303621602017, |
|
"loss": 10.3317, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.20497617787188988, |
|
"grad_norm": 0.030100248754024506, |
|
"learning_rate": 0.00014420254416501197, |
|
"loss": 10.3365, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.2053996823716252, |
|
"grad_norm": 0.020048066973686218, |
|
"learning_rate": 0.00014399180842575575, |
|
"loss": 10.3426, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.2058231868713605, |
|
"grad_norm": 0.031375959515571594, |
|
"learning_rate": 0.00014378083016003572, |
|
"loss": 10.3376, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.20624669137109583, |
|
"grad_norm": 0.034831635653972626, |
|
"learning_rate": 0.00014356961053097332, |
|
"loss": 10.3354, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.20667019587083113, |
|
"grad_norm": 0.030198190361261368, |
|
"learning_rate": 0.00014335815070302054, |
|
"loss": 10.3361, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.20709370037056643, |
|
"grad_norm": 0.031040605157613754, |
|
"learning_rate": 0.00014314645184195364, |
|
"loss": 10.3412, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.20751720487030176, |
|
"grad_norm": 0.05391615629196167, |
|
"learning_rate": 0.00014293451511486658, |
|
"loss": 10.3402, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.20794070937003706, |
|
"grad_norm": 0.030534790828824043, |
|
"learning_rate": 0.00014272234169016474, |
|
"loss": 10.3402, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.20836421386977236, |
|
"grad_norm": 0.03578052297234535, |
|
"learning_rate": 0.00014250993273755844, |
|
"loss": 10.3348, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.20878771836950769, |
|
"grad_norm": 0.03920895233750343, |
|
"learning_rate": 0.00014229728942805636, |
|
"loss": 10.3417, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.20921122286924299, |
|
"grad_norm": 0.030715953558683395, |
|
"learning_rate": 0.00014208441293395925, |
|
"loss": 10.3379, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.20963472736897829, |
|
"grad_norm": 0.036160390824079514, |
|
"learning_rate": 0.00014187130442885345, |
|
"loss": 10.3368, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.2100582318687136, |
|
"grad_norm": 0.032142747193574905, |
|
"learning_rate": 0.0001416579650876043, |
|
"loss": 10.3404, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.2104817363684489, |
|
"grad_norm": 0.02567223645746708, |
|
"learning_rate": 0.00014144439608634976, |
|
"loss": 10.3387, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.2109052408681842, |
|
"grad_norm": 0.03470413014292717, |
|
"learning_rate": 0.0001412305986024939, |
|
"loss": 10.3419, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.21132874536791954, |
|
"grad_norm": 0.036063164472579956, |
|
"learning_rate": 0.00014101657381470045, |
|
"loss": 10.3335, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.21175224986765484, |
|
"grad_norm": 0.02859325334429741, |
|
"learning_rate": 0.00014080232290288622, |
|
"loss": 10.3385, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21217575436739014, |
|
"grad_norm": 0.03691897913813591, |
|
"learning_rate": 0.00014058784704821465, |
|
"loss": 10.3371, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.21259925886712547, |
|
"grad_norm": 0.02370496280491352, |
|
"learning_rate": 0.0001403731474330893, |
|
"loss": 10.3373, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.21302276336686077, |
|
"grad_norm": 0.02717514894902706, |
|
"learning_rate": 0.0001401582252411473, |
|
"loss": 10.3362, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.2134462678665961, |
|
"grad_norm": 0.027684593573212624, |
|
"learning_rate": 0.00013994308165725288, |
|
"loss": 10.3407, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.2138697723663314, |
|
"grad_norm": 0.027036601677536964, |
|
"learning_rate": 0.00013972771786749074, |
|
"loss": 10.3387, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2142932768660667, |
|
"grad_norm": 0.03559018298983574, |
|
"learning_rate": 0.00013951213505915969, |
|
"loss": 10.3398, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.21471678136580202, |
|
"grad_norm": 0.04133779555559158, |
|
"learning_rate": 0.0001392963344207658, |
|
"loss": 10.3355, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.21514028586553732, |
|
"grad_norm": 0.03785044327378273, |
|
"learning_rate": 0.0001390803171420162, |
|
"loss": 10.3344, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.21556379036527262, |
|
"grad_norm": 0.023411711677908897, |
|
"learning_rate": 0.00013886408441381233, |
|
"loss": 10.3362, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.21598729486500795, |
|
"grad_norm": 0.0443277508020401, |
|
"learning_rate": 0.00013864763742824334, |
|
"loss": 10.339, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.21641079936474325, |
|
"grad_norm": 0.036806512624025345, |
|
"learning_rate": 0.0001384309773785796, |
|
"loss": 10.338, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.21683430386447855, |
|
"grad_norm": 0.02885564975440502, |
|
"learning_rate": 0.00013821410545926613, |
|
"loss": 10.3333, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.21725780836421388, |
|
"grad_norm": 0.03067517653107643, |
|
"learning_rate": 0.00013799702286591598, |
|
"loss": 10.3356, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.21768131286394918, |
|
"grad_norm": 0.03321646526455879, |
|
"learning_rate": 0.00013777973079530362, |
|
"loss": 10.3388, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.21810481736368448, |
|
"grad_norm": 0.03147870674729347, |
|
"learning_rate": 0.00013756223044535833, |
|
"loss": 10.3391, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.2185283218634198, |
|
"grad_norm": 0.02573389932513237, |
|
"learning_rate": 0.00013734452301515776, |
|
"loss": 10.3377, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.2189518263631551, |
|
"grad_norm": 0.026358777657151222, |
|
"learning_rate": 0.00013712660970492107, |
|
"loss": 10.3371, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.2193753308628904, |
|
"grad_norm": 0.02714933454990387, |
|
"learning_rate": 0.00013690849171600245, |
|
"loss": 10.3378, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.21979883536262573, |
|
"grad_norm": 0.02859034389257431, |
|
"learning_rate": 0.00013669017025088456, |
|
"loss": 10.3365, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.22022233986236103, |
|
"grad_norm": 0.044585928320884705, |
|
"learning_rate": 0.00013647164651317176, |
|
"loss": 10.3362, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22064584436209636, |
|
"grad_norm": 0.053858619183301926, |
|
"learning_rate": 0.00013625292170758356, |
|
"loss": 10.3373, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.22106934886183166, |
|
"grad_norm": 0.03403494879603386, |
|
"learning_rate": 0.00013603399703994787, |
|
"loss": 10.3309, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.22149285336156696, |
|
"grad_norm": 0.028249001130461693, |
|
"learning_rate": 0.00013581487371719457, |
|
"loss": 10.3379, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.2219163578613023, |
|
"grad_norm": 0.028280075639486313, |
|
"learning_rate": 0.00013559555294734868, |
|
"loss": 10.3388, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.2223398623610376, |
|
"grad_norm": 0.04397103190422058, |
|
"learning_rate": 0.00013537603593952367, |
|
"loss": 10.3335, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2227633668607729, |
|
"grad_norm": 0.035089749842882156, |
|
"learning_rate": 0.000135156323903915, |
|
"loss": 10.34, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.22318687136050822, |
|
"grad_norm": 0.03598684072494507, |
|
"learning_rate": 0.00013493641805179319, |
|
"loss": 10.3348, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.22361037586024352, |
|
"grad_norm": 0.03583105653524399, |
|
"learning_rate": 0.0001347163195954973, |
|
"loss": 10.3383, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.22403388035997882, |
|
"grad_norm": 0.03622949495911598, |
|
"learning_rate": 0.0001344960297484283, |
|
"loss": 10.3378, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.22445738485971414, |
|
"grad_norm": 0.027924714609980583, |
|
"learning_rate": 0.00013427554972504226, |
|
"loss": 10.3372, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.22488088935944944, |
|
"grad_norm": 0.047317858785390854, |
|
"learning_rate": 0.00013405488074084358, |
|
"loss": 10.3375, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.22530439385918474, |
|
"grad_norm": 0.031993038952350616, |
|
"learning_rate": 0.0001338340240123785, |
|
"loss": 10.3371, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.22572789835892007, |
|
"grad_norm": 0.03276574984192848, |
|
"learning_rate": 0.00013361298075722833, |
|
"loss": 10.3376, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.22615140285865537, |
|
"grad_norm": 0.024694286286830902, |
|
"learning_rate": 0.00013339175219400257, |
|
"loss": 10.34, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.22657490735839067, |
|
"grad_norm": 0.031688570976257324, |
|
"learning_rate": 0.00013317033954233246, |
|
"loss": 10.3411, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.226998411858126, |
|
"grad_norm": 0.03652056306600571, |
|
"learning_rate": 0.00013294874402286402, |
|
"loss": 10.3329, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.2274219163578613, |
|
"grad_norm": 0.03224468603730202, |
|
"learning_rate": 0.0001327269668572515, |
|
"loss": 10.3386, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.22784542085759663, |
|
"grad_norm": 0.034342508763074875, |
|
"learning_rate": 0.00013250500926815045, |
|
"loss": 10.3371, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.22826892535733193, |
|
"grad_norm": 0.030163973569869995, |
|
"learning_rate": 0.0001322828724792112, |
|
"loss": 10.336, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.22869242985706723, |
|
"grad_norm": 0.030578266829252243, |
|
"learning_rate": 0.00013206055771507197, |
|
"loss": 10.3391, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.22911593435680255, |
|
"grad_norm": 0.035477470606565475, |
|
"learning_rate": 0.00013183806620135216, |
|
"loss": 10.3384, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.22953943885653785, |
|
"grad_norm": 0.026009559631347656, |
|
"learning_rate": 0.00013161539916464558, |
|
"loss": 10.3369, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.22996294335627315, |
|
"grad_norm": 0.033704426139593124, |
|
"learning_rate": 0.00013139255783251367, |
|
"loss": 10.3369, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.23038644785600848, |
|
"grad_norm": 0.03469805791974068, |
|
"learning_rate": 0.00013116954343347882, |
|
"loss": 10.3359, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.23080995235574378, |
|
"grad_norm": 0.029503265395760536, |
|
"learning_rate": 0.0001309463571970175, |
|
"loss": 10.3337, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.23123345685547908, |
|
"grad_norm": 0.027178343385457993, |
|
"learning_rate": 0.0001307230003535535, |
|
"loss": 10.3383, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.2316569613552144, |
|
"grad_norm": 0.026484569534659386, |
|
"learning_rate": 0.00013049947413445125, |
|
"loss": 10.3411, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.2320804658549497, |
|
"grad_norm": 0.03568257763981819, |
|
"learning_rate": 0.00013027577977200883, |
|
"loss": 10.3351, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.232503970354685, |
|
"grad_norm": 0.044057317078113556, |
|
"learning_rate": 0.0001300519184994513, |
|
"loss": 10.3367, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.23292747485442034, |
|
"grad_norm": 0.03619583323597908, |
|
"learning_rate": 0.00012982789155092407, |
|
"loss": 10.3385, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.23335097935415564, |
|
"grad_norm": 0.042276639491319656, |
|
"learning_rate": 0.00012960370016148567, |
|
"loss": 10.337, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.23377448385389094, |
|
"grad_norm": 0.03055988810956478, |
|
"learning_rate": 0.00012937934556710143, |
|
"loss": 10.3385, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.23419798835362626, |
|
"grad_norm": 0.02854546532034874, |
|
"learning_rate": 0.00012915482900463624, |
|
"loss": 10.3393, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.23462149285336156, |
|
"grad_norm": 0.029309969395399094, |
|
"learning_rate": 0.00012893015171184797, |
|
"loss": 10.3319, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.23504499735309686, |
|
"grad_norm": 0.0332510843873024, |
|
"learning_rate": 0.00012870531492738065, |
|
"loss": 10.3338, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.2354685018528322, |
|
"grad_norm": 0.03669944778084755, |
|
"learning_rate": 0.00012848031989075754, |
|
"loss": 10.3325, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.2358920063525675, |
|
"grad_norm": 0.027661770582199097, |
|
"learning_rate": 0.00012825516784237436, |
|
"loss": 10.3382, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.23631551085230282, |
|
"grad_norm": 0.029674025252461433, |
|
"learning_rate": 0.0001280298600234924, |
|
"loss": 10.3387, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.23673901535203812, |
|
"grad_norm": 0.03104621358215809, |
|
"learning_rate": 0.00012780439767623181, |
|
"loss": 10.3354, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.23716251985177342, |
|
"grad_norm": 0.0300068948417902, |
|
"learning_rate": 0.0001275787820435645, |
|
"loss": 10.3396, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.23758602435150875, |
|
"grad_norm": 0.03742906451225281, |
|
"learning_rate": 0.00012735301436930758, |
|
"loss": 10.3364, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.23800952885124405, |
|
"grad_norm": 0.029214419424533844, |
|
"learning_rate": 0.0001271270958981163, |
|
"loss": 10.3368, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.23843303335097935, |
|
"grad_norm": 0.034154172986745834, |
|
"learning_rate": 0.00012690102787547722, |
|
"loss": 10.3364, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.23885653785071467, |
|
"grad_norm": 0.024321483448147774, |
|
"learning_rate": 0.00012667481154770148, |
|
"loss": 10.3348, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.23928004235044997, |
|
"grad_norm": 0.030538305640220642, |
|
"learning_rate": 0.0001264484481619177, |
|
"loss": 10.3374, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.23970354685018527, |
|
"grad_norm": 0.028275547549128532, |
|
"learning_rate": 0.00012622193896606528, |
|
"loss": 10.3343, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.2401270513499206, |
|
"grad_norm": 0.024137398228049278, |
|
"learning_rate": 0.00012599528520888757, |
|
"loss": 10.3363, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.2405505558496559, |
|
"grad_norm": 0.0387752428650856, |
|
"learning_rate": 0.00012576848813992475, |
|
"loss": 10.3355, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.2409740603493912, |
|
"grad_norm": 0.02671218290925026, |
|
"learning_rate": 0.00012554154900950708, |
|
"loss": 10.339, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.24139756484912653, |
|
"grad_norm": 0.031162571161985397, |
|
"learning_rate": 0.00012531446906874808, |
|
"loss": 10.3402, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.24182106934886183, |
|
"grad_norm": 0.03754870593547821, |
|
"learning_rate": 0.00012508724956953755, |
|
"loss": 10.3392, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.24224457384859713, |
|
"grad_norm": 0.030516209080815315, |
|
"learning_rate": 0.00012485989176453462, |
|
"loss": 10.3373, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.24266807834833246, |
|
"grad_norm": 0.04033865034580231, |
|
"learning_rate": 0.0001246323969071609, |
|
"loss": 10.3358, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.24309158284806776, |
|
"grad_norm": 0.0301966555416584, |
|
"learning_rate": 0.00012440476625159364, |
|
"loss": 10.335, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.24351508734780308, |
|
"grad_norm": 0.036701519042253494, |
|
"learning_rate": 0.00012417700105275866, |
|
"loss": 10.3382, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.24393859184753838, |
|
"grad_norm": 0.02948085404932499, |
|
"learning_rate": 0.00012394910256632356, |
|
"loss": 10.3342, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.24436209634727368, |
|
"grad_norm": 0.0245877243578434, |
|
"learning_rate": 0.00012372107204869077, |
|
"loss": 10.3364, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.244785600847009, |
|
"grad_norm": 0.023439116775989532, |
|
"learning_rate": 0.00012349291075699058, |
|
"loss": 10.3361, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.2452091053467443, |
|
"grad_norm": 0.026123927906155586, |
|
"learning_rate": 0.00012326461994907424, |
|
"loss": 10.3398, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.2456326098464796, |
|
"grad_norm": 0.03437687084078789, |
|
"learning_rate": 0.000123036200883507, |
|
"loss": 10.3373, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.24605611434621494, |
|
"grad_norm": 0.03299521282315254, |
|
"learning_rate": 0.00012280765481956124, |
|
"loss": 10.3344, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.24647961884595024, |
|
"grad_norm": 0.03710121661424637, |
|
"learning_rate": 0.0001225789830172094, |
|
"loss": 10.3354, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.24690312334568554, |
|
"grad_norm": 0.032498303800821304, |
|
"learning_rate": 0.0001223501867371173, |
|
"loss": 10.3344, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.24732662784542087, |
|
"grad_norm": 0.03610834851861, |
|
"learning_rate": 0.00012212126724063676, |
|
"loss": 10.3359, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.24775013234515617, |
|
"grad_norm": 0.03149677813053131, |
|
"learning_rate": 0.00012189222578979903, |
|
"loss": 10.3376, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.24817363684489147, |
|
"grad_norm": 0.031013086438179016, |
|
"learning_rate": 0.00012166306364730766, |
|
"loss": 10.3333, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.2485971413446268, |
|
"grad_norm": 0.030261732637882233, |
|
"learning_rate": 0.00012143378207653164, |
|
"loss": 10.3327, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.2490206458443621, |
|
"grad_norm": 0.030076345428824425, |
|
"learning_rate": 0.00012120438234149827, |
|
"loss": 10.3393, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.2494441503440974, |
|
"grad_norm": 0.027937186881899834, |
|
"learning_rate": 0.00012097486570688634, |
|
"loss": 10.3386, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.24986765484383272, |
|
"grad_norm": 0.037603769451379776, |
|
"learning_rate": 0.00012074523343801906, |
|
"loss": 10.3306, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.25029115934356805, |
|
"grad_norm": 0.027752617374062538, |
|
"learning_rate": 0.0001205154868008572, |
|
"loss": 10.3352, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.2507146638433033, |
|
"grad_norm": 0.030105147510766983, |
|
"learning_rate": 0.000120285627061992, |
|
"loss": 10.3306, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.25113816834303865, |
|
"grad_norm": 0.026609288528561592, |
|
"learning_rate": 0.00012005565548863822, |
|
"loss": 10.3347, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.251561672842774, |
|
"grad_norm": 0.04250922426581383, |
|
"learning_rate": 0.00011982557334862723, |
|
"loss": 10.3303, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.25198517734250925, |
|
"grad_norm": 0.03030312806367874, |
|
"learning_rate": 0.00011959538191039985, |
|
"loss": 10.3389, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.2524086818422446, |
|
"grad_norm": 0.03314143419265747, |
|
"learning_rate": 0.00011936508244299948, |
|
"loss": 10.336, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.2528321863419799, |
|
"grad_norm": 0.03237884119153023, |
|
"learning_rate": 0.0001191346762160652, |
|
"loss": 10.3406, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.2532556908417152, |
|
"grad_norm": 0.02621031180024147, |
|
"learning_rate": 0.00011890416449982451, |
|
"loss": 10.3367, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.2536791953414505, |
|
"grad_norm": 0.023484721779823303, |
|
"learning_rate": 0.00011867354856508656, |
|
"loss": 10.3327, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.25410269984118583, |
|
"grad_norm": 0.02962653897702694, |
|
"learning_rate": 0.00011844282968323501, |
|
"loss": 10.3359, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2545262043409211, |
|
"grad_norm": 0.028051255270838737, |
|
"learning_rate": 0.0001182120091262211, |
|
"loss": 10.3356, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.25494970884065643, |
|
"grad_norm": 0.029255535453557968, |
|
"learning_rate": 0.00011798108816655657, |
|
"loss": 10.3365, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.25537321334039176, |
|
"grad_norm": 0.029102357104420662, |
|
"learning_rate": 0.00011775006807730667, |
|
"loss": 10.3347, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.25579671784012703, |
|
"grad_norm": 0.02935311570763588, |
|
"learning_rate": 0.00011751895013208325, |
|
"loss": 10.3369, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.25622022233986236, |
|
"grad_norm": 0.033430956304073334, |
|
"learning_rate": 0.00011728773560503751, |
|
"loss": 10.3381, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.2566437268395977, |
|
"grad_norm": 0.03818434476852417, |
|
"learning_rate": 0.00011705642577085316, |
|
"loss": 10.3354, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.25706723133933296, |
|
"grad_norm": 0.029122449457645416, |
|
"learning_rate": 0.00011682502190473938, |
|
"loss": 10.3382, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.2574907358390683, |
|
"grad_norm": 0.030079467222094536, |
|
"learning_rate": 0.00011659352528242366, |
|
"loss": 10.3413, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.2579142403388036, |
|
"grad_norm": 0.02247581258416176, |
|
"learning_rate": 0.00011636193718014494, |
|
"loss": 10.3364, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.2583377448385389, |
|
"grad_norm": 0.032431941479444504, |
|
"learning_rate": 0.00011613025887464641, |
|
"loss": 10.3323, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2587612493382742, |
|
"grad_norm": 0.032824281603097916, |
|
"learning_rate": 0.00011589849164316862, |
|
"loss": 10.3351, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.25918475383800954, |
|
"grad_norm": 0.036410853266716, |
|
"learning_rate": 0.00011566663676344232, |
|
"loss": 10.3414, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.2596082583377448, |
|
"grad_norm": 0.03686416149139404, |
|
"learning_rate": 0.00011543469551368144, |
|
"loss": 10.3375, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.26003176283748014, |
|
"grad_norm": 0.04031093418598175, |
|
"learning_rate": 0.00011520266917257618, |
|
"loss": 10.3361, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.26045526733721547, |
|
"grad_norm": 0.027354370802640915, |
|
"learning_rate": 0.00011497055901928577, |
|
"loss": 10.3334, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.26087877183695074, |
|
"grad_norm": 0.029079321771860123, |
|
"learning_rate": 0.00011473836633343144, |
|
"loss": 10.3376, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.26130227633668607, |
|
"grad_norm": 0.027393948286771774, |
|
"learning_rate": 0.00011450609239508951, |
|
"loss": 10.3359, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.2617257808364214, |
|
"grad_norm": 0.037023283541202545, |
|
"learning_rate": 0.00011427373848478422, |
|
"loss": 10.336, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.2621492853361567, |
|
"grad_norm": 0.04202662780880928, |
|
"learning_rate": 0.00011404130588348072, |
|
"loss": 10.3383, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.262572789835892, |
|
"grad_norm": 0.031701017171144485, |
|
"learning_rate": 0.00011380879587257792, |
|
"loss": 10.3356, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2629962943356273, |
|
"grad_norm": 0.03459370136260986, |
|
"learning_rate": 0.00011357620973390151, |
|
"loss": 10.3337, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.26341979883536265, |
|
"grad_norm": 0.03404482826590538, |
|
"learning_rate": 0.0001133435487496969, |
|
"loss": 10.3373, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.2638433033350979, |
|
"grad_norm": 0.03435559198260307, |
|
"learning_rate": 0.0001131108142026221, |
|
"loss": 10.3394, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.26426680783483325, |
|
"grad_norm": 0.04172271490097046, |
|
"learning_rate": 0.00011287800737574072, |
|
"loss": 10.3312, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.2646903123345686, |
|
"grad_norm": 0.024423452094197273, |
|
"learning_rate": 0.00011264512955251478, |
|
"loss": 10.3384, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.26511381683430385, |
|
"grad_norm": 0.036313965916633606, |
|
"learning_rate": 0.00011241218201679773, |
|
"loss": 10.3343, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.2655373213340392, |
|
"grad_norm": 0.03670899197459221, |
|
"learning_rate": 0.00011217916605282728, |
|
"loss": 10.3421, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.2659608258337745, |
|
"grad_norm": 0.04206259921193123, |
|
"learning_rate": 0.00011194608294521854, |
|
"loss": 10.3304, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.2663843303335098, |
|
"grad_norm": 0.029241429641842842, |
|
"learning_rate": 0.00011171293397895665, |
|
"loss": 10.3403, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.2668078348332451, |
|
"grad_norm": 0.029772555455565453, |
|
"learning_rate": 0.00011147972043938988, |
|
"loss": 10.3356, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.26723133933298043, |
|
"grad_norm": 0.038933563977479935, |
|
"learning_rate": 0.00011124644361222245, |
|
"loss": 10.3396, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.2676548438327157, |
|
"grad_norm": 0.03326569125056267, |
|
"learning_rate": 0.00011101310478350754, |
|
"loss": 10.337, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.26807834833245103, |
|
"grad_norm": 0.03632461279630661, |
|
"learning_rate": 0.00011077970523964011, |
|
"loss": 10.337, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.26850185283218636, |
|
"grad_norm": 0.03578447178006172, |
|
"learning_rate": 0.00011054624626734984, |
|
"loss": 10.3358, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.26892535733192163, |
|
"grad_norm": 0.032311227172613144, |
|
"learning_rate": 0.0001103127291536941, |
|
"loss": 10.3417, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.26934886183165696, |
|
"grad_norm": 0.03721488639712334, |
|
"learning_rate": 0.00011007915518605067, |
|
"loss": 10.3341, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.2697723663313923, |
|
"grad_norm": 0.026686688885092735, |
|
"learning_rate": 0.00010984552565211089, |
|
"loss": 10.3337, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.27019587083112756, |
|
"grad_norm": 0.03955764323472977, |
|
"learning_rate": 0.00010961184183987233, |
|
"loss": 10.3331, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.2706193753308629, |
|
"grad_norm": 0.024867044761776924, |
|
"learning_rate": 0.00010937810503763191, |
|
"loss": 10.3319, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.2710428798305982, |
|
"grad_norm": 0.026639580726623535, |
|
"learning_rate": 0.00010914431653397856, |
|
"loss": 10.3394, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2714663843303335, |
|
"grad_norm": 0.04265257716178894, |
|
"learning_rate": 0.00010891047761778637, |
|
"loss": 10.3355, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.2718898888300688, |
|
"grad_norm": 0.03401639685034752, |
|
"learning_rate": 0.00010867658957820723, |
|
"loss": 10.3362, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.27231339332980414, |
|
"grad_norm": 0.03278350457549095, |
|
"learning_rate": 0.00010844265370466393, |
|
"loss": 10.3369, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.2727368978295394, |
|
"grad_norm": 0.03625522553920746, |
|
"learning_rate": 0.00010820867128684292, |
|
"loss": 10.3386, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.27316040232927474, |
|
"grad_norm": 0.028470052406191826, |
|
"learning_rate": 0.0001079746436146873, |
|
"loss": 10.3359, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.27358390682901007, |
|
"grad_norm": 0.03894231840968132, |
|
"learning_rate": 0.00010774057197838963, |
|
"loss": 10.3363, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.27400741132874534, |
|
"grad_norm": 0.04798604175448418, |
|
"learning_rate": 0.00010750645766838477, |
|
"loss": 10.3351, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.27443091582848067, |
|
"grad_norm": 0.038566704839468, |
|
"learning_rate": 0.00010727230197534299, |
|
"loss": 10.3386, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.274854420328216, |
|
"grad_norm": 0.038909364491701126, |
|
"learning_rate": 0.0001070381061901626, |
|
"loss": 10.3376, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.27527792482795127, |
|
"grad_norm": 0.029502833262085915, |
|
"learning_rate": 0.00010680387160396293, |
|
"loss": 10.3356, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2757014293276866, |
|
"grad_norm": 0.028758224099874496, |
|
"learning_rate": 0.00010656959950807728, |
|
"loss": 10.3313, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.2761249338274219, |
|
"grad_norm": 0.024828476831316948, |
|
"learning_rate": 0.0001063352911940457, |
|
"loss": 10.3318, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.27654843832715725, |
|
"grad_norm": 0.02429981529712677, |
|
"learning_rate": 0.00010610094795360795, |
|
"loss": 10.333, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.2769719428268925, |
|
"grad_norm": 0.028827672824263573, |
|
"learning_rate": 0.00010586657107869626, |
|
"loss": 10.3318, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.27739544732662785, |
|
"grad_norm": 0.04222332313656807, |
|
"learning_rate": 0.00010563216186142839, |
|
"loss": 10.3354, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.2778189518263632, |
|
"grad_norm": 0.04045010358095169, |
|
"learning_rate": 0.00010539772159410036, |
|
"loss": 10.3356, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.27824245632609845, |
|
"grad_norm": 0.02479146048426628, |
|
"learning_rate": 0.00010516325156917926, |
|
"loss": 10.3395, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.2786659608258338, |
|
"grad_norm": 0.036765843629837036, |
|
"learning_rate": 0.00010492875307929644, |
|
"loss": 10.3334, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.2790894653255691, |
|
"grad_norm": 0.02949843928217888, |
|
"learning_rate": 0.00010469422741724003, |
|
"loss": 10.3405, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.2795129698253044, |
|
"grad_norm": 0.02545243129134178, |
|
"learning_rate": 0.000104459675875948, |
|
"loss": 10.3339, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2799364743250397, |
|
"grad_norm": 0.032835401594638824, |
|
"learning_rate": 0.00010422509974850099, |
|
"loss": 10.3426, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.28035997882477504, |
|
"grad_norm": 0.029005464166402817, |
|
"learning_rate": 0.00010399050032811519, |
|
"loss": 10.3353, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.2807834833245103, |
|
"grad_norm": 0.02459227293729782, |
|
"learning_rate": 0.00010375587890813518, |
|
"loss": 10.3345, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.28120698782424564, |
|
"grad_norm": 0.04449470341205597, |
|
"learning_rate": 0.00010352123678202685, |
|
"loss": 10.3358, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.28163049232398096, |
|
"grad_norm": 0.025347614660859108, |
|
"learning_rate": 0.00010328657524337029, |
|
"loss": 10.3357, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.28205399682371624, |
|
"grad_norm": 0.028995616361498833, |
|
"learning_rate": 0.00010305189558585248, |
|
"loss": 10.3386, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.28247750132345156, |
|
"grad_norm": 0.029563058167696, |
|
"learning_rate": 0.00010281719910326042, |
|
"loss": 10.3369, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.2829010058231869, |
|
"grad_norm": 0.03033272735774517, |
|
"learning_rate": 0.00010258248708947375, |
|
"loss": 10.337, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.28332451032292216, |
|
"grad_norm": 0.03558272868394852, |
|
"learning_rate": 0.00010234776083845787, |
|
"loss": 10.3345, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.2837480148226575, |
|
"grad_norm": 0.023746639490127563, |
|
"learning_rate": 0.00010211302164425655, |
|
"loss": 10.3326, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2841715193223928, |
|
"grad_norm": 0.02846304513514042, |
|
"learning_rate": 0.00010187827080098498, |
|
"loss": 10.3353, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.2845950238221281, |
|
"grad_norm": 0.035858284682035446, |
|
"learning_rate": 0.00010164350960282252, |
|
"loss": 10.336, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.2850185283218634, |
|
"grad_norm": 0.026505351066589355, |
|
"learning_rate": 0.00010140873934400567, |
|
"loss": 10.3382, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.28544203282159875, |
|
"grad_norm": 0.02379724755883217, |
|
"learning_rate": 0.00010117396131882087, |
|
"loss": 10.3372, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.28544203282159875, |
|
"eval_loss": 10.334465026855469, |
|
"eval_runtime": 3.4817, |
|
"eval_samples_per_second": 285.783, |
|
"eval_steps_per_second": 143.035, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.285865537321334, |
|
"grad_norm": 0.030138272792100906, |
|
"learning_rate": 0.00010093917682159735, |
|
"loss": 10.3361, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.28628904182106935, |
|
"grad_norm": 0.023656543344259262, |
|
"learning_rate": 0.00010070438714670002, |
|
"loss": 10.3345, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.2867125463208047, |
|
"grad_norm": 0.035104621201753616, |
|
"learning_rate": 0.00010046959358852244, |
|
"loss": 10.3347, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.28713605082053995, |
|
"grad_norm": 0.030601153150200844, |
|
"learning_rate": 0.00010023479744147936, |
|
"loss": 10.3325, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.2875595553202753, |
|
"grad_norm": 0.030649134889245033, |
|
"learning_rate": 0.0001, |
|
"loss": 10.3351, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.2879830598200106, |
|
"grad_norm": 0.04906442388892174, |
|
"learning_rate": 9.976520255852065e-05, |
|
"loss": 10.3382, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2884065643197459, |
|
"grad_norm": 0.036667853593826294, |
|
"learning_rate": 9.953040641147761e-05, |
|
"loss": 10.3336, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.2888300688194812, |
|
"grad_norm": 0.032969675958156586, |
|
"learning_rate": 9.929561285329999e-05, |
|
"loss": 10.3347, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.28925357331921653, |
|
"grad_norm": 0.04049724340438843, |
|
"learning_rate": 9.906082317840266e-05, |
|
"loss": 10.337, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.2896770778189518, |
|
"grad_norm": 0.03217809647321701, |
|
"learning_rate": 9.882603868117917e-05, |
|
"loss": 10.332, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.29010058231868713, |
|
"grad_norm": 0.03514156490564346, |
|
"learning_rate": 9.859126065599434e-05, |
|
"loss": 10.3331, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.29052408681842246, |
|
"grad_norm": 0.02941136807203293, |
|
"learning_rate": 9.83564903971775e-05, |
|
"loss": 10.3325, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.29094759131815773, |
|
"grad_norm": 0.026193542405962944, |
|
"learning_rate": 9.812172919901506e-05, |
|
"loss": 10.3382, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.29137109581789306, |
|
"grad_norm": 0.031999170780181885, |
|
"learning_rate": 9.788697835574347e-05, |
|
"loss": 10.3378, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.2917946003176284, |
|
"grad_norm": 0.0316544771194458, |
|
"learning_rate": 9.765223916154217e-05, |
|
"loss": 10.3369, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.2922181048173637, |
|
"grad_norm": 0.030304009094834328, |
|
"learning_rate": 9.741751291052626e-05, |
|
"loss": 10.3381, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.292641609317099, |
|
"grad_norm": 0.035043906420469284, |
|
"learning_rate": 9.718280089673959e-05, |
|
"loss": 10.3327, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.2930651138168343, |
|
"grad_norm": 0.031086809933185577, |
|
"learning_rate": 9.694810441414754e-05, |
|
"loss": 10.3331, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.29348861831656964, |
|
"grad_norm": 0.03664236515760422, |
|
"learning_rate": 9.671342475662975e-05, |
|
"loss": 10.3384, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.2939121228163049, |
|
"grad_norm": 0.036936696618795395, |
|
"learning_rate": 9.647876321797314e-05, |
|
"loss": 10.3379, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.29433562731604024, |
|
"grad_norm": 0.03095340169966221, |
|
"learning_rate": 9.624412109186484e-05, |
|
"loss": 10.3351, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.29475913181577557, |
|
"grad_norm": 0.026670867577195168, |
|
"learning_rate": 9.600949967188484e-05, |
|
"loss": 10.3324, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.29518263631551084, |
|
"grad_norm": 0.03176816925406456, |
|
"learning_rate": 9.577490025149903e-05, |
|
"loss": 10.336, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.29560614081524617, |
|
"grad_norm": 0.041850414127111435, |
|
"learning_rate": 9.554032412405204e-05, |
|
"loss": 10.3335, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.2960296453149815, |
|
"grad_norm": 0.02709740586578846, |
|
"learning_rate": 9.530577258275998e-05, |
|
"loss": 10.335, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.29645314981471677, |
|
"grad_norm": 0.03338076174259186, |
|
"learning_rate": 9.507124692070355e-05, |
|
"loss": 10.3393, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2968766543144521, |
|
"grad_norm": 0.03312176465988159, |
|
"learning_rate": 9.483674843082075e-05, |
|
"loss": 10.336, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.2973001588141874, |
|
"grad_norm": 0.026730258017778397, |
|
"learning_rate": 9.460227840589967e-05, |
|
"loss": 10.3366, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.2977236633139227, |
|
"grad_norm": 0.04017185419797897, |
|
"learning_rate": 9.436783813857161e-05, |
|
"loss": 10.3349, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.298147167813658, |
|
"grad_norm": 0.025352856144309044, |
|
"learning_rate": 9.413342892130376e-05, |
|
"loss": 10.331, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.29857067231339335, |
|
"grad_norm": 0.04028523713350296, |
|
"learning_rate": 9.389905204639206e-05, |
|
"loss": 10.3326, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.2989941768131286, |
|
"grad_norm": 0.034634605050086975, |
|
"learning_rate": 9.366470880595434e-05, |
|
"loss": 10.3326, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.29941768131286395, |
|
"grad_norm": 0.037610601633787155, |
|
"learning_rate": 9.343040049192274e-05, |
|
"loss": 10.3342, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.2998411858125993, |
|
"grad_norm": 0.0313008613884449, |
|
"learning_rate": 9.31961283960371e-05, |
|
"loss": 10.3337, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.30026469031233455, |
|
"grad_norm": 0.03718707337975502, |
|
"learning_rate": 9.296189380983747e-05, |
|
"loss": 10.3325, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.3006881948120699, |
|
"grad_norm": 0.03456999734044075, |
|
"learning_rate": 9.272769802465705e-05, |
|
"loss": 10.3325, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3011116993118052, |
|
"grad_norm": 0.03181077539920807, |
|
"learning_rate": 9.249354233161523e-05, |
|
"loss": 10.3338, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.3015352038115405, |
|
"grad_norm": 0.0410895049571991, |
|
"learning_rate": 9.225942802161042e-05, |
|
"loss": 10.3376, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.3019587083112758, |
|
"grad_norm": 0.05550311505794525, |
|
"learning_rate": 9.202535638531273e-05, |
|
"loss": 10.3373, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.30238221281101113, |
|
"grad_norm": 0.03022390976548195, |
|
"learning_rate": 9.179132871315708e-05, |
|
"loss": 10.3323, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.3028057173107464, |
|
"grad_norm": 0.058899421244859695, |
|
"learning_rate": 9.155734629533611e-05, |
|
"loss": 10.3373, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.30322922181048173, |
|
"grad_norm": 0.0289511289447546, |
|
"learning_rate": 9.132341042179279e-05, |
|
"loss": 10.3365, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.30365272631021706, |
|
"grad_norm": 0.024074682965874672, |
|
"learning_rate": 9.108952238221365e-05, |
|
"loss": 10.3343, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.30407623080995233, |
|
"grad_norm": 0.03383636474609375, |
|
"learning_rate": 9.085568346602145e-05, |
|
"loss": 10.3376, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.30449973530968766, |
|
"grad_norm": 0.03680823743343353, |
|
"learning_rate": 9.062189496236813e-05, |
|
"loss": 10.332, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.304923239809423, |
|
"grad_norm": 0.034177515655756, |
|
"learning_rate": 9.038815816012767e-05, |
|
"loss": 10.3365, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.30534674430915826, |
|
"grad_norm": 0.04184051603078842, |
|
"learning_rate": 9.015447434788915e-05, |
|
"loss": 10.3308, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.3057702488088936, |
|
"grad_norm": 0.031081423163414, |
|
"learning_rate": 8.992084481394934e-05, |
|
"loss": 10.332, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.3061937533086289, |
|
"grad_norm": 0.04926011338829994, |
|
"learning_rate": 8.968727084630594e-05, |
|
"loss": 10.3388, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.30661725780836424, |
|
"grad_norm": 0.03448108211159706, |
|
"learning_rate": 8.945375373265017e-05, |
|
"loss": 10.3371, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.3070407623080995, |
|
"grad_norm": 0.030851799994707108, |
|
"learning_rate": 8.92202947603599e-05, |
|
"loss": 10.3402, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.30746426680783484, |
|
"grad_norm": 0.03434957191348076, |
|
"learning_rate": 8.898689521649251e-05, |
|
"loss": 10.3371, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.30788777130757017, |
|
"grad_norm": 0.034013282507658005, |
|
"learning_rate": 8.875355638777757e-05, |
|
"loss": 10.3344, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.30831127580730544, |
|
"grad_norm": 0.03570681810379028, |
|
"learning_rate": 8.852027956061015e-05, |
|
"loss": 10.3333, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.30873478030704077, |
|
"grad_norm": 0.04296912997961044, |
|
"learning_rate": 8.828706602104337e-05, |
|
"loss": 10.3388, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.3091582848067761, |
|
"grad_norm": 0.037189483642578125, |
|
"learning_rate": 8.805391705478147e-05, |
|
"loss": 10.335, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.30958178930651137, |
|
"grad_norm": 0.02627628669142723, |
|
"learning_rate": 8.782083394717272e-05, |
|
"loss": 10.3354, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.3100052938062467, |
|
"grad_norm": 0.026290280744433403, |
|
"learning_rate": 8.758781798320233e-05, |
|
"loss": 10.3344, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.310428798305982, |
|
"grad_norm": 0.033993784338235855, |
|
"learning_rate": 8.735487044748523e-05, |
|
"loss": 10.3324, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.3108523028057173, |
|
"grad_norm": 0.02894951030611992, |
|
"learning_rate": 8.712199262425927e-05, |
|
"loss": 10.3343, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.3112758073054526, |
|
"grad_norm": 0.02918967790901661, |
|
"learning_rate": 8.68891857973779e-05, |
|
"loss": 10.3364, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.31169931180518795, |
|
"grad_norm": 0.04133673012256622, |
|
"learning_rate": 8.665645125030311e-05, |
|
"loss": 10.3339, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.3121228163049232, |
|
"grad_norm": 0.03206159546971321, |
|
"learning_rate": 8.642379026609849e-05, |
|
"loss": 10.3422, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.31254632080465855, |
|
"grad_norm": 0.03564688563346863, |
|
"learning_rate": 8.619120412742212e-05, |
|
"loss": 10.3388, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.3129698253043939, |
|
"grad_norm": 0.033441901206970215, |
|
"learning_rate": 8.595869411651931e-05, |
|
"loss": 10.3375, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.31339332980412915, |
|
"grad_norm": 0.0351875014603138, |
|
"learning_rate": 8.572626151521581e-05, |
|
"loss": 10.3327, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3138168343038645, |
|
"grad_norm": 0.046769220381975174, |
|
"learning_rate": 8.549390760491051e-05, |
|
"loss": 10.3333, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.3142403388035998, |
|
"grad_norm": 0.02873465232551098, |
|
"learning_rate": 8.526163366656858e-05, |
|
"loss": 10.3342, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.3146638433033351, |
|
"grad_norm": 0.03012407198548317, |
|
"learning_rate": 8.502944098071427e-05, |
|
"loss": 10.334, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.3150873478030704, |
|
"grad_norm": 0.03743249177932739, |
|
"learning_rate": 8.479733082742384e-05, |
|
"loss": 10.3344, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.31551085230280573, |
|
"grad_norm": 0.02463219314813614, |
|
"learning_rate": 8.456530448631855e-05, |
|
"loss": 10.3322, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.315934356802541, |
|
"grad_norm": 0.035319242626428604, |
|
"learning_rate": 8.433336323655774e-05, |
|
"loss": 10.3363, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.31635786130227633, |
|
"grad_norm": 0.03892083838582039, |
|
"learning_rate": 8.41015083568314e-05, |
|
"loss": 10.3344, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.31678136580201166, |
|
"grad_norm": 0.04276084899902344, |
|
"learning_rate": 8.386974112535358e-05, |
|
"loss": 10.3367, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.31720487030174693, |
|
"grad_norm": 0.03648482635617256, |
|
"learning_rate": 8.363806281985509e-05, |
|
"loss": 10.333, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.31762837480148226, |
|
"grad_norm": 0.03600320592522621, |
|
"learning_rate": 8.340647471757636e-05, |
|
"loss": 10.3314, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3180518793012176, |
|
"grad_norm": 0.0343911312520504, |
|
"learning_rate": 8.317497809526063e-05, |
|
"loss": 10.3391, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.31847538380095286, |
|
"grad_norm": 0.028392106294631958, |
|
"learning_rate": 8.294357422914685e-05, |
|
"loss": 10.3343, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.3188988883006882, |
|
"grad_norm": 0.03276420384645462, |
|
"learning_rate": 8.27122643949625e-05, |
|
"loss": 10.3329, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.3193223928004235, |
|
"grad_norm": 0.030692044645547867, |
|
"learning_rate": 8.248104986791676e-05, |
|
"loss": 10.3287, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.3197458973001588, |
|
"grad_norm": 0.037886835634708405, |
|
"learning_rate": 8.224993192269334e-05, |
|
"loss": 10.3316, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.3201694017998941, |
|
"grad_norm": 0.029941901564598083, |
|
"learning_rate": 8.201891183344345e-05, |
|
"loss": 10.3293, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.32059290629962944, |
|
"grad_norm": 0.0404081791639328, |
|
"learning_rate": 8.178799087377894e-05, |
|
"loss": 10.3364, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.32101641079936477, |
|
"grad_norm": 0.03296668082475662, |
|
"learning_rate": 8.1557170316765e-05, |
|
"loss": 10.3363, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.32143991529910004, |
|
"grad_norm": 0.03453279659152031, |
|
"learning_rate": 8.132645143491346e-05, |
|
"loss": 10.3369, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.32186341979883537, |
|
"grad_norm": 0.042309049516916275, |
|
"learning_rate": 8.10958355001755e-05, |
|
"loss": 10.3325, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3222869242985707, |
|
"grad_norm": 0.03626590222120285, |
|
"learning_rate": 8.086532378393482e-05, |
|
"loss": 10.3374, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.32271042879830597, |
|
"grad_norm": 0.029558565467596054, |
|
"learning_rate": 8.063491755700051e-05, |
|
"loss": 10.3367, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.3231339332980413, |
|
"grad_norm": 0.031136656180024147, |
|
"learning_rate": 8.04046180896002e-05, |
|
"loss": 10.3312, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.3235574377977766, |
|
"grad_norm": 0.03206343576312065, |
|
"learning_rate": 8.017442665137278e-05, |
|
"loss": 10.3357, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.3239809422975119, |
|
"grad_norm": 0.04191575571894646, |
|
"learning_rate": 7.994434451136177e-05, |
|
"loss": 10.3358, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.3244044467972472, |
|
"grad_norm": 0.03315071761608124, |
|
"learning_rate": 7.971437293800803e-05, |
|
"loss": 10.3338, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.32482795129698255, |
|
"grad_norm": 0.03882451355457306, |
|
"learning_rate": 7.948451319914282e-05, |
|
"loss": 10.3311, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.3252514557967178, |
|
"grad_norm": 0.046539660543203354, |
|
"learning_rate": 7.925476656198095e-05, |
|
"loss": 10.3364, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.32567496029645315, |
|
"grad_norm": 0.035186078399419785, |
|
"learning_rate": 7.90251342931137e-05, |
|
"loss": 10.3322, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.3260984647961885, |
|
"grad_norm": 0.02894584834575653, |
|
"learning_rate": 7.879561765850176e-05, |
|
"loss": 10.335, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.32652196929592375, |
|
"grad_norm": 0.05743710324168205, |
|
"learning_rate": 7.856621792346837e-05, |
|
"loss": 10.3358, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.3269454737956591, |
|
"grad_norm": 0.03184637799859047, |
|
"learning_rate": 7.833693635269235e-05, |
|
"loss": 10.3323, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.3273689782953944, |
|
"grad_norm": 0.028403330594301224, |
|
"learning_rate": 7.8107774210201e-05, |
|
"loss": 10.3368, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.3277924827951297, |
|
"grad_norm": 0.03520669415593147, |
|
"learning_rate": 7.78787327593633e-05, |
|
"loss": 10.3329, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.328215987294865, |
|
"grad_norm": 0.033936478197574615, |
|
"learning_rate": 7.764981326288273e-05, |
|
"loss": 10.3354, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.32863949179460034, |
|
"grad_norm": 0.04284480959177017, |
|
"learning_rate": 7.74210169827906e-05, |
|
"loss": 10.3353, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.3290629962943356, |
|
"grad_norm": 0.035401035100221634, |
|
"learning_rate": 7.719234518043881e-05, |
|
"loss": 10.3383, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.32948650079407094, |
|
"grad_norm": 0.02768767438828945, |
|
"learning_rate": 7.696379911649303e-05, |
|
"loss": 10.333, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.32991000529380626, |
|
"grad_norm": 0.03562779724597931, |
|
"learning_rate": 7.673538005092578e-05, |
|
"loss": 10.3365, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.33033350979354154, |
|
"grad_norm": 0.03725546598434448, |
|
"learning_rate": 7.650708924300944e-05, |
|
"loss": 10.3284, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.33075701429327686, |
|
"grad_norm": 0.032989148050546646, |
|
"learning_rate": 7.627892795130925e-05, |
|
"loss": 10.3375, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.3311805187930122, |
|
"grad_norm": 0.02557358518242836, |
|
"learning_rate": 7.605089743367644e-05, |
|
"loss": 10.3355, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.33160402329274746, |
|
"grad_norm": 0.03748362138867378, |
|
"learning_rate": 7.582299894724138e-05, |
|
"loss": 10.3362, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.3320275277924828, |
|
"grad_norm": 0.04423379525542259, |
|
"learning_rate": 7.55952337484064e-05, |
|
"loss": 10.3372, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.3324510322922181, |
|
"grad_norm": 0.03931692987680435, |
|
"learning_rate": 7.536760309283912e-05, |
|
"loss": 10.3319, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.3328745367919534, |
|
"grad_norm": 0.028346918523311615, |
|
"learning_rate": 7.514010823546543e-05, |
|
"loss": 10.3355, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.3332980412916887, |
|
"grad_norm": 0.041941095143556595, |
|
"learning_rate": 7.491275043046246e-05, |
|
"loss": 10.3351, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.33372154579142405, |
|
"grad_norm": 0.03487636148929596, |
|
"learning_rate": 7.46855309312519e-05, |
|
"loss": 10.3333, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.3341450502911593, |
|
"grad_norm": 0.032287437468767166, |
|
"learning_rate": 7.445845099049294e-05, |
|
"loss": 10.3308, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.33456855479089465, |
|
"grad_norm": 0.03427242115139961, |
|
"learning_rate": 7.423151186007527e-05, |
|
"loss": 10.3318, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.33499205929063, |
|
"grad_norm": 0.03202645853161812, |
|
"learning_rate": 7.400471479111247e-05, |
|
"loss": 10.3365, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.33541556379036525, |
|
"grad_norm": 0.036663834005594254, |
|
"learning_rate": 7.377806103393473e-05, |
|
"loss": 10.3315, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.3358390682901006, |
|
"grad_norm": 0.037792034447193146, |
|
"learning_rate": 7.355155183808234e-05, |
|
"loss": 10.3371, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.3362625727898359, |
|
"grad_norm": 0.03239692375063896, |
|
"learning_rate": 7.332518845229859e-05, |
|
"loss": 10.3333, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.33668607728957123, |
|
"grad_norm": 0.028021618723869324, |
|
"learning_rate": 7.309897212452279e-05, |
|
"loss": 10.3329, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.3371095817893065, |
|
"grad_norm": 0.03356965258717537, |
|
"learning_rate": 7.287290410188373e-05, |
|
"loss": 10.3318, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.33753308628904183, |
|
"grad_norm": 0.030086075887084007, |
|
"learning_rate": 7.264698563069246e-05, |
|
"loss": 10.3378, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.33795659078877716, |
|
"grad_norm": 0.03997505083680153, |
|
"learning_rate": 7.242121795643552e-05, |
|
"loss": 10.3386, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.33838009528851243, |
|
"grad_norm": 0.03527563437819481, |
|
"learning_rate": 7.219560232376821e-05, |
|
"loss": 10.338, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.33880359978824776, |
|
"grad_norm": 0.032303664833307266, |
|
"learning_rate": 7.197013997650762e-05, |
|
"loss": 10.3403, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3392271042879831, |
|
"grad_norm": 0.03437490016222, |
|
"learning_rate": 7.174483215762568e-05, |
|
"loss": 10.3319, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.33965060878771836, |
|
"grad_norm": 0.03714921697974205, |
|
"learning_rate": 7.151968010924249e-05, |
|
"loss": 10.3357, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.3400741132874537, |
|
"grad_norm": 0.03493595868349075, |
|
"learning_rate": 7.12946850726194e-05, |
|
"loss": 10.333, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.340497617787189, |
|
"grad_norm": 0.027758195996284485, |
|
"learning_rate": 7.106984828815206e-05, |
|
"loss": 10.3392, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.3409211222869243, |
|
"grad_norm": 0.03370975703001022, |
|
"learning_rate": 7.084517099536377e-05, |
|
"loss": 10.3326, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.3413446267866596, |
|
"grad_norm": 0.03559848666191101, |
|
"learning_rate": 7.062065443289859e-05, |
|
"loss": 10.3339, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.34176813128639494, |
|
"grad_norm": 0.03199275955557823, |
|
"learning_rate": 7.039629983851432e-05, |
|
"loss": 10.3325, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.3421916357861302, |
|
"grad_norm": 0.0587792843580246, |
|
"learning_rate": 7.017210844907598e-05, |
|
"loss": 10.3334, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.34261514028586554, |
|
"grad_norm": 0.04129471629858017, |
|
"learning_rate": 6.994808150054872e-05, |
|
"loss": 10.3343, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.34303864478560087, |
|
"grad_norm": 0.03535553812980652, |
|
"learning_rate": 6.972422022799121e-05, |
|
"loss": 10.3325, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.34346214928533614, |
|
"grad_norm": 0.03517236188054085, |
|
"learning_rate": 6.95005258655488e-05, |
|
"loss": 10.3332, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.34388565378507147, |
|
"grad_norm": 0.03364865854382515, |
|
"learning_rate": 6.927699964644652e-05, |
|
"loss": 10.3341, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.3443091582848068, |
|
"grad_norm": 0.04013295844197273, |
|
"learning_rate": 6.905364280298252e-05, |
|
"loss": 10.3285, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.34473266278454207, |
|
"grad_norm": 0.03748088330030441, |
|
"learning_rate": 6.883045656652122e-05, |
|
"loss": 10.3321, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.3451561672842774, |
|
"grad_norm": 0.032113492488861084, |
|
"learning_rate": 6.860744216748634e-05, |
|
"loss": 10.3329, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.3455796717840127, |
|
"grad_norm": 0.031743258237838745, |
|
"learning_rate": 6.838460083535445e-05, |
|
"loss": 10.3353, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.346003176283748, |
|
"grad_norm": 0.0373242124915123, |
|
"learning_rate": 6.816193379864786e-05, |
|
"loss": 10.3358, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.3464266807834833, |
|
"grad_norm": 0.03148363158106804, |
|
"learning_rate": 6.793944228492803e-05, |
|
"loss": 10.3392, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.34685018528321865, |
|
"grad_norm": 0.02826239913702011, |
|
"learning_rate": 6.77171275207888e-05, |
|
"loss": 10.3354, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.3472736897829539, |
|
"grad_norm": 0.02493412233889103, |
|
"learning_rate": 6.749499073184957e-05, |
|
"loss": 10.3331, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.34769719428268925, |
|
"grad_norm": 0.030130870640277863, |
|
"learning_rate": 6.727303314274852e-05, |
|
"loss": 10.3335, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.3481206987824246, |
|
"grad_norm": 0.03829304128885269, |
|
"learning_rate": 6.705125597713598e-05, |
|
"loss": 10.337, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.34854420328215985, |
|
"grad_norm": 0.036645397543907166, |
|
"learning_rate": 6.682966045766758e-05, |
|
"loss": 10.3323, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.3489677077818952, |
|
"grad_norm": 0.03612329065799713, |
|
"learning_rate": 6.660824780599744e-05, |
|
"loss": 10.3288, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.3493912122816305, |
|
"grad_norm": 0.04094702750444412, |
|
"learning_rate": 6.638701924277174e-05, |
|
"loss": 10.3292, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.3498147167813658, |
|
"grad_norm": 0.031782373785972595, |
|
"learning_rate": 6.61659759876215e-05, |
|
"loss": 10.333, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.3502382212811011, |
|
"grad_norm": 0.03760769963264465, |
|
"learning_rate": 6.594511925915646e-05, |
|
"loss": 10.3337, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.35066172578083643, |
|
"grad_norm": 0.033052217215299606, |
|
"learning_rate": 6.572445027495779e-05, |
|
"loss": 10.3336, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.35108523028057176, |
|
"grad_norm": 0.0381859727203846, |
|
"learning_rate": 6.550397025157169e-05, |
|
"loss": 10.3385, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.35150873478030703, |
|
"grad_norm": 0.033314503729343414, |
|
"learning_rate": 6.528368040450268e-05, |
|
"loss": 10.3333, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.35193223928004236, |
|
"grad_norm": 0.029306232929229736, |
|
"learning_rate": 6.506358194820685e-05, |
|
"loss": 10.3326, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.3523557437797777, |
|
"grad_norm": 0.035479675978422165, |
|
"learning_rate": 6.484367609608503e-05, |
|
"loss": 10.3346, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.35277924827951296, |
|
"grad_norm": 0.028150904923677444, |
|
"learning_rate": 6.462396406047634e-05, |
|
"loss": 10.336, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.3532027527792483, |
|
"grad_norm": 0.029760006815195084, |
|
"learning_rate": 6.440444705265136e-05, |
|
"loss": 10.3317, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.3536262572789836, |
|
"grad_norm": 0.039765894412994385, |
|
"learning_rate": 6.418512628280544e-05, |
|
"loss": 10.3309, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.3540497617787189, |
|
"grad_norm": 0.02911820076406002, |
|
"learning_rate": 6.396600296005213e-05, |
|
"loss": 10.3351, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.3544732662784542, |
|
"grad_norm": 0.0354015938937664, |
|
"learning_rate": 6.374707829241648e-05, |
|
"loss": 10.3336, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.35489677077818954, |
|
"grad_norm": 0.030309785157442093, |
|
"learning_rate": 6.352835348682823e-05, |
|
"loss": 10.3339, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.3553202752779248, |
|
"grad_norm": 0.03831500932574272, |
|
"learning_rate": 6.330982974911542e-05, |
|
"loss": 10.3343, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.35574377977766014, |
|
"grad_norm": 0.02785351127386093, |
|
"learning_rate": 6.309150828399754e-05, |
|
"loss": 10.3333, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.35616728427739547, |
|
"grad_norm": 0.033175136893987656, |
|
"learning_rate": 6.287339029507894e-05, |
|
"loss": 10.3336, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.35659078877713074, |
|
"grad_norm": 0.03539146110415459, |
|
"learning_rate": 6.265547698484226e-05, |
|
"loss": 10.3291, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.35701429327686607, |
|
"grad_norm": 0.033553339540958405, |
|
"learning_rate": 6.243776955464169e-05, |
|
"loss": 10.3332, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.3574377977766014, |
|
"grad_norm": 0.02890482172369957, |
|
"learning_rate": 6.22202692046964e-05, |
|
"loss": 10.3323, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.35786130227633667, |
|
"grad_norm": 0.035188328474760056, |
|
"learning_rate": 6.200297713408405e-05, |
|
"loss": 10.3333, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.358284806776072, |
|
"grad_norm": 0.025319932028651237, |
|
"learning_rate": 6.178589454073386e-05, |
|
"loss": 10.3335, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.3587083112758073, |
|
"grad_norm": 0.03855932876467705, |
|
"learning_rate": 6.156902262142041e-05, |
|
"loss": 10.3339, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.3591318157755426, |
|
"grad_norm": 0.03646783158183098, |
|
"learning_rate": 6.135236257175668e-05, |
|
"loss": 10.3318, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.3595553202752779, |
|
"grad_norm": 0.030003085732460022, |
|
"learning_rate": 6.11359155861877e-05, |
|
"loss": 10.3356, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.35997882477501325, |
|
"grad_norm": 0.0374993160367012, |
|
"learning_rate": 6.091968285798379e-05, |
|
"loss": 10.3337, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3604023292747485, |
|
"grad_norm": 0.030076105147600174, |
|
"learning_rate": 6.0703665579234235e-05, |
|
"loss": 10.3314, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.36082583377448385, |
|
"grad_norm": 0.03631633147597313, |
|
"learning_rate": 6.048786494084036e-05, |
|
"loss": 10.3328, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.3612493382742192, |
|
"grad_norm": 0.040016159415245056, |
|
"learning_rate": 6.027228213250926e-05, |
|
"loss": 10.3283, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.36167284277395445, |
|
"grad_norm": 0.0370076447725296, |
|
"learning_rate": 6.005691834274716e-05, |
|
"loss": 10.332, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.3620963472736898, |
|
"grad_norm": 0.030154719948768616, |
|
"learning_rate": 5.984177475885272e-05, |
|
"loss": 10.3317, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.3625198517734251, |
|
"grad_norm": 0.03587184473872185, |
|
"learning_rate": 5.962685256691071e-05, |
|
"loss": 10.3323, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.3629433562731604, |
|
"grad_norm": 0.03230219706892967, |
|
"learning_rate": 5.941215295178537e-05, |
|
"loss": 10.3358, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.3633668607728957, |
|
"grad_norm": 0.03372441977262497, |
|
"learning_rate": 5.919767709711381e-05, |
|
"loss": 10.3354, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.36379036527263103, |
|
"grad_norm": 0.03200405836105347, |
|
"learning_rate": 5.898342618529955e-05, |
|
"loss": 10.3328, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.3642138697723663, |
|
"grad_norm": 0.029012855142354965, |
|
"learning_rate": 5.876940139750612e-05, |
|
"loss": 10.332, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.36463737427210163, |
|
"grad_norm": 0.03257838636636734, |
|
"learning_rate": 5.8555603913650246e-05, |
|
"loss": 10.3345, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.36506087877183696, |
|
"grad_norm": 0.03526037186384201, |
|
"learning_rate": 5.834203491239574e-05, |
|
"loss": 10.3391, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.36548438327157223, |
|
"grad_norm": 0.02882193773984909, |
|
"learning_rate": 5.812869557114658e-05, |
|
"loss": 10.3312, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.36590788777130756, |
|
"grad_norm": 0.036499861627817154, |
|
"learning_rate": 5.791558706604074e-05, |
|
"loss": 10.3337, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.3663313922710429, |
|
"grad_norm": 0.029512615874409676, |
|
"learning_rate": 5.7702710571943696e-05, |
|
"loss": 10.3326, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.3667548967707782, |
|
"grad_norm": 0.029680045321583748, |
|
"learning_rate": 5.7490067262441615e-05, |
|
"loss": 10.3327, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.3671784012705135, |
|
"grad_norm": 0.03450106456875801, |
|
"learning_rate": 5.727765830983525e-05, |
|
"loss": 10.3335, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.3676019057702488, |
|
"grad_norm": 0.04042774438858032, |
|
"learning_rate": 5.7065484885133466e-05, |
|
"loss": 10.3325, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.36802541026998414, |
|
"grad_norm": 0.026330476626753807, |
|
"learning_rate": 5.685354815804638e-05, |
|
"loss": 10.3357, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.3684489147697194, |
|
"grad_norm": 0.04447445273399353, |
|
"learning_rate": 5.664184929697945e-05, |
|
"loss": 10.3358, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.36887241926945474, |
|
"grad_norm": 0.03038848005235195, |
|
"learning_rate": 5.643038946902668e-05, |
|
"loss": 10.3287, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.36929592376919007, |
|
"grad_norm": 0.03465115278959274, |
|
"learning_rate": 5.621916983996429e-05, |
|
"loss": 10.3332, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.36971942826892534, |
|
"grad_norm": 0.03414342552423477, |
|
"learning_rate": 5.600819157424427e-05, |
|
"loss": 10.3313, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.37014293276866067, |
|
"grad_norm": 0.03694528341293335, |
|
"learning_rate": 5.579745583498801e-05, |
|
"loss": 10.3327, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.370566437268396, |
|
"grad_norm": 0.03394203633069992, |
|
"learning_rate": 5.558696378397983e-05, |
|
"loss": 10.3348, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.37098994176813127, |
|
"grad_norm": 0.03003198839724064, |
|
"learning_rate": 5.537671658166063e-05, |
|
"loss": 10.3372, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.3714134462678666, |
|
"grad_norm": 0.03449974209070206, |
|
"learning_rate": 5.51667153871214e-05, |
|
"loss": 10.3282, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.3718369507676019, |
|
"grad_norm": 0.03920021653175354, |
|
"learning_rate": 5.495696135809696e-05, |
|
"loss": 10.3374, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.3722604552673372, |
|
"grad_norm": 0.02695903740823269, |
|
"learning_rate": 5.4747455650959464e-05, |
|
"loss": 10.3328, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.3726839597670725, |
|
"grad_norm": 0.02427126094698906, |
|
"learning_rate": 5.453819942071211e-05, |
|
"loss": 10.3338, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.37310746426680785, |
|
"grad_norm": 0.04181993380188942, |
|
"learning_rate": 5.432919382098267e-05, |
|
"loss": 10.3335, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.3735309687665431, |
|
"grad_norm": 0.03968915343284607, |
|
"learning_rate": 5.412044000401726e-05, |
|
"loss": 10.3298, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.37395447326627845, |
|
"grad_norm": 0.04720834270119667, |
|
"learning_rate": 5.391193912067386e-05, |
|
"loss": 10.3303, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.3743779777660138, |
|
"grad_norm": 0.03866041824221611, |
|
"learning_rate": 5.3703692320416034e-05, |
|
"loss": 10.3328, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.37480148226574905, |
|
"grad_norm": 0.031206615269184113, |
|
"learning_rate": 5.3495700751306735e-05, |
|
"loss": 10.3295, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.3752249867654844, |
|
"grad_norm": 0.02863953448832035, |
|
"learning_rate": 5.328796556000153e-05, |
|
"loss": 10.3352, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.3756484912652197, |
|
"grad_norm": 0.027401378378272057, |
|
"learning_rate": 5.308048789174289e-05, |
|
"loss": 10.3319, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.376071995764955, |
|
"grad_norm": 0.039630185812711716, |
|
"learning_rate": 5.2873268890353424e-05, |
|
"loss": 10.3303, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.3764955002646903, |
|
"grad_norm": 0.037442997097969055, |
|
"learning_rate": 5.266630969822958e-05, |
|
"loss": 10.3304, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.37691900476442564, |
|
"grad_norm": 0.03644905984401703, |
|
"learning_rate": 5.2459611456335746e-05, |
|
"loss": 10.3322, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3773425092641609, |
|
"grad_norm": 0.03049355559051037, |
|
"learning_rate": 5.225317530419751e-05, |
|
"loss": 10.3303, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.37776601376389624, |
|
"grad_norm": 0.0509609617292881, |
|
"learning_rate": 5.2047002379895636e-05, |
|
"loss": 10.3276, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.37818951826363156, |
|
"grad_norm": 0.039859797805547714, |
|
"learning_rate": 5.1841093820059686e-05, |
|
"loss": 10.3278, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.37861302276336684, |
|
"grad_norm": 0.040018096566200256, |
|
"learning_rate": 5.163545075986178e-05, |
|
"loss": 10.3321, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.37903652726310216, |
|
"grad_norm": 0.030337341129779816, |
|
"learning_rate": 5.143007433301035e-05, |
|
"loss": 10.3373, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.3794600317628375, |
|
"grad_norm": 0.0383714959025383, |
|
"learning_rate": 5.12249656717439e-05, |
|
"loss": 10.3338, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.37988353626257276, |
|
"grad_norm": 0.03546814247965813, |
|
"learning_rate": 5.10201259068247e-05, |
|
"loss": 10.3348, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.3803070407623081, |
|
"grad_norm": 0.025767376646399498, |
|
"learning_rate": 5.081555616753264e-05, |
|
"loss": 10.336, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.3807305452620434, |
|
"grad_norm": 0.03003775328397751, |
|
"learning_rate": 5.061125758165896e-05, |
|
"loss": 10.3323, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.38115404976177875, |
|
"grad_norm": 0.04286766052246094, |
|
"learning_rate": 5.040723127549998e-05, |
|
"loss": 10.3369, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.381577554261514, |
|
"grad_norm": 0.03410165011882782, |
|
"learning_rate": 5.0203478373850955e-05, |
|
"loss": 10.3316, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.38200105876124935, |
|
"grad_norm": 0.03914531320333481, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 10.3333, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.3824245632609847, |
|
"grad_norm": 0.02953316643834114, |
|
"learning_rate": 4.979679727572159e-05, |
|
"loss": 10.3354, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.38284806776071995, |
|
"grad_norm": 0.034125540405511856, |
|
"learning_rate": 4.959387132127054e-05, |
|
"loss": 10.3298, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.3832715722604553, |
|
"grad_norm": 0.035765476524829865, |
|
"learning_rate": 4.939122325537604e-05, |
|
"loss": 10.3343, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.3836950767601906, |
|
"grad_norm": 0.04484931752085686, |
|
"learning_rate": 4.918885419523499e-05, |
|
"loss": 10.3357, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.3841185812599259, |
|
"grad_norm": 0.033943966031074524, |
|
"learning_rate": 4.898676525650639e-05, |
|
"loss": 10.3321, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.3845420857596612, |
|
"grad_norm": 0.03354055806994438, |
|
"learning_rate": 4.8784957553304876e-05, |
|
"loss": 10.3308, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.38496559025939653, |
|
"grad_norm": 0.029053689911961555, |
|
"learning_rate": 4.858343219819442e-05, |
|
"loss": 10.3289, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.3853890947591318, |
|
"grad_norm": 0.04069928824901581, |
|
"learning_rate": 4.838219030218274e-05, |
|
"loss": 10.3315, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.38581259925886713, |
|
"grad_norm": 0.035262517631053925, |
|
"learning_rate": 4.818123297471463e-05, |
|
"loss": 10.3373, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.38623610375860246, |
|
"grad_norm": 0.034540239721536636, |
|
"learning_rate": 4.7980561323666115e-05, |
|
"loss": 10.323, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.38665960825833773, |
|
"grad_norm": 0.031878579407930374, |
|
"learning_rate": 4.77801764553383e-05, |
|
"loss": 10.3362, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.38708311275807306, |
|
"grad_norm": 0.029519235715270042, |
|
"learning_rate": 4.758007947445125e-05, |
|
"loss": 10.3275, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.3875066172578084, |
|
"grad_norm": 0.03876268118619919, |
|
"learning_rate": 4.7380271484137915e-05, |
|
"loss": 10.3288, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.38793012175754366, |
|
"grad_norm": 0.029615303501486778, |
|
"learning_rate": 4.718075358593802e-05, |
|
"loss": 10.3347, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.388353626257279, |
|
"grad_norm": 0.030300240963697433, |
|
"learning_rate": 4.698152687979205e-05, |
|
"loss": 10.3329, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.3887771307570143, |
|
"grad_norm": 0.05134044587612152, |
|
"learning_rate": 4.678259246403512e-05, |
|
"loss": 10.3394, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.3892006352567496, |
|
"grad_norm": 0.04115286096930504, |
|
"learning_rate": 4.6583951435390973e-05, |
|
"loss": 10.3301, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.3896241397564849, |
|
"grad_norm": 0.033398233354091644, |
|
"learning_rate": 4.638560488896589e-05, |
|
"loss": 10.3336, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.39004764425622024, |
|
"grad_norm": 0.0268535315990448, |
|
"learning_rate": 4.618755391824268e-05, |
|
"loss": 10.3314, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.3904711487559555, |
|
"grad_norm": 0.042327847331762314, |
|
"learning_rate": 4.598979961507471e-05, |
|
"loss": 10.3317, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.39089465325569084, |
|
"grad_norm": 0.033836785703897476, |
|
"learning_rate": 4.57923430696797e-05, |
|
"loss": 10.3344, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.39131815775542617, |
|
"grad_norm": 0.03876091167330742, |
|
"learning_rate": 4.5595185370633875e-05, |
|
"loss": 10.3312, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.39174166225516144, |
|
"grad_norm": 0.04275533929467201, |
|
"learning_rate": 4.5398327604866054e-05, |
|
"loss": 10.3328, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.39216516675489677, |
|
"grad_norm": 0.038993559777736664, |
|
"learning_rate": 4.5201770857651274e-05, |
|
"loss": 10.3345, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.3925886712546321, |
|
"grad_norm": 0.028194980695843697, |
|
"learning_rate": 4.50055162126053e-05, |
|
"loss": 10.3356, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.39301217575436737, |
|
"grad_norm": 0.038792677223682404, |
|
"learning_rate": 4.48095647516783e-05, |
|
"loss": 10.3328, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.3934356802541027, |
|
"grad_norm": 0.031241275370121002, |
|
"learning_rate": 4.461391755514899e-05, |
|
"loss": 10.3274, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.393859184753838, |
|
"grad_norm": 0.04370317608118057, |
|
"learning_rate": 4.4418575701618715e-05, |
|
"loss": 10.3334, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3942826892535733, |
|
"grad_norm": 0.032410670071840286, |
|
"learning_rate": 4.422354026800536e-05, |
|
"loss": 10.3373, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.3947061937533086, |
|
"grad_norm": 0.02156672440469265, |
|
"learning_rate": 4.4028812329537694e-05, |
|
"loss": 10.3344, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.39512969825304395, |
|
"grad_norm": 0.042322322726249695, |
|
"learning_rate": 4.3834392959749146e-05, |
|
"loss": 10.3309, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.3955532027527793, |
|
"grad_norm": 0.027538040652871132, |
|
"learning_rate": 4.3640283230472044e-05, |
|
"loss": 10.3305, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.39597670725251455, |
|
"grad_norm": 0.026913011446595192, |
|
"learning_rate": 4.344648421183166e-05, |
|
"loss": 10.3326, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.3964002117522499, |
|
"grad_norm": 0.03797266632318497, |
|
"learning_rate": 4.3252996972240324e-05, |
|
"loss": 10.3286, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.3968237162519852, |
|
"grad_norm": 0.03437899798154831, |
|
"learning_rate": 4.305982257839154e-05, |
|
"loss": 10.3333, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.3972472207517205, |
|
"grad_norm": 0.032235968858003616, |
|
"learning_rate": 4.286696209525409e-05, |
|
"loss": 10.3373, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.3976707252514558, |
|
"grad_norm": 0.03257599472999573, |
|
"learning_rate": 4.2674416586066165e-05, |
|
"loss": 10.3336, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.39809422975119113, |
|
"grad_norm": 0.03536880016326904, |
|
"learning_rate": 4.248218711232952e-05, |
|
"loss": 10.3347, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3985177342509264, |
|
"grad_norm": 0.03932619467377663, |
|
"learning_rate": 4.229027473380355e-05, |
|
"loss": 10.3343, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.39894123875066173, |
|
"grad_norm": 0.03219004347920418, |
|
"learning_rate": 4.2098680508499665e-05, |
|
"loss": 10.3355, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.39936474325039706, |
|
"grad_norm": 0.03659631311893463, |
|
"learning_rate": 4.1907405492675065e-05, |
|
"loss": 10.3342, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.39978824775013233, |
|
"grad_norm": 0.02803085185587406, |
|
"learning_rate": 4.171645074082737e-05, |
|
"loss": 10.3313, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.40021175224986766, |
|
"grad_norm": 0.024601435288786888, |
|
"learning_rate": 4.15258173056885e-05, |
|
"loss": 10.3333, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.400635256749603, |
|
"grad_norm": 0.036193400621414185, |
|
"learning_rate": 4.133550623821885e-05, |
|
"loss": 10.3359, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.40105876124933826, |
|
"grad_norm": 0.03234044834971428, |
|
"learning_rate": 4.114551858760183e-05, |
|
"loss": 10.3351, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.4014822657490736, |
|
"grad_norm": 0.03343448042869568, |
|
"learning_rate": 4.095585540123762e-05, |
|
"loss": 10.3276, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.4019057702488089, |
|
"grad_norm": 0.030924122780561447, |
|
"learning_rate": 4.076651772473783e-05, |
|
"loss": 10.3379, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.4023292747485442, |
|
"grad_norm": 0.044260427355766296, |
|
"learning_rate": 4.0577506601919467e-05, |
|
"loss": 10.3332, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4027527792482795, |
|
"grad_norm": 0.027923841029405594, |
|
"learning_rate": 4.038882307479912e-05, |
|
"loss": 10.3391, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.40317628374801484, |
|
"grad_norm": 0.0312094334512949, |
|
"learning_rate": 4.0200468183587556e-05, |
|
"loss": 10.3327, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.4035997882477501, |
|
"grad_norm": 0.03471310809254646, |
|
"learning_rate": 4.0012442966683674e-05, |
|
"loss": 10.3367, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.40402329274748544, |
|
"grad_norm": 0.03101627714931965, |
|
"learning_rate": 3.982474846066886e-05, |
|
"loss": 10.3284, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.40444679724722077, |
|
"grad_norm": 0.03931306675076485, |
|
"learning_rate": 3.963738570030134e-05, |
|
"loss": 10.3312, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.40487030174695604, |
|
"grad_norm": 0.024929361417889595, |
|
"learning_rate": 3.94503557185104e-05, |
|
"loss": 10.3326, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.40529380624669137, |
|
"grad_norm": 0.043676454573869705, |
|
"learning_rate": 3.926365954639073e-05, |
|
"loss": 10.3289, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.4057173107464267, |
|
"grad_norm": 0.03379151597619057, |
|
"learning_rate": 3.90772982131967e-05, |
|
"loss": 10.3342, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.40614081524616197, |
|
"grad_norm": 0.03445500135421753, |
|
"learning_rate": 3.8891272746336845e-05, |
|
"loss": 10.337, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.4065643197458973, |
|
"grad_norm": 0.03671969100832939, |
|
"learning_rate": 3.8705584171367885e-05, |
|
"loss": 10.3389, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4069878242456326, |
|
"grad_norm": 0.03856462240219116, |
|
"learning_rate": 3.8520233511989324e-05, |
|
"loss": 10.3318, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.4074113287453679, |
|
"grad_norm": 0.037579286843538284, |
|
"learning_rate": 3.833522179003788e-05, |
|
"loss": 10.3312, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.4078348332451032, |
|
"grad_norm": 0.0324142761528492, |
|
"learning_rate": 3.8150550025481445e-05, |
|
"loss": 10.3357, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.40825833774483855, |
|
"grad_norm": 0.035630084574222565, |
|
"learning_rate": 3.796621923641404e-05, |
|
"loss": 10.3304, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.4086818422445738, |
|
"grad_norm": 0.029326455667614937, |
|
"learning_rate": 3.77822304390496e-05, |
|
"loss": 10.3306, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.40910534674430915, |
|
"grad_norm": 0.03198442980647087, |
|
"learning_rate": 3.7598584647716804e-05, |
|
"loss": 10.3319, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.4095288512440445, |
|
"grad_norm": 0.035467833280563354, |
|
"learning_rate": 3.7415282874853444e-05, |
|
"loss": 10.3316, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.40995235574377975, |
|
"grad_norm": 0.047377362847328186, |
|
"learning_rate": 3.723232613100046e-05, |
|
"loss": 10.3287, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.4103758602435151, |
|
"grad_norm": 0.036050811409950256, |
|
"learning_rate": 3.704971542479695e-05, |
|
"loss": 10.3347, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.4107993647432504, |
|
"grad_norm": 0.037851523607969284, |
|
"learning_rate": 3.6867451762974114e-05, |
|
"loss": 10.3334, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.41122286924298573, |
|
"grad_norm": 0.030836213380098343, |
|
"learning_rate": 3.6685536150349986e-05, |
|
"loss": 10.3328, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.411646373742721, |
|
"grad_norm": 0.026154899969697, |
|
"learning_rate": 3.650396958982377e-05, |
|
"loss": 10.3323, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.41206987824245633, |
|
"grad_norm": 0.036884456872940063, |
|
"learning_rate": 3.6322753082370365e-05, |
|
"loss": 10.33, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.41249338274219166, |
|
"grad_norm": 0.041880205273628235, |
|
"learning_rate": 3.614188762703482e-05, |
|
"loss": 10.3294, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.41291688724192693, |
|
"grad_norm": 0.04928620532155037, |
|
"learning_rate": 3.596137422092686e-05, |
|
"loss": 10.3351, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.41334039174166226, |
|
"grad_norm": 0.027833838015794754, |
|
"learning_rate": 3.578121385921533e-05, |
|
"loss": 10.3309, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.4137638962413976, |
|
"grad_norm": 0.03103015385568142, |
|
"learning_rate": 3.560140753512279e-05, |
|
"loss": 10.3359, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.41418740074113286, |
|
"grad_norm": 0.03528593108057976, |
|
"learning_rate": 3.542195623991991e-05, |
|
"loss": 10.3282, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.4146109052408682, |
|
"grad_norm": 0.03291507437825203, |
|
"learning_rate": 3.524286096292025e-05, |
|
"loss": 10.3309, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.4150344097406035, |
|
"grad_norm": 0.04097427800297737, |
|
"learning_rate": 3.5064122691474454e-05, |
|
"loss": 10.3362, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4154579142403388, |
|
"grad_norm": 0.04069104790687561, |
|
"learning_rate": 3.4885742410965104e-05, |
|
"loss": 10.3347, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.4158814187400741, |
|
"grad_norm": 0.03851715475320816, |
|
"learning_rate": 3.4707721104801175e-05, |
|
"loss": 10.334, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.41630492323980944, |
|
"grad_norm": 0.03845444321632385, |
|
"learning_rate": 3.4530059754412555e-05, |
|
"loss": 10.3324, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.4167284277395447, |
|
"grad_norm": 0.027850644662976265, |
|
"learning_rate": 3.435275933924487e-05, |
|
"loss": 10.3309, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.41715193223928004, |
|
"grad_norm": 0.03326322138309479, |
|
"learning_rate": 3.417582083675365e-05, |
|
"loss": 10.3325, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.41757543673901537, |
|
"grad_norm": 0.027192946523427963, |
|
"learning_rate": 3.399924522239943e-05, |
|
"loss": 10.332, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.41799894123875064, |
|
"grad_norm": 0.035279251635074615, |
|
"learning_rate": 3.382303346964209e-05, |
|
"loss": 10.3317, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.41842244573848597, |
|
"grad_norm": 0.03443683683872223, |
|
"learning_rate": 3.36471865499354e-05, |
|
"loss": 10.3326, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.4188459502382213, |
|
"grad_norm": 0.030605580657720566, |
|
"learning_rate": 3.3471705432722035e-05, |
|
"loss": 10.3345, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.41926945473795657, |
|
"grad_norm": 0.032888561487197876, |
|
"learning_rate": 3.329659108542785e-05, |
|
"loss": 10.3265, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.4196929592376919, |
|
"grad_norm": 0.02829040214419365, |
|
"learning_rate": 3.3121844473456756e-05, |
|
"loss": 10.3325, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.4201164637374272, |
|
"grad_norm": 0.030971676111221313, |
|
"learning_rate": 3.294746656018532e-05, |
|
"loss": 10.3281, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.4205399682371625, |
|
"grad_norm": 0.03257730230689049, |
|
"learning_rate": 3.2773458306957495e-05, |
|
"loss": 10.3281, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.4209634727368978, |
|
"grad_norm": 0.03114408068358898, |
|
"learning_rate": 3.259982067307928e-05, |
|
"loss": 10.3343, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.42138697723663315, |
|
"grad_norm": 0.03386252745985985, |
|
"learning_rate": 3.2426554615813484e-05, |
|
"loss": 10.3316, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.4218104817363684, |
|
"grad_norm": 0.03416220098733902, |
|
"learning_rate": 3.2253661090374396e-05, |
|
"loss": 10.329, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.42223398623610375, |
|
"grad_norm": 0.031190721318125725, |
|
"learning_rate": 3.2081141049922535e-05, |
|
"loss": 10.3331, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.4226574907358391, |
|
"grad_norm": 0.03264687955379486, |
|
"learning_rate": 3.190899544555941e-05, |
|
"loss": 10.3313, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.42308099523557435, |
|
"grad_norm": 0.03346019983291626, |
|
"learning_rate": 3.173722522632228e-05, |
|
"loss": 10.3353, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.4235044997353097, |
|
"grad_norm": 0.03909333422780037, |
|
"learning_rate": 3.156583133917884e-05, |
|
"loss": 10.3316, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.423928004235045, |
|
"grad_norm": 0.030095241963863373, |
|
"learning_rate": 3.1394814729022235e-05, |
|
"loss": 10.3369, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.4243515087347803, |
|
"grad_norm": 0.02995004691183567, |
|
"learning_rate": 3.1224176338665476e-05, |
|
"loss": 10.3329, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.4247750132345156, |
|
"grad_norm": 0.039438553154468536, |
|
"learning_rate": 3.105391710883656e-05, |
|
"loss": 10.3305, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.42519851773425094, |
|
"grad_norm": 0.04090533405542374, |
|
"learning_rate": 3.088403797817325e-05, |
|
"loss": 10.3314, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.42562202223398626, |
|
"grad_norm": 0.0377449207007885, |
|
"learning_rate": 3.071453988321762e-05, |
|
"loss": 10.3298, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.42604552673372154, |
|
"grad_norm": 0.06536738574504852, |
|
"learning_rate": 3.0545423758411295e-05, |
|
"loss": 10.3276, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.42646903123345686, |
|
"grad_norm": 0.0357985682785511, |
|
"learning_rate": 3.037669053609006e-05, |
|
"loss": 10.3334, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.4268925357331922, |
|
"grad_norm": 0.03490246832370758, |
|
"learning_rate": 3.0208341146478602e-05, |
|
"loss": 10.3342, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.42731604023292746, |
|
"grad_norm": 0.03769504651427269, |
|
"learning_rate": 3.0040376517685764e-05, |
|
"loss": 10.3334, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.4277395447326628, |
|
"grad_norm": 0.027772339060902596, |
|
"learning_rate": 2.9872797575699097e-05, |
|
"loss": 10.3321, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4281630492323981, |
|
"grad_norm": 0.034188639372587204, |
|
"learning_rate": 2.9705605244379853e-05, |
|
"loss": 10.3324, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.4281630492323981, |
|
"eval_loss": 10.331077575683594, |
|
"eval_runtime": 3.4933, |
|
"eval_samples_per_second": 284.832, |
|
"eval_steps_per_second": 142.559, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.4285865537321334, |
|
"grad_norm": 0.03415974974632263, |
|
"learning_rate": 2.9538800445457946e-05, |
|
"loss": 10.3323, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.4290100582318687, |
|
"grad_norm": 0.039172153919935226, |
|
"learning_rate": 2.9372384098526784e-05, |
|
"loss": 10.3347, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.42943356273160405, |
|
"grad_norm": 0.031853485852479935, |
|
"learning_rate": 2.9206357121038285e-05, |
|
"loss": 10.3338, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.4298570672313393, |
|
"grad_norm": 0.04923943430185318, |
|
"learning_rate": 2.904072042829775e-05, |
|
"loss": 10.3323, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.43028057173107465, |
|
"grad_norm": 0.03674182668328285, |
|
"learning_rate": 2.8875474933458847e-05, |
|
"loss": 10.3334, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.43070407623081, |
|
"grad_norm": 0.030546877533197403, |
|
"learning_rate": 2.871062154751858e-05, |
|
"loss": 10.3296, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.43112758073054525, |
|
"grad_norm": 0.030613403767347336, |
|
"learning_rate": 2.8546161179312248e-05, |
|
"loss": 10.3354, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.4315510852302806, |
|
"grad_norm": 0.030776720494031906, |
|
"learning_rate": 2.8382094735508457e-05, |
|
"loss": 10.3303, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.4319745897300159, |
|
"grad_norm": 0.03810757398605347, |
|
"learning_rate": 2.821842312060409e-05, |
|
"loss": 10.3334, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4323980942297512, |
|
"grad_norm": 0.030035821720957756, |
|
"learning_rate": 2.8055147236919442e-05, |
|
"loss": 10.3345, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.4328215987294865, |
|
"grad_norm": 0.03650267794728279, |
|
"learning_rate": 2.789226798459298e-05, |
|
"loss": 10.3299, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.43324510322922183, |
|
"grad_norm": 0.030346672981977463, |
|
"learning_rate": 2.7729786261576617e-05, |
|
"loss": 10.334, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.4336686077289571, |
|
"grad_norm": 0.0330539271235466, |
|
"learning_rate": 2.7567702963630803e-05, |
|
"loss": 10.3316, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.43409211222869243, |
|
"grad_norm": 0.03174733370542526, |
|
"learning_rate": 2.740601898431925e-05, |
|
"loss": 10.3278, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.43451561672842776, |
|
"grad_norm": 0.03628386929631233, |
|
"learning_rate": 2.7244735215004446e-05, |
|
"loss": 10.3274, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.43493912122816303, |
|
"grad_norm": 0.024906015023589134, |
|
"learning_rate": 2.7083852544842436e-05, |
|
"loss": 10.3332, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.43536262572789836, |
|
"grad_norm": 0.043956976383924484, |
|
"learning_rate": 2.692337186077791e-05, |
|
"loss": 10.3266, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.4357861302276337, |
|
"grad_norm": 0.032996706664562225, |
|
"learning_rate": 2.67632940475396e-05, |
|
"loss": 10.3346, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.43620963472736896, |
|
"grad_norm": 0.044276829808950424, |
|
"learning_rate": 2.6603619987635086e-05, |
|
"loss": 10.3274, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4366331392271043, |
|
"grad_norm": 0.038449618965387344, |
|
"learning_rate": 2.64443505613461e-05, |
|
"loss": 10.3341, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.4370566437268396, |
|
"grad_norm": 0.03220584616065025, |
|
"learning_rate": 2.6285486646723634e-05, |
|
"loss": 10.3324, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.4374801482265749, |
|
"grad_norm": 0.03746611624956131, |
|
"learning_rate": 2.612702911958308e-05, |
|
"loss": 10.3354, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.4379036527263102, |
|
"grad_norm": 0.04333876073360443, |
|
"learning_rate": 2.5968978853499425e-05, |
|
"loss": 10.329, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.43832715722604554, |
|
"grad_norm": 0.03539913892745972, |
|
"learning_rate": 2.581133671980246e-05, |
|
"loss": 10.3324, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.4387506617257808, |
|
"grad_norm": 0.04690808430314064, |
|
"learning_rate": 2.565410358757189e-05, |
|
"loss": 10.3316, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.43917416622551614, |
|
"grad_norm": 0.038458049297332764, |
|
"learning_rate": 2.5497280323632654e-05, |
|
"loss": 10.3431, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.43959767072525147, |
|
"grad_norm": 0.03451355919241905, |
|
"learning_rate": 2.534086779255005e-05, |
|
"loss": 10.3296, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.44002117522498674, |
|
"grad_norm": 0.03873763233423233, |
|
"learning_rate": 2.5184866856625023e-05, |
|
"loss": 10.3273, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.44044467972472207, |
|
"grad_norm": 0.044388849288225174, |
|
"learning_rate": 2.5029278375889387e-05, |
|
"loss": 10.3324, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4408681842244574, |
|
"grad_norm": 0.03534289821982384, |
|
"learning_rate": 2.4874103208101183e-05, |
|
"loss": 10.3343, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.4412916887241927, |
|
"grad_norm": 0.0375693254172802, |
|
"learning_rate": 2.4719342208739693e-05, |
|
"loss": 10.3323, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.441715193223928, |
|
"grad_norm": 0.03341260179877281, |
|
"learning_rate": 2.456499623100098e-05, |
|
"loss": 10.3318, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.4421386977236633, |
|
"grad_norm": 0.04234972223639488, |
|
"learning_rate": 2.4411066125793203e-05, |
|
"loss": 10.3319, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.44256220222339865, |
|
"grad_norm": 0.031914252787828445, |
|
"learning_rate": 2.4257552741731592e-05, |
|
"loss": 10.3361, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.4429857067231339, |
|
"grad_norm": 0.05003447085618973, |
|
"learning_rate": 2.41044569251342e-05, |
|
"loss": 10.3313, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.44340921122286925, |
|
"grad_norm": 0.03364928439259529, |
|
"learning_rate": 2.3951779520016937e-05, |
|
"loss": 10.33, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.4438327157226046, |
|
"grad_norm": 0.028291532769799232, |
|
"learning_rate": 2.379952136808903e-05, |
|
"loss": 10.3336, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.44425622022233985, |
|
"grad_norm": 0.042799290269613266, |
|
"learning_rate": 2.3647683308748392e-05, |
|
"loss": 10.3348, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.4446797247220752, |
|
"grad_norm": 0.042522724717855453, |
|
"learning_rate": 2.3496266179076864e-05, |
|
"loss": 10.3288, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4451032292218105, |
|
"grad_norm": 0.02918383479118347, |
|
"learning_rate": 2.3345270813835886e-05, |
|
"loss": 10.3361, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.4455267337215458, |
|
"grad_norm": 0.046009406447410583, |
|
"learning_rate": 2.319469804546156e-05, |
|
"loss": 10.3349, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.4459502382212811, |
|
"grad_norm": 0.03431849181652069, |
|
"learning_rate": 2.3044548704060288e-05, |
|
"loss": 10.3283, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.44637374272101643, |
|
"grad_norm": 0.03582574054598808, |
|
"learning_rate": 2.2894823617404104e-05, |
|
"loss": 10.3314, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.4467972472207517, |
|
"grad_norm": 0.02972414344549179, |
|
"learning_rate": 2.2745523610926122e-05, |
|
"loss": 10.3289, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.44722075172048703, |
|
"grad_norm": 0.03548819199204445, |
|
"learning_rate": 2.2596649507716018e-05, |
|
"loss": 10.3299, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.44764425622022236, |
|
"grad_norm": 0.04241335019469261, |
|
"learning_rate": 2.244820212851544e-05, |
|
"loss": 10.3308, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.44806776071995763, |
|
"grad_norm": 0.033176884055137634, |
|
"learning_rate": 2.2300182291713513e-05, |
|
"loss": 10.3351, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.44849126521969296, |
|
"grad_norm": 0.032935190945863724, |
|
"learning_rate": 2.2152590813342345e-05, |
|
"loss": 10.3356, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.4489147697194283, |
|
"grad_norm": 0.030969172716140747, |
|
"learning_rate": 2.2005428507072467e-05, |
|
"loss": 10.3307, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.44933827421916356, |
|
"grad_norm": 0.036834247410297394, |
|
"learning_rate": 2.1858696184208484e-05, |
|
"loss": 10.3324, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.4497617787188989, |
|
"grad_norm": 0.038617976009845734, |
|
"learning_rate": 2.1712394653684344e-05, |
|
"loss": 10.3371, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.4501852832186342, |
|
"grad_norm": 0.026445934548974037, |
|
"learning_rate": 2.15665247220592e-05, |
|
"loss": 10.3334, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.4506087877183695, |
|
"grad_norm": 0.04230870306491852, |
|
"learning_rate": 2.1421087193512756e-05, |
|
"loss": 10.3261, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.4510322922181048, |
|
"grad_norm": 0.03189300373196602, |
|
"learning_rate": 2.1276082869840765e-05, |
|
"loss": 10.3297, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.45145579671784014, |
|
"grad_norm": 0.03367699310183525, |
|
"learning_rate": 2.113151255045095e-05, |
|
"loss": 10.3308, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.4518793012175754, |
|
"grad_norm": 0.032475464046001434, |
|
"learning_rate": 2.0987377032358114e-05, |
|
"loss": 10.339, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.45230280571731074, |
|
"grad_norm": 0.04436371102929115, |
|
"learning_rate": 2.084367711018024e-05, |
|
"loss": 10.3301, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.45272631021704607, |
|
"grad_norm": 0.037988126277923584, |
|
"learning_rate": 2.070041357613376e-05, |
|
"loss": 10.3309, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.45314981471678134, |
|
"grad_norm": 0.03870435804128647, |
|
"learning_rate": 2.0557587220029228e-05, |
|
"loss": 10.3353, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.45357331921651667, |
|
"grad_norm": 0.03660368546843529, |
|
"learning_rate": 2.0415198829267212e-05, |
|
"loss": 10.3317, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.453996823716252, |
|
"grad_norm": 0.03593965247273445, |
|
"learning_rate": 2.0273249188833654e-05, |
|
"loss": 10.3343, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.45442032821598727, |
|
"grad_norm": 0.03798775374889374, |
|
"learning_rate": 2.013173908129573e-05, |
|
"loss": 10.329, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.4548438327157226, |
|
"grad_norm": 0.030165789648890495, |
|
"learning_rate": 1.9990669286797438e-05, |
|
"loss": 10.3325, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.4552673372154579, |
|
"grad_norm": 0.029242129996418953, |
|
"learning_rate": 1.985004058305535e-05, |
|
"loss": 10.3337, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.45569084171519325, |
|
"grad_norm": 0.029076050966978073, |
|
"learning_rate": 1.9709853745354313e-05, |
|
"loss": 10.3347, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.4561143462149285, |
|
"grad_norm": 0.039899520576000214, |
|
"learning_rate": 1.9570109546543126e-05, |
|
"loss": 10.3334, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.45653785071466385, |
|
"grad_norm": 0.03501451388001442, |
|
"learning_rate": 1.943080875703045e-05, |
|
"loss": 10.325, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.4569613552143992, |
|
"grad_norm": 0.029382554814219475, |
|
"learning_rate": 1.929195214478028e-05, |
|
"loss": 10.336, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.45738485971413445, |
|
"grad_norm": 0.03819538280367851, |
|
"learning_rate": 1.915354047530791e-05, |
|
"loss": 10.3329, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.4578083642138698, |
|
"grad_norm": 0.03543626144528389, |
|
"learning_rate": 1.901557451167578e-05, |
|
"loss": 10.3326, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.4582318687136051, |
|
"grad_norm": 0.04363977536559105, |
|
"learning_rate": 1.887805501448896e-05, |
|
"loss": 10.3289, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.4586553732133404, |
|
"grad_norm": 0.03918329253792763, |
|
"learning_rate": 1.8740982741891377e-05, |
|
"loss": 10.3276, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.4590788777130757, |
|
"grad_norm": 0.029666945338249207, |
|
"learning_rate": 1.860435844956121e-05, |
|
"loss": 10.3307, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.45950238221281103, |
|
"grad_norm": 0.035329993814229965, |
|
"learning_rate": 1.8468182890707007e-05, |
|
"loss": 10.3336, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.4599258867125463, |
|
"grad_norm": 0.040378130972385406, |
|
"learning_rate": 1.833245681606356e-05, |
|
"loss": 10.3296, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.46034939121228163, |
|
"grad_norm": 0.04233788326382637, |
|
"learning_rate": 1.8197180973887428e-05, |
|
"loss": 10.3312, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.46077289571201696, |
|
"grad_norm": 0.03670990467071533, |
|
"learning_rate": 1.806235610995327e-05, |
|
"loss": 10.3303, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.46119640021175223, |
|
"grad_norm": 0.03234660625457764, |
|
"learning_rate": 1.7927982967549384e-05, |
|
"loss": 10.3355, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.46161990471148756, |
|
"grad_norm": 0.042892660945653915, |
|
"learning_rate": 1.7794062287473735e-05, |
|
"loss": 10.331, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.4620434092112229, |
|
"grad_norm": 0.04852224513888359, |
|
"learning_rate": 1.7660594808029908e-05, |
|
"loss": 10.3361, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.46246691371095816, |
|
"grad_norm": 0.036822058260440826, |
|
"learning_rate": 1.7527581265022965e-05, |
|
"loss": 10.3364, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.4628904182106935, |
|
"grad_norm": 0.03043217770755291, |
|
"learning_rate": 1.7395022391755434e-05, |
|
"loss": 10.335, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.4633139227104288, |
|
"grad_norm": 0.027736082673072815, |
|
"learning_rate": 1.7262918919023243e-05, |
|
"loss": 10.3335, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.4637374272101641, |
|
"grad_norm": 0.03186174854636192, |
|
"learning_rate": 1.713127157511172e-05, |
|
"loss": 10.3365, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.4641609317098994, |
|
"grad_norm": 0.03788574039936066, |
|
"learning_rate": 1.700008108579154e-05, |
|
"loss": 10.3317, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.46458443620963474, |
|
"grad_norm": 0.047464434057474136, |
|
"learning_rate": 1.6869348174314738e-05, |
|
"loss": 10.3307, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.46500794070937, |
|
"grad_norm": 0.03223862871527672, |
|
"learning_rate": 1.673907356141079e-05, |
|
"loss": 10.3337, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.46543144520910534, |
|
"grad_norm": 0.02775878831744194, |
|
"learning_rate": 1.6609257965282453e-05, |
|
"loss": 10.3376, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.46585494970884067, |
|
"grad_norm": 0.0346621610224247, |
|
"learning_rate": 1.647990210160204e-05, |
|
"loss": 10.334, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.46627845420857594, |
|
"grad_norm": 0.03867461159825325, |
|
"learning_rate": 1.6351006683507297e-05, |
|
"loss": 10.3321, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.46670195870831127, |
|
"grad_norm": 0.033736009150743484, |
|
"learning_rate": 1.622257242159756e-05, |
|
"loss": 10.329, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.4671254632080466, |
|
"grad_norm": 0.03446945920586586, |
|
"learning_rate": 1.6094600023929884e-05, |
|
"loss": 10.3281, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.46754896770778187, |
|
"grad_norm": 0.03439204394817352, |
|
"learning_rate": 1.59670901960149e-05, |
|
"loss": 10.3339, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.4679724722075172, |
|
"grad_norm": 0.03250345215201378, |
|
"learning_rate": 1.5840043640813274e-05, |
|
"loss": 10.3308, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.4683959767072525, |
|
"grad_norm": 0.030219173058867455, |
|
"learning_rate": 1.5713461058731572e-05, |
|
"loss": 10.333, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.4688194812069878, |
|
"grad_norm": 0.031828220933675766, |
|
"learning_rate": 1.558734314761844e-05, |
|
"loss": 10.3353, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.4692429857067231, |
|
"grad_norm": 0.047410812228918076, |
|
"learning_rate": 1.546169060276088e-05, |
|
"loss": 10.3289, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.46966649020645845, |
|
"grad_norm": 0.036803584545850754, |
|
"learning_rate": 1.53365041168803e-05, |
|
"loss": 10.3358, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.4700899947061937, |
|
"grad_norm": 0.03534479811787605, |
|
"learning_rate": 1.5211784380128714e-05, |
|
"loss": 10.33, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.47051349920592905, |
|
"grad_norm": 0.036183904856443405, |
|
"learning_rate": 1.5087532080084976e-05, |
|
"loss": 10.3289, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.4709370037056644, |
|
"grad_norm": 0.033738043159246445, |
|
"learning_rate": 1.4963747901750936e-05, |
|
"loss": 10.3303, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.4713605082053997, |
|
"grad_norm": 0.03870893269777298, |
|
"learning_rate": 1.4840432527547732e-05, |
|
"loss": 10.3364, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.471784012705135, |
|
"grad_norm": 0.04043989256024361, |
|
"learning_rate": 1.4717586637311943e-05, |
|
"loss": 10.3316, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.4722075172048703, |
|
"grad_norm": 0.03024929389357567, |
|
"learning_rate": 1.4595210908291935e-05, |
|
"loss": 10.3364, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.47263102170460564, |
|
"grad_norm": 0.04411826282739639, |
|
"learning_rate": 1.447330601514405e-05, |
|
"loss": 10.3331, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.4730545262043409, |
|
"grad_norm": 0.03368929401040077, |
|
"learning_rate": 1.4351872629928908e-05, |
|
"loss": 10.3323, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.47347803070407624, |
|
"grad_norm": 0.038087401539087296, |
|
"learning_rate": 1.423091142210774e-05, |
|
"loss": 10.3295, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.47390153520381156, |
|
"grad_norm": 0.03507355973124504, |
|
"learning_rate": 1.4110423058538624e-05, |
|
"loss": 10.3273, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.47432503970354684, |
|
"grad_norm": 0.03440206125378609, |
|
"learning_rate": 1.3990408203472938e-05, |
|
"loss": 10.3336, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.47474854420328216, |
|
"grad_norm": 0.03201809525489807, |
|
"learning_rate": 1.387086751855149e-05, |
|
"loss": 10.3323, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.4751720487030175, |
|
"grad_norm": 0.02803219109773636, |
|
"learning_rate": 1.3751801662801056e-05, |
|
"loss": 10.3343, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.47559555320275276, |
|
"grad_norm": 0.03642897307872772, |
|
"learning_rate": 1.3633211292630742e-05, |
|
"loss": 10.3309, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.4760190577024881, |
|
"grad_norm": 0.04547721892595291, |
|
"learning_rate": 1.3515097061828164e-05, |
|
"loss": 10.3248, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.4764425622022234, |
|
"grad_norm": 0.03152972459793091, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 10.3396, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.4768660667019587, |
|
"grad_norm": 0.028171587735414505, |
|
"learning_rate": 1.3280299620348846e-05, |
|
"loss": 10.33, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.477289571201694, |
|
"grad_norm": 0.03410959243774414, |
|
"learning_rate": 1.3163617704108321e-05, |
|
"loss": 10.3344, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.47771307570142935, |
|
"grad_norm": 0.030304502695798874, |
|
"learning_rate": 1.304741451610103e-05, |
|
"loss": 10.3309, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.4781365802011646, |
|
"grad_norm": 0.03257643058896065, |
|
"learning_rate": 1.2931690696954135e-05, |
|
"loss": 10.3346, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.47856008470089995, |
|
"grad_norm": 0.04555933550000191, |
|
"learning_rate": 1.2816446884652066e-05, |
|
"loss": 10.3302, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.4789835892006353, |
|
"grad_norm": 0.0384778194129467, |
|
"learning_rate": 1.2701683714532975e-05, |
|
"loss": 10.3317, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.47940709370037055, |
|
"grad_norm": 0.03637570142745972, |
|
"learning_rate": 1.2587401819285239e-05, |
|
"loss": 10.3295, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.4798305982001059, |
|
"grad_norm": 0.04053565487265587, |
|
"learning_rate": 1.2473601828943949e-05, |
|
"loss": 10.3293, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.4802541026998412, |
|
"grad_norm": 0.042270079255104065, |
|
"learning_rate": 1.236028437088751e-05, |
|
"loss": 10.3271, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.4806776071995765, |
|
"grad_norm": 0.04081670939922333, |
|
"learning_rate": 1.2247450069834076e-05, |
|
"loss": 10.3365, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.4811011116993118, |
|
"grad_norm": 0.03796311840415001, |
|
"learning_rate": 1.2135099547838192e-05, |
|
"loss": 10.333, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.48152461619904713, |
|
"grad_norm": 0.02851458452641964, |
|
"learning_rate": 1.2023233424287328e-05, |
|
"loss": 10.3304, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.4819481206987824, |
|
"grad_norm": 0.03447718173265457, |
|
"learning_rate": 1.1911852315898463e-05, |
|
"loss": 10.3316, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.48237162519851773, |
|
"grad_norm": 0.037812747061252594, |
|
"learning_rate": 1.1800956836714682e-05, |
|
"loss": 10.3288, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.48279512969825306, |
|
"grad_norm": 0.03977108374238014, |
|
"learning_rate": 1.1690547598101864e-05, |
|
"loss": 10.3303, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.48321863419798833, |
|
"grad_norm": 0.031228644773364067, |
|
"learning_rate": 1.1580625208745145e-05, |
|
"loss": 10.3294, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.48364213869772366, |
|
"grad_norm": 0.0270911306142807, |
|
"learning_rate": 1.1471190274645704e-05, |
|
"loss": 10.3322, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.484065643197459, |
|
"grad_norm": 0.03246387094259262, |
|
"learning_rate": 1.1362243399117478e-05, |
|
"loss": 10.3306, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.48448914769719426, |
|
"grad_norm": 0.03161618486046791, |
|
"learning_rate": 1.1253785182783572e-05, |
|
"loss": 10.335, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.4849126521969296, |
|
"grad_norm": 0.03287721052765846, |
|
"learning_rate": 1.1145816223573259e-05, |
|
"loss": 10.3312, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.4853361566966649, |
|
"grad_norm": 0.029835056513547897, |
|
"learning_rate": 1.1038337116718467e-05, |
|
"loss": 10.3309, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.48575966119640024, |
|
"grad_norm": 0.03465202450752258, |
|
"learning_rate": 1.0931348454750601e-05, |
|
"loss": 10.3336, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.4861831656961355, |
|
"grad_norm": 0.03778757527470589, |
|
"learning_rate": 1.0824850827497246e-05, |
|
"loss": 10.3342, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.48660667019587084, |
|
"grad_norm": 0.03788898512721062, |
|
"learning_rate": 1.07188448220789e-05, |
|
"loss": 10.3338, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.48703017469560617, |
|
"grad_norm": 0.03392605856060982, |
|
"learning_rate": 1.061333102290576e-05, |
|
"loss": 10.3314, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.48745367919534144, |
|
"grad_norm": 0.03181210905313492, |
|
"learning_rate": 1.0508310011674516e-05, |
|
"loss": 10.3347, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.48787718369507677, |
|
"grad_norm": 0.03807486966252327, |
|
"learning_rate": 1.0403782367365088e-05, |
|
"loss": 10.3334, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.4883006881948121, |
|
"grad_norm": 0.04221343249082565, |
|
"learning_rate": 1.0299748666237485e-05, |
|
"loss": 10.33, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.48872419269454737, |
|
"grad_norm": 0.03662874549627304, |
|
"learning_rate": 1.0196209481828633e-05, |
|
"loss": 10.3337, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.4891476971942827, |
|
"grad_norm": 0.03761863335967064, |
|
"learning_rate": 1.0093165384949155e-05, |
|
"loss": 10.3363, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.489571201694018, |
|
"grad_norm": 0.03691156208515167, |
|
"learning_rate": 9.990616943680265e-06, |
|
"loss": 10.3355, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.4899947061937533, |
|
"grad_norm": 0.03406470641493797, |
|
"learning_rate": 9.888564723370664e-06, |
|
"loss": 10.3348, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.4904182106934886, |
|
"grad_norm": 0.03452722728252411, |
|
"learning_rate": 9.787009286633363e-06, |
|
"loss": 10.3332, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.49084171519322395, |
|
"grad_norm": 0.03500404581427574, |
|
"learning_rate": 9.685951193342602e-06, |
|
"loss": 10.3328, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.4912652196929592, |
|
"grad_norm": 0.034697335213422775, |
|
"learning_rate": 9.585391000630828e-06, |
|
"loss": 10.3292, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.49168872419269455, |
|
"grad_norm": 0.028287572786211967, |
|
"learning_rate": 9.485329262885457e-06, |
|
"loss": 10.3337, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.4921122286924299, |
|
"grad_norm": 0.0407349169254303, |
|
"learning_rate": 9.385766531746054e-06, |
|
"loss": 10.3314, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.49253573319216515, |
|
"grad_norm": 0.03521955758333206, |
|
"learning_rate": 9.28670335610109e-06, |
|
"loss": 10.3313, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.4929592376919005, |
|
"grad_norm": 0.038377124816179276, |
|
"learning_rate": 9.188140282084967e-06, |
|
"loss": 10.3295, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.4933827421916358, |
|
"grad_norm": 0.037929970771074295, |
|
"learning_rate": 9.090077853075118e-06, |
|
"loss": 10.331, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.4938062466913711, |
|
"grad_norm": 0.03767012432217598, |
|
"learning_rate": 8.992516609688862e-06, |
|
"loss": 10.3305, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.4942297511911064, |
|
"grad_norm": 0.04114054888486862, |
|
"learning_rate": 8.89545708978049e-06, |
|
"loss": 10.3327, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.49465325569084173, |
|
"grad_norm": 0.03139737620949745, |
|
"learning_rate": 8.798899828438333e-06, |
|
"loss": 10.3342, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.495076760190577, |
|
"grad_norm": 0.0350373312830925, |
|
"learning_rate": 8.70284535798168e-06, |
|
"loss": 10.3335, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.49550026469031233, |
|
"grad_norm": 0.03645787015557289, |
|
"learning_rate": 8.607294207958073e-06, |
|
"loss": 10.3285, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.49592376919004766, |
|
"grad_norm": 0.04092005640268326, |
|
"learning_rate": 8.512246905140165e-06, |
|
"loss": 10.332, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.49634727368978293, |
|
"grad_norm": 0.03972132131457329, |
|
"learning_rate": 8.417703973522917e-06, |
|
"loss": 10.3336, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.49677077818951826, |
|
"grad_norm": 0.02949652262032032, |
|
"learning_rate": 8.323665934320713e-06, |
|
"loss": 10.3329, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.4971942826892536, |
|
"grad_norm": 0.04814364016056061, |
|
"learning_rate": 8.23013330596445e-06, |
|
"loss": 10.3317, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.49761778718898886, |
|
"grad_norm": 0.0334940031170845, |
|
"learning_rate": 8.13710660409871e-06, |
|
"loss": 10.3367, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.4980412916887242, |
|
"grad_norm": 0.03809863701462746, |
|
"learning_rate": 8.044586341578886e-06, |
|
"loss": 10.3347, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.4984647961884595, |
|
"grad_norm": 0.03746895492076874, |
|
"learning_rate": 7.952573028468457e-06, |
|
"loss": 10.3362, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.4988883006881948, |
|
"grad_norm": 0.024187074974179268, |
|
"learning_rate": 7.861067172035962e-06, |
|
"loss": 10.3327, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.4993118051879301, |
|
"grad_norm": 0.03394331783056259, |
|
"learning_rate": 7.770069276752422e-06, |
|
"loss": 10.3268, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.49973530968766544, |
|
"grad_norm": 0.0327443964779377, |
|
"learning_rate": 7.679579844288509e-06, |
|
"loss": 10.332, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5001588141874007, |
|
"grad_norm": 0.027774417772889137, |
|
"learning_rate": 7.589599373511602e-06, |
|
"loss": 10.329, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.5005823186871361, |
|
"grad_norm": 0.03464759886264801, |
|
"learning_rate": 7.500128360483338e-06, |
|
"loss": 10.3334, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.5010058231868714, |
|
"grad_norm": 0.03733719512820244, |
|
"learning_rate": 7.411167298456634e-06, |
|
"loss": 10.3307, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.5014293276866066, |
|
"grad_norm": 0.033785175532102585, |
|
"learning_rate": 7.32271667787302e-06, |
|
"loss": 10.3362, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.501852832186342, |
|
"grad_norm": 0.038209252059459686, |
|
"learning_rate": 7.234776986360059e-06, |
|
"loss": 10.3309, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.5022763366860773, |
|
"grad_norm": 0.03651139885187149, |
|
"learning_rate": 7.147348708728507e-06, |
|
"loss": 10.335, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.5026998411858126, |
|
"grad_norm": 0.03249209746718407, |
|
"learning_rate": 7.060432326969713e-06, |
|
"loss": 10.3326, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.503123345685548, |
|
"grad_norm": 0.049712520092725754, |
|
"learning_rate": 6.974028320252934e-06, |
|
"loss": 10.3269, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.5035468501852832, |
|
"grad_norm": 0.03345096856355667, |
|
"learning_rate": 6.888137164922725e-06, |
|
"loss": 10.3273, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.5039703546850185, |
|
"grad_norm": 0.028842521831393242, |
|
"learning_rate": 6.802759334496289e-06, |
|
"loss": 10.3299, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5043938591847539, |
|
"grad_norm": 0.02980581857264042, |
|
"learning_rate": 6.717895299660892e-06, |
|
"loss": 10.3337, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.5048173636844892, |
|
"grad_norm": 0.032008688896894455, |
|
"learning_rate": 6.633545528271212e-06, |
|
"loss": 10.3275, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.5052408681842244, |
|
"grad_norm": 0.03007701225578785, |
|
"learning_rate": 6.549710485346827e-06, |
|
"loss": 10.3319, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.5056643726839598, |
|
"grad_norm": 0.03393697366118431, |
|
"learning_rate": 6.466390633069608e-06, |
|
"loss": 10.3292, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.5060878771836951, |
|
"grad_norm": 0.04486103355884552, |
|
"learning_rate": 6.383586430781197e-06, |
|
"loss": 10.3289, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.5065113816834304, |
|
"grad_norm": 0.03052888996899128, |
|
"learning_rate": 6.301298334980421e-06, |
|
"loss": 10.3374, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.5069348861831657, |
|
"grad_norm": 0.030694812536239624, |
|
"learning_rate": 6.219526799320919e-06, |
|
"loss": 10.3308, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.507358390682901, |
|
"grad_norm": 0.03446760028600693, |
|
"learning_rate": 6.138272274608403e-06, |
|
"loss": 10.3346, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.5077818951826363, |
|
"grad_norm": 0.033587660640478134, |
|
"learning_rate": 6.057535208798371e-06, |
|
"loss": 10.3337, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.5082053996823717, |
|
"grad_norm": 0.03484556823968887, |
|
"learning_rate": 5.977316046993642e-06, |
|
"loss": 10.3311, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5086289041821069, |
|
"grad_norm": 0.03142661601305008, |
|
"learning_rate": 5.897615231441689e-06, |
|
"loss": 10.3335, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.5090524086818422, |
|
"grad_norm": 0.03492956608533859, |
|
"learning_rate": 5.81843320153248e-06, |
|
"loss": 10.3298, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.5094759131815776, |
|
"grad_norm": 0.035875819623470306, |
|
"learning_rate": 5.739770393795851e-06, |
|
"loss": 10.3339, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.5098994176813129, |
|
"grad_norm": 0.028575167059898376, |
|
"learning_rate": 5.6616272418991926e-06, |
|
"loss": 10.3306, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.5103229221810481, |
|
"grad_norm": 0.034280769526958466, |
|
"learning_rate": 5.584004176645052e-06, |
|
"loss": 10.3339, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.5107464266807835, |
|
"grad_norm": 0.03369034081697464, |
|
"learning_rate": 5.5069016259686635e-06, |
|
"loss": 10.3293, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.5111699311805188, |
|
"grad_norm": 0.03932506591081619, |
|
"learning_rate": 5.430320014935797e-06, |
|
"loss": 10.3339, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.5115934356802541, |
|
"grad_norm": 0.04464678466320038, |
|
"learning_rate": 5.354259765740177e-06, |
|
"loss": 10.3316, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.5120169401799894, |
|
"grad_norm": 0.033909354358911514, |
|
"learning_rate": 5.278721297701339e-06, |
|
"loss": 10.3317, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.5124404446797247, |
|
"grad_norm": 0.02771197073161602, |
|
"learning_rate": 5.203705027262184e-06, |
|
"loss": 10.3337, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.51286394917946, |
|
"grad_norm": 0.03711957111954689, |
|
"learning_rate": 5.129211367986786e-06, |
|
"loss": 10.3374, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.5132874536791954, |
|
"grad_norm": 0.04035378247499466, |
|
"learning_rate": 5.055240730558042e-06, |
|
"loss": 10.3278, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.5137109581789306, |
|
"grad_norm": 0.037376079708337784, |
|
"learning_rate": 4.981793522775457e-06, |
|
"loss": 10.3354, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.5141344626786659, |
|
"grad_norm": 0.033283621072769165, |
|
"learning_rate": 4.908870149552835e-06, |
|
"loss": 10.3304, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.5145579671784013, |
|
"grad_norm": 0.04279647022485733, |
|
"learning_rate": 4.836471012916144e-06, |
|
"loss": 10.3317, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.5149814716781366, |
|
"grad_norm": 0.026392200961709023, |
|
"learning_rate": 4.764596512001162e-06, |
|
"loss": 10.3338, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.5154049761778718, |
|
"grad_norm": 0.038188233971595764, |
|
"learning_rate": 4.693247043051441e-06, |
|
"loss": 10.3363, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.5158284806776072, |
|
"grad_norm": 0.03593307361006737, |
|
"learning_rate": 4.622422999415965e-06, |
|
"loss": 10.3302, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.5162519851773425, |
|
"grad_norm": 0.03967192396521568, |
|
"learning_rate": 4.5521247715470945e-06, |
|
"loss": 10.33, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.5166754896770778, |
|
"grad_norm": 0.0491623692214489, |
|
"learning_rate": 4.482352746998364e-06, |
|
"loss": 10.3386, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5170989941768132, |
|
"grad_norm": 0.0371236614882946, |
|
"learning_rate": 4.413107310422326e-06, |
|
"loss": 10.3336, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.5175224986765484, |
|
"grad_norm": 0.027762679383158684, |
|
"learning_rate": 4.344388843568503e-06, |
|
"loss": 10.3282, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.5179460031762837, |
|
"grad_norm": 0.03931552171707153, |
|
"learning_rate": 4.2761977252811945e-06, |
|
"loss": 10.3331, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.5183695076760191, |
|
"grad_norm": 0.047121018171310425, |
|
"learning_rate": 4.2085343314974715e-06, |
|
"loss": 10.3297, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.5187930121757544, |
|
"grad_norm": 0.042633168399333954, |
|
"learning_rate": 4.141399035245052e-06, |
|
"loss": 10.3337, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.5192165166754896, |
|
"grad_norm": 0.03988894075155258, |
|
"learning_rate": 4.07479220664021e-06, |
|
"loss": 10.3262, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.519640021175225, |
|
"grad_norm": 0.030842246487736702, |
|
"learning_rate": 4.008714212885856e-06, |
|
"loss": 10.3322, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.5200635256749603, |
|
"grad_norm": 0.04261520504951477, |
|
"learning_rate": 3.943165418269401e-06, |
|
"loss": 10.328, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.5204870301746956, |
|
"grad_norm": 0.030063187703490257, |
|
"learning_rate": 3.87814618416078e-06, |
|
"loss": 10.3345, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.5209105346744309, |
|
"grad_norm": 0.030118783935904503, |
|
"learning_rate": 3.8136568690104957e-06, |
|
"loss": 10.3325, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5213340391741662, |
|
"grad_norm": 0.03795788437128067, |
|
"learning_rate": 3.7496978283475648e-06, |
|
"loss": 10.3327, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.5217575436739015, |
|
"grad_norm": 0.036961231380701065, |
|
"learning_rate": 3.686269414777643e-06, |
|
"loss": 10.3344, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.5221810481736369, |
|
"grad_norm": 0.0403430350124836, |
|
"learning_rate": 3.623371977981027e-06, |
|
"loss": 10.3324, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.5226045526733721, |
|
"grad_norm": 0.03135257214307785, |
|
"learning_rate": 3.5610058647107538e-06, |
|
"loss": 10.3319, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.5230280571731075, |
|
"grad_norm": 0.0364365391433239, |
|
"learning_rate": 3.499171418790681e-06, |
|
"loss": 10.3343, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.5234515616728428, |
|
"grad_norm": 0.025732390582561493, |
|
"learning_rate": 3.437868981113557e-06, |
|
"loss": 10.3338, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.5238750661725781, |
|
"grad_norm": 0.03495744988322258, |
|
"learning_rate": 3.37709888963923e-06, |
|
"loss": 10.3302, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.5242985706723134, |
|
"grad_norm": 0.032097022980451584, |
|
"learning_rate": 3.3168614793926524e-06, |
|
"loss": 10.3356, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.5247220751720487, |
|
"grad_norm": 0.029357150197029114, |
|
"learning_rate": 3.2571570824621923e-06, |
|
"loss": 10.3304, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.525145579671784, |
|
"grad_norm": 0.03179454430937767, |
|
"learning_rate": 3.197986027997657e-06, |
|
"loss": 10.3311, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5255690841715194, |
|
"grad_norm": 0.038864728063344955, |
|
"learning_rate": 3.1393486422085618e-06, |
|
"loss": 10.3308, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.5259925886712546, |
|
"grad_norm": 0.027193231508135796, |
|
"learning_rate": 3.08124524836233e-06, |
|
"loss": 10.3314, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.5264160931709899, |
|
"grad_norm": 0.035837847739458084, |
|
"learning_rate": 3.023676166782452e-06, |
|
"loss": 10.3327, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.5268395976707253, |
|
"grad_norm": 0.02682778798043728, |
|
"learning_rate": 2.9666417148468072e-06, |
|
"loss": 10.3325, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.5272631021704606, |
|
"grad_norm": 0.04898487776517868, |
|
"learning_rate": 2.910142206985833e-06, |
|
"loss": 10.3317, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.5276866066701958, |
|
"grad_norm": 0.030211864039301872, |
|
"learning_rate": 2.8541779546808256e-06, |
|
"loss": 10.3292, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.5281101111699312, |
|
"grad_norm": 0.03472064808011055, |
|
"learning_rate": 2.7987492664622307e-06, |
|
"loss": 10.3324, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.5285336156696665, |
|
"grad_norm": 0.03139955550432205, |
|
"learning_rate": 2.743856447907944e-06, |
|
"loss": 10.3309, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.5289571201694018, |
|
"grad_norm": 0.02904195711016655, |
|
"learning_rate": 2.689499801641593e-06, |
|
"loss": 10.332, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.5293806246691372, |
|
"grad_norm": 0.045261383056640625, |
|
"learning_rate": 2.6356796273309116e-06, |
|
"loss": 10.33, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5298041291688724, |
|
"grad_norm": 0.03183293342590332, |
|
"learning_rate": 2.5823962216860562e-06, |
|
"loss": 10.3297, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.5302276336686077, |
|
"grad_norm": 0.04214952513575554, |
|
"learning_rate": 2.5296498784579846e-06, |
|
"loss": 10.3309, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.5306511381683431, |
|
"grad_norm": 0.03488962724804878, |
|
"learning_rate": 2.4774408884368215e-06, |
|
"loss": 10.3333, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.5310746426680784, |
|
"grad_norm": 0.03279737010598183, |
|
"learning_rate": 2.4257695394503287e-06, |
|
"loss": 10.3278, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.5314981471678136, |
|
"grad_norm": 0.03219415992498398, |
|
"learning_rate": 2.374636116362172e-06, |
|
"loss": 10.3334, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.531921651667549, |
|
"grad_norm": 0.05066683888435364, |
|
"learning_rate": 2.32404090107049e-06, |
|
"loss": 10.3306, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.5323451561672843, |
|
"grad_norm": 0.028979485854506493, |
|
"learning_rate": 2.2739841725062715e-06, |
|
"loss": 10.3319, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.5327686606670196, |
|
"grad_norm": 0.03191670775413513, |
|
"learning_rate": 2.2244662066318146e-06, |
|
"loss": 10.333, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.5331921651667549, |
|
"grad_norm": 0.04911280795931816, |
|
"learning_rate": 2.1754872764392698e-06, |
|
"loss": 10.3313, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.5336156696664902, |
|
"grad_norm": 0.039490871131420135, |
|
"learning_rate": 2.1270476519490435e-06, |
|
"loss": 10.3244, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5340391741662255, |
|
"grad_norm": 0.03646280616521835, |
|
"learning_rate": 2.079147600208364e-06, |
|
"loss": 10.3303, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.5344626786659609, |
|
"grad_norm": 0.039123885333538055, |
|
"learning_rate": 2.0317873852898518e-06, |
|
"loss": 10.332, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.5348861831656961, |
|
"grad_norm": 0.04183242470026016, |
|
"learning_rate": 1.9849672682898944e-06, |
|
"loss": 10.3297, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.5353096876654314, |
|
"grad_norm": 0.03520303592085838, |
|
"learning_rate": 1.9386875073274636e-06, |
|
"loss": 10.3265, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.5357331921651668, |
|
"grad_norm": 0.0325089730322361, |
|
"learning_rate": 1.8929483575424455e-06, |
|
"loss": 10.3345, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.5361566966649021, |
|
"grad_norm": 0.029976682737469673, |
|
"learning_rate": 1.8477500710944007e-06, |
|
"loss": 10.3292, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.5365802011646373, |
|
"grad_norm": 0.034131329506635666, |
|
"learning_rate": 1.803092897161096e-06, |
|
"loss": 10.3276, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.5370037056643727, |
|
"grad_norm": 0.03793232887983322, |
|
"learning_rate": 1.75897708193713e-06, |
|
"loss": 10.3349, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.537427210164108, |
|
"grad_norm": 0.025969160720705986, |
|
"learning_rate": 1.715402868632643e-06, |
|
"loss": 10.3325, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.5378507146638433, |
|
"grad_norm": 0.04372668266296387, |
|
"learning_rate": 1.6723704974718756e-06, |
|
"loss": 10.33, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5382742191635786, |
|
"grad_norm": 0.03358982875943184, |
|
"learning_rate": 1.629880205691936e-06, |
|
"loss": 10.3321, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.5386977236633139, |
|
"grad_norm": 0.045495398342609406, |
|
"learning_rate": 1.5879322275414332e-06, |
|
"loss": 10.3334, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.5391212281630492, |
|
"grad_norm": 0.02813423052430153, |
|
"learning_rate": 1.5465267942792127e-06, |
|
"loss": 10.332, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.5395447326627846, |
|
"grad_norm": 0.02770121954381466, |
|
"learning_rate": 1.5056641341730903e-06, |
|
"loss": 10.3296, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.5399682371625198, |
|
"grad_norm": 0.04436861723661423, |
|
"learning_rate": 1.465344472498531e-06, |
|
"loss": 10.3286, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5403917416622551, |
|
"grad_norm": 0.043747782707214355, |
|
"learning_rate": 1.4255680315375164e-06, |
|
"loss": 10.3332, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.5408152461619905, |
|
"grad_norm": 0.028111323714256287, |
|
"learning_rate": 1.3863350305772017e-06, |
|
"loss": 10.3319, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.5412387506617258, |
|
"grad_norm": 0.03884616121649742, |
|
"learning_rate": 1.3476456859087828e-06, |
|
"loss": 10.3317, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.541662255161461, |
|
"grad_norm": 0.04214450716972351, |
|
"learning_rate": 1.3095002108263199e-06, |
|
"loss": 10.3336, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.5420857596611964, |
|
"grad_norm": 0.0312722884118557, |
|
"learning_rate": 1.2718988156254607e-06, |
|
"loss": 10.3357, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5425092641609317, |
|
"grad_norm": 0.09322332590818405, |
|
"learning_rate": 1.2348417076023745e-06, |
|
"loss": 10.3333, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.542932768660667, |
|
"grad_norm": 0.04540476202964783, |
|
"learning_rate": 1.198329091052608e-06, |
|
"loss": 10.3309, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.5433562731604024, |
|
"grad_norm": 0.029997704550623894, |
|
"learning_rate": 1.1623611672698765e-06, |
|
"loss": 10.3358, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.5437797776601376, |
|
"grad_norm": 0.0350346714258194, |
|
"learning_rate": 1.1269381345450526e-06, |
|
"loss": 10.3306, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.5442032821598729, |
|
"grad_norm": 0.04271746799349785, |
|
"learning_rate": 1.0920601881650006e-06, |
|
"loss": 10.3313, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.5446267866596083, |
|
"grad_norm": 0.03767610713839531, |
|
"learning_rate": 1.0577275204115444e-06, |
|
"loss": 10.3275, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.5450502911593436, |
|
"grad_norm": 0.02964678965508938, |
|
"learning_rate": 1.0239403205604014e-06, |
|
"loss": 10.3296, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.5454737956590788, |
|
"grad_norm": 0.03278511017560959, |
|
"learning_rate": 9.906987748800944e-07, |
|
"loss": 10.3329, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.5458973001588142, |
|
"grad_norm": 0.05790937691926956, |
|
"learning_rate": 9.580030666309969e-07, |
|
"loss": 10.3372, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.5463208046585495, |
|
"grad_norm": 0.03746120631694794, |
|
"learning_rate": 9.258533760642563e-07, |
|
"loss": 10.3302, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5467443091582848, |
|
"grad_norm": 0.03203713148832321, |
|
"learning_rate": 8.942498804208498e-07, |
|
"loss": 10.3328, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.5471678136580201, |
|
"grad_norm": 0.032408781349658966, |
|
"learning_rate": 8.631927539305862e-07, |
|
"loss": 10.3328, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.5475913181577554, |
|
"grad_norm": 0.038404081016778946, |
|
"learning_rate": 8.326821678111163e-07, |
|
"loss": 10.3357, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.5480148226574907, |
|
"grad_norm": 0.03704221174120903, |
|
"learning_rate": 8.027182902670571e-07, |
|
"loss": 10.3267, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.5484383271572261, |
|
"grad_norm": 0.02777581661939621, |
|
"learning_rate": 7.733012864890032e-07, |
|
"loss": 10.3331, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.5488618316569613, |
|
"grad_norm": 0.0339139886200428, |
|
"learning_rate": 7.444313186526608e-07, |
|
"loss": 10.3355, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.5492853361566966, |
|
"grad_norm": 0.027996981516480446, |
|
"learning_rate": 7.161085459178929e-07, |
|
"loss": 10.3301, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.549708840656432, |
|
"grad_norm": 0.04270913451910019, |
|
"learning_rate": 6.88333124427909e-07, |
|
"loss": 10.3269, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.5501323451561673, |
|
"grad_norm": 0.0351426862180233, |
|
"learning_rate": 6.611052073083768e-07, |
|
"loss": 10.3306, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.5505558496559025, |
|
"grad_norm": 0.0378975048661232, |
|
"learning_rate": 6.344249446665674e-07, |
|
"loss": 10.3283, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5509793541556379, |
|
"grad_norm": 0.028754916042089462, |
|
"learning_rate": 6.082924835905446e-07, |
|
"loss": 10.3287, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.5514028586553732, |
|
"grad_norm": 0.0465865433216095, |
|
"learning_rate": 5.827079681483438e-07, |
|
"loss": 10.3325, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.5518263631551085, |
|
"grad_norm": 0.037231337279081345, |
|
"learning_rate": 5.576715393871613e-07, |
|
"loss": 10.3278, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.5522498676548439, |
|
"grad_norm": 0.03710845485329628, |
|
"learning_rate": 5.331833353326432e-07, |
|
"loss": 10.3344, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.5526733721545791, |
|
"grad_norm": 0.02809790149331093, |
|
"learning_rate": 5.092434909880317e-07, |
|
"loss": 10.3321, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.5530968766543145, |
|
"grad_norm": 0.045991264283657074, |
|
"learning_rate": 4.858521383334868e-07, |
|
"loss": 10.3345, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.5535203811540498, |
|
"grad_norm": 0.03640573099255562, |
|
"learning_rate": 4.630094063253321e-07, |
|
"loss": 10.3294, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.553943885653785, |
|
"grad_norm": 0.029001332819461823, |
|
"learning_rate": 4.4071542089535454e-07, |
|
"loss": 10.3318, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.5543673901535204, |
|
"grad_norm": 0.02934233844280243, |
|
"learning_rate": 4.18970304950117e-07, |
|
"loss": 10.3299, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.5547908946532557, |
|
"grad_norm": 0.03224503621459007, |
|
"learning_rate": 3.977741783702471e-07, |
|
"loss": 10.3285, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.555214399152991, |
|
"grad_norm": 0.03147895634174347, |
|
"learning_rate": 3.771271580098157e-07, |
|
"loss": 10.3325, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.5556379036527264, |
|
"grad_norm": 0.03843318298459053, |
|
"learning_rate": 3.570293576956596e-07, |
|
"loss": 10.3301, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.5560614081524616, |
|
"grad_norm": 0.0349433533847332, |
|
"learning_rate": 3.3748088822679325e-07, |
|
"loss": 10.332, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.5564849126521969, |
|
"grad_norm": 0.03259619325399399, |
|
"learning_rate": 3.184818573737425e-07, |
|
"loss": 10.3296, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.5569084171519323, |
|
"grad_norm": 0.03497344255447388, |
|
"learning_rate": 3.0003236987802274e-07, |
|
"loss": 10.3314, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.5573319216516676, |
|
"grad_norm": 0.03283681720495224, |
|
"learning_rate": 2.821325274514952e-07, |
|
"loss": 10.3307, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.5577554261514028, |
|
"grad_norm": 0.03914149850606918, |
|
"learning_rate": 2.6478242877583383e-07, |
|
"loss": 10.3321, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.5581789306511382, |
|
"grad_norm": 0.028979448601603508, |
|
"learning_rate": 2.4798216950198127e-07, |
|
"loss": 10.3295, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.5586024351508735, |
|
"grad_norm": 0.0339006632566452, |
|
"learning_rate": 2.317318422496273e-07, |
|
"loss": 10.3326, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.5590259396506088, |
|
"grad_norm": 0.027926115319132805, |
|
"learning_rate": 2.1603153660668674e-07, |
|
"loss": 10.3305, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5594494441503441, |
|
"grad_norm": 0.031478822231292725, |
|
"learning_rate": 2.0088133912881113e-07, |
|
"loss": 10.3288, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.5598729486500794, |
|
"grad_norm": 0.03274491801857948, |
|
"learning_rate": 1.862813333389113e-07, |
|
"loss": 10.3361, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.5602964531498147, |
|
"grad_norm": 0.0399165078997612, |
|
"learning_rate": 1.722315997267021e-07, |
|
"loss": 10.3344, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.5607199576495501, |
|
"grad_norm": 0.030695218592882156, |
|
"learning_rate": 1.5873221574822516e-07, |
|
"loss": 10.3298, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.5611434621492853, |
|
"grad_norm": 0.03967565670609474, |
|
"learning_rate": 1.4578325582548237e-07, |
|
"loss": 10.3305, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5615669666490206, |
|
"grad_norm": 0.03664049133658409, |
|
"learning_rate": 1.3338479134596958e-07, |
|
"loss": 10.3293, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.561990471148756, |
|
"grad_norm": 0.03802071511745453, |
|
"learning_rate": 1.2153689066233266e-07, |
|
"loss": 10.3305, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.5624139756484913, |
|
"grad_norm": 0.036713242530822754, |
|
"learning_rate": 1.1023961909192304e-07, |
|
"loss": 10.3287, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.5628374801482265, |
|
"grad_norm": 0.04824815317988396, |
|
"learning_rate": 9.949303891653161e-08, |
|
"loss": 10.3353, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.5632609846479619, |
|
"grad_norm": 0.03399055823683739, |
|
"learning_rate": 8.929720938193331e-08, |
|
"loss": 10.3302, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5636844891476972, |
|
"grad_norm": 0.030519891530275345, |
|
"learning_rate": 7.965218669766516e-08, |
|
"loss": 10.3277, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.5641079936474325, |
|
"grad_norm": 0.03647278994321823, |
|
"learning_rate": 7.05580240366488e-08, |
|
"loss": 10.3276, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.5645314981471679, |
|
"grad_norm": 0.0370662622153759, |
|
"learning_rate": 6.201477153493506e-08, |
|
"loss": 10.3344, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.5649550026469031, |
|
"grad_norm": 0.038933202624320984, |
|
"learning_rate": 5.402247629139323e-08, |
|
"loss": 10.3313, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.5653785071466384, |
|
"grad_norm": 0.030461156740784645, |
|
"learning_rate": 4.658118236747777e-08, |
|
"loss": 10.3292, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.5658020116463738, |
|
"grad_norm": 0.030602607876062393, |
|
"learning_rate": 3.9690930786995264e-08, |
|
"loss": 10.3294, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.566225516146109, |
|
"grad_norm": 0.03394312039017677, |
|
"learning_rate": 3.335175953581571e-08, |
|
"loss": 10.3342, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.5666490206458443, |
|
"grad_norm": 0.051167815923690796, |
|
"learning_rate": 2.756370356175042e-08, |
|
"loss": 10.3349, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.5670725251455797, |
|
"grad_norm": 0.03260042518377304, |
|
"learning_rate": 2.232679477430777e-08, |
|
"loss": 10.3333, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.567496029645315, |
|
"grad_norm": 0.040678899735212326, |
|
"learning_rate": 1.7641062044515544e-08, |
|
"loss": 10.3287, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.5679195341450503, |
|
"grad_norm": 0.04332433268427849, |
|
"learning_rate": 1.350653120477663e-08, |
|
"loss": 10.3336, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.5683430386447856, |
|
"grad_norm": 0.03431249037384987, |
|
"learning_rate": 9.923225048724671e-09, |
|
"loss": 10.3331, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.5687665431445209, |
|
"grad_norm": 0.0347750224173069, |
|
"learning_rate": 6.891163331101957e-09, |
|
"loss": 10.3338, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.5691900476442562, |
|
"grad_norm": 0.031236495822668076, |
|
"learning_rate": 4.410362767626186e-09, |
|
"loss": 10.3311, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.5696135521439916, |
|
"grad_norm": 0.036892782896757126, |
|
"learning_rate": 2.4808370349460596e-09, |
|
"loss": 10.332, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.5700370566437268, |
|
"grad_norm": 0.02656089887022972, |
|
"learning_rate": 1.1025967705080576e-09, |
|
"loss": 10.3323, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.5704605611434621, |
|
"grad_norm": 0.03345981240272522, |
|
"learning_rate": 2.756495725342312e-10, |
|
"loss": 10.3286, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.5708840656431975, |
|
"grad_norm": 0.03345588967204094, |
|
"learning_rate": 0.0, |
|
"loss": 10.3287, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.5708840656431975, |
|
"eval_loss": 10.330697059631348, |
|
"eval_runtime": 3.473, |
|
"eval_samples_per_second": 286.494, |
|
"eval_steps_per_second": 143.391, |
|
"step": 1348 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1348, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 337, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 28945837916160.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|